--- nsdl/nsdlloncapaorg/harvester.pl 2003/07/29 14:13:36 1.3 +++ nsdl/nsdlloncapaorg/harvester.pl 2003/10/21 15:58:26 1.7 @@ -12,6 +12,9 @@ use strict; use LWP::UserAgent; use Getopt::Std; use Digest::MD5 qw(md5_hex); +use IO::File; + +my $basepath='/home/httpd/cgi-bin/OAI-XMLFile/XMLFile/nsdlexport/data'; my $pub_month; my $pub_year; @@ -25,10 +28,29 @@ my $content_regex = 'File Not Found'; # Configuration my $debug = 0; -my $url = 'http://s10.lite.msu.edu/cgi-bin/metadata_harvest.pl'; + # The list of servers is from the LON-CAPA CVS repository in /loncapa/loncom/production_hosts.tab -my @servers = ( 'newscience.westshore.cc.mi.us', 's10.lite.msu.edu', 's12.lite.msu.edu', 'lon-capa.chem.sunysb.edu', 'schubert.tmcc.edu', 'dalton.chem.sfu.ca', 'capa2.phy.ohiou.edu', 'pollux.physics.fsu.edu', 'loncapa.physics.sc.edu', 'loncapa.math.ucf.edu', 'zappa.ags.udel.edu', 'loncapa.gwu.edu'); +my @servers = ( +'newscience.westshore.cc.mi.us', +'s10.lite.msu.edu', +'s12.lite.msu.edu', +'lon-capa.chem.sunysb.edu', +'schubert.tmcc.edu', +'dalton.chem.sfu.ca', +'capa2.phy.ohiou.edu', +'pollux.physics.fsu.edu', +'loncapa.physics.sc.edu', +'loncapa.math.ucf.edu', +'zappa.ags.udel.edu', +'loncapa.gwu.edu', +'neptune.physics.ndsu.nodak.edu', +'capa1.uwsp.edu', +'natasha.it.fit.edu', +'loncapa.Mines.EDU', +'loncapa.chm.nau.edu'); +foreach (@servers) { + my $url='http://'.$_.'/cgi-bin/metadata_harvest.pl'; # End Configuration my $ua = new LWP::UserAgent; @@ -40,6 +62,7 @@ $request->authorization_basic('reaper', my $response = $ua->request( $request ); if ( $response->is_success ) { + print 'SUCCESS: ' . $response->message.' for '.$url."\n\n"; $content = $response->content; # Delete all blank lines $content =~ s/(?is_success ) { # Push the content into an array @loncapa = split /\n/, $content; } else { - die 'LON-CAPA request failed: ' . $response->message; + print 'LON-CAPA request failed: ' . $response->message.' for '.$url."\n\n"; + next; } #@loncapa=undef; @@ -60,7 +84,6 @@ if ( $response->is_success ) { #} my %records = ();; -print ''."\n\n"; foreach my $metadata (@loncapa) { chomp $metadata; @@ -84,15 +107,19 @@ foreach my $metadata (@loncapa) { next if ( ($subject eq 'Sample') || ($subject eq 'Something') ); my $resourceurl = 'http://nsdl.lon-capa.org' . $tkline[3]; my $baseid=$tkline[3]; + my ($adom,$auname)=($baseid=~/^\/res\/(\w+)\/(\w+)\//); $baseid=~s/\W/\_/g; $baseid=~s/^\_res\_//g; + my $fileid=md5_hex($baseid); next if ( $resourceurl =~ /(.*)\/demo\/(.*)/ ); my $keywords = $tkline[4]; my $version = $tkline[5]; my $notes = $tkline[6]; my $abstract = $tkline[7]; - next if ($abstract eq ''); + unless ($abstract) { $abstract=$subject; } + unless ($abstract) { $abstract=$title; } + unless ($abstract) { $abstract=$keywords; } my $type = $tkline[8]; my $learning_resource_type; if ( $type eq 'problem' ) { @@ -149,8 +176,19 @@ foreach my $metadata (@loncapa) { # Defaults mean access open to any registered LON-CAPA user # Private means open only to author of material next if ( $copyright eq 'private'); + next if ( $copyright eq 'domain'); my $platform = "5"; # HTML Browser (not specified but construed from metadata) - print (<'.$basepath.'/'.$adom.'/'.$auname.'/'.$baseid.'.xml'); + print XML (< + $abstract $rev_year-$rev_month-$rev_day - ENDMETA + close (XML); +} }