--- nsdl/nsdlloncapaorg/harvester.pl 2005/11/25 19:29:56 1.8 +++ nsdl/nsdlloncapaorg/harvester.pl 2006/05/10 16:28:56 1.9 @@ -61,7 +61,8 @@ my @servers = ( 'meitner.physics.hope.edu', 'loncapa.vcu.edu', 'lon-capa.ucsc.edu', -'lon-capa.bsu.edu' +'lon-capa.bsu.edu', +'harvard.lon-capa.org' ); foreach (@servers) { @@ -141,9 +142,16 @@ foreach my $metadata (@loncapa) { my $version = $tkline[5]; my $notes = $tkline[6]; my $abstract = $tkline[7]; - unless ($abstract) { $abstract=$subject; } - unless ($abstract) { $abstract=$title; } - unless ($abstract) { $abstract=$keywords; } + $abstract=~s/ s / /gs; + $abstract=~s/\s+/ /gs; + my $postsubject=$subject; + unless ($postsubject) { + $postsubject=$keywords; + } else { + $postsubject.=' ('.$keywords.')'; + } + unless ($postsubject=~/\w/) { $knockout{'nosubject_'.$rawtype}++; next; } + unless ($abstract) { $knockout{'noabstract_'.$rawtype}++; next; } my $type = $rawtype; if ($type=~/htm/) { $type='htm'; } @@ -204,9 +212,7 @@ foreach my $metadata (@loncapa) { # Domain means restricted to a particular LON-CAPA domain # Defaults mean access open to any registered LON-CAPA user # Private means open only to author of material - if ( $copyright eq 'private') { $knockout{'private_'.$rawtype}++; next; } - if ( $copyright eq 'domain') { $knockout{'domain_'.$rawtype}++; next; } - if ( $copyright eq 'custom') { $knockout{'custom_'.$rawtype}++; next; } + unless ($copyright eq 'public') { $knockout{'notpublic_'.$rawtype}++; next; } my $platform = "5"; # HTML Browser (not specified but construed from metadata) # # We actually do this @@ -231,10 +237,9 @@ foreach my $metadata (@loncapa) { http://www.openarchives.org/OAI/2.0/oai_dc.xsd" > $title - $author_fname $author_lname + $author $resourceurl - $keywords - $subject + $postsubject $primary_language $abstract $rev_year-$rev_month-$rev_day