#!/usr/local/bin/perl # # lon-capa.pl # Parse the LON-CAPA metadata # # Andy Dong 10/23/2002 # # Contact Gerd Kortemeyer (korte@lite.msu.edu) use strict; use LWP::UserAgent; use Getopt::Std; use Digest::MD5 qw(md5_hex); use IO::File; my $basepath='/home/httpd/cgi-bin/OAI-XMLFile/XMLFile/nsdlexport/data'; my $pub_month; my $pub_year; my @loncapa; # HTTP requests my $content; my $content_regex = 'File Not Found'; # Configuration my $debug = 0; # The list of servers is from the LON-CAPA CVS repository in /loncapa/loncom/production_hosts.tab my @servers = ( 'newscience.westshore.cc.mi.us', 's10.lite.msu.edu', 's12.lite.msu.edu', 'lon-capa.chem.sunysb.edu', 'schubert.tmcc.edu', 'dalton.chem.sfu.ca', 'capa2.phy.ohiou.edu', 'pollux.physics.fsu.edu', 'loncapa.physics.sc.edu', 'loncapa.math.ucf.edu', 'zappa.ags.udel.edu', 'loncapa.gwu.edu', 'neptune.physics.ndsu.nodak.edu', 'capa1.uwsp.edu'); foreach (@servers) { my $url='http://'.$_.'/cgi-bin/metadata_harvest.pl'; # End Configuration my $ua = new LWP::UserAgent; $ua->timeout(600); my $request = new HTTP::Request GET => $url; $request->authorization_basic('reaper', 'cat4u'); my $response = $ua->request( $request ); if ( $response->is_success ) { print 'SUCCESS: ' . $response->message.' for '.$url."\n\n"; $content = $response->content; # Delete all blank lines $content =~ s/(?message.' 
for '.$url."\n\n"; next; } #@loncapa=undef; #open (LON_FILE, 'metadata_harvest.txt') || die; #while () { # chomp; # push(@loncapa,$_); #} my %records = ();; foreach my $metadata (@loncapa) { chomp $metadata; $metadata=~s/[^\w\d\s\.\;\:\,\|\/]/ /gs; my @tkline = split('\|', $metadata); my $title = $tkline[0]; next if ( $title eq '' ); my $author = $tkline[1]; next if ( $author eq '' ); my @authorname = split(' ', $author); my $author_fname = $authorname[0]; my $author_lname = $authorname[1]; # We have to make an exception for Multimedia Physics which is an organization not a person my $object_type; if ( $author_lname eq 'Physics' ) { $object_type = 'organization'; } else { $object_type = 'person'; } my $subject = $tkline[2]; next if ( ($subject eq 'Sample') || ($subject eq 'Something') ); my $resourceurl = 'http://nsdl.lon-capa.org' . $tkline[3]; my $baseid=$tkline[3]; my ($adom,$auname)=($baseid=~/^\/res\/(\w+)\/(\w+)\//); $baseid=~s/\W/\_/g; $baseid=~s/^\_res\_//g; my $fileid=md5_hex($baseid); next if ( $resourceurl =~ /(.*)\/demo\/(.*)/ ); my $keywords = $tkline[4]; my $version = $tkline[5]; my $notes = $tkline[6]; my $abstract = $tkline[7]; next if ($abstract eq ''); my $type = $tkline[8]; my $learning_resource_type; if ( $type eq 'problem' ) { $learning_resource_type = 114; } elsif ( $type eq 'exam' ) { $learning_resource_type = 114; } elsif ( $type eq 'quiz' ) { $learning_resource_type = 114; } elsif ( $type eq 'assess' ) { $learning_resource_type = 114; } elsif ( $type eq 'survey' ) { $learning_resource_type = 114; } elsif ( $type eq 'form' ) { $learning_resource_type = 114; } elsif ( $type eq 'library' ) { $learning_resource_type = 107; } elsif ( $type eq 'page' ) { $learning_resource_type = 104; } elsif ( $type eq 'sequence' ) { $learning_resource_type = 104; } elsif ( $type eq 'spreadsheet' ) { $learning_resource_type = 114; } else { $learning_resource_type = 0; } my $media_format; if ( ($type eq 'htm') || ($type eq 'gif') || ($type eq 'mov') || ($type eq 
'xml') ) { $media_format = 70; } else { $media_format = 0; } my $language = $tkline[9]; # Look only for seniso next if ( $language ne 'seniso'); my $primary_language='en-US'; my $creation_date = $tkline[10]; my ($pub_year,$pub_month,$pub_day) = ( $creation_date =~ /^(\d{4}) (\d{2}) (\d{2})\s(\d{2}):(\d{2}):(\d{2})$/ ); my $revision_date = $tkline[11]; my ($rev_year,$rev_month,$rev_day) = ( $revision_date =~ /^(\d{4}) (\d{2}) (\d{2})\s(\d{2}):(\d{2}):(\d{2})$/ ); my $owner = $tkline[12]; my $rights_description; my $copyright = $tkline[13]; # public,domain,default,private (skip if private and domain) # Public means no login required if ( $copyright eq 'public' ) { $rights_description = 'LON-CAPA Public Resource. No login required.'; } elsif ($copyright eq 'domain') { $rights_description = 'Restricted to certain LON-CAPA domains.'; } else { $rights_description = 'LON-CAPA Default Use Restriction. Login required.'; } # Domain means restricted to a particular LON-CAPA domain # Defaults mean access open to any registered LON-CAPA user # Private means open only to author of material next if ( $copyright eq 'private'); my $platform = "5"; # HTML Browser (not specified but construed from metadata) # # Create path # unless (-e $basepath.'/'.$adom) { mkdir($basepath.'/'.$adom); } unless (-e $basepath.'/'.$adom.'/'.$auname) { mkdir($basepath.'/'.$adom.'/'.$auname) || die 'Could not create '.$basepath.'/'.$adom.'/'.$auname; } open(XML,'>'.$basepath.'/'.$adom.'/'.$auname.'/'.$baseid.'.xml'); print XML (< $title $author_fname $author_lname $resourceurl $keywords $subject $primary_language $abstract $rev_year-$rev_month-$rev_day ENDMETA close (XML); } }
# ---------------------------------------------------------------------------
# NOTE(review): maintainer documentation for the script text above.
#
# Purpose (as far as the visible code shows)
#   For every host in @servers the script issues an HTTP GET, with basic
#   authentication and a 600 second timeout, to
#   http://<host>/cgi-bin/metadata_harvest.pl.  It then walks the entries of
#   @loncapa, treating each entry as one pipe-delimited metadata record, and
#   writes one file per accepted record to
#   $basepath/<adom>/<auname>/<baseid>.xml.
#
# Record layout read from split('\|', ...), by index
#   0 title      1 author         2 subject        3 resource path
#   4 keywords   5 version        6 notes          7 abstract
#   8 type       9 language      10 creation date 11 revision date
#  12 owner     13 copyright
#   Before splitting, every character outside the set
#   \w \d \s . ; : , | /  is replaced by a space.  The date patterns expect
#   space-separated year, month and day, which is consistent with that
#   replacement having been applied to the date fields as well.
#
# A record is skipped when
#   - title, author or abstract is the empty string;
#   - subject is 'Sample' or 'Something';
#   - the resource URL contains '/demo/';
#   - language is not 'seniso';
#   - copyright is 'private'.
#
# Derived values
#   - <adom> and <auname> are the first two path components after /res/ in
#     field 3; <baseid> is field 3 with every non-word character turned into
#     '_' and a leading '_res_' removed.
#   - $object_type is 'organization' when the second word of the author is
#     'Physics', otherwise 'person'.
#   - $learning_resource_type and $media_format are numeric codes chosen from
#     $type; their meaning is not defined anywhere in this file.
#
# Damaged text -- restore from a clean copy before running
#   This copy looks as if a markup stripper removed everything between a '<'
#   and the following '>' (presumed cause; confirm against the repository):
#   - "$content =~ s/(?message.'": the rest of the substitution, the end of
#     the success branch and the beginning of the failure branch are missing,
#     so the code that fills @loncapa from $content is not visible here;
#   - "while () {" inside the commented-out file-reading fallback has lost
#     its readline operand;
#   - "print XML (< $title ... ENDMETA" has lost the heredoc opener and the
#     markup of the XML template; only the interpolated variables survive.
#   In addition the original line breaks are gone, so each '#' comment now
#   comments out all code that follows it on the same physical line.
#
# Review notes for the restored version (not applied here)
#   - open(XML, ...) and the first mkdir() have no error check; the open is
#     the two-argument form with a bareword handle.
#   - $adom and $auname are undefined when field 3 does not match
#     /res/<dom>/<user>/, yet they are still used to build paths.
#   - The basic-auth user name and password are hard-coded in the source.
#   - $content_regex, $debug, %records, $fileid, $object_type, $platform,
#     $rights_description and the file-level $pub_month/$pub_year are not
#     used in the visible text; some may be used inside the lost template.
# ---------------------------------------------------------------------------