#!/usr/local/bin/perl
#
# lon-capa.pl
# Parse the LON-CAPA metadata
#
# Andy Dong 10/23/2002
#
# Contact Gerd Kortemeyer (korte@lite.msu.edu)
#
# Reads pipe-delimited metadata records from metadata_harvest.txt, drops
# records that fail the filters below (missing title/author/abstract, sample
# subjects, /demo/ URLs, non-'seniso' language, private copyright) and then
# either prints the parsed fields (default) or, with -u, registers the author
# and the learning object through the OAIc_* helpers.

use strict;
use warnings;
use LWP::UserAgent;
use Getopt::Std;
use DBI;
use DBD::ODBC;
require OAIcataloging_v2;

# -u flag specifies [u]pdate database; otherwise output to STDOUT
my $usage = <<"EOT";
Usage: lon-capa.pl [-u]
   -u     (U)pdate the database
Without -u it simply prints the parsed metadata to STDOUT
EOT

my %args;
getopts('u', \%args) or die $usage;

# Plain assignment: "my $x = 1 if COND" has undefined behaviour (perlsyn).
my $useDatabase = $args{'u'} ? 1 : 0;

# NOTE(review): database credentials are hard-coded in the source; consider
# moving them to the environment or a protected configuration file.
#my $DBI_DSN='dbi:ODBC:needs2_mel_needs_3_1_dev.odbc';
my $DBI_DSN  = 'dbi:ODBC:needs2_mel_needs_3_1.odbc';
my $DBI_USER = 'autocataloger';
my $DBI_PWD  = 'regolatacotua';
my $dbh;

my $pub_month;
my $pub_year;
my @loncapa;

# HTTP requests
my $content;
my $content_regex = 'File Not Found';

# Configuration
my $debug = 0;
my $url   = 'http://data.lite.msu.edu/cgi-bin/metadata_harvest.pl';

# The list of servers is from the LON-CAPA CVS repository in /loncapa/loncom/production_hosts.tab
my @servers = (
    'newscience.westshore.cc.mi.us',
    's10.lite.msu.edu',
    's12.lite.msu.edu',
    'lon-capa.chem.sunysb.edu',
    'schubert.tmcc.edu',
    'dalton.chem.sfu.ca',
    'capa2.phy.ohiou.edu',
    'pollux.physics.fsu.edu',
    'loncapa.physics.sc.edu',
    'loncapa.math.ucf.edu',
    'zappa.ags.udel.edu',
    'loncapa.gwu.edu',
);
# End Configuration

# Disabled HTTP harvest, kept for reference.
# NOTE(review): the final statement of this disabled block is garbled in the
# source as received; recover it from version control before re-enabling.
#my $ua = new LWP::UserAgent;
#$ua->timeout(600);
#my $request = new HTTP::Request GET => $url;
#$request->authorization_basic('reaper', 'cat4u');
#my $response = $ua->request( $request );
#if ( $response->is_success ) {
#    $content = $response->content;
#    # Delete all blank lines
#    $content =~ s/(?message;
#}

# Load the harvested records, one per line.
@loncapa = ();
my $metadata_file = 'metadata_harvest.txt';
open( my $lon_fh, '<', $metadata_file )
    or die "Unable to open $metadata_file: $!\n";
while ( my $line = <$lon_fh> ) {
    chomp $line;
    push @loncapa, $line;
}
close $lon_fh;

my %records = ();

# LON-CAPA file type => learning resource type id (0 when unknown).
my %learning_resource_type_for = (
    problem     => 114,
    exam        => 114,
    quiz        => 114,
    assess      => 114,
    survey      => 114,
    form        => 114,
    spreadsheet => 114,
    library     => 107,
    page        => 104,
    sequence    => 104,
);

# File types that map to media format id 70 (0 otherwise).
my %is_media_type = map { $_ => 1 } qw(htm gif mov xml);

# Values that are identical for every LON-CAPA record.
my $primary_language = 'en-US';
my $platform         = "5";    # HTML Browser (not specified but construed from metadata)
my $image            = 'http://www.lite.msu.edu/liteani.gif';
my $cost             = 1;      # version.purchase_license_type_id

# Connect once and resolve the loop-invariant ids, instead of reconnecting
# for every record.
my ( $collection_id, $submitter_id );
my $updated  = 0;    # records already present in the database (left untouched)
my $inserted = 0;    # records newly inserted
if ($useDatabase) {
    $dbh = DBI->connect( $DBI_DSN, $DBI_USER, $DBI_PWD,
        { RaiseError => 1, AutoCommit => 0 } )
        or die "Unable to connect to database $DBI_DSN as $DBI_USER: ($DBI::err) $DBI::errstr\n";

    # Configuration information for LON-CAPA
    $collection_id = OAIc_orgexists( $dbh, 'LearningOnline Network with CAPA' );
    $submitter_id  = OAIc_personexists( $dbh, 'adong@smete.org' );
}

RECORD:
foreach my $metadata (@loncapa) {

    # Field layout (pipe separated): title, author, subject, resource path,
    # keywords, version, notes, abstract, type, language, creation date,
    # revision date, owner, copyright.  Missing fields become ''.
    my @tkline = split /\|/, $metadata;
    my (
        $title,         $author,        $subject, $path,
        $keywords,      $version,       $notes,   $abstract,
        $type,          $language,      $creation_date,
        $revision_date, $owner,         $copyright
    ) = map { defined $_ ? $_ : '' } @tkline[ 0 .. 13 ];

    next RECORD if $title eq '';
    next RECORD if $author eq '';
    next RECORD if $subject eq 'Sample' || $subject eq 'Something';

    my $resourceurl = 'http://lon-capa.smete.org' . $path;
    next RECORD if $resourceurl =~ m{/demo/};

    next RECORD if $abstract eq '';

    # Look only for seniso
    next RECORD if $language ne 'seniso';

    # Private means open only to author of material
    next RECORD if $copyright eq 'private';

    # Only the first two whitespace-separated words of the author are used.
    my ( $author_fname, $author_lname ) = split ' ', $author;
    my $author_fname_text = defined $author_fname ? $author_fname : '';
    my $author_lname_text = defined $author_lname ? $author_lname : '';
    my $author_name       = join ' ', $author_fname_text, $author_lname_text;

    # We have to make an exception for Multimedia Physics which is an organization not a person
    my $object_type = $author_lname_text eq 'Physics' ? 'organization' : 'person';

    my $learning_resource_type =
        exists $learning_resource_type_for{$type}
        ? $learning_resource_type_for{$type}
        : 0;
    my $media_format = $is_media_type{$type} ? 70 : 0;

    # Publication year/month come from the creation timestamp; both stay
    # undefined when the timestamp is not "YYYY-MM-DD HH:MM:SS".
    my ( $pub_year, $pub_month ) =
        ( $creation_date =~ /^(\d{4})-(\d{2})-(\d{2})\s(\d{2}):(\d{2}):(\d{2})$/ );

    # public,domain,default,private
    # Public means no login required
    # Domain means restricted to a particular LON-CAPA domain
    # Defaults mean access open to any registered LON-CAPA user
    my $rights_description =
          $copyright eq 'public' ? 'LON-CAPA Public Resource. No login required.'
        : $copyright eq 'domain' ? 'Restricted to certain LON-CAPA domains.'
        :                          'LON-CAPA Default Use Restriction. Login required.';

    if ($useDatabase) {

        # LON-CAPA has single authors: find the author's registry key,
        # creating the organization/person first when it is not yet known.
        my $reg_key;
        if ( $object_type eq 'organization' ) {
            $reg_key = OAIc_orgexists( $dbh, $author_name );
            if ( !$reg_key ) {
                printf( "Inserting new organization %s\n", $author_name );
                OAIc_insert_org( $dbh, $collection_id, $submitter_id, '',
                    $author_name, '', '', '', '', '', '', '', '' );
                $reg_key = OAIc_orgexists( $dbh, $author_name );
            }
        }
        else {
            $reg_key = OAIc_personexists_name( $dbh, $author_name );
            if ( !$reg_key ) {
                printf( "Inserting new person(author) %s\n", $author_name );
                OAIc_insert_person( $dbh, $collection_id, $submitter_id,
                    $author_lname, $author_fname, '', '' );
                $reg_key = OAIc_personexists_name( $dbh, $author_name );
            }
        }

        if ( OAIc_loexists( $dbh, $title ) ) {

            # Already catalogued: leave the existing record alone.
            $updated++;
        }
        else {
            printf( "Inserting new record for %s\n", $title );

            # NOTE(review): the original call had an empty slot (", ,")
            # after $platform.  Perl collapses that to nothing, so the
            # argument list below is what was actually passed; confirm
            # against OAIc_insert_lo's signature that no argument is missing.
            OAIc_insert_lo(
                $dbh,           $title,         $primary_language,
                $abstract,      $image,         $pub_month,
                $pub_year,      $keywords,      $submitter_id,
                $reg_key,       $collection_id, $collection_id,
                $media_format,  $platform,      '',
                $resourceurl,   '',             1,
                $reg_key,       $collection_id, $collection_id,
                '',             '',             '',
                $learning_resource_type, $rights_description, $cost
            );
            $inserted++;
        }

        # Commit after every record so earlier work survives a later failure.
        $dbh->commit;
    }
    else {

        # Print information if no database updates requested
        printf( "Title: %s\n",                  $title );
        printf( "Author First Name: %s\n",      $author_fname_text );
        printf( "Author Last Name: %s\n",       $author_lname_text );
        printf( "Subject: %s\n",                $subject );
        printf( "URL: %s\n",                    $resourceurl );
        printf( "Keywords: %s\n",               $keywords );
        printf( "Version: %s\n",                $version );
        printf( "Notes: %s\n",                  $notes );
        printf( "Abstract: %s\n",               $abstract );
        printf( "Learning Resource Type: %d\n", $learning_resource_type );
        printf( "Media Format: %d\n",           $media_format );
        printf( "Primary Language: %s\n",       $primary_language );
        printf( "Creation Date: %s\n",          $creation_date );
        printf( "Revision Date: %s\n",          $revision_date );
        printf( "Copyright: %s\n",              $copyright );

        # An unparseable date prints as year 0 / month 00, as before.
        printf(
            "Publication Year: %4d\tPublication Month: %02d\n",
            defined $pub_year  ? $pub_year  : 0,
            defined $pub_month ? $pub_month : 0
        );
    }
}

if ($useDatabase) {
    printf( "Inserted %d new record(s); %d already present.\n",
        $inserted, $updated );
    $dbh->disconnect;
}