version 1.1, 2003/07/28 14:27:05
|
version 1.3, 2003/07/29 14:13:36
|
Line 11
|
Line 11
|
use strict; |
use strict; |
use LWP::UserAgent; |
use LWP::UserAgent; |
use Getopt::Std; |
use Getopt::Std; |
|
use Digest::MD5 qw(md5_hex); |
use DBI; |
|
use DBD::ODBC; |
|
|
|
require OAIcataloging_v2; |
|
|
|
# -u flag specifies [u]pdate database; otherwise output to STDOUT |
|
|
|
my $usage = << "EOT"; |
|
Usage: lon-capa.pl -u |
|
|
|
-u (U)pdate the database |
|
|
|
Without -u it simply prints SQL UPDATE statements to STDOUT |
|
EOT |
|
|
|
my %args; |
|
getopts('u', \%args) || die $usage; |
|
|
|
my $useDatabase = 1 if ($args{'u'}); |
|
|
|
#my $DBI_DSN='dbi:ODBC:needs2_mel_needs_3_1_dev.odbc'; |
|
my $DBI_DSN='dbi:ODBC:needs2_mel_needs_3_1.odbc'; |
|
my $DBI_USER='autocataloger'; |
|
my $DBI_PWD='regolatacotua'; |
|
my $dbh; |
|
|
|
my $pub_month; |
my $pub_month; |
my $pub_year; |
my $pub_year; |
Line 50 my $content_regex = 'File Not Found';
|
Line 25 my $content_regex = 'File Not Found';
|
# Configuration |
# Configuration |
|
|
my $debug = 0; |
my $debug = 0; |
my $url = 'http://data.lite.msu.edu/cgi-bin/metadata_harvest.pl'; |
my $url = 'http://s10.lite.msu.edu/cgi-bin/metadata_harvest.pl'; |
# The list of servers is from the LON-CAPA CVS repository in /loncapa/loncom/production_hosts.tab |
# The list of servers is from the LON-CAPA CVS repository in /loncapa/loncom/production_hosts.tab |
my @servers = ( 'newscience.westshore.cc.mi.us', 's10.lite.msu.edu', 's12.lite.msu.edu', 'lon-capa.chem.sunysb.edu', 'schubert.tmcc.edu', 'dalton.chem.sfu.ca', 'capa2.phy.ohiou.edu', 'pollux.physics.fsu.edu', 'loncapa.physics.sc.edu', 'loncapa.math.ucf.edu', 'zappa.ags.udel.edu', 'loncapa.gwu.edu'); |
my @servers = ( 'newscience.westshore.cc.mi.us', 's10.lite.msu.edu', 's12.lite.msu.edu', 'lon-capa.chem.sunysb.edu', 'schubert.tmcc.edu', 'dalton.chem.sfu.ca', 'capa2.phy.ohiou.edu', 'pollux.physics.fsu.edu', 'loncapa.physics.sc.edu', 'loncapa.math.ucf.edu', 'zappa.ags.udel.edu', 'loncapa.gwu.edu'); |
|
|
# End Configuration |
# End Configuration |
|
|
#my $ua = new LWP::UserAgent; |
my $ua = new LWP::UserAgent; |
#$ua->timeout(600); |
$ua->timeout(600); |
|
|
#my $request = new HTTP::Request GET => $url; |
my $request = new HTTP::Request GET => $url; |
#$request->authorization_basic('reaper', 'cat4u'); |
$request->authorization_basic('reaper', 'cat4u'); |
|
|
#my $response = $ua->request( $request ); |
my $response = $ua->request( $request ); |
|
|
#if ( $response->is_success ) { |
if ( $response->is_success ) { |
# $content = $response->content; |
$content = $response->content; |
# Delete all blank lines |
# Delete all blank lines |
# $content =~ s/(?<!.)\n//g; |
$content =~ s/(?<!.)\n//g; |
# Replace all ^M with spaces |
# Replace all ^M with spaces |
# $content =~ s/\r/\s/g; |
$content =~ s/\r/\s/g; |
# Push the content into an array |
# Push the content into an array |
# @loncapa = split /\n/, $content; |
@loncapa = split /\n/, $content; |
#} else { |
} else { |
# die 'LON-CAPA request failed: ' . $response->message; |
die 'LON-CAPA request failed: ' . $response->message; |
#} |
} |
|
|
@loncapa=undef; |
#@loncapa=undef; |
open (LON_FILE, 'metadata_harvest.txt') || die; |
#open (LON_FILE, 'metadata_harvest.txt') || die; |
|
|
while (<LON_FILE>) { |
#while (<LON_FILE>) { |
chomp; |
# chomp; |
push(@loncapa,$_); |
# push(@loncapa,$_); |
} |
#} |
|
|
my %records = ();; |
my %records = ();; |
|
print '<?xml version="1.0" encoding="UTF-8"?>'."\n\n"; |
|
|
foreach my $metadata (@loncapa) { |
foreach my $metadata (@loncapa) { |
chomp $metadata; |
chomp $metadata; |
|
$metadata=~s/[^\w\d\s\.\;\:\,\|\/]/ /gs; |
my @tkline = split('\|', $metadata); |
my @tkline = split('\|', $metadata); |
my $title = $tkline[0]; |
my $title = $tkline[0]; |
next if ( $title eq '' ); |
next if ( $title eq '' ); |
Line 104 foreach my $metadata (@loncapa) {
|
Line 82 foreach my $metadata (@loncapa) {
|
} |
} |
my $subject = $tkline[2]; |
my $subject = $tkline[2]; |
next if ( ($subject eq 'Sample') || ($subject eq 'Something') ); |
next if ( ($subject eq 'Sample') || ($subject eq 'Something') ); |
my $resourceurl = 'http://lon-capa.smete.org' . $tkline[3]; |
my $resourceurl = 'http://nsdl.lon-capa.org' . $tkline[3]; |
|
my $baseid=$tkline[3]; |
|
$baseid=~s/\W/\_/g; |
|
$baseid=~s/^\_res\_//g; |
|
|
next if ( $resourceurl =~ /(.*)\/demo\/(.*)/ ); |
next if ( $resourceurl =~ /(.*)\/demo\/(.*)/ ); |
my $keywords = $tkline[4]; |
my $keywords = $tkline[4]; |
my $version = $tkline[5]; |
my $version = $tkline[5]; |
Line 148 foreach my $metadata (@loncapa) {
|
Line 130 foreach my $metadata (@loncapa) {
|
next if ( $language ne 'seniso'); |
next if ( $language ne 'seniso'); |
my $primary_language='en-US'; |
my $primary_language='en-US'; |
my $creation_date = $tkline[10]; |
my $creation_date = $tkline[10]; |
my ($pub_year,$pub_month,$pub_day) = ( $creation_date =~ /^(\d{4})-(\d{2})-(\d{2})\s(\d{2}):(\d{2}):(\d{2})$/ ); |
my ($pub_year,$pub_month,$pub_day) = ( $creation_date =~ /^(\d{4}) (\d{2}) (\d{2})\s(\d{2}):(\d{2}):(\d{2})$/ ); |
my $revision_date = $tkline[11]; |
my $revision_date = $tkline[11]; |
|
my ($rev_year,$rev_month,$rev_day) = ( $revision_date =~ /^(\d{4}) (\d{2}) (\d{2})\s(\d{2}):(\d{2}):(\d{2})$/ ); |
my $owner = $tkline[12]; |
my $owner = $tkline[12]; |
my $rights_description; |
my $rights_description; |
my $copyright = $tkline[13]; # public,domain,default,private (skip if private and domain) |
my $copyright = $tkline[13]; # public,domain,default,private (skip if private and domain) |
Line 167 foreach my $metadata (@loncapa) {
|
Line 150 foreach my $metadata (@loncapa) {
|
# Private means open only to author of material |
# Private means open only to author of material |
next if ( $copyright eq 'private'); |
next if ( $copyright eq 'private'); |
my $platform = "5"; # HTML Browser (not specified but construed from metadata) |
my $platform = "5"; # HTML Browser (not specified but construed from metadata) |
|
print (<<ENDMETA); |
|
<oaidc:dc xmlns="http://purl.org/dc/elements/1.1/" |
|
xmlns:oaidc="http://www.openarchives.org/OAI/2.0/oai_dc/" |
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
|
xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ |
|
http://www.openarchives.org/OAI/2.0/oai_dc.xsd" |
|
> |
|
<title>$title</title> |
|
<creator>$author_fname $author_lname</creator> |
|
<identifier>$resourceurl</identifier> |
|
<subject>$keywords</subject> |
|
<subject>$subject</subject> |
|
<language>$primary_language</language> |
|
<description>$abstract</description> |
|
<date>$rev_year-$rev_month-$rev_day</date> |
|
</oaidc:dc> |
|
|
# Connect to database |
ENDMETA |
if ( $useDatabase ) { |
|
$dbh= DBI->connect($DBI_DSN, $DBI_USER, $DBI_PWD, { RaiseError => 1, AutoCommit => 0 }) || die "Unable to connect to database $DBI_DSN as $DBI_USER: ($DBI::err) $DBI::errstr\n";; |
|
# Configuration information for LON-CAPA |
|
my $collection_id = OAIc_orgexists($dbh,'LearningOnline Network with CAPA'); |
|
my $submitter_id = OAIc_personexists($dbh,'adong@smete.org'); |
|
my $image = 'http://www.lite.msu.edu/liteani.gif'; |
|
my $cost = 1; # version.purchase_license_type_id |
|
my $collection = 'LearningOnline Network with CAPA'; |
|
# LON-CAPA has single authors |
|
my $reg_key; |
|
if ( $object_type eq 'organization' ) { |
|
if ( ! ($reg_key = OAIc_orgexists($dbh,join(' ',$author_fname,$author_lname))) ) { |
|
printf("Inserting new organization %s\n", join(' ',$author_fname, $author_lname)); |
|
my $success = OAIc_insert_org($dbh,$collection_id,$submitter_id,'',join(' ',$author_fname,$author_lname),'','','','','','','',''); |
|
$reg_key = OAIc_orgexists($dbh,join(' ',$author_fname,$author_lname)); |
|
} |
|
} else { |
|
if ( ! ($reg_key = OAIc_personexists_name($dbh,join(' ',$author_fname,$author_lname))) ) { |
|
printf("Inserting new person(author) %s\n", join(' ',$author_fname, $author_lname)); |
|
my $success = OAIc_insert_person($dbh,$collection_id,$submitter_id,$author_lname,$author_fname,'',''); |
|
$reg_key = OAIc_personexists_name($dbh,join(' ',$author_fname,$author_lname)); |
|
} |
|
} |
|
my $updated; |
|
my $inserted; |
|
if ( my $general_key = OAIc_loexists($dbh,$title) ) { |
|
# Do nothing |
|
$updated = $updated + 1; |
|
} else { |
|
printf("Inserting new record for %s\n",$title); |
|
my $success = OAIc_insert_lo($dbh, $title, $primary_language, $abstract, $image, $pub_month, $pub_year, $keywords, $submitter_id, $reg_key, $collection_id, $collection_id, $media_format, $platform, , '', $resourceurl, '', 1, $reg_key, $collection_id, $collection_id, '', '', '', $learning_resource_type, $rights_description, $cost); |
|
$inserted = $inserted + 1; |
|
} |
|
} |
|
|
|
if (! $useDatabase ) { # Print information if no database updates requested |
|
printf("Title: %s\n", $title); |
|
printf("Author First Name: %s\n", $author_fname); |
|
printf("Author Last Name: %s\n", $author_lname); |
|
printf("Subject: %s\n", $subject); |
|
printf("URL: %s\n", $resourceurl); |
|
printf("Keywords: %s\n", $keywords); |
|
printf("Version: %s\n", $version); |
|
printf("Notes: %s\n", $notes); |
|
printf("Abstract: %s\n", $abstract); |
|
printf("Learning Resource Type: %d\n", $learning_resource_type); |
|
printf("Media Format: %d\n", $media_format); |
|
printf("Primary Language: %s\n", $primary_language); |
|
printf("Creation Date: %s\n", $creation_date); |
|
printf("Revision Date: %s\n", $revision_date); |
|
printf("Copyright: %s\n", $copyright); |
|
printf("Publication Year: %4d\tPublication Month: %02d\n", $pub_year, $pub_month); |
|
} |
|
|
|
if ( $useDatabase ) { |
|
$dbh->commit; |
|
$dbh->disconnect; |
|
} |
|
|
|
} |
} |