#!/usr/local/bin/perl -w
#
# cstc.pl - Use OAI MHP to harvest metadata from CSTS in oai_ims format
#
# Written by Andy Dong 11/01/2001
#
# What the script currently does:
#   * parses the local file 'matti.xml' (the live OAI request further down is
#     commented out),
#   * walks every <record> element that carries a <metametadata> section,
#   * extracts the general / lifecycle / technical / educational fields, and
#   * either inserts/updates the record in the database (-u) or prints the
#     extracted fields to STDOUT.
#
# All database work happens in a single transaction: it is committed once
# every record has been processed and rolled back if anything fails.
#

use strict;
use Getopt::Std;
use Time::HiRes qw(usleep ualarm gettimeofday tv_interval);
use HTTP::Request;
use LWP::UserAgent;
use XML::Element;
use XML::Parser;
use XML::TreeBuilder;
use DBI;
use DBD::ODBC;

require OAIvocabulary_v2;
require OAIcataloging_v2;

# -u flag specifies [u]pdate database; otherwise output to STDOUT
my $usage = <<"EOT";
Usage: cstc.pl -u
  -u  (U)pdate the database
Without -u it simply prints to STDOUT
EOT

my %args;
getopts('u', \%args) or die $usage;

my $inserted = 0;
my $updated  = 0;

# Plain assignment: "my $x = 1 if COND" has undefined behaviour (see perlsyn).
my $useDatabase = $args{'u'} ? 1 : 0;
my $dbh;

# Database Configuration
if ( $useDatabase ) {
    print "Updating the database\n";

    # NOTE(review): credentials are hard-coded in the source; consider moving
    # them to the environment or a protected configuration file.
    my $DBI_DSN  = 'dbi:ODBC:mel.odbc';
    my $DBI_USER = 'autocataloger';
    my $DBI_PWD  = 'regolatacotua';

    $dbh = DBI->connect($DBI_DSN, $DBI_USER, $DBI_PWD,
                        { RaiseError => 1, AutoCommit => 0 })
        or die "Unable to connect to database $DBI_DSN as $DBI_USER\n";
}

# Values that are the same for every MATTI record (used only with -u).
my $image              = "http://www.smete.org/images/affiliation/matti.gif";  # Logo
my $submitter_key      = '{710FE693-46E9-4002-BA94-1BE2E6218CD6}';             # Andy Dong
my $collection         = 'National Library of Virtual Manipulatives for Interactive Mathematics';
my $collection_reg_key = '{8D09C011-23B6-4F23-A690-6C74EFF5E4C7}';
my $publisher          = 'MATTI Associates LLC';
my $publisher_reg_key  = '{120E9D92-6F22-4FEC-A9C0-F95C571174BA}';

# Live harvesting is disabled; the records are read from a local file instead.
#my $content;
#my $url = 'http://www.cstc.org/cgi-bin/OAI/CSTC.pl?verb=ListRecords&metadataPrefix=ims1_1';
#my $ua = new LWP::UserAgent;
#my $request = HTTP::Request->new('GET', $url);
#my $response = $ua->request( $request );
#if ( $response->is_success ) {
#    $content = $response->content;
#} else {
#    warn 'OAI request failed: ' . $response->message;
#    exit 1;
#}

my $tree = XML::TreeBuilder->new();
$tree->parse_file('matti.xml');

# Timing starts after parsing, so only record processing is measured.
my $t0 = [gettimeofday];

my @records = $tree->find_by_tag_name('record');

# Run the whole harvest inside eval so that a failure on any record (DBI is
# configured with RaiseError) can be followed by an explicit rollback instead
# of leaving the open transaction in a driver-dependent state.
my $ok = eval {
    RECORD:
    foreach my $record (@records) {

        # Every per-record field is declared inside the loop so that a record
        # lacking an author or an educational description cannot inherit the
        # values of the record processed before it.
        my $personLastname       = "";
        my $personFirstname      = "";
        my $personEmail          = "";
        my $personCompany        = "";
        my $role                 = "";
        my $pedagogy_description = "Unknown";

        # Extract information from the <metametadata> tag; records without
        # one are skipped.
        my $header = $record->find_by_tag_name('metametadata');
        next RECORD if !$header;

        my $logeneralCreation_Date = $header->find_by_tag_name('datetime')->as_text;

        # Only year and month are used below; the day capture is discarded.
        # Both stay undef when the date is not exactly YYYY-MM-DD.
        my ($logeneralPub_Year, $logeneralPub_Month) =
            ($logeneralCreation_Date =~ /^(\d{4})-(\d{2})-(\d{2})$/);

        # Extract information from the <general> tag
        my $general = $record->find_by_tag_name('general');

        my $title = $general->find_by_tag_name('title');
        my $logeneralTitle1 = $title->find_by_tag_name('langstring')->as_text;

        my $description = $general->find_by_tag_name('description');
        my $logeneralDescription = $description->find_by_tag_name('langstring')->as_text;
        $logeneralDescription =~ s/\n/ /g;

        my $logeneralLanguage1 = $general->find_by_tag_name('language')->as_text;

        my $keywordsElement = $general->find_by_tag_name('keywords');
        my $logeneralKeywords = $keywordsElement->find_by_tag_name('langstring')->as_text;

        # Extract information from the <lifecycle> tag
        my $lifecycle = $record->find_by_tag_name('lifecycle');

        my $version = $lifecycle->find_by_tag_name('version');
        my $platformVersion = $version->find_by_tag_name('langstring')->as_text;

        my @contributeElement = $lifecycle->find_by_tag_name('contribute');
        foreach my $contribute (@contributeElement) {
            # We will only take the author information.  If several author
            # contributors exist, the last one wins.
            my $roletype   = $contribute->find_by_tag_name('role');
            my $langstring = $roletype->find_by_tag_name('langstring')->as_text;
            if ( $langstring eq "author" ) {
                $role = 'Author';
                my $centity = $contribute->find_by_tag_name('centity');
                my $entity  = $centity->find_by_tag_name('vcard')->as_text;
                ($personLastname, $personFirstname, $personEmail, $personCompany) =
                    OAIv_parseVcard_matti($entity);
            }
        }
        if ( $role eq "" ) {
            warn "No author contributor found for '$logeneralTitle1'\n";
        }

        # Extract information from the <technical> tag
        my $technical = $record->find_by_tag_name('technical');

        # All MATTI are 1-Generate Automatically from MIME Type, so the
        # <format> element of the record is not consulted.
        my $platformFormat = 1;

        # Remove spaces, one leading newline and one trailing newline from
        # the location text.
        my $platformLocation_URL = $technical->find_by_tag_name('location')->as_text;
        $platformLocation_URL =~ tr/ //d;
        $platformLocation_URL =~ s/^\n(.*)$/$1/;
        chomp($platformLocation_URL);

        # ALL MATTI are Java applets
        my $platformType = 64;
        my $platformOS   = '';

        # Special Java applet tag to go in version.installation_note
        my $installation_note = $technical->find_by_tag_name('applettag')->as_XML();

        # Extract information from the <educational> tag
        my $educational = $record->find_by_tag_name('educational');

        # Use Learner (end_user_type = 2)
        my $pedagogyEndUserType = '2';

        # Learning context must be mapped to grade levels
        my $learningcontextElement = $educational->find_by_tag_name('learningcontext');
        my $lcontext_text   = $learningcontextElement->find_by_tag_name('langstring')->as_text;
        my @learningcontext = ($lcontext_text =~ /^(.*),\s(.*)$/);
        my $pedagogyLContext = OAIv_findLContext(@learningcontext);

        # Difficulty and interactivity level are matched against
        # "<digit>-<word>"; only the digit is kept (undef when no match).
        my $difficulty = $educational->find_by_tag_name('difficulty')->as_text;
        my ($difficulty_id) = ($difficulty =~ /^(\d{1})-\w+$/);

        # If several <description> elements exist, the last one wins.
        my @pedagogy_description_element = $educational->find_by_tag_name('description');
        foreach my $p_d_e (@pedagogy_description_element) {
            $pedagogy_description = $p_d_e->find_by_tag_name('langstring')->as_text;
        }

        my $interactivity_level = $educational->find_by_tag_name('interactivitylevel')->as_text;
        my ($interactivity_level_id) = ($interactivity_level =~ /^(\d{1})-\w+$/);

        # Java Applets
        my $resource_type_id = 100;

        if ( $useDatabase ) {
            # Determine if this author already exists in the database
            # (person and entity tables); insert the author when missing and
            # look the key up again.
            my $author_reg_key = OAIc_personexists($dbh, $personEmail);
            if ( !$author_reg_key ) {
                printf("Inserting person email=%s\n", $personEmail);
                OAIc_insert_person($dbh, $publisher_reg_key, $submitter_key,
                                   $personLastname, $personFirstname,
                                   $personEmail, $personCompany);
                $author_reg_key = OAIc_personexists($dbh, $personEmail);
            }

            # Existing learning objects are looked up by title.
            if ( my $general_key = OAIc_loexists($dbh, $logeneralTitle1) ) {
                OAIc_update_matti($dbh, $general_key, $installation_note);
                # my $success = OAIc_update_lo($dbh, $general_key, $logeneralTitle1, $logeneralLanguage1, $logeneralDescription, $image, $logeneralPub_Month, $logeneralPub_Year, $logeneralKeywords, $submitter_key, join(" ", $personFirstname, $personLastname), $publisher, $collection, $platformFormat, $platformType, $platformOS, $platformLocation_URL, $pedagogyLContext, $pedagogyEndUserType, $author_reg_key, $publisher_reg_key, $collection_reg_key);
                $updated++;
            }
            else {
                printf("Inserting new record for %s\n", $logeneralTitle1);
                OAIc_insert_lo($dbh,
                               $logeneralTitle1, $logeneralLanguage1,
                               $logeneralDescription, $image,
                               $logeneralPub_Month, $logeneralPub_Year,
                               $logeneralKeywords, $submitter_key,
                               join(" ", $personFirstname, $personLastname),
                               $publisher, $collection,
                               $platformFormat, $platformType, $platformOS,
                               $platformLocation_URL,
                               $pedagogyLContext, $pedagogyEndUserType,
                               $author_reg_key, $publisher_reg_key,
                               $collection_reg_key,
                               $difficulty_id, $interactivity_level_id,
                               $pedagogy_description, $resource_type_id);
                $inserted++;
            }
        }
        else {
            # Print Results
            printf("Title: %s\tDescription: %s\tKeywords: %s\n",
                   $logeneralTitle1, $logeneralDescription, $logeneralKeywords);
            printf("Creation Date: %s\tPublication Year: %4d\tPublication Month: %02d\n",
                   $logeneralCreation_Date, $logeneralPub_Year, $logeneralPub_Month);
            printf("Role: %s\n", $role);
            printf("Firstname: %s\tLastname: %s\tEmail: %s\tOrganization: %s\n",
                   $personFirstname, $personLastname, $personEmail, $personCompany);
            printf("Language: %s\n", $logeneralLanguage1);
            printf("Format: %s\tURL: %s\tPlatform: %s\tOS: %s\n",
                   $platformFormat, $platformLocation_URL, $platformType, $platformOS);
            printf("IntendedEndUserRole: %s\tLearningContext: %s\n",
                   $pedagogyEndUserType, $pedagogyLContext);
            printf("Pedagogy Description: %s\tDifficulty id: %d\tInteractivity Level id: %d\n",
                   $pedagogy_description, $difficulty_id, $interactivity_level_id);
            printf("Installation Note: %s\n", $installation_note);
        }
    } # end for loop

    # Commit inside the eval so a failed commit is handled like any other error.
    $dbh->commit if $useDatabase;
    1;
};
my $error = $@ || "unknown error\n";   # capture before anything can clobber $@

$tree->delete;

if ( !$ok ) {
    if ( $useDatabase ) {
        # Best effort: undo the partial harvest and release the connection.
        eval { $dbh->rollback; };
        eval { $dbh->disconnect; };
    }
    die $error;
}

if ( $useDatabase ) {
    $dbh->disconnect();
}

printf("Inserted %d records and Updated %d records in %f seconds.\n",
       $inserted, $updated, tv_interval($t0));

exit 0;