#!/usr/local/bin/perl -w
#
# cstc.pl - Use OAI MHP to harvest metadata from CSTC in oai_ims format
#
# Written by Andy Dong 11/01/2001
#
# Reads a saved ListRecords response (see $input_file), extracts the IMS/LOM
# fields of every <record>, and either prints them to STDOUT or, with -u,
# inserts/updates them in the database through the OAIc_* routines.
#
use strict;
use Getopt::Std;
use Time::HiRes qw(usleep ualarm gettimeofday tv_interval);
use HTTP::Request;
use LWP::UserAgent;
use XML::Element;
use XML::Parser;
use XML::TreeBuilder;
use DBI;
use DBD::ODBC;
require OAIvocabulary_v2;
require OAIcataloging_v2;

# -u flag specifies [u]pdate database; otherwise output to STDOUT
my $usage = << "EOT";
Usage: cstc.pl -u
  -u  (U)pdate the database
  Without -u it simply prints to STDOUT
EOT

my %args;
getopts('u', \%args) || die $usage;

my $inserted = 0;
my $updated  = 0;

# A plain ternary: "my $x = 1 if COND" has undefined behaviour (see perlsyn).
my $useDatabase = $args{'u'} ? 1 : 0;
my $dbh;

# Database Configuration
if ( $useDatabase ) {
    print "Updating the database\n";
    # NOTE(review): credentials are hard-coded in the source; consider moving
    # them to a protected configuration file or the environment.
    my $DBI_DSN  = 'dbi:ODBC:mel.odbc';
    my $DBI_USER = 'autocataloger';
    my $DBI_PWD  = 'regolatacotua';
    # AutoCommit is off: nothing is stored until the commit at the end.
    $dbh = DBI->connect($DBI_DSN, $DBI_USER, $DBI_PWD,
                        { RaiseError => 1, AutoCommit => 0 })
        || die "Unable to connect to database $DBI_DSN as $DBI_USER\n";
}

# Collection-specific configuration for CSTC (identical for every record).
my $image              = "http://www.smete.org/images/affiliation/cstc.gif";   # Logo
my $submitter_key      = '{710FE693-46E9-4002-BA94-1BE2E6218CD6}';             # Andy Dong
my $collection         = 'Computer Science Teaching Center';
my $collection_reg_key = '{9FF74842-6C5B-4A80-A86C-B27859C996DE}';
my $publisher          = 'Computer Science Teaching Center';
my $publisher_reg_key  = '{9FF74842-6C5B-4A80-A86C-B27859C996DE}';

# Use Learner (end_user_type = 2)
my $pedagogyEndUserType = '2';

# Fixed values handed to OAIc_insert_lo for every new record.
my $difficulty_id          = 0;
my $pedagogy_description   = '';
my $interactivity_level_id = 0;
my $resource_type_id       = 0;

# Saved ListRecords response that is parsed instead of a live request.
my $input_file = 'cstc-111.xml';

# Only used by the disabled live-harvest code below.
my $content;

#my $url = 'http://www.cstc.org/cgi-bin/OAI/CSTC.pl?verb=ListRecords&metadataPrefix=ims1_1';
#my $ua = new LWP::UserAgent;
#my $request = HTTP::Request->new('GET', $url);
#my $response = $ua->request( $request );
#if ( $response->is_success ) {
#    $content = $response->content;
#} else {
#    warn 'OAI request failed: ' . $response->message;
#    exit 1;
#}

# Return the text of the first element named $tag at or under $parent, or ''
# when $parent is undefined or no such element exists.
sub _first_text {
    my ($parent, $tag) = @_;
    return '' if !defined $parent;
    my $element = $parent->find_by_tag_name($tag);
    return defined $element ? $element->as_text : '';
}

# Return every element named $tag at or under $parent; an empty list when
# $parent is undefined.
sub _find_all {
    my ($parent, $tag) = @_;
    return () if !defined $parent;
    my @elements = $parent->find_by_tag_name($tag);
    return @elements;
}

my $tree = XML::TreeBuilder->new();
$tree->parse_file($input_file);

my $t0 = [gettimeofday];

my @records = $tree->find_by_tag_name('record');

RECORD:
foreach my $record (@records) {

    # Every per-record field starts empty so that a record lacking some
    # element never inherits the value harvested from the previous record.
    my $personLastname  = "";
    my $personFirstname = "";
    my $personEmail     = "";
    my $personCompany   = "";
    my $platformType    = "";
    my $platformOS      = "";

    # Extract information from the <header> element
    my $header = $record->find_by_tag_name('header');
    next RECORD if !defined $header;

    my $logeneralCreation_Date = _first_text($header, 'datestamp');
    # The pattern is not anchored at the end, so a datestamp that carries a
    # time part after the date still yields year, month and day.
    my ($logeneralPub_Year, $logeneralPub_Month, $logeneralPub_Day) =
        ($logeneralCreation_Date =~ /^(\d{4})-(\d{2})-(\d{2})/);

    # Extract information from the <general> element
    my $general         = $record->find_by_tag_name('general');
    my $logeneralTitle1 = _first_text($general, 'title');
    if ( $logeneralTitle1 !~ /\S/ ) {
        # Without a title there is nothing to look up or insert; skip the
        # record instead of aborting the whole run.
        warn "Skipping record without a title (datestamp '$logeneralCreation_Date')\n";
        next RECORD;
    }

    my $logeneralDescription = _first_text($general, 'description');
    $logeneralDescription =~ s/\n/ /g;

    # Keywords: trim surrounding whitespace and drop empty entries.
    my @logeneralKeywords = ();
    my $keywordsElement   = $general->find_by_tag_name('keywords');
    foreach my $kw ( _find_all($keywordsElement, 'langstring') ) {
        my $word = $kw->as_text();
        $word =~ s/^\s+//;
        $word =~ s/\s+$//;
        push(@logeneralKeywords, $word) if $word ne '';
    }

    # Extract information from the <lifecycle> element
    # NOTE(review): the status text is read but not used anywhere below.
    my $lifecycle       = $record->find_by_tag_name('lifecycle');
    my $platformVersion = _first_text($lifecycle, 'status');

    # Extract information from the <metametadata> element
    my $metametadata = $record->find_by_tag_name('metametadata');
    my $role         = _first_text($metametadata, 'role');

    # We will only take the Contributor information
    if ( $role eq "Contributor" ) {
        my $entity = _first_text($metametadata, 'centity');
        ($personLastname, $personFirstname, $personEmail, $personCompany) =
            OAIv_parseVcard($entity);
    }
    my $logeneralLanguage1 = _first_text($metametadata, 'language');

    # Extract information from the <technical> element
    my $technical = $record->find_by_tag_name('technical');

    # Convert text/html to format 65; every other format becomes 1
    my $platformFormat =
        _first_text($technical, 'format') eq 'text/html' ? 65 : 1;

    # The location text is padded with spaces and newlines; a URL contains
    # no whitespace, so remove all of it.
    my $platformLocation_URL = _first_text($technical, 'location');
    $platformLocation_URL =~ s/\s+//g;

    # Platform Type is sometimes null and must be mapped to standard platforms
    foreach my $req ( _find_all($technical, 'requirements') ) {
        my $typeElement = $req->find_by_tag_name('type');
        next if !defined $typeElement;
        my $nameElement = $req->find_by_tag_name('name');
        next if !defined $nameElement;

        my $type = $typeElement->as_text;
        if ( $type eq "Platform" ) {
            $platformType = OAIv_findPlatform($nameElement->as_text);
        }
        elsif ( $type eq "Operating System" ) {
            $platformOS = $nameElement->as_text;
        }
    }

    # Extract information from the <educational> element
    my $educational = $record->find_by_tag_name('educational');

    my @pedagogyEndUserRole = ();
    foreach my $ieur ( _find_all($educational, 'intendedenduserrole') ) {
        my $langstring = $ieur->find_by_tag_name('langstring');
        next if !defined $langstring;
        push(@pedagogyEndUserRole, $langstring->content_list());
    }

    # Learning context must be mapped to grade levels
    my @learningcontext = ();
    foreach my $lc ( _find_all($educational, 'learningcontext') ) {
        my $langstring = $lc->find_by_tag_name('langstring');
        next if !defined $langstring;
        push(@learningcontext, $langstring->as_text);
    }
    my $pedagogyLContext = OAIv_findLContext(@learningcontext);

    if ( $useDatabase ) {
        # Determine if this author already exists in the database
        # (person and entity tables)
        # NOTE(review): $personEmail is empty when the record has no
        # Contributor role; confirm how OAIc_personexists/OAIc_insert_person
        # treat an empty e-mail address.
        my $author_reg_key;
        if ( ! ($author_reg_key = OAIc_personexists($dbh,$personEmail)) ) {
            printf("Inserting person email=%s\n",$personEmail);
            # NOTE(review): the return values of the OAIc_* write routines
            # are not checked; their meaning is not visible from this file.
            my $success = OAIc_insert_person($dbh, $publisher_reg_key,
                $submitter_key, $personLastname, $personFirstname,
                $personEmail, $personCompany);
            $author_reg_key = OAIc_personexists($dbh,$personEmail);
        }

        if ( my $general_key = OAIc_loexists($dbh,$logeneralTitle1) ) {
            my $success = OAIc_update_lo($dbh, $general_key,
                $logeneralTitle1, $logeneralLanguage1, $logeneralDescription,
                $image, $logeneralPub_Month, $logeneralPub_Year,
                join(";",@logeneralKeywords), $submitter_key,
                join(" ", $personFirstname, $personLastname),
                $publisher, $collection, $platformFormat, $platformType,
                $platformOS, $platformLocation_URL, $pedagogyLContext,
                $pedagogyEndUserType, $author_reg_key, $publisher_reg_key);
            $updated = $updated + 1;
        }
        else {
            printf("Inserting new record for %s\n",$logeneralTitle1);
            my $success = OAIc_insert_lo($dbh,
                $logeneralTitle1, $logeneralLanguage1, $logeneralDescription,
                $image, $logeneralPub_Month, $logeneralPub_Year,
                join(";",@logeneralKeywords), $submitter_key,
                join(" ", $personFirstname, $personLastname),
                $publisher, $collection, $platformFormat, $platformType,
                $platformOS, $platformLocation_URL, $pedagogyLContext,
                $pedagogyEndUserType, $author_reg_key, $publisher_reg_key,
                $collection_reg_key, $difficulty_id, $interactivity_level_id,
                $pedagogy_description, $resource_type_id);
            $inserted = $inserted + 1;
        }
    }
    else {
        # Print Results
        printf("Title: %s\tDescription: %s\tKeywords: %s\n",
            $logeneralTitle1,$logeneralDescription,join(";",@logeneralKeywords));
        # Year/month are undefined when the datestamp did not parse; print 0
        # rather than triggering an uninitialized-value warning.
        printf("Creation Date: %s\tPublication Year: %4d\tPublication Month: %02d\n",
            $logeneralCreation_Date, $logeneralPub_Year || 0, $logeneralPub_Month || 0);
        printf("Role: %s\n", $role);
        printf("Firstname: %s\tLastname: %s\tEmail: %s\tOrganization: %s\n",
            $personFirstname, $personLastname, $personEmail, $personCompany);
        printf("Language: %s\n", $logeneralLanguage1);
        printf("Format: %s\tURL: %s\tPlatform: %s\tOS: %s\n",
            $platformFormat, $platformLocation_URL,$platformType,$platformOS);
        printf("IntendedEndUserRole: %s\tLearningContext: %s\n",
            join(";",@pedagogyEndUserRole), $pedagogyLContext);
    }
} # end for loop

$tree->delete;

if ( $useDatabase ) {
    $dbh->commit;
    $dbh->disconnect();
}

printf("Inserted %d records and Updated %d records in %f seconds.\n",
    $inserted, $updated, tv_interval($t0));

exit 0;