Annotation of nsdl/harvestsmete/cstc.pl, revision 1.1

1.1     ! www         1: #!/usr/local/bin/perl -w
        !             2: 
        !             3: #
        !             4: # cstc.pl - Use OAI MHP to harvest metadata from CSTC in oai_ims format
        !             5: #
        !             6: # Written by Andy Dong <adong@smete.org> 11/01/2001
        !             7: #
        !             8: 
        !             9: use strict;
        !            10: use Getopt::Std;
        !            11: use Time::HiRes qw(usleep ualarm gettimeofday tv_interval);
        !            12: 
        !            13: use HTTP::Request;
        !            14: use LWP::UserAgent;
        !            15: 
        !            16: use XML::Element;
        !            17: use XML::Parser;
        !            18: use XML::TreeBuilder;
        !            19: 
        !            20: use DBI;
        !            21: use DBD::ODBC;
        !            22: 
        !            23: require OAIvocabulary_v2;
        !            24: require OAIcataloging_v2;
        !            25: 
        !            26: # Command-line handling: the -u flag means [u]pdate the database; without it
        !            27: # each harvested record is only printed to STDOUT.
        !            28: my $usage = << "EOT";
        !            29: Usage: cstc.pl -u
        !            30: 
        !            31:     -u (U)pdate the database
        !            32: 
        !            33:     Without -u it simply prints to STDOUT
        !            34: EOT
        !            35: 
        !            36: my %args;
        !            37: getopts('u', \%args) || die $usage;
        !            38: 
        !            39: my $inserted = 0;	# learning objects inserted by this run
        !            40: my $updated = 0;	# learning objects that already existed and were updated
        !            41: 
        !            42: my $useDatabase = $args{'u'} ? 1 : 0;	# plain assignment: "my" combined with a statement modifier has undefined behaviour
        !            43: my $dbh;
        !            44: # Database configuration. AutoCommit is off, so changes are committed once, after all records are processed.
        !            45: if ( $useDatabase ) {
        !            46: 	print "Updating the database\n";
        !            47: 	my $DBI_DSN='dbi:ODBC:mel.odbc';
        !            48: 	my $DBI_USER='autocataloger';
        !            49: 	my $DBI_PWD='regolatacotua';	# NOTE(review): hard-coded credential; consider moving it out of the source
        !            50: 	$dbh= DBI->connect($DBI_DSN, $DBI_USER, $DBI_PWD, { RaiseError => 1, AutoCommit => 0 }) || die "Unable to connect to database $DBI_DSN as $DBI_USER\n";
        !            51: }
        !            52: 
        !            53: my $content;	# only referenced by the commented-out live-harvest code further down
        !            54: my @cstc;	# not referenced anywhere else in this script
        !            55: 
        !            56: # All possible LOM record variables. They are file-scoped, so a value set for one record persists until it is reassigned.
        !            57: my $logeneralDescription = "";
        !            58: my $logeneralTitle1 = "";
        !            59: my $logeneralCreation_Date = "";
        !            60: my $logeneralPub_Year = "";
        !            61: my $logeneralPub_Month = "";
        !            62: my $logeneralPub_Day = "";
        !            63: my $logeneralLanguage1 = "";
        !            64: my @logeneralKeywords = ();
        !            65: my $publisher = "";
        !            66: my $personLastname = "";
        !            67: my $personFirstname = "";
        !            68: my $personEmail = "";
        !            69: my $personCompany = "";
        !            70: my $platformVersion = "";
        !            71: my $platformType = "";
        !            72: my $platformOS = "";
        !            73: my @pedagogyEndUserRole = ();
        !            74: my $pedagogyLContext = "";
        !            75: my $platformFormat = "";
        !            76: my $platformLocation_URL = "";
        !            77: # Live OAI harvest over HTTP, currently disabled:
        !            78: #my $url = 'http://www.cstc.org/cgi-bin/OAI/CSTC.pl?verb=ListRecords&metadataPrefix=ims1_1';
        !            79: 
        !            80: #my $ua = new LWP::UserAgent;
        !            81: #my $request = HTTP::Request->new('GET', $url);
        !            82: #my $response = $ua->request( $request );
        !            83: 
        !            84: #if ( $response->is_success ) {
        !            85: #	$content = $response->content;
        !            86: #} else {
        !            87: #	warn 'OAI request failed: ' . $response->message;
        !            88: #	exit 1;
        !            89: #}
        !            90: # Instead, the records are read from a local XML file.
        !            91: my $tree = XML::TreeBuilder->new();
        !            92: $tree->parse_file('cstc-111.xml');
        !            93: 
        !            94: my $t0 = [gettimeofday];	# timing starts after parsing, so parse time is not counted
        !            95: 
        !            96: # Walk every harvested <record>: pull the fields out of its metadata, then either
        !            97: # store them (-u) or print them. NOTE(review): apart from <header>, a missing element makes the ->as_text call die.
        !            98: my @records = $tree->find_by_tag_name('record');
        !            99: foreach my $record (@records){
        !           100: 	# Extract information from <header> tag; records without one are skipped
        !           101: 	my $header = $record->find_by_tag_name('header');
        !           102: 	next if ! $header;
        !           103: 	$logeneralCreation_Date = $header->find_by_tag_name('datestamp')->as_text;
        !           104: 	# Split a YYYY-MM-DD datestamp into year, month and day
        !           105: 	($logeneralPub_Year, $logeneralPub_Month, $logeneralPub_Day) = ($logeneralCreation_Date =~ /^(\d{4})-(\d{2})-(\d{2})$/);
        !           106: 	# Extract information from <general> tag
        !           107: 	my $general = $record->find_by_tag_name('general');
        !           108: 	$logeneralTitle1= $general->find_by_tag_name('title')->as_text;
        !           109: 	$logeneralDescription = $general->find_by_tag_name('description')->as_text;
        !           110: 	$logeneralDescription =~ s/\n/ /g;
        !           111: 	# Keywords: trim whitespace from both ends of each <langstring> and drop empty entries
        !           112: 	@logeneralKeywords = ();
        !           113: 	my $keywordsElement = $general->find_by_tag_name('keywords');
        !           114: 	foreach my $kw ($keywordsElement->find_by_tag_name('langstring')) {
        !           115: 		my $word = $kw->as_text();
        !           116: 		$word =~ s/^\s+|\s+$//g;
        !           117: 		push(@logeneralKeywords,$word) if length $word;
        !           118: 	}
        !           119: 	# Extract information from <lifecycle> tag
        !           120: 	my $lifecycle = $record->find_by_tag_name('lifecycle');
        !           121: 	$platformVersion = $lifecycle->find_by_tag_name('status')->as_text;
        !           122: 	# Extract information from <metametadata> tag
        !           123: 	my $metametadata = $record->find_by_tag_name('metametadata');
        !           124: 	my $role = $metametadata->find_by_tag_name('role')->as_text;
        !           125: 	# Only Contributor information is taken. The person fields are cleared first so a
        !           126: 	# record with another role does not inherit the previous record's contributor.
        !           127: 	($personLastname, $personFirstname, $personEmail, $personCompany) = ("", "", "", "");
        !           128: 	if ( $role eq "Contributor" ) {
        !           129: 		my $entity = $metametadata->find_by_tag_name('centity')->as_text;
        !           130: 		($personLastname, $personFirstname, $personEmail, $personCompany) = OAIv_parseVcard($entity);
        !           131: 	}
        !           132: 	$logeneralLanguage1 = $metametadata->find_by_tag_name('language')->as_text;
        !           133: 	# Extract information from <technical> tag
        !           134: 	my $technical = $record->find_by_tag_name('technical');
        !           135: 	# Convert the format to a numeric code: 65 for text/html, 1 for anything else
        !           136: 	$platformFormat = ($technical->find_by_tag_name('format')->as_text eq 'text/html') ? 65 : 1;
        !           137: 	$platformLocation_URL = $technical->find_by_tag_name('location')->as_text;
        !           138: 	# Remove spaces, then a leading newline and a trailing newline, from the URL
        !           139: 	$platformLocation_URL =~ tr/ //d;
        !           140: 	$platformLocation_URL =~ s/^\n(.*)$/$1/;
        !           141: 	chomp($platformLocation_URL);
        !           142: 	# Platform Type is sometimes null and must be mapped to standard platforms.
        !           143: 	# Both values are cleared per record so they cannot leak in from the previous one.
        !           144: 	($platformType, $platformOS) = ("", "");
        !           145: 	foreach my $req ( $technical->find_by_tag_name('requirements') ) {
        !           146: 		my $typeElement = $req->find_by_tag_name('type');
        !           147: 		my $nameElement = $req->find_by_tag_name('name');
        !           148: 		next unless defined $typeElement && defined $nameElement;
        !           149: 		my $type = $typeElement->as_text;
        !           150: 		if ( $type eq "Platform" ) {
        !           151: 			$platformType = OAIv_findPlatform($nameElement->as_text);
        !           152: 		} elsif ( $type eq "Operating System" ) {
        !           153: 			$platformOS = $nameElement->as_text;
        !           154: 		}
        !           155: 	}
        !           156: 	# Extract information from <educational> tag
        !           157: 	my $educational = $record->find_by_tag_name('educational');
        !           158: 	@pedagogyEndUserRole=();
        !           159: 	foreach my $ieur ($educational->find_by_tag_name('intendedenduserrole')) {
        !           160: 		my $intendedenduserroleLangstringElement = $ieur->find_by_tag_name('langstring');
        !           161: 		push(@pedagogyEndUserRole,$intendedenduserroleLangstringElement->content_list());
        !           162: 	}
        !           163: 	# Use Learner (end_user_type = 2)
        !           164: 	my $pedagogyEndUserType = '2';
        !           165: 	# Learning context must be mapped to grade levels
        !           166: 	my @learningcontext = ();
        !           167: 	foreach my $lc ($educational->find_by_tag_name('learningcontext')) {
        !           168: 		push(@learningcontext,$lc->find_by_tag_name('langstring')->as_text);
        !           169: 	}
        !           170: 	$pedagogyLContext = OAIv_findLContext(@learningcontext);
        !           171: 
        !           172: 	# Fixed values passed to OAIc_insert_lo for every record
        !           173: 	my $difficulty_id = 0;
        !           174: 	my $pedagogy_description = '';
        !           175: 	my $interactivity_level_id = 0;
        !           176: 	my $resource_type_id = 0;
        !           177: 
        !           178: 	if ( $useDatabase ) {
        !           179: 		# Some specific configuration information for CSTC
        !           180: 		# Logo
        !           181: 		my $image = "http://www.smete.org/images/affiliation/cstc.gif";
        !           182: 		my $submitter_key = '{710FE693-46E9-4002-BA94-1BE2E6218CD6}'; # Andy Dong
        !           183: 		my $collection = 'Computer Science Teaching Center';
        !           184: 		my $collection_reg_key = '{9FF74842-6C5B-4A80-A86C-B27859C996DE}';
        !           185: 		my $publisher = 'Computer Science Teaching Center';
        !           186: 		my $publisher_reg_key = '{9FF74842-6C5B-4A80-A86C-B27859C996DE}';
        !           187: 		# Determine if this author already exists in the database (person and entity tables)
        !           188: 		my $author_reg_key;
        !           189: 		if ( ! ($author_reg_key = OAIc_personexists($dbh,$personEmail)) ) {
        !           190: 			printf("Inserting person email=%s\n",$personEmail);
        !           191: 			my $success = OAIc_insert_person($dbh,$publisher_reg_key,$submitter_key,$personLastname,$personFirstname,$personEmail,$personCompany);
        !           192: 			$author_reg_key = OAIc_personexists($dbh,$personEmail);
        !           193: 		}
        !           194: 		# Update the learning object when OAIc_loexists finds its title, otherwise insert it
        !           195: 		if ( my $general_key = OAIc_loexists($dbh,$logeneralTitle1) ) {
        !           196: 			my $success = OAIc_update_lo($dbh, $general_key, $logeneralTitle1, $logeneralLanguage1, $logeneralDescription, $image, $logeneralPub_Month, $logeneralPub_Year, join(";",@logeneralKeywords), $submitter_key, join(" ", $personFirstname, $personLastname), $publisher, $collection, $platformFormat, $platformType, $platformOS, $platformLocation_URL, $pedagogyLContext, $pedagogyEndUserType, $author_reg_key, $publisher_reg_key);
        !           197: 			$updated = $updated + 1;
        !           198: 		} else {
        !           199: 			printf("Inserting new record for %s\n",$logeneralTitle1);
        !           200: 			my $success = OAIc_insert_lo($dbh, $logeneralTitle1, $logeneralLanguage1, $logeneralDescription, $image, $logeneralPub_Month, $logeneralPub_Year, join(";",@logeneralKeywords), $submitter_key, join(" ", $personFirstname, $personLastname), $publisher, $collection, $platformFormat, $platformType, $platformOS, $platformLocation_URL, $pedagogyLContext, $pedagogyEndUserType, $author_reg_key, $publisher_reg_key, $collection_reg_key,$difficulty_id, $interactivity_level_id, $pedagogy_description, $resource_type_id);
        !           201: 			$inserted = $inserted + 1;
        !           202: 		}
        !           203: 	} else {
        !           204: 		# Print Results
        !           205: 		printf("Title: %s\tDescription: %s\tKeywords: %s\n", $logeneralTitle1,$logeneralDescription,join(";",@logeneralKeywords));
        !           206: 		printf("Creation Date: %s\tPublication Year: %4d\tPublication Month: %02d\n", $logeneralCreation_Date, $logeneralPub_Year, $logeneralPub_Month);
        !           207: 		printf("Role: %s\n", $role);
        !           208: 		printf("Firstname: %s\tLastname: %s\tEmail: %s\tOrganization: %s\n", $personFirstname, $personLastname, $personEmail, $personCompany);
        !           209: 		printf("Language: %s\n", $logeneralLanguage1);
        !           210: 		printf("Format: %s\tURL: %s\tPlatform: %s\tOS: %s\n", $platformFormat, $platformLocation_URL,$platformType,$platformOS);
        !           211: 		printf("IntendedEndUserRole: %s\tLearningContext: %s\n", join(";",@pedagogyEndUserRole), $pedagogyLContext);
        !           212: 	}
        !           213: } # end for loop
        !           214: $tree->delete;	# explicitly dispose of the parsed tree
        !           215: # Commit everything written during the run (AutoCommit is off), then disconnect.
        !           216: if ( $useDatabase ) {
        !           217: 	$dbh->commit;
        !           218: 	$dbh->disconnect();
        !           219: }
        !           220: # Summary line; the elapsed time is measured from $t0.
        !           221: printf("Inserted %d records and Updated %d records in %f seconds.\n", $inserted, $updated, tv_interval($t0));
        !           222: 
        !           223: exit 0;

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>