Annotation of nsdl/harvestsmete/dlese-parserecord.pl, revision 1.1

1.1     ! www         1: #!/usr/local/bin/perl -w
        !             2: 
        !             3: #
        !             4: # dlese-parserecord.pl - Parse DLESE Metadata
        !             5: # This program reads a file dlese-identifiers.xml then requests each record and stores in a sub-directory called dlese
        !             6: #
        !             7: # Written by Andy Dong <adong@smete.org> 11/01/2001
        !             8: #
        !             9: 
        !            10: use strict;
        !            11: use Getopt::Std;
        !            12: use Time::HiRes qw(usleep ualarm gettimeofday tv_interval);
        !            13: 
        !            14: use HTTP::Request;
        !            15: use LWP::UserAgent;
        !            16: 
        !            17: use XML::Element;
        !            18: use XML::Parser;
        !            19: use XML::TreeBuilder;
        !            20: 
        !            21: require OAIvocabulary_v2;
        !            22: require OAIcataloging_v2;
        !            23: 
        !            24: # -u flag specifies [u]pdate database; otherwise output to STDOUT
        !            25: 
        !            26: my $usage = << "EOT";
        !            27: Usage: dlese-parserecord.pl -u
        !            28: 
        !            29:     -u (U)pdate the database
        !            30: 
        !            31:     Without -u it simply prints to STDOUT
        !            32: EOT
        !            33: 
        !            34: my %args;
        !            35: getopts('u', \%args) || die $usage;
        !            36: 
        !            37: my $inserted = 0;
        !            38: my $updated = 0;          
        !            39: 
        !            40: my $useDatabase = 1 if ($args{'u'});
        !            41: my $dbh;
        !            42: 
        !            43: my $t0 = [gettimeofday];
        !            44: 
        !            45: # Database Configuration
        !            46: if ( $useDatabase ) {
        !            47: 	print "Updating the database\n";
        !            48: 	my $DBI_DSN='dbi:ODBC:mel.odbc';
        !            49: 	my $DBI_USER='autocataloger';
        !            50: 	my $DBI_PWD='regolatacotua';
        !            51: 	$dbh= DBI->connect($DBI_DSN, $DBI_USER, $DBI_PWD, { RaiseError => 1, AutoCommit => 0 }) || die "Unable to connect to database $DBI_DSN as $DBI_USER\n";;
        !            52: }
        !            53: 
        !            54: # First parse the dlese-identifiers.xml file to get the list of available records
        !            55: my $tree = XML::TreeBuilder->new();
        !            56: $tree->parse_file('dlese-identifiers.xml');
        !            57: my @identifiers = $tree->find_by_tag_name('identifier');
        !            58: 
        !            59: # Now go grab them and save them to a file
        !            60: foreach my $identifier (@identifiers) {
        !            61: 	my $record = $identifier->as_text();
        !            62: 
        !            63: 	my $getrecord_tree = XML::TreeBuilder->new();
        !            64: 	$getrecord_tree->parse_file("dlese/$record");
        !            65: 
        !            66: 	my $metadata = $getrecord_tree->find_by_tag_name('metadata');
        !            67: 
        !            68: 	# <general>
        !            69: 	my $general_element = $metadata->find_by_tag_name('general');
        !            70: 	my $title_element = $general_element->find_by_tag_name('title');
        !            71: 	my $title = $title_element->find_by_tag_name('langstring')->as_text();
        !            72: 	# Note: DLESE appears to use general.extension.topic for keywords not general.keywords
        !            73: 	my @keywords_list = ();
        !            74: 	my @keywords_element = $general_element->find_by_tag_name('topic');
        !            75: 	foreach my $topics (@keywords_element) {
        !            76: 		push(@keywords_list,$topics->find_by_tag_name('langstring')->as_text());
        !            77: 	}
        !            78: 	my $keywords = join(',',@keywords_list);
        !            79: 	my $description_element = $general_element->find_by_tag_name('description');
        !            80: 	my $description = $description_element->find_by_tag_name('langstring')->as_text();
        !            81: 	my $language = $general_element->find_by_tag_name('language')->as_text();
        !            82: 
        !            83: 	# <metametadata>
        !            84: 	my $metametadata_element = $metadata->find_by_tag_name('metametadata');
        !            85: 	my $catalog_entry_element = $metametadata_element->find_by_tag_name('accession')->as_text();
        !            86: 	my ($pub_year,$pub_month,$pub_day) = ( $catalog_entry_element =~ /^(\d{4})-(\d{2})-(\d{2})$/ );
        !            87: 
        !            88: 	# <technical>
        !            89: 	my $technical_element = $metadata->find_by_tag_name('technical');
        !            90: 	my $location = $technical_element->find_by_tag_name('location')->as_text;
        !            91: 	my $format_element = $technical_element->find_by_tag_name('format');
        !            92: 	my $format = $format_element->find_by_tag_name('langstring')->as_text;
        !            93: 	if ( $format eq 'text/html' ) {
        !            94: 		$format = 65;	# text/html
        !            95: 	} else {
        !            96: 		$format = 0;	# unknown
        !            97: 	}
        !            98: 	my $platform = "5";	# HTML Browser (not specified but construed from metadata)
        !            99: 
        !           100: 	# <lifecycle>
        !           101: 	my $lifecycle_element=$metadata->find_by_tag_name('lifecycle');
        !           102: 	my $version= $lifecycle_element->find_by_tag_name('version');
        !           103: 	my $version_string = $version->find_by_tag_name('langstring')->as_text();
        !           104: 	my @contributor_element = $lifecycle_element->find_by_tag_name('contribute');
        !           105: 
        !           106: 	my @author_reg_key = ();
        !           107: 	my @contact_reg_key = ();
        !           108: 	my @publisher_reg_key = ();
        !           109: 	my @other_reg_key = ();
        !           110: 	my $reg_key;
        !           111: 	foreach my $contributor_item (@contributor_element) {
        !           112: 		my $role_element = $contributor_item->find_by_tag_name('role');
        !           113: 		my $role = $role_element->find_by_tag_name('langstring')->as_text();
        !           114: 		my $role_id;
        !           115: 		if ( $role eq "Author" ) {
        !           116: 			$role_id = 8;
        !           117: 		} elsif ( $role eq "Publisher" ) {
        !           118: 			$role_id = 9;
        !           119: 		} elsif ( $role eq "Contact" ) {
        !           120: 			$role_id = 11;
        !           121: 		} else {
        !           122: 			$role_id = 4;
        !           123: 		}
        !           124: 		my $centity = $contributor_item->find_by_tag_name('centity');
        !           125: 		my $role_extension = $centity->find_by_tag_name('extension');
        !           126: 		my $person_first_name = $role_extension->find_by_tag_name('firstname')->as_text();
        !           127: 		my $person_last_name = $role_extension->find_by_tag_name('lastname')->as_text();
        !           128: 		my $person_middle_name = $role_extension->find_by_tag_name('mi')->as_text();
        !           129: 		my $person_title = $role_extension->find_by_tag_name('nametitle')->as_text();
        !           130: 		my $person_company = $role_extension->find_by_tag_name('org')->as_text();
        !           131: 		my $entity_email_address = $role_extension->find_by_tag_name('email')->as_text();
        !           132: 		my $entity_address = $role_extension->find_by_tag_name('adr')->as_text();
        !           133: 		my $entity_city = $role_extension->find_by_tag_name('city')->as_text();
        !           134: 		my $entity_state = $role_extension->find_by_tag_name('state')->as_text();
        !           135: 		my $entity_postal_code = $role_extension->find_by_tag_name('zip')->as_text();
        !           136: 		my $entity_home_page_url = $role_extension->find_by_tag_name('url')->as_text();
        !           137: 		my $entity_phone = $role_extension->find_by_tag_name('tel')->as_text();
        !           138: 		my $entity_fax = $role_extension->find_by_tag_name('fax')->as_text();
        !           139: 		my $entity_country = $role_extension->find_by_tag_name('country')->as_text();
        !           140: 
        !           141: 		# if there is no $person_first_name, then this is an organization
        !           142: 		my $object_type;
        !           143: 		if ( not $person_first_name ) {
        !           144: 			$object_type = 'organization';
        !           145: 		} else {
        !           146: 			$object_type = 'person';
        !           147: 		}
        !           148: 		if ( $useDatabase ) {
        !           149: 			# DLESE
        !           150: 			my $collection_reg_key = '{CA001C50-77CA-43DC-A761-95207D386EDD}';
        !           151: 			# Andy Dong
        !           152: 			my $submitter_key = '{710FE693-46E9-4002-BA94-1BE2E6218CD6}'; # Andy Dong
        !           153: 			# Does this entity exist
        !           154: 			if ( $object_type eq 'organization' ) {
        !           155: 				if ( ! ($reg_key = OAIc_orgexists($dbh,$person_company)) ) {
        !           156: 					my $success = OAIc_insert_org($dbh,$collection_reg_key,$submitter_key,$entity_email_address,$person_company,$entity_address,$entity_city,$entity_state,$entity_postal_code,$entity_home_page_url,$entity_phone,$entity_fax,$entity_country);
        !           157: 		                	$reg_key = OAIc_orgexists($dbh,$person_company);
        !           158: 					printf("Inserted new organization %s\n", $reg_key);
        !           159: 				}
        !           160: 			} else {
        !           161: 				if ( ! ($reg_key = OAIc_personexists($dbh,$entity_email_address)) ) {
        !           162: 					my $success = OAIc_insert_person_full($dbh,$collection_reg_key,$submitter_key,$person_last_name,$person_first_name,$entity_email_address,$person_company,$person_middle_name,$person_title,$entity_address,$entity_city,$entity_state,$entity_postal_code,$entity_home_page_url,$entity_phone,$entity_fax,$entity_country);
        !           163: 			                $reg_key = OAIc_personexists($dbh,$entity_email_address);
        !           164: 					printf("Inserted new person %s\n", $reg_key);
        !           165: 				}
        !           166: 			} 
        !           167: 			# Push author, contact and publisher id into array for insert/update into the database
        !           168: 			if ( $role_id == 8 ) {
        !           169: 				push(@author_reg_key,$reg_key);
        !           170: 			} elsif ( $role_id == 9 ) {
        !           171: 				push(@publisher_reg_key,$reg_key);
        !           172: 			} elsif ( $role_id == 11 ) {
        !           173: 				push(@contact_reg_key,$reg_key);
        !           174: 			} else {
        !           175: 				push(@other_reg_key,$reg_key);
        !           176: 			}
        !           177: 		} else {
        !           178: 		printf("Author/Publisher/Contact Information (%d): %s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", $role_id, $person_first_name, $person_last_name, $person_middle_name, $person_title, $person_company, $entity_email_address, $entity_address, $entity_city, $entity_state, $entity_postal_code, $entity_home_page_url, $entity_phone, $entity_fax, $entity_country);
        !           179: 		printf("This is a(n) %s.\n", $object_type);
        !           180: 		}
        !           181: 	}
        !           182: 
        !           183: 	# <educational>
        !           184: 	my $educational_element = $metadata->find_by_tag_name('educational');
        !           185: 	my @learning_context_element = $educational_element->find_by_tag_name('learningcontext');
        !           186: 	my @learning_context_list = ();
        !           187: 	foreach my $learning_context_item (@learning_context_element) {
        !           188: 		push(@learning_context_list,OAIv_findLContext($learning_context_item->find_by_tag_name('langstring')->as_text()));
        !           189: 	}
        !           190: 	my $learning_context;
        !           191: 	if (@learning_context_list) {
        !           192: 		$learning_context = join(';',@learning_context_list);
        !           193: 	}
        !           194: 	my @intendedenduserrole_element = $educational_element->find_by_tag_name('intendedenduserrole');
        !           195: 	my $intendedenduserrole_id = 0;
        !           196: 	foreach my $intendedenduserrole_item (@intendedenduserrole_element) {
        !           197: 		my $intendedenduserrole = $intendedenduserrole_item->find_by_tag_name('langstring')->as_text();
        !           198: 		if ($intendedenduserrole eq 'Learner') {
        !           199: 			$intendedenduserrole_id = 2;
        !           200: 		} elsif ($intendedenduserrole eq 'Teacher') {
        !           201: 			$intendedenduserrole_id = 1;
        !           202: 		}
        !           203: 	}
        !           204: 
        !           205: 	# <rights>
        !           206: 	my $rights_element = $metadata->find_by_tag_name('rights');
        !           207: 	my $cost_element = $rights_element->find_by_tag_name('cost');
        !           208: 	my $cost = $cost_element->find_by_tag_name('langstring')->as_text();
        !           209: 	# This is version.purchase_license_type_id
        !           210: 	if ( $cost eq 'No') {
        !           211: 		$cost= 1;
        !           212: 	} else {
        !           213: 		$cost = 3;
        !           214: 	}
        !           215: 	my $rights_description_element = $rights_element->find_by_tag_name('description');
        !           216: 	my $rights_description = $rights_description_element->find_by_tag_name('langstring')->as_text();
        !           217: 
        !           218: 	if ( $useDatabase ) {
        !           219: 		# General configuration information for DLESE
        !           220: 		my $image = "http://www.smete.org/images/affiliation/dlese.gif";
        !           221: 		my $submitter_key = '{710FE693-46E9-4002-BA94-1BE2E6218CD6}'; # Andy Dong
        !           222: 		my $collection = 'Digital Library for Earth System Education';
        !           223: 		my $collection_reg_key = '{CA001C50-77CA-43DC-A761-95207D386EDD}';
        !           224: 		my $publisher = 'Digital Library for Earth System Education';
        !           225: 		push(@publisher_reg_key,'{CA001C50-77CA-43DC-A761-95207D386EDD}');
        !           226:         	if ( my $general_key = OAIc_loexists($dbh,$title) ) {
        !           227: #		my $success = OAIc_update_lo_dlese($dbh, $general_key, $title, $language, $description, $image, $pub_month, $pub_year, $keywords, $submitter_key, $publisher, $collection, $format, $platform, $location, $learning_context, $intendedenduserrole_id, $collection_reg_key, $rights_description, $cost);
        !           228: 		my $success = OAIc_update_lo_dlese($dbh, $general_key, $learning_context, $intendedenduserrole_id, $rights_description, $cost);
        !           229:                 $updated = $updated + 1;
        !           230: 		} else { 
        !           231: 		printf("Inserting new record for %s\n",$title);
        !           232: 		my $success = OAIc_insert_lo_dlese($dbh, $title, $language, $description, $image, $pub_month, $pub_year, $keywords, $submitter_key, $publisher, $collection, $format, $platform, $location, $learning_context, $intendedenduserrole_id, $collection_reg_key, $rights_description, $cost);
        !           233: 		my $id = OAIc_loexists($dbh,$title);
        !           234: 		# INSERT INTO [needs_3_1]..learning_object_contributor
        !           235: 		# Add author contribution (ct_key=8)                  
        !           236: 			foreach my $author (@author_reg_key) { 
        !           237: 			printf("Adding author contribution for %s\n", $author);
        !           238: 			my $rc = $dbh->do(q{INSERT INTO learning_object_contributor (learning_object_id, entity_id, role_id) VALUES (?,?,8)}, undef, $id,$author);
        !           239: 			}
        !           240: 		# Add publisher contribution (ct_key=9)
        !           241: 			foreach my $publisher_id (@publisher_reg_key) {
        !           242: 			printf("Adding publisher contribution for %s\n", $publisher_id);
        !           243: 			my $rc = $dbh->do(q{INSERT INTO learning_object_contributor (learning_object_id, entity_id, role_id) VALUES (?,?,9)}, undef, $id,$publisher_id);
        !           244: 			}
        !           245: 		# Add contact contribution (ct_key=11)
        !           246: 			foreach my $contact (@contact_reg_key) {
        !           247: 			printf("Adding contact contribution for %s\n", $contact);
        !           248: 			my $rc = $dbh->do(q{INSERT INTO learning_object_contributor (learning_object_id, entity_id, role_id) VALUES (?,?,11)}, undef, $id, $contact);
        !           249: 			}
        !           250: 		# Add other contribution (ct_key=4)
        !           251: 			foreach my $other (@other_reg_key) {
        !           252: 			printf("Adding other contribution for %s\n", $other);
        !           253: 			my $rc = $dbh->do(q{INSERT INTO learning_object_contributor (learning_object_id, entity_id, role_id) VALUES (?,?,4)}, undef, $id, $other);
        !           254: 			}
        !           255: 		# Add collection contribution (ct_key=12)
        !           256: 			my $rc = $dbh->do(q{INSERT INTO learning_object_contributor (learning_object_id, entity_id, role_id) VALUES (?,?,12)}, undef, $id, $collection_reg_key);
        !           257: 			if (!$rc) {
        !           258: 			$dbh->rollback;
        !           259: 			$dbh->disconnect;
        !           260: 			die "Unable to insert new record into learning_object_contributor: $dbh->errstr\n";
        !           261: 		}
        !           262:                 $inserted = $inserted + 1;
        !           263: 		}
        !           264: 		$dbh->commit;
        !           265: 		$getrecord_tree->delete;
        !           266: 	} else {
        !           267: 	# Print parsed data
        !           268: 	printf("Title: %s\tKeywords: %s\tDescription: %s\n", $title,$keywords,$description);
        !           269: 	printf("Publication %d-%d\n", $pub_month, $pub_year);
        !           270: 	printf("Format: %s\tPlatform: %s\tLocation: %s\n", $format, $platform, $location);
        !           271: 	printf("Learning Context: %s\tIntended End User Role: %d\n", $learning_context,$intendedenduserrole_id);
        !           272: 	printf("Cost: %s\tCopyright: %s\n", $cost, substr($rights_description,0,1024));
        !           273: 	$getrecord_tree->delete;
        !           274: 	}
        !           275: 
        !           276: }
        !           277: 
        !           278: $tree->delete;
        !           279: 
        !           280: if ( $useDatabase ) {               
        !           281: 	$dbh->disconnect();
        !           282: }
        !           283: 
        !           284: printf("Inserted %d records and Updated %d records in %f seconds.\n", $inserted, $updated, tv_interval($t0));
        !           285: 
        !           286: exit 0;

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>