#!/usr/local/bin/perl -w
#
# dlese-parserecord.pl - Parse DLESE Metadata
#
# Reads the record identifiers listed in dlese-identifiers.xml, parses the
# metadata file dlese/<identifier> for each of them, and either prints the
# extracted fields to STDOUT or, with -u, inserts/updates them in the
# database through the OAIc_* helpers.
#
# Written by Andy Dong 11/01/2001
#
use strict;
use warnings;
use Getopt::Std;
use Time::HiRes qw(usleep ualarm gettimeofday tv_interval);
use HTTP::Request;
use LWP::UserAgent;
use XML::Element;
use XML::Parser;
use XML::TreeBuilder;
require OAIvocabulary_v2;
require OAIcataloging_v2;

# -u flag specifies [u]pdate database; otherwise output to STDOUT
my $usage = <<"EOT";
Usage: dlese-parserecord.pl -u

  -u  (U)pdate the database

Without -u it simply prints to STDOUT
EOT

my %args;
getopts('u', \%args) || die $usage;

my $inserted = 0;
my $updated  = 0;

# Always initialised: the former "my $x = 1 if COND" form has undefined
# behaviour according to perlsyn.
my $useDatabase = $args{'u'} ? 1 : 0;
my $dbh;
my $t0 = [gettimeofday];

# Registry keys shared by every record loaded by this script.
my $collection_reg_key = '{CA001C50-77CA-43DC-A761-95207D386EDD}';    # DLESE
my $submitter_key      = '{710FE693-46E9-4002-BA94-1BE2E6218CD6}';    # Andy Dong

# Role name (as found in the metadata) => contributor role id.
# Any role not listed here is stored with $OTHER_ROLE_ID.
my %role_id_for = (
    'Author'    => 8,
    'Publisher' => 9,
    'Contact'   => 11,
);
my $OTHER_ROLE_ID = 4;

# Database Configuration
if ( $useDatabase ) {
    print "Updating the database\n";

    # Loaded on demand so that print-only runs do not need DBI installed.
    require DBI;

    # NOTE(review): credentials are hardcoded in the source; consider moving
    # them to a protected configuration file or the environment.
    my $DBI_DSN  = 'dbi:ODBC:mel.odbc';
    my $DBI_USER = 'autocataloger';
    my $DBI_PWD  = 'regolatacotua';
    $dbh = DBI->connect($DBI_DSN, $DBI_USER, $DBI_PWD,
                        { RaiseError => 1, AutoCommit => 0 })
        || die "Unable to connect to database $DBI_DSN as $DBI_USER\n";
}

# First parse the dlese-identifiers.xml file to get the list of available records
my $tree = XML::TreeBuilder->new();
$tree->parse_file('dlese-identifiers.xml');
my @identifiers = $tree->find_by_tag_name('identifier');

# Now parse each stored record. Any failure (a DBI error raised through
# RaiseError, or a record missing a mandatory element) aborts the run; the
# uncommitted work of the failing record is rolled back before dying.
my $completed = eval {
    foreach my $identifier (@identifiers) {
        process_record( $identifier->as_text() );
    }
    1;
};
if ( !$completed ) {
    # Capture the error before any further eval can clobber $@.
    my $error = $@ || "Unknown failure while processing records\n";
    if ( $dbh ) {
        eval { $dbh->rollback; };
        eval { $dbh->disconnect; };
    }
    die $error;
}

$tree->delete;
if ( $useDatabase ) {
    $dbh->disconnect();
}
printf("Inserted %d records and Updated %d records in %f seconds.\n", $inserted, $updated, tv_interval($t0));
exit 0;

# optional_text($element, $tag)
#
# Returns the text of the first element with tag $tag found under $element,
# or the empty string when there is no such element. Used for contributor
# fields, where a missing field is treated like an empty one.
sub optional_text {
    my ($element, $tag) = @_;
    my $child = $element->find_by_tag_name($tag);
    return defined $child ? $child->as_text() : '';
}

# langstring_of($element)
#
# Returns the text of the first 'langstring' element under $element.
# Dies if there is none (the caller considers the value mandatory).
sub langstring_of {
    my ($element) = @_;
    return $element->find_by_tag_name('langstring')->as_text();
}

# process_record($record)
#
# Parses the metadata file dlese/$record. In database mode (-u) the record is
# inserted, or updated when a learning object with the same title already
# exists, and the transaction is committed; otherwise the parsed fields are
# printed to STDOUT.
#
# Uses the file-level $dbh, $useDatabase, $collection_reg_key, $submitter_key,
# %role_id_for and $OTHER_ROLE_ID, and increments $inserted / $updated.
# Dies on any parse or database error; the caller is responsible for rollback.
sub process_record {
    my ($record) = @_;

    my $getrecord_tree = XML::TreeBuilder->new();
    $getrecord_tree->parse_file("dlese/$record");
    my $metadata = $getrecord_tree->find_by_tag_name('metadata');

    #
    # general
    #
    my $general_element = $metadata->find_by_tag_name('general');
    my $title = langstring_of( $general_element->find_by_tag_name('title') );

    # Note: DLESE appears to use general.extension.topic for keywords not general.keywords
    my @topic_elements = $general_element->find_by_tag_name('topic');
    my $keywords = join(',', map { langstring_of($_) } @topic_elements);

    my $description = langstring_of( $general_element->find_by_tag_name('description') );
    my $language    = $general_element->find_by_tag_name('language')->as_text();

    #
    # metametadata
    #
    my $metametadata_element = $metadata->find_by_tag_name('metametadata');
    my $accession_date = $metametadata_element->find_by_tag_name('accession')->as_text();
    # Only year and month are used; both stay undefined when the accession
    # date is not in YYYY-MM-DD form.
    my ($pub_year, $pub_month) = ( $accession_date =~ /^(\d{4})-(\d{2})-(\d{2})$/ );

    #
    # technical
    #
    my $technical_element = $metadata->find_by_tag_name('technical');
    my $location = $technical_element->find_by_tag_name('location')->as_text;
    my $format_name = langstring_of( $technical_element->find_by_tag_name('format') );
    my $format   = ( $format_name eq 'text/html' ) ? 65 : 0;    # 65 = text/html, 0 = unknown
    my $platform = "5";    # HTML Browser (not specified but construed from metadata)

    #
    # lifecycle
    #
    my $lifecycle_element   = $metadata->find_by_tag_name('lifecycle');
    my @contributor_element = $lifecycle_element->find_by_tag_name('contribute');

    # Registry keys of the contributors, grouped by role id.
    my %reg_keys_for = map { $_ => [] } ( values %role_id_for, $OTHER_ROLE_ID );

    foreach my $contributor_item (@contributor_element) {
        my $role    = langstring_of( $contributor_item->find_by_tag_name('role') );
        my $role_id = exists $role_id_for{$role} ? $role_id_for{$role} : $OTHER_ROLE_ID;

        my $centity        = $contributor_item->find_by_tag_name('centity');
        my $role_extension = $centity->find_by_tag_name('extension');

        my $person_first_name    = optional_text($role_extension, 'firstname');
        my $person_last_name     = optional_text($role_extension, 'lastname');
        my $person_middle_name   = optional_text($role_extension, 'mi');
        my $person_title         = optional_text($role_extension, 'nametitle');
        my $person_company       = optional_text($role_extension, 'org');
        my $entity_email_address = optional_text($role_extension, 'email');
        my $entity_address       = optional_text($role_extension, 'adr');
        my $entity_city          = optional_text($role_extension, 'city');
        my $entity_state         = optional_text($role_extension, 'state');
        my $entity_postal_code   = optional_text($role_extension, 'zip');
        my $entity_home_page_url = optional_text($role_extension, 'url');
        my $entity_phone         = optional_text($role_extension, 'tel');
        my $entity_fax           = optional_text($role_extension, 'fax');
        my $entity_country       = optional_text($role_extension, 'country');

        # if there is no $person_first_name, then this is an organization
        my $object_type = $person_first_name ? 'person' : 'organization';

        if ( $useDatabase ) {
            my $reg_key;

            # Does this entity exist? Organizations are looked up by name,
            # persons by e-mail address; unknown entities are inserted first.
            if ( $object_type eq 'organization' ) {
                if ( ! ($reg_key = OAIc_orgexists($dbh,$person_company)) ) {
                    OAIc_insert_org($dbh,$collection_reg_key,$submitter_key,$entity_email_address,$person_company,$entity_address,$entity_city,$entity_state,$entity_postal_code,$entity_home_page_url,$entity_phone,$entity_fax,$entity_country);
                    $reg_key = OAIc_orgexists($dbh,$person_company);
                    printf("Inserted new organization %s\n", $reg_key);
                }
            }
            else {
                if ( ! ($reg_key = OAIc_personexists($dbh,$entity_email_address)) ) {
                    OAIc_insert_person_full($dbh,$collection_reg_key,$submitter_key,$person_last_name,$person_first_name,$entity_email_address,$person_company,$person_middle_name,$person_title,$entity_address,$entity_city,$entity_state,$entity_postal_code,$entity_home_page_url,$entity_phone,$entity_fax,$entity_country);
                    $reg_key = OAIc_personexists($dbh,$entity_email_address);
                    printf("Inserted new person %s\n", $reg_key);
                }
            }

            # Remember the entity under its role for insert into the database
            push(@{ $reg_keys_for{$role_id} }, $reg_key);
        }
        else {
            printf("Author/Publisher/Contact Information (%d): %s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", $role_id, $person_first_name, $person_last_name, $person_middle_name, $person_title, $person_company, $entity_email_address, $entity_address, $entity_city, $entity_state, $entity_postal_code, $entity_home_page_url, $entity_phone, $entity_fax, $entity_country);
            printf("This is a(n) %s.\n", $object_type);
        }
    }

    #
    # educational
    #
    my $educational_element = $metadata->find_by_tag_name('educational');
    my @learning_context_element = $educational_element->find_by_tag_name('learningcontext');
    my @learning_context_list = ();
    foreach my $learning_context_item (@learning_context_element) {
        push(@learning_context_list, OAIv_findLContext( langstring_of($learning_context_item) ));
    }
    # Stays undefined when the record lists no learning context.
    my $learning_context;
    if (@learning_context_list) {
        $learning_context = join(';', @learning_context_list);
    }

    # When several roles are listed, the last recognised one wins.
    my @intendedenduserrole_element = $educational_element->find_by_tag_name('intendedenduserrole');
    my $intendedenduserrole_id = 0;
    foreach my $intendedenduserrole_item (@intendedenduserrole_element) {
        my $intendedenduserrole = langstring_of($intendedenduserrole_item);
        if ($intendedenduserrole eq 'Learner') {
            $intendedenduserrole_id = 2;
        }
        elsif ($intendedenduserrole eq 'Teacher') {
            $intendedenduserrole_id = 1;
        }
    }

    #
    # rights
    #
    my $rights_element = $metadata->find_by_tag_name('rights');
    my $cost_text = langstring_of( $rights_element->find_by_tag_name('cost') );
    # This is version.purchase_license_type_id
    my $cost = ( $cost_text eq 'No' ) ? 1 : 3;
    my $rights_description = langstring_of( $rights_element->find_by_tag_name('description') );

    if ( $useDatabase ) {
        # General configuration information for DLESE
        my $image      = "http://www.smete.org/images/affiliation/dlese.gif";
        my $collection = 'Digital Library for Earth System Education';
        my $publisher  = 'Digital Library for Earth System Education';

        # DLESE itself is always recorded as a publisher.
        push(@{ $reg_keys_for{ $role_id_for{'Publisher'} } }, $collection_reg_key);

        if ( my $general_key = OAIc_loexists($dbh,$title) ) {
            # Existing record: only the educational and rights fields are refreshed.
            OAIc_update_lo_dlese($dbh, $general_key, $learning_context, $intendedenduserrole_id, $rights_description, $cost);
            $updated = $updated + 1;
        }
        else {
            printf("Inserting new record for %s\n",$title);
            OAIc_insert_lo_dlese($dbh, $title, $language, $description, $image, $pub_month, $pub_year, $keywords, $submitter_key, $publisher, $collection, $format, $platform, $location, $learning_context, $intendedenduserrole_id, $collection_reg_key, $rights_description, $cost);
            my $id = OAIc_loexists($dbh,$title);
            die "Unable to find newly inserted record for $title\n" unless $id;

            # INSERT INTO [needs_3_1]..learning_object_contributor
            # One row per contributor, in the order author (ct_key=8),
            # publisher (9), contact (11), other (4). Failures raise an
            # exception because the handle was opened with RaiseError.
            my @contributions = (
                [ 'author',    $role_id_for{'Author'}    ],
                [ 'publisher', $role_id_for{'Publisher'} ],
                [ 'contact',   $role_id_for{'Contact'}   ],
                [ 'other',     $OTHER_ROLE_ID            ],
            );
            foreach my $contribution (@contributions) {
                my ($label, $role_id) = @$contribution;
                # $role_id is one of the integer constants above, never input data.
                my $sql = "INSERT INTO learning_object_contributor (learning_object_id, entity_id, role_id) VALUES (?,?,$role_id)";
                foreach my $entity_key (@{ $reg_keys_for{$role_id} }) {
                    printf("Adding %s contribution for %s\n", $label, $entity_key);
                    $dbh->do($sql, undef, $id, $entity_key);
                }
            }

            # Add collection contribution (ct_key=12)
            $dbh->do(q{INSERT INTO learning_object_contributor (learning_object_id, entity_id, role_id) VALUES (?,?,12)}, undef, $id, $collection_reg_key);

            $inserted = $inserted + 1;
        }
        $dbh->commit;
    }
    else {
        # Print parsed data; undefined optional values are shown as empty/zero
        # instead of triggering "uninitialized value" warnings.
        printf("Title: %s\tKeywords: %s\tDescription: %s\n", $title,$keywords,$description);
        printf("Publication %d-%d\n",
               defined $pub_month ? $pub_month : 0,
               defined $pub_year  ? $pub_year  : 0);
        printf("Format: %s\tPlatform: %s\tLocation: %s\n", $format, $platform, $location);
        printf("Learning Context: %s\tIntended End User Role: %d\n",
               defined $learning_context ? $learning_context : '',
               $intendedenduserrole_id);
        printf("Cost: %s\tCopyright: %s\n", $cost, substr($rights_description,0,1024));
    }

    $getrecord_tree->delete;
    return;
}