Annotation of nsdl/nsdlloncapaorg/harvester.pl, revision 1.1
1.1 ! www 1: #!/usr/local/bin/perl
! 2:
! 3: #
! 4: # lon-capa.pl
! 5: # Parse the LON-CAPA metadata
! 6: #
! 7: # Andy Dong <adong@smete.org> 10/23/2002
! 8: #
! 9: # Contact Gerd Kortemeyer (korte@lite.msu.edu)
! 10:
! 11: use strict;
! 12: use LWP::UserAgent;
! 13: use Getopt::Std;
! 14:
! 15: use DBI;
! 16: use DBD::ODBC;
! 17:
! 18: require OAIcataloging_v2;
! 19:
! 20: # -u flag specifies [u]pdate database; otherwise output to STDOUT
! 21:
# Help text shown (via die) when option parsing fails.
my $usage = <<"EOT";
Usage: lon-capa.pl -u

-u (U)pdate the database

Without -u it simply prints SQL UPDATE statements to STDOUT
EOT

# Parse the command line; -u is the only recognised switch.
my %args;
getopts('u', \%args) || die $usage;

# True when the catalog database should be updated, false when records are
# only printed.  The flag is always initialised: "my $x = ... if COND" has
# undefined behaviour in Perl when COND is false.
my $useDatabase = $args{'u'} ? 1 : 0;
! 34:
# Database connection settings.
# NOTE(review): production credentials are hard-coded below.  They remain as
# defaults so existing invocations keep working, but each value can now be
# supplied through the environment (LONCAPA_DBI_DSN, LONCAPA_DBI_USER,
# LONCAPA_DBI_PWD) so the secret does not have to live in the source.
#my $DBI_DSN='dbi:ODBC:needs2_mel_needs_3_1_dev.odbc';
my $DBI_DSN = defined $ENV{LONCAPA_DBI_DSN}
    ? $ENV{LONCAPA_DBI_DSN}
    : 'dbi:ODBC:needs2_mel_needs_3_1.odbc';
my $DBI_USER = defined $ENV{LONCAPA_DBI_USER}
    ? $ENV{LONCAPA_DBI_USER}
    : 'autocataloger';
my $DBI_PWD = defined $ENV{LONCAPA_DBI_PWD}
    ? $ENV{LONCAPA_DBI_PWD}
    : 'regolatacotua';

# Database handle; stays undefined until a connection is made.
my $dbh;

# File-level publication date fields and the list of harvested record lines.
my $pub_month;
my $pub_year;
my @loncapa;

# HTTP requests

my $content;
my $content_regex = 'File Not Found';

# Configuration

my $debug = 0;
my $url = 'http://data.lite.msu.edu/cgi-bin/metadata_harvest.pl';
# The list of servers is from the LON-CAPA CVS repository in /loncapa/loncom/production_hosts.tab
my @servers = qw(
    newscience.westshore.cc.mi.us
    s10.lite.msu.edu
    s12.lite.msu.edu
    lon-capa.chem.sunysb.edu
    schubert.tmcc.edu
    dalton.chem.sfu.ca
    capa2.phy.ohiou.edu
    pollux.physics.fsu.edu
    loncapa.physics.sc.edu
    loncapa.math.ucf.edu
    zappa.ags.udel.edu
    loncapa.gwu.edu
);

# End Configuration
! 58:
! 59: #my $ua = new LWP::UserAgent;
! 60: #$ua->timeout(600);
! 61:
! 62: #my $request = new HTTP::Request GET => $url;
! 63: #$request->authorization_basic('reaper', 'cat4u');
! 64:
! 65: #my $response = $ua->request( $request );
! 66:
! 67: #if ( $response->is_success ) {
! 68: # $content = $response->content;
! 69: # Delete all blank lines
! 70: # $content =~ s/(?<!.)\n//g;
! 71: # Replace all ^M with spaces
! 72: # $content =~ s/\r/\s/g;
! 73: # Push the content into an array
! 74: # @loncapa = split /\n/, $content;
! 75: #} else {
! 76: # die 'LON-CAPA request failed: ' . $response->message;
! 77: #}
! 78:
# Read the local metadata dump into @loncapa, one chomped line per element.
# The list is reset with () rather than undef: assigning undef to an array
# produces a one-element list containing undef, not an empty list.
@loncapa = ();

my $harvest_file = 'metadata_harvest.txt';
open(my $lon_fh, '<', $harvest_file)
    or die "Unable to open $harvest_file: $!\n";

while ( my $line = <$lon_fh> ) {
    chomp $line;
    push @loncapa, $line;
}

close($lon_fh);
! 86:
# Walk the harvested lines.  Each line is a '|'-separated record; records that
# fail any of the filters below are skipped.  With -u every surviving record
# is checked against the catalog database and inserted when missing; without
# -u its fields are printed to STDOUT instead.

# Resource type (field 8) => learning resource type id; anything else maps to 0.
my %learning_resource_type_for = (
    ( map { $_ => 114 } qw(problem exam quiz assess survey form spreadsheet) ),
    library  => 107,
    page     => 104,
    sequence => 104,
);

# Resource type (field 8) => media format id; anything else maps to 0.
my %media_format_for = map { $_ => 70 } qw(htm gif mov xml);

# Database state shared by all records.  The connection is opened lazily on
# the first record that survives the filters and closed once after the loop,
# instead of reconnecting for every record.
my $collection_id;
my $submitter_id;
my $updated  = 0;    # records already present in the catalog
my $inserted = 0;    # records newly inserted

foreach my $metadata (@loncapa) {
    next unless defined $metadata;
    chomp $metadata;
    my @tkline = split( /\|/, $metadata );

    my $title = $tkline[0];
    next if !defined $title || $title eq '';

    my $author = $tkline[1];
    next if !defined $author || $author eq '';

    # Only the first two whitespace-separated words of the author are used.
    my ( $author_fname, $author_lname ) = split( ' ', $author );
    my $author_name = join( ' ', $author_fname, $author_lname );

    # We have to make an exception for Multimedia Physics which is an
    # organization not a person
    my $object_type =
        ( defined $author_lname && $author_lname eq 'Physics' )
        ? 'organization'
        : 'person';

    my $subject = $tkline[2];
    next if defined $subject && ( $subject eq 'Sample' || $subject eq 'Something' );

    my $resourceurl = 'http://lon-capa.smete.org'
        . ( defined $tkline[3] ? $tkline[3] : '' );
    next if $resourceurl =~ m{/demo/};

    my $keywords = $tkline[4];
    my $version  = $tkline[5];
    my $notes    = $tkline[6];

    my $abstract = $tkline[7];
    next if !defined $abstract || $abstract eq '';

    my $type = defined $tkline[8] ? $tkline[8] : '';
    my $learning_resource_type =
        exists $learning_resource_type_for{$type}
        ? $learning_resource_type_for{$type}
        : 0;
    my $media_format =
        exists $media_format_for{$type} ? $media_format_for{$type} : 0;

    my $language = $tkline[9];    # Look only for seniso
    next if !defined $language || $language ne 'seniso';
    my $primary_language = 'en-US';

    # Year and month are taken from a "YYYY-MM-DD HH:MM:SS" creation date;
    # both stay undefined when the date does not have that form.
    my $creation_date = $tkline[10];
    my ( $pub_year, $pub_month ) =
        ( $creation_date =~ /^(\d{4})-(\d{2})-(\d{2})\s(\d{2}):(\d{2}):(\d{2})$/ );
    my $revision_date = $tkline[11];

    # Copyright is one of public, domain, default, private.
    #   public  - no login required
    #   domain  - restricted to a particular LON-CAPA domain
    #   default - access open to any registered LON-CAPA user
    #   private - open only to the author of the material (skipped)
    my $copyright = defined $tkline[13] ? $tkline[13] : '';
    next if $copyright eq 'private';

    my $rights_description;
    if ( $copyright eq 'public' ) {
        $rights_description = 'LON-CAPA Public Resource. No login required.';
    }
    elsif ( $copyright eq 'domain' ) {
        $rights_description = 'Restricted to certain LON-CAPA domains.';
    }
    else {
        $rights_description = 'LON-CAPA Default Use Restriction. Login required.';
    }

    my $platform = "5";    # HTML Browser (not specified but construed from metadata)

    if ($useDatabase) {

        # Connect on first use and look up the collection and submitter once.
        if ( !$dbh ) {
            $dbh = DBI->connect( $DBI_DSN, $DBI_USER, $DBI_PWD,
                { RaiseError => 1, AutoCommit => 0 } )
                or die "Unable to connect to database $DBI_DSN as $DBI_USER: ($DBI::err) $DBI::errstr\n";

            # Configuration information for LON-CAPA
            $collection_id = OAIc_orgexists( $dbh, 'LearningOnline Network with CAPA' );
            $submitter_id  = OAIc_personexists( $dbh, 'adong@smete.org' );
        }

        my $image = 'http://www.lite.msu.edu/liteani.gif';
        my $cost  = 1;    # version.purchase_license_type_id

        # LON-CAPA has single authors: find the author's registry key,
        # creating the organization or person first when it is unknown.
        my $reg_key;
        if ( $object_type eq 'organization' ) {
            $reg_key = OAIc_orgexists( $dbh, $author_name );
            if ( !$reg_key ) {
                printf( "Inserting new organization %s\n", $author_name );
                OAIc_insert_org( $dbh, $collection_id, $submitter_id, '',
                    $author_name, '', '', '', '', '', '', '', '' );
                $reg_key = OAIc_orgexists( $dbh, $author_name );
            }
        }
        else {
            $reg_key = OAIc_personexists_name( $dbh, $author_name );
            if ( !$reg_key ) {
                printf( "Inserting new person(author) %s\n", $author_name );
                OAIc_insert_person( $dbh, $collection_id, $submitter_id,
                    $author_lname, $author_fname, '', '' );
                $reg_key = OAIc_personexists_name( $dbh, $author_name );
            }
        }

        if ( OAIc_loexists( $dbh, $title ) ) {

            # Already cataloged: nothing is written, only counted.
            $updated++;
        }
        else {
            printf( "Inserting new record for %s\n", $title );

            # NOTE(review): the original call had an empty slot (", ,") after
            # $platform.  Perl collapses it, so the list below is exactly what
            # was being passed; confirm against OAIc_insert_lo whether an
            # argument is missing there.
            OAIc_insert_lo(
                $dbh, $title, $primary_language, $abstract, $image,
                $pub_month, $pub_year, $keywords, $submitter_id, $reg_key,
                $collection_id, $collection_id, $media_format, $platform,
                '', $resourceurl, '', 1, $reg_key, $collection_id,
                $collection_id, '', '', '', $learning_resource_type,
                $rights_description, $cost
            );
            $inserted++;
        }

        # Commit after every record, as before.
        $dbh->commit;
    }
    else {

        # Print information if no database updates requested
        printf("Title: %s\n", $title);
        printf("Author First Name: %s\n", $author_fname);
        printf("Author Last Name: %s\n", $author_lname);
        printf("Subject: %s\n", $subject);
        printf("URL: %s\n", $resourceurl);
        printf("Keywords: %s\n", $keywords);
        printf("Version: %s\n", $version);
        printf("Notes: %s\n", $notes);
        printf("Abstract: %s\n", $abstract);
        printf("Learning Resource Type: %d\n", $learning_resource_type);
        printf("Media Format: %d\n", $media_format);
        printf("Primary Language: %s\n", $primary_language);
        printf("Creation Date: %s\n", $creation_date);
        printf("Revision Date: %s\n", $revision_date);
        printf("Copyright: %s\n", $copyright);
        printf("Publication Year: %4d\tPublication Month: %02d\n", $pub_year, $pub_month);
    }
}

# Close the connection (if one was opened) and report the totals that the
# per-record counters were meant to accumulate.
if ($useDatabase) {
    $dbh->disconnect if $dbh;
    printf( "%d record(s) inserted, %d already in the catalog\n",
        $inserted, $updated );
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>