Annotation of nsdl/nsdlloncapaorg/harvester.pl, revision 1.1

1.1     ! www         1: #!/usr/local/bin/perl
        !             2: 
        !             3: #
        !             4: # lon-capa.pl
        !             5: # Parse the LON-CAPA metadata
        !             6: #
        !             7: # Andy Dong <adong@smete.org> 10/23/2002
        !             8: #
        !             9: # Contact Gerd Kortemeyer (korte@lite.msu.edu)
        !            10: 
        !            11: use strict;
        !            12: use LWP::UserAgent;
        !            13: use Getopt::Std;
        !            14: 
        !            15: use DBI;
        !            16: use DBD::ODBC;
        !            17: 
        !            18: require OAIcataloging_v2;
        !            19: 
        !            20: # -u flag specifies [u]pdate database; otherwise output to STDOUT
        !            21: 
        !            22: my $usage = << "EOT";
        !            23: Usage: lon-capa.pl -u
        !            24: 
        !            25:     -u (U)pdate the database
        !            26: 
        !            27:     Without -u it simply prints SQL UPDATE statements to STDOUT
        !            28: EOT
        !            29: 
        !            30: my %args;
        !            31: getopts('u', \%args) || die $usage;
        !            32: 
        !            33: my $useDatabase = 1 if ($args{'u'});
        !            34: 
        !            35: #my $DBI_DSN='dbi:ODBC:needs2_mel_needs_3_1_dev.odbc';
        !            36: my $DBI_DSN='dbi:ODBC:needs2_mel_needs_3_1.odbc';
        !            37: my $DBI_USER='autocataloger';
        !            38: my $DBI_PWD='regolatacotua';
        !            39: my $dbh;
        !            40: 
        !            41: my $pub_month;
        !            42: my $pub_year;
        !            43: my @loncapa;
        !            44: 
        !            45: # HTTP requests
        !            46: 
        !            47: my $content;
        !            48: my $content_regex = 'File Not Found';
        !            49: 
        !            50: # Configuration
        !            51: 
        !            52: my $debug = 0;
        !            53: my $url = 'http://data.lite.msu.edu/cgi-bin/metadata_harvest.pl';
        !            54: # The list of servers is from the LON-CAPA CVS repository in /loncapa/loncom/production_hosts.tab
        !            55: my @servers = ( 'newscience.westshore.cc.mi.us', 's10.lite.msu.edu', 's12.lite.msu.edu', 'lon-capa.chem.sunysb.edu', 'schubert.tmcc.edu', 'dalton.chem.sfu.ca', 'capa2.phy.ohiou.edu', 'pollux.physics.fsu.edu', 'loncapa.physics.sc.edu', 'loncapa.math.ucf.edu', 'zappa.ags.udel.edu', 'loncapa.gwu.edu');
        !            56: 
        !            57: # End Configuration
        !            58: 
        !            59: #my $ua = new LWP::UserAgent;
        !            60: #$ua->timeout(600);
        !            61: 
        !            62: #my $request = new HTTP::Request GET => $url;
        !            63: #$request->authorization_basic('reaper', 'cat4u');
        !            64: 
        !            65: #my $response = $ua->request( $request );
        !            66: 
        !            67: #if ( $response->is_success ) {
        !            68: #	$content = $response->content;
        !            69: # Delete all blank lines
        !            70: #	$content =~ s/(?<!.)\n//g;
        !            71: # Replace all ^M with spaces
        !            72: #	$content =~ s/\r/\s/g;
        !            73: # Push the content into an array
        !            74: #	@loncapa = split /\n/, $content;
        !            75: #} else {
        !            76: #	die 'LON-CAPA request failed: ' . $response->message;
        !            77: #}
        !            78: 
        !            79: @loncapa=undef;
        !            80: open (LON_FILE, 'metadata_harvest.txt') || die;
        !            81: 
        !            82: while (<LON_FILE>) {
        !            83:        chomp;
        !            84:        push(@loncapa,$_);
        !            85: }
        !            86: 
        !            87: my %records = ();;
        !            88: foreach my $metadata (@loncapa) {
        !            89: 	chomp $metadata;
        !            90: 	my @tkline = split('\|', $metadata);
        !            91: 	my $title = $tkline[0];
        !            92: 	next if ( $title eq '' );
        !            93: 	my $author = $tkline[1];
        !            94: 	next if ( $author eq '' );
        !            95: 	my @authorname = split(' ', $author);
        !            96: 	my $author_fname = $authorname[0];
        !            97: 	my $author_lname = $authorname[1];
        !            98: 	# We have to make an exception for Multimedia Physics which is an organization not a person
        !            99: 	my $object_type;
        !           100: 	if ( $author_lname eq 'Physics' ) {
        !           101: 		$object_type = 'organization';
        !           102: 	} else {
        !           103: 		$object_type = 'person';
        !           104: 	}
        !           105: 	my $subject = $tkline[2];
        !           106: 	next if ( ($subject eq 'Sample') || ($subject eq 'Something') );
        !           107: 	my $resourceurl = 'http://lon-capa.smete.org' . $tkline[3];
        !           108: 	next if ( $resourceurl =~ /(.*)\/demo\/(.*)/ );
        !           109: 	my $keywords = $tkline[4];
        !           110: 	my $version = $tkline[5];
        !           111: 	my $notes = $tkline[6];
        !           112: 	my $abstract = $tkline[7];
        !           113: 	next if ($abstract eq '');
        !           114: 	my $type = $tkline[8];
        !           115: 	my $learning_resource_type;
        !           116: 	if ( $type eq 'problem' ) {
        !           117: 		$learning_resource_type = 114;
        !           118: 	} elsif ( $type eq 'exam' ) {
        !           119: 		$learning_resource_type = 114;
        !           120: 	} elsif ( $type eq 'quiz' ) {
        !           121: 		$learning_resource_type = 114;
        !           122: 	} elsif ( $type eq 'assess' ) {
        !           123: 		$learning_resource_type = 114;
        !           124: 	} elsif ( $type eq 'survey' ) {
        !           125: 		$learning_resource_type = 114;
        !           126: 	} elsif ( $type eq 'form' ) {
        !           127: 		$learning_resource_type = 114;
        !           128: 	} elsif ( $type eq 'library' ) {
        !           129: 		$learning_resource_type = 107;
        !           130: 	} elsif ( $type eq 'page' ) {
        !           131: 		$learning_resource_type = 104;
        !           132: 	} elsif ( $type eq 'sequence' ) {
        !           133: 		$learning_resource_type = 104;
        !           134: 	} elsif ( $type eq 'spreadsheet' ) {
        !           135: 		$learning_resource_type = 114;
        !           136: 	} else {
        !           137: 		$learning_resource_type = 0;
        !           138: 	}
        !           139: 	
        !           140: 	my $media_format;
        !           141: 	if ( ($type eq 'htm') || ($type eq 'gif') || ($type eq 'mov') || ($type eq 'xml') ) {
        !           142: 		$media_format = 70;
        !           143: 	} else {
        !           144: 		$media_format = 0;
        !           145: 	}
        !           146: 
        !           147: 	my $language = $tkline[9]; # Look only for seniso
        !           148: 	next if ( $language ne 'seniso');
        !           149: 	my $primary_language='en-US';
        !           150: 	my $creation_date = $tkline[10];
        !           151: 	my ($pub_year,$pub_month,$pub_day) = ( $creation_date =~ /^(\d{4})-(\d{2})-(\d{2})\s(\d{2}):(\d{2}):(\d{2})$/ );
        !           152: 	my $revision_date = $tkline[11];
        !           153: 	my $owner = $tkline[12];
        !           154: 	my $rights_description;
        !           155: 	my $copyright = $tkline[13]; # public,domain,default,private (skip if private and domain)
        !           156: 	# Public means no login required
        !           157: 
        !           158: 	if ( $copyright eq 'public' ) {
        !           159: 		$rights_description = 'LON-CAPA Public Resource. No login required.';
        !           160: 	} elsif ($copyright eq 'domain') {
        !           161: 		$rights_description = 'Restricted to certain LON-CAPA domains.';
        !           162: 	} else {
        !           163: 		$rights_description = 'LON-CAPA Default Use Restriction. Login required.';
        !           164: 	}
        !           165: 	# Domain means restricted to a particular LON-CAPA domain
        !           166: 	# Defaults mean access open to any registered LON-CAPA user
        !           167: 	# Private means open only to author of material
        !           168: 	next if ( $copyright eq 'private');
        !           169: 	my $platform = "5";     # HTML Browser (not specified but construed from metadata)
        !           170: 
        !           171: # Connect to database
        !           172: if ( $useDatabase ) {
        !           173: 	$dbh= DBI->connect($DBI_DSN, $DBI_USER, $DBI_PWD, { RaiseError => 1, AutoCommit => 0 }) || die "Unable to connect to database $DBI_DSN as $DBI_USER: ($DBI::err) $DBI::errstr\n";;
        !           174: 	# Configuration information for LON-CAPA
        !           175: 	my $collection_id = OAIc_orgexists($dbh,'LearningOnline Network with CAPA');
        !           176: 	my $submitter_id = OAIc_personexists($dbh,'adong@smete.org');
        !           177: 	my $image = 'http://www.lite.msu.edu/liteani.gif';
        !           178: 	my $cost = 1; # version.purchase_license_type_id
        !           179: 	my $collection = 'LearningOnline Network with CAPA';
        !           180: 	# LON-CAPA has single authors
        !           181: 	my $reg_key;
        !           182: 	if ( $object_type eq 'organization' ) {
        !           183: 		if ( ! ($reg_key = OAIc_orgexists($dbh,join(' ',$author_fname,$author_lname))) ) {
        !           184: 		printf("Inserting new organization %s\n", join(' ',$author_fname, $author_lname));
        !           185: 		my $success = OAIc_insert_org($dbh,$collection_id,$submitter_id,'',join(' ',$author_fname,$author_lname),'','','','','','','','');
        !           186: 		$reg_key = OAIc_orgexists($dbh,join(' ',$author_fname,$author_lname));
        !           187: 		}
        !           188: 	} else {
        !           189: 		if ( ! ($reg_key = OAIc_personexists_name($dbh,join(' ',$author_fname,$author_lname))) ) {
        !           190: 		printf("Inserting new person(author) %s\n", join(' ',$author_fname, $author_lname));
        !           191: 		my $success = OAIc_insert_person($dbh,$collection_id,$submitter_id,$author_lname,$author_fname,'','');
        !           192: 		$reg_key = OAIc_personexists_name($dbh,join(' ',$author_fname,$author_lname));
        !           193: 		}
        !           194: 	}
        !           195: 	my $updated;
        !           196: 	my $inserted;
        !           197: 	if ( my $general_key = OAIc_loexists($dbh,$title) ) {
        !           198: 		# Do nothing
        !           199: 		$updated = $updated + 1;
        !           200: 	} else {
        !           201: 		printf("Inserting new record for %s\n",$title);	
        !           202: 		my $success = OAIc_insert_lo($dbh, $title, $primary_language, $abstract, $image, $pub_month, $pub_year, $keywords, $submitter_id, $reg_key, $collection_id, $collection_id, $media_format, $platform, , '', $resourceurl, '', 1, $reg_key, $collection_id, $collection_id, '', '', '', $learning_resource_type, $rights_description, $cost);
        !           203: 		$inserted = $inserted + 1;
        !           204: 	}
        !           205: }
        !           206: 
        !           207: if (! $useDatabase ) { # Print information if no database updates requested
        !           208: 	printf("Title: %s\n", $title);
        !           209: 	printf("Author First Name: %s\n", $author_fname);
        !           210: 	printf("Author Last Name: %s\n", $author_lname);
        !           211: 	printf("Subject: %s\n", $subject);
        !           212: 	printf("URL: %s\n", $resourceurl);
        !           213: 	printf("Keywords: %s\n", $keywords);
        !           214: 	printf("Version: %s\n", $version);
        !           215: 	printf("Notes: %s\n", $notes);
        !           216: 	printf("Abstract: %s\n", $abstract);
        !           217: 	printf("Learning Resource Type: %d\n", $learning_resource_type);
        !           218: 	printf("Media Format: %d\n", $media_format);
        !           219: 	printf("Primary Language: %s\n", $primary_language);
        !           220: 	printf("Creation Date: %s\n", $creation_date);
        !           221: 	printf("Revision Date: %s\n", $revision_date);
        !           222: 	printf("Copyright: %s\n", $copyright);
        !           223: 	printf("Publication Year: %4d\tPublication Month: %02d\n", $pub_year, $pub_month);
        !           224: }
        !           225: 
        !           226: if ( $useDatabase ) {
        !           227: 	$dbh->commit;
        !           228: 	$dbh->disconnect;
        !           229: }
        !           230: 
        !           231: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>