File:  [LON-CAPA] / nsdl / harvestsmete / dlese-getrecord.pl
Revision 1.1: download - view: text, annotated - select for diffs
Thu May 8 16:37:31 2003 UTC (21 years ago) by www
Branches: MAIN
CVS tags: HEAD
SMETE side harvest code for LON-CAPA

    1: #!/usr/local/bin/perl -w
    2: 
    3: #
    4: # dlese-getrecord.pl - Use OAI MHP to harvest metadata from DLESE in dlese_ims format
    5: # This program reads a file dlese-identifiers.xml then requests each record and stores in a sub-directory called dlese_ims
    6: #
    7: # Written by Andy Dong <adong@smete.org> 11/01/2001
    8: #
    9: 
   10: use strict;
   11: use Getopt::Std;
   12: use Time::HiRes qw(usleep ualarm gettimeofday tv_interval);
   13: 
   14: use HTTP::Request;
   15: use LWP::UserAgent;
   16: 
   17: use XML::Element;
   18: use XML::Parser;
   19: use XML::TreeBuilder;
   20: 
   21: # First parse the dlese-identifiers.xml file to get the list of available records
   22: my $tree = XML::TreeBuilder->new();
   23: $tree->parse_file('dlese-identifiers.xml');
   24: my @identifiers = $tree->find_by_tag_name('identifier');
   25: 
   26: # Now go grab them and save them to a file
   27: foreach my $identifier (@identifiers) {
   28: 	my $record = $identifier->as_text();
   29: 	my $url = join('','http://oai.dlese.org/provider?verb=GetRecord&metadataPrefix=dlese_ims&identifier=',$record);
   30: 	printf("Going to retrieve %s\n", $url);
   31: 	my $ua = new LWP::UserAgent;
   32: 	my $request = HTTP::Request->new('GET', $url);
   33: 	my $response = $ua->request( $request );
   34: 
   35: 	if ( $response->is_success ) {
   36: 		my $content = $response->content;
   37: 		open(OUTPUT,">dlese/$record");
   38: 		print OUTPUT $content;
   39: 		close OUTPUT;
   40: 	} else {
   41: 		warn 'OAI request failed: ' . $response->message;
   42: 	}
   43: 
   44: 	# sleep else DLESE may crash
   45: 	sleep(10);
   46: }
   47: 
   48: exit 0;

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>