File:  [LON-CAPA] / nsdl / harvestsmete / dlese-getrecord.pl
Revision 1.1: download - view: text, annotated - select for diffs
Thu May 8 16:37:31 2003 UTC (20 years, 10 months ago) by www
Branches: MAIN
CVS tags: HEAD
SMETE side harvest code for LON-CAPA

#!/usr/local/bin/perl -w

#
# dlese-getrecord.pl - Use OAI-PMH to harvest metadata from DLESE in dlese_ims format
# This program reads the file dlese-identifiers.xml, then requests each record and stores it in a sub-directory called dlese
#
# Written by Andy Dong <adong@smete.org> 11/01/2001
#

use strict;
use Getopt::Std;
use Time::HiRes qw(usleep ualarm gettimeofday tv_interval);

use HTTP::Request;
use LWP::UserAgent;

use XML::Element;
use XML::Parser;
use XML::TreeBuilder;

# First parse the dlese-identifiers.xml file to get the list of available records.
# NOTE(review): the underlying XML parser is expected to die on a missing or
# malformed file, so no explicit check is made here - confirm if that matters.
my $tree = XML::TreeBuilder->new();
$tree->parse_file('dlese-identifiers.xml');
my @identifiers = $tree->find_by_tag_name('identifier');

# Directory that receives one file per harvested record.  Create it up front so
# that a missing directory is reported once, instead of every write failing.
my $output_dir = 'dlese';
unless (-d $output_dir) {
	mkdir($output_dir) or die "Cannot create directory $output_dir: $!\n";
}

# One user agent is enough for the whole run; no need to rebuild it per record.
my $ua = LWP::UserAgent->new;

# Now go grab them and save them to a file
foreach my $identifier (@identifiers) {
	my $record = $identifier->as_text();
	$record =~ s/^\s+|\s+$//g;
	if ($record eq '') {
		warn "Skipping empty identifier element\n";
		next;
	}

	# Percent-encode the identifier for use as a query-string value.  OAI
	# identifiers routinely contain ':' and may contain '/', '&' or '=', all
	# of which must be escaped.  Encode to UTF-8 octets first so that
	# characters above 0xFF become valid %XX sequences.
	my $escaped = $record;
	utf8::encode($escaped);
	$escaped =~ s/([^A-Za-z0-9\-._~])/sprintf('%%%02X', ord($1))/ge;

	# The identifier comes from a remote document, so never let it act as a
	# path: neutralise directory separators and NUL before using it as a
	# file name.  Ordinary identifiers are left unchanged.
	(my $file_name = $record) =~ s{[/\\\0]}{_}g;
	my $path = "$output_dir/$file_name";

	my $url = join('','http://oai.dlese.org/provider?verb=GetRecord&metadataPrefix=dlese_ims&identifier=',$escaped);
	printf("Going to retrieve %s\n", $url);
	my $request = HTTP::Request->new('GET', $url);
	my $response = $ua->request( $request );

	if ( $response->is_success ) {
		# Store the response body exactly as received.
		if ( open(my $output, '>', $path) ) {
			print {$output} $response->content
				or warn "Cannot write to $path: $!\n";
			# Buffered write errors only surface at close time.
			close($output)
				or warn "Cannot close $path: $!\n";
		} else {
			warn "Cannot open $path for writing: $!\n";
		}
	} else {
		warn 'OAI request failed: ' . $response->message;
	}

	# sleep else DLESE may crash
	sleep(10);
}

exit 0;

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>