#!/usr/local/bin/perl -w
#
# dlese-getrecord.pl - Use OAI-PMH to harvest metadata from DLESE in dlese_ims format
#
# This program reads the file dlese-identifiers.xml, requests each record
# named by an <identifier> element via the OAI GetRecord verb, and stores
# each successful response body in a file named after the identifier inside
# the output sub-directory ($OUTPUT_DIR, currently 'dlese').
#
# NOTE(review): the original header said the sub-directory is called
# "dlese_ims", but the code has always written to "dlese". The runtime path
# is left unchanged here; confirm which one is intended.
#
# Written by Andy Dong 11/01/2001
#
use strict;
use warnings;
use Getopt::Std;
use Time::HiRes qw(usleep ualarm gettimeofday tv_interval);
use HTTP::Request;
use LWP::UserAgent;
use XML::Element;
use XML::Parser;
use XML::TreeBuilder;

# Input list of record identifiers (one <identifier> element per record).
my $IDENTIFIER_FILE = 'dlese-identifiers.xml';

# GetRecord request prefix; the URL-encoded identifier is appended to it.
my $BASE_URL = 'http://oai.dlese.org/provider?verb=GetRecord&metadataPrefix=dlese_ims&identifier=';

# Directory that receives one file per harvested record.
my $OUTPUT_DIR = 'dlese';

# Pause between consecutive requests, in seconds.
my $DELAY_SECONDS = 10;

# Percent-encode a string for use as a URL query value.
# Everything outside the RFC 3986 "unreserved" set is encoded, which covers
# the characters OAI-PMH requires to be escaped (/ ? # = & : ; space % +).
sub escape_query_value {
    my ($value) = @_;

    # Encode characters to UTF-8 bytes first so that ord() below is <= 0xFF.
    utf8::encode($value);
    $value =~ s/([^A-Za-z0-9\-_.~])/sprintf('%%%02X', ord $1)/ge;
    return $value;
}

# Map a record identifier to a file name that cannot leave $OUTPUT_DIR.
# Path separators and control characters become '_'; identifiers made only
# of safe characters keep exactly the name the original script used.
# Returns nothing when no usable name can be derived.
sub filename_for {
    my ($record) = @_;

    ( my $name = $record ) =~ s{[/\\\x00-\x1f]}{_}g;
    return if $name eq '' || $name eq '.' || $name eq '..';
    return $name;
}

# Write $content to $path. Returns true on success; on failure emits a
# warning and returns false so that one bad record does not stop the harvest.
sub save_record {
    my ( $path, $content ) = @_;

    my $fh;
    if ( !open $fh, '>', $path ) {
        warn "Cannot open $path for writing: $!\n";
        return;
    }

    my $ok = 1;
    if ( !print {$fh} $content ) {
        warn "Cannot write to $path: $!\n";
        $ok = 0;
    }

    # Buffered write errors only surface at close, so check it as well.
    if ( !close $fh ) {
        warn "Cannot close $path: $!\n";
        $ok = 0;
    }
    return $ok;
}

# First parse the dlese-identifiers.xml file to get the list of available records
my $tree = XML::TreeBuilder->new();
$tree->parse_file($IDENTIFIER_FILE);
my @identifiers = $tree->find_by_tag_name('identifier');

# Make sure the output directory exists before the first (slow) request.
if ( !-d $OUTPUT_DIR ) {
    mkdir $OUTPUT_DIR
        or die "Cannot create output directory $OUTPUT_DIR: $!\n";
}

# One user agent is enough for the whole run.
my $ua = LWP::UserAgent->new;

# Now go grab them and save them to a file
my $remaining = scalar @identifiers;
foreach my $identifier (@identifiers) {
    $remaining--;

    # Strip whitespace that pretty-printed XML may leave around the value.
    my $record = $identifier->as_text();
    $record =~ s/\A\s+//;
    $record =~ s/\s+\z//;
    if ( $record eq '' ) {
        warn "Skipping empty <identifier> element\n";
        next;
    }

    my $url = $BASE_URL . escape_query_value($record);
    printf( "Going to retrieve %s\n", $url );

    my $request  = HTTP::Request->new( 'GET', $url );
    my $response = $ua->request($request);

    if ( $response->is_success ) {
        my $name = filename_for($record);
        if ( defined $name ) {
            save_record( "$OUTPUT_DIR/$name", $response->content );
        }
        else {
            warn "Cannot derive a file name from identifier '$record'\n";
        }
    }
    else {
        warn "OAI request failed for $record: " . $response->status_line . "\n";
    }

    # sleep else DLESE may crash; no need to wait after the final request
    sleep($DELAY_SECONDS) if $remaining > 0;
}

exit 0;