# File:  [LON-CAPA] / nsdl / prototypes / metadata_convert.pl
# Revision 1.2: download - view: text, annotated - select for diffs
# Sat May 11 20:20:32 2002 UTC (21 years, 11 months ago) by harris41
# Branches: MAIN
# CVS tags: HEAD
# An exploratory effort at going both ways in the metadata via XSLT;
# still need to prototype transformation of text contents (like date
# or language).

use XML::Xalan;

# Xalan transformer object used by the stylesheet compile, parse and
# transform calls later in this script.  Constructed with direct
# method-call syntax rather than the ambiguous indirect-object form
# ("new Class").
my $tr = XML::Xalan::Transformer->new;

# Sample LON-CAPA metadata record: a flat run of elements with no root
# element of its own.  The heredoc is single-quoted, so nothing is
# interpolated and '@' needs no escaping.
my $LONCAPA_metadata = <<'END';
<abstract></abstract>
<author>Felicia Berryman, MSU HHMI First Year Online Biology</author>
<copyright>default</copyright>
<creationdate>1011110523</creationdate>
<dependencies></dependencies>
<keywords>biomolecules,practice</keywords>
<language>seniso</language>
<lastrevisiondate>1011110523</lastrevisiondate>
<mime>html</mime>
<notes></notes>
<owner>bio@msu, bio@msu (Michigan State University), bio@msu (Michigan State University), bio@msu (Michigan State University)</owner>
<subject>Bio Information, Large Biomolecules - Intro, Large Biomolecules - Review</subject>
<title>Test title</title>
END

# ---------------------------------------------------- The 16 major nsdl fields
# Audience
# Publisher
# Contributor
# Relation
# Coverage
# Resource Identifier
# Creator 
# Resource Type
# Date
# Rights
# Description
# Source
# Format
# Subject & Keywords
# Language
# Title
# --------------- also need educational recommended fields

# (there are also other identified cross-walk strategies for 8 other metadata
#  standards)

#http://metamanagement.comm.nsdlib.org/overview.html#NSDL
#http://www.dlib.vt.edu/projects/OAi/marcxml/marcxml.html
#http://www.openarchives.org/OAI/openarchivesprotocol.html
# ">
#     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
#     xsi:schemaLocation="http://purl.org/dc/elements/1.1
#     http://www.openarchives.org/OAI/1.1/dc.xsd">
# Sample Dublin Core record using the default-namespace form.  It is
# written to tmpdc.xml, which serves as the input document for the
# NSDL -> LON-CAPA transform.
my $NSDL_metadata1 = <<'END';
<?xml version="1.0"?>
<dc xmlns="http://purl.org/dc/elements/1.1/">
<title>The Cornell Law Quarterly</title>
<date>1915-1916</date>
<identifier>http://heinonline.org/HeinOnline/show.pl?
            handle=hein.journals/clqv1%26id=1%26size=4</identifier>
<rights>Available by Subscription. See http://www.wshein.com</rights>
</dc>
END
{
    # Three-argument open with a lexical handle; every I/O step is checked
    # so a read-only or full directory fails here instead of producing a
    # confusing transform error later.
    open(my $dc_out, '>', 'tmpdc.xml')
        or die "cannot open tmpdc.xml for writing: $!";
    # The extra "\n" keeps the file byte-identical to what the previous
    # heredoc wrapper produced (record followed by one blank line).
    print {$dc_out} "$NSDL_metadata1\n"
        or die "cannot write tmpdc.xml: $!";
    close($dc_out)
        or die "cannot close tmpdc.xml: $!";
}

# Second sample Dublin Core record, this time using the explicit "dc:"
# prefix and an xsi:schemaLocation.  Single-quoted heredoc: the text is
# taken literally with no interpolation.
my $NSDL_metadata2 = <<'END';
<dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" 
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
       xsi:schemaLocation="http://purl.org/dc/elements/1.1/
                           http://www.openarchives.org/OAI/1.1/dc.xsd">
<dc:title>Grassmann's space analysis</dc:title> 
<dc:creator>Hyde, E. W. (Edward Wyllys)</dc:creator>
<dc:subject>LCSH:Ausdehnungslehre; LCCN QA205.H99</dc:subject>
<dc:publisher>J. Wiley &amp; Sons</dc:publisher>
<dc:date>Created: 1906; Available: 1991</dc:date>
<dc:type>text</dc:type>
<dc:identifier>http://resolver.library.cornell.edu/math/1796949</dc:identifier>
<dc:language>eng</dc:language>
<dc:rights>Public Domain</dc:rights>
</dc:dc>
END

# Write the sample LON-CAPA record to tmploncapa.xml, wrapped in a single
# <loncapa> root element so that the file is a well-formed XML document.
{
    # Interpolating heredoc: $LONCAPA_metadata is expanded in place.
    my $loncapa_doc = <<"END";
<?xml version="1.0"?>
<loncapa>
$LONCAPA_metadata
</loncapa>
END
    # Three-argument open with a lexical handle; each I/O step is checked
    # so a failure is reported at its source.
    open(my $loncapa_out, '>', 'tmploncapa.xml')
        or die "cannot open tmploncapa.xml for writing: $!";
    print {$loncapa_out} $loncapa_doc
        or die "cannot write tmploncapa.xml: $!";
    close($loncapa_out)
        or die "cannot close tmploncapa.xml: $!";
}

# Write tmpl2n.xsl, the stylesheet that maps a <loncapa> document onto a
# Dublin Core style <dc> element.  Fields with no mapping yet are emitted
# with placeholder text, preceded by a "NEED TO IMPLEMENT" comment in the
# output.
# probably also want to protect dc name-space
{
    # Single-quoted heredoc: the stylesheet is literal text, nothing in it
    # is meant to be interpolated by Perl.
    my $l2n_stylesheet = <<'END';
<?xml version="1.0"?> 
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:template match="loncapa">
<dc>
<xsl:text>
</xsl:text>
<creator>
<xsl:value-of select="author"/>
</creator>
<xsl:text>
</xsl:text>
<publisher><xsl:value-of select="owner"/></publisher>
<xsl:text>
</xsl:text>
<title><xsl:value-of select="title"/></title>
<xsl:text disable-output-escaping="yes">
&lt;!-- NEED TO IMPLEMENT --&gt;
&lt;!-- converted from LON-CAPA language tag to Dublin Core language tag --&gt;
</xsl:text>
<language><xsl:value-of select="language"/></language>
<xsl:text disable-output-escaping="yes">
&lt;!-- NEED TO IMPLEMENT --&gt;
&lt;!-- converted from LON-CAPA seconds since 1/1/1970 to standard
calendar values --&gt;
</xsl:text>
<date>
Created: <xsl:value-of select="creationdate"/>;
Last revised: <xsl:value-of select="lastrevisiondate"/>
</date>
<xsl:text disable-output-escaping="yes">
&lt;!-- NEED TO IMPLEMENT --&gt;
&lt;!-- allow some sort of mapping to Library of Congress or Dewey Decimal
standard subject mapping --&gt;
</xsl:text>
<subject><xsl:value-of select="subject"/></subject>
<xsl:text>
</xsl:text>
<xsl:text disable-output-escaping="yes">
&lt;!-- NEED TO IMPLEMENT --&gt;
</xsl:text>
<audience>unknown mapping</audience>
<xsl:text>
</xsl:text>
<xsl:text disable-output-escaping="yes">
&lt;!-- NEED TO IMPLEMENT --&gt;
</xsl:text>
<contributor>unknown mapping (coauthors?)</contributor>
<xsl:text>
</xsl:text>
<xsl:text disable-output-escaping="yes">
&lt;!-- NEED TO IMPLEMENT --&gt;
</xsl:text>
<relation>unknown mapping... prerequisite/postrequisite info could go here</relation>
<xsl:text>
</xsl:text>
<xsl:text disable-output-escaping="yes">
&lt;!-- NEED TO IMPLEMENT --&gt;
</xsl:text>
<coverage>unknown mapping</coverage>
<xsl:text>
</xsl:text>
<xsl:text disable-output-escaping="yes">
&lt;!-- NEED TO IMPLEMENT --&gt;
</xsl:text>
<identifier>the gateway url</identifier>
<xsl:text>
</xsl:text>
<xsl:text disable-output-escaping="yes">
&lt;!-- NEED TO IMPLEMENT --&gt;
&lt;!-- probably mapping from LON-CAPA mime typing --&gt;
</xsl:text>
<resourcetype>n/a</resourcetype>
<xsl:text disable-output-escaping="yes">
&lt;!-- NEED TO IMPLEMENT --&gt;
&lt;!-- probably mapping from LON-CAPA copyright --&gt;
</xsl:text>
<rights>n/a</rights>
<description><xsl:value-of select="abstract"/></description>
<xsl:text>
</xsl:text>
<!-- Source: no mapping yet -->
<xsl:text disable-output-escaping="yes">
&lt;!-- NEED TO IMPLEMENT --&gt;
</xsl:text>
<format>software and hardware needed</format>
<xsl:text disable-output-escaping="yes">
&lt;!-- NEED TO IMPLEMENT --&gt;
</xsl:text>
<subjectandkeywords>need to break things into formal "classification" tag to insert keywords</subjectandkeywords>
<xsl:text>
</xsl:text>
</dc>
</xsl:template>
</xsl:stylesheet>
END
    # The "Source" note inside the template is an XML comment on purpose:
    # a Perl-style "#" line there is literal template text and would be
    # copied into every transformed document.

    # Three-argument open with a lexical handle; each I/O step is checked.
    open(my $l2n_out, '>', 'tmpl2n.xsl')
        or die "cannot open tmpl2n.xsl for writing: $!";
    print {$l2n_out} $l2n_stylesheet
        or die "cannot write tmpl2n.xsl: $!";
    close($l2n_out)
        or die "cannot close tmpl2n.xsl: $!";
}

# Write tmpn2l.xsl, the stylesheet for the reverse direction: it applies
# templates to the dc-namespace children of the document element and
# emits them inside a <loncapa> element.  Only title, date, rights
# (as <copyright>) and identifier have explicit templates.
# probably also want to protect dc name-space
{
    # Single-quoted heredoc: the stylesheet is literal text, nothing in it
    # is meant to be interpolated by Perl.
    my $n2l_stylesheet = <<'END';
<?xml version="1.0"?> 
<xsl:stylesheet xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:template match="/">
<loncapa>
<xsl:text>
</xsl:text>
<xsl:apply-templates select="/*/dc:*"/>
</loncapa>
<xsl:text>
</xsl:text>
</xsl:template>
<xsl:template match="dc:title">
<title><xsl:value-of select="."/></title>
<xsl:text>
</xsl:text>
</xsl:template>
<xsl:template match="dc:date">
<date><xsl:value-of select="."/></date>
<xsl:text>
</xsl:text>
</xsl:template>
<xsl:template match="dc:rights">
<copyright><xsl:value-of select="."/></copyright>
<xsl:text>
</xsl:text>
</xsl:template>
<xsl:template match="dc:identifier">
<dc:identifier><xsl:value-of select="."/></dc:identifier>
<xsl:text>
</xsl:text>
</xsl:template>
</xsl:stylesheet>
END
    # Three-argument open with a lexical handle; each I/O step is checked
    # so a failure is reported here rather than as a stylesheet compile
    # error later.
    open(my $n2l_out, '>', 'tmpn2l.xsl')
        or die "cannot open tmpn2l.xsl for writing: $!";
    print {$n2l_out} $n2l_stylesheet
        or die "cannot write tmpn2l.xsl: $!";
    close($n2l_out)
        or die "cannot close tmpn2l.xsl: $!";
}

# ---------------------------------- LON-CAPA to NSDL (Dublin Core PLUS format)
# Compile tmpl2n.xsl, parse tmploncapa.xml and write the result to l2n.xml.
# The bare block keeps these lexicals local to this step, since the other
# transform steps in this script reuse the same variable names.
{
    # NOTE(review): compile_stylesheet_file/parse_file are assumed to
    # return a false value on failure, like transform_to_file -- confirm
    # against the XML::Xalan documentation.
    my $compiled = $tr->compile_stylesheet_file("tmpl2n.xsl")
        or die "cannot compile tmpl2n.xsl: " . $tr->errstr;
    my $parsed = $tr->parse_file("tmploncapa.xml")
        or die "cannot parse tmploncapa.xml: " . $tr->errstr;
    my $dest_file = "l2n.xml";
    $tr->transform_to_file($parsed, $compiled, $dest_file)
        or die $tr->errstr;
}

# ---------------------------------- NSDL (Dublin Core PLUS format) to LON-CAPA
# Compile tmpn2l.xsl, parse tmpdc.xml and write the result to n2l.xml.
# The bare block gives this step its own lexicals instead of re-declaring
# same-named "my" variables in the enclosing file scope.
{
    # NOTE(review): compile_stylesheet_file/parse_file are assumed to
    # return a false value on failure, like transform_to_file -- confirm
    # against the XML::Xalan documentation.
    my $compiled = $tr->compile_stylesheet_file("tmpn2l.xsl")
        or die "cannot compile tmpn2l.xsl: " . $tr->errstr;
    my $parsed = $tr->parse_file("tmpdc.xml")
        or die "cannot parse tmpdc.xml: " . $tr->errstr;
    my $dest_file = "n2l.xml";
    $tr->transform_to_file($parsed, $compiled, $dest_file)
        or die $tr->errstr;
}

# ---------------------------------- test
# Run an additional transform over test1.xsl / test1.xml, writing
# test1out.xml.  This script does not create those two input files, so
# they must already exist in the working directory; the checks below name
# the file that failed.
# The bare block gives this step its own lexicals instead of re-declaring
# same-named "my" variables in the enclosing file scope.
{
    # NOTE(review): compile_stylesheet_file/parse_file are assumed to
    # return a false value on failure, like transform_to_file -- confirm
    # against the XML::Xalan documentation.
    my $compiled = $tr->compile_stylesheet_file("test1.xsl")
        or die "cannot compile test1.xsl: " . $tr->errstr;
    my $parsed = $tr->parse_file("test1.xml")
        or die "cannot parse test1.xml: " . $tr->errstr;
    my $dest_file = "test1out.xml";
    $tr->transform_to_file($parsed, $compiled, $dest_file)
        or die $tr->errstr;
}

# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>