File:  [LON-CAPA] / nsdl / prototypes / metadata_convert.pl
Revision 1.2: download - view: text, annotated - select for diffs
Sat May 11 20:20:32 2002 UTC (21 years, 11 months ago) by harris41
Branches: MAIN
CVS tags: HEAD
an exploratory effort at going both ways in the metadata via XSLT;
still need to prototype transformation of text contents (like date
or language)

    1: use XML::Xalan;
    2: 
    3: my $tr = XML::Xalan::Transformer->new;  # single transformer reused by every conversion below
    4: # Sample LON-CAPA metadata record; \@ stops Perl interpolating @msu inside the heredoc.
    5: my $LONCAPA_metadata = <<"END";
    6: <abstract></abstract>
    7: <author>Felicia Berryman, MSU HHMI First Year Online Biology</author>
    8: <copyright>default</copyright>
    9: <creationdate>1011110523</creationdate>
   10: <dependencies></dependencies>
   11: <keywords>biomolecules,practice</keywords>
   12: <language>seniso</language>
   13: <lastrevisiondate>1011110523</lastrevisiondate>
   14: <mime>html</mime>
   15: <notes></notes>
   16: <owner>bio\@msu, bio\@msu (Michigan State University), bio\@msu (Michigan State University), bio\@msu (Michigan State University)</owner>
   17: <subject>Bio Information, Large Biomolecules - Intro, Large Biomolecules - Review</subject>
   18: <title>Test title</title>
   19: END
   20: 
   21: # ---------------------------------------------------- The 16 major nsdl fields
   22: # Audience
   23: # Publisher
   24: # Contributor
   25: # Relation
   26: # Coverage
   27: # Resource Identifier
   28: # Creator 
   29: # Resource Type
   30: # Date
   31: # Rights
   32: # Description
   33: # Source
   34: # Format
   35: # Subject & Keywords
   36: # Language
   37: # Title
   38: # --------------- also need educational recommended fields
   39: 
   40: # (there are also other identified cross-walk strategies for 8 other metadata
   41: #  standards)
   42: 
   43: #http://metamanagement.comm.nsdlib.org/overview.html#NSDL
   44: #http://www.dlib.vt.edu/projects/OAi/marcxml/marcxml.html
   45: #http://www.openarchives.org/OAI/openarchivesprotocol.html
   46: # ">
   47: #     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
   48: #     xsi:schemaLocation="http://purl.org/dc/elements/1.1
   49: #     http://www.openarchives.org/OAI/1.1/dc.xsd">
   50: my $NSDL_metadata1 = <<"END";  # sample Dublin Core record using the default dc namespace
   51: <?xml version="1.0"?>
   52: <dc xmlns="http://purl.org/dc/elements/1.1/">
   53: <title>The Cornell Law Quarterly</title>
   54: <date>1915-1916</date>
   55: <identifier>http://heinonline.org/HeinOnline/show.pl?
   56:             handle=hein.journals/clqv1%26id=1%26size=4</identifier>
   57: <rights>Available by Subscription. See http://www.wshein.com</rights>
   58: </dc>
   59: END
   60: open(my $dc_fh, '>', 'tmpdc.xml') or die "Cannot open tmpdc.xml for writing: $!";  # input of the NSDL-to-LON-CAPA run
   61: print {$dc_fh} <<"END";
   62: $NSDL_metadata1
   63: END
   64: close($dc_fh) or die "Cannot close tmpdc.xml: $!";  # buffered write errors surface at close
   65: # Second sample Dublin Core record (dc:-prefixed form); not referenced elsewhere in this script.
   66: my $NSDL_metadata2=(<<END);
   67: <dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" 
   68:        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
   69:        xsi:schemaLocation="http://purl.org/dc/elements/1.1/
   70:                            http://www.openarchives.org/OAI/1.1/dc.xsd">
   71: <dc:title>Grassmann's space analysis</dc:title> 
   72: <dc:creator>Hyde, E. W. (Edward Wyllys)</dc:creator>
   73: <dc:subject>LCSH:Ausdehnungslehre; LCCN QA205.H99</dc:subject>
   74: <dc:publisher>J. Wiley &amp; Sons</dc:publisher>
   75: <dc:date>Created: 1906; Available: 1991</dc:date>
   76: <dc:type>text</dc:type>
   77: <dc:identifier>http://resolver.library.cornell.edu/math/1796949</dc:identifier>
   78: <dc:language>eng</dc:language>
   79: <dc:rights>Public Domain</dc:rights>
   80: </dc:dc>
   81: END
   82: # Write the LON-CAPA sample to tmploncapa.xml, wrapped in a <loncapa> root element.
   83: open(my $loncapa_fh, '>', 'tmploncapa.xml') or die "Cannot open tmploncapa.xml for writing: $!";
   84: print {$loncapa_fh} <<"END";
   85: <?xml version="1.0"?>
   86: <loncapa>
   87: $LONCAPA_metadata
   88: </loncapa>
   89: END
   90: close($loncapa_fh) or die "Cannot close tmploncapa.xml: $!";  # buffered write errors surface at close
   91: 
   92: # Write tmpl2n.xsl (LON-CAPA -> NSDL/Dublin Core stylesheet); probably also want to protect dc name-space
   93: open(my $l2n_fh, '>', 'tmpl2n.xsl') or die "Cannot open tmpl2n.xsl for writing: $!";
   94: print {$l2n_fh} <<'END';  # single-quoted: the stylesheet text is written verbatim, nothing is interpolated
   95: <?xml version="1.0"?> 
   96: <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
   97: <xsl:template match="loncapa">
   98: <dc>
   99: <xsl:text>
  100: </xsl:text>
  101: <creator>
  102: <xsl:value-of select="author"/>
  103: </creator>
  104: <xsl:text>
  105: </xsl:text>
  106: <publisher><xsl:value-of select="owner"/></publisher>
  107: <xsl:text>
  108: </xsl:text>
  109: <title><xsl:value-of select="title"/></title>
  110: <xsl:text disable-output-escaping="yes">
  111: &lt;!-- NEED TO IMPLEMENT --&gt;
  112: &lt;!-- converted from LON-CAPA language tag to Dublin Core language tag --&gt;
  113: </xsl:text>
  114: <language><xsl:value-of select="language"/></language>
  115: <xsl:text disable-output-escaping="yes">
  116: &lt;!-- NEED TO IMPLEMENT --&gt;
  117: &lt;!-- converted from LON-CAPA seconds since 1/1/1970 to standard
  118: calendar values --&gt;
  119: </xsl:text>
  120: <date>
  121: Created: <xsl:value-of select="creationdate"/>;
  122: Last revised: <xsl:value-of select="lastrevisiondate"/>
  123: </date>
  124: <xsl:text disable-output-escaping="yes">
  125: &lt;!-- NEED TO IMPLEMENT --&gt;
  126: &lt;!-- allow some sort of mapping to Library of Congress or Dewey Decimal
  127: standard subject mapping --&gt;
  128: </xsl:text>
  129: <subject><xsl:value-of select="subject"/></subject>
  130: <xsl:text>
  131: </xsl:text>
  132: <xsl:text disable-output-escaping="yes">
  133: &lt;!-- NEED TO IMPLEMENT --&gt;
  134: </xsl:text>
  135: <audience>unknown mapping</audience>
  136: <xsl:text>
  137: </xsl:text>
  138: <xsl:text disable-output-escaping="yes">
  139: &lt;!-- NEED TO IMPLEMENT --&gt;
  140: </xsl:text>
  141: <contributor>unknown mapping (coauthors?)</contributor>
  142: <xsl:text>
  143: </xsl:text>
  144: <xsl:text disable-output-escaping="yes">
  145: &lt;!-- NEED TO IMPLEMENT --&gt;
  146: </xsl:text>
  147: <relation>unknown mapping... prerequisite/postrequisite info could go here</relation>
  148: <xsl:text>
  149: </xsl:text>
  150: <xsl:text disable-output-escaping="yes">
  151: &lt;!-- NEED TO IMPLEMENT --&gt;
  152: </xsl:text>
  153: <coverage>unknown mapping</coverage>
  154: <xsl:text>
  155: </xsl:text>
  156: <xsl:text disable-output-escaping="yes">
  157: &lt;!-- NEED TO IMPLEMENT --&gt;
  158: </xsl:text>
  159: <identifier>the gateway url</identifier>
  160: <xsl:text>
  161: </xsl:text>
  162: <xsl:text disable-output-escaping="yes">
  163: &lt;!-- NEED TO IMPLEMENT --&gt;
  164: &lt;!-- probably mapping from LON-CAPA mime typing --&gt;
  165: </xsl:text>
  166: <resourcetype>n/a</resourcetype>
  167: <xsl:text disable-output-escaping="yes">
  168: &lt;!-- NEED TO IMPLEMENT --&gt;
  169: &lt;!-- probably mapping from LON-CAPA copyright --&gt;
  170: </xsl:text>
  171: <rights>n/a</rights>
  172: <description><xsl:value-of select="abstract"/></description>
  173: <xsl:text>
  174: </xsl:text>
  175: <!-- Source: no mapping implemented yet -->
  176: <xsl:text disable-output-escaping="yes">
  177: &lt;!-- NEED TO IMPLEMENT --&gt;
  178: </xsl:text>
  179: <format>software and hardware needed</format>
  180: <xsl:text disable-output-escaping="yes">
  181: &lt;!-- NEED TO IMPLEMENT --&gt;
  182: </xsl:text>
  183: <subjectandkeywords>need to break things into formal "classification" tag to insert keywords</subjectandkeywords>
  184: <xsl:text>
  185: </xsl:text>
  186: </dc>
  187: </xsl:template>
  188: </xsl:stylesheet>
  189: END
  190: close($l2n_fh) or die "Cannot close tmpl2n.xsl: $!";  # buffered write errors surface at close
  191: 
  192: # Write tmpn2l.xsl (NSDL/Dublin Core -> LON-CAPA stylesheet); probably also want to protect dc name-space
  193: open(my $n2l_fh, '>', 'tmpn2l.xsl') or die "Cannot open tmpn2l.xsl for writing: $!";
  194: print {$n2l_fh} <<'END';  # single-quoted: the stylesheet text is written verbatim, nothing is interpolated
  195: <?xml version="1.0"?> 
  196: <xsl:stylesheet xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
  197: <xsl:template match="/">
  198: <loncapa>
  199: <xsl:text>
  200: </xsl:text>
  201: <xsl:apply-templates select="/*/dc:*"/>
  202: </loncapa>
  203: <xsl:text>
  204: </xsl:text>
  205: </xsl:template>
  206: <xsl:template match="dc:title">
  207: <title><xsl:value-of select="."/></title>
  208: <xsl:text>
  209: </xsl:text>
  210: </xsl:template>
  211: <xsl:template match="dc:date">
  212: <date><xsl:value-of select="."/></date>
  213: <xsl:text>
  214: </xsl:text>
  215: </xsl:template>
  216: <xsl:template match="dc:rights">
  217: <copyright><xsl:value-of select="."/></copyright>
  218: <xsl:text>
  219: </xsl:text>
  220: </xsl:template>
  221: <xsl:template match="dc:identifier">
  222: <dc:identifier><xsl:value-of select="."/></dc:identifier>
  223: <xsl:text>
  224: </xsl:text>
  225: </xsl:template>
  226: </xsl:stylesheet>
  227: END
  228: close($n2l_fh) or die "Cannot close tmpn2l.xsl: $!";  # buffered write errors surface at close
  229: 
  230: # ---------------------------------- LON-CAPA to NSDL (Dublin Core PLUS format)
  231: my $compiled = $tr->compile_stylesheet_file("tmpl2n.xsl") or die $tr->errstr;  # stop here if the stylesheet is bad
  232: my $parsed = $tr->parse_file("tmploncapa.xml") or die $tr->errstr;  # stop here if the input XML is bad
  233: my $dest_file="l2n.xml";  # converted record is written to this file
  234: $tr->transform_to_file($parsed, $compiled, $dest_file)
  235:     or die $tr->errstr;
  236: 
  237: # ---------------------------------- NSDL (Dublin Core PLUS format) to LON-CAPA
  238: my $n2l_compiled = $tr->compile_stylesheet_file("tmpn2l.xsl") or die $tr->errstr;  # own name: no masking of $compiled
  239: my $dc_parsed = $tr->parse_file("tmpdc.xml") or die $tr->errstr;  # stop here if the input XML is bad
  240: my $n2l_dest_file="n2l.xml";  # converted record is written to this file
  241: $tr->transform_to_file($dc_parsed, $n2l_compiled, $n2l_dest_file)
  242:     or die $tr->errstr;
  243: 
  244: # ---------------------------------- test (test1.xsl and test1.xml are not created by this script)
  245: my $test_compiled = $tr->compile_stylesheet_file("test1.xsl") or die $tr->errstr;  # own name: no masking of $compiled
  246: my $test_parsed = $tr->parse_file("test1.xml") or die $tr->errstr;  # stop here if the input XML is bad
  247: my $test_dest_file="test1out.xml";  # transformed output is written to this file
  248: $tr->transform_to_file($test_parsed, $test_compiled, $test_dest_file)
  249:     or die $tr->errstr;

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>