#!/usr/bin/perl

# This takes a well-formed XML file as input, and converts it to LON-CAPA syntax.
# The result is written next to the input file, with '.loncapa' appended to its name.

use strict;
use utf8;
use warnings;

use Encode qw(decode);
use I18N::Langinfo qw(langinfo CODESET);
use XML::LibXML;

binmode(STDOUT, ':encoding(UTF-8)');
# Diagnostics may contain the (decoded) path name, so STDERR needs an encoding layer too.
binmode(STDERR, ':encoding(UTF-8)');

if (scalar(@ARGV) != 1) {
    print STDERR "Usage: perl xml_to_loncapa.pl file.xml\n";
    exit(1);
}

# find the command-line argument encoding
my $codeset = langinfo(CODESET);
@ARGV = map { decode($codeset, $_) } @ARGV;

my $pathname = "$ARGV[0]";
if (-f $pathname) {
    convert_file($pathname);
}
else {
    # Report the problem instead of silently exiting with a success status.
    print STDERR "Error: $pathname is not a regular file\n";
    exit(1);
}


##
# Converts a file, creating a .loncapa file in the same directory.
# Dies if the input cannot be parsed or the output cannot be written.
# TODO: use the right extension based on content (or just output content)
# @param {string} pathname - path of the XML file to convert
##
sub convert_file {
    my ($pathname) = @_;

    # create a name for the new file
    my $newpath = $pathname . '.loncapa';

    print "converting $pathname...\n";

    my $dom_doc = XML::LibXML->load_xml(location => $pathname);

    open(my $out, '>:encoding(UTF-8)', $newpath)
        or die "Cannot open $newpath for writing: $!\n";
    print {$out} node_to_string($dom_doc);
    # Buffered write errors only surface when the handle is closed.
    close($out)
        or die "Cannot close $newpath: $!\n";
    return;
}

##
# Serializes a DOM node (and, recursively, its children) to a string.
# - document node: the serialization of its root element
# - text / CDATA node: raw text inside m, script, display, parse and some
#   answer elements; the node's own toString() otherwise
# - element node: start tag with attributes, children, end tag
#   (or an empty-element tag when there are no children)
# - any other node type: the node's own toString()
# @param {XML::LibXML::Node} node - the node to serialize
# @returns {string} the serialized node
##
sub node_to_string {
    my ($node) = @_;

    my $type = $node->nodeType;

    if ($type == XML_DOCUMENT_NODE) {
        return node_to_string($node->documentElement());
    }
    elsif ($type == XML_TEXT_NODE || $type == XML_CDATA_SECTION_NODE) {
        my $parent = $node->parentNode;
        my $parent_name = $parent->nodeName;
        my $grandparent_name;
        if (defined $parent->parentNode) {
            $grandparent_name = $parent->parentNode->nodeName;
        }
        # Text inside these elements is written out raw, without XML escaping.
        # For 'answer' this only applies when the grandparent is known and is
        # neither numericalresponse nor formularesponse.
        my @no_escape = ('m', 'script', 'display', 'parse', 'answer');
        if (string_in_array(\@no_escape, $parent_name) &&
                ($parent_name ne 'answer' ||
                (defined $grandparent_name &&
                $grandparent_name ne 'numericalresponse' &&
                $grandparent_name ne 'formularesponse'))) {
            return $node->nodeValue;
        }
        else {
            return $node->toString();
        }
    }
    elsif ($type == XML_ELEMENT_NODE) {
        my $tag = $node->nodeName;
        my $s = "<$tag";
        foreach my $attribute ($node->attributes()) {
            my $value = escape($attribute->nodeValue);
            # The value is emitted between double quotes, so a literal double
            # quote inside it would end the attribute early.
            $value =~ s/"/&quot;/sg;
            $s .= ' ' . $attribute->nodeName . '="' . $value . '"';
        }
        if ($node->hasChildNodes()) {
            $s .= '>';
            foreach my $child ($node->childNodes) {
                $s .= node_to_string($child);
            }
            # Close the element that was opened above.
            $s .= "</$tag>";
        }
        else {
            $s .= '/>';
        }
        return $s;
    }
    else {
        return $node->toString();
    }
}

##
# Escapes the characters &, < and > in a string for LON-CAPA output.
# Quotes are left untouched: quot and apos do not need to be escaped outside
# attribute values, and node_to_string escapes double quotes itself when it
# writes an attribute value.
# @param {string} s - the string to escape
# @returns {string} the escaped string
##
sub escape {
    my ($s) = @_;
    # '&' must be replaced first so the entities added below are not re-escaped.
    $s =~ s/&/&amp;/sg;
    $s =~ s/</&lt;/sg;
    $s =~ s/>/&gt;/sg;
    return $s;
}

##
# Tests if a string is in an array (using eq) (to avoid Smartmatch warnings with $value ~~ @array)
# @param {Array} array - reference to the array of strings
# @param {string} value - the string to look for
# @returns 1 if found, 0 otherwise
##
sub string_in_array {
    my ($array, $value) = @_;
    foreach my $v (@{$array}) {
        if ($v eq $value) {
            return 1;
        }
    }
    return 0;
}