#!/usr/bin/perl
# This takes a well-formed XML file as input, and converts it to LON-CAPA syntax.
use strict;
use utf8;
use warnings;
use XML::LibXML;
# Everything printed to standard output is encoded as UTF-8.
binmode(STDOUT, ':encoding(UTF-8)');

# Exactly one argument is expected: the path of the XML file to convert.
if (scalar(@ARGV) != 1) {
    print STDERR "Usage: perl xml_to_loncapa.pl file.xml\n";
    exit(1);
}

# find the command-line argument encoding
use I18N::Langinfo qw(langinfo CODESET);
my $codeset = langinfo(CODESET);
use Encode qw(decode);
# Decode the arguments from that encoding before using them.
@ARGV = map { decode $codeset, $_ } @ARGV;

my $pathname = "$ARGV[0]";
if (-f $pathname) {
    convert_file($pathname);
}
else {
    # Report the problem instead of silently exiting with a success status.
    # The path was decoded above, so it is encoded again for display.
    binmode(STDERR, ':encoding(UTF-8)');
    print STDERR "Error: $pathname is not a regular file\n";
    exit(1);
}
# Converts a file, creating a .loncapa file in the same directory.
# The new file is named after the input file, with '.loncapa' appended.
# TODO: use the right extension based on content (or just output content)
# @param {string} pathname - path of the XML file to convert
# @returns 1 on success; dies if the output file cannot be opened,
#          written or closed
sub convert_file {
    my ($pathname) = @_;

    # create a name for the new file
    my $newpath = $pathname . '.loncapa';

    print "converting $pathname...\n";

    my $dom_doc = XML::LibXML->load_xml(location => $pathname);

    open(my $out, '>:encoding(UTF-8)', $newpath)
        or die "Cannot open $newpath for writing: $!\n";
    print {$out} node_to_string($dom_doc)
        or die "Cannot write to $newpath: $!\n";
    # Buffered write errors only show up when the handle is closed.
    close($out)
        or die "Cannot close $newpath: $!\n";

    return 1;
}
# Turns a DOM node (and, recursively, its descendants) into a string.
# - Document node: the result for its document element is returned.
# - Text or CDATA node: the raw node value is returned when the parent
#   element is m, script, display or parse, or is an answer element whose
#   own parent exists and is neither numericalresponse nor formularesponse.
#   In every other case the node's toString() result is returned.
# - Element node: the tag is rebuilt by hand, with attribute values passed
#   through escape(); an element without child nodes is written as <tag/>.
# - Any other node type: the node's toString() result is returned.
# @param {XML::LibXML::Node} node - the node to convert
# @returns {string} the text produced for the node
sub node_to_string {
    my ($node) = @_;
    my $type = $node->nodeType;

    if ($type == XML_DOCUMENT_NODE) {
        return node_to_string($node->documentElement());
    }

    if ($type == XML_TEXT_NODE || $type == XML_CDATA_SECTION_NODE) {
        my $parent = $node->parentNode;
        my $parent_name = $parent->nodeName;
        my $grandparent = $parent->parentNode;
        my $grandparent_name =
            defined $grandparent ? $grandparent->nodeName : undef;

        my @raw_parents = qw(m script display parse answer);
        my $keep_raw = string_in_array(\@raw_parents, $parent_name);
        if ($keep_raw && $parent_name eq 'answer') {
            # answer content is only kept raw outside these two responses
            $keep_raw = defined $grandparent_name
                && $grandparent_name ne 'numericalresponse'
                && $grandparent_name ne 'formularesponse';
        }
        return $keep_raw ? $node->nodeValue : $node->toString();
    }

    if ($type != XML_ELEMENT_NODE) {
        return $node->toString();
    }

    my $tag = $node->nodeName;
    my $markup = "<$tag";
    for my $attribute ($node->attributes()) {
        $markup .= ' ' . $attribute->nodeName
            . '="' . escape($attribute->nodeValue) . '"';
    }

    if (!$node->hasChildNodes()) {
        return $markup . '/>';
    }

    $markup .= '>';
    for my $child ($node->childNodes) {
        $markup .= node_to_string($child);
    }
    return $markup . "</$tag>";
}
# Escapes a string for LON-CAPA output. In this file it is applied to
# attribute values, which node_to_string wraps in double quotes.
# The characters & < > and " are replaced by their predefined entities.
# @param {string} s - the string to escape
# @returns {string} the escaped copy (the caller's string is not modified)
sub escape {
    my ($s) = @_;
    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
    );
    # A single pass ensures the ampersands introduced by the entities
    # themselves are never escaped a second time.
    $s =~ s/([&<>"])/$entity_for{$1}/g;
    # apos does not need to be escaped: values are delimited by double quotes
    return $s;
}
##
# Checks whether an array contains a given string, comparing with eq
# (used instead of $value ~~ @array to avoid Smartmatch warnings).
# @param {Array<string>} array - reference to the array of strings
# @param {string} value - the string to look for
# @returns 1 if found, 0 otherwise
##
sub string_in_array {
    my ($array, $value) = @_;
    for my $candidate (@$array) {
        # stop at the first match
        return 1 if $candidate eq $value;
    }
    return 0;
}
# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>