File:  [LON-CAPA] / modules / damieng / clean_xml / xml_to_loncapa.pl
Revision 1.1: download - view: text, annotated - select for diffs
Wed Apr 29 19:26:05 2015 UTC (9 years ago) by damieng
Branches: MAIN
CVS tags: HEAD
added script to convert back to LON-CAPA syntax, little fixes for optionresponse

#!/usr/bin/perl

# This takes a well-formed XML file as input, and converts it to LON-CAPA syntax.

use strict;
use utf8;
use warnings;

use XML::LibXML;

binmode(STDOUT, ':encoding(UTF-8)');

# Exactly one argument is expected: the path of the XML file to convert.
if (scalar(@ARGV) != 1) {
  print STDERR "Usage: perl xml_to_loncapa.pl file.xml\n";
  exit(1);
}

# find the command-line argument encoding
use I18N::Langinfo qw(langinfo CODESET);
my $codeset = langinfo(CODESET);
use Encode qw(decode encode);
# decode the raw argument bytes into Perl character strings
@ARGV = map { decode $codeset, $_ } @ARGV;

my $pathname = $ARGV[0];
if (-f $pathname) {
  convert_file($pathname);
} else {
  # Do not fail silently with a success status when there is nothing to convert.
  # STDERR has no encoding layer, so the message is encoded back to the
  # locale codeset before being printed.
  print STDERR encode($codeset, "Error: $pathname is not a regular file\n");
  exit(1);
}

# Converts a file, creating a .loncapa file in the same directory.
# The input is parsed with XML::LibXML, serialized with node_to_string and
# written UTF-8 encoded to "<pathname>.loncapa".
# Dies if the input cannot be parsed or if the output cannot be written.
# @param {string} pathname - path of the XML file to convert
# TODO: use the right extension based on content (or just output content)
sub convert_file {
  my ($pathname) = @_;

  # create a name for the new file
  my $newpath = $pathname.'.loncapa';

  print "converting $pathname...\n";

  my $dom_doc = XML::LibXML->load_xml(location => $pathname);

  # Build the whole output before opening the target file, so that a failure
  # during the conversion does not leave an empty or truncated file behind.
  my $converted = node_to_string($dom_doc);

  open(my $out, '>:encoding(UTF-8)', $newpath)
    or die "Cannot open $newpath for writing: $!\n";
  print {$out} $converted
    or die "Cannot write to $newpath: $!\n";
  # buffered write errors only show up when the handle is closed
  close($out)
    or die "Cannot close $newpath: $!\n";
}

# Serializes a DOM node (and its descendants) to a string in LON-CAPA syntax.
# - a document node is serialized as its root element
# - text and CDATA nodes are output raw (unescaped) inside m, script, display
#   and parse elements, and inside answer elements whose parent is neither
#   numericalresponse nor formularesponse; otherwise they are output with
#   the node's own toString()
# - elements are rebuilt by hand, with attribute values passed through escape
#   and an empty element written in the self-closing form
# - any other kind of node is output with toString()
sub node_to_string {
  my ($node) = @_;

  my $type = $node->nodeType;

  if ($type == XML_DOCUMENT_NODE) {
    return node_to_string($node->documentElement());
  }

  if ($type == XML_TEXT_NODE || $type == XML_CDATA_SECTION_NODE) {
    my $container = $node->parentNode;
    my $container_name = $container->nodeName;
    my $outer = $container->parentNode;
    my $outer_name = defined $outer ? $outer->nodeName : undef;

    my $raw = 0;
    if (string_in_array(['m', 'script', 'display', 'parse', 'answer'], $container_name)) {
      if ($container_name ne 'answer') {
        $raw = 1;
      } elsif (defined $outer_name) {
        # answer content stays escaped inside numerical and formula responses
        $raw = ($outer_name ne 'numericalresponse' && $outer_name ne 'formularesponse') ? 1 : 0;
      }
    }
    return $raw ? $node->nodeValue : $node->toString();
  }

  if ($type == XML_ELEMENT_NODE) {
    my $name = $node->nodeName;
    my $out = "<$name";
    for my $attr ($node->attributes()) {
      $out .= ' ' . $attr->nodeName . '="' . escape($attr->nodeValue) . '"';
    }
    if (!$node->hasChildNodes()) {
      return $out . '/>';
    }
    $out .= '>';
    for my $kid ($node->childNodes) {
      $out .= node_to_string($kid);
    }
    return $out . "</$name>";
  }

  # comments, processing instructions, ... : let LibXML serialize them
  return $node->toString();
}

# Escapes a string for LON-CAPA output.
# This is applied to attribute values, which are written between double
# quotes, so the double quote has to be escaped in addition to &, < and >
# (otherwise a quote inside the value would end the attribute early).
# @param {string} s - the string to escape
# @returns the escaped string
sub escape {
  my ($s) = @_;
  # & must be handled first, so that the entities added below are not re-escaped
  $s =~ s/&/&amp;/sg;
  $s =~ s/</&lt;/sg;
  $s =~ s/>/&gt;/sg;
  $s =~ s/"/&quot;/sg;
  # apos does not need to be escaped: values are delimited by double quotes
  return $s;
}

##
# Linear search for a string in an array, comparing with eq
# (written by hand to avoid Smartmatch warnings with $value ~~ @array).
# The search stops at the first match.
# @param {Array<string>} array - reference to the array of strings
# @param {string} value - the string to look for
# @returns 1 if found, 0 otherwise
##
sub string_in_array {
  my ($array, $value) = @_;
  my $found = 0;
  for my $candidate (@{$array}) {
    next if $candidate ne $value;
    $found = 1;
    last;
  }
  return $found;
}

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>