#!/usr/bin/perl # This takes a well-formed XML file as input, and converts it to LON-CAPA syntax. use strict; use utf8; use warnings; use XML::LibXML; my @loncapa_block = ('parameter','location','answer','foil','image','polygon','rectangle','text','conceptgroup','itemgroup','item','label','data','function','array','unit','answergroup','functionplotresponse','functionplotruleset','functionplotelements','functionplotcustomrule','essayresponse','hintpart','formulahint','numericalhint','reactionhint','organichint','optionhint','radiobuttonhint','stringhint','customhint','mathhint','formulahintcondition','numericalhintcondition','reactionhintcondition','organichintcondition','optionhintcondition','radiobuttonhintcondition','stringhintcondition','customhintcondition','mathhintcondition','imageresponse','foilgroup','datasubmission','textfield','hiddensubmission','radiobuttonresponse','rankresponse','matchresponse','import','style','script','window','block','library','notsolved','part','postanswerdate','preduedate','problem','problemtype','randomlabel','bgimg','labelgroup','randomlist','solved','while','tex','print','web','gnuplot','curve','Task','IntroParagraph','ClosingParagraph','Question','QuestionText','Setup','Instance','InstanceText','Criteria','CriteriaText','GraderNote','languageblock','translated','lang','instructorcomment','dataresponse','togglebox','standalone','comment','drawimage','allow','displayduedate','displaytitle','responseparam','organicstructure','scriptlib','parserlib','drawoptionlist','spline','backgroundplot','plotobject','plotvector','drawvectorsum','functionplotrule','functionplotvectorrule','functionplotvectorsumrule','axis','key','xtics','ytics','title','xlabel','ylabel','hiddenline','dtm','stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse', 'hint', 'hintgroup'); my @loncapa_inline = ('display','m','lm','chem','num','parse','algebra','displayweight','displaystudentphoto'); # not textline my @html_elements = ('html','meta','head','title','base','link','style','noscript','body','header','footer','aside','h1','h2','h3','h4','h5','h6','li','dd','dt','tbody','tr','caption','thead','tfoot','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','br','hr','address','blockquote','img','figure','figcaption','object','param','embed','applet','video','source','audio','map','area','canvas','form','label','input','select','optgroup','option','textarea','fieldset','legend','button','iframe','section','div','p','ul','ol','dl','table'); # without script my @simple_data = ('polygon', 'rectangle', 'vector', 'value', 'answer', 'title', 'xlabel', 'ylabel', 'tic', 'parserlib', 'scriptlib', 'import', 'tex', 'text', 'image', 'display', 'm', 'lm', 'num', 'algebra', 'chem', 'parse', 'title', 'style', 'script', 'ins', 'del', 'label', 'option', 'textarea', 'legend' ); binmode(STDOUT, ':encoding(UTF-8)'); if (scalar(@ARGV) != 1) { print STDERR "Usage: perl xml_to_loncapa.pl file.xml\n"; exit(1); } # find the command-line argument encoding use I18N::Langinfo qw(langinfo CODESET); my $codeset = langinfo(CODESET); use Encode qw(decode); @ARGV = map { decode $codeset, $_ } @ARGV; my $pathname = "$ARGV[0]"; if (-f $pathname) { convert_file($pathname); } # Converts a file, creating a .loncapa file in the same directory. # TODO: use the right extension based on content (or just ouput content) sub convert_file { my ($pathname) = @_; # create a name for the new file my $newpath = $pathname.'.loncapa'; print "converting $pathname...\n"; my $dom_doc = XML::LibXML->load_xml(location => $pathname); open my $out, '>:encoding(UTF-8)', $newpath; add_outtext($dom_doc); print $out node_to_string($dom_doc); close $out; } sub node_to_string { my ($node) = @_; if ($node->nodeType == XML_DOCUMENT_NODE) { my $root = $node->documentElement(); return node_to_string($root); } elsif ($node->nodeType == XML_TEXT_NODE || $node->nodeType == XML_CDATA_SECTION_NODE) { my $parent = $node->parentNode; my $parent_name = $parent->nodeName; my $grandparent_name; if (defined $parent->parentNode) { $grandparent_name = $parent->parentNode->nodeName; } my @no_escape = ('m', 'script', 'display', 'parse', 'answer'); if (string_in_array(\@no_escape, $parent_name) && ($parent_name ne 'answer' || (defined $grandparent_name && $grandparent_name ne 'numericalresponse' && $grandparent_name ne 'formularesponse'))) { return $node->nodeValue; } else { return $node->toString(); } } elsif ($node->nodeType == XML_ELEMENT_NODE) { my $s = ''; my $tag = $node->nodeName; $s .= "<$tag"; my @attributes = $node->attributes(); foreach my $attribute (@attributes) { $s .= ' '; $s .= $attribute->nodeName; $s .= '="'; $s .= escape($attribute->nodeValue); $s .= '"'; } if ($node->hasChildNodes()) { $s .= '>'; foreach my $child ($node->childNodes) { $s .= node_to_string($child); } $s .= ""; } else { $s .= '/>'; } return $s; } else { return $node->toString(); } } # Escapes a string for LON-CAPA output (used for text nodes, not attribute values) sub escape { my ($s) = @_; $s =~ s/&/&/sg; $s =~ s//>/sg; # quot and apos do not need to be escaped outside attribute values return $s; } # Adds startouttext and endouttext where useful for the colorfull editor sub add_outtext { my ($node) = @_; if ($node->nodeType == XML_DOCUMENT_NODE) { my $root = $node->documentElement(); add_outtext($root); return; } if ($node->nodeType != XML_ELEMENT_NODE) { return; } if (string_in_array(\@simple_data, $node->nodeName)) { return; } my $next; my $in_outtext = 0; for (my $child=$node->firstChild; defined $child; $child=$next) { $next = $child->nextSibling; if (!$in_outtext && inside_outtext($child)) { # Add startouttext my $doc = $node->ownerDocument; my $startouttext = $doc->createElement('startouttext'); $node->insertBefore($startouttext, $child); $in_outtext = 1; } elsif ($in_outtext && !continue_outtext($child)) { # Add endouttext add_endouttext($node, $child); $in_outtext = 0; } if (!$in_outtext) { add_outtext($child); } } if ($in_outtext) { # Add endouttext add_endouttext($node); } } # Returns 1 if this node should trigger the addition of startouttext before it sub inside_outtext { my ($node) = @_; if ($node->nodeType == XML_TEXT_NODE && $node->nodeValue !~ /^\s*$/) { return 1; } if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@html_elements, $node->nodeName)) { if (contains_loncapa_block($node)) { return 0; } else { return 1; } } if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@loncapa_inline, $node->nodeName)) { return 1; } return 0; } # Returns 1 if the outtext environment can continue with this node sub continue_outtext { my ($node) = @_; if (inside_outtext($node)) { return 1; } if ($node->nodeType == XML_TEXT_NODE) { return 1; # continue even if this is just spaces } return 0; } # Returns 1 if the node contains a LON-CAPA block in a descendant. sub contains_loncapa_block { my ($node) = @_; foreach my $child ($node->childNodes) { if ($child->nodeType == XML_ELEMENT_NODE) { if (string_in_array(\@loncapa_block, $child->nodeName)) { return 1; } if (contains_loncapa_block($child)) { return 1; } } } return 0; } sub add_endouttext { my ($parent, $before_node) = @_; my $doc = $parent->ownerDocument; my $endouttext = $doc->createElement('endouttext'); my $before_before; if (defined $before_node) { $before_before = $before_node->previousSibling; } else { $before_before = $parent->lastChild; } if (defined $before_before && $before_before->nodeType == XML_TEXT_NODE) { # split space at the end of the node before endouttext if ($before_before->nodeValue =~ /^(.*?)(\s+)$/s) { $before_before->setData($1); my $space_node = $doc->createTextNode($2); if (defined $before_node) { $parent->insertBefore($space_node, $before_node); } else { $parent->appendChild($space_node); } $before_node = $space_node; } } if (defined $before_node) { $parent->insertBefore($endouttext, $before_node); } else { $parent->appendChild($endouttext); } } ## # Tests if a string is in an array (using eq) (to avoid Smartmatch warnings with $value ~~ @array) # @param {Array} array - reference to the array of strings # @param {string} value - the string to look for # @returns 1 if found, 0 otherwise ## sub string_in_array { my ($array, $value) = @_; foreach my $v (@{$array}) { if ($v eq $value) { return 1; } } return 0; }