--- modules/damieng/clean_xml/xml_to_loncapa.pl 2015/04/29 19:26:05 1.1
+++ modules/damieng/clean_xml/xml_to_loncapa.pl 2015/05/19 15:36:01 1.3
@@ -8,6 +8,19 @@ use warnings;
use XML::LibXML;
+
+# Element names treated as LON-CAPA blocks. An HTML trigger element that has
+# one of these among its descendants does not start an outtext section
+# (see contains_loncapa_block).
+my @loncapa_block = (
+    'parameter', 'location', 'answer', 'foil', 'image', 'polygon', 'rectangle',
+    'text', 'conceptgroup', 'itemgroup', 'item', 'label', 'data', 'function',
+    'array', 'unit', 'answergroup', 'functionplotresponse',
+    'functionplotruleset', 'functionplotelements', 'functionplotcustomrule',
+    'essayresponse', 'hintpart', 'formulahint', 'numericalhint',
+    'reactionhint', 'organichint', 'optionhint', 'radiobuttonhint',
+    'stringhint', 'customhint', 'mathhint',
+    'formulahintcondition', 'numericalhintcondition', 'reactionhintcondition',
+    'organichintcondition', 'optionhintcondition', 'radiobuttonhintcondition',
+    'stringhintcondition', 'customhintcondition', 'mathhintcondition',
+    'imageresponse', 'foilgroup', 'datasubmission', 'textfield',
+    'hiddensubmission', 'radiobuttonresponse', 'rankresponse', 'matchresponse',
+    'import', 'style', 'script', 'window', 'block', 'library', 'notsolved',
+    'part', 'postanswerdate', 'preduedate', 'problem', 'problemtype',
+    'randomlabel', 'bgimg', 'labelgroup', 'randomlist', 'solved', 'while',
+    'tex', 'print', 'web', 'gnuplot', 'curve',
+    'Task', 'IntroParagraph', 'ClosingParagraph', 'Question', 'QuestionText',
+    'Setup', 'Instance', 'InstanceText', 'Criteria', 'CriteriaText',
+    'GraderNote', 'languageblock', 'translated', 'lang', 'instructorcomment',
+    'dataresponse', 'togglebox', 'standalone', 'comment', 'drawimage', 'allow',
+    'displayduedate', 'displaytitle', 'responseparam', 'organicstructure',
+    'scriptlib', 'parserlib', 'drawoptionlist', 'spline', 'backgroundplot',
+    'plotobject', 'plotvector', 'drawvectorsum', 'functionplotrule',
+    'functionplotvectorrule', 'functionplotvectorsumrule',
+    'axis', 'key', 'xtics', 'ytics', 'title', 'xlabel', 'ylabel', 'hiddenline', 'dtm',
+    'stringresponse', 'optionresponse', 'numericalresponse', 'formularesponse',
+    'mathresponse', 'organicresponse', 'reactionresponse', 'customresponse',
+    'externalresponse', 'hint', 'hintgroup',
+);
+
+# LON-CAPA inline elements; like non-blank text, they start an outtext
+# section (see inside_outtext). textline is deliberately not listed.
+my @loncapa_inline = (
+    'display', 'm', 'lm', 'chem', 'num', 'parse', 'algebra',
+    'displayweight', 'displaystudentphoto',
+);
+
+# HTML elements that trigger the addition of startouttext/endouttext
+my @html_trigger = (
+    'header', 'footer', 'aside', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
+    'li', 'dd', 'dt', 'tbody', 'tr', 'caption', 'thead', 'tfoot', 'td', 'th',
+    'span', 'a', 'em', 'strong', 'b', 'i', 'sup', 'sub',
+    'pre', 'code', 'kbd', 'samp', 'cite', 'q', 'tt', 'ins', 'del', 'var',
+    'small', 'big', 'br', 'hr', 'address', 'blockquote',
+    'img', 'figure', 'figcaption', 'object', 'param', 'embed', 'applet',
+    'video', 'source', 'audio', 'map', 'area', 'canvas',
+    'form', 'input', 'select', 'optgroup', 'option', 'textarea', 'fieldset',
+    'legend', 'button', 'iframe',
+    'section', 'div', 'p', 'ul', 'ol', 'dl', 'table',
+);
+
+# Elements that add_outtext leaves untouched: it returns as soon as it meets
+# one of them, so nothing is inserted among their children.
+# NOTE: 'title' appears twice; this is harmless for membership tests and is
+# kept so that the list content is unchanged.
+my @simple_data = (
+    'polygon', 'rectangle', 'vector', 'value', 'answer', 'title', 'data',
+    'function', 'xlabel', 'ylabel', 'tic', 'parserlib', 'scriptlib', 'import',
+    'tex', 'text', 'image', 'display', 'm', 'lm', 'num', 'algebra', 'chem',
+    'parse', 'title', 'style', 'script', 'ins', 'del', 'label', 'option',
+    'textarea', 'legend',
+);
+
+# Response elements whose presence directly inside a <p> makes
+# convert_paragraphs unwrap the paragraphs of the parent element.
+my @inline_responses = (
+    'stringresponse', 'optionresponse', 'numericalresponse',
+    'formularesponse', 'mathresponse', 'organicresponse',
+    'reactionresponse', 'customresponse', 'externalresponse',
+);
+
+
binmode(STDOUT, ':encoding(UTF-8)');
if (scalar(@ARGV) != 1) {
@@ -39,6 +52,7 @@ sub convert_file {
my $dom_doc = XML::LibXML->load_xml(location => $pathname);
open my $out, '>:encoding(UTF-8)', $newpath;
+ add_outtext($dom_doc);
print $out node_to_string($dom_doc);
close $out;
}
@@ -103,6 +117,172 @@ sub escape {
return $s;
}
+# Adds startouttext and endouttext elements where useful for the colorful
+# editor.
+#
+# Called on a document, it processes the document element. Called on an
+# element, it first lets convert_paragraphs() rewrite the children, then walks
+# them in order: a startouttext is inserted before the first child accepted by
+# inside_outtext(), and an endouttext before the first following child refused
+# by continue_outtext() (or at the end of the element). Children that are not
+# inside an outtext section are processed recursively. Elements listed in
+# @simple_data and non-element nodes are left untouched.
+sub add_outtext {
+    my ($node) = @_;
+
+    my $type = $node->nodeType;
+    if ($type == XML_DOCUMENT_NODE) {
+        add_outtext($node->documentElement());
+        return;
+    }
+    return if $type != XML_ELEMENT_NODE;
+    return if string_in_array(\@simple_data, $node->nodeName);
+
+    convert_paragraphs($node);
+
+    my $outtext_open = 0;
+    my $current = $node->firstChild;
+    while (defined $current) {
+        # remember the successor first: the helpers insert siblings around $current
+        my $following = $current->nextSibling;
+        if ($outtext_open) {
+            if (!continue_outtext($current)) {
+                add_endouttext($node, $current);
+                $outtext_open = 0;
+            }
+        }
+        elsif (inside_outtext($current)) {
+            add_startouttext($node, $current);
+            $outtext_open = 1;
+        }
+        # only content outside of an outtext section is explored further
+        add_outtext($current) if !$outtext_open;
+        $current = $following;
+    }
+    # close a section that runs up to the end of the element
+    add_endouttext($node) if $outtext_open;
+}
+
+# Returns 1 if this node should trigger the addition of startouttext before
+# it, 0 otherwise. This is the case for:
+# - a text node containing something other than whitespace,
+# - an element listed in @html_trigger, unless it contains a LON-CAPA block,
+# - an element listed in @loncapa_inline.
+sub inside_outtext {
+    my ($node) = @_;
+    my $type = $node->nodeType;
+
+    if ($type == XML_TEXT_NODE) {
+        # blank text alone never opens an outtext section
+        return ($node->nodeValue =~ /^\s*$/) ? 0 : 1;
+    }
+    return 0 if $type != XML_ELEMENT_NODE;
+
+    my $name = $node->nodeName;
+    if (string_in_array(\@html_trigger, $name)) {
+        return contains_loncapa_block($node) ? 0 : 1;
+    }
+    return string_in_array(\@loncapa_inline, $name) ? 1 : 0;
+}
+
+# Returns 1 if the outtext environment can continue with this node, 0 if it
+# has to be closed before it. Everything accepted by inside_outtext()
+# continues the section, and so does any text node, even one made only of
+# spaces.
+sub continue_outtext {
+    my ($node) = @_;
+    return 1 if inside_outtext($node);
+    return ($node->nodeType == XML_TEXT_NODE) ? 1 : 0;
+}
+
+# Returns 1 if the node contains a LON-CAPA block (an element whose name is in
+# @loncapa_block) in a descendant, 0 otherwise. The node itself is not tested.
+sub contains_loncapa_block {
+    my ($node) = @_;
+    # work list of descendants still to examine, kept in document order
+    my @pending = $node->childNodes;
+    while (@pending) {
+        my $candidate = shift @pending;
+        next if $candidate->nodeType != XML_ELEMENT_NODE;
+        return 1 if string_in_array(\@loncapa_block, $candidate->nodeName);
+        unshift @pending, $candidate->childNodes;
+    }
+    return 0;
+}
+
+# Inserts a startouttext element in $parent, right before $before_node.
+# When $before_node is a text node starting with whitespace, that whitespace is
+# moved into a separate text node placed before the startouttext element, so
+# that the outtext section begins with actual content.
+# @param {Node} parent - the element receiving the startouttext
+# @param {Node} before_node - the child of parent that starts the section
+sub add_startouttext {
+    my ($parent, $before_node) = @_;
+    my $doc = $parent->ownerDocument;
+    if ($before_node->nodeType == XML_TEXT_NODE) {
+        # Split the space at the beginning of the node. The pattern is anchored
+        # with \A and \z and uses a greedy (.*): with a lazy (.*?) followed by
+        # $, the match could stop just before a final newline, and that newline
+        # was then lost from the text.
+        if ($before_node->nodeValue =~ /\A(\s+)(.*)\z/s) {
+            my ($leading_space, $rest) = ($1, $2);
+            my $space_node = $doc->createTextNode($leading_space);
+            $before_node->setData($rest);
+            $parent->insertBefore($space_node, $before_node);
+        }
+    }
+    my $startouttext = $doc->createElement('startouttext');
+    $parent->insertBefore($startouttext, $before_node);
+}
+
+# Inserts an endouttext element in $parent, before $before_node, or at the end
+# of $parent when $before_node is undefined.
+# When the node preceding the insertion point is a text node ending with
+# whitespace, that whitespace is moved into a separate text node placed after
+# the endouttext element, so that the outtext section ends with actual content.
+# @param {Node} parent - the element receiving the endouttext
+# @param {Node} before_node - the first child of parent outside of the section (optional)
+sub add_endouttext {
+    my ($parent, $before_node) = @_;
+    my $doc = $parent->ownerDocument;
+    my $endouttext = $doc->createElement('endouttext');
+
+    # last node of the outtext section
+    my $last_inside = defined $before_node
+        ? $before_node->previousSibling
+        : $parent->lastChild;
+
+    if (defined $last_inside && $last_inside->nodeType == XML_TEXT_NODE
+            && $last_inside->nodeValue =~ /^(.*?)(\s+)$/s) {
+        # split space at the end of the node
+        my ($kept, $trailing) = ($1, $2);
+        $last_inside->setData($kept);
+        my $space_node = $doc->createTextNode($trailing);
+        if (defined $before_node) {
+            $parent->insertBefore($space_node, $before_node);
+        }
+        else {
+            $parent->appendChild($space_node);
+        }
+        # the endouttext has to go before the space that was split off
+        $before_node = $space_node;
+    }
+
+    if (defined $before_node) {
+        $parent->insertBefore($endouttext, $before_node);
+    }
+    else {
+        $parent->appendChild($endouttext);
+    }
+}
+
+# When at least one paragraph child of $parent directly contains an inline
+# response (an element listed in @inline_responses), replaces every paragraph
+# child of $parent by its own content, followed by a br element when something
+# comes after it
+# (the colorful editor does not support paragraphs containing inline responses).
+# Does nothing when no paragraph child contains an inline response.
+sub convert_paragraphs {
+    my ($parent) = @_;
+
+    # first pass: look for a <p> child having an inline response as a child
+    my $found = 0;
+    PARAGRAPH:
+    foreach my $candidate ($parent->childNodes) {
+        next PARAGRAPH if $candidate->nodeType != XML_ELEMENT_NODE;
+        next PARAGRAPH if $candidate->nodeName ne 'p';
+        foreach my $inner ($candidate->childNodes) {
+            if ($inner->nodeType == XML_ELEMENT_NODE
+                    && string_in_array(\@inline_responses, $inner->nodeName)) {
+                $found = 1;
+                last PARAGRAPH;
+            }
+        }
+    }
+    return if !$found;
+
+    # second pass: unwrap all the paragraphs
+    my $doc = $parent->ownerDocument;
+    my $current = $parent->firstChild;
+    while (defined $current) {
+        # taken before the paragraph is replaced, so it is the node following it
+        my $following = $current->nextSibling;
+        if ($current->nodeType == XML_ELEMENT_NODE && $current->nodeName eq 'p') {
+            replace_by_children($current);
+            # we only add a br if there is something after: nothing at all, or
+            # a final blank text node, does not count
+            my $nothing_after = !defined $following
+                || (!defined $following->nextSibling
+                    && $following->nodeType == XML_TEXT_NODE
+                    && $following->nodeValue =~ /^\s*$/);
+            if (!$nothing_after) {
+                my $br = $doc->createElement('br');
+                $parent->insertBefore($br, $following);
+            }
+        }
+        $current = $following;
+    }
+}
+
##
# Tests if a string is in an array (using eq) (to avoid Smartmatch warnings with $value ~~ @array)
# @param {Array} array - reference to the array of strings
@@ -118,3 +298,39 @@ sub string_in_array {
}
return 0;
}
+
+##
+# Replaces a node by its children: the children are moved, in order, right
+# before the node in its parent, then the node is removed.
+# Whitespace at the very beginning and at the very end of the content is not
+# kept: a blank text node in first or last position is dropped, and a first or
+# last text node is trimmed on the corresponding side.
+# @param {Node} node - the DOM node
+##
+sub replace_by_children {
+    my ($node) = @_;
+    my $parent = $node->parentNode;
+    my $moved_any = 0;
+    my $current = $node->firstChild;
+    while (defined $current) {
+        my $following = $current->nextSibling;
+        my $at_start = !$moved_any;          # nothing has been kept so far
+        my $at_end = !defined $following;    # last child of the node
+        if ($current->nodeType == XML_TEXT_NODE && ($at_start || $at_end)) {
+            my $text = $current->nodeValue;
+            if ($text =~ /^\s*$/) {
+                # do not keep first and last whitespace nodes: left in place,
+                # they disappear together with the node
+                $current = $following;
+                next;
+            }
+            # remove whitespace at the beginning
+            $text =~ s/^\s+// if $at_start;
+            # and at the end
+            $text =~ s/\s+$// if $at_end;
+            $current->setData($text);
+        }
+        $node->removeChild($current);
+        $parent->insertBefore($current, $node);
+        $moved_any = 1;
+        $current = $following;
+    }
+    $parent->removeChild($node);
+}
+