--- modules/damieng/clean_xml/xml_to_loncapa.pl 2015/04/29 19:26:05 1.1
+++ modules/damieng/clean_xml/xml_to_loncapa.pl 2015/05/19 15:36:01 1.3
@@ -8,6 +8,19 @@
 use warnings;
 use XML::LibXML;
 
+
+# Element names treated as LON-CAPA blocks (looked up by contains_loncapa_block)
+my @loncapa_block = ('parameter','location','answer','foil','image','polygon','rectangle','text','conceptgroup','itemgroup','item','label','data','function','array','unit','answergroup','functionplotresponse','functionplotruleset','functionplotelements','functionplotcustomrule','essayresponse','hintpart','formulahint','numericalhint','reactionhint','organichint','optionhint','radiobuttonhint','stringhint','customhint','mathhint','formulahintcondition','numericalhintcondition','reactionhintcondition','organichintcondition','optionhintcondition','radiobuttonhintcondition','stringhintcondition','customhintcondition','mathhintcondition','imageresponse','foilgroup','datasubmission','textfield','hiddensubmission','radiobuttonresponse','rankresponse','matchresponse','import','style','script','window','block','library','notsolved','part','postanswerdate','preduedate','problem','problemtype','randomlabel','bgimg','labelgroup','randomlist','solved','while','tex','print','web','gnuplot','curve','Task','IntroParagraph','ClosingParagraph','Question','QuestionText','Setup','Instance','InstanceText','Criteria','CriteriaText','GraderNote','languageblock','translated','lang','instructorcomment','dataresponse','togglebox','standalone','comment','drawimage','allow','displayduedate','displaytitle','responseparam','organicstructure','scriptlib','parserlib','drawoptionlist','spline','backgroundplot','plotobject','plotvector','drawvectorsum','functionplotrule','functionplotvectorrule','functionplotvectorsumrule','axis','key','xtics','ytics','title','xlabel','ylabel','hiddenline','dtm','stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse', 'hint', 'hintgroup');
+
+# Inline LON-CAPA element names; inside_outtext lets them open an outtext run
+my @loncapa_inline = ('display','m','lm','chem','num','parse','algebra','displayweight','displaystudentphoto'); # not textline
+
+# HTML elements that trigger the addition of startouttext/endouttext
+my @html_trigger = ('header','footer','aside','h1','h2','h3','h4','h5','h6','li','dd','dt','tbody','tr','caption','thead','tfoot','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','br','hr','address','blockquote','img','figure','figcaption','object','param','embed','applet','video','source','audio','map','area','canvas','form','input','select','optgroup','option','textarea','fieldset','legend','button','iframe','section','div','p','ul','ol','dl','table');
+
+# Elements that add_outtext leaves untouched (it returns without looking at their children)
+my @simple_data = ('polygon', 'rectangle', 'vector', 'value', 'answer', 'title', 'data', 'function', 'xlabel', 'ylabel', 'tic', 'parserlib', 'scriptlib', 'import', 'tex', 'text', 'image', 'display', 'm', 'lm', 'num', 'algebra', 'chem', 'parse', 'title', 'style', 'script', 'ins', 'del', 'label', 'option', 'textarea', 'legend' );
+
+# Response elements that convert_paragraphs looks for directly inside <p> elements
+my @inline_responses = ('stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse');
+
+
 binmode(STDOUT, ':encoding(UTF-8)');
 
 if (scalar(@ARGV) != 1) {
@@ -39,6 +52,7 @@ sub convert_file {
 
   my $dom_doc = XML::LibXML->load_xml(location => $pathname);
   open my $out, '>:encoding(UTF-8)', $newpath;
+  add_outtext($dom_doc);
   print $out node_to_string($dom_doc);
   close $out;
 }
@@ -103,6 +117,172 @@ sub escape {
   return $s;
 }
 
+##
+# Walks the tree and wraps runs of text-like children between
+# startouttext and endouttext elements (for the colorful editor).
+# @param {Node} node - a document or element node; other node types are ignored
+##
+sub add_outtext {
+  my ($node) = @_;
+  my $type = $node->nodeType;
+
+  if ($type == XML_DOCUMENT_NODE) {
+    # a document is handled through its root element
+    my $root_element = $node->documentElement();
+    add_outtext($root_element);
+    return;
+  }
+  return if ($type != XML_ELEMENT_NODE);
+  return if (string_in_array(\@simple_data, $node->nodeName));
+
+  convert_paragraphs($node);
+
+  my $open = 0; # 1 while a startouttext is waiting for its endouttext
+  my $current = $node->firstChild;
+  while (defined $current) {
+    # remember the sibling first: the helpers insert nodes around $current
+    my $following = $current->nextSibling;
+    if ($open) {
+      if (!continue_outtext($current)) {
+        add_endouttext($node, $current);
+        $open = 0;
+      }
+    } elsif (inside_outtext($current)) {
+      add_startouttext($node, $current);
+      $open = 1;
+    }
+    # children that are part of an outtext run are not descended into
+    add_outtext($current) unless $open;
+    $current = $following;
+  }
+  # close a run that reaches the end of the children
+  add_endouttext($node) if $open;
+}
+
+##
+# Tells if a node should trigger the addition of startouttext before it.
+# @param {Node} node - the DOM node to test
+# @returns {int} 1 for non-blank text, for an HTML trigger element without
+#   a LON-CAPA block inside, or for an inline LON-CAPA element; 0 otherwise
+##
+sub inside_outtext {
+  my ($node) = @_;
+  my $type = $node->nodeType;
+
+  if ($type == XML_TEXT_NODE) {
+    return 1 if ($node->nodeValue !~ /^\s*$/);
+    return 0;
+  }
+  return 0 if ($type != XML_ELEMENT_NODE);
+
+  my $name = $node->nodeName;
+  if (string_in_array(\@html_trigger, $name)) {
+    return contains_loncapa_block($node) ? 0 : 1;
+  }
+  return string_in_array(\@loncapa_inline, $name) ? 1 : 0;
+}
+
+##
+# Tells if the outtext environment can continue with this node.
+# @param {Node} node - the DOM node to test
+# @returns {int} 1 if the node would start an outtext or is any text node
+#   (even one made only of spaces), 0 otherwise
+##
+sub continue_outtext {
+  my ($node) = @_;
+  return (inside_outtext($node) || $node->nodeType == XML_TEXT_NODE) ? 1 : 0;
+}
+
+# Returns 1 if the node contains a LON-CAPA block in a descendant.
+sub contains_loncapa_block {
+  my ($node) = @_;
+  # depth-first search: stop at the first element named in @loncapa_block
+  foreach my $child ($node->childNodes) {
+    next if ($child->nodeType != XML_ELEMENT_NODE);
+    return 1 if (string_in_array(\@loncapa_block, $child->nodeName));
+    return 1 if (contains_loncapa_block($child));
+  }
+  return 0;
+}
+
+##
+# Inserts a startouttext element in parent, before the given node.
+# When the node is a text node starting with whitespace, that whitespace is
+# moved to a new text node placed before startouttext.
+# @param {Node} parent - the parent of before_node
+# @param {Node} before_node - the child that will follow startouttext
+##
+sub add_startouttext {
+  my ($parent, $before_node) = @_;
+  my $doc = $parent->ownerDocument;
+  if ($before_node->nodeType == XML_TEXT_NODE) {
+    # split space at the beginning of the node
+    # (greedy match anchored with \z: a lazy match ending with $ stops
+    # before a final newline, which was then lost by setData)
+    if ($before_node->nodeValue =~ /\A(\s+)(.*)\z/s) {
+      my ($leading_space, $remaining_text) = ($1, $2);
+      my $space_node = $doc->createTextNode($leading_space);
+      $before_node->setData($remaining_text);
+      $parent->insertBefore($space_node, $before_node);
+    }
+  }
+  my $startouttext = $doc->createElement('startouttext');
+  $parent->insertBefore($startouttext, $before_node);
+}
+
+##
+# Inserts an endouttext element in parent, before the given node, or at the
+# end of parent when no node is given. When the preceding node is a text node
+# ending with whitespace, that whitespace is moved to a new text node placed
+# after endouttext.
+# @param {Node} parent - the element receiving endouttext
+# @param {Node} before_node - the child that will follow endouttext (optional)
+##
+sub add_endouttext {
+  my ($parent, $before_node) = @_;
+  my $doc = $parent->ownerDocument;
+  my $endouttext = $doc->createElement('endouttext');
+  # node that will end up just before endouttext
+  my $before_before;
+  if (defined $before_node) {
+    $before_before = $before_node->previousSibling;
+  } else {
+    $before_before = $parent->lastChild;
+  }
+  if (defined $before_before && $before_before->nodeType == XML_TEXT_NODE) {
+    # split space at the end of the node
+    if ($before_before->nodeValue =~ /\A(.*?)(\s+)\z/s) {
+      my ($kept_text, $trailing_space) = ($1, $2);
+      $before_before->setData($kept_text);
+      my $space_node = $doc->createTextNode($trailing_space);
+      if (defined $before_node) {
+        $parent->insertBefore($space_node, $before_node);
+      } else {
+        $parent->appendChild($space_node);
+      }
+      # endouttext goes before the moved whitespace
+      $before_node = $space_node;
+    }
+  }
+  if (defined $before_node) {
+    $parent->insertBefore($endouttext, $before_node);
+  } else {
+    $parent->appendChild($endouttext);
+  }
+}
+
+# Convert paragraph children when one contains an inline response into content + <br>
+# (the colorful editor does not support paragraphs containing inline responses)
+sub convert_paragraphs {
+  my ($parent) = @_;
+
+  # look for a <p> child having an inline response as a direct child
+  my $found = 0;
+  PARAGRAPH: foreach my $candidate ($parent->childNodes) {
+    next PARAGRAPH if ($candidate->nodeType != XML_ELEMENT_NODE);
+    next PARAGRAPH if ($candidate->nodeName ne 'p');
+    foreach my $inner ($candidate->childNodes) {
+      next if ($inner->nodeType != XML_ELEMENT_NODE);
+      if (string_in_array(\@inline_responses, $inner->nodeName)) {
+        $found = 1;
+        last PARAGRAPH;
+      }
+    }
+  }
+
+  if ($found) {
+    # one match is enough: every <p> child of parent gets unwrapped
+    my $doc = $parent->ownerDocument;
+    my $current = $parent->firstChild;
+    while (defined $current) {
+      my $following = $current->nextSibling;
+      if ($current->nodeType == XML_ELEMENT_NODE && $current->nodeName eq 'p') {
+        replace_by_children($current);
+        # we only add a br if there is something after
+        my $something_after = 0;
+        if (defined $following) {
+          if (defined $following->nextSibling) {
+            $something_after = 1;
+          } elsif ($following->nodeType != XML_TEXT_NODE) {
+            $something_after = 1;
+          } elsif ($following->nodeValue !~ /^\s*$/) {
+            $something_after = 1;
+          }
+        }
+        if ($something_after) {
+          my $br = $doc->createElement('br');
+          $parent->insertBefore($br, $following);
+        }
+      }
+      $current = $following;
+    }
+  }
+}
+
 ##
 # Tests if a string is in an array (using eq) (to avoid Smartmatch warnings with $value ~~ @array)
 # @param {Array} array - reference to the array of strings
@@ -118,3 +298,39 @@ sub string_in_array {
   }
   return 0;
 }
+
+##
+# Replaces a node by its children: the children are moved before the node
+# in its parent, then the node is removed. Whitespace-only text nodes at the
+# start and at the end are not kept, and the first and last moved text nodes
+# are trimmed on their outer side.
+# @param {Node} node - the DOM node
+##
+sub replace_by_children {
+  my ($node) = @_;
+  my $parent = $node->parentNode;
+  my $last_moved; # stays undefined until a child has been moved
+  my $current = $node->firstChild;
+  while (defined $current) {
+    my $following = $current->nextSibling;
+    my $at_start = !defined $last_moved;
+    my $at_end = !defined $following;
+    if ($current->nodeType == XML_TEXT_NODE) {
+      if (($at_start || $at_end) && $current->nodeValue =~ /^\s*$/) {
+        # do not keep first and last whitespace nodes
+        $current = $following;
+        next;
+      }
+      if ($at_start) {
+        # remove whitespace at the beginning
+        my $trimmed = $current->nodeValue;
+        $trimmed =~ s/^\s+//;
+        $current->setData($trimmed);
+      }
+      if ($at_end) {
+        # and at the end
+        my $trimmed = $current->nodeValue;
+        $trimmed =~ s/\s+$//;
+        $current->setData($trimmed);
+      }
+    }
+    $node->removeChild($current);
+    $parent->insertBefore($current, $node);
+    $last_moved = $current;
+    $current = $following;
+  }
+  $parent->removeChild($node);
+}
+