--- modules/damieng/clean_xml/xml_to_loncapa.pl	2015/04/29 19:26:05	1.1
+++ modules/damieng/clean_xml/xml_to_loncapa.pl	2015/05/16 20:17:13	1.2
@@ -8,6 +8,20 @@ use warnings;
 
 use XML::LibXML;
 
+
+# LON-CAPA block elements: an HTML element containing one of them is not put inside outtext
+my @loncapa_block = ('parameter','location','answer','foil','image','polygon','rectangle','text','conceptgroup','itemgroup','item','label','data','function','array','unit','answergroup','functionplotresponse','functionplotruleset','functionplotelements','functionplotcustomrule','essayresponse','hintpart','formulahint','numericalhint','reactionhint','organichint','optionhint','radiobuttonhint','stringhint','customhint','mathhint','formulahintcondition','numericalhintcondition','reactionhintcondition','organichintcondition','optionhintcondition','radiobuttonhintcondition','stringhintcondition','customhintcondition','mathhintcondition','imageresponse','foilgroup','datasubmission','textfield','hiddensubmission','radiobuttonresponse','rankresponse','matchresponse','import','style','script','window','block','library','notsolved','part','postanswerdate','preduedate','problem','problemtype','randomlabel','bgimg','labelgroup','randomlist','solved','while','tex','print','web','gnuplot','curve','Task','IntroParagraph','ClosingParagraph','Question','QuestionText','Setup','Instance','InstanceText','Criteria','CriteriaText','GraderNote','languageblock','translated','lang','instructorcomment','dataresponse','togglebox','standalone','comment','drawimage','allow','displayduedate','displaytitle','responseparam','organicstructure','scriptlib','parserlib','drawoptionlist','spline','backgroundplot','plotobject','plotvector','drawvectorsum','functionplotrule','functionplotvectorrule','functionplotvectorsumrule','axis','key','xtics','ytics','title','xlabel','ylabel','hiddenline','dtm','stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse', 'hint', 'hintgroup');
+
+# LON-CAPA inline elements, put inside outtext like text
+my @loncapa_inline = ('display','m','lm','chem','num','parse','algebra','displayweight','displaystudentphoto'); # not textline
+
+# HTML elements, put inside outtext unless they contain a LON-CAPA block
+my @html_elements = ('html','meta','head','title','base','link','style','noscript','body','header','footer','aside','h1','h2','h3','h4','h5','h6','li','dd','dt','tbody','tr','caption','thead','tfoot','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','br','hr','address','blockquote','img','figure','figcaption','object','param','embed','applet','video','source','audio','map','area','canvas','form','label','input','select','optgroup','option','textarea','fieldset','legend','button','iframe','section','div','p','ul','ol','dl','table'); # without script
+
+# elements inside which add_outtext does not add anything
+my @simple_data = ('polygon', 'rectangle', 'vector', 'value', 'answer', 'title', 'xlabel', 'ylabel', 'tic', 'parserlib', 'scriptlib', 'import', 'tex', 'text', 'image', 'display', 'm', 'lm', 'num', 'algebra', 'chem', 'parse', 'style', 'script', 'ins', 'del', 'label', 'option', 'textarea', 'legend' );
+
+
 binmode(STDOUT, ':encoding(UTF-8)');
 
 if (scalar(@ARGV) != 1) {
@@ -39,6 +53,7 @@ sub convert_file {
 
   my $dom_doc = XML::LibXML->load_xml(location => $pathname);
   open my $out, '>:encoding(UTF-8)', $newpath;
+  add_outtext($dom_doc);
   print $out node_to_string($dom_doc);
   close $out;
 }
@@ -103,6 +118,158 @@ sub escape {
   return $s;
 }
 
+##
+# Adds startouttext and endouttext where useful for the colorful editor.
+# Runs of text, HTML elements without LON-CAPA blocks and inline LON-CAPA
+# elements are wrapped; elements listed in @simple_data are left untouched,
+# and other elements are processed recursively.
+# @param {Node} node - the document or node to process (modified in place)
+##
+sub add_outtext {
+  my ($node) = @_;
+
+  if ($node->nodeType == XML_DOCUMENT_NODE) {
+    my $root = $node->documentElement();
+    add_outtext($root);
+    return;
+  }
+  if ($node->nodeType != XML_ELEMENT_NODE) {
+    return;
+  }
+  if (string_in_array(\@simple_data, $node->nodeName)) {
+    return;
+  }
+  my $next;
+  my $in_outtext = 0;
+  for (my $child=$node->firstChild; defined $child; $child=$next) {
+    # get the next sibling before the list of children is modified
+    $next = $child->nextSibling;
+    if (!$in_outtext && inside_outtext($child)) {
+      # Add startouttext
+      my $doc = $node->ownerDocument;
+      my $startouttext = $doc->createElement('startouttext');
+      $node->insertBefore($startouttext, $child);
+      $in_outtext = 1;
+    } elsif ($in_outtext && !continue_outtext($child)) {
+      # Add endouttext
+      add_endouttext($node, $child);
+      $in_outtext = 0;
+    }
+    if (!$in_outtext) {
+      # a node left outside of outtext might contain text further down
+      add_outtext($child);
+    }
+  }
+  if ($in_outtext) {
+    # Add endouttext
+    add_endouttext($node);
+  }
+}
+
+##
+# Tests if a node should trigger the addition of startouttext before it.
+# This is the case for text that is not only whitespace, for HTML elements
+# without any LON-CAPA block in their descendants, and for inline LON-CAPA elements.
+# @param {Node} node - the node to test
+# @returns 1 if startouttext should be added before the node, 0 otherwise
+##
+sub inside_outtext {
+  my ($node) = @_;
+  if ($node->nodeType == XML_TEXT_NODE) {
+    return ($node->nodeValue !~ /^\s*$/) ? 1 : 0;
+  }
+  if ($node->nodeType != XML_ELEMENT_NODE) {
+    return 0;
+  }
+  my $name = $node->nodeName;
+  if (string_in_array(\@html_elements, $name)) {
+    # an HTML element with a LON-CAPA block inside is not wrapped as a whole
+    return contains_loncapa_block($node) ? 0 : 1;
+  }
+  if (string_in_array(\@loncapa_inline, $name)) {
+    return 1;
+  }
+  return 0;
+}
+
+##
+# Tests if the outtext environment can continue with this node.
+# @param {Node} node - the node following the current outtext content
+# @returns 1 if the node can be part of the current outtext, 0 otherwise
+##
+sub continue_outtext {
+  my ($node) = @_;
+  if (inside_outtext($node)) {
+    return 1;
+  }
+  if ($node->nodeType == XML_TEXT_NODE) {
+    return 1; # continue even if this is just spaces
+  }
+  return 0;
+}
+
+##
+# Tests if the node contains a LON-CAPA block in a descendant.
+# @param {Node} node - the node whose descendants are checked
+# @returns 1 if a descendant element is listed in @loncapa_block, 0 otherwise
+##
+sub contains_loncapa_block {
+  my ($node) = @_;
+  foreach my $child ($node->childNodes) {
+    if ($child->nodeType == XML_ELEMENT_NODE) {
+      if (string_in_array(\@loncapa_block, $child->nodeName)) {
+        return 1;
+      }
+      if (contains_loncapa_block($child)) {
+        return 1;
+      }
+    }
+  }
+  return 0;
+}
+
+##
+# Adds an endouttext element to parent, before before_node or at the end of
+# parent if before_node is not defined. Whitespace at the end of the text
+# preceding that position is kept after endouttext.
+# @param {Node} parent - the element receiving the new endouttext child
+# @param {Node} [before_node] - the child before which endouttext is added
+##
+sub add_endouttext {
+  my ($parent, $before_node) = @_;
+  my $doc = $parent->ownerDocument;
+  my $endouttext = $doc->createElement('endouttext');
+  my $before_before;
+  if (defined $before_node) {
+    $before_before = $before_node->previousSibling;
+  } else {
+    $before_before = $parent->lastChild;
+  }
+  if (defined $before_before && $before_before->nodeType == XML_TEXT_NODE) {
+    my $text = $before_before->nodeValue;
+    if ($text =~ /^\s+$/) {
+      # only whitespace: add endouttext before the whole node instead of
+      # splitting it, which would leave an empty text node in the document
+      $before_node = $before_before;
+    } elsif ($text =~ /^(.*?)(\s+)$/s) {
+      # split space at the end of the node before endouttext
+      $before_before->setData($1);
+      my $space_node = $doc->createTextNode($2);
+      if (defined $before_node) {
+        $parent->insertBefore($space_node, $before_node);
+      } else {
+        $parent->appendChild($space_node);
+      }
+      $before_node = $space_node;
+    }
+  }
+  if (defined $before_node) {
+    $parent->insertBefore($endouttext, $before_node);
+  } else {
+    $parent->appendChild($endouttext);
+  }
+}
+
 ##
 # Tests if a string is in an array (using eq) (to avoid Smartmatch warnings with $value ~~ @array)
 # @param {Array} array - reference to the array of strings