# Unified diff of modules/damieng/clean_xml/xml_to_loncapa.pl, revision 1.2 -> 1.3.
# NOTE(review): the line breaks and indentation of this diff appear to have been
# lost (hunk headers, context, '-' and '+' lines run together); restore them from
# the repository before trying to apply this as a patch.
# Changes recorded below:
#  - @html_elements is replaced by @html_trigger, which no longer lists 'html',
#    'meta', 'head', 'title', 'base', 'link', 'style', 'noscript', 'body' and
#    'label'; inside_outtext() now tests element names against @html_trigger.
#  - @simple_data gains 'data' and 'function'; a new @inline_responses list is added.
#  - add_outtext() now calls convert_paragraphs() before scanning the children and
#    delegates the start marker to the new add_startouttext(), which splits leading
#    whitespace off a text node so that it stays before the startouttext element.
#  - convert_paragraphs() unwraps every <p> child of a node as soon as one <p> child
#    directly contains an inline response element, and inserts a <br> after the
#    unwrapped content unless nothing follows or only a final whitespace-only text
#    node follows.
#  - replace_by_children() moves a node's children in front of it and removes the
#    node, dropping whitespace-only text at either end and trimming leading and
#    trailing whitespace of the first/last text child.
--- modules/damieng/clean_xml/xml_to_loncapa.pl 2015/05/16 20:17:13 1.2 +++ modules/damieng/clean_xml/xml_to_loncapa.pl 2015/05/19 15:36:01 1.3 @@ -13,9 +13,12 @@ my @loncapa_block = ('parameter','locati my @loncapa_inline = ('display','m','lm','chem','num','parse','algebra','displayweight','displaystudentphoto'); # not textline -my @html_elements = ('html','meta','head','title','base','link','style','noscript','body','header','footer','aside','h1','h2','h3','h4','h5','h6','li','dd','dt','tbody','tr','caption','thead','tfoot','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','br','hr','address','blockquote','img','figure','figcaption','object','param','embed','applet','video','source','audio','map','area','canvas','form','label','input','select','optgroup','option','textarea','fieldset','legend','button','iframe','section','div','p','ul','ol','dl','table'); # without script +# HTML elements that trigger the addition of startouttext/endouttext +my @html_trigger = ('header','footer','aside','h1','h2','h3','h4','h5','h6','li','dd','dt','tbody','tr','caption','thead','tfoot','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','br','hr','address','blockquote','img','figure','figcaption','object','param','embed','applet','video','source','audio','map','area','canvas','form','input','select','optgroup','option','textarea','fieldset','legend','button','iframe','section','div','p','ul','ol','dl','table'); -my @simple_data = ('polygon', 'rectangle', 'vector', 'value', 'answer', 'title', 'xlabel', 'ylabel', 'tic', 'parserlib', 'scriptlib', 'import', 'tex', 'text', 'image', 'display', 'm', 'lm', 'num', 'algebra', 'chem', 'parse', 'title', 'style', 'script', 'ins', 'del', 'label', 'option', 'textarea', 'legend' ); +my @simple_data = ('polygon', 'rectangle', 'vector', 'value', 'answer', 'title', 'data', 'function', 'xlabel', 
'ylabel', 'tic', 'parserlib', 'scriptlib', 'import', 'tex', 'text', 'image', 'display', 'm', 'lm', 'num', 'algebra', 'chem', 'parse', 'title', 'style', 'script', 'ins', 'del', 'label', 'option', 'textarea', 'legend' ); + +my @inline_responses = ('stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse'); binmode(STDOUT, ':encoding(UTF-8)'); @@ -114,7 +117,7 @@ sub escape { return $s; } -# Adds startouttext and endouttext where useful for the colorfull editor +# Adds startouttext and endouttext where useful for the colorful editor sub add_outtext { my ($node) = @_; @@ -129,18 +132,15 @@ sub add_outtext { if (string_in_array(\@simple_data, $node->nodeName)) { return; } + convert_paragraphs($node); my $next; my $in_outtext = 0; for (my $child=$node->firstChild; defined $child; $child=$next) { $next = $child->nextSibling; if (!$in_outtext && inside_outtext($child)) { - # Add startouttext - my $doc = $node->ownerDocument; - my $startouttext = $doc->createElement('startouttext'); - $node->insertBefore($startouttext, $child); + add_startouttext($node, $child); $in_outtext = 1; } elsif ($in_outtext && !continue_outtext($child)) { - # Add endouttext add_endouttext($node, $child); $in_outtext = 0; } @@ -149,7 +149,6 @@ sub add_outtext { } } if ($in_outtext) { - # Add endouttext add_endouttext($node); } } @@ -160,7 +159,7 @@ sub inside_outtext { if ($node->nodeType == XML_TEXT_NODE && $node->nodeValue !~ /^\s*$/) { return 1; } - if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@html_elements, $node->nodeName)) { + if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@html_trigger, $node->nodeName)) { if (contains_loncapa_block($node)) { return 0; } else { @@ -201,6 +200,21 @@ sub contains_loncapa_block { return 0; } +sub add_startouttext { + my ($parent, $before_node) = @_; + my $doc = $parent->ownerDocument; + if ($before_node->nodeType == XML_TEXT_NODE) { 
+ # split space at the beginning of the node + if ($before_node->nodeValue =~ /^(\s+)(.*?)$/s) { + my $space_node = $doc->createTextNode($1); + $before_node->setData($2); + $parent->insertBefore($space_node, $before_node); + } + } + my $startouttext = $doc->createElement('startouttext'); + $parent->insertBefore($startouttext, $before_node); +} + sub add_endouttext { my ($parent, $before_node) = @_; my $doc = $parent->ownerDocument; @@ -212,7 +226,7 @@ sub add_endouttext { $before_before = $parent->lastChild; } if (defined $before_before && $before_before->nodeType == XML_TEXT_NODE) { - # split space at the end of the node before endouttext + # split space at the end of the node if ($before_before->nodeValue =~ /^(.*?)(\s+)$/s) { $before_before->setData($1); my $space_node = $doc->createTextNode($2); @@ -231,6 +245,44 @@ sub add_endouttext { } } +# Convert paragraph children when one contains an inline response into content + <br>
+# (the colorful editor does not support paragraphs containing inline responses) +sub convert_paragraphs { + my ($parent) = @_; + my $p_child_with_inline_response = 0; + foreach my $child ($parent->childNodes) { + if ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName eq 'p') { + foreach my $child2 ($child->childNodes) { + if ($child2->nodeType == XML_ELEMENT_NODE) { + if (string_in_array(\@inline_responses, $child2->nodeName)) { + $p_child_with_inline_response = 1; + last; + } + } + } + } + if ($p_child_with_inline_response) { + last; + } + } + if ($p_child_with_inline_response) { + my $doc = $parent->ownerDocument; + my $next; + for (my $child=$parent->firstChild; defined $child; $child=$next) { + $next = $child->nextSibling; + if ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName eq 'p') { + replace_by_children($child); + if (defined $next && (defined $next->nextSibling || $next->nodeType != XML_TEXT_NODE || + $next->nodeValue !~ /^\s*$/)) { + # we only add a br if there is something after + my $br = $doc->createElement('br'); + $parent->insertBefore($br, $next); + } + } + } + } +} + ## # Tests if a string is in an array (using eq) (to avoid Smartmatch warnings with $value ~~ @array) # @param {Array} array - reference to the array of strings @@ -246,3 +298,39 @@ sub string_in_array { } return 0; } + +## +# replaces a node by its children +# @param {Node} node - the DOM node +## +sub replace_by_children { + my ($node) = @_; + my $parent = $node->parentNode; + my $next; + my $previous; + for (my $child=$node->firstChild; defined $child; $child=$next) { + $next = $child->nextSibling; + if ((!defined $previous || !defined $next) && + $child->nodeType == XML_TEXT_NODE && $child->nodeValue =~ /^\s*$/) { + next; # do not keep first and last whitespace nodes + } else { + if (!defined $previous && $child->nodeType == XML_TEXT_NODE) { + # remove whitespace at the beginning + my $value = $child->nodeValue; + $value =~ s/^\s+//; + $child->setData($value); + } + 
if (!defined $next && $child->nodeType == XML_TEXT_NODE) { + # and at the end + my $value = $child->nodeValue; + $value =~ s/\s+$//; + $child->setData($value); + } + } + $node->removeChild($child); + $parent->insertBefore($child, $node); + $previous = $child; + } + $parent->removeChild($node); +} +