# Excerpt from loncom/homework/cleanxml/post_xml.pm, change 1.9 -> 1.10
# (2016/11/10 19:48:22, damieng).  That revision adds reduce_empty_p() and
# calls it from post_xml() right after fix_empty_lc_elements($root) and
# before lowercase_attribute_values($root).
#
# NOTE(review): XML_ELEMENT_NODE and XML_TEXT_NODE are not defined in this
# excerpt; they are presumably imported at the top of the enclosing file
# (e.g. from XML::LibXML) -- confirm.

# Tells whether $node is a text node whose value is empty or made only of
# whitespace (space, tab, form feed, newline, carriage return).
sub _is_whitespace_text {
  my ($node) = @_;
  return $node->nodeType == XML_TEXT_NODE
      && $node->nodeValue =~ /^[ \t\f\n\r]*$/;
}

# Tells whether $node is a <p> element that has no child at all, or whose
# single child is a whitespace-only text node.
sub _is_empty_paragraph {
  my ($node) = @_;
  return 0 if $node->nodeType != XML_ELEMENT_NODE;
  return 0 if $node->nodeName ne 'p';
  my $first = $node->firstChild;
  return 1 if !defined $first;
  return !defined($first->nextSibling) && _is_whitespace_text($first);
}

# remove consecutive empty paragraphs (they will not show anyway)
#
# Walks the subtree under $node.  Whenever an empty <p> is followed (ignoring
# whitespace-only text nodes in between) by another empty <p>, the first one
# is removed from its parent, so only the last paragraph of a run of empty
# paragraphs is kept.  The whitespace text nodes between them are left alone.
#
# Parameter: $node - the node whose descendants are cleaned (modified in place).
# Returns nothing meaningful.
sub reduce_empty_p {
  my ($node) = @_;
  my $next;
  for (my $child = $node->firstChild; defined $child; $child = $next) {
    # Find the successor before touching $child: it may be detached below.
    # Whitespace-only text nodes are skipped so that paragraphs separated
    # only by indentation or line breaks count as consecutive.
    $next = $child->nextSibling;
    $next = $next->nextSibling
      while defined $next && _is_whitespace_text($next);

    # Only elements can be paragraphs or contain paragraphs.
    next if $child->nodeType != XML_ELEMENT_NODE;

    if (defined $next && _is_empty_paragraph($child) &&
        _is_empty_paragraph($next)) {
      $node->removeChild($child);
      # The removed paragraph has at most one whitespace text child, so
      # there is nothing to clean inside it.
      next;
    }

    reduce_empty_p($child);
  }
}