--- loncom/homework/cleanxml/post_xml.pm 2016/01/21 22:09:38 1.9 +++ loncom/homework/cleanxml/post_xml.pm 2016/11/10 21:53:56 1.11 @@ -1,7 +1,7 @@ # The LearningOnline Network # Third step to clean a file. # -# $Id: post_xml.pm,v 1.9 2016/01/21 22:09:38 damieng Exp $ +# $Id: post_xml.pm,v 1.11 2016/11/10 21:53:56 damieng Exp $ # # Copyright Michigan State University Board of Trustees # @@ -136,6 +136,8 @@ sub post_xml { fix_empty_lc_elements($root); + reduce_empty_p($root); + lowercase_attribute_values($root); replace_numericalresponse_unit_attribute($root); @@ -1856,6 +1858,18 @@ sub fix_paragraphs_inside { push(@new_children, $doc->createElement('p')); } $p = undef; + # ignore the next node if it is a br (the paragraph default margin will take as much space) + # (ignoring whitespace) + while (defined $next && $next->nodeType == XML_TEXT_NODE && $next->nodeValue =~ /^[ \t\f\n\r]*$/) { + my $next2 = $next->nextSibling; + $node->removeChild($next); + $next = $next2; + } + if (defined $next && $next->nodeType == XML_ELEMENT_NODE && $next->nodeName eq 'br') { + my $next2 = $next->nextSibling; + $node->removeChild($next); + $next = $next2; + } } elsif ($child->nodeType == XML_ELEMENT_NODE && string_in_array(\@inline_like_block, $child->nodeName)) { # inline_like_block: use the paragraph if there is one, otherwise do not create one if (defined $p) { @@ -2032,6 +2046,20 @@ sub fix_paragraph { if (!defined $left || !$left_needs_p) { $replacement->appendChild($middle); } + # ignore the next node if it is a br (the paragraph default margin will take as much space) + my $first_right; + if (defined $right) { + $first_right = $right->firstChild; + # ignore non-nbsp whitespace + while (defined $first_right && $first_right->nodeType == XML_TEXT_NODE && + $first_right->nodeValue =~ /^[ \t\f\n\r]*$/) { + $first_right = $first_right->nextSibling; + } + } + if (defined $first_right && $first_right->nodeType == XML_ELEMENT_NODE && + $first_right->nodeName eq 'br') { + 
$right->removeChild($first_right);
+      }
     } else {
       fix_paragraphs_inside($n, $all_block);
       $replacement->appendChild($n);
@@ -2268,6 +2296,60 @@ sub fix_empty_lc_elements {
     }
   }
 }
+
+# Remove every empty paragraph that is directly followed by another empty
+# paragraph (consecutive empty paragraphs will not show anyway).
+#
+# Whitespace-only text nodes between the two paragraphs are ignored; a
+# non-breaking space is not matched by the whitespace test, so a paragraph
+# holding only nbsp is kept. The whole subtree under $node is processed.
+#
+# @param $node - the node whose descendants are cleaned up
+sub reduce_empty_p {
+  my ($node) = @_;
+  my $next;
+  for (my $child=$node->firstChild; defined $child; $child=$next) {
+    # find the successor before $child can be removed, skipping blank text
+    $next = $child->nextSibling;
+    while (defined $next && $next->nodeType == XML_TEXT_NODE &&
+        $next->nodeValue =~ /^[ \t\f\n\r]*$/) {
+      $next = $next->nextSibling;
+    }
+    if ($child->nodeType != XML_ELEMENT_NODE) {
+      next;
+    }
+    if (defined $next && is_empty_or_blank_p($child) && is_empty_or_blank_p($next)) {
+      # the following empty paragraph is enough, drop this one
+      # (nothing is left to clean inside it, so there is no recursion)
+      $node->removeChild($child);
+      next;
+    }
+    reduce_empty_p($child);
+  }
+}
+
+# Tells whether a node is a p element that is empty or contains only a
+# single whitespace text node (helper for reduce_empty_p).
+#
+# @param $node - the node to test
+# @returns 1 for an empty paragraph, 0 otherwise
+sub is_empty_or_blank_p {
+  my ($node) = @_;
+  if ($node->nodeType != XML_ELEMENT_NODE || $node->nodeName ne 'p') {
+    return 0;
+  }
+  my $first = $node->firstChild;
+  if (!defined $first) {
+    return 1;
+  }
+  if (defined $first->nextSibling) {
+    return 0;
+  }
+  if ($first->nodeType == XML_TEXT_NODE && $first->nodeValue =~ /^[ \t\f\n\r]*$/) {
+    return 1;
+  }
+  return 0;
+}
 
 # turn some attribute values into lowercase when they should be
 sub lowercase_attribute_values {