--- loncom/homework/cleanxml/post_xml.pm 2016/01/21 22:09:38 1.9
+++ loncom/homework/cleanxml/post_xml.pm 2017/01/17 20:29:06 1.12
@@ -1,7 +1,7 @@
# The LearningOnline Network
# Third step to clean a file.
#
-# $Id: post_xml.pm,v 1.9 2016/01/21 22:09:38 damieng Exp $
+# $Id: post_xml.pm,v 1.12 2017/01/17 20:29:06 damieng Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -136,6 +136,8 @@ sub post_xml {
fix_empty_lc_elements($root);
+ reduce_empty_p($root);
+
lowercase_attribute_values($root);
replace_numericalresponse_unit_attribute($root);
@@ -428,6 +430,15 @@ sub replace_m {
my $var_key1 = 'dfhg3df54hg65hg4';
my $var_key2 = 'dfhg654d6f5g4h5f';
my $eval = defined $m->getAttribute('eval') && $m->getAttribute('eval') eq 'on';
+ my $display = $m->getAttribute('display');
+ if (defined $display) {
+ if ($display eq '') {
+ $display = undef;
+ }
+ if (lc($display) eq 'jsmath') {
+ $display = 'mathjax';
+ }
+ }
if ($eval) {
# replace variables
foreach my $variable (@variables) {
@@ -465,6 +476,9 @@ sub replace_m {
if ($eval) {
$new_node->setAttribute('eval', 'on');
}
+ if (defined $display) {
+ $new_node->setAttribute('display', $display);
+ }
$new_node->appendChild($doc->createTextNode($new_text));
$m->parentNode->replaceChild($new_node, $m);
next;
@@ -495,7 +509,7 @@ sub replace_m {
# there are math separators inside, even after hiding variables, or there was no math symbol
# hide math parts inside before running tth
- my $math_key1 = '#ghjgdh5hg45gf';
+ my $math_key1 = '#5752398247516385';
my $math_key2 = '#';
my @maths = ();
my @separators = (['$$','$$'], ['\\(','\\)'], ['\\[','\\]'], ['$','$']);
@@ -526,14 +540,30 @@ sub replace_m {
$math =~ s/&amp;/&/g;
$math =~ s/&lt;/</g;
$math =~ s/&gt;/>/g;
+ my ($mel, $inside);
if ($math =~ /^\$\$(.*)\$\$$/s) {
- $math = '<dtm>'.$1.'</dtm>';
+ $mel = 'dtm';
+ $inside = $1;
} elsif ($math =~ /^\\\[(.*)\\\]$/s) {
- $math = '<dtm>'.$1.'</dtm>';
+ $mel = 'dtm';
+ $inside = $1;
} elsif ($math =~ /^\\\((.*)\\\)$/s) {
- $math = '<tm>'.$1.'</tm>';
+ $mel = 'tm';
+ $inside = $1;
} elsif ($math =~ /^\$(.*)\$$/s) {
- $math = '<tm>'.$1.'</tm>';
+ $mel = 'tm';
+ $inside = $1;
+ }
+ if (defined $inside) {
+ if ($inside =~ /^\s*$/) {
+ $math = '';
+ } else {
+ $math = '<'.$mel;
+ if ($eval && $inside =~ /$var_key1/) {
+ $math .= ' eval="on"';
+ }
+ $math .= '>'.$inside.'</'.$mel.'>';
+ }
}
my $replace = $math_key1.($i+1).$math_key2;
$html_text =~ s/$replace/$math/;
@@ -1856,6 +1886,18 @@ sub fix_paragraphs_inside {
push(@new_children, $doc->createElement('p'));
}
$p = undef;
+ # ignore the next node if it is a br (the paragraph default margin will take as much space)
+ # (ignoring whitespace)
+ while (defined $next && $next->nodeType == XML_TEXT_NODE && $next->nodeValue =~ /^[ \t\f\n\r]*$/) {
+ my $next2 = $next->nextSibling;
+ $node->removeChild($next);
+ $next = $next2;
+ }
+ if (defined $next && $next->nodeType == XML_ELEMENT_NODE && $next->nodeName eq 'br') {
+ my $next2 = $next->nextSibling;
+ $node->removeChild($next);
+ $next = $next2;
+ }
} elsif ($child->nodeType == XML_ELEMENT_NODE && string_in_array(\@inline_like_block, $child->nodeName)) {
# inline_like_block: use the paragraph if there is one, otherwise do not create one
if (defined $p) {
@@ -2032,6 +2074,20 @@ sub fix_paragraph {
if (!defined $left || !$left_needs_p) {
$replacement->appendChild($middle);
}
+ # ignore the next node if it is a br (the paragraph default margin will take as much space)
+ my $first_right;
+ if (defined $right) {
+ $first_right = $right->firstChild;
+ # ignore non-nbsp whitespace
+ while (defined $first_right && $first_right->nodeType == XML_TEXT_NODE &&
+ $first_right->nodeValue =~ /^[ \t\f\n\r]*$/) {
+ $first_right = $first_right->nextSibling;
+ }
+ }
+ if (defined $first_right && $first_right->nodeType == XML_ELEMENT_NODE &&
+ $first_right->nodeName eq 'br') {
+ $right->removeChild($first_right);
+ }
} else {
fix_paragraphs_inside($n, $all_block);
$replacement->appendChild($n);
@@ -2268,6 +2324,33 @@ sub fix_empty_lc_elements {
}
}
}
+
+# remove consecutive empty paragraphs (they will not show anyway): walks the children of $node and, whenever two empty p elements follow each other (whitespace-only text between them is ignored), removes the first of the two; then recurses into every element child
+sub reduce_empty_p {
+ my ($node) = @_; # $node: the node whose subtree is cleaned (called with the document root in post_xml)
+ my $next; # declared outside the loop because the for-step expression ($child=$next) needs it
+ for (my $child=$node->firstChild; defined $child; $child=$next) {
+ $next = $child->nextSibling; # fetched before any removal, so the walk can continue after $child is detached
+ while (defined $next && $next->nodeType == XML_TEXT_NODE && $next->nodeValue =~ /^[ \t\f\n\r]*$/) { # skip text made only of space, tab, form feed, newline or carriage return
+ $next = $next->nextSibling;
+ } # $next is now the first following sibling that is not whitespace-only text (or undef)
+ if ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName eq 'p' && defined $next &&
+ $next->nodeType == XML_ELEMENT_NODE && $next->nodeName eq 'p') { # a p followed (ignoring whitespace) by another p
+ my $first = $child->firstChild;
+ if (!defined $first || (!defined $first->nextSibling &&
+ $first->nodeType == XML_TEXT_NODE && $first->nodeValue =~ /^[ \t\f\n\r]*$/)) { # $child is empty: no child at all, or a single whitespace-only text node
+ $first = $next->firstChild; # $first is reused to run the same emptiness test on the following p
+ if (!defined $first || (!defined $first->nextSibling &&
+ $first->nodeType == XML_TEXT_NODE && $first->nodeValue =~ /^[ \t\f\n\r]*$/)) { # the following p is empty as well
+ $node->removeChild($child); # drop the first of the pair; the second stays and is examined on the next iteration
+ }
+ }
+ }
+ if ($child->nodeType == XML_ELEMENT_NODE) { # text and other non-element nodes are not descended into
+ reduce_empty_p($child); # also reached for a p that was just removed, which has no element content to process
+ }
+ }
+}
# turn some attribute values into lowercase when they should be
sub lowercase_attribute_values {