Annotation of loncom/homework/cleanxml/post_xml.pm, revision 1.1

1.1     ! damieng     1: # The LearningOnline Network
        !             2: # Third step to clean a file.
        !             3: #
        !             4: # $Id$
        !             5: #
        !             6: # Copyright Michigan State University Board of Trustees
        !             7: #
        !             8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
        !             9: #
        !            10: # LON-CAPA is free software; you can redistribute it and/or modify
        !            11: # it under the terms of the GNU General Public License as published by
        !            12: # the Free Software Foundation; either version 2 of the License, or
        !            13: # (at your option) any later version.
        !            14: #
        !            15: # LON-CAPA is distributed in the hope that it will be useful,
        !            16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        !            18: # GNU General Public License for more details.
        !            19: #
        !            20: # You should have received a copy of the GNU General Public License
        !            21: # along with LON-CAPA; if not, write to the Free Software
        !            22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
        !            23: #
        !            24: # /home/httpd/html/adm/gpl.txt
        !            25: #
        !            26: # http://www.lon-capa.org/
        !            27: #
        !            28: ###
        !            29: 
        !            30: #!/usr/bin/perl
        !            31: 
        !            32: package Apache::post_xml;
        !            33: 
        !            34: use strict;
        !            35: use utf8;
        !            36: use warnings;
        !            37: 
        !            38: use File::Basename;
        !            39: use File::Temp qw/ tempfile /;
        !            40: use Cwd 'abs_path';
        !            41: use XML::LibXML;
        !            42: use HTML::TokeParser; # used to parse sty files
        !            43: use Tie::IxHash; # for ordered hashes
        !            44: 
        !            45: use Env qw(RES_DIR); # path of res directory parent (without the / at the end)
        !            46: 
        !            47: no warnings 'recursion'; # yes, fix_paragraph is using heavy recursion, I know
        !            48: 
        !            49: # these are constants
        !            50: my @block_elements = ('parameter','location','answer','foil','image','polygon','rectangle','text','conceptgroup','itemgroup','item','label','data','function','array','unit','answergroup','functionplotresponse','functionplotruleset','functionplotelements','functionplotcustomrule','essayresponse','hintpart','formulahint','numericalhint','reactionhint','organichint','optionhint','radiobuttonhint','stringhint','customhint','mathhint','formulahintcondition','numericalhintcondition','reactionhintcondition','organichintcondition','optionhintcondition','radiobuttonhintcondition','stringhintcondition','customhintcondition','mathhintcondition','imageresponse','foilgroup','datasubmission','textfield','hiddensubmission','radiobuttonresponse','rankresponse','matchresponse','import','style','script','window','block','library','notsolved','part','postanswerdate','preduedate','problem','problemtype','randomlabel','bgimg','labelgroup','randomlist','solved','while','tex','print','web','gnuplot','curve','Task','IntroParagraph','ClosingParagraph','Question','QuestionText','Setup','Instance','InstanceText','Criteria','CriteriaText','GraderNote','languageblock','translated','lang','instructorcomment','dataresponse','togglebox','standalone','comment','drawimage','allow','displayduedate','displaytitle','responseparam','organicstructure','scriptlib','parserlib','drawoptionlist','spline','backgroundplot','plotobject','plotvector','drawvectorsum','functionplotrule','functionplotvectorrule','functionplotvectorsumrule','axis','key','xtics','ytics','title','xlabel','ylabel','hiddenline','dtm');
        !            51: my @inline_like_block = ('stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse', 'hint', 'hintgroup'); # inline elements treated like blocks for pretty print and some other things
        !            52: my @responses = ('stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse','essayresponse','radiobuttonresponse','matchresponse','rankresponse','imageresponse','functionplotresponse');
        !            53: my @block_html = ('html','head','body','section','h1','h2','h3','h4','h5','h6','div','p','ul','ol','li','table','tbody','tr','td','th','dl','dt','dd','pre','noscript','hr','address','blockquote','object','applet','embed','map','form','fieldset','iframe','center','frameset');
        !            54: my @no_newline_inside = ('import','parserlib','scriptlib','data','function','label','xlabel','ylabel','tic','text','rectangle','image','title','h1','h2','h3','h4','h5','h6','li','td','p');
        !            55: my @preserve_elements = ('script','answer','pre');
        !            56: my @accepting_style = ('section','h1','h2','h3','h4','h5','h6','div','p','li','td','th','dt','dd','pre','blockquote');
        !            57: my @latex_math = ('\alpha', '\theta', '\omicron', '\tau', '\beta', '\vartheta', '\pi', '\upsilon', '\gamma', '\gamma', '\varpi', '\phi', '\delta', '\kappa', '\rho', '\varphi', '\epsilon', '\lambda', '\varrho', '\chi', '\varepsilon', '\mu', '\sigma', '\psi', '\zeta', '\nu', '\varsigma', '\omega', '\eta', '\xi',
        !            58:   '\Gamma', '\Lambda', '\Sigma', '\Psi', '\Delta', '\Xi', '\Upsilon', '\Omega', '\Theta', '\Pi', '\Phi',
        !            59:   '\pm', '\cap', '\diamond', '\oplus', '\mp', '\cup', '\bigtriangleup', '\ominus', '\times', '\uplus', '\bigtriangledown', '\otimes', '\div', '\sqcap', '\triangleleft', '\oslash', '\ast', '\sqcup', '\triangleright', '\odot', '\star', '\vee', '\lhd$', '\bigcirc', '\circ', '\wedge', '\rhd$', '\dagger', '\bullet', '\setminus', '\unlhd$', '\ddagger', '\cdot', '\wr', '\unrhd$', '\amalg', '+', '-',
        !            60:   '\leq', '\geq', '\equiv', '\models', '\prec', '\succ', '\sim', '\perp', '\preceq', '\succeq', '\simeq', '\mid', '\ll', '\gg', '\asymp', '\parallel', '\subset', '\supset', '\approx', '\bowtie', '\subseteq', '\supseteq', '\cong', '\Join$', '\sqsubset$', '\sqsupset$', '\neq', '\smile', '\sqsubseteq', '\sqsupseteq', '\doteq', '\frown', '\in', '\ni', '\propto', '\vdash', '\dashv',
        !            61:   '\colon', '\ldotp', '\cdotp',
        !            62:   '\leftarrow', '\longleftarrow', '\uparrow', '\Leftarrow', '\Longleftarrow', '\Uparrow', '\rightarrow', '\longrightarrow', '\downarrow', '\Rightarrow', '\Longrightarrow', '\Downarrow', '\leftrightarrow', '\longleftrightarrow', '\updownarrow', '\Leftrightarrow', '\Longleftrightarrow', '\Updownarrow', '\mapsto', '\longmapsto', '\nearrow', '\hookleftarrow', '\hookrightarrow', '\searrow', '\leftharpoonup', '\rightharpoonup', '\swarrow', '\leftharpoondown', '\rightharpoondown', '\nwarrow', '\rightleftharpoons', '\leadsto$',
        !            63:   '\ldots', '\cdots', '\vdots', '\ddots', '\aleph', '\prime', '\forall', '\infty', '\hbar', '\emptyset', '\exists', '\Box$', '\imath', '\nabla', '\neg', '\Diamond$', '\jmath', '\surd', '\flat', '\triangle', '\ell', '\top', '\natural', '\clubsuit', '\wp', '\bot', '\sharp', '\diamondsuit', '\Re', '\|', '\backslash', '\heartsuit', '\Im', '\angle', '\partial', '\spadesuit', '\mho$',
        !            64:   '\sum', '\bigcap', '\bigodot', '\prod', '\bigcup', '\bigotimes', '\coprod', '\bigsqcup', '\bigoplus', '\int', '\bigvee', '\biguplus', '\oint', '\bigwedge',
        !            65:   '\arccos', '\cos', '\csc', '\exp', '\ker', '\limsup', '\min', '\sinh', '\arcsin', '\cosh', '\deg', '\gcd', '\lg', '\ln', '\Pr', '\sup', '\arctan', '\cot', '\det', '\hom', '\lim', '\log', '\sec', '\tan', '\arg', '\coth', '\dim', '\inf', '\liminf', '\max', '\sin', '\tanh',
        !            66:   '\uparrow', '\Uparrow', '\downarrow', '\Downarrow', '\updownarrow', '\Updownarrow', '\lfloor', '\rfloor', '\lceil', '\rceil', '\langle', '\rangle', '\backslash',
        !            67:   '\rmoustache', '\lmoustache', '\rgroup', '\lgroup', '\arrowvert', '\Arrowvert', '\bracevert',
        !            68:   '\hat{', '\acute{', '\bar{', '\dot{', '\breve{', '\check{', '\grave{', '\vec{', '\ddot{', '\tilde{',
        !            69:   '\widetilde{', '\widehat{', '\overleftarrow{', '\overrightarrow{', '\overline{', '\underline{', '\overbrace{', '\underbrace{', '\sqrt{', '\sqrt[', '\frac{'
        !            70: );
        !            71: # list of elements that can contain style elements:
        !            72: my @containing_styles = ('library','problem',@responses,'foil','item','text','hintgroup','hintpart','label','part','preduedate','postanswerdate','solved','notsolved','block','while','web','standalone','problemtype','languageblock','translated','lang','window','windowlink','togglebox','instructorcomment','body','section','div','p','li','dd','td','th','blockquote','object','applet','video','audio','canvas','fieldset','button',
        !            73: 'span','strong','em','b','i','sup','sub','code','kbd','samp','tt','ins','del','var','small','big','u','font');
        !            74: my @html_styles = ('span', 'strong', 'em' , 'b', 'i', 'sup', 'sub', 'tt', 'var', 'small', 'big', 'u');
        !            75: 
        !            76: my $warnings; # 1 = print warnings
        !            77: 
        !            78: 
        !            79: # Parses the XML document and fixes many things to turn it into a document matching the schema.
        !            80: # Returns the text of the document as a byte string.
        !            81: sub post_xml {
        !            82:   my ($textref, $file_path, $warn) = @_;
        !            83:   $warnings = $warn;
        !            84:   
        !            85:   my $dom_doc = XML::LibXML->load_xml(string => $textref);
        !            86: 
        !            87:   my $root = fix_structure($dom_doc);
        !            88: 
        !            89:   remove_elements($root, ['startouttext','startoutext','startottext','startouttex','startouttect','atartouttext','starouttext','starttextout','starttext','starttextarea','endouttext','endoutext','endoutttext','endouttxt','endouutext','ednouttext','endouttex','endoouttext','endouttest','endtextout','endtextarea','startpartmarker','endpartmarker','basefont','x-claris-tagview','x-claris-window','x-sas-window']);
        !            90:   
        !            91:   remove_empty_attributes($root);
        !            92:   
        !            93:   fix_attribute_case($root);
        !            94:   
        !            95:   my $fix_by_hand = replace_m($root);
        !            96:   
        !            97:   my @all_block = (@block_elements, @block_html);
        !            98:   add_sty_blocks($file_path, $root, \@all_block); # must come before the subs using @all_block
        !            99: 
        !           100:   fix_block_styles($root, \@all_block);
        !           101:   $root->normalize();
        !           102:   
        !           103:   fix_fonts($root, \@all_block);
        !           104:   
        !           105:   replace_u($root);
        !           106: 
        !           107:   remove_bad_cdata_sections($root);
        !           108:   
        !           109:   add_cdata_sections($root);
        !           110:   
        !           111:   fix_style_element($root);
        !           112:   
        !           113:   fix_tables($root);
        !           114: 
        !           115:   fix_lists($root);
        !           116:   
        !           117:   fix_wrong_name_for_img($root); # should be before replace_deprecated_attributes_by_css
        !           118: 
        !           119:   replace_deprecated_attributes_by_css($root);
        !           120:   
        !           121:   replace_center($root, \@all_block); # must come after replace_deprecated_attributes_by_css
        !           122:   
        !           123:   replace_nobr($root);
        !           124:   
        !           125:   remove_useless_notsolved($root);
        !           126:   
        !           127:   fix_paragraphs_inside($root, \@all_block);
        !           128: 
        !           129:   remove_empty_style($root);
        !           130:   
        !           131:   fix_empty_lc_elements($root);
        !           132:   
        !           133:   lowercase_attribute_values($root);
        !           134:   
        !           135:   replace_numericalresponse_unit_attribute($root);
        !           136:   
        !           137:   replace_functions_by_elements($root);
        !           138:   
        !           139:   pretty($root, \@all_block);
        !           140: 
        !           141:   replace_tm_dtm($root);
        !           142:   
        !           143:   if ($fix_by_hand) {
        !           144:     die "The file has been converted but it should be fixed by hand.";
        !           145:   }
        !           146:   return $dom_doc->toString(); # byte string !
        !           147: }
        !           148: 
        !           149: sub fix_structure {
        !           150:   my ($doc) = @_;
        !           151:   # the root element has already been added in pre_xml
        !           152:   my $root = $doc->documentElement;
        !           153:   # inside the root, replace html, problem and library elements by their content
        !           154:   my @toreplace = ('html','problem','library');
        !           155:   foreach my $name (@toreplace) {
        !           156:     my @elements = $root->getElementsByTagName($name);
        !           157:     foreach my $element (@elements) {
        !           158:       replace_by_children($element);
        !           159:     }
        !           160:   }
        !           161:   # insert all link and style elements inside a new head element
        !           162:   my $current_node = undef;
        !           163:   my @heads = $doc->getElementsByTagName('head');
        !           164:   my @links = $doc->getElementsByTagName('link');
        !           165:   my @styles = $doc->getElementsByTagName('style');
        !           166:   my @titles = $doc->getElementsByTagName('title');
        !           167:   if (scalar(@titles) > 0) {
        !           168:     # NOTE: there is a title element in gnuplot, not to be confused with the one inside HTML head
        !           169:     for (my $i=0; $i<scalar(@titles); $i++) {
        !           170:       my $title = $titles[$i];
        !           171:       my $found_gnuplot = 0;
        !           172:       my $ancestor = $title->parentNode;
        !           173:       while (defined $ancestor) {
        !           174:         if ($ancestor->nodeName eq 'gnuplot') {
        !           175:           $found_gnuplot = 1;
        !           176:           last;
        !           177:         }
        !           178:         $ancestor = $ancestor->parentNode;
        !           179:       }
        !           180:       if ($found_gnuplot) {
        !           181:         splice(@titles, $i, 1);
        !           182:         $i--;
        !           183:       }
        !           184:     }
        !           185:   }
        !           186:   if (scalar(@heads) > 0 || scalar(@titles) > 0 || scalar(@links) > 0 || scalar(@styles) > 0) {
        !           187:     my $htmlhead = $doc->createElement('head');
        !           188:     foreach my $head (@heads) {
        !           189:       my $next;
        !           190:       for (my $child=$head->firstChild; defined $child; $child=$next) {
        !           191:         $next = $child->nextSibling;
        !           192:         $head->removeChild($child);
        !           193:         if ($child->nodeType != XML_ELEMENT_NODE ||
        !           194:             string_in_array(['title','script','style','meta','link','import','base'], $child->nodeName)) {
        !           195:           $htmlhead->appendChild($child);
        !           196:         } else {
        !           197:           # this should not be in head
        !           198:           insert_after_or_first($root, $child, $current_node);
        !           199:         }
        !           200:       }
        !           201:       $head->parentNode->removeChild($head);
        !           202:     }
        !           203:     foreach my $child (@titles, @links, @styles) {
        !           204:       $child->parentNode->removeChild($child);
        !           205:       $htmlhead->appendChild($child);
        !           206:     }
        !           207:     insert_after_or_first($root, $htmlhead, $current_node);
        !           208:     $current_node = $htmlhead;
        !           209:   }
        !           210:   # body
        !           211:   my $htmlbody = undef;
        !           212:   my @bodies = $doc->getElementsByTagName('body');
        !           213:   if (scalar(@bodies) > 0) {
        !           214:     # TODO: fix content and position of body elements
        !           215:     if ($root->nodeName eq 'problem') {
        !           216:       foreach my $body (@bodies) {
        !           217:         replace_by_children($body);
        !           218:       }
        !           219:     }
        !           220:   }
        !           221:   # add all the meta elements afterwards when they are LON-CAPA meta. Remove all HTML meta.
        !           222:   my @meta_names = ('abstract','author','authorspace','avetries','avetries_list','clear','comefrom','comefrom_list','copyright','correct','count','course','course_list','courserestricted','creationdate','dependencies','depth','difficulty','difficulty_list','disc','disc_list','domain','end','field','firstname','generation','goto','goto_list','groupname','helpful','highestgradelevel','hostname','id','keynum','keywords','language','lastname','lastrevisiondate','lowestgradelevel','middlename','mime','modifyinguser','notes','owner','permanentemail','scope','sequsage','sequsage_list','standards','start','stdno','stdno_list','subject','technical','title','url','username','value','version');
        !           223:   my @metas = $doc->getElementsByTagName('meta');
        !           224:   foreach my $meta (@metas) {
        !           225:     $meta->parentNode->removeChild($meta);
        !           226:     my $name = $meta->getAttribute('name');
        !           227:     my $content = $meta->getAttribute('content');
        !           228:     if (defined $name && defined $content && string_in_array(\@meta_names, lc($name))) {
        !           229:       my $lcmeta = $doc->createElement('meta');
        !           230:       $lcmeta->setAttribute('name', lc($name));
        !           231:       $lcmeta->setAttribute('content', $content);
        !           232:       insert_after_or_first($root, $lcmeta, $current_node);
        !           233:       $current_node = $lcmeta;
        !           234:     }
        !           235:   }
        !           236:   return($root);
        !           237: }
        !           238: 
        !           239: # insert the new child under parent after the reference child, or as the first child if the reference child is not defined
        !           240: sub insert_after_or_first {
        !           241:   my ($parent, $newchild, $refchild) = @_;
        !           242:   if (defined $refchild) {
        !           243:     $parent->insertAfter($newchild, $refchild);
        !           244:   } elsif (defined $parent->firstChild) {
        !           245:     $parent->insertBefore($newchild, $parent->firstChild);
        !           246:   } else {
        !           247:     $parent->appendChild($newchild);
        !           248:   }
        !           249: }
        !           250: 
        !           251: # removes all elements with given names inside the node, but keep the content
        !           252: sub remove_elements {
        !           253:   my ($node, $to_remove) = @_;
        !           254:   my $nextChild;
        !           255:   for (my $child=$node->firstChild; defined $child; $child=$nextChild) {
        !           256:     $nextChild = $child->nextSibling;
        !           257:     my $type = $node->nodeType;
        !           258:     if ($type == XML_ELEMENT_NODE) {
        !           259:       if (string_in_array($to_remove, $child->nodeName)) {
        !           260:         my $first_non_white = $child->firstChild;
        !           261:         if (defined $first_non_white && $first_non_white->nodeType == XML_TEXT_NODE &&
        !           262:             $first_non_white->nodeValue =~ /^\s*$/) {
        !           263:           $first_non_white = $first_non_white->nextSibling;
        !           264:         }
        !           265:         if (defined $first_non_white) {
        !           266:           $nextChild = $first_non_white;
        !           267:           replace_by_children($child);
        !           268:         } else {
        !           269:           $node->removeChild($child);
        !           270:         }
        !           271:       } else {
        !           272:         remove_elements($child, $to_remove);
        !           273:       }
        !           274:     }
        !           275:   }
        !           276: }
        !           277: 
        !           278: # removes some attributes that have an invalid empty value
        !           279: sub remove_empty_attributes {
        !           280:   my ($root) = @_;
        !           281:   my $doc = $root->ownerDocument;
        !           282:   # this list is based on validation errors in the MSU subset (it could be more complete if it was based on the schema)
        !           283:   my @attributes = (
        !           284:     ['curve', ['pointsize']],
        !           285:     ['foil', ['location']],
        !           286:     ['foilgroup', ['checkboxoptions', 'options', 'texoptions']],
        !           287:     ['gnuplot', ['pattern', 'texwidth']],
        !           288:     ['img', ['height', 'texheight', 'texwidth', 'texwrap', 'width']],
        !           289:     ['import', ['importmode']],
        !           290:     ['optionresponse', ['max']],
        !           291:     ['organicstructure', ['options']],
        !           292:     ['radiobuttonresponse', ['max']],
        !           293:     ['randomlabel', ['height', 'texwidth', 'width']],
        !           294:     ['stringresponse', ['type']],
        !           295:     ['textline', ['size']],
        !           296:   );
        !           297:   foreach my $element_attributes (@attributes) {
        !           298:     my $element_name = $element_attributes->[0];
        !           299:     my $attribute_names = $element_attributes->[1];
        !           300:     my @elements = $doc->getElementsByTagName($element_name);
        !           301:     foreach my $element (@elements) {
        !           302:       foreach my $attribute_name (@$attribute_names) {
        !           303:         my $value = $element->getAttribute($attribute_name);
        !           304:         if (defined $value && $value =~ /^\s*$/) {
        !           305:           $element->removeAttribute($attribute_name);
        !           306:         }
        !           307:       }
        !           308:     }
        !           309:   }
        !           310: }
        !           311: 
        !           312: # fixes the case for a few attributes that are not all lowercase
        !           313: # (the HTML parser used in html_to_xml turns everything lowercase, which is a good thing in general)
        !           314: sub fix_attribute_case {
        !           315:   my ($root) = @_;
        !           316:   my $doc = $root->ownerDocument;
        !           317:   my @attributes = (
        !           318:     ['labelgroup', ['TeXsize']],
        !           319:     ['h1', ['TeXsize']],
        !           320:     ['h2', ['TeXsize']],
        !           321:     ['h3', ['TeXsize']],
        !           322:     ['h4', ['TeXsize']],
        !           323:     ['h5', ['TeXsize']],
        !           324:     ['h6', ['TeXsize']],
        !           325:     # font and basefont have a TeXsize but will be removed
        !           326:     ['optionresponse', ['TeXlayout']],
        !           327:     ['itemgroup', ['TeXitemgroupwidth']],
        !           328:     ['Task', ['OptionalRequired']],
        !           329:     ['Question', ['OptionalRequired','Mandatory']],
        !           330:     ['Instance', ['OptionalRequired','Disabled']],
        !           331:     ['Criteria', ['Mandatory']],
        !           332:     ['table', ['TeXwidth','TeXtheme']],
        !           333:     ['td', ['TeXwidth']],
        !           334:     ['th', ['TeXwidth']],
        !           335:     ['img', ['TeXwidth','TeXheight','TeXwrap']],
        !           336:   );
        !           337:   foreach my $element_attributes (@attributes) {
        !           338:     my $element_name = $element_attributes->[0];
        !           339:     my $attribute_names = $element_attributes->[1];
        !           340:     my @elements = $doc->getElementsByTagName($element_name);
        !           341:     foreach my $element (@elements) {
        !           342:       foreach my $attribute_name (@$attribute_names) {
        !           343:         my $value = $element->getAttribute(lc($attribute_name));
        !           344:         if (defined $value) {
        !           345:           $element->removeAttribute(lc($attribute_name));
        !           346:           $element->setAttribute($attribute_name, $value);
        !           347:         }
        !           348:       }
        !           349:     }
        !           350:   }
        !           351: }
        !           352: 
        !           353: # Replaces m by HTML, tm and/or dtm (which will be replaced by <m> later, but they are useful
        !           354: #   to know if the element is a block element or not).
        !           355: # m might contain non-math LaTeX, while tm and dtm may only contain math.
        !           356: # Returns 1 if the file should be fixed by hand, 0 otherwise.
        !           357: sub replace_m {
        !           358:   my ($root) = @_;
        !           359:   my $doc = $root->ownerDocument;
        !           360:   my $fix_by_hand = 0;
        !           361:   # search for variable declarations
        !           362:   my @variables = ();
        !           363:   my @scripts = $root->getElementsByTagName('script');
        !           364:   foreach my $script (@scripts) {
        !           365:     my $type = $script->getAttribute('type');
        !           366:     if (defined $type && $type eq 'loncapa/perl') {
        !           367:       if (defined $script->firstChild && $script->firstChild->nodeType == XML_TEXT_NODE) {
        !           368:         my $text = $script->firstChild->nodeValue;
        !           369:         # NOTE: we are not interested in replacing "@value", only "$value"
        !           370:         # this regexp is for "  $a = ..." and "  $a[...] = ..."
        !           371:         while ($text =~ /^[ \t]*\$([a-zA-Z_0-9]+)(?:\[[^\]]+\])?[ \t]*=/gm) {
        !           372:           if (!string_in_array(\@variables, $1)) {
        !           373:             push(@variables, $1);
        !           374:           }
        !           375:         }
        !           376:         # this regexp is for "...;  $a = ..." and "...;  $a[...] = ..."
        !           377:         while ($text =~ /^[^'"\/;]+;[ \t]*\$([a-zA-Z_0-9]+)(?:\[[^\]]+\])?[ \t]*=/gm) {
        !           378:           if (!string_in_array(\@variables, $1)) {
        !           379:             push(@variables, $1);
        !           380:           }
        !           381:         }
        !           382:         # this regexp is for "  @a = ..."
        !           383:         while ($text =~ /^[ \t]*\@([a-zA-Z_0-9]+)[ \t]*=/gm) {
        !           384:           if (!string_in_array(\@variables, $1)) {
        !           385:             push(@variables, $1);
        !           386:           }
        !           387:         }
        !           388:         # this regexp is for "  ($a, $b, $c) = ..."
        !           389:         my @matches = ($text =~ /^[ \t]*\([ \t]*\$([a-zA-Z_0-9]+)(?:[ \t]*,[ \t]*\$([a-zA-Z_0-9]+))*[ \t]*\)[ \t]*=/gm);
        !           390:         foreach my $match (@matches) {
        !           391:           if (!defined $match) {
        !           392:             next; # not sure why it happens, but it does
        !           393:           }
        !           394:           if (!string_in_array(\@variables, $match)) {
        !           395:             push(@variables, $match);
        !           396:           }
        !           397:         }
        !           398:         # and this one is for "push @a"
        !           399:         while ($text =~ /^[ \t]*push @([a-zA-Z_0-9]+)[ \t,]*/gm) {
        !           400:           if (!string_in_array(\@variables, $1)) {
        !           401:             push(@variables, $1);
        !           402:           }
        !           403:         }
        !           404:         # use the opportunity to report usage of <m> in Perl scripts
        !           405:         if ($text =~ /^[^#].*<m[ >]/m) {
        !           406:           if ($warnings) {
        !           407:             print "WARNING: <m> is used in a script, it should be converted by hand\n";
        !           408:           }
        !           409:           $fix_by_hand = 1;
        !           410:         }
        !           411:       }
        !           412:     }
        !           413:   }
        !           414:   my @ms = $root->getElementsByTagName('m');
        !           415:   foreach my $m (@ms) {
        !           416:     if (!defined $m->firstChild) {
        !           417:       $m->parentNode->removeChild($m);
        !           418:       next;
        !           419:     }
        !           420:     if (defined $m->firstChild->nextSibling || $m->firstChild->nodeType != XML_TEXT_NODE) {
        !           421:       if ($warnings) {
        !           422:         print "WARNING: m value is not simple text\n";
        !           423:       }
        !           424:       $fix_by_hand = 1;
        !           425:       next;
        !           426:     }
        !           427:     my $text = $m->firstChild->nodeValue;
        !           428:     my $text_before_variable_replacement = $text;
        !           429:     my $var_key1 = 'dfhg3df54hg65hg4';
        !           430:     my $var_key2 = 'dfhg654d6f5g4h5f';
        !           431:     my $eval = defined $m->getAttribute('eval') && $m->getAttribute('eval') eq 'on';
        !           432:     if ($eval) {
        !           433:       # replace variables
        !           434:       foreach my $variable (@variables) {
        !           435:         my $replacement = $var_key1.$variable.$var_key2;
        !           436:         $text =~ s/\$$variable(?![a-zA-Z])/$replacement/ge;
        !           437:         $text =~ s/\$\{$variable\}/$replacement/ge;
        !           438:       }
        !           439:     }
        !           440:     # check if the expression is enclosed in math separators: $ $$ \( \) \[ \]
        !           441:     # if so, replace the whole node by dtm or tm
        !           442:     my $new_text;
        !           443:     my $new_node_name;
        !           444:     if ($text =~ /^\s*\$\$([^\$]*)\$\$\s*$/) {
        !           445:       $new_node_name = 'dtm';
        !           446:       $new_text = $1;
        !           447:     } elsif ($text =~ /^\s*\\\[(.*)\\\]\s*$/s) {
        !           448:       $new_node_name = 'dtm';
        !           449:       $new_text = $1;
        !           450:     } elsif ($text =~ /^\s*\$([^\$]*)\$\s*$/) {
        !           451:       $new_node_name = 'tm';
        !           452:       $new_text = $1;
        !           453:     } elsif ($text =~ /^\s*\\\((.*)\\\)\s*$/s) {
        !           454:       $new_node_name = 'tm';
        !           455:       $new_text = $1;
        !           456:     }
        !           457:     if (defined $new_node_name) {
        !           458:       if ($eval) {
        !           459:         foreach my $variable (@variables) {
        !           460:           my $replacement = $var_key1.$variable.$var_key2;
        !           461:           $new_text =~ s/$replacement([a-zA-Z])/\${$variable}$1/g;
        !           462:           $new_text =~ s/$replacement/\$$variable/g;
        !           463:         }
        !           464:       }
        !           465:       my $new_node = $doc->createElement($new_node_name);
        !           466:       if ($eval) {
        !           467:         $new_node->setAttribute('eval', 'on');
        !           468:       }
        !           469:       $new_node->appendChild($doc->createTextNode($new_text));
        !           470:       $m->parentNode->replaceChild($new_node, $m);
        !           471:       next;
        !           472:     }
        !           473:     if ($text !~ /\$|\\\(|\\\)|\\\[|\\\]/) {
        !           474:       # there are no math separators inside
        !           475:       # try to guess if this is meant as math
        !           476:       my $found_math = 0;
        !           477:       foreach my $symbol (@latex_math) {
        !           478:         if (index($text, $symbol) != -1) {
        !           479:           $found_math = 1;
        !           480:           last;
        !           481:         }
        !           482:       }
        !           483:       if ($found_math) {
        !           484:         # interpret the whole text as LaTeX inline math
        !           485:         my $new_node = $doc->createElement('tm');
        !           486:         if ($eval) {
        !           487:           $new_node->setAttribute('eval', 'on');
        !           488:         }
        !           489:         $new_node->appendChild($doc->createTextNode($text_before_variable_replacement));
        !           490:         $m->parentNode->replaceChild($new_node, $m);
        !           491:         next;
        !           492:       }
        !           493:       # no math symbol found, we will convert the text with tth
        !           494:     }
        !           495:     
        !           496:     # there are math separators inside, even after hiding variables, or there was no math symbol
        !           497:     
        !           498:     # hide math parts inside before running tth
        !           499:     my $math_key1 = '#ghjgdh5hg45gf';
        !           500:     my $math_key2 = '#';
        !           501:     my @maths = ();
        !           502:     my @separators = (['$$','$$'], ['\\(','\\)'], ['\\[','\\]'], ['$','$']);
        !           503:     foreach my $seps (@separators) {
        !           504:       my $sep1 = $seps->[0];
        !           505:       my $sep2 = $seps->[1];
        !           506:       my $pos1 = index($text, $sep1);
        !           507:       if ($pos1 == -1) {
        !           508:         next;
        !           509:       }
        !           510:       my $pos2 = index($text, $sep2, $pos1+length($sep1));
        !           511:       while ($pos1 != -1 && $pos2 != -1) {
        !           512:         my $replace = substr($text, $pos1, $pos2+length($sep2)-$pos1);
        !           513:         push(@maths, $replace);
        !           514:         my $by = $math_key1.scalar(@maths).$math_key2;
        !           515:         $text = substr($text, 0, $pos1).$by.substr($text, $pos2+length($sep2));
        !           516:         $pos1 = index($text, $sep1);
        !           517:         if ($pos1 != -1) {
        !           518:           $pos2 = index($text, $sep2, $pos1+length($sep1));
        !           519:         }
        !           520:       }
        !           521:     }
        !           522:     # get HTML as text from tth
        !           523:     my $html_text = tth($text);
        !           524:     # replace math by replacements
        !           525:     for (my $i=0; $i < scalar(@maths); $i++) {
        !           526:       my $math = $maths[$i];
        !           527:       $math =~ s/&/&amp;/g;
        !           528:       $math =~ s/</&lt;/g;
        !           529:       $math =~ s/>/&gt;/g;
        !           530:       if ($math =~ /^\$\$(.*)\$\$$/s) {
        !           531:         $math = '<dtm>'.$1.'</dtm>';
        !           532:       } elsif ($math =~ /^\\\[(.*)\\\]$/s) {
        !           533:         $math = '<dtm>'.$1.'</dtm>';
        !           534:       } elsif ($math =~ /^\\\((.*)\\\)$/s) {
        !           535:         $math = '<tm>'.$1.'</tm>';
        !           536:       } elsif ($math =~ /^\$(.*)\$$/s) {
        !           537:         $math = '<tm>'.$1.'</tm>';
        !           538:       }
        !           539:       my $replace = $math_key1.($i+1).$math_key2;
        !           540:       $html_text =~ s/$replace/$math/;
        !           541:     }
        !           542:     # replace variables if necessary
        !           543:     if ($eval) {
        !           544:       foreach my $variable (@variables) {
        !           545:         my $replacement = $var_key1.$variable.$var_key2;
        !           546:         $html_text =~ s/$replacement([a-zA-Z])/\${$variable}$1/g;
        !           547:         $html_text =~ s/$replacement/\$$variable/g;
        !           548:       }
        !           549:     }
        !           550:     my $fragment = html_to_dom($html_text);
        !           551:     $doc->adoptNode($fragment);
        !           552:     $m->parentNode->replaceChild($fragment, $m);
        !           553:     
        !           554:   }
        !           555:   return $fix_by_hand;
        !           556: }
        !           557: 
        !           558: # Returns the HTML equivalent of LaTeX input, using tth
        !           559: sub tth {
        !           560:   my ($text) = @_;
        !           561:   my ($fh, $tmp_path) = tempfile();
        !           562:   binmode($fh, ':utf8');
        !           563:   print $fh $text;
        !           564:   close $fh;
        !           565:   my $output = `tth -r -w2 -u -y0 < $tmp_path 2>/dev/null`;
        !           566:   # hopefully the temp file will not be removed before this point (otherwise we should use unlink_on_destroy 0)
        !           567:   $output =~ s/^\s*|\s*$//;
        !           568:   $output =~ s/<div class="p"><!----><\/div>/<br\/>/; # why is tth using such ugly markup for \newline ?
        !           569:   return $output;
        !           570: }
        !           571: 
        !           572: # transform simple HTML into a DOM fragment (which will need to be adopted by the document)
        !           573: sub html_to_dom {
        !           574:   my ($text) = @_;
        !           575:   $text = '<root>'.$text.'</root>';
        !           576:   my $textref = html_to_xml::html_to_xml(\$text);
        !           577:   utf8::upgrade($$textref); # otherwise the XML parser fails when the HTML parser turns &nbsp; into a character
        !           578:   my $dom_doc = XML::LibXML->load_xml(string => $textref);
        !           579:   my $root = $dom_doc->documentElement;
        !           580:   remove_empty_style($root);
        !           581:   my $fragment = $dom_doc->createDocumentFragment();
        !           582:   my $next;
        !           583:   for (my $n=$root->firstChild; defined $n; $n=$next) {
        !           584:     $next = $n->nextSibling;
        !           585:     $root->removeChild($n);
        !           586:     $fragment->appendChild($n);
        !           587:   }
        !           588:   return($fragment);
        !           589: }
        !           590: 
        !           591: # Use the linked sty files to guess which newly defined elements should be considered blocks.
        !           592: # Also adds to @containing_styles the sty elements that contain styles.
        !           593: # @param {string} fn - the file path (we only extract the directory path from it)
        !           594: sub add_sty_blocks {
        !           595:   my ($fn, $root, $all_block) = @_;
        !           596:   my $doc = $root->ownerDocument;
        !           597:   my @parserlibs = $doc->getElementsByTagName('parserlib');
        !           598:   my @libs = ();
        !           599:   foreach my $parserlib (@parserlibs) {
        !           600:     if (defined $parserlib->firstChild && $parserlib->firstChild->nodeType == XML_TEXT_NODE) {
        !           601:       my $value = $parserlib->firstChild->nodeValue;
        !           602:       $value =~ s/^\s+|\s+$//g;
        !           603:       if ($value ne '') {
        !           604:         push(@libs, $value);
        !           605:       }
        !           606:     }
        !           607:   }
        !           608:   my ($name, $path, $suffix) = fileparse($fn);
        !           609:   foreach my $sty (@libs) {
        !           610:     if (substr($sty, 0, 1) eq '/') {
        !           611:       $sty = $RES_DIR.$sty;
        !           612:     } else {
        !           613:       $sty = $path.$sty;
        !           614:     }
        !           615:     my $new_elements = parse_sty($sty, $all_block);
        !           616:     better_guess($root, $new_elements, $all_block);
        !           617:     my $new_blocks = $new_elements->{'block'};
        !           618:     my $new_inlines = $new_elements->{'inline'};
        !           619:     push(@$all_block, @{$new_blocks});
        !           620:     #push(@inlines, @{$new_inlines}); # we are not using a list of inline elements at this point
        !           621:   }
        !           622: }
        !           623: 
        !           624: ##
        !           625: # Parses a sty file and returns lists of block and inline elements.
        !           626: # @param {string} fn - the file path
        !           627: ##
        !           628: sub parse_sty {
        !           629:   my ($fn, $all_block) = @_;
        !           630:   my @blocks = ();
        !           631:   my @inlines = ();
        !           632:   my $p = HTML::TokeParser->new($fn);
        !           633:   if (! $p) {
        !           634:     die "post_xml.pl: parse_sty: Error reading $fn\n";
        !           635:   }
        !           636:   $p->empty_element_tags(1);
        !           637:   my $in_definetag = 0;
        !           638:   my $in_render = 0;
        !           639:   my %newtags = ();
        !           640:   my $newtag = '';
        !           641:   my $is_block = 0;
        !           642:   while (my $token = $p->get_token) {
        !           643:     if ($token->[0] eq 'S') {
        !           644:       my $tag = lc($token->[1]);
        !           645:       if ($tag eq 'definetag') {
        !           646:         $in_definetag = 1;
        !           647:         $is_block = 0;
        !           648:         my $attributes = $token->[2];
        !           649:         $newtag = $attributes->{'name'};
        !           650:         if (substr($newtag, 0, 1) eq '/') {
        !           651:           $newtag = substr($newtag, 1);
        !           652:         }
        !           653:       } elsif ($in_definetag && $tag eq 'render') {
        !           654:         $in_render = 1;
        !           655:         $is_block = 0;
        !           656:       } elsif ($in_render) {
        !           657:         if (string_in_array($all_block, $tag)) {
        !           658:           $is_block = 1;
        !           659:         }
        !           660:       }
        !           661:     } elsif ($token->[0] eq 'E') {
        !           662:       my $tag = lc($token->[1]);
        !           663:       if ($tag eq 'definetag') {
        !           664:         $in_definetag = 0;
        !           665:         if (defined $newtags{$newtag}) {
        !           666:           $newtags{$newtag} = $newtags{$newtag} || $is_block;
        !           667:         } else {
        !           668:           $newtags{$newtag} = $is_block;
        !           669:         }
        !           670:       } elsif ($in_definetag && $tag eq 'render') {
        !           671:         $in_render = 0;
        !           672:       }
        !           673:     }
        !           674:   }
        !           675:   foreach $newtag (keys(%newtags)) {
        !           676:     if ($newtags{$newtag} == 1) {
        !           677:       push(@blocks, $newtag);
        !           678:     } else {
        !           679:       push(@inlines, $newtag);
        !           680:     }
        !           681:   }
        !           682:   return {'block'=>\@blocks, 'inline'=>\@inlines};
        !           683: }
        !           684: 
        !           685: ##
        !           686: # Marks as block the elements that contain block elements in the input file.
        !           687: # Also adds to @containing_styles the sty elements that contain styles.
        !           688: # @param {string} fn - the file path
        !           689: # @param {Hash<string,Array>} new_elements - contains arrays in 'block' and 'inline'
        !           690: ##
        !           691: sub better_guess {
        !           692:   my ($root, $new_elements, $all_block) = @_;
        !           693:   my $new_blocks = $new_elements->{'block'};
        !           694:   my $new_inlines = $new_elements->{'inline'};
        !           695:   
        !           696:   my @change = (); # change these elements from inline to block
        !           697:   foreach my $tag (@{$new_inlines}) {
        !           698:     my @nodes = $root->getElementsByTagName($tag);
        !           699:     NODE_LOOP: foreach my $node (@nodes) {
        !           700:       for (my $child=$node->firstChild; defined $child; $child=$child->nextSibling) {
        !           701:         if ($child->nodeType == XML_ELEMENT_NODE) {
        !           702:           if (string_in_array($all_block, $child->nodeName) || string_in_array($new_blocks, $child->nodeName)) {
        !           703:             push(@change, $tag);
        !           704:             last NODE_LOOP;
        !           705:           }
        !           706:         }
        !           707:       }
        !           708:     }
        !           709:   }
        !           710:   foreach my $inline (@change) {
        !           711:     my $index = 0;
        !           712:     $index++ until $new_inlines->[$index] eq $inline;
        !           713:     splice(@{$new_inlines}, $index, 1);
        !           714:     push(@{$new_blocks}, $inline);
        !           715:   }
        !           716:   # add to @containing_styles when a style is used inside
        !           717:   # NOTE: some sty elements will be added even though they should not, but if we don't do that
        !           718:   # all style will be removed in the sty elements.
        !           719:   foreach my $tag ((@{$new_blocks}, @{$new_inlines})) {
        !           720:     my @nodes = $root->getElementsByTagName($tag);
        !           721:     NODE_LOOP: foreach my $node (@nodes) {
        !           722:       for (my $child=$node->firstChild; defined $child; $child=$child->nextSibling) {
        !           723:         if ($child->nodeType == XML_ELEMENT_NODE) {
        !           724:           if (string_in_array(\@html_styles, $child->nodeName)) {
        !           725:             push(@containing_styles, $tag);
        !           726:             last NODE_LOOP;
        !           727:           }
        !           728:         }
        !           729:       }
        !           730:     }
        !           731:   }
        !           732: }
        !           733: 
        !           734: # When a style element contains a block, move the style inside the block where it is allowed.
        !           735: # style/block/other -> block/style/other
        !           736: # When a style is used where it is not allowed, move it inside its children or remove it (unless it contains only text)
        !           737: # element_not_containing_styles/style/other -> element_not_containing_styles/other/style (except if other is a style)
        !           738: # The fix is not perfect in the case of element_not_containing_styles/style1/style2/block/text (style1 will be lost):
        !           739: # element_not_containing_styles/style1/style2/block/text -> element_not_containing_styles/block/style2/text
        !           740: # (a solution to this problem would be to merge the styles in a span)
        !           741: # NOTE: .sty defined elements might have been added to @containing_styles by better_guess().
        !           742: sub fix_block_styles {
        !           743:   my ($element, $all_block) = @_;
        !           744:   my $doc = $element->ownerDocument;
        !           745:   if (string_in_array(\@html_styles, $element->nodeName)) {
        !           746:     # move spaces out of the style element
        !           747:     if (defined $element->firstChild && $element->firstChild->nodeType == XML_TEXT_NODE) {
        !           748:       my $child = $element->firstChild;
        !           749:       if ($child->nodeValue =~ /^(\s+)(\S.*)$/s) {
        !           750:         $element->parentNode->insertBefore($doc->createTextNode($1), $element);
        !           751:         $child->setData($2);
        !           752:       }
        !           753:     }
        !           754:     if (defined $element->lastChild && $element->lastChild->nodeType == XML_TEXT_NODE) {
        !           755:       my $child = $element->lastChild;
        !           756:       if ($child->nodeValue =~ /^(.*\S)(\s+)$/s) {
        !           757:         $element->parentNode->insertAfter($doc->createTextNode($2), $element);
        !           758:         $child->setData($1);
        !           759:       }
        !           760:     }
        !           761:     
        !           762:     my $found_block = 0;
        !           763:     for (my $child=$element->firstChild; defined $child; $child=$child->nextSibling) {
        !           764:       if ($child->nodeType == XML_ELEMENT_NODE && string_in_array($all_block, $child->nodeName)) {
        !           765:         $found_block = 1;
        !           766:         last;
        !           767:       }
        !           768:     }
        !           769:     my $no_style_here = !string_in_array(\@containing_styles, $element->parentNode->nodeName);
        !           770:     if ($found_block || $no_style_here) {
        !           771:       # there is a block or the style is not allowed here,
        !           772:       # the style element has to be replaced by its modified children
        !           773:       my $s; # a clone of the style
        !           774:       my $next;
        !           775:       for (my $child=$element->firstChild; defined $child; $child=$next) {
        !           776:         $next = $child->nextSibling;
        !           777:         if ($child->nodeType == XML_ELEMENT_NODE && (string_in_array($all_block, $child->nodeName) ||
        !           778:             $child->nodeName eq 'br' || $no_style_here)) {
        !           779:           # avoid inverting a style with a style with $no_style_here (that would cause endless recursion)
        !           780:           if (!$no_style_here || (!string_in_array(\@html_styles, $child->nodeName) &&
        !           781:               string_in_array(\@containing_styles, $child->nodeName))) {
        !           782:             # block node or inline node when the style is not allowed:
        !           783:             # move all children inside the style, and make the style the only child
        !           784:             $s = $element->cloneNode();
        !           785:             my $next2;
        !           786:             for (my $child2=$child->firstChild; defined $child2; $child2=$next2) {
        !           787:               $next2 = $child2->nextSibling;
        !           788:               $child->removeChild($child2);
        !           789:               $s->appendChild($child2);
        !           790:             }
        !           791:             $child->appendChild($s);
        !           792:           }
        !           793:           $s = undef;
        !           794:         } elsif (($child->nodeType == XML_TEXT_NODE && $child->nodeValue !~ /^\s*$/) ||
        !           795:             $child->nodeType == XML_ELEMENT_NODE) {
        !           796:           # if the style is allowed, move text and inline nodes inside the style
        !           797:           if (!$no_style_here) {
        !           798:             if (!defined $s) {
        !           799:               $s = $element->cloneNode();
        !           800:               $element->insertBefore($s, $child);
        !           801:             }
        !           802:             $element->removeChild($child);
        !           803:             $s->appendChild($child);
        !           804:           }
        !           805:         } else {
        !           806:           # do not put other nodes inside the style
        !           807:           $s = undef;
        !           808:         }
        !           809:       }
        !           810:       # now replace by children and fix them
        !           811:       my $parent = $element->parentNode;
        !           812:       for (my $child=$element->firstChild; defined $child; $child=$next) {
        !           813:         $next = $child->nextSibling;
        !           814:         $element->removeChild($child);
        !           815:         $parent->insertBefore($child, $element);
        !           816:         if ($child->nodeType == XML_ELEMENT_NODE) {
        !           817:           fix_block_styles($child, $all_block);
        !           818:         }
        !           819:       }
        !           820:       $parent->removeChild($element);
        !           821:       return;
        !           822:     }
        !           823:   }
        !           824:   # otherwise fix all children
        !           825:   my $next;
        !           826:   for (my $child=$element->firstChild; defined $child; $child=$next) {
        !           827:     $next = $child->nextSibling;
        !           828:     if ($child->nodeType == XML_ELEMENT_NODE) {
        !           829:       fix_block_styles($child, $all_block);
        !           830:     }
        !           831:   }
        !           832: }
        !           833: 
        !           834: # removes empty font elements and font elements that contain at least one block element
        !           835: # replaces other font elements by equivalent span
        !           836: sub fix_fonts {
        !           837:   my ($root, $all_block) = @_;
        !           838:   my $doc = $root->ownerDocument;
        !           839:   my @fonts = $root->getElementsByTagName('font');
        !           840:   @fonts = reverse(@fonts); # to deal with the ancestor last in the case of font/font
        !           841:   foreach my $font (@fonts) {
        !           842:     my $block = 0;
        !           843:     for (my $child=$font->firstChild; defined $child; $child=$child->nextSibling) {
        !           844:       if (string_in_array($all_block, $child->nodeName) || string_in_array(\@inline_like_block, $child->nodeName)) {
        !           845:         $block = 1;
        !           846:         last;
        !           847:       }
        !           848:     }
        !           849:     if (!defined $font->firstChild || $block) {
        !           850:       # empty font or font containing block elements
        !           851:       # replace this node by its content
        !           852:       replace_by_children($font);
        !           853:     } else {
        !           854:       # replace by equivalent span
        !           855:       my $color = get_non_empty_attribute($font, 'color');
        !           856:       my $size = get_non_empty_attribute($font, 'size');
        !           857:       my $face = get_non_empty_attribute($font, 'face');
        !           858:       if (defined $face) {
        !           859:         $face =~ s/^,|,$//;
        !           860:       }
        !           861:       if (!defined $color && !defined $size && !defined $face) {
        !           862:         # useless font element: replace this node by its content
        !           863:         replace_by_children($font);
        !           864:         next;
        !           865:       }
        !           866:       my $replacement;
        !           867:       tie (my %properties, 'Tie::IxHash', ());
        !           868:       if (!defined $color && !defined $size && defined $face && lc($face) eq 'symbol') {
        !           869:         $replacement = $doc->createDocumentFragment();
        !           870:       } else {
        !           871:         $replacement = $doc->createElement('span');
        !           872:         my $css = '';
        !           873:         if (defined $color) {
        !           874:           $color =~ s/^x/#/;
        !           875:           $properties{'color'} = $color;
        !           876:         }
        !           877:         if (defined $size) {
        !           878:           my %hash = (
        !           879:             '1' => 'x-small',
        !           880:             '2' => 'small',
        !           881:             '3' => 'medium',
        !           882:             '4' => 'large',
        !           883:             '5' => 'x-large',
        !           884:             '6' => 'xx-large',
        !           885:             '7' => '300%',
        !           886:             '-1' => 'small',
        !           887:             '-2' => 'x-small',
        !           888:             '+1' => 'large',
        !           889:             '+2' => 'x-large',
        !           890:             '+3' => 'xx-large',
        !           891:             '+4' => '300%',
        !           892:           );
        !           893:           my $value = $hash{$size};
        !           894:           if (!defined $value) {
        !           895:             $value = 'medium';
        !           896:           }
        !           897:           $properties{'font-size'} = $value;
        !           898:         }
        !           899:         if (defined $face) {
        !           900:           if (lc($face) ne 'symbol' && lc($face) ne 'bold') {
        !           901:             $properties{'font-family'} = $face;
        !           902:           }
        !           903:         }
        !           904:         set_css_properties($replacement, \%properties);
        !           905:       }
        !           906:       if (defined $face && lc($face) eq 'symbol') {
        !           907:         # convert all content to unicode
        !           908:         my $next;
        !           909:         for (my $child=$font->firstChild; defined $child; $child=$next) {
        !           910:           $next = $child->nextSibling;
        !           911:           if ($child->nodeType == XML_TEXT_NODE) {
        !           912:             my $value = $child->nodeValue;
        !           913:             $value =~ tr/ABGDEZHQIKLMNXOPRSTUFCYWabgdezhqiklmnxoprVstufcywJjv¡«¬®/ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩαβγδεζηθικλμνξοπρςστυφχψωϑϕϖϒ↔←→/;
        !           914:             $child->setData($value);
        !           915:           }
        !           916:         }
        !           917:       }
        !           918:       # replace the font node
        !           919:       if ($replacement->nodeType == XML_ELEMENT_NODE && !defined $font->previousSibling &&
        !           920:           !defined $font->nextSibling && string_in_array(\@accepting_style, $font->parentNode->nodeName)) {
        !           921:         # use CSS on the parent block and replace font by its children instead of using a new element
        !           922:         set_css_properties($font->parentNode, \%properties);
        !           923:         replace_by_children($font);
        !           924:       } else {
        !           925:         # move all font children inside the replacement (span or fragment)
        !           926:         my $next;
        !           927:         for (my $child=$font->firstChild; defined $child; $child=$next) {
        !           928:           $next = $child->nextSibling;
        !           929:           $font->removeChild($child);
        !           930:           $replacement->appendChild($child);
        !           931:         }
        !           932:         # replace font
        !           933:         $font->parentNode->replaceChild($replacement, $font);
        !           934:       }
        !           935:     }
        !           936:   }
        !           937:   $root->normalize();
        !           938: }
        !           939: 
        !           940: # replaces u by <span style="text-decoration: underline">
        !           941: sub replace_u {
        !           942:   my ($root) = @_;
        !           943:   my $doc = $root->ownerDocument;
        !           944:   my @us = $root->getElementsByTagName('u');
        !           945:   foreach my $u (@us) {
        !           946:     my $span = $doc->createElement('span');
        !           947:     $span->setAttribute('style', 'text-decoration: underline');
        !           948:     my $next;
        !           949:     for (my $child=$u->firstChild; defined $child; $child=$next) {
        !           950:       $next = $child->nextSibling;
        !           951:       $u->removeChild($child);
        !           952:       $span->appendChild($child);
        !           953:     }
        !           954:     $u->parentNode->replaceChild($span, $u);
        !           955:   }
        !           956: }
        !           957: 
        !           958: # removes CDATA sections tags that have not been parsed correcty by the HTML parser
        !           959: # also removes bad comments in script elements
        !           960: sub remove_bad_cdata_sections {
        !           961:   my ($root) = @_;
        !           962:   my $doc = $root->ownerDocument;
        !           963:   foreach my $name (@preserve_elements) {
        !           964:     my @nodes = $root->getElementsByTagName($name);
        !           965:     foreach my $node (@nodes) {
        !           966:       if (defined $node->firstChild && $node->firstChild->nodeType == XML_TEXT_NODE) {
        !           967:         my $value = $node->firstChild->nodeValue;
        !           968:         if ($name eq 'script' && (!defined $node->getAttribute('type') || $node->getAttribute('type') ne 'loncapa/perl') &&
        !           969:             !defined $node->firstChild->nextSibling && $value =~ /^(\s*)<!--(.*)-->(\s*)$/) {
        !           970:           # web browsers interpret that as a real comment when it is on 1 line, but the Perl HTML parser thinks it is the script
        !           971:           # -> turning it back into a comment
        !           972:           # (this is only true for Javascript script elements, since LON-CAPA does not parse loncapa/perl scripts in the same way)
        !           973:           $node->removeChild($node->firstChild);
        !           974:           $node->appendChild($doc->createComment($2));
        !           975:           next;
        !           976:         }
        !           977:         # at the beginning:
        !           978:         $value =~ s/^(\s*)<!\[CDATA\[/$1/; # <![CDATA[
        !           979:         $value =~ s/^(\s*)\/\*\s*<!\[CDATA\[\s*\*\//$1/; # /* <![CDATA[ */
        !           980:         $value =~ s/^(\s*)\/\/\s*<!\[CDATA\[/$1/; # // <![CDATA[
        !           981:         $value =~ s/^(\s*)(\/\/)?\s*<!--/$1/; # // <!--
        !           982:         # at the end:
        !           983:         $value =~ s/\/\/\s*\]\]>(\s*)$/$1/; # // ]]>
        !           984:         $value =~ s/\]\]>(\s*)$/$1/; # ]]>
        !           985:         $value =~ s/(\/\/)?\s*-->(\s*)$/$2/; # // -->
        !           986:         $value =~ s/\/\*\s*\]\]>\s*\*\/(\s*)$/$1/; # /* ]]> */
        !           987:         
        !           988:         $value = "\n".$value."\n";
        !           989:         $value =~ s/\s*(\n[ \t]*)/$1/;
        !           990:         $value =~ s/\s+$/\n/;
        !           991:         $node->firstChild->setData($value);
        !           992:       }
        !           993:     }
        !           994:   }
        !           995: }
        !           996: 
        !           997: # adds CDATA sections to scripts
        !           998: sub add_cdata_sections {
        !           999:   my ($root) = @_;
        !          1000:   my $doc = $root->ownerDocument;
        !          1001:   my @scripts = $root->getElementsByTagName('script');
        !          1002:   my @answers = $root->getElementsByTagName('answer');
        !          1003:   foreach my $answer (@answers) {
        !          1004:     my $ancestor = $answer->parentNode;
        !          1005:     my $found_capa_response = 0;
        !          1006:     while (defined $ancestor) {
        !          1007:       if ($ancestor->nodeName eq 'numericalresponse' || $ancestor->nodeName eq 'formularesponse') {
        !          1008:         $found_capa_response = 1;
        !          1009:         last;
        !          1010:       }
        !          1011:       $ancestor = $ancestor->parentNode;
        !          1012:     }
        !          1013:     if (!$found_capa_response) {
        !          1014:       push(@scripts, $answer);
        !          1015:     }
        !          1016:   }
        !          1017:   foreach my $script (@scripts) {
        !          1018:     # use a CDATA section in the normal situation, for any script
        !          1019:     my $first = $script->firstChild;
        !          1020:     if (defined $first && $first->nodeType == XML_TEXT_NODE && !defined $first->nextSibling) {
        !          1021:       my $cdata = $doc->createCDATASection($first->nodeValue);
        !          1022:       $script->replaceChild($cdata, $first);
        !          1023:     }
        !          1024:   }
        !          1025: }
        !          1026: 
        !          1027: # removes "<!--" and "-->" at the beginning and end of style elements
        !          1028: sub fix_style_element {
        !          1029:   my ($root) = @_;
        !          1030:   my @styles = $root->getElementsByTagName('style');
        !          1031:   foreach my $style (@styles) {
        !          1032:     if (defined $style->firstChild && $style->firstChild->nodeType == XML_TEXT_NODE &&
        !          1033:         !defined $style->firstChild->nextSibling) {
        !          1034:       my $text = $style->firstChild->nodeValue;
        !          1035:       if ($text =~ /^\s*<!--(.*)-->\s*$/s) {
        !          1036:         $style->firstChild->setData($1);
        !          1037:       }
        !          1038:     }
        !          1039:   }
        !          1040: }
        !          1041: 
        !          1042: # create missing cells at the end of table rows
        !          1043: sub fix_tables {
        !          1044:   my ($root) = @_;
        !          1045:   my @tables = $root->getElementsByTagName('table');
        !          1046:   foreach my $table (@tables) {
        !          1047:     fix_cells($table);
        !          1048:     foreach my $tbody ($table->getChildrenByTagName('tbody')) {
        !          1049:       fix_cells($tbody);
        !          1050:     }
        !          1051:     foreach my $thead ($table->getChildrenByTagName('thead')) {
        !          1052:       fix_cells($thead);
        !          1053:     }
        !          1054:     foreach my $tfoot ($table->getChildrenByTagName('tfoot')) {
        !          1055:       fix_cells($tfoot);
        !          1056:     }
        !          1057:   }
        !          1058: }
        !          1059: 
        !          1060: # create missing cells at the end of table rows
        !          1061: sub fix_cells {
        !          1062:   my ($table) = @_; # could actually be table, tbody, thead or tfoot
        !          1063:   my $doc = $table->ownerDocument;
        !          1064:   my @nb_cells = ();
        !          1065:   my $max_nb_cells = 0;
        !          1066:   my @rowspans = ();
        !          1067:   my @trs = $table->getChildrenByTagName('tr');
        !          1068:   foreach my $tr (@trs) {
        !          1069:     my $nb_cells;
        !          1070:     if (defined $rowspans[0]) {
        !          1071:       $nb_cells = shift(@rowspans);
        !          1072:     } else {
        !          1073:       $nb_cells = 0;
        !          1074:     }
        !          1075:     for (my $cell=$tr->firstChild; defined $cell; $cell=$cell->nextSibling) {
        !          1076:       if ($cell->nodeName eq 'td' || $cell->nodeName eq 'th') {
        !          1077:         my $colspan = $cell->getAttribute('colspan');
        !          1078:         if (defined $colspan && $colspan =~ /^\s*[0-9]+\s*$/) {
        !          1079:           $nb_cells += $colspan;
        !          1080:         } else {
        !          1081:           $nb_cells++;
        !          1082:         }
        !          1083:         my $rowspan = $cell->getAttribute('rowspan');
        !          1084:         if (defined $rowspan && $rowspan =~ /^\s*[0-9]+\s*$/) {
        !          1085:           for (my $i=0; $i < $rowspan-1; $i++) {
        !          1086:             if (!defined $rowspans[$i]) {
        !          1087:               $rowspans[$i] = 1;
        !          1088:             } else {
        !          1089:               $rowspans[$i]++;
        !          1090:             }
        !          1091:           }
        !          1092:         }
        !          1093:       }
        !          1094:     }
        !          1095:     push(@nb_cells, $nb_cells);
        !          1096:     if ($nb_cells > $max_nb_cells) {
        !          1097:       $max_nb_cells = $nb_cells;
        !          1098:     }
        !          1099:   }
        !          1100:   foreach my $tr (@trs) {
        !          1101:     my $nb_cells = shift(@nb_cells);
        !          1102:     if ($nb_cells < $max_nb_cells) {
        !          1103:       for (1..($max_nb_cells - $nb_cells)) {
        !          1104:         $tr->appendChild($doc->createElement('td'));
        !          1105:       }
        !          1106:     }
        !          1107:   }
        !          1108: }
        !          1109: 
        !          1110: # replaces ul/ul by ul/li/ul and the same for ol (using the previous li if possible)
        !          1111: # also adds a ul element when a li has no ul/ol ancestor
        !          1112: sub fix_lists {
        !          1113:   my ($root) = @_;
        !          1114:   my $doc = $root->ownerDocument;
        !          1115:   my @uls = $root->getElementsByTagName('ul');
        !          1116:   my @ols = $root->getElementsByTagName('ol');
        !          1117:   my @lists = (@uls, @ols);
        !          1118:   foreach my $list (@lists) {
        !          1119:     my $next;
        !          1120:     for (my $child=$list->firstChild; defined $child; $child=$next) {
        !          1121:       $next = $child->nextSibling;
        !          1122:       if ($child->nodeType == XML_ELEMENT_NODE && string_in_array(['ul','ol'], $child->nodeName)) {
        !          1123:         my $previous = $child->previousNonBlankSibling(); # note: non-DOM method
        !          1124:         $list->removeChild($child);
        !          1125:         if (defined $previous && $previous->nodeType == XML_ELEMENT_NODE && $previous->nodeName eq 'li') {
        !          1126:           $previous->appendChild($child);
        !          1127:         } else {
        !          1128:           my $li = $doc->createElement('li');
        !          1129:           $li->appendChild($child);
        !          1130:           if (!defined $next) {
        !          1131:             $list->appendChild($li);
        !          1132:           } else {
        !          1133:             $list->insertBefore($li, $next);
        !          1134:           }
        !          1135:         }
        !          1136:       }
        !          1137:     }
        !          1138:   }
        !          1139:   my @lis = $root->getElementsByTagName('li');
        !          1140:   foreach my $li (@lis) {
        !          1141:     my $found_list_ancestor = 0;
        !          1142:     my $ancestor = $li->parentNode;
        !          1143:     while (defined $ancestor) {
        !          1144:       if ($ancestor->nodeName eq 'ul' || $ancestor->nodeName eq 'ol') {
        !          1145:         $found_list_ancestor = 1;
        !          1146:         last;
        !          1147:       }
        !          1148:       $ancestor = $ancestor->parentNode;
        !          1149:     }
        !          1150:     if (!$found_list_ancestor) {
        !          1151:       # replace li by ul and add li under ul
        !          1152:       my $ul = $doc->createElement('ul');
        !          1153:       $li->parentNode->insertBefore($ul, $li);
        !          1154:       $li->parentNode->removeChild($li);
        !          1155:       $ul->appendChild($li);
        !          1156:       # add all other li afterwards inside ul (there might be text nodes in-between)
        !          1157:       my $next = $ul->nextSibling;
        !          1158:       while (defined $next) {
        !          1159:         my $next_next = $next->nextSibling;
        !          1160:         if ($next->nodeType == XML_TEXT_NODE && $next->nodeValue =~ /^\s*$/ &&
        !          1161:             defined $next_next && $next_next->nodeType == XML_ELEMENT_NODE && $next_next->nodeName eq 'li') {
        !          1162:           $next->parentNode->removeChild($next);
        !          1163:           $ul->appendChild($next);
        !          1164:           $next = $next_next;
        !          1165:           $next_next = $next_next->nextSibling;
        !          1166:         }
        !          1167:         if ($next->nodeType == XML_ELEMENT_NODE && $next->nodeName eq 'li') {
        !          1168:           $next->parentNode->removeChild($next);
        !          1169:           $ul->appendChild($next);
        !          1170:         } else {
        !          1171:           last;
        !          1172:         }
        !          1173:         $next = $next_next;
        !          1174:       }
        !          1175:     }
        !          1176:   }
        !          1177: }
        !          1178: 
        !          1179: # Some "image" elements are actually img element with a wrong name. This renames them.
        !          1180: # Amazingly enough, "<image src=..." displays an image in some browsers
        !          1181: # ("image" has existed at some point as an experimental HTML element).
        !          1182: sub fix_wrong_name_for_img {
        !          1183:   my ($root) = @_;
        !          1184:   my @images = $root->getElementsByTagName('image');
        !          1185:   foreach my $image (@images) {
        !          1186:     if (!defined $image->getAttribute('src')) {
        !          1187:       next;
        !          1188:     }
        !          1189:     my $found_correct_ancestor = 0;
        !          1190:     my $ancestor = $image->parentNode;
        !          1191:     while (defined $ancestor) {
        !          1192:       if ($ancestor->nodeName eq 'drawimage' || $ancestor->nodeName eq 'imageresponse') {
        !          1193:         $found_correct_ancestor = 1;
        !          1194:         last;
        !          1195:       }
        !          1196:       $ancestor = $ancestor->parentNode;
        !          1197:     }
        !          1198:     if ($found_correct_ancestor) {
        !          1199:       next;
        !          1200:     }
        !          1201:     # this really has to be renamed "img"
        !          1202:     $image->setNodeName('img');
        !          1203:   }
        !          1204: }
        !          1205: 
        !          1206: # Replaces many deprecated attributes and replaces them by equivalent CSS when possible
        !          1207: sub replace_deprecated_attributes_by_css {
        !          1208:   my ($root) = @_;
        !          1209:   
        !          1210:   fix_deprecated_in_tables($root);
        !          1211:   
        !          1212:   fix_deprecated_in_table_rows($root);
        !          1213:   
        !          1214:   fix_deprecated_in_table_cells($root);
        !          1215:   
        !          1216:   fix_deprecated_in_lists($root);
        !          1217:   
        !          1218:   fix_deprecated_in_list_items($root);
        !          1219:   
        !          1220:   fix_deprecated_in_hr($root);
        !          1221:   
        !          1222:   fix_deprecated_in_img($root);
        !          1223:   
        !          1224:   fix_deprecated_in_body($root);
        !          1225:   
        !          1226:   fix_align_attribute($root);
        !          1227: }
        !          1228: 
        !          1229: # Replaces deprecated attributes in tables
        !          1230: sub fix_deprecated_in_tables {
        !          1231:   my ($root) = @_;
        !          1232:   my @tables = $root->getElementsByTagName('table');
        !          1233:   foreach my $table (@tables) {
        !          1234:     tie (my %new_properties, 'Tie::IxHash', ());
        !          1235:     my $align = $table->getAttribute('align');
        !          1236:     if (defined $align) {
        !          1237:       $table->removeAttribute('align');
        !          1238:       $align = lc(trim($align));
        !          1239:     }
        !          1240:     if ($table->parentNode->nodeName eq 'center' || (defined $align && $align eq 'center') ||
        !          1241:         (defined $table->parentNode->getAttribute('align') && $table->parentNode->getAttribute('align') eq 'center')) {
        !          1242:       $new_properties{'margin-left'} = 'auto';
        !          1243:       $new_properties{'margin-right'} = 'auto';
        !          1244:     }
        !          1245:     if (defined $align && ($align eq 'left' || $align eq 'right')) {
        !          1246:       $new_properties{'float'} = $align;
        !          1247:     }
        !          1248:     my $width = $table->getAttribute('width');
        !          1249:     if (defined $width) {
        !          1250:       $table->removeAttribute('width');
        !          1251:       $width = trim($width);
        !          1252:       if ($width =~ /^[0-9]+$/) {
        !          1253:         $width .= 'px';
        !          1254:       }
        !          1255:       if ($width ne '') {
        !          1256:         $new_properties{'width'} = $width;
        !          1257:       }
        !          1258:     }
        !          1259:     my $height = $table->getAttribute('height');
        !          1260:     if (defined $height) {
        !          1261:       $table->removeAttribute('height');
        !          1262:       # no replacement for table height
        !          1263:     }
        !          1264:     my $bgcolor = $table->getAttribute('bgcolor');
        !          1265:     if (defined $bgcolor) {
        !          1266:       $table->removeAttribute('bgcolor');
        !          1267:       $bgcolor = trim($bgcolor);
        !          1268:       $bgcolor =~ s/^x\s*//;
        !          1269:       if ($bgcolor ne '') {
        !          1270:         $new_properties{'background-color'} = $bgcolor;
        !          1271:       }
        !          1272:     }
        !          1273:     my $frame = $table->getAttribute('frame');
        !          1274:     if (defined $frame) {
        !          1275:       $table->removeAttribute('frame');
        !          1276:       $frame = lc(trim($frame));
        !          1277:       if ($frame eq 'void') {
        !          1278:         $new_properties{'border'} = 'none';
        !          1279:       } elsif ($frame eq 'above') {
        !          1280:         $new_properties{'border-top'} = '1px solid black';
        !          1281:       } elsif ($frame eq 'below') {
        !          1282:         $new_properties{'border-bottom'} = '1px solid black';
        !          1283:       } elsif ($frame eq 'hsides') {
        !          1284:         $new_properties{'border-top'} = '1px solid black';
        !          1285:         $new_properties{'border-bottom'} = '1px solid black';
        !          1286:       } elsif ($frame eq 'vsides') {
        !          1287:         $new_properties{'border-left'} = '1px solid black';
        !          1288:         $new_properties{'border-right'} = '1px solid black';
        !          1289:       } elsif ($frame eq 'lhs') {
        !          1290:         $new_properties{'border-left'} = '1px solid black';
        !          1291:       } elsif ($frame eq 'rhs') {
        !          1292:         $new_properties{'border-right'} = '1px solid black';
        !          1293:       } elsif ($frame eq 'box') {
        !          1294:         $new_properties{'border'} = '1px solid black';
        !          1295:       } elsif ($frame eq 'border') {
        !          1296:         $new_properties{'border'} = '1px solid black';
        !          1297:       }
        !          1298:     }
        !          1299:     if (scalar(keys %new_properties) > 0) {
        !          1300:       set_css_properties($table, \%new_properties);
        !          1301:     }
        !          1302:     # we can't replace the border attribute without creating a style block, but we can improve things like border="BORDER"
        !          1303:     my $border = $table->getAttribute('border');
        !          1304:     if (defined $border) {
        !          1305:       $border = trim($border);
        !          1306:       if ($border !~ /^\s*[0-9]+\s*(px)?\s*$/) {
        !          1307:         $table->setAttribute('border', '1');
        !          1308:       }
        !          1309:     }
        !          1310:   }
        !          1311:   
        !          1312: }
        !          1313: 
        !          1314: # Replaces deprecated attributes in tr elements
        !          1315: sub fix_deprecated_in_table_rows {
        !          1316:   my ($root) = @_;
        !          1317:   my @trs = $root->getElementsByTagName('tr');
        !          1318:   foreach my $tr (@trs) {
        !          1319:     my $old_properties = get_css_properties($tr);
        !          1320:     tie (my %new_properties, 'Tie::IxHash', ());
        !          1321:     my $bgcolor = $tr->getAttribute('bgcolor');
        !          1322:     if (defined $bgcolor) {
        !          1323:       $tr->removeAttribute('bgcolor');
        !          1324:       if (!defined $old_properties->{'background-color'}) {
        !          1325:         $bgcolor = trim($bgcolor);
        !          1326:         $bgcolor =~ s/^x\s*//;
        !          1327:         if ($bgcolor ne '') {
        !          1328:           $new_properties{'background-color'} = $bgcolor;
        !          1329:         }
        !          1330:       }
        !          1331:     }
        !          1332:     my $align = $tr->getAttribute('align');
        !          1333:     if (defined $align && $align !~ /\s*char\s*/i) {
        !          1334:       $tr->removeAttribute('align');
        !          1335:       if (!defined $old_properties->{'text-align'}) {
        !          1336:         $align = lc(trim($align));
        !          1337:         if ($align ne '') {
        !          1338:           $new_properties{'text-align'} = $align;
        !          1339:         }
        !          1340:       }
        !          1341:     }
        !          1342:     my $valign = $tr->getAttribute('valign');
        !          1343:     if (defined $valign) {
        !          1344:       $tr->removeAttribute('valign');
        !          1345:       if (!defined $old_properties->{'vertical-align'}) {
        !          1346:         $valign = lc(trim($valign));
        !          1347:         if ($valign ne '') {
        !          1348:           $new_properties{'vertical-align'} = $valign;
        !          1349:         }
        !          1350:       }
        !          1351:     }
        !          1352:     if (scalar(keys %new_properties) > 0) {
        !          1353:       set_css_properties($tr, \%new_properties);
        !          1354:     }
        !          1355:   }
        !          1356: }
        !          1357: 
        !          1358: # Replaces deprecated attributes in table cells (td and th)
        !          1359: sub fix_deprecated_in_table_cells {
        !          1360:   my ($root) = @_;
        !          1361:   my @tds = $root->getElementsByTagName('td');
        !          1362:   my @ths = $root->getElementsByTagName('th');
        !          1363:   my @cells = (@tds, @ths);
        !          1364:   foreach my $cell (@cells) {
        !          1365:     my $old_properties = get_css_properties($cell);
        !          1366:     tie (my %new_properties, 'Tie::IxHash', ());
        !          1367:     my $width = $cell->getAttribute('width');
        !          1368:     if (defined $width) {
        !          1369:       $cell->removeAttribute('width');
        !          1370:       if (!defined $old_properties->{'width'}) {
        !          1371:         $width = trim($width);
        !          1372:         if ($width =~ /^[0-9]+$/) {
        !          1373:           $width .= 'px';
        !          1374:         }
        !          1375:         if ($width ne '') {
        !          1376:           $new_properties{'width'} = $width;
        !          1377:         }
        !          1378:       }
        !          1379:     }
        !          1380:     my $height = $cell->getAttribute('height');
        !          1381:     if (defined $height) {
        !          1382:       $cell->removeAttribute('height');
        !          1383:       if (!defined $old_properties->{'height'}) {
        !          1384:         $height = trim($height);
        !          1385:         if ($height =~ /^[0-9]+$/) {
        !          1386:           $height .= 'px';
        !          1387:         }
        !          1388:         if ($height ne '') {
        !          1389:           $new_properties{'height'} = $height;
        !          1390:         }
        !          1391:       }
        !          1392:     }
        !          1393:     my $bgcolor = $cell->getAttribute('bgcolor');
        !          1394:     if (defined $bgcolor) {
        !          1395:       $cell->removeAttribute('bgcolor');
        !          1396:       if (!defined $old_properties->{'background-color'}) {
        !          1397:         $bgcolor = trim($bgcolor);
        !          1398:         $bgcolor =~ s/^x\s*//;
        !          1399:         if ($bgcolor ne '') {
        !          1400:           $new_properties{'background-color'} = $bgcolor;
        !          1401:         }
        !          1402:       }
        !          1403:     }
        !          1404:     my $align = $cell->getAttribute('align');
        !          1405:     if (defined $align && $align !~ /\s*char\s*/i) {
        !          1406:       $cell->removeAttribute('align');
        !          1407:       if (!defined $old_properties->{'text-align'}) {
        !          1408:         $align = lc(trim($align));
        !          1409:         if ($align ne '') {
        !          1410:           $new_properties{'text-align'} = $align;
        !          1411:         }
        !          1412:       }
        !          1413:     }
        !          1414:     my $valign = $cell->getAttribute('valign');
        !          1415:     if (defined $valign) {
        !          1416:       $cell->removeAttribute('valign');
        !          1417:       if (!defined $old_properties->{'vertical-align'}) {
        !          1418:         $valign = lc(trim($valign));
        !          1419:         if ($valign ne '') {
        !          1420:           $new_properties{'vertical-align'} = $valign;
        !          1421:         }
        !          1422:       }
        !          1423:     }
        !          1424:     if (scalar(keys %new_properties) > 0) {
        !          1425:       set_css_properties($cell, \%new_properties);
        !          1426:     }
        !          1427:   }
        !          1428: }
        !          1429: 
        !          1430: # Replaces deprecated attributes in lists (ul and ol)
        !          1431: sub fix_deprecated_in_lists {
        !          1432:   my ($root) = @_;
        !          1433:   my @uls = $root->getElementsByTagName('ul');
        !          1434:   my @ols = $root->getElementsByTagName('ol');
        !          1435:   my @lists = (@uls, @ols);
        !          1436:   foreach my $list (@lists) {
        !          1437:     my $type = $list->getAttribute('type');
        !          1438:     if (defined $type) {
        !          1439:       my $lst = list_style_type($type);
        !          1440:       if (defined $lst) {
        !          1441:         $list->removeAttribute('type');
        !          1442:         if (!defined get_css_property($list, 'list-style-type')) {
        !          1443:           set_css_property($list, 'list-style-type', $lst);
        !          1444:         }
        !          1445:       }
        !          1446:     }
        !          1447:   }
        !          1448: }
        !          1449: 
        !          1450: # Replaces deprecated attributes in list items (li)
        !          1451: sub fix_deprecated_in_list_items {
        !          1452:   my ($root) = @_;
        !          1453:   my @lis = $root->getElementsByTagName('li');
        !          1454:   foreach my $li (@lis) {
        !          1455:     my $type = $li->getAttribute('type');
        !          1456:     if (defined $type) {
        !          1457:       my $lst = list_style_type($type);
        !          1458:       if (defined $lst) {
        !          1459:         $li->removeAttribute('type');
        !          1460:         if (!defined get_css_property($li, 'list-style-type')) {
        !          1461:           set_css_property($li, 'list-style-type', $lst);
        !          1462:         }
        !          1463:       }
        !          1464:     }
        !          1465:   }
        !          1466: }
        !          1467: 
        !          1468: # returns the CSS list-style-type value equivalent to the given type attribute for a list or list item
        !          1469: sub list_style_type {
        !          1470:   my ($type) = @_;
        !          1471:   my $value;
        !          1472:   $type = trim($type);
        !          1473:   if (lc($type) eq 'circle') {
        !          1474:     $value = 'circle';
        !          1475:   } elsif (lc($type) eq 'disc') {
        !          1476:     $value = 'disc';
        !          1477:   } elsif (lc($type) eq 'square') {
        !          1478:     $value = 'square';
        !          1479:   } elsif ($type eq 'a') {
        !          1480:     $value = 'lower-latin';
        !          1481:   } elsif ($type eq 'A') {
        !          1482:     $value = 'upper-latin';
        !          1483:   } elsif ($type eq 'i') {
        !          1484:     $value = 'lower-roman';
        !          1485:   } elsif ($type eq 'I') {
        !          1486:     $value = 'upper-roman';
        !          1487:   } elsif ($type eq '1') {
        !          1488:     $value = 'decimal';
        !          1489:   }
        !          1490:   return $value;
        !          1491: }
        !          1492: 
        !          1493: # Replaces deprecated attributes in hr
        !          1494: sub fix_deprecated_in_hr {
        !          1495:   my ($root) = @_;
        !          1496:   my @hrs = $root->getElementsByTagName('hr');
        !          1497:   foreach my $hr (@hrs) {
        !          1498:     tie (my %new_properties, 'Tie::IxHash', ());
        !          1499:     my $align = $hr->getAttribute('align');
        !          1500:     if (defined $align) {
        !          1501:       $align = lc(trim($align));
        !          1502:       if ($align eq 'left') {
        !          1503:         $new_properties{'text-align'} = 'left';
        !          1504:         $new_properties{'margin-left'} = '0';
        !          1505:       } elsif ($align eq 'right') {
        !          1506:         $new_properties{'text-align'} = 'right';
        !          1507:         $new_properties{'margin-right'} = '0';
        !          1508:       }
        !          1509:       $hr->removeAttribute('align');
        !          1510:     }
        !          1511:     my $color = $hr->getAttribute('color');
        !          1512:     if (defined $color) {
        !          1513:       $color = trim($color);
        !          1514:       $color =~ s/^x\s*//;
        !          1515:       if ($color ne '') {
        !          1516:         $new_properties{'color'} = $color;
        !          1517:         $new_properties{'background-color'} = $color;
        !          1518:       }
        !          1519:       $hr->removeAttribute('color');
        !          1520:     }
        !          1521:     my $noshade = $hr->getAttribute('noshade');
        !          1522:     my $size = $hr->getAttribute('size');
        !          1523:     if (defined $noshade) {
        !          1524:       $new_properties{'border-width'} = '0';
        !          1525:       if (!defined $color) {
        !          1526:         $new_properties{'color'} = 'gray';
        !          1527:         $new_properties{'background-color'} = 'gray';
        !          1528:       }
        !          1529:       if (!defined $size) {
        !          1530:         $size = '2';
        !          1531:       }
        !          1532:       $hr->removeAttribute('noshade');
        !          1533:     }
        !          1534:     if (defined $size) {
        !          1535:       $size = trim($size);
        !          1536:       if ($size ne '') {
        !          1537:         $new_properties{'height'} = $size.'px';
        !          1538:       }
        !          1539:       if (defined $hr->getAttribute('size')) {
        !          1540:         $hr->removeAttribute('size');
        !          1541:       }
        !          1542:     }
        !          1543:     my $width = $hr->getAttribute('width');
        !          1544:     if (defined $width) {
        !          1545:       $width = trim($width);
        !          1546:       if ($width ne '') {
        !          1547:         if ($width !~ /\%$/) {
        !          1548:           $width .= 'px';
        !          1549:         }
        !          1550:         $new_properties{'width'} = $width;
        !          1551:       }
        !          1552:       $hr->removeAttribute('width');
        !          1553:     }
        !          1554:     if (scalar(keys %new_properties) > 0) {
        !          1555:       set_css_properties($hr, \%new_properties);
        !          1556:     }
        !          1557:   }
        !          1558: }
        !          1559: 
        !          1560: # Replaces deprecated attributes in img
        !          1561: sub fix_deprecated_in_img {
        !          1562:   my ($root) = @_;
        !          1563:   my @imgs = $root->getElementsByTagName('img');
        !          1564:   foreach my $img (@imgs) {
        !          1565:     my $old_properties = get_css_properties($img);
        !          1566:     tie (my %new_properties, 'Tie::IxHash', ());
        !          1567:     my $align = $img->getAttribute('align');
        !          1568:     if (defined $align) {
        !          1569:       $align = lc(trim($align));
        !          1570:       if ($align eq 'middle' || $align eq 'top' || $align eq 'bottom') {
        !          1571:         $img->removeAttribute('align');
        !          1572:         if (!defined $old_properties->{'vertical-align'}) {
        !          1573:           $new_properties{'vertical-align'} = $align;
        !          1574:         }
        !          1575:       } elsif ($align eq 'left' || $align eq 'right') {
        !          1576:         $img->removeAttribute('align');
        !          1577:         if (!defined $old_properties->{'float'}) {
        !          1578:           $new_properties{'float'} = $align;
        !          1579:         }
        !          1580:       } elsif ($align eq 'center' || $align eq '') {
        !          1581:         $img->removeAttribute('align');
        !          1582:       }
        !          1583:     }
        !          1584:     my $border = $img->getAttribute('border');
        !          1585:     if (defined $border) {
        !          1586:       $border = lc(trim($border));
        !          1587:       if ($border =~ /^[0-9]+\s*(px)?$/) {
        !          1588:         $img->removeAttribute('border');
        !          1589:         if (!defined $old_properties->{'border'}) {
        !          1590:           if ($border !~ /px$/) {
        !          1591:             $border .= 'px';
        !          1592:           }
        !          1593:           $new_properties{'border'} = $border.' solid black';
        !          1594:         }
        !          1595:       }
        !          1596:     }
        !          1597:     my $hspace = $img->getAttribute('hspace');
        !          1598:     if (defined $hspace) {
        !          1599:       $hspace = lc(trim($hspace));
        !          1600:       if ($hspace =~ /^[0-9]+\s*(px)?$/) {
        !          1601:         $img->removeAttribute('hspace');
        !          1602:         if (!defined $old_properties->{'margin-left'} || !defined $old_properties->{'margin-right'}) {
        !          1603:           if ($hspace !~ /px$/) {
        !          1604:             $hspace .= 'px';
        !          1605:           }
        !          1606:           $new_properties{'margin-left'} = $hspace;
        !          1607:           $new_properties{'margin-right'} = $hspace;
        !          1608:         }
        !          1609:       }
        !          1610:     }
        !          1611:     if (scalar(keys %new_properties) > 0) {
        !          1612:       set_css_properties($img, \%new_properties);
        !          1613:     }
        !          1614:   }
        !          1615: }
        !          1616: 
        !          1617: # Replaces deprecated attributes in htmlbody (the style attribute could be used in a div for output)
        !          1618: sub fix_deprecated_in_body {
        !          1619:   my ($root) = @_;
        !          1620:   my $doc = $root->ownerDocument;
        !          1621:   my @bodies = $root->getElementsByTagName('htmlbody');
        !          1622:   foreach my $body (@bodies) {
        !          1623:     my $old_properties = get_css_properties($body);
        !          1624:     tie (my %new_properties, 'Tie::IxHash', ());
        !          1625:     my $bgcolor = $body->getAttribute('bgcolor');
        !          1626:     if (defined $bgcolor) {
        !          1627:       $body->removeAttribute('bgcolor');
        !          1628:       if (!defined $old_properties->{'background-color'}) {
        !          1629:         $bgcolor = trim($bgcolor);
        !          1630:         $bgcolor =~ s/^x\s*//;
        !          1631:         if ($bgcolor ne '') {
        !          1632:           $new_properties{'background-color'} = $bgcolor;
        !          1633:         }
        !          1634:       }
        !          1635:     }
        !          1636:     my $color = $body->getAttribute('text');
        !          1637:     if (defined $color) {
        !          1638:       $body->removeAttribute('text');
        !          1639:       if (!defined $old_properties->{'color'}) {
        !          1640:         $color = trim($color);
        !          1641:         $color =~ s/^x\s*//;
        !          1642:         if ($color ne '') {
        !          1643:           $new_properties{'color'} = $color;
        !          1644:         }
        !          1645:       }
        !          1646:     }
        !          1647:     my $background = $body->getAttribute('background');
        !          1648:     if (defined $background && ($background =~ /\.jpe?g$|\.gif|\.png/i)) {
        !          1649:       $body->removeAttribute('background');
        !          1650:       if (!defined $old_properties->{'background-image'}) {
        !          1651:         $background = trim($background);
        !          1652:         if ($background ne '') {
        !          1653:           $new_properties{'background-image'} = 'url('.$background.')';
        !          1654:         }
        !          1655:       }
        !          1656:     }
        !          1657:     # NOTE: these attributes have never been standard and are better removed with no replacement
        !          1658:     foreach my $bad ('bottommargin', 'leftmargin', 'rightmargin', 'topmargin', 'marginheight', 'marginwidth') {
        !          1659:       if ($body->hasAttribute($bad)) {
        !          1660:         $body->removeAttribute($bad);
        !          1661:       }
        !          1662:     }
        !          1663:     # NOTE: link alink and vlink require a <style> block to be converted
        !          1664:     my $link = $body->getAttribute('link');
        !          1665:     my $alink = $body->getAttribute('alink');
        !          1666:     my $vlink = $body->getAttribute('vlink');
        !          1667:     if (defined $link || defined $alink || defined $vlink) {
        !          1668:       my $head;
        !          1669:       my @heads = $root->getElementsByTagName('htmlhead');
        !          1670:       if (scalar(@heads) > 0) {
        !          1671:         $head = $heads[0];
        !          1672:       } else {
        !          1673:         $head = $doc->createElement('htmlhead');
        !          1674:         $root->insertBefore($head, $root->firstChild);
        !          1675:       }
        !          1676:       my $style = $doc->createElement('style');
        !          1677:       $head->appendChild($style);
        !          1678:       my $css = "\n";
        !          1679:       if (defined $link) {
        !          1680:         $body->removeAttribute('link');
        !          1681:         $link = trim($link);
        !          1682:         $link =~ s/^x\s*//;
        !          1683:         $css .= '      a:link { color:'.$link.' }';
        !          1684:         $css .= "\n";
        !          1685:       }
        !          1686:       if (defined $alink) {
        !          1687:         $body->removeAttribute('alink');
        !          1688:         $alink = trim($alink);
        !          1689:         $alink =~ s/^x\s*//;
        !          1690:         $css .= '      a:active { color:'.$alink.' }';
        !          1691:         $css .= "\n";
        !          1692:       }
        !          1693:       if (defined $vlink) {
        !          1694:         $body->removeAttribute('vlink');
        !          1695:         $vlink = trim($vlink);
        !          1696:         $vlink =~ s/^x\s*//;
        !          1697:         $css .= '      a:visited { color:'.$vlink.' }';
        !          1698:         $css .= "\n";
        !          1699:       }
        !          1700:       $css .= '    ';
        !          1701:       $style->appendChild($doc->createTextNode($css));
        !          1702:     }
        !          1703:     if (scalar(keys %new_properties) > 0) {
        !          1704:       set_css_properties($body, \%new_properties);
        !          1705:     } elsif (!$body->hasAttributes) {
        !          1706:       $body->parentNode->removeChild($body);
        !          1707:     }
        !          1708:   }
        !          1709: }
        !          1710: 
        !          1711: # replaces <div align="center"> by <div style="text-align:center;">
        !          1712: # also for p and h1..h6
        !          1713: sub fix_align_attribute {
        !          1714:   my ($root) = @_;
        !          1715:   my @nodes = $root->getElementsByTagName('div');
        !          1716:   push(@nodes, $root->getElementsByTagName('p'));
        !          1717:   for (my $i=1; $i<=6; $i++) {
        !          1718:     push(@nodes, $root->getElementsByTagName('h'.$i));
        !          1719:   }
        !          1720:   foreach my $node (@nodes) {
        !          1721:     my $align = $node->getAttribute('align');
        !          1722:     if (defined $align) {
        !          1723:       $node->removeAttribute('align');
        !          1724:       $align = trim($align);
        !          1725:       if ($align ne '' && !defined get_css_property($node, 'text-align')) {
        !          1726:         set_css_property($node, 'text-align', lc($align));
        !          1727:       }
        !          1728:     }
        !          1729:   }
        !          1730: }
        !          1731: 
        !          1732: # replace center by a div or remove it if there is a table inside
        !          1733: sub replace_center {
        !          1734:   my ($root, $all_block) = @_;
        !          1735:   my $doc = $root->ownerDocument;
        !          1736:   my @centers = $root->getElementsByTagName('center');
        !          1737:   foreach my $center (@centers) {
        !          1738:     if ($center->getChildrenByTagName('table')->size() > 0) { # note: getChildrenByTagName is not DOM (LibXML specific)
        !          1739:       replace_by_children($center);
        !          1740:     } else {
        !          1741:       if ((!defined $center->previousSibling ||
        !          1742:           ($center->previousSibling->nodeType == XML_TEXT_NODE && $center->previousSibling->nodeValue =~ /^\s*$/ && !defined $center->previousSibling->previousSibling)) &&
        !          1743:           (!defined $center->nextSibling ||
        !          1744:           ($center->nextSibling->nodeType == XML_TEXT_NODE && $center->nextSibling->nodeValue =~ /^\s*$/ && !defined $center->nextSibling->nextSibling)) &&
        !          1745:           string_in_array(\@accepting_style, $center->parentNode->nodeName)) {
        !          1746:         # use CSS on the parent block and replace center by its children
        !          1747:         set_css_property($center->parentNode, 'text-align', 'center');
        !          1748:         replace_by_children($center);
        !          1749:       } else {
        !          1750:         # use p or div ? check if there is a block inside
        !          1751:         my $found_block = 0;
        !          1752:         for (my $child=$center->firstChild; defined $child; $child=$child->nextSibling) {
        !          1753:           if ($child->nodeType == XML_ELEMENT_NODE && string_in_array($all_block, $child->nodeName)) {
        !          1754:             $found_block = 1;
        !          1755:             last;
        !          1756:           }
        !          1757:         }
        !          1758:         my $new_node;
        !          1759:         if ($found_block) {
        !          1760:           $new_node = $doc->createElement('div');
        !          1761:           $new_node->setAttribute('style', 'text-align: center; margin: 0 auto');
        !          1762:         } else {
        !          1763:           $new_node = $doc->createElement('p');
        !          1764:           $new_node->setAttribute('style', 'text-align: center');
        !          1765:         }
        !          1766:         my $next;
        !          1767:         for (my $child=$center->firstChild; defined $child; $child=$next) {
        !          1768:           $next = $child->nextSibling;
        !          1769:           $center->removeChild($child);
        !          1770:           $new_node->appendChild($child);
        !          1771:         }
        !          1772:         $center->parentNode->replaceChild($new_node, $center);
        !          1773:       }
        !          1774:     }
        !          1775:   }
        !          1776: }
        !          1777: 
        !          1778: # replaces <nobr> by <span style="white-space:nowrap">
        !          1779: sub replace_nobr {
        !          1780:   my ($root) = @_;
        !          1781:   my @nobrs = $root->getElementsByTagName('nobr');
        !          1782:   foreach my $nobr (@nobrs) {
        !          1783:     if (!defined $nobr->previousSibling && !defined $nobr->nextSibling &&
        !          1784:         string_in_array(\@accepting_style, $nobr->parentNode->nodeName)) {
        !          1785:       # use CSS on the parent block
        !          1786:       set_css_property($nobr->parentNode, 'white-space', 'nowrap');
        !          1787:       replace_by_children($nobr);
        !          1788:     } else {
        !          1789:       $nobr->setNodeName('span');
        !          1790:       $nobr->setAttribute('style', 'white-space:nowrap');
        !          1791:     }
        !          1792:   }
        !          1793: }
        !          1794: 
        !          1795: # removes notsolved tags in the case <hintgroup showoncorrect="no"><notsolved>...</notsolved></hintgroup>
        !          1796: # and in the case <notsolved><hintgroup showoncorrect="no">...</hintgroup></notsolved>
        !          1797: sub remove_useless_notsolved {
        !          1798:   my ($root) = @_;
        !          1799:   my @hintgroups = $root->getElementsByTagName('hintgroup');
        !          1800:   foreach my $hintgroup (@hintgroups) {
        !          1801:     my $showoncorrect = get_non_empty_attribute($hintgroup, 'showoncorrect');
        !          1802:     if (!defined $showoncorrect || $showoncorrect eq 'no') {
        !          1803:       my @notsolveds = $hintgroup->getElementsByTagName('notsolved');
        !          1804:       foreach my $notsolved (@notsolveds) {
        !          1805:         replace_by_children($notsolved);
        !          1806:       }
        !          1807:     }
        !          1808:     my $parent = $hintgroup->parentNode;
        !          1809:     if ($parent->nodeName eq 'notsolved' && scalar(@{$parent->nonBlankChildNodes()}) == 1) {
        !          1810:       replace_by_children($parent);
        !          1811:     }
        !          1812:   }
        !          1813: }
        !          1814: 
        !          1815: # adds a paragraph inside if needed and calls fix_paragraph for all paragraphs (including new ones)
        !          1816: sub fix_paragraphs_inside {
        !          1817:   my ($node, $all_block) = @_;
        !          1818:   # blocks in which paragrahs will be added:
        !          1819:   my @blocks_with_p = ('loncapa','library','problem','part','problemtype','window','block','while','postanswerdate','preduedate','solved','notsolved','languageblock','translated','lang','instructorcomment','togglebox','standalone','form');
        !          1820:   my @fix_p_if_br_or_p = (@responses,'foil','item','text','label','hintgroup','hintpart','hint','web','windowlink','div','li','dd','td','th','blockquote');
        !          1821:   if ((string_in_array(\@blocks_with_p, $node->nodeName) && paragraph_needed($node)) ||
        !          1822:       (string_in_array(\@fix_p_if_br_or_p, $node->nodeName) && paragraph_inside($node))) {
        !          1823:     # if non-empty, add paragraphs where needed between all br and remove br
        !          1824:     # (it would be easier to just put everything in a p and fix it afterwards, but there are performance issues
        !          1825:     #  when a paragraph has many blocks directly inside)
        !          1826:     my $doc = $node->ownerDocument;
        !          1827:     my $p = undef;
        !          1828:     my @new_children = ();
        !          1829:     my $next;
        !          1830:     for (my $child=$node->firstChild; defined $child; $child=$next) {
        !          1831:       $next = $child->nextSibling;
        !          1832:       $node->removeChild($child);
        !          1833:       if ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName eq 'br') {
        !          1834:         if (defined $p) {
        !          1835:           push(@new_children, $p);
        !          1836:         } else {
        !          1837:           push(@new_children, $doc->createElement('p'));
        !          1838:         }
        !          1839:         $p = undef;
        !          1840:       } elsif ($child->nodeType == XML_ELEMENT_NODE && string_in_array(\@inline_like_block, $child->nodeName)) {
        !          1841:         # inline_like_block: use the paragraph if there is one, otherwise do not create one
        !          1842:         if (defined $p) {
        !          1843:           $p->appendChild($child);
        !          1844:         } else {
        !          1845:           push(@new_children, $child);
        !          1846:         }
        !          1847:       } elsif ($child->nodeType == XML_ELEMENT_NODE && string_in_array($all_block, $child->nodeName)) {
        !          1848:         # these children are blocks and should not be in a paragraph
        !          1849:         if (defined $p) {
        !          1850:           push(@new_children, $p);
        !          1851:           $p = undef;
        !          1852:         }
        !          1853:         push(@new_children, $child);
        !          1854:       } elsif ($child->nodeType == XML_TEXT_NODE && $child->nodeValue =~ /^[ \t\f\n\r]*$/) {
        !          1855:         # blank text: add to paragraph if there is one and there is a next node, otherwise keep out of the paragraph
        !          1856:         if (defined $p) {
        !          1857:           if (defined $next) {
        !          1858:             $p->appendChild($child);
        !          1859:           } else {
        !          1860:             push(@new_children, $p);
        !          1861:             $p = undef;
        !          1862:             push(@new_children, $child);
        !          1863:           }
        !          1864:         } else {
        !          1865:           push(@new_children, $child);
        !          1866:         }
        !          1867:       } elsif ($child->nodeType == XML_TEXT_NODE ||
        !          1868:             $child->nodeType == XML_ELEMENT_NODE || $child->nodeType == XML_CDATA_SECTION_NODE ||
        !          1869:             $child->nodeType == XML_ENTITY_NODE || $child->nodeType == XML_ENTITY_REF_NODE) {
        !          1870:         # these children require a paragraph
        !          1871:         if (!defined $p) {
        !          1872:           $p = $doc->createElement('p');
        !          1873:         }
        !          1874:         $p->appendChild($child);
        !          1875:       } else {
        !          1876:         # these children do not require a paragraph (XML comments, PI)
        !          1877:         # -> do not move them in a new paragraph
        !          1878:         if (defined $p) {
        !          1879:           push(@new_children, $p);
        !          1880:           $p = undef;
        !          1881:         }
        !          1882:         push(@new_children, $child);
        !          1883:       }
        !          1884:     }
        !          1885:     if (defined $p) {
        !          1886:       push(@new_children, $p);
        !          1887:     }
        !          1888:     foreach my $child (@new_children) {
        !          1889:       $node->appendChild($child);
        !          1890:     }
        !          1891:   }
        !          1892:   # now fix the paragraphs everywhere, so that all inline nodes are inside a paragraph, and block nodes are outside
        !          1893:   my $next;
        !          1894:   for (my $child=$node->firstChild; defined $child; $child=$next) {
        !          1895:     $next = $child->nextSibling;
        !          1896:     if ($child->nodeType == XML_ELEMENT_NODE && defined $child->firstChild) {
        !          1897:       if ($child->nodeName eq 'p') {
        !          1898:         fix_paragraph($child, $all_block);
        !          1899:       } else {
        !          1900:         fix_paragraphs_inside($child, $all_block);
        !          1901:       }
        !          1902:     }
        !          1903:   }
        !          1904: }
        !          1905: 
        !          1906: # returns 1 if a paragraph is needed inside this node (assuming the parent can have paragraphs)
        !          1907: sub paragraph_needed {
        !          1908:   my ($node) = @_;
        !          1909:   for (my $child=$node->firstChild; defined $child; $child=$child->nextSibling) {
        !          1910:     if (($child->nodeType == XML_TEXT_NODE && $child->nodeValue !~ /^\s*$/) ||
        !          1911:         ($child->nodeType == XML_ELEMENT_NODE && !string_in_array(\@inline_like_block, $child->nodeName)) ||
        !          1912:         $child->nodeType == XML_CDATA_SECTION_NODE ||
        !          1913:         $child->nodeType == XML_ENTITY_NODE || $child->nodeType == XML_ENTITY_REF_NODE) {
        !          1914:       return(1);
        !          1915:     }
        !          1916:   }
        !          1917:   return(0);
        !          1918: }
        !          1919: 
        !          1920: # returns 1 if there is a paragraph or br in a child of this node, or inside an inline child
        !          1921: sub paragraph_inside {
        !          1922:   my ($node) = @_;
        !          1923:   # inline elements that can be split in half if there is a paragraph inside (currently all HTML):
        !          1924:   # (also used in first_block below)
        !          1925:   my @splitable_inline = ('span', 'a', 'strong', 'em' , 'b', 'i', 'sup', 'sub', 'code', 'kbd', 'samp', 'tt', 'ins', 'del', 'var', 'small', 'big', 'font', 'u');
        !          1926:   for (my $child=$node->firstChild; defined $child; $child=$child->nextSibling) {
        !          1927:     if ($child->nodeType == XML_ELEMENT_NODE) {
        !          1928:       my $name = $child->nodeName;
        !          1929:       if ($name eq 'p' || $name eq 'br') {
        !          1930:         return(1);
        !          1931:       } elsif (string_in_array(\@splitable_inline, $name)) {
        !          1932:         if (paragraph_inside($child)) {
        !          1933:           return(1);
        !          1934:         }
        !          1935:       }
        !          1936:     }
        !          1937:   }
        !          1938:   return(0);
        !          1939: }
        !          1940: 
        !          1941: # fixes paragraphs inside paragraphs (without a block in-between)
        !          1942: sub fix_paragraph {
        !          1943:   my ($p, $all_block) = @_;
        !          1944:   my $loop_right = 1; # this loops is to avoid out of memory errors with recurse, see below
        !          1945:   while ($loop_right) {
        !          1946:     $loop_right = 0;
        !          1947:     my $block = find_first_block($p, $all_block);
        !          1948:     if (defined $block) {
        !          1949:       my $trees = clone_ancestor_around_node($p, $block);
        !          1950:       my $doc = $p->ownerDocument;
        !          1951:       my $replacement = $doc->createDocumentFragment();
        !          1952:       my $left = $trees->{'left'};
        !          1953:       my $middle = $trees->{'middle'};
        !          1954:       my $right = $trees->{'right'};
        !          1955:       my $left_needs_p = 0; # 1 if it needs a paragraph (used to replace br later)
        !          1956:       
        !          1957:       if (defined $left) {
        !          1958:         # fix paragraphs inside, in case one of the descendants can have paragraphs inside (like numericalresponse/hintgroup):
        !          1959:         for (my $child=$left->firstChild; defined $child; $child=$child->nextSibling) {
        !          1960:           if ($child->nodeType == XML_ELEMENT_NODE) {
        !          1961:             fix_paragraphs_inside($child, $all_block);
        !          1962:           }
        !          1963:         }
        !          1964:         if (!paragraph_needed($left)) {
        !          1965:           # this was just blank text, comments or inline responses, it should not create a new paragraph
        !          1966:           my $next;
        !          1967:           for (my $child=$left->firstChild; defined $child; $child=$next) {
        !          1968:             $next = $child->nextSibling;
        !          1969:             $left->removeChild($child);
        !          1970:             $replacement->appendChild($child);
        !          1971:           }
        !          1972:         } else {
        !          1973:           $left_needs_p = 1;
        !          1974:           $replacement->appendChild($left);
        !          1975:         }
        !          1976:       }
        !          1977:       
        !          1978:       my $n = $middle->firstChild;
        !          1979:       while (defined $n) {
        !          1980:         if ($n->nodeType == XML_ELEMENT_NODE && (string_in_array($all_block, $n->nodeName) || $n->nodeName eq 'br')) {
        !          1981:           if ($n->nodeName eq 'p') {
        !          1982:             my $parent = $n->parentNode;
        !          1983:             # first apply recursion
        !          1984:             fix_paragraph($n, $all_block);
        !          1985:             # now the p might have been replaced by several nodes, which should replace the initial p
        !          1986:             my $next_block;
        !          1987:             for (my $block=$parent->firstChild; defined $block; $block=$next_block) {
        !          1988:               $next_block = $block->nextSibling;
        !          1989:               if ($block->nodeName eq 'p') {
        !          1990:                 $parent->removeChild($block);
        !          1991:                 # for each parent before $middle, clone in-between the p and its children (to preserve the styles)
        !          1992:                 if (defined $block->firstChild) {
        !          1993:                   for (my $p=$parent; $p!=$middle; $p=$p->parentNode) {
        !          1994:                     my $newp = $p->cloneNode(0);
        !          1995:                     my $next;
        !          1996:                     for (my $child=$block->firstChild; defined $child; $child=$next) {
        !          1997:                       $next = $child->nextSibling;
        !          1998:                       $block->removeChild($child);
        !          1999:                       $newp->appendChild($child);
        !          2000:                     }
        !          2001:                     $block->appendChild($newp);
        !          2002:                   }
        !          2003:                 }
        !          2004:               }
        !          2005:               $replacement->appendChild($block);
        !          2006:             }
        !          2007:           } else {
        !          2008:             # replace the whole p by this block, forgetting about intermediate inline elements
        !          2009:             $n->parentNode->removeChild($n);
        !          2010:             if ($n->nodeName eq 'br') {
        !          2011:               # replace a br by a paragraph if there was nothing before in the paragraph,
        !          2012:               # otherwise remove it because it already broke the paragraph in half
        !          2013:               if (!defined $left || !$left_needs_p) {
        !          2014:                 $replacement->appendChild($middle);
        !          2015:               }
        !          2016:             } else {
        !          2017:               fix_paragraphs_inside($n, $all_block);
        !          2018:               $replacement->appendChild($n);
        !          2019:             }
        !          2020:           }
        !          2021:           last;
        !          2022:         }
        !          2023:         $n = $n->firstChild;
        !          2024:         if (defined $n && defined $n->nextSibling) {
        !          2025:           die "Error in post_xml.fix_paragraph: block not found";
        !          2026:         }
        !          2027:       }
        !          2028:       
        !          2029:       if (defined $right) {
        !          2030:         if ($block->nodeName eq 'p') {
        !          2031:           # remove attributes on the right paragraph
        !          2032:           my @attributelist = $right->attributes();
        !          2033:           foreach my $att (@attributelist) {
        !          2034:             $right->removeAttribute($att->nodeName);
        !          2035:           }
        !          2036:         }
        !          2037:         if ($right->firstChild->nodeType == XML_TEXT_NODE && $right->firstChild->nodeValue =~ /^[ \t\f\n\r]*$/) {
        !          2038:           # remove the first text node with whitespace only from the p, it should not trigger the creation of a p
        !          2039:           # (but take nbsp into account, so we should not use \s here)
        !          2040:           my $first = $right->firstChild;
        !          2041:           $right->removeChild($first);
        !          2042:           $replacement->appendChild($first);
        !          2043:         }
        !          2044:         if (defined $right->firstChild) {
        !          2045:           if (paragraph_needed($right)) {
        !          2046:             $replacement->appendChild($right);
        !          2047:             #fix_paragraph($right, $all_block); This is taking way too much memory for blocks with many children
        !          2048:             # -> loop instead of recurse
        !          2049:             $loop_right = 1;
        !          2050:           } else {
        !          2051:             # this was just blank text, comments or inline responses, it should not create a new paragraph
        !          2052:             my $next;
        !          2053:             for (my $child=$right->firstChild; defined $child; $child=$next) {
        !          2054:               $next = $child->nextSibling;
        !          2055:               $right->removeChild($child);
        !          2056:               $replacement->appendChild($child);
        !          2057:               # fix paragraphs inside, in case one of the descendants can have paragraphs inside (like numericalresponse/hintgroup):
        !          2058:               if ($child->nodeType == XML_ELEMENT_NODE) {
        !          2059:                 fix_paragraphs_inside($child, $all_block);
        !          2060:               }
        !          2061:             }
        !          2062:           }
        !          2063:         }
        !          2064:       }
        !          2065:       
        !          2066:       $p->parentNode->replaceChild($replacement, $p);
        !          2067:       
        !          2068:       if ($loop_right) {
        !          2069:         $p = $right;
        !          2070:       }
        !          2071:       
        !          2072:     } else {
        !          2073:       # fix paragraphs inside, in case one of the descendants can have paragraphs inside (like numericalresponse/hintgroup):
        !          2074:       my $next;
        !          2075:       for (my $child=$p->firstChild; defined $child; $child=$next) {
        !          2076:         $next = $child->nextSibling;
        !          2077:         if ($child->nodeType == XML_ELEMENT_NODE) {
        !          2078:           fix_paragraphs_inside($child, $all_block);
        !          2079:         }
        !          2080:       }
        !          2081:     }
        !          2082:   }
        !          2083: }
        !          2084: 
        !          2085: sub find_first_block {
        !          2086:   my ($node, $all_block) = @_;
        !          2087:   # inline elements that can be split in half if there is a paragraph inside (currently all HTML):
        !          2088:   my @splitable_inline = ('span', 'a', 'strong', 'em' , 'b', 'i', 'sup', 'sub', 'code', 'kbd', 'samp', 'tt', 'ins', 'del', 'var', 'small', 'big', 'font', 'u');
        !          2089:   for (my $child=$node->firstChild; defined $child; $child=$child->nextSibling) {
        !          2090:     if ($child->nodeType == XML_ELEMENT_NODE) {
        !          2091:       if (string_in_array($all_block, $child->nodeName) || $child->nodeName eq 'br') {
        !          2092:         return($child);
        !          2093:       }
        !          2094:       if (string_in_array(\@splitable_inline, $child->nodeName)) {
        !          2095:         my $block = find_first_block($child, $all_block);
        !          2096:         if (defined $block) {
        !          2097:           return($block);
        !          2098:         }
        !          2099:       }
        !          2100:     }
        !          2101:   }
        !          2102:   return(undef);
        !          2103: }
        !          2104: 
        !          2105: # Creates clones of the ancestor containing the descendants before the node, at the node, and after the node.
        !          2106: # returns a hash with: left, middle, right (left and right can be undef)
        !          2107: sub clone_ancestor_around_node {
        !          2108:   my ($ancestor, $node) = @_;
        !          2109:   my $middle_node;
        !          2110:   my ($left, $middle, $right);
        !          2111:   for (my $child=$ancestor->firstChild; defined $child; $child=$child->nextSibling) {
        !          2112:     if ($child == $node || is_ancestor_of($child, $node)) {
        !          2113:       $middle_node = $child;
        !          2114:       last;
        !          2115:     }
        !          2116:   }
        !          2117:   if (!defined $middle_node) {
        !          2118:     die "error in split_ancestor_around_node: middle not found";
        !          2119:   }
        !          2120:   if (defined $middle_node->previousSibling) {
        !          2121:     $left = $ancestor->cloneNode(0);
        !          2122:     for (my $child=$ancestor->firstChild; $child != $middle_node; $child=$child->nextSibling) {
        !          2123:       $left->appendChild($child->cloneNode(1));
        !          2124:     }
        !          2125:   }
        !          2126:   $middle = $ancestor->cloneNode(0);
        !          2127:   if ($middle_node == $node) {
        !          2128:     $middle->appendChild($middle_node->cloneNode(1));
        !          2129:   } else {
        !          2130:     my $subres = clone_ancestor_around_node($middle_node, $node);
        !          2131:     my $subleft = $subres->{'left'};
        !          2132:     if (defined $subleft) {
        !          2133:       if (!defined $left) {
        !          2134:         $left = $ancestor->cloneNode(0);
        !          2135:       }
        !          2136:       $left->appendChild($subleft);
        !          2137:     }
        !          2138:     $middle->appendChild($subres->{'middle'});
        !          2139:     my $subright = $subres->{'right'};
        !          2140:     if (defined $subright) {
        !          2141:       $right = $ancestor->cloneNode(0);
        !          2142:       $right->appendChild($subright);
        !          2143:     }
        !          2144:   }
        !          2145:   if (defined $middle_node->nextSibling) {
        !          2146:     if (!defined $right) {
        !          2147:       $right = $ancestor->cloneNode(0);
        !          2148:     }
        !          2149:     for (my $child=$middle_node->nextSibling; defined $child; $child=$child->nextSibling) {
        !          2150:       $right->appendChild($child->cloneNode(1));
        !          2151:     }
        !          2152:   }
        !          2153:   my %result = ();
        !          2154:   $result{'left'} = $left;
        !          2155:   $result{'middle'} = $middle;
        !          2156:   $result{'right'} = $right;
        !          2157:   return(\%result);
        !          2158: }
        !          2159: 
        !          2160: sub is_ancestor_of {
        !          2161:   my ($n1, $n2) = @_;
        !          2162:   my $n = $n2->parentNode;
        !          2163:   while (defined $n) {
        !          2164:     if ($n == $n1) {
        !          2165:       return(1);
        !          2166:     }
        !          2167:     $n = $n->parentNode;
        !          2168:   }
        !          2169:   return(0);
        !          2170: }
        !          2171: 
        !          2172: # removes empty style elements and replaces the ones with only whitespaces inside by their content
        !          2173: # also remove hints that have become empty after empty style removal.
        !          2174: sub remove_empty_style {
        !          2175:   my ($root) = @_;
        !          2176:   # actually, preserve some elements like ins when they have whitespace, only remove if they are empty
        !          2177:   my @remove_if_empty = ('span', 'strong', 'em' , 'b', 'i', 'sup', 'sub', 'code', 'kbd', 'samp', 'tt', 'ins', 'del', 'var', 'small', 'big', 'font', 'u', 'hint');
        !          2178:   my @remove_if_blank = ('span', 'strong', 'em' , 'b', 'i', 'sup', 'sub', 'tt', 'var', 'small', 'big', 'font', 'u', 'hint');
        !          2179:   foreach my $name (@remove_if_empty) {
        !          2180:     my @nodes = $root->getElementsByTagName($name);
        !          2181:     while (scalar(@nodes) > 0) {
        !          2182:       my $node = pop(@nodes);
        !          2183:       if (!defined $node->firstChild) {
        !          2184:         my $parent = $node->parentNode;
        !          2185:         if (defined $node->previousSibling && $node->previousSibling->nodeType == XML_TEXT_NODE &&
        !          2186:             $node->previousSibling->nodeValue =~ /\$\S*$/) {
        !          2187:           # case $a<sup></sup>x
        !          2188:           my $value = $node->previousSibling->nodeValue;
        !          2189:           $value =~ s/\$(\S*)$/\$\{$1\}/;
        !          2190:           $node->previousSibling->setData($value);
        !          2191:         }
        !          2192:         $parent->removeChild($node);
        !          2193:         $parent->normalize();
        !          2194:         # now that we removed the node, check if the parent has become an empty style, and so on
        !          2195:         while (defined $parent && string_in_array(\@remove_if_empty, $parent->nodeName) && !defined $parent->firstChild) {
        !          2196:           my $grandparent = $parent->parentNode;
        !          2197:           $grandparent->removeChild($parent);
        !          2198:           remove_reference_from_array(\@nodes, $parent);
        !          2199:           $parent = $grandparent;
        !          2200:         }
        !          2201:       }
        !          2202:     }
        !          2203:   }
        !          2204:   foreach my $name (@remove_if_blank) {
        !          2205:     my @nodes = $root->getElementsByTagName($name);
        !          2206:     while (scalar(@nodes) > 0) {
        !          2207:       my $node = pop(@nodes);
        !          2208:       if (defined $node->firstChild && !defined $node->firstChild->nextSibling && $node->firstChild->nodeType == XML_TEXT_NODE) {
        !          2209:         # NOTE: careful, with UTF-8, \s matches non-breaking spaces and we want to preserve these
        !          2210:         if ($node->firstChild->nodeValue =~ /^[\t\n\f\r ]*$/) {
        !          2211:           my $parent = $node->parentNode;
        !          2212:           replace_by_children($node);
        !          2213:           $parent->normalize();
        !          2214:           # now that we removed the node, check if the parent has become a style with only whitespace, and so on
        !          2215:           while (defined $parent && string_in_array(\@remove_if_blank, $parent->nodeName) &&
        !          2216:               (!defined $parent->firstChild ||
        !          2217:               (!defined $parent->firstChild->nextSibling && $parent->firstChild->nodeType == XML_TEXT_NODE &&
        !          2218:               $parent->firstChild->nodeValue =~ /^^[\t\n\f\r ]*$/))) {
        !          2219:             my $grandparent = $parent->parentNode;
        !          2220:             replace_by_children($parent);
        !          2221:             remove_reference_from_array(\@nodes, $parent);
        !          2222:             $parent = $grandparent;
        !          2223:           }
        !          2224:         }
        !          2225:       }
        !          2226:     }
        !          2227:   }
        !          2228: }
        !          2229: 
        !          2230: # remove whitespace inside LON-CAPA elements that have an empty content-model (HTML ones are handled by html_to_xml)
        !          2231: sub fix_empty_lc_elements {
        !          2232:   my ($node) = @_;
        !          2233:   my @lcempty = ('arc','axis','backgroundplot','drawoptionlist','drawvectorsum','fill','functionplotrule','functionplotvectorrule','functionplotvectorsumrule','hiddenline','hiddensubmission','key','line','location','organicstructure','parameter','plotobject','plotvector','responseparam','spline','textline');
        !          2234:   if (string_in_array(\@lcempty, $node->nodeName)) {
        !          2235:     if (defined $node->firstChild && !defined $node->firstChild->nextSibling &&
        !          2236:         $node->firstChild->nodeType == XML_TEXT_NODE && $node->firstChild->nodeValue =~ /^\s*$/) {
        !          2237:       $node->removeChild($node->firstChild);
        !          2238:     }
        !          2239:     if (defined $node->firstChild) {
        !          2240:       if ($warnings) {
        !          2241:         print "Warning: a ".$node->nodeName." has something inside\n";
        !          2242:       }
        !          2243:     }
        !          2244:     return;
        !          2245:   }
        !          2246:   for (my $child=$node->firstChild; defined $child; $child=$child->nextSibling) {
        !          2247:     if ($child->nodeType == XML_ELEMENT_NODE) {
        !          2248:       fix_empty_lc_elements($child);
        !          2249:     }
        !          2250:   }
        !          2251: }
        !          2252: 
        !          2253: # turn some attribute values into lowercase when they should be
        !          2254: sub lowercase_attribute_values {
        !          2255:   my ($root) = @_;
        !          2256:   my @with_yesno = (['radiobuttonresponse', ['randomize']],
        !          2257:                     ['optionresponse', ['randomize']],
        !          2258:                     ['matchresponse', ['randomize']],
        !          2259:                     ['itemgroup', ['randomize']],
        !          2260:                     ['rankresponse', ['randomize']],
        !          2261:                     ['functionplotresponse', ['xaxisvisible', 'yaxisvisible', 'gridvisible']],
        !          2262:                     ['backgroundplot', ['fixed']],
        !          2263:                     ['drawvectorsum', ['showvalue']],
        !          2264:                     ['textline', ['readonly']],
        !          2265:                     ['hint', ['showoncorrect']],
        !          2266:                     ['body', ['dir']],
        !          2267:                     ['img', ['encrypturl']],
        !          2268:                     ['form', ['method']],
        !          2269:                     ['input', ['type']]
        !          2270:                    );
        !          2271:   foreach my $el_attributes (@with_yesno) {
        !          2272:     my $el_name = $el_attributes->[0];
        !          2273:     my @elements = $root->getElementsByTagName($el_name);
        !          2274:     foreach my $element (@elements) {
        !          2275:       my $att_list = $el_attributes->[1];
        !          2276:       foreach my $att_name (@$att_list) {
        !          2277:         my $att_value = $element->getAttribute($att_name);
        !          2278:         if (!defined $att_value) {
        !          2279:           next;
        !          2280:         }
        !          2281:         if ($att_value eq 'yes' || $att_value eq 'no') {
        !          2282:           next;
        !          2283:         }
        !          2284:         if ($att_value =~ /\s*yes\s*/i) {
        !          2285:           $element->setAttribute($att_name, 'yes');
        !          2286:         } elsif ($att_value =~ /\s*no\s*/i) {
        !          2287:           $element->setAttribute($att_name, 'no');
        !          2288:         }
        !          2289:       }
        !          2290:     }
        !          2291:   }
        !          2292: }
        !          2293: 
        !          2294: # fixes spelling mistakes for numericalresponse/@unit
        !          2295: sub replace_numericalresponse_unit_attribute {
        !          2296:   my ($root) = @_;
        !          2297:   my @numericalresponses = $root->getElementsByTagName('numericalresponse');
        !          2298:   foreach my $numericalresponse (@numericalresponses) {
        !          2299:     if (defined $numericalresponse->getAttribute('units') && !defined $numericalresponse->getAttribute('unit')) {
        !          2300:       $numericalresponse->setAttribute('unit', $numericalresponse->getAttribute('units'));
        !          2301:       $numericalresponse->removeAttribute('units');
        !          2302:     }
        !          2303:   }
        !          2304:   
        !          2305: }
        !          2306: 
        !          2307: # Replaces &format and &prettyprint by <num> whenever possible.
        !          2308: # Also replaces &chemparse by <chem>.
        !          2309: # If the function call is enclosed in <display>, the <display> element is removed.
        !          2310: sub replace_functions_by_elements {
        !          2311:   my ($root) = @_;
        !          2312:   my $doc = $root->ownerDocument;
        !          2313:   my @preserve = ('script','answer','parse','m','tm','dtm','numericalhintscript'); # display is handled later
        !          2314:   my @all = $root->getElementsByTagName('*');
        !          2315:   foreach my $element (@all) {
        !          2316:     if (string_in_array(\@preserve, $element->nodeName)) {
        !          2317:       next;
        !          2318:     }
        !          2319:     my $changed = 0;
        !          2320:     my $next;
        !          2321:     for (my $child=$element->firstChild; defined $child; $child=$next) {
        !          2322:       $next = $child->nextSibling;
        !          2323:       if ($child->nodeType == XML_TEXT_NODE) {
        !          2324:         my $value = $child->nodeValue;
        !          2325:         if ($value =~ /^(.*)&(?:format|prettyprint)\((\$\{?[a-zA-Z0-9]*\}?(?:\[[^\]]*\])?|[0-9.]+)\s?,\s?(["'][,.\$]?[0-9][eEfFgGsS]["']|\$[a-zA-Z0-9]*)\)(.*)$/s) {
        !          2326:           # NOTE: we don't check for &prettyprint's 3rd argument (target), but it has not been seen outside of script elements.
        !          2327:           # NOTE: the format options ',' and '$' are not supported by &format in current LON-CAPA since rev 1.81 of default_homework.lcpm,
        !          2328:           #       but are supported by &prettyprint;
        !          2329:           #       if we use (like current LON-CAPA) &prettyprint for <num> implementation, it will change a few resulting documents
        !          2330:           #       (by making them display something they were probably intended to display, but which did not).
        !          2331:           #       Usage of <num> with &prettyprint instead of &format might also change the display when there is an exponent.
        !          2332:           my $before = $1;
        !          2333:           my $number = $2;
        !          2334:           my $format = $3;
        !          2335:           my $after = $4;
        !          2336:           $format =~ s/^['"]|['"]$//g;
        !          2337:           # do not change this if the parent is <display> and there are other things before or after &format
        !          2338:           if ($element->nodeName eq 'display' && (defined $child->previousSibling || defined $next ||
        !          2339:               $before !~ /^\s*$/ || $after !~ /^\s*$/)) {
        !          2340:             last;
        !          2341:           }
        !          2342:           my $replacement = $doc->createDocumentFragment();
        !          2343:           my $num = $doc->createElement('num');
        !          2344:           $num->setAttribute('format', $format);
        !          2345:           $num->appendChild($doc->createTextNode($number));
        !          2346:           if (length($before) > 0) {
        !          2347:             $replacement->appendChild($doc->createTextNode($before));
        !          2348:           }
        !          2349:           $replacement->appendChild($num);
        !          2350:           if (length($after) > 0) {
        !          2351:             $replacement->appendChild($doc->createTextNode($after));
        !          2352:           }
        !          2353:           $element->replaceChild($replacement, $child);
        !          2354:           $changed = 1;
        !          2355:           $next = $element->firstChild; # start over, there might be another &format in the same text node
        !          2356:         } elsif ($value =~ /^(.*)&chemparse\(([^'"()]*|'[^']*'|"[^"]*")\)(.*)$/s) {
        !          2357:           my $before = $1;
        !          2358:           my $reaction = $2;
        !          2359:           my $after = $3;
        !          2360:           $reaction =~ s/^'(.*)'$/$1/;
        !          2361:           $reaction =~ s/^"(.*)"$/$1/;
        !          2362:           if ($element->nodeName eq 'display' && (defined $child->previousSibling || defined $next ||
        !          2363:               $before !~ /^\s*$/ || $after !~ /^\s*$/)) {
        !          2364:             last;
        !          2365:           }
        !          2366:           my $replacement = $doc->createDocumentFragment();
        !          2367:           my $chem = $doc->createElement('chem');
        !          2368:           $chem->appendChild($doc->createTextNode($reaction));
        !          2369:           if (length($before) > 0) {
        !          2370:             $replacement->appendChild($doc->createTextNode($before));
        !          2371:           }
        !          2372:           $replacement->appendChild($chem);
        !          2373:           if (length($after) > 0) {
        !          2374:             $replacement->appendChild($doc->createTextNode($after));
        !          2375:           }
        !          2376:           $element->replaceChild($replacement, $child);
        !          2377:           $changed = 1;
        !          2378:           $next = $element->firstChild;
        !          2379:         }
        !          2380:       }
        !          2381:     }
        !          2382:     if ($changed && $element->nodeName eq 'display') {
        !          2383:       my $first = $element->firstChild;
        !          2384:       if ($first->nodeType == XML_ELEMENT_NODE && string_in_array(['num','chem'], $first->nodeName) &&
        !          2385:           !defined $first->nextSibling) {
        !          2386:         # remove useless display element
        !          2387:         replace_by_children($element);
        !          2388:       }
        !          2389:     }
        !          2390:   }
        !          2391: }
        !          2392: 
        !          2393: # pretty-print using im-memory DOM tree
        !          2394: sub pretty {
        !          2395:   my ($node, $all_block, $indent_level) = @_;
        !          2396:   my $doc = $node->ownerDocument;
        !          2397:   $indent_level ||= 0;
        !          2398:   my $type = $node->nodeType;
        !          2399:   if ($type == XML_ELEMENT_NODE) {
        !          2400:     my $name = $node->nodeName;
        !          2401:     if ((string_in_array($all_block, $name) || string_in_array(\@inline_like_block, $name)) &&
        !          2402:         !string_in_array(\@preserve_elements, $name)) {
        !          2403:       # make sure there is a newline at the beginning and at the end if there is anything inside
        !          2404:       if (defined $node->firstChild && !string_in_array(\@no_newline_inside, $name)) {
        !          2405:         my $first = $node->firstChild;
        !          2406:         if ($first->nodeType == XML_TEXT_NODE) {
        !          2407:           my $text = $first->nodeValue;
        !          2408:           if ($text !~ /^ *\n/) {
        !          2409:             $first->setData("\n" . $text);
        !          2410:           }
        !          2411:         } else {
        !          2412:           $node->insertBefore($doc->createTextNode("\n"), $first);
        !          2413:         }
        !          2414:         my $last = $node->lastChild;
        !          2415:         if ($last->nodeType == XML_TEXT_NODE) {
        !          2416:           my $text = $last->nodeValue;
        !          2417:           if ($text !~ /\n *$/) {
        !          2418:             $last->setData($text . "\n");
        !          2419:           }
        !          2420:         } else {
        !          2421:           $node->appendChild($doc->createTextNode("\n"));
        !          2422:         }
        !          2423:       }
        !          2424:       
        !          2425:       # indent and make sure there is a newline before and after a block element
        !          2426:       my $newline_indent = "\n".(' ' x (2*($indent_level + 1)));
        !          2427:       my $newline_indent_last = "\n".(' ' x (2*$indent_level));
        !          2428:       my $next;
        !          2429:       for (my $child=$node->firstChild; defined $child; $child=$next) {
        !          2430:         $next = $child->nextSibling;
        !          2431:         if ($child->nodeType == XML_ELEMENT_NODE) {
        !          2432:           if (string_in_array($all_block, $child->nodeName) || string_in_array(\@inline_like_block, $child->nodeName)) {
        !          2433:             # make sure there is a newline before and after a block element
        !          2434:             if (defined $child->previousSibling && $child->previousSibling->nodeType == XML_TEXT_NODE) {
        !          2435:               my $prev = $child->previousSibling;
        !          2436:               my $text = $prev->nodeValue;
        !          2437:               if ($text !~ /\n *$/) {
        !          2438:                 $prev->setData($text . $newline_indent);
        !          2439:               }
        !          2440:             } else {
        !          2441:               $node->insertBefore($doc->createTextNode($newline_indent), $child);
        !          2442:             }
        !          2443:             if (defined $next && $next->nodeType == XML_TEXT_NODE) {
        !          2444:               my $text = $next->nodeValue;
        !          2445:               if ($text !~ /^ *\n/) {
        !          2446:                 $next->setData($newline_indent . $text);
        !          2447:               }
        !          2448:             } else {
        !          2449:               $node->insertAfter($doc->createTextNode($newline_indent), $child);
        !          2450:             }
        !          2451:           }
        !          2452:           pretty($child, $all_block, $indent_level+1);
        !          2453:         } elsif ($child->nodeType == XML_TEXT_NODE) {
        !          2454:           my $text = $child->nodeValue;
        !          2455:           # collapse newlines
        !          2456:           $text =~ s/\n([\t ]*\n)+/\n/g;
        !          2457:           # indent and remove spaces and tabs before newlines
        !          2458:           if (defined $next) {
        !          2459:             $text =~ s/[\t ]*\n[\t ]*/$newline_indent/ge;
        !          2460:           } else {
        !          2461:             $text =~ s/[\t ]*\n[\t ]*/$newline_indent/ge;
        !          2462:             $text =~ s/[\t ]*\n[\t ]*$/$newline_indent_last/e;
        !          2463:           }
        !          2464:           $child->setData($text);
        !          2465:         }
        !          2466:       }
        !          2467:       
        !          2468:       # removes whitespace at the beginning and end of p td, th and li (except for nbsp at the beginning)
        !          2469:       my @to_trim = ('p','td','th','li');
        !          2470:       if (string_in_array(\@to_trim, $name) && defined $node->firstChild && $node->firstChild->nodeType == XML_TEXT_NODE) {
        !          2471:         my $text = $node->firstChild->nodeValue;
        !          2472:         $text =~ s/^[ \t\f\n\r]*//;
        !          2473:         if ($text eq '') {
        !          2474:           $node->removeChild($node->firstChild);
        !          2475:         } else {
        !          2476:           $node->firstChild->setData($text);
        !          2477:         }
        !          2478:       }
        !          2479:       if (string_in_array(\@to_trim, $name) && defined $node->lastChild && $node->lastChild->nodeType == XML_TEXT_NODE) {
        !          2480:         my $text = $node->lastChild->nodeValue;
        !          2481:         $text =~ s/\s*$//;
        !          2482:         if ($text eq '') {
        !          2483:           $node->removeChild($node->lastChild);
        !          2484:         } else {
        !          2485:           $node->lastChild->setData($text);
        !          2486:         }
        !          2487:       }
        !          2488:     } elsif (string_in_array(\@preserve_elements, $name)) {
        !          2489:       # collapse newlines at the beginning and the end of scripts
        !          2490:       if (defined $node->firstChild && $node->firstChild->nodeType == XML_TEXT_NODE) {
        !          2491:         my $text = $node->firstChild->nodeValue;
        !          2492:         $text =~ s/^\n( *\n)+/\n/;
        !          2493:         if ($text eq '') {
        !          2494:           $node->removeChild($node->firstChild);
        !          2495:         } else {
        !          2496:           $node->firstChild->setData($text);
        !          2497:         }
        !          2498:       }
        !          2499:       if (defined $node->lastChild && $node->lastChild->nodeType == XML_TEXT_NODE) {
        !          2500:         my $text = $node->lastChild->nodeValue;
        !          2501:         $text =~ s/\n( *\n)+$/\n/;
        !          2502:         if ($text eq '') {
        !          2503:           $node->removeChild($node->lastChild);
        !          2504:         } else {
        !          2505:           $node->lastChild->setData($text);
        !          2506:         }
        !          2507:       }
        !          2508:     }
        !          2509:   }
        !          2510: }
        !          2511: 
        !          2512: sub replace_tm_dtm {
        !          2513:   my ($root) = @_;
        !          2514:   my $doc = $root->ownerDocument;
        !          2515:   my @elements = $root->getElementsByTagName('tm');
        !          2516:   push(@elements, $root->getElementsByTagName('dtm'));
        !          2517:   foreach my $element (@elements) {
        !          2518:     my $first = $element->firstChild;
        !          2519:     if (defined $first && $first->nodeType == XML_TEXT_NODE) {
        !          2520:       my $text = $first->nodeValue;
        !          2521:       if ($element->nodeName eq 'tm') {
        !          2522:         $first->setData('$'.$text.'$');
        !          2523:       } else {
        !          2524:         $first->setData('$$'.$text.'$$');
        !          2525:       }
        !          2526:     }
        !          2527:     $element->setNodeName('m');
        !          2528:   }
        !          2529: }
        !          2530: 
        !          2531: 
        !          2532: ######## utilities ########
        !          2533: 
        !          2534: ##
        !          2535: # Trims a string (really, this should be built-in in Perl, this is ridiculous, ugly and slow)
        !          2536: # @param {string} s - the string to trim
        !          2537: # @returns the trimmed string
        !          2538: ##
        !          2539: sub trim {
        !          2540:   my ($s) = @_;
        !          2541:   $s =~ s/^\s+//;
        !          2542:   $s =~ s/\s+$//;
        !          2543:   return($s);
        !          2544: }
        !          2545: 
        !          2546: ##
        !          2547: # Tests if a string is in an array (using eq) (to avoid Smartmatch warnings with $value ~~ @array)
        !          2548: # @param {Array<string>} array - reference to the array of strings
        !          2549: # @param {string} value - the string to look for
        !          2550: # @returns 1 if found, 0 otherwise
        !          2551: ##
        !          2552: sub string_in_array {
        !          2553:   my ($array, $value) = @_;
        !          2554:   # NOTE: would it be faster to use "any { $value eq $_ } @{$array}" from List::Util ?
        !          2555:   foreach my $v (@{$array}) {
        !          2556:     if ($v eq $value) {
        !          2557:       return 1;
        !          2558:     }
        !          2559:   }
        !          2560:   return 0;
        !          2561: }
        !          2562: 
        !          2563: ##
        !          2564: # Tests if an object is in an array (using ==)
        !          2565: # @param {Array<Object>} array - reference to the array of references
        !          2566: # @param {Object} ref - the reference to look for
        !          2567: # @returns 1 if found, 0 otherwise
        !          2568: ##
        !          2569: sub reference_in_array {
        !          2570:   my ($array, $ref) = @_;
        !          2571:   foreach my $v (@{$array}) {
        !          2572:     if ($v == $ref) {
        !          2573:       return 1;
        !          2574:     }
        !          2575:   }
        !          2576:   return 0;
        !          2577: }
        !          2578: 
        !          2579: ##
        !          2580: # returns the index of a string in an array
        !          2581: # @param {Array<Object>} array - reference to the array of strings
        !          2582: # @param {string} s - the string to look for (using eq)
        !          2583: # @returns the index if found, -1 otherwise
        !          2584: ##
        !          2585: sub index_of_string {
        !          2586:   my ($array, $s) = @_;
        !          2587:   for (my $i=0; $i<scalar(@{$array}); $i++) {
        !          2588:     if ($array->[$i] eq $s) {
        !          2589:       return $i;
        !          2590:     }
        !          2591:   }
        !          2592:   return -1;
        !          2593: }
        !          2594: 
        !          2595: ##
        !          2596: # returns the index of a reference in an array
        !          2597: # @param {Array<Object>} array - reference to the array of references
        !          2598: # @param {Object} ref - the reference to look for
        !          2599: # @returns the index if found, -1 otherwise
        !          2600: ##
        !          2601: sub index_of_reference {
        !          2602:   my ($array, $ref) = @_;
        !          2603:   for (my $i=0; $i<scalar(@{$array}); $i++) {
        !          2604:     if ($array->[$i] == $ref) {
        !          2605:       return $i;
        !          2606:     }
        !          2607:   }
        !          2608:   return -1;
        !          2609: }
        !          2610: 
        !          2611: ##
        !          2612: # if found, removes a string from an array, otherwise do nothing
        !          2613: # @param {Array<string>} array - reference to the array of string
        !          2614: # @param {string} s - the string to look for (using eq)
        !          2615: ##
        !          2616: sub remove_string_from_array {
        !          2617:   my ($array, $s) = @_;
        !          2618:   my $index = index_of_string($array, $s);
        !          2619:   if ($index != -1) {
        !          2620:     splice(@$array, $index, 1);
        !          2621:   }
        !          2622: }
        !          2623: 
        !          2624: ##
        !          2625: # if found, removes a reference from an array, otherwise do nothing
        !          2626: # @param {Array<Object>} array - reference to the array of references
        !          2627: # @param {Object} ref - the reference to look for
        !          2628: ##
        !          2629: sub remove_reference_from_array {
        !          2630:   my ($array, $ref) = @_;
        !          2631:   my $index = index_of_reference($array, $ref);
        !          2632:   if ($index != -1) {
        !          2633:     splice(@$array, $index, 1);
        !          2634:   }
        !          2635: }
        !          2636: 
        !          2637: ##
        !          2638: # replaces a node by its children
        !          2639: # @param {Node} node - the DOM node
        !          2640: ##
        !          2641: sub replace_by_children {
        !          2642:   my ($node) = @_;
        !          2643:   my $parent = $node->parentNode;
        !          2644:   my $next;
        !          2645:   my $previous;
        !          2646:   for (my $child=$node->firstChild; defined $child; $child=$next) {
        !          2647:     $next = $child->nextSibling;
        !          2648:     if ((!defined $previous || !defined $next) &&
        !          2649:         $child->nodeType == XML_TEXT_NODE && $child->nodeValue =~ /^\s*$/) {
        !          2650:       next; # do not keep first and last whitespace nodes
        !          2651:     } else {
        !          2652:       if (!defined $previous && $child->nodeType == XML_TEXT_NODE) {
        !          2653:         # remove whitespace at the beginning
        !          2654:         my $value = $child->nodeValue;
        !          2655:         $value =~ s/^\s+//;
        !          2656:         $child->setData($value);
        !          2657:       }
        !          2658:       if (!defined $next && $child->nodeType == XML_TEXT_NODE) {
        !          2659:         # and at the end
        !          2660:         my $value = $child->nodeValue;
        !          2661:         $value =~ s/\s+$//;
        !          2662:         $child->setData($value);
        !          2663:       }
        !          2664:     }
        !          2665:     $node->removeChild($child);
        !          2666:     $parent->insertBefore($child, $node);
        !          2667:     $previous = $child;
        !          2668:   }
        !          2669:   $parent->removeChild($node);
        !          2670: }
        !          2671: 
        !          2672: ##
        !          2673: # returns the trimmed attribute value if the attribute exists and is not blank, undef otherwise
        !          2674: # @param {Node} node - the DOM node
        !          2675: # @param {string} attribute_name - the attribute name
        !          2676: ##
        !          2677: sub get_non_empty_attribute {
        !          2678:   my ($node, $attribute_name) = @_;
        !          2679:   my $value = $node->getAttribute($attribute_name);
        !          2680:   if (defined $value && $value !~ /^\s*$/) {
        !          2681:     $value = trim($value);
        !          2682:     return($value);
        !          2683:   }
        !          2684:   return(undef);
        !          2685: }
        !          2686: 
        !          2687: ##
        !          2688: # Returns a CSS property value from the style attribute of the element, or undef if not defined
        !          2689: # @param {Element} el - the DOM element
        !          2690: # @param {string} property_name - the CSS property name
        !          2691: ##
        !          2692: sub get_css_property {
        !          2693:   my ($el, $property_name) = @_;
        !          2694:   my $style = $el->getAttribute('style');
        !          2695:   if (defined $style) {
        !          2696:     $style =~ s/^\s*;\s*//;
        !          2697:     $style =~ s/\s*;\s*$//;
        !          2698:   } else {
        !          2699:     $style = '';
        !          2700:   }
        !          2701:   my @pairs = split(';', $style);
        !          2702:   foreach my $pair (@pairs) {
        !          2703:     my @name_value = split(':', $pair);
        !          2704:     if (scalar(@name_value) != 2) {
        !          2705:       next;
        !          2706:     }
        !          2707:     my $name = trim($name_value[0]);
        !          2708:     my $value = trim($name_value[1]);
        !          2709:     if (lc($name) eq $property_name) {
        !          2710:       return($value); # return the first one found
        !          2711:     }
        !          2712:   }
        !          2713:   return(undef);
        !          2714: }
        !          2715: 
        !          2716: ##
        !          2717: # Returns the reference to a hash CSS property name => value from the style attribute of the element.
        !          2718: # Returns an empty list if the style attribute is not defined,
        !          2719: # @param {Element} el - the DOM element
        !          2720: # @return {Hash<string, string>} reference to the hash  property name => property value
        !          2721: ##
        !          2722: sub get_css_properties {
        !          2723:   my ($el) = @_;
        !          2724:   my $style = $el->getAttribute('style');
        !          2725:   if (defined $style) {
        !          2726:     $style =~ s/^\s*;\s*//;
        !          2727:     $style =~ s/\s*;\s*$//;
        !          2728:   } else {
        !          2729:     $style = '';
        !          2730:   }
        !          2731:   my @pairs = split(';', $style);
        !          2732:   tie (my %hash, 'Tie::IxHash', ());
        !          2733:   foreach my $pair (@pairs) {
        !          2734:     my @name_value = split(':', $pair);
        !          2735:     if (scalar(@name_value) != 2) {
        !          2736:       next;
        !          2737:     }
        !          2738:     my $name = trim($name_value[0]);
        !          2739:     my $value = trim($name_value[1]);
        !          2740:     if (defined $hash{$name}) {
        !          2741:       # duplicate property in the style attribute: keep only the last one
        !          2742:       delete $hash{$name};
        !          2743:     }
        !          2744:     $hash{$name} = $value;
        !          2745:   }
        !          2746:   return(\%hash);
        !          2747: }
        !          2748: 
        !          2749: ##
        !          2750: # Sets a CSS property in the style attribute of an element
        !          2751: # @param {Element} el - the DOM element
        !          2752: # @param {string} property_name - the CSS property name
        !          2753: # @param {string} property_value - the CSS property value
        !          2754: ##
        !          2755: sub set_css_property {
        !          2756:   my ($el, $property_name, $property_value) = @_;
        !          2757:   my $hash_ref = { $property_name => $property_value };
        !          2758:   set_css_properties($el, $hash_ref);
        !          2759: }
        !          2760: 
        !          2761: ##
        !          2762: # Sets several CSS properties in the style attribute of an element
        !          2763: # @param {Element} el - the DOM element
        !          2764: # @param {Hash<string, string>} properties - reference to the hash property name => property value
        !          2765: ##
        !          2766: sub set_css_properties {
        !          2767:   my ($el, $properties) = @_;
        !          2768:   my $hash = get_css_properties($el);
        !          2769:   foreach my $property_name (keys %$properties) {
        !          2770:     my $property_value = $properties->{$property_name};
        !          2771:     if (defined $hash->{$property_name}) {
        !          2772:       delete $hash->{$property_name}; # to add the new one at the end
        !          2773:     }
        !          2774:     $hash->{$property_name} = $property_value;
        !          2775:   }
        !          2776:   my $style = '';
        !          2777:   foreach my $key (keys %$hash) {
        !          2778:     $style .= $key.':'.$hash->{$key}.'; ';
        !          2779:   }
        !          2780:   $style =~ s/; $//;
        !          2781:   $el->setAttribute('style', $style);
        !          2782: }
        !          2783: 
        !          2784: 1;
        !          2785: __END__

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>