Annotation of loncom/homework/cleanxml/post_xml.pm, revision 1.2

1.1       damieng     1: # The LearningOnline Network
                      2: # Third step to clean a file.
                      3: #
1.2     ! damieng     4: # $Id: post_xml.pm,v 1.1 2015/12/03 20:40:31 damieng Exp $
1.1       damieng     5: #
                      6: # Copyright Michigan State University Board of Trustees
                      7: #
                      8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
                      9: #
                     10: # LON-CAPA is free software; you can redistribute it and/or modify
                     11: # it under the terms of the GNU General Public License as published by
                     12: # the Free Software Foundation; either version 2 of the License, or
                     13: # (at your option) any later version.
                     14: #
                     15: # LON-CAPA is distributed in the hope that it will be useful,
                     16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
                     17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     18: # GNU General Public License for more details.
                     19: #
                     20: # You should have received a copy of the GNU General Public License
                     21: # along with LON-CAPA; if not, write to the Free Software
                     22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
                     23: #
                     24: # /home/httpd/html/adm/gpl.txt
                     25: #
                     26: # http://www.lon-capa.org/
                     27: #
                     28: ###
                     29: 
                     30: #!/usr/bin/perl
                     31: 
                     32: package Apache::post_xml;
                     33: 
                     34: use strict;
                     35: use utf8;
                     36: use warnings;
                     37: 
                     38: use File::Basename;
                     39: use File::Temp qw/ tempfile /;
                     40: use Cwd 'abs_path';
                     41: use XML::LibXML;
                     42: use HTML::TokeParser; # used to parse sty files
                     43: use Tie::IxHash; # for ordered hashes
                     44: 
                     45: no warnings 'recursion'; # yes, fix_paragraph is using heavy recursion, I know
                     46: 
                     47: # these are constants
                     48: my @block_elements = ('parameter','location','answer','foil','image','polygon','rectangle','text','conceptgroup','itemgroup','item','label','data','function','array','unit','answergroup','functionplotresponse','functionplotruleset','functionplotelements','functionplotcustomrule','essayresponse','hintpart','formulahint','numericalhint','reactionhint','organichint','optionhint','radiobuttonhint','stringhint','customhint','mathhint','formulahintcondition','numericalhintcondition','reactionhintcondition','organichintcondition','optionhintcondition','radiobuttonhintcondition','stringhintcondition','customhintcondition','mathhintcondition','imageresponse','foilgroup','datasubmission','textfield','hiddensubmission','radiobuttonresponse','rankresponse','matchresponse','import','style','script','window','block','library','notsolved','part','postanswerdate','preduedate','problem','problemtype','randomlabel','bgimg','labelgroup','randomlist','solved','while','tex','print','web','gnuplot','curve','Task','IntroParagraph','ClosingParagraph','Question','QuestionText','Setup','Instance','InstanceText','Criteria','CriteriaText','GraderNote','languageblock','translated','lang','instructorcomment','dataresponse','togglebox','standalone','comment','drawimage','allow','displayduedate','displaytitle','responseparam','organicstructure','scriptlib','parserlib','drawoptionlist','spline','backgroundplot','plotobject','plotvector','drawvectorsum','functionplotrule','functionplotvectorrule','functionplotvectorsumrule','axis','key','xtics','ytics','title','xlabel','ylabel','hiddenline','dtm');
                     49: my @inline_like_block = ('stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse', 'hint', 'hintgroup'); # inline elements treated like blocks for pretty print and some other things
                     50: my @responses = ('stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse','essayresponse','radiobuttonresponse','matchresponse','rankresponse','imageresponse','functionplotresponse');
                     51: my @block_html = ('html','head','body','section','h1','h2','h3','h4','h5','h6','div','p','ul','ol','li','table','tbody','tr','td','th','dl','dt','dd','pre','noscript','hr','address','blockquote','object','applet','embed','map','form','fieldset','iframe','center','frameset');
                     52: my @no_newline_inside = ('import','parserlib','scriptlib','data','function','label','xlabel','ylabel','tic','text','rectangle','image','title','h1','h2','h3','h4','h5','h6','li','td','p');
                     53: my @preserve_elements = ('script','answer','pre');
                     54: my @accepting_style = ('section','h1','h2','h3','h4','h5','h6','div','p','li','td','th','dt','dd','pre','blockquote');
                     55: my @latex_math = ('\alpha', '\theta', '\omicron', '\tau', '\beta', '\vartheta', '\pi', '\upsilon', '\gamma', '\gamma', '\varpi', '\phi', '\delta', '\kappa', '\rho', '\varphi', '\epsilon', '\lambda', '\varrho', '\chi', '\varepsilon', '\mu', '\sigma', '\psi', '\zeta', '\nu', '\varsigma', '\omega', '\eta', '\xi',
                     56:   '\Gamma', '\Lambda', '\Sigma', '\Psi', '\Delta', '\Xi', '\Upsilon', '\Omega', '\Theta', '\Pi', '\Phi',
                     57:   '\pm', '\cap', '\diamond', '\oplus', '\mp', '\cup', '\bigtriangleup', '\ominus', '\times', '\uplus', '\bigtriangledown', '\otimes', '\div', '\sqcap', '\triangleleft', '\oslash', '\ast', '\sqcup', '\triangleright', '\odot', '\star', '\vee', '\lhd$', '\bigcirc', '\circ', '\wedge', '\rhd$', '\dagger', '\bullet', '\setminus', '\unlhd$', '\ddagger', '\cdot', '\wr', '\unrhd$', '\amalg', '+', '-',
                     58:   '\leq', '\geq', '\equiv', '\models', '\prec', '\succ', '\sim', '\perp', '\preceq', '\succeq', '\simeq', '\mid', '\ll', '\gg', '\asymp', '\parallel', '\subset', '\supset', '\approx', '\bowtie', '\subseteq', '\supseteq', '\cong', '\Join$', '\sqsubset$', '\sqsupset$', '\neq', '\smile', '\sqsubseteq', '\sqsupseteq', '\doteq', '\frown', '\in', '\ni', '\propto', '\vdash', '\dashv',
                     59:   '\colon', '\ldotp', '\cdotp',
                     60:   '\leftarrow', '\longleftarrow', '\uparrow', '\Leftarrow', '\Longleftarrow', '\Uparrow', '\rightarrow', '\longrightarrow', '\downarrow', '\Rightarrow', '\Longrightarrow', '\Downarrow', '\leftrightarrow', '\longleftrightarrow', '\updownarrow', '\Leftrightarrow', '\Longleftrightarrow', '\Updownarrow', '\mapsto', '\longmapsto', '\nearrow', '\hookleftarrow', '\hookrightarrow', '\searrow', '\leftharpoonup', '\rightharpoonup', '\swarrow', '\leftharpoondown', '\rightharpoondown', '\nwarrow', '\rightleftharpoons', '\leadsto$',
                     61:   '\ldots', '\cdots', '\vdots', '\ddots', '\aleph', '\prime', '\forall', '\infty', '\hbar', '\emptyset', '\exists', '\Box$', '\imath', '\nabla', '\neg', '\Diamond$', '\jmath', '\surd', '\flat', '\triangle', '\ell', '\top', '\natural', '\clubsuit', '\wp', '\bot', '\sharp', '\diamondsuit', '\Re', '\|', '\backslash', '\heartsuit', '\Im', '\angle', '\partial', '\spadesuit', '\mho$',
                     62:   '\sum', '\bigcap', '\bigodot', '\prod', '\bigcup', '\bigotimes', '\coprod', '\bigsqcup', '\bigoplus', '\int', '\bigvee', '\biguplus', '\oint', '\bigwedge',
                     63:   '\arccos', '\cos', '\csc', '\exp', '\ker', '\limsup', '\min', '\sinh', '\arcsin', '\cosh', '\deg', '\gcd', '\lg', '\ln', '\Pr', '\sup', '\arctan', '\cot', '\det', '\hom', '\lim', '\log', '\sec', '\tan', '\arg', '\coth', '\dim', '\inf', '\liminf', '\max', '\sin', '\tanh',
                     64:   '\uparrow', '\Uparrow', '\downarrow', '\Downarrow', '\updownarrow', '\Updownarrow', '\lfloor', '\rfloor', '\lceil', '\rceil', '\langle', '\rangle', '\backslash',
                     65:   '\rmoustache', '\lmoustache', '\rgroup', '\lgroup', '\arrowvert', '\Arrowvert', '\bracevert',
                     66:   '\hat{', '\acute{', '\bar{', '\dot{', '\breve{', '\check{', '\grave{', '\vec{', '\ddot{', '\tilde{',
                     67:   '\widetilde{', '\widehat{', '\overleftarrow{', '\overrightarrow{', '\overline{', '\underline{', '\overbrace{', '\underbrace{', '\sqrt{', '\sqrt[', '\frac{'
                     68: );
                     69: # list of elements that can contain style elements:
                     70: my @containing_styles = ('library','problem',@responses,'foil','item','text','hintgroup','hintpart','label','part','preduedate','postanswerdate','solved','notsolved','block','while','web','standalone','problemtype','languageblock','translated','lang','window','windowlink','togglebox','instructorcomment','body','section','div','p','li','dd','td','th','blockquote','object','applet','video','audio','canvas','fieldset','button',
                     71: 'span','strong','em','b','i','sup','sub','code','kbd','samp','tt','ins','del','var','small','big','u','font');
                     72: my @html_styles = ('span', 'strong', 'em' , 'b', 'i', 'sup', 'sub', 'tt', 'var', 'small', 'big', 'u');
                     73: 
                     74: my $warnings; # 1 = print warnings
                     75: 
                     76: 
                     77: # Parses the XML document and fixes many things to turn it into a document matching the schema.
1.2     ! damieng    78: # @param {reference} textref - reference to the text of the document
        !            79: # @param {string} file_path - path of the file being processed (we only extract the directory path from it)
        !            80: # @param {string} res_dir - path of res directory parent (without the / at the end)
        !            81: # @param {boolean} warn - 1 to print warnings, 0 otherwise
        !            82: # @returns the text of the document as a byte string.
1.1       damieng    83: sub post_xml {
1.2     ! damieng    84:   my ($textref, $file_path, $res_dir, $warn) = @_;
1.1       damieng    85:   $warnings = $warn;
                     86:   
                     87:   my $dom_doc = XML::LibXML->load_xml(string => $textref);
                     88: 
                     89:   my $root = fix_structure($dom_doc);
                     90: 
                     91:   remove_elements($root, ['startouttext','startoutext','startottext','startouttex','startouttect','atartouttext','starouttext','starttextout','starttext','starttextarea','endouttext','endoutext','endoutttext','endouttxt','endouutext','ednouttext','endouttex','endoouttext','endouttest','endtextout','endtextarea','startpartmarker','endpartmarker','basefont','x-claris-tagview','x-claris-window','x-sas-window']);
                     92:   
                     93:   remove_empty_attributes($root);
                     94:   
                     95:   fix_attribute_case($root);
                     96:   
                     97:   my $fix_by_hand = replace_m($root);
                     98:   
                     99:   my @all_block = (@block_elements, @block_html);
1.2     ! damieng   100:   add_sty_blocks($file_path, $res_dir, $root, \@all_block); # must come before the subs using @all_block
1.1       damieng   101: 
                    102:   fix_block_styles($root, \@all_block);
                    103:   $root->normalize();
                    104:   
                    105:   fix_fonts($root, \@all_block);
                    106:   
                    107:   replace_u($root);
                    108: 
                    109:   remove_bad_cdata_sections($root);
                    110:   
                    111:   add_cdata_sections($root);
                    112:   
                    113:   fix_style_element($root);
                    114:   
                    115:   fix_tables($root);
                    116: 
                    117:   fix_lists($root);
                    118:   
                    119:   fix_wrong_name_for_img($root); # should be before replace_deprecated_attributes_by_css
                    120: 
                    121:   replace_deprecated_attributes_by_css($root);
                    122:   
                    123:   replace_center($root, \@all_block); # must come after replace_deprecated_attributes_by_css
                    124:   
                    125:   replace_nobr($root);
                    126:   
                    127:   remove_useless_notsolved($root);
                    128:   
                    129:   fix_paragraphs_inside($root, \@all_block);
                    130: 
                    131:   remove_empty_style($root);
                    132:   
                    133:   fix_empty_lc_elements($root);
                    134:   
                    135:   lowercase_attribute_values($root);
                    136:   
                    137:   replace_numericalresponse_unit_attribute($root);
                    138:   
                    139:   replace_functions_by_elements($root);
                    140:   
                    141:   pretty($root, \@all_block);
                    142: 
                    143:   replace_tm_dtm($root);
                    144:   
                    145:   if ($fix_by_hand) {
                    146:     die "The file has been converted but it should be fixed by hand.";
                    147:   }
                    148:   return $dom_doc->toString(); # byte string !
                    149: }
                    150: 
                    151: sub fix_structure {
                    152:   my ($doc) = @_;
                    153:   # the root element has already been added in pre_xml
                    154:   my $root = $doc->documentElement;
                    155:   # inside the root, replace html, problem and library elements by their content
                    156:   my @toreplace = ('html','problem','library');
                    157:   foreach my $name (@toreplace) {
                    158:     my @elements = $root->getElementsByTagName($name);
                    159:     foreach my $element (@elements) {
                    160:       replace_by_children($element);
                    161:     }
                    162:   }
                    163:   # insert all link and style elements inside a new head element
                    164:   my $current_node = undef;
                    165:   my @heads = $doc->getElementsByTagName('head');
                    166:   my @links = $doc->getElementsByTagName('link');
                    167:   my @styles = $doc->getElementsByTagName('style');
                    168:   my @titles = $doc->getElementsByTagName('title');
                    169:   if (scalar(@titles) > 0) {
                    170:     # NOTE: there is a title element in gnuplot, not to be confused with the one inside HTML head
                    171:     for (my $i=0; $i<scalar(@titles); $i++) {
                    172:       my $title = $titles[$i];
                    173:       my $found_gnuplot = 0;
                    174:       my $ancestor = $title->parentNode;
                    175:       while (defined $ancestor) {
                    176:         if ($ancestor->nodeName eq 'gnuplot') {
                    177:           $found_gnuplot = 1;
                    178:           last;
                    179:         }
                    180:         $ancestor = $ancestor->parentNode;
                    181:       }
                    182:       if ($found_gnuplot) {
                    183:         splice(@titles, $i, 1);
                    184:         $i--;
                    185:       }
                    186:     }
                    187:   }
                    188:   if (scalar(@heads) > 0 || scalar(@titles) > 0 || scalar(@links) > 0 || scalar(@styles) > 0) {
                    189:     my $htmlhead = $doc->createElement('head');
                    190:     foreach my $head (@heads) {
                    191:       my $next;
                    192:       for (my $child=$head->firstChild; defined $child; $child=$next) {
                    193:         $next = $child->nextSibling;
                    194:         $head->removeChild($child);
                    195:         if ($child->nodeType != XML_ELEMENT_NODE ||
                    196:             string_in_array(['title','script','style','meta','link','import','base'], $child->nodeName)) {
                    197:           $htmlhead->appendChild($child);
                    198:         } else {
                    199:           # this should not be in head
                    200:           insert_after_or_first($root, $child, $current_node);
                    201:         }
                    202:       }
                    203:       $head->parentNode->removeChild($head);
                    204:     }
                    205:     foreach my $child (@titles, @links, @styles) {
                    206:       $child->parentNode->removeChild($child);
                    207:       $htmlhead->appendChild($child);
                    208:     }
                    209:     insert_after_or_first($root, $htmlhead, $current_node);
                    210:     $current_node = $htmlhead;
                    211:   }
                    212:   # body
                    213:   my $htmlbody = undef;
                    214:   my @bodies = $doc->getElementsByTagName('body');
                    215:   if (scalar(@bodies) > 0) {
                    216:     # TODO: fix content and position of body elements
                    217:     if ($root->nodeName eq 'problem') {
                    218:       foreach my $body (@bodies) {
                    219:         replace_by_children($body);
                    220:       }
                    221:     }
                    222:   }
                    223:   # add all the meta elements afterwards when they are LON-CAPA meta. Remove all HTML meta.
                    224:   my @meta_names = ('abstract','author','authorspace','avetries','avetries_list','clear','comefrom','comefrom_list','copyright','correct','count','course','course_list','courserestricted','creationdate','dependencies','depth','difficulty','difficulty_list','disc','disc_list','domain','end','field','firstname','generation','goto','goto_list','groupname','helpful','highestgradelevel','hostname','id','keynum','keywords','language','lastname','lastrevisiondate','lowestgradelevel','middlename','mime','modifyinguser','notes','owner','permanentemail','scope','sequsage','sequsage_list','standards','start','stdno','stdno_list','subject','technical','title','url','username','value','version');
                    225:   my @metas = $doc->getElementsByTagName('meta');
                    226:   foreach my $meta (@metas) {
                    227:     $meta->parentNode->removeChild($meta);
                    228:     my $name = $meta->getAttribute('name');
                    229:     my $content = $meta->getAttribute('content');
                    230:     if (defined $name && defined $content && string_in_array(\@meta_names, lc($name))) {
                    231:       my $lcmeta = $doc->createElement('meta');
                    232:       $lcmeta->setAttribute('name', lc($name));
                    233:       $lcmeta->setAttribute('content', $content);
                    234:       insert_after_or_first($root, $lcmeta, $current_node);
                    235:       $current_node = $lcmeta;
                    236:     }
                    237:   }
                    238:   return($root);
                    239: }
                    240: 
                    241: # insert the new child under parent after the reference child, or as the first child if the reference child is not defined
                    242: sub insert_after_or_first {
                    243:   my ($parent, $newchild, $refchild) = @_;
                    244:   if (defined $refchild) {
                    245:     $parent->insertAfter($newchild, $refchild);
                    246:   } elsif (defined $parent->firstChild) {
                    247:     $parent->insertBefore($newchild, $parent->firstChild);
                    248:   } else {
                    249:     $parent->appendChild($newchild);
                    250:   }
                    251: }
                    252: 
                    253: # removes all elements with given names inside the node, but keep the content
                    254: sub remove_elements {
                    255:   my ($node, $to_remove) = @_;
                    256:   my $nextChild;
                    257:   for (my $child=$node->firstChild; defined $child; $child=$nextChild) {
                    258:     $nextChild = $child->nextSibling;
                    259:     my $type = $node->nodeType;
                    260:     if ($type == XML_ELEMENT_NODE) {
                    261:       if (string_in_array($to_remove, $child->nodeName)) {
                    262:         my $first_non_white = $child->firstChild;
                    263:         if (defined $first_non_white && $first_non_white->nodeType == XML_TEXT_NODE &&
                    264:             $first_non_white->nodeValue =~ /^\s*$/) {
                    265:           $first_non_white = $first_non_white->nextSibling;
                    266:         }
                    267:         if (defined $first_non_white) {
                    268:           $nextChild = $first_non_white;
                    269:           replace_by_children($child);
                    270:         } else {
                    271:           $node->removeChild($child);
                    272:         }
                    273:       } else {
                    274:         remove_elements($child, $to_remove);
                    275:       }
                    276:     }
                    277:   }
                    278: }
                    279: 
                    280: # removes some attributes that have an invalid empty value
                    281: sub remove_empty_attributes {
                    282:   my ($root) = @_;
                    283:   my $doc = $root->ownerDocument;
                    284:   # this list is based on validation errors in the MSU subset (it could be more complete if it was based on the schema)
                    285:   my @attributes = (
                    286:     ['curve', ['pointsize']],
                    287:     ['foil', ['location']],
                    288:     ['foilgroup', ['checkboxoptions', 'options', 'texoptions']],
                    289:     ['gnuplot', ['pattern', 'texwidth']],
                    290:     ['img', ['height', 'texheight', 'texwidth', 'texwrap', 'width']],
                    291:     ['import', ['importmode']],
                    292:     ['optionresponse', ['max']],
                    293:     ['organicstructure', ['options']],
                    294:     ['radiobuttonresponse', ['max']],
                    295:     ['randomlabel', ['height', 'texwidth', 'width']],
                    296:     ['stringresponse', ['type']],
                    297:     ['textline', ['size']],
                    298:   );
                    299:   foreach my $element_attributes (@attributes) {
                    300:     my $element_name = $element_attributes->[0];
                    301:     my $attribute_names = $element_attributes->[1];
                    302:     my @elements = $doc->getElementsByTagName($element_name);
                    303:     foreach my $element (@elements) {
                    304:       foreach my $attribute_name (@$attribute_names) {
                    305:         my $value = $element->getAttribute($attribute_name);
                    306:         if (defined $value && $value =~ /^\s*$/) {
                    307:           $element->removeAttribute($attribute_name);
                    308:         }
                    309:       }
                    310:     }
                    311:   }
                    312: }
                    313: 
                    314: # fixes the case for a few attributes that are not all lowercase
                    315: # (the HTML parser used in html_to_xml turns everything lowercase, which is a good thing in general)
                    316: sub fix_attribute_case {
                    317:   my ($root) = @_;
                    318:   my $doc = $root->ownerDocument;
                    319:   my @attributes = (
                    320:     ['labelgroup', ['TeXsize']],
                    321:     ['h1', ['TeXsize']],
                    322:     ['h2', ['TeXsize']],
                    323:     ['h3', ['TeXsize']],
                    324:     ['h4', ['TeXsize']],
                    325:     ['h5', ['TeXsize']],
                    326:     ['h6', ['TeXsize']],
                    327:     # font and basefont have a TeXsize but will be removed
                    328:     ['optionresponse', ['TeXlayout']],
                    329:     ['itemgroup', ['TeXitemgroupwidth']],
                    330:     ['Task', ['OptionalRequired']],
                    331:     ['Question', ['OptionalRequired','Mandatory']],
                    332:     ['Instance', ['OptionalRequired','Disabled']],
                    333:     ['Criteria', ['Mandatory']],
                    334:     ['table', ['TeXwidth','TeXtheme']],
                    335:     ['td', ['TeXwidth']],
                    336:     ['th', ['TeXwidth']],
                    337:     ['img', ['TeXwidth','TeXheight','TeXwrap']],
                    338:   );
                    339:   foreach my $element_attributes (@attributes) {
                    340:     my $element_name = $element_attributes->[0];
                    341:     my $attribute_names = $element_attributes->[1];
                    342:     my @elements = $doc->getElementsByTagName($element_name);
                    343:     foreach my $element (@elements) {
                    344:       foreach my $attribute_name (@$attribute_names) {
                    345:         my $value = $element->getAttribute(lc($attribute_name));
                    346:         if (defined $value) {
                    347:           $element->removeAttribute(lc($attribute_name));
                    348:           $element->setAttribute($attribute_name, $value);
                    349:         }
                    350:       }
                    351:     }
                    352:   }
                    353: }
                    354: 
                    355: # Replaces m by HTML, tm and/or dtm (which will be replaced by <m> later, but they are useful
                    356: #   to know if the element is a block element or not).
                    357: # m might contain non-math LaTeX, while tm and dtm may only contain math.
                    358: # Returns 1 if the file should be fixed by hand, 0 otherwise.
                    359: sub replace_m {
                    360:   my ($root) = @_;
                    361:   my $doc = $root->ownerDocument;
                    362:   my $fix_by_hand = 0;
                    363:   # search for variable declarations
                    364:   my @variables = ();
                    365:   my @scripts = $root->getElementsByTagName('script');
                    366:   foreach my $script (@scripts) {
                    367:     my $type = $script->getAttribute('type');
                    368:     if (defined $type && $type eq 'loncapa/perl') {
                    369:       if (defined $script->firstChild && $script->firstChild->nodeType == XML_TEXT_NODE) {
                    370:         my $text = $script->firstChild->nodeValue;
                    371:         # NOTE: we are not interested in replacing "@value", only "$value"
                    372:         # this regexp is for "  $a = ..." and "  $a[...] = ..."
                    373:         while ($text =~ /^[ \t]*\$([a-zA-Z_0-9]+)(?:\[[^\]]+\])?[ \t]*=/gm) {
                    374:           if (!string_in_array(\@variables, $1)) {
                    375:             push(@variables, $1);
                    376:           }
                    377:         }
                    378:         # this regexp is for "...;  $a = ..." and "...;  $a[...] = ..."
                    379:         while ($text =~ /^[^'"\/;]+;[ \t]*\$([a-zA-Z_0-9]+)(?:\[[^\]]+\])?[ \t]*=/gm) {
                    380:           if (!string_in_array(\@variables, $1)) {
                    381:             push(@variables, $1);
                    382:           }
                    383:         }
                    384:         # this regexp is for "  @a = ..."
                    385:         while ($text =~ /^[ \t]*\@([a-zA-Z_0-9]+)[ \t]*=/gm) {
                    386:           if (!string_in_array(\@variables, $1)) {
                    387:             push(@variables, $1);
                    388:           }
                    389:         }
                    390:         # this regexp is for "  ($a, $b, $c) = ..."
                    391:         my @matches = ($text =~ /^[ \t]*\([ \t]*\$([a-zA-Z_0-9]+)(?:[ \t]*,[ \t]*\$([a-zA-Z_0-9]+))*[ \t]*\)[ \t]*=/gm);
                    392:         foreach my $match (@matches) {
                    393:           if (!defined $match) {
                    394:             next; # not sure why it happens, but it does
                    395:           }
                    396:           if (!string_in_array(\@variables, $match)) {
                    397:             push(@variables, $match);
                    398:           }
                    399:         }
                    400:         # and this one is for "push @a"
                    401:         while ($text =~ /^[ \t]*push @([a-zA-Z_0-9]+)[ \t,]*/gm) {
                    402:           if (!string_in_array(\@variables, $1)) {
                    403:             push(@variables, $1);
                    404:           }
                    405:         }
                    406:         # use the opportunity to report usage of <m> in Perl scripts
                    407:         if ($text =~ /^[^#].*<m[ >]/m) {
                    408:           if ($warnings) {
                    409:             print "WARNING: <m> is used in a script, it should be converted by hand\n";
                    410:           }
                    411:           $fix_by_hand = 1;
                    412:         }
                    413:       }
                    414:     }
                    415:   }
                    416:   my @ms = $root->getElementsByTagName('m');
                    417:   foreach my $m (@ms) {
                    418:     if (!defined $m->firstChild) {
                    419:       $m->parentNode->removeChild($m);
                    420:       next;
                    421:     }
                    422:     if (defined $m->firstChild->nextSibling || $m->firstChild->nodeType != XML_TEXT_NODE) {
                    423:       if ($warnings) {
                    424:         print "WARNING: m value is not simple text\n";
                    425:       }
                    426:       $fix_by_hand = 1;
                    427:       next;
                    428:     }
                    429:     my $text = $m->firstChild->nodeValue;
                    430:     my $text_before_variable_replacement = $text;
                    431:     my $var_key1 = 'dfhg3df54hg65hg4';
                    432:     my $var_key2 = 'dfhg654d6f5g4h5f';
                    433:     my $eval = defined $m->getAttribute('eval') && $m->getAttribute('eval') eq 'on';
                    434:     if ($eval) {
                    435:       # replace variables
                    436:       foreach my $variable (@variables) {
                    437:         my $replacement = $var_key1.$variable.$var_key2;
                    438:         $text =~ s/\$$variable(?![a-zA-Z])/$replacement/ge;
                    439:         $text =~ s/\$\{$variable\}/$replacement/ge;
                    440:       }
                    441:     }
                    442:     # check if the expression is enclosed in math separators: $ $$ \( \) \[ \]
                    443:     # if so, replace the whole node by dtm or tm
                    444:     my $new_text;
                    445:     my $new_node_name;
                    446:     if ($text =~ /^\s*\$\$([^\$]*)\$\$\s*$/) {
                    447:       $new_node_name = 'dtm';
                    448:       $new_text = $1;
                    449:     } elsif ($text =~ /^\s*\\\[(.*)\\\]\s*$/s) {
                    450:       $new_node_name = 'dtm';
                    451:       $new_text = $1;
                    452:     } elsif ($text =~ /^\s*\$([^\$]*)\$\s*$/) {
                    453:       $new_node_name = 'tm';
                    454:       $new_text = $1;
                    455:     } elsif ($text =~ /^\s*\\\((.*)\\\)\s*$/s) {
                    456:       $new_node_name = 'tm';
                    457:       $new_text = $1;
                    458:     }
                    459:     if (defined $new_node_name) {
                    460:       if ($eval) {
                    461:         foreach my $variable (@variables) {
                    462:           my $replacement = $var_key1.$variable.$var_key2;
                    463:           $new_text =~ s/$replacement([a-zA-Z])/\${$variable}$1/g;
                    464:           $new_text =~ s/$replacement/\$$variable/g;
                    465:         }
                    466:       }
                    467:       my $new_node = $doc->createElement($new_node_name);
                    468:       if ($eval) {
                    469:         $new_node->setAttribute('eval', 'on');
                    470:       }
                    471:       $new_node->appendChild($doc->createTextNode($new_text));
                    472:       $m->parentNode->replaceChild($new_node, $m);
                    473:       next;
                    474:     }
                    475:     if ($text !~ /\$|\\\(|\\\)|\\\[|\\\]/) {
                    476:       # there are no math separators inside
                    477:       # try to guess if this is meant as math
                    478:       my $found_math = 0;
                    479:       foreach my $symbol (@latex_math) {
                    480:         if (index($text, $symbol) != -1) {
                    481:           $found_math = 1;
                    482:           last;
                    483:         }
                    484:       }
                    485:       if ($found_math) {
                    486:         # interpret the whole text as LaTeX inline math
                    487:         my $new_node = $doc->createElement('tm');
                    488:         if ($eval) {
                    489:           $new_node->setAttribute('eval', 'on');
                    490:         }
                    491:         $new_node->appendChild($doc->createTextNode($text_before_variable_replacement));
                    492:         $m->parentNode->replaceChild($new_node, $m);
                    493:         next;
                    494:       }
                    495:       # no math symbol found, we will convert the text with tth
                    496:     }
                    497:     
                    498:     # there are math separators inside, even after hiding variables, or there was no math symbol
                    499:     
                    500:     # hide math parts inside before running tth
                    501:     my $math_key1 = '#ghjgdh5hg45gf';
                    502:     my $math_key2 = '#';
                    503:     my @maths = ();
                    504:     my @separators = (['$$','$$'], ['\\(','\\)'], ['\\[','\\]'], ['$','$']);
                    505:     foreach my $seps (@separators) {
                    506:       my $sep1 = $seps->[0];
                    507:       my $sep2 = $seps->[1];
                    508:       my $pos1 = index($text, $sep1);
                    509:       if ($pos1 == -1) {
                    510:         next;
                    511:       }
                    512:       my $pos2 = index($text, $sep2, $pos1+length($sep1));
                    513:       while ($pos1 != -1 && $pos2 != -1) {
                    514:         my $replace = substr($text, $pos1, $pos2+length($sep2)-$pos1);
                    515:         push(@maths, $replace);
                    516:         my $by = $math_key1.scalar(@maths).$math_key2;
                    517:         $text = substr($text, 0, $pos1).$by.substr($text, $pos2+length($sep2));
                    518:         $pos1 = index($text, $sep1);
                    519:         if ($pos1 != -1) {
                    520:           $pos2 = index($text, $sep2, $pos1+length($sep1));
                    521:         }
                    522:       }
                    523:     }
                    524:     # get HTML as text from tth
                    525:     my $html_text = tth($text);
                    526:     # replace math by replacements
                    527:     for (my $i=0; $i < scalar(@maths); $i++) {
                    528:       my $math = $maths[$i];
                    529:       $math =~ s/&/&amp;/g;
                    530:       $math =~ s/</&lt;/g;
                    531:       $math =~ s/>/&gt;/g;
                    532:       if ($math =~ /^\$\$(.*)\$\$$/s) {
                    533:         $math = '<dtm>'.$1.'</dtm>';
                    534:       } elsif ($math =~ /^\\\[(.*)\\\]$/s) {
                    535:         $math = '<dtm>'.$1.'</dtm>';
                    536:       } elsif ($math =~ /^\\\((.*)\\\)$/s) {
                    537:         $math = '<tm>'.$1.'</tm>';
                    538:       } elsif ($math =~ /^\$(.*)\$$/s) {
                    539:         $math = '<tm>'.$1.'</tm>';
                    540:       }
                    541:       my $replace = $math_key1.($i+1).$math_key2;
                    542:       $html_text =~ s/$replace/$math/;
                    543:     }
                    544:     # replace variables if necessary
                    545:     if ($eval) {
                    546:       foreach my $variable (@variables) {
                    547:         my $replacement = $var_key1.$variable.$var_key2;
                    548:         $html_text =~ s/$replacement([a-zA-Z])/\${$variable}$1/g;
                    549:         $html_text =~ s/$replacement/\$$variable/g;
                    550:       }
                    551:     }
                    552:     my $fragment = html_to_dom($html_text);
                    553:     $doc->adoptNode($fragment);
                    554:     $m->parentNode->replaceChild($fragment, $m);
                    555:     
                    556:   }
                    557:   return $fix_by_hand;
                    558: }
                    559: 
                    560: # Returns the HTML equivalent of LaTeX input, using tth
                    561: sub tth {
                    562:   my ($text) = @_;
                    563:   my ($fh, $tmp_path) = tempfile();
                    564:   binmode($fh, ':utf8');
                    565:   print $fh $text;
                    566:   close $fh;
                    567:   my $output = `tth -r -w2 -u -y0 < $tmp_path 2>/dev/null`;
                    568:   # hopefully the temp file will not be removed before this point (otherwise we should use unlink_on_destroy 0)
                    569:   $output =~ s/^\s*|\s*$//;
                    570:   $output =~ s/<div class="p"><!----><\/div>/<br\/>/; # why is tth using such ugly markup for \newline ?
                    571:   return $output;
                    572: }
                    573: 
                    574: # transform simple HTML into a DOM fragment (which will need to be adopted by the document)
                    575: sub html_to_dom {
                    576:   my ($text) = @_;
                    577:   $text = '<root>'.$text.'</root>';
                    578:   my $textref = html_to_xml::html_to_xml(\$text);
                    579:   utf8::upgrade($$textref); # otherwise the XML parser fails when the HTML parser turns &nbsp; into a character
                    580:   my $dom_doc = XML::LibXML->load_xml(string => $textref);
                    581:   my $root = $dom_doc->documentElement;
                    582:   remove_empty_style($root);
                    583:   my $fragment = $dom_doc->createDocumentFragment();
                    584:   my $next;
                    585:   for (my $n=$root->firstChild; defined $n; $n=$next) {
                    586:     $next = $n->nextSibling;
                    587:     $root->removeChild($n);
                    588:     $fragment->appendChild($n);
                    589:   }
                    590:   return($fragment);
                    591: }
                    592: 
                    593: # Use the linked sty files to guess which newly defined elements should be considered blocks.
                    594: # Also adds to @containing_styles the sty elements that contain styles.
                    595: # @param {string} fn - the file path (we only extract the directory path from it)
1.2     ! damieng   596: # @param {string} res_dir - path of res directory parent (without the / at the end)
        !           597: # @param {Element} root - the root element
1.1       damieng   598: sub add_sty_blocks {
1.2     ! damieng   599:   my ($fn, $res_dir, $root, $all_block) = @_;
1.1       damieng   600:   my $doc = $root->ownerDocument;
                    601:   my @parserlibs = $doc->getElementsByTagName('parserlib');
                    602:   my @libs = ();
                    603:   foreach my $parserlib (@parserlibs) {
                    604:     if (defined $parserlib->firstChild && $parserlib->firstChild->nodeType == XML_TEXT_NODE) {
                    605:       my $value = $parserlib->firstChild->nodeValue;
                    606:       $value =~ s/^\s+|\s+$//g;
                    607:       if ($value ne '') {
                    608:         push(@libs, $value);
                    609:       }
                    610:     }
                    611:   }
                    612:   my ($name, $path, $suffix) = fileparse($fn);
                    613:   foreach my $sty (@libs) {
                    614:     if (substr($sty, 0, 1) eq '/') {
1.2     ! damieng   615:       $sty = $res_dir.$sty;
1.1       damieng   616:     } else {
                    617:       $sty = $path.$sty;
                    618:     }
                    619:     my $new_elements = parse_sty($sty, $all_block);
                    620:     better_guess($root, $new_elements, $all_block);
                    621:     my $new_blocks = $new_elements->{'block'};
                    622:     my $new_inlines = $new_elements->{'inline'};
                    623:     push(@$all_block, @{$new_blocks});
                    624:     #push(@inlines, @{$new_inlines}); # we are not using a list of inline elements at this point
                    625:   }
                    626: }
                    627: 
                    628: ##
                    629: # Parses a sty file and returns lists of block and inline elements.
                    630: # @param {string} fn - the file path
                    631: ##
                    632: sub parse_sty {
                    633:   my ($fn, $all_block) = @_;
                    634:   my @blocks = ();
                    635:   my @inlines = ();
                    636:   my $p = HTML::TokeParser->new($fn);
                    637:   if (! $p) {
                    638:     die "post_xml.pl: parse_sty: Error reading $fn\n";
                    639:   }
                    640:   $p->empty_element_tags(1);
                    641:   my $in_definetag = 0;
                    642:   my $in_render = 0;
                    643:   my %newtags = ();
                    644:   my $newtag = '';
                    645:   my $is_block = 0;
                    646:   while (my $token = $p->get_token) {
                    647:     if ($token->[0] eq 'S') {
                    648:       my $tag = lc($token->[1]);
                    649:       if ($tag eq 'definetag') {
                    650:         $in_definetag = 1;
                    651:         $is_block = 0;
                    652:         my $attributes = $token->[2];
                    653:         $newtag = $attributes->{'name'};
                    654:         if (substr($newtag, 0, 1) eq '/') {
                    655:           $newtag = substr($newtag, 1);
                    656:         }
                    657:       } elsif ($in_definetag && $tag eq 'render') {
                    658:         $in_render = 1;
                    659:         $is_block = 0;
                    660:       } elsif ($in_render) {
                    661:         if (string_in_array($all_block, $tag)) {
                    662:           $is_block = 1;
                    663:         }
                    664:       }
                    665:     } elsif ($token->[0] eq 'E') {
                    666:       my $tag = lc($token->[1]);
                    667:       if ($tag eq 'definetag') {
                    668:         $in_definetag = 0;
                    669:         if (defined $newtags{$newtag}) {
                    670:           $newtags{$newtag} = $newtags{$newtag} || $is_block;
                    671:         } else {
                    672:           $newtags{$newtag} = $is_block;
                    673:         }
                    674:       } elsif ($in_definetag && $tag eq 'render') {
                    675:         $in_render = 0;
                    676:       }
                    677:     }
                    678:   }
                    679:   foreach $newtag (keys(%newtags)) {
                    680:     if ($newtags{$newtag} == 1) {
                    681:       push(@blocks, $newtag);
                    682:     } else {
                    683:       push(@inlines, $newtag);
                    684:     }
                    685:   }
                    686:   return {'block'=>\@blocks, 'inline'=>\@inlines};
                    687: }
                    688: 
                    689: ##
                    690: # Marks as block the elements that contain block elements in the input file.
                    691: # Also adds to @containing_styles the sty elements that contain styles.
                    692: # @param {string} fn - the file path
                    693: # @param {Hash<string,Array>} new_elements - contains arrays in 'block' and 'inline'
                    694: ##
                    695: sub better_guess {
                    696:   my ($root, $new_elements, $all_block) = @_;
                    697:   my $new_blocks = $new_elements->{'block'};
                    698:   my $new_inlines = $new_elements->{'inline'};
                    699:   
                    700:   my @change = (); # change these elements from inline to block
                    701:   foreach my $tag (@{$new_inlines}) {
                    702:     my @nodes = $root->getElementsByTagName($tag);
                    703:     NODE_LOOP: foreach my $node (@nodes) {
                    704:       for (my $child=$node->firstChild; defined $child; $child=$child->nextSibling) {
                    705:         if ($child->nodeType == XML_ELEMENT_NODE) {
                    706:           if (string_in_array($all_block, $child->nodeName) || string_in_array($new_blocks, $child->nodeName)) {
                    707:             push(@change, $tag);
                    708:             last NODE_LOOP;
                    709:           }
                    710:         }
                    711:       }
                    712:     }
                    713:   }
                    714:   foreach my $inline (@change) {
                    715:     my $index = 0;
                    716:     $index++ until $new_inlines->[$index] eq $inline;
                    717:     splice(@{$new_inlines}, $index, 1);
                    718:     push(@{$new_blocks}, $inline);
                    719:   }
                    720:   # add to @containing_styles when a style is used inside
                    721:   # NOTE: some sty elements will be added even though they should not, but if we don't do that
                    722:   # all style will be removed in the sty elements.
                    723:   foreach my $tag ((@{$new_blocks}, @{$new_inlines})) {
                    724:     my @nodes = $root->getElementsByTagName($tag);
                    725:     NODE_LOOP: foreach my $node (@nodes) {
                    726:       for (my $child=$node->firstChild; defined $child; $child=$child->nextSibling) {
                    727:         if ($child->nodeType == XML_ELEMENT_NODE) {
                    728:           if (string_in_array(\@html_styles, $child->nodeName)) {
                    729:             push(@containing_styles, $tag);
                    730:             last NODE_LOOP;
                    731:           }
                    732:         }
                    733:       }
                    734:     }
                    735:   }
                    736: }
                    737: 
                    738: # When a style element contains a block, move the style inside the block where it is allowed.
                    739: # style/block/other -> block/style/other
                    740: # When a style is used where it is not allowed, move it inside its children or remove it (unless it contains only text)
                    741: # element_not_containing_styles/style/other -> element_not_containing_styles/other/style (except if other is a style)
                    742: # The fix is not perfect in the case of element_not_containing_styles/style1/style2/block/text (style1 will be lost):
                    743: # element_not_containing_styles/style1/style2/block/text -> element_not_containing_styles/block/style2/text
                    744: # (a solution to this problem would be to merge the styles in a span)
                    745: # NOTE: .sty defined elements might have been added to @containing_styles by better_guess().
                    746: sub fix_block_styles {
                    747:   my ($element, $all_block) = @_;
                    748:   my $doc = $element->ownerDocument;
                    749:   if (string_in_array(\@html_styles, $element->nodeName)) {
                    750:     # move spaces out of the style element
                    751:     if (defined $element->firstChild && $element->firstChild->nodeType == XML_TEXT_NODE) {
                    752:       my $child = $element->firstChild;
                    753:       if ($child->nodeValue =~ /^(\s+)(\S.*)$/s) {
                    754:         $element->parentNode->insertBefore($doc->createTextNode($1), $element);
                    755:         $child->setData($2);
                    756:       }
                    757:     }
                    758:     if (defined $element->lastChild && $element->lastChild->nodeType == XML_TEXT_NODE) {
                    759:       my $child = $element->lastChild;
                    760:       if ($child->nodeValue =~ /^(.*\S)(\s+)$/s) {
                    761:         $element->parentNode->insertAfter($doc->createTextNode($2), $element);
                    762:         $child->setData($1);
                    763:       }
                    764:     }
                    765:     
                    766:     my $found_block = 0;
                    767:     for (my $child=$element->firstChild; defined $child; $child=$child->nextSibling) {
                    768:       if ($child->nodeType == XML_ELEMENT_NODE && string_in_array($all_block, $child->nodeName)) {
                    769:         $found_block = 1;
                    770:         last;
                    771:       }
                    772:     }
                    773:     my $no_style_here = !string_in_array(\@containing_styles, $element->parentNode->nodeName);
                    774:     if ($found_block || $no_style_here) {
                    775:       # there is a block or the style is not allowed here,
                    776:       # the style element has to be replaced by its modified children
                    777:       my $s; # a clone of the style
                    778:       my $next;
                    779:       for (my $child=$element->firstChild; defined $child; $child=$next) {
                    780:         $next = $child->nextSibling;
                    781:         if ($child->nodeType == XML_ELEMENT_NODE && (string_in_array($all_block, $child->nodeName) ||
                    782:             $child->nodeName eq 'br' || $no_style_here)) {
                    783:           # avoid inverting a style with a style with $no_style_here (that would cause endless recursion)
                    784:           if (!$no_style_here || (!string_in_array(\@html_styles, $child->nodeName) &&
                    785:               string_in_array(\@containing_styles, $child->nodeName))) {
                    786:             # block node or inline node when the style is not allowed:
                    787:             # move all children inside the style, and make the style the only child
                    788:             $s = $element->cloneNode();
                    789:             my $next2;
                    790:             for (my $child2=$child->firstChild; defined $child2; $child2=$next2) {
                    791:               $next2 = $child2->nextSibling;
                    792:               $child->removeChild($child2);
                    793:               $s->appendChild($child2);
                    794:             }
                    795:             $child->appendChild($s);
                    796:           }
                    797:           $s = undef;
                    798:         } elsif (($child->nodeType == XML_TEXT_NODE && $child->nodeValue !~ /^\s*$/) ||
                    799:             $child->nodeType == XML_ELEMENT_NODE) {
                    800:           # if the style is allowed, move text and inline nodes inside the style
                    801:           if (!$no_style_here) {
                    802:             if (!defined $s) {
                    803:               $s = $element->cloneNode();
                    804:               $element->insertBefore($s, $child);
                    805:             }
                    806:             $element->removeChild($child);
                    807:             $s->appendChild($child);
                    808:           }
                    809:         } else {
                    810:           # do not put other nodes inside the style
                    811:           $s = undef;
                    812:         }
                    813:       }
                    814:       # now replace by children and fix them
                    815:       my $parent = $element->parentNode;
                    816:       for (my $child=$element->firstChild; defined $child; $child=$next) {
                    817:         $next = $child->nextSibling;
                    818:         $element->removeChild($child);
                    819:         $parent->insertBefore($child, $element);
                    820:         if ($child->nodeType == XML_ELEMENT_NODE) {
                    821:           fix_block_styles($child, $all_block);
                    822:         }
                    823:       }
                    824:       $parent->removeChild($element);
                    825:       return;
                    826:     }
                    827:   }
                    828:   # otherwise fix all children
                    829:   my $next;
                    830:   for (my $child=$element->firstChild; defined $child; $child=$next) {
                    831:     $next = $child->nextSibling;
                    832:     if ($child->nodeType == XML_ELEMENT_NODE) {
                    833:       fix_block_styles($child, $all_block);
                    834:     }
                    835:   }
                    836: }
                    837: 
                    838: # removes empty font elements and font elements that contain at least one block element
                    839: # replaces other font elements by equivalent span
                    840: sub fix_fonts {
                    841:   my ($root, $all_block) = @_;
                    842:   my $doc = $root->ownerDocument;
                    843:   my @fonts = $root->getElementsByTagName('font');
                    844:   @fonts = reverse(@fonts); # to deal with the ancestor last in the case of font/font
                    845:   foreach my $font (@fonts) {
                    846:     my $block = 0;
                    847:     for (my $child=$font->firstChild; defined $child; $child=$child->nextSibling) {
                    848:       if (string_in_array($all_block, $child->nodeName) || string_in_array(\@inline_like_block, $child->nodeName)) {
                    849:         $block = 1;
                    850:         last;
                    851:       }
                    852:     }
                    853:     if (!defined $font->firstChild || $block) {
                    854:       # empty font or font containing block elements
                    855:       # replace this node by its content
                    856:       replace_by_children($font);
                    857:     } else {
                    858:       # replace by equivalent span
                    859:       my $color = get_non_empty_attribute($font, 'color');
                    860:       my $size = get_non_empty_attribute($font, 'size');
                    861:       my $face = get_non_empty_attribute($font, 'face');
                    862:       if (defined $face) {
                    863:         $face =~ s/^,|,$//;
                    864:       }
                    865:       if (!defined $color && !defined $size && !defined $face) {
                    866:         # useless font element: replace this node by its content
                    867:         replace_by_children($font);
                    868:         next;
                    869:       }
                    870:       my $replacement;
                    871:       tie (my %properties, 'Tie::IxHash', ());
                    872:       if (!defined $color && !defined $size && defined $face && lc($face) eq 'symbol') {
                    873:         $replacement = $doc->createDocumentFragment();
                    874:       } else {
                    875:         $replacement = $doc->createElement('span');
                    876:         my $css = '';
                    877:         if (defined $color) {
                    878:           $color =~ s/^x/#/;
                    879:           $properties{'color'} = $color;
                    880:         }
                    881:         if (defined $size) {
                    882:           my %hash = (
                    883:             '1' => 'x-small',
                    884:             '2' => 'small',
                    885:             '3' => 'medium',
                    886:             '4' => 'large',
                    887:             '5' => 'x-large',
                    888:             '6' => 'xx-large',
                    889:             '7' => '300%',
                    890:             '-1' => 'small',
                    891:             '-2' => 'x-small',
                    892:             '+1' => 'large',
                    893:             '+2' => 'x-large',
                    894:             '+3' => 'xx-large',
                    895:             '+4' => '300%',
                    896:           );
                    897:           my $value = $hash{$size};
                    898:           if (!defined $value) {
                    899:             $value = 'medium';
                    900:           }
                    901:           $properties{'font-size'} = $value;
                    902:         }
                    903:         if (defined $face) {
                    904:           if (lc($face) ne 'symbol' && lc($face) ne 'bold') {
                    905:             $properties{'font-family'} = $face;
                    906:           }
                    907:         }
                    908:         set_css_properties($replacement, \%properties);
                    909:       }
                    910:       if (defined $face && lc($face) eq 'symbol') {
                    911:         # convert all content to unicode
                    912:         my $next;
                    913:         for (my $child=$font->firstChild; defined $child; $child=$next) {
                    914:           $next = $child->nextSibling;
                    915:           if ($child->nodeType == XML_TEXT_NODE) {
                    916:             my $value = $child->nodeValue;
                    917:             $value =~ tr/ABGDEZHQIKLMNXOPRSTUFCYWabgdezhqiklmnxoprVstufcywJjv¡«¬®/ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩαβγδεζηθικλμνξοπρςστυφχψωϑϕϖϒ↔←→/;
                    918:             $child->setData($value);
                    919:           }
                    920:         }
                    921:       }
                    922:       # replace the font node
                    923:       if ($replacement->nodeType == XML_ELEMENT_NODE && !defined $font->previousSibling &&
                    924:           !defined $font->nextSibling && string_in_array(\@accepting_style, $font->parentNode->nodeName)) {
                    925:         # use CSS on the parent block and replace font by its children instead of using a new element
                    926:         set_css_properties($font->parentNode, \%properties);
                    927:         replace_by_children($font);
                    928:       } else {
                    929:         # move all font children inside the replacement (span or fragment)
                    930:         my $next;
                    931:         for (my $child=$font->firstChild; defined $child; $child=$next) {
                    932:           $next = $child->nextSibling;
                    933:           $font->removeChild($child);
                    934:           $replacement->appendChild($child);
                    935:         }
                    936:         # replace font
                    937:         $font->parentNode->replaceChild($replacement, $font);
                    938:       }
                    939:     }
                    940:   }
                    941:   $root->normalize();
                    942: }
                    943: 
                    944: # replaces u by <span style="text-decoration: underline">
                    945: sub replace_u {
                    946:   my ($root) = @_;
                    947:   my $doc = $root->ownerDocument;
                    948:   my @us = $root->getElementsByTagName('u');
                    949:   foreach my $u (@us) {
                    950:     my $span = $doc->createElement('span');
                    951:     $span->setAttribute('style', 'text-decoration: underline');
                    952:     my $next;
                    953:     for (my $child=$u->firstChild; defined $child; $child=$next) {
                    954:       $next = $child->nextSibling;
                    955:       $u->removeChild($child);
                    956:       $span->appendChild($child);
                    957:     }
                    958:     $u->parentNode->replaceChild($span, $u);
                    959:   }
                    960: }
                    961: 
                    962: # removes CDATA sections tags that have not been parsed correcty by the HTML parser
                    963: # also removes bad comments in script elements
                    964: sub remove_bad_cdata_sections {
                    965:   my ($root) = @_;
                    966:   my $doc = $root->ownerDocument;
                    967:   foreach my $name (@preserve_elements) {
                    968:     my @nodes = $root->getElementsByTagName($name);
                    969:     foreach my $node (@nodes) {
                    970:       if (defined $node->firstChild && $node->firstChild->nodeType == XML_TEXT_NODE) {
                    971:         my $value = $node->firstChild->nodeValue;
                    972:         if ($name eq 'script' && (!defined $node->getAttribute('type') || $node->getAttribute('type') ne 'loncapa/perl') &&
                    973:             !defined $node->firstChild->nextSibling && $value =~ /^(\s*)<!--(.*)-->(\s*)$/) {
                    974:           # web browsers interpret that as a real comment when it is on 1 line, but the Perl HTML parser thinks it is the script
                    975:           # -> turning it back into a comment
                    976:           # (this is only true for Javascript script elements, since LON-CAPA does not parse loncapa/perl scripts in the same way)
                    977:           $node->removeChild($node->firstChild);
                    978:           $node->appendChild($doc->createComment($2));
                    979:           next;
                    980:         }
                    981:         # at the beginning:
                    982:         $value =~ s/^(\s*)<!\[CDATA\[/$1/; # <![CDATA[
                    983:         $value =~ s/^(\s*)\/\*\s*<!\[CDATA\[\s*\*\//$1/; # /* <![CDATA[ */
                    984:         $value =~ s/^(\s*)\/\/\s*<!\[CDATA\[/$1/; # // <![CDATA[
                    985:         $value =~ s/^(\s*)(\/\/)?\s*<!--/$1/; # // <!--
                    986:         # at the end:
                    987:         $value =~ s/\/\/\s*\]\]>(\s*)$/$1/; # // ]]>
                    988:         $value =~ s/\]\]>(\s*)$/$1/; # ]]>
                    989:         $value =~ s/(\/\/)?\s*-->(\s*)$/$2/; # // -->
                    990:         $value =~ s/\/\*\s*\]\]>\s*\*\/(\s*)$/$1/; # /* ]]> */
                    991:         
                    992:         $value = "\n".$value."\n";
                    993:         $value =~ s/\s*(\n[ \t]*)/$1/;
                    994:         $value =~ s/\s+$/\n/;
                    995:         $node->firstChild->setData($value);
                    996:       }
                    997:     }
                    998:   }
                    999: }
                   1000: 
                   1001: # adds CDATA sections to scripts
                   1002: sub add_cdata_sections {
                   1003:   my ($root) = @_;
                   1004:   my $doc = $root->ownerDocument;
                   1005:   my @scripts = $root->getElementsByTagName('script');
                   1006:   my @answers = $root->getElementsByTagName('answer');
                   1007:   foreach my $answer (@answers) {
                   1008:     my $ancestor = $answer->parentNode;
                   1009:     my $found_capa_response = 0;
                   1010:     while (defined $ancestor) {
                   1011:       if ($ancestor->nodeName eq 'numericalresponse' || $ancestor->nodeName eq 'formularesponse') {
                   1012:         $found_capa_response = 1;
                   1013:         last;
                   1014:       }
                   1015:       $ancestor = $ancestor->parentNode;
                   1016:     }
                   1017:     if (!$found_capa_response) {
                   1018:       push(@scripts, $answer);
                   1019:     }
                   1020:   }
                   1021:   foreach my $script (@scripts) {
                   1022:     # use a CDATA section in the normal situation, for any script
                   1023:     my $first = $script->firstChild;
                   1024:     if (defined $first && $first->nodeType == XML_TEXT_NODE && !defined $first->nextSibling) {
                   1025:       my $cdata = $doc->createCDATASection($first->nodeValue);
                   1026:       $script->replaceChild($cdata, $first);
                   1027:     }
                   1028:   }
                   1029: }
                   1030: 
                   1031: # removes "<!--" and "-->" at the beginning and end of style elements
                   1032: sub fix_style_element {
                   1033:   my ($root) = @_;
                   1034:   my @styles = $root->getElementsByTagName('style');
                   1035:   foreach my $style (@styles) {
                   1036:     if (defined $style->firstChild && $style->firstChild->nodeType == XML_TEXT_NODE &&
                   1037:         !defined $style->firstChild->nextSibling) {
                   1038:       my $text = $style->firstChild->nodeValue;
                   1039:       if ($text =~ /^\s*<!--(.*)-->\s*$/s) {
                   1040:         $style->firstChild->setData($1);
                   1041:       }
                   1042:     }
                   1043:   }
                   1044: }
                   1045: 
                   1046: # create missing cells at the end of table rows
                   1047: sub fix_tables {
                   1048:   my ($root) = @_;
                   1049:   my @tables = $root->getElementsByTagName('table');
                   1050:   foreach my $table (@tables) {
                   1051:     fix_cells($table);
                   1052:     foreach my $tbody ($table->getChildrenByTagName('tbody')) {
                   1053:       fix_cells($tbody);
                   1054:     }
                   1055:     foreach my $thead ($table->getChildrenByTagName('thead')) {
                   1056:       fix_cells($thead);
                   1057:     }
                   1058:     foreach my $tfoot ($table->getChildrenByTagName('tfoot')) {
                   1059:       fix_cells($tfoot);
                   1060:     }
                   1061:   }
                   1062: }
                   1063: 
                   1064: # create missing cells at the end of table rows
                   1065: sub fix_cells {
                   1066:   my ($table) = @_; # could actually be table, tbody, thead or tfoot
                   1067:   my $doc = $table->ownerDocument;
                   1068:   my @nb_cells = ();
                   1069:   my $max_nb_cells = 0;
                   1070:   my @rowspans = ();
                   1071:   my @trs = $table->getChildrenByTagName('tr');
                   1072:   foreach my $tr (@trs) {
                   1073:     my $nb_cells;
                   1074:     if (defined $rowspans[0]) {
                   1075:       $nb_cells = shift(@rowspans);
                   1076:     } else {
                   1077:       $nb_cells = 0;
                   1078:     }
                   1079:     for (my $cell=$tr->firstChild; defined $cell; $cell=$cell->nextSibling) {
                   1080:       if ($cell->nodeName eq 'td' || $cell->nodeName eq 'th') {
                   1081:         my $colspan = $cell->getAttribute('colspan');
                   1082:         if (defined $colspan && $colspan =~ /^\s*[0-9]+\s*$/) {
                   1083:           $nb_cells += $colspan;
                   1084:         } else {
                   1085:           $nb_cells++;
                   1086:         }
                   1087:         my $rowspan = $cell->getAttribute('rowspan');
                   1088:         if (defined $rowspan && $rowspan =~ /^\s*[0-9]+\s*$/) {
                   1089:           for (my $i=0; $i < $rowspan-1; $i++) {
                   1090:             if (!defined $rowspans[$i]) {
                   1091:               $rowspans[$i] = 1;
                   1092:             } else {
                   1093:               $rowspans[$i]++;
                   1094:             }
                   1095:           }
                   1096:         }
                   1097:       }
                   1098:     }
                   1099:     push(@nb_cells, $nb_cells);
                   1100:     if ($nb_cells > $max_nb_cells) {
                   1101:       $max_nb_cells = $nb_cells;
                   1102:     }
                   1103:   }
                   1104:   foreach my $tr (@trs) {
                   1105:     my $nb_cells = shift(@nb_cells);
                   1106:     if ($nb_cells < $max_nb_cells) {
                   1107:       for (1..($max_nb_cells - $nb_cells)) {
                   1108:         $tr->appendChild($doc->createElement('td'));
                   1109:       }
                   1110:     }
                   1111:   }
                   1112: }
                   1113: 
                   1114: # replaces ul/ul by ul/li/ul and the same for ol (using the previous li if possible)
                   1115: # also adds a ul element when a li has no ul/ol ancestor
                   1116: sub fix_lists {
                   1117:   my ($root) = @_;
                   1118:   my $doc = $root->ownerDocument;
                   1119:   my @uls = $root->getElementsByTagName('ul');
                   1120:   my @ols = $root->getElementsByTagName('ol');
                   1121:   my @lists = (@uls, @ols);
                   1122:   foreach my $list (@lists) {
                   1123:     my $next;
                   1124:     for (my $child=$list->firstChild; defined $child; $child=$next) {
                   1125:       $next = $child->nextSibling;
                   1126:       if ($child->nodeType == XML_ELEMENT_NODE && string_in_array(['ul','ol'], $child->nodeName)) {
                   1127:         my $previous = $child->previousNonBlankSibling(); # note: non-DOM method
                   1128:         $list->removeChild($child);
                   1129:         if (defined $previous && $previous->nodeType == XML_ELEMENT_NODE && $previous->nodeName eq 'li') {
                   1130:           $previous->appendChild($child);
                   1131:         } else {
                   1132:           my $li = $doc->createElement('li');
                   1133:           $li->appendChild($child);
                   1134:           if (!defined $next) {
                   1135:             $list->appendChild($li);
                   1136:           } else {
                   1137:             $list->insertBefore($li, $next);
                   1138:           }
                   1139:         }
                   1140:       }
                   1141:     }
                   1142:   }
                   1143:   my @lis = $root->getElementsByTagName('li');
                   1144:   foreach my $li (@lis) {
                   1145:     my $found_list_ancestor = 0;
                   1146:     my $ancestor = $li->parentNode;
                   1147:     while (defined $ancestor) {
                   1148:       if ($ancestor->nodeName eq 'ul' || $ancestor->nodeName eq 'ol') {
                   1149:         $found_list_ancestor = 1;
                   1150:         last;
                   1151:       }
                   1152:       $ancestor = $ancestor->parentNode;
                   1153:     }
                   1154:     if (!$found_list_ancestor) {
                   1155:       # replace li by ul and add li under ul
                   1156:       my $ul = $doc->createElement('ul');
                   1157:       $li->parentNode->insertBefore($ul, $li);
                   1158:       $li->parentNode->removeChild($li);
                   1159:       $ul->appendChild($li);
                   1160:       # add all other li afterwards inside ul (there might be text nodes in-between)
                   1161:       my $next = $ul->nextSibling;
                   1162:       while (defined $next) {
                   1163:         my $next_next = $next->nextSibling;
                   1164:         if ($next->nodeType == XML_TEXT_NODE && $next->nodeValue =~ /^\s*$/ &&
                   1165:             defined $next_next && $next_next->nodeType == XML_ELEMENT_NODE && $next_next->nodeName eq 'li') {
                   1166:           $next->parentNode->removeChild($next);
                   1167:           $ul->appendChild($next);
                   1168:           $next = $next_next;
                   1169:           $next_next = $next_next->nextSibling;
                   1170:         }
                   1171:         if ($next->nodeType == XML_ELEMENT_NODE && $next->nodeName eq 'li') {
                   1172:           $next->parentNode->removeChild($next);
                   1173:           $ul->appendChild($next);
                   1174:         } else {
                   1175:           last;
                   1176:         }
                   1177:         $next = $next_next;
                   1178:       }
                   1179:     }
                   1180:   }
                   1181: }
                   1182: 
                   1183: # Some "image" elements are actually img element with a wrong name. This renames them.
                   1184: # Amazingly enough, "<image src=..." displays an image in some browsers
                   1185: # ("image" has existed at some point as an experimental HTML element).
                   1186: sub fix_wrong_name_for_img {
                   1187:   my ($root) = @_;
                   1188:   my @images = $root->getElementsByTagName('image');
                   1189:   foreach my $image (@images) {
                   1190:     if (!defined $image->getAttribute('src')) {
                   1191:       next;
                   1192:     }
                   1193:     my $found_correct_ancestor = 0;
                   1194:     my $ancestor = $image->parentNode;
                   1195:     while (defined $ancestor) {
                   1196:       if ($ancestor->nodeName eq 'drawimage' || $ancestor->nodeName eq 'imageresponse') {
                   1197:         $found_correct_ancestor = 1;
                   1198:         last;
                   1199:       }
                   1200:       $ancestor = $ancestor->parentNode;
                   1201:     }
                   1202:     if ($found_correct_ancestor) {
                   1203:       next;
                   1204:     }
                   1205:     # this really has to be renamed "img"
                   1206:     $image->setNodeName('img');
                   1207:   }
                   1208: }
                   1209: 
                   1210: # Replaces many deprecated attributes and replaces them by equivalent CSS when possible
                   1211: sub replace_deprecated_attributes_by_css {
                   1212:   my ($root) = @_;
                   1213:   
                   1214:   fix_deprecated_in_tables($root);
                   1215:   
                   1216:   fix_deprecated_in_table_rows($root);
                   1217:   
                   1218:   fix_deprecated_in_table_cells($root);
                   1219:   
                   1220:   fix_deprecated_in_lists($root);
                   1221:   
                   1222:   fix_deprecated_in_list_items($root);
                   1223:   
                   1224:   fix_deprecated_in_hr($root);
                   1225:   
                   1226:   fix_deprecated_in_img($root);
                   1227:   
                   1228:   fix_deprecated_in_body($root);
                   1229:   
                   1230:   fix_align_attribute($root);
                   1231: }
                   1232: 
                   1233: # Replaces deprecated attributes in tables
                   1234: sub fix_deprecated_in_tables {
                   1235:   my ($root) = @_;
                   1236:   my @tables = $root->getElementsByTagName('table');
                   1237:   foreach my $table (@tables) {
                   1238:     tie (my %new_properties, 'Tie::IxHash', ());
                   1239:     my $align = $table->getAttribute('align');
                   1240:     if (defined $align) {
                   1241:       $table->removeAttribute('align');
                   1242:       $align = lc(trim($align));
                   1243:     }
                   1244:     if ($table->parentNode->nodeName eq 'center' || (defined $align && $align eq 'center') ||
                   1245:         (defined $table->parentNode->getAttribute('align') && $table->parentNode->getAttribute('align') eq 'center')) {
                   1246:       $new_properties{'margin-left'} = 'auto';
                   1247:       $new_properties{'margin-right'} = 'auto';
                   1248:     }
                   1249:     if (defined $align && ($align eq 'left' || $align eq 'right')) {
                   1250:       $new_properties{'float'} = $align;
                   1251:     }
                   1252:     my $width = $table->getAttribute('width');
                   1253:     if (defined $width) {
                   1254:       $table->removeAttribute('width');
                   1255:       $width = trim($width);
                   1256:       if ($width =~ /^[0-9]+$/) {
                   1257:         $width .= 'px';
                   1258:       }
                   1259:       if ($width ne '') {
                   1260:         $new_properties{'width'} = $width;
                   1261:       }
                   1262:     }
                   1263:     my $height = $table->getAttribute('height');
                   1264:     if (defined $height) {
                   1265:       $table->removeAttribute('height');
                   1266:       # no replacement for table height
                   1267:     }
                   1268:     my $bgcolor = $table->getAttribute('bgcolor');
                   1269:     if (defined $bgcolor) {
                   1270:       $table->removeAttribute('bgcolor');
                   1271:       $bgcolor = trim($bgcolor);
                   1272:       $bgcolor =~ s/^x\s*//;
                   1273:       if ($bgcolor ne '') {
                   1274:         $new_properties{'background-color'} = $bgcolor;
                   1275:       }
                   1276:     }
                   1277:     my $frame = $table->getAttribute('frame');
                   1278:     if (defined $frame) {
                   1279:       $table->removeAttribute('frame');
                   1280:       $frame = lc(trim($frame));
                   1281:       if ($frame eq 'void') {
                   1282:         $new_properties{'border'} = 'none';
                   1283:       } elsif ($frame eq 'above') {
                   1284:         $new_properties{'border-top'} = '1px solid black';
                   1285:       } elsif ($frame eq 'below') {
                   1286:         $new_properties{'border-bottom'} = '1px solid black';
                   1287:       } elsif ($frame eq 'hsides') {
                   1288:         $new_properties{'border-top'} = '1px solid black';
                   1289:         $new_properties{'border-bottom'} = '1px solid black';
                   1290:       } elsif ($frame eq 'vsides') {
                   1291:         $new_properties{'border-left'} = '1px solid black';
                   1292:         $new_properties{'border-right'} = '1px solid black';
                   1293:       } elsif ($frame eq 'lhs') {
                   1294:         $new_properties{'border-left'} = '1px solid black';
                   1295:       } elsif ($frame eq 'rhs') {
                   1296:         $new_properties{'border-right'} = '1px solid black';
                   1297:       } elsif ($frame eq 'box') {
                   1298:         $new_properties{'border'} = '1px solid black';
                   1299:       } elsif ($frame eq 'border') {
                   1300:         $new_properties{'border'} = '1px solid black';
                   1301:       }
                   1302:     }
                   1303:     if (scalar(keys %new_properties) > 0) {
                   1304:       set_css_properties($table, \%new_properties);
                   1305:     }
                   1306:     # we can't replace the border attribute without creating a style block, but we can improve things like border="BORDER"
                   1307:     my $border = $table->getAttribute('border');
                   1308:     if (defined $border) {
                   1309:       $border = trim($border);
                   1310:       if ($border !~ /^\s*[0-9]+\s*(px)?\s*$/) {
                   1311:         $table->setAttribute('border', '1');
                   1312:       }
                   1313:     }
                   1314:   }
                   1315:   
                   1316: }
                   1317: 
                   1318: # Replaces deprecated attributes in tr elements
                   1319: sub fix_deprecated_in_table_rows {
                   1320:   my ($root) = @_;
                   1321:   my @trs = $root->getElementsByTagName('tr');
                   1322:   foreach my $tr (@trs) {
                   1323:     my $old_properties = get_css_properties($tr);
                   1324:     tie (my %new_properties, 'Tie::IxHash', ());
                   1325:     my $bgcolor = $tr->getAttribute('bgcolor');
                   1326:     if (defined $bgcolor) {
                   1327:       $tr->removeAttribute('bgcolor');
                   1328:       if (!defined $old_properties->{'background-color'}) {
                   1329:         $bgcolor = trim($bgcolor);
                   1330:         $bgcolor =~ s/^x\s*//;
                   1331:         if ($bgcolor ne '') {
                   1332:           $new_properties{'background-color'} = $bgcolor;
                   1333:         }
                   1334:       }
                   1335:     }
                   1336:     my $align = $tr->getAttribute('align');
                   1337:     if (defined $align && $align !~ /\s*char\s*/i) {
                   1338:       $tr->removeAttribute('align');
                   1339:       if (!defined $old_properties->{'text-align'}) {
                   1340:         $align = lc(trim($align));
                   1341:         if ($align ne '') {
                   1342:           $new_properties{'text-align'} = $align;
                   1343:         }
                   1344:       }
                   1345:     }
                   1346:     my $valign = $tr->getAttribute('valign');
                   1347:     if (defined $valign) {
                   1348:       $tr->removeAttribute('valign');
                   1349:       if (!defined $old_properties->{'vertical-align'}) {
                   1350:         $valign = lc(trim($valign));
                   1351:         if ($valign ne '') {
                   1352:           $new_properties{'vertical-align'} = $valign;
                   1353:         }
                   1354:       }
                   1355:     }
                   1356:     if (scalar(keys %new_properties) > 0) {
                   1357:       set_css_properties($tr, \%new_properties);
                   1358:     }
                   1359:   }
                   1360: }
                   1361: 
                   1362: # Replaces deprecated attributes in table cells (td and th)
                   1363: sub fix_deprecated_in_table_cells {
                   1364:   my ($root) = @_;
                   1365:   my @tds = $root->getElementsByTagName('td');
                   1366:   my @ths = $root->getElementsByTagName('th');
                   1367:   my @cells = (@tds, @ths);
                   1368:   foreach my $cell (@cells) {
                   1369:     my $old_properties = get_css_properties($cell);
                   1370:     tie (my %new_properties, 'Tie::IxHash', ());
                   1371:     my $width = $cell->getAttribute('width');
                   1372:     if (defined $width) {
                   1373:       $cell->removeAttribute('width');
                   1374:       if (!defined $old_properties->{'width'}) {
                   1375:         $width = trim($width);
                   1376:         if ($width =~ /^[0-9]+$/) {
                   1377:           $width .= 'px';
                   1378:         }
                   1379:         if ($width ne '') {
                   1380:           $new_properties{'width'} = $width;
                   1381:         }
                   1382:       }
                   1383:     }
                   1384:     my $height = $cell->getAttribute('height');
                   1385:     if (defined $height) {
                   1386:       $cell->removeAttribute('height');
                   1387:       if (!defined $old_properties->{'height'}) {
                   1388:         $height = trim($height);
                   1389:         if ($height =~ /^[0-9]+$/) {
                   1390:           $height .= 'px';
                   1391:         }
                   1392:         if ($height ne '') {
                   1393:           $new_properties{'height'} = $height;
                   1394:         }
                   1395:       }
                   1396:     }
                   1397:     my $bgcolor = $cell->getAttribute('bgcolor');
                   1398:     if (defined $bgcolor) {
                   1399:       $cell->removeAttribute('bgcolor');
                   1400:       if (!defined $old_properties->{'background-color'}) {
                   1401:         $bgcolor = trim($bgcolor);
                   1402:         $bgcolor =~ s/^x\s*//;
                   1403:         if ($bgcolor ne '') {
                   1404:           $new_properties{'background-color'} = $bgcolor;
                   1405:         }
                   1406:       }
                   1407:     }
                   1408:     my $align = $cell->getAttribute('align');
                   1409:     if (defined $align && $align !~ /\s*char\s*/i) {
                   1410:       $cell->removeAttribute('align');
                   1411:       if (!defined $old_properties->{'text-align'}) {
                   1412:         $align = lc(trim($align));
                   1413:         if ($align ne '') {
                   1414:           $new_properties{'text-align'} = $align;
                   1415:         }
                   1416:       }
                   1417:     }
                   1418:     my $valign = $cell->getAttribute('valign');
                   1419:     if (defined $valign) {
                   1420:       $cell->removeAttribute('valign');
                   1421:       if (!defined $old_properties->{'vertical-align'}) {
                   1422:         $valign = lc(trim($valign));
                   1423:         if ($valign ne '') {
                   1424:           $new_properties{'vertical-align'} = $valign;
                   1425:         }
                   1426:       }
                   1427:     }
                   1428:     if (scalar(keys %new_properties) > 0) {
                   1429:       set_css_properties($cell, \%new_properties);
                   1430:     }
                   1431:   }
                   1432: }
                   1433: 
                   1434: # Replaces deprecated attributes in lists (ul and ol)
                   1435: sub fix_deprecated_in_lists {
                   1436:   my ($root) = @_;
                   1437:   my @uls = $root->getElementsByTagName('ul');
                   1438:   my @ols = $root->getElementsByTagName('ol');
                   1439:   my @lists = (@uls, @ols);
                   1440:   foreach my $list (@lists) {
                   1441:     my $type = $list->getAttribute('type');
                   1442:     if (defined $type) {
                   1443:       my $lst = list_style_type($type);
                   1444:       if (defined $lst) {
                   1445:         $list->removeAttribute('type');
                   1446:         if (!defined get_css_property($list, 'list-style-type')) {
                   1447:           set_css_property($list, 'list-style-type', $lst);
                   1448:         }
                   1449:       }
                   1450:     }
                   1451:   }
                   1452: }
                   1453: 
                   1454: # Replaces deprecated attributes in list items (li)
                   1455: sub fix_deprecated_in_list_items {
                   1456:   my ($root) = @_;
                   1457:   my @lis = $root->getElementsByTagName('li');
                   1458:   foreach my $li (@lis) {
                   1459:     my $type = $li->getAttribute('type');
                   1460:     if (defined $type) {
                   1461:       my $lst = list_style_type($type);
                   1462:       if (defined $lst) {
                   1463:         $li->removeAttribute('type');
                   1464:         if (!defined get_css_property($li, 'list-style-type')) {
                   1465:           set_css_property($li, 'list-style-type', $lst);
                   1466:         }
                   1467:       }
                   1468:     }
                   1469:   }
                   1470: }
                   1471: 
                   1472: # returns the CSS list-style-type value equivalent to the given type attribute for a list or list item
                   1473: sub list_style_type {
                   1474:   my ($type) = @_;
                   1475:   my $value;
                   1476:   $type = trim($type);
                   1477:   if (lc($type) eq 'circle') {
                   1478:     $value = 'circle';
                   1479:   } elsif (lc($type) eq 'disc') {
                   1480:     $value = 'disc';
                   1481:   } elsif (lc($type) eq 'square') {
                   1482:     $value = 'square';
                   1483:   } elsif ($type eq 'a') {
                   1484:     $value = 'lower-latin';
                   1485:   } elsif ($type eq 'A') {
                   1486:     $value = 'upper-latin';
                   1487:   } elsif ($type eq 'i') {
                   1488:     $value = 'lower-roman';
                   1489:   } elsif ($type eq 'I') {
                   1490:     $value = 'upper-roman';
                   1491:   } elsif ($type eq '1') {
                   1492:     $value = 'decimal';
                   1493:   }
                   1494:   return $value;
                   1495: }
                   1496: 
                   1497: # Replaces deprecated attributes in hr
                   1498: sub fix_deprecated_in_hr {
                   1499:   my ($root) = @_;
                   1500:   my @hrs = $root->getElementsByTagName('hr');
                   1501:   foreach my $hr (@hrs) {
                   1502:     tie (my %new_properties, 'Tie::IxHash', ());
                   1503:     my $align = $hr->getAttribute('align');
                   1504:     if (defined $align) {
                   1505:       $align = lc(trim($align));
                   1506:       if ($align eq 'left') {
                   1507:         $new_properties{'text-align'} = 'left';
                   1508:         $new_properties{'margin-left'} = '0';
                   1509:       } elsif ($align eq 'right') {
                   1510:         $new_properties{'text-align'} = 'right';
                   1511:         $new_properties{'margin-right'} = '0';
                   1512:       }
                   1513:       $hr->removeAttribute('align');
                   1514:     }
                   1515:     my $color = $hr->getAttribute('color');
                   1516:     if (defined $color) {
                   1517:       $color = trim($color);
                   1518:       $color =~ s/^x\s*//;
                   1519:       if ($color ne '') {
                   1520:         $new_properties{'color'} = $color;
                   1521:         $new_properties{'background-color'} = $color;
                   1522:       }
                   1523:       $hr->removeAttribute('color');
                   1524:     }
                   1525:     my $noshade = $hr->getAttribute('noshade');
                   1526:     my $size = $hr->getAttribute('size');
                   1527:     if (defined $noshade) {
                   1528:       $new_properties{'border-width'} = '0';
                   1529:       if (!defined $color) {
                   1530:         $new_properties{'color'} = 'gray';
                   1531:         $new_properties{'background-color'} = 'gray';
                   1532:       }
                   1533:       if (!defined $size) {
                   1534:         $size = '2';
                   1535:       }
                   1536:       $hr->removeAttribute('noshade');
                   1537:     }
                   1538:     if (defined $size) {
                   1539:       $size = trim($size);
                   1540:       if ($size ne '') {
                   1541:         $new_properties{'height'} = $size.'px';
                   1542:       }
                   1543:       if (defined $hr->getAttribute('size')) {
                   1544:         $hr->removeAttribute('size');
                   1545:       }
                   1546:     }
                   1547:     my $width = $hr->getAttribute('width');
                   1548:     if (defined $width) {
                   1549:       $width = trim($width);
                   1550:       if ($width ne '') {
                   1551:         if ($width !~ /\%$/) {
                   1552:           $width .= 'px';
                   1553:         }
                   1554:         $new_properties{'width'} = $width;
                   1555:       }
                   1556:       $hr->removeAttribute('width');
                   1557:     }
                   1558:     if (scalar(keys %new_properties) > 0) {
                   1559:       set_css_properties($hr, \%new_properties);
                   1560:     }
                   1561:   }
                   1562: }
                   1563: 
                   1564: # Replaces deprecated attributes in img
                   1565: sub fix_deprecated_in_img {
                   1566:   my ($root) = @_;
                   1567:   my @imgs = $root->getElementsByTagName('img');
                   1568:   foreach my $img (@imgs) {
                   1569:     my $old_properties = get_css_properties($img);
                   1570:     tie (my %new_properties, 'Tie::IxHash', ());
                   1571:     my $align = $img->getAttribute('align');
                   1572:     if (defined $align) {
                   1573:       $align = lc(trim($align));
                   1574:       if ($align eq 'middle' || $align eq 'top' || $align eq 'bottom') {
                   1575:         $img->removeAttribute('align');
                   1576:         if (!defined $old_properties->{'vertical-align'}) {
                   1577:           $new_properties{'vertical-align'} = $align;
                   1578:         }
                   1579:       } elsif ($align eq 'left' || $align eq 'right') {
                   1580:         $img->removeAttribute('align');
                   1581:         if (!defined $old_properties->{'float'}) {
                   1582:           $new_properties{'float'} = $align;
                   1583:         }
                   1584:       } elsif ($align eq 'center' || $align eq '') {
                   1585:         $img->removeAttribute('align');
                   1586:       }
                   1587:     }
                   1588:     my $border = $img->getAttribute('border');
                   1589:     if (defined $border) {
                   1590:       $border = lc(trim($border));
                   1591:       if ($border =~ /^[0-9]+\s*(px)?$/) {
                   1592:         $img->removeAttribute('border');
                   1593:         if (!defined $old_properties->{'border'}) {
                   1594:           if ($border !~ /px$/) {
                   1595:             $border .= 'px';
                   1596:           }
                   1597:           $new_properties{'border'} = $border.' solid black';
                   1598:         }
                   1599:       }
                   1600:     }
                   1601:     my $hspace = $img->getAttribute('hspace');
                   1602:     if (defined $hspace) {
                   1603:       $hspace = lc(trim($hspace));
                   1604:       if ($hspace =~ /^[0-9]+\s*(px)?$/) {
                   1605:         $img->removeAttribute('hspace');
                   1606:         if (!defined $old_properties->{'margin-left'} || !defined $old_properties->{'margin-right'}) {
                   1607:           if ($hspace !~ /px$/) {
                   1608:             $hspace .= 'px';
                   1609:           }
                   1610:           $new_properties{'margin-left'} = $hspace;
                   1611:           $new_properties{'margin-right'} = $hspace;
                   1612:         }
                   1613:       }
                   1614:     }
                   1615:     if (scalar(keys %new_properties) > 0) {
                   1616:       set_css_properties($img, \%new_properties);
                   1617:     }
                   1618:   }
                   1619: }
                   1620: 
                   1621: # Replaces deprecated attributes in htmlbody (the style attribute could be used in a div for output)
                   1622: sub fix_deprecated_in_body {
                   1623:   my ($root) = @_;
                   1624:   my $doc = $root->ownerDocument;
                   1625:   my @bodies = $root->getElementsByTagName('htmlbody');
                   1626:   foreach my $body (@bodies) {
                   1627:     my $old_properties = get_css_properties($body);
                   1628:     tie (my %new_properties, 'Tie::IxHash', ());
                   1629:     my $bgcolor = $body->getAttribute('bgcolor');
                   1630:     if (defined $bgcolor) {
                   1631:       $body->removeAttribute('bgcolor');
                   1632:       if (!defined $old_properties->{'background-color'}) {
                   1633:         $bgcolor = trim($bgcolor);
                   1634:         $bgcolor =~ s/^x\s*//;
                   1635:         if ($bgcolor ne '') {
                   1636:           $new_properties{'background-color'} = $bgcolor;
                   1637:         }
                   1638:       }
                   1639:     }
                   1640:     my $color = $body->getAttribute('text');
                   1641:     if (defined $color) {
                   1642:       $body->removeAttribute('text');
                   1643:       if (!defined $old_properties->{'color'}) {
                   1644:         $color = trim($color);
                   1645:         $color =~ s/^x\s*//;
                   1646:         if ($color ne '') {
                   1647:           $new_properties{'color'} = $color;
                   1648:         }
                   1649:       }
                   1650:     }
                   1651:     my $background = $body->getAttribute('background');
                   1652:     if (defined $background && ($background =~ /\.jpe?g$|\.gif|\.png/i)) {
                   1653:       $body->removeAttribute('background');
                   1654:       if (!defined $old_properties->{'background-image'}) {
                   1655:         $background = trim($background);
                   1656:         if ($background ne '') {
                   1657:           $new_properties{'background-image'} = 'url('.$background.')';
                   1658:         }
                   1659:       }
                   1660:     }
                   1661:     # NOTE: these attributes have never been standard and are better removed with no replacement
                   1662:     foreach my $bad ('bottommargin', 'leftmargin', 'rightmargin', 'topmargin', 'marginheight', 'marginwidth') {
                   1663:       if ($body->hasAttribute($bad)) {
                   1664:         $body->removeAttribute($bad);
                   1665:       }
                   1666:     }
                   1667:     # NOTE: link alink and vlink require a <style> block to be converted
                   1668:     my $link = $body->getAttribute('link');
                   1669:     my $alink = $body->getAttribute('alink');
                   1670:     my $vlink = $body->getAttribute('vlink');
                   1671:     if (defined $link || defined $alink || defined $vlink) {
                   1672:       my $head;
                   1673:       my @heads = $root->getElementsByTagName('htmlhead');
                   1674:       if (scalar(@heads) > 0) {
                   1675:         $head = $heads[0];
                   1676:       } else {
                   1677:         $head = $doc->createElement('htmlhead');
                   1678:         $root->insertBefore($head, $root->firstChild);
                   1679:       }
                   1680:       my $style = $doc->createElement('style');
                   1681:       $head->appendChild($style);
                   1682:       my $css = "\n";
                   1683:       if (defined $link) {
                   1684:         $body->removeAttribute('link');
                   1685:         $link = trim($link);
                   1686:         $link =~ s/^x\s*//;
                   1687:         $css .= '      a:link { color:'.$link.' }';
                   1688:         $css .= "\n";
                   1689:       }
                   1690:       if (defined $alink) {
                   1691:         $body->removeAttribute('alink');
                   1692:         $alink = trim($alink);
                   1693:         $alink =~ s/^x\s*//;
                   1694:         $css .= '      a:active { color:'.$alink.' }';
                   1695:         $css .= "\n";
                   1696:       }
                   1697:       if (defined $vlink) {
                   1698:         $body->removeAttribute('vlink');
                   1699:         $vlink = trim($vlink);
                   1700:         $vlink =~ s/^x\s*//;
                   1701:         $css .= '      a:visited { color:'.$vlink.' }';
                   1702:         $css .= "\n";
                   1703:       }
                   1704:       $css .= '    ';
                   1705:       $style->appendChild($doc->createTextNode($css));
                   1706:     }
                   1707:     if (scalar(keys %new_properties) > 0) {
                   1708:       set_css_properties($body, \%new_properties);
                   1709:     } elsif (!$body->hasAttributes) {
                   1710:       $body->parentNode->removeChild($body);
                   1711:     }
                   1712:   }
                   1713: }
                   1714: 
                   1715: # replaces <div align="center"> by <div style="text-align:center;">
                   1716: # also for p and h1..h6
                   1717: sub fix_align_attribute {
                   1718:   my ($root) = @_;
                   1719:   my @nodes = $root->getElementsByTagName('div');
                   1720:   push(@nodes, $root->getElementsByTagName('p'));
                   1721:   for (my $i=1; $i<=6; $i++) {
                   1722:     push(@nodes, $root->getElementsByTagName('h'.$i));
                   1723:   }
                   1724:   foreach my $node (@nodes) {
                   1725:     my $align = $node->getAttribute('align');
                   1726:     if (defined $align) {
                   1727:       $node->removeAttribute('align');
                   1728:       $align = trim($align);
                   1729:       if ($align ne '' && !defined get_css_property($node, 'text-align')) {
                   1730:         set_css_property($node, 'text-align', lc($align));
                   1731:       }
                   1732:     }
                   1733:   }
                   1734: }
                   1735: 
                   1736: # replace center by a div or remove it if there is a table inside
                   1737: sub replace_center {
                   1738:   my ($root, $all_block) = @_;
                   1739:   my $doc = $root->ownerDocument;
                   1740:   my @centers = $root->getElementsByTagName('center');
                   1741:   foreach my $center (@centers) {
                   1742:     if ($center->getChildrenByTagName('table')->size() > 0) { # note: getChildrenByTagName is not DOM (LibXML specific)
                   1743:       replace_by_children($center);
                   1744:     } else {
                   1745:       if ((!defined $center->previousSibling ||
                   1746:           ($center->previousSibling->nodeType == XML_TEXT_NODE && $center->previousSibling->nodeValue =~ /^\s*$/ && !defined $center->previousSibling->previousSibling)) &&
                   1747:           (!defined $center->nextSibling ||
                   1748:           ($center->nextSibling->nodeType == XML_TEXT_NODE && $center->nextSibling->nodeValue =~ /^\s*$/ && !defined $center->nextSibling->nextSibling)) &&
                   1749:           string_in_array(\@accepting_style, $center->parentNode->nodeName)) {
                   1750:         # use CSS on the parent block and replace center by its children
                   1751:         set_css_property($center->parentNode, 'text-align', 'center');
                   1752:         replace_by_children($center);
                   1753:       } else {
                   1754:         # use p or div ? check if there is a block inside
                   1755:         my $found_block = 0;
                   1756:         for (my $child=$center->firstChild; defined $child; $child=$child->nextSibling) {
                   1757:           if ($child->nodeType == XML_ELEMENT_NODE && string_in_array($all_block, $child->nodeName)) {
                   1758:             $found_block = 1;
                   1759:             last;
                   1760:           }
                   1761:         }
                   1762:         my $new_node;
                   1763:         if ($found_block) {
                   1764:           $new_node = $doc->createElement('div');
                   1765:           $new_node->setAttribute('style', 'text-align: center; margin: 0 auto');
                   1766:         } else {
                   1767:           $new_node = $doc->createElement('p');
                   1768:           $new_node->setAttribute('style', 'text-align: center');
                   1769:         }
                   1770:         my $next;
                   1771:         for (my $child=$center->firstChild; defined $child; $child=$next) {
                   1772:           $next = $child->nextSibling;
                   1773:           $center->removeChild($child);
                   1774:           $new_node->appendChild($child);
                   1775:         }
                   1776:         $center->parentNode->replaceChild($new_node, $center);
                   1777:       }
                   1778:     }
                   1779:   }
                   1780: }
                   1781: 
                   1782: # replaces <nobr> by <span style="white-space:nowrap">
                   1783: sub replace_nobr {
                   1784:   my ($root) = @_;
                   1785:   my @nobrs = $root->getElementsByTagName('nobr');
                   1786:   foreach my $nobr (@nobrs) {
                   1787:     if (!defined $nobr->previousSibling && !defined $nobr->nextSibling &&
                   1788:         string_in_array(\@accepting_style, $nobr->parentNode->nodeName)) {
                   1789:       # use CSS on the parent block
                   1790:       set_css_property($nobr->parentNode, 'white-space', 'nowrap');
                   1791:       replace_by_children($nobr);
                   1792:     } else {
                   1793:       $nobr->setNodeName('span');
                   1794:       $nobr->setAttribute('style', 'white-space:nowrap');
                   1795:     }
                   1796:   }
                   1797: }
                   1798: 
                   1799: # removes notsolved tags in the case <hintgroup showoncorrect="no"><notsolved>...</notsolved></hintgroup>
                   1800: # and in the case <notsolved><hintgroup showoncorrect="no">...</hintgroup></notsolved>
                   1801: sub remove_useless_notsolved {
                   1802:   my ($root) = @_;
                   1803:   my @hintgroups = $root->getElementsByTagName('hintgroup');
                   1804:   foreach my $hintgroup (@hintgroups) {
                   1805:     my $showoncorrect = get_non_empty_attribute($hintgroup, 'showoncorrect');
                   1806:     if (!defined $showoncorrect || $showoncorrect eq 'no') {
                   1807:       my @notsolveds = $hintgroup->getElementsByTagName('notsolved');
                   1808:       foreach my $notsolved (@notsolveds) {
                   1809:         replace_by_children($notsolved);
                   1810:       }
                   1811:     }
                   1812:     my $parent = $hintgroup->parentNode;
                   1813:     if ($parent->nodeName eq 'notsolved' && scalar(@{$parent->nonBlankChildNodes()}) == 1) {
                   1814:       replace_by_children($parent);
                   1815:     }
                   1816:   }
                   1817: }
                   1818: 
                   1819: # adds a paragraph inside if needed and calls fix_paragraph for all paragraphs (including new ones)
                   1820: sub fix_paragraphs_inside {
                   1821:   my ($node, $all_block) = @_;
                   1822:   # blocks in which paragrahs will be added:
                   1823:   my @blocks_with_p = ('loncapa','library','problem','part','problemtype','window','block','while','postanswerdate','preduedate','solved','notsolved','languageblock','translated','lang','instructorcomment','togglebox','standalone','form');
                   1824:   my @fix_p_if_br_or_p = (@responses,'foil','item','text','label','hintgroup','hintpart','hint','web','windowlink','div','li','dd','td','th','blockquote');
                   1825:   if ((string_in_array(\@blocks_with_p, $node->nodeName) && paragraph_needed($node)) ||
                   1826:       (string_in_array(\@fix_p_if_br_or_p, $node->nodeName) && paragraph_inside($node))) {
                   1827:     # if non-empty, add paragraphs where needed between all br and remove br
                   1828:     # (it would be easier to just put everything in a p and fix it afterwards, but there are performance issues
                   1829:     #  when a paragraph has many blocks directly inside)
                   1830:     my $doc = $node->ownerDocument;
                   1831:     my $p = undef;
                   1832:     my @new_children = ();
                   1833:     my $next;
                   1834:     for (my $child=$node->firstChild; defined $child; $child=$next) {
                   1835:       $next = $child->nextSibling;
                   1836:       $node->removeChild($child);
                   1837:       if ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName eq 'br') {
                   1838:         if (defined $p) {
                   1839:           push(@new_children, $p);
                   1840:         } else {
                   1841:           push(@new_children, $doc->createElement('p'));
                   1842:         }
                   1843:         $p = undef;
                   1844:       } elsif ($child->nodeType == XML_ELEMENT_NODE && string_in_array(\@inline_like_block, $child->nodeName)) {
                   1845:         # inline_like_block: use the paragraph if there is one, otherwise do not create one
                   1846:         if (defined $p) {
                   1847:           $p->appendChild($child);
                   1848:         } else {
                   1849:           push(@new_children, $child);
                   1850:         }
                   1851:       } elsif ($child->nodeType == XML_ELEMENT_NODE && string_in_array($all_block, $child->nodeName)) {
                   1852:         # these children are blocks and should not be in a paragraph
                   1853:         if (defined $p) {
                   1854:           push(@new_children, $p);
                   1855:           $p = undef;
                   1856:         }
                   1857:         push(@new_children, $child);
                   1858:       } elsif ($child->nodeType == XML_TEXT_NODE && $child->nodeValue =~ /^[ \t\f\n\r]*$/) {
                   1859:         # blank text: add to paragraph if there is one and there is a next node, otherwise keep out of the paragraph
                   1860:         if (defined $p) {
                   1861:           if (defined $next) {
                   1862:             $p->appendChild($child);
                   1863:           } else {
                   1864:             push(@new_children, $p);
                   1865:             $p = undef;
                   1866:             push(@new_children, $child);
                   1867:           }
                   1868:         } else {
                   1869:           push(@new_children, $child);
                   1870:         }
                   1871:       } elsif ($child->nodeType == XML_TEXT_NODE ||
                   1872:             $child->nodeType == XML_ELEMENT_NODE || $child->nodeType == XML_CDATA_SECTION_NODE ||
                   1873:             $child->nodeType == XML_ENTITY_NODE || $child->nodeType == XML_ENTITY_REF_NODE) {
                   1874:         # these children require a paragraph
                   1875:         if (!defined $p) {
                   1876:           $p = $doc->createElement('p');
                   1877:         }
                   1878:         $p->appendChild($child);
                   1879:       } else {
                   1880:         # these children do not require a paragraph (XML comments, PI)
                   1881:         # -> do not move them in a new paragraph
                   1882:         if (defined $p) {
                   1883:           push(@new_children, $p);
                   1884:           $p = undef;
                   1885:         }
                   1886:         push(@new_children, $child);
                   1887:       }
                   1888:     }
                   1889:     if (defined $p) {
                   1890:       push(@new_children, $p);
                   1891:     }
                   1892:     foreach my $child (@new_children) {
                   1893:       $node->appendChild($child);
                   1894:     }
                   1895:   }
                   1896:   # now fix the paragraphs everywhere, so that all inline nodes are inside a paragraph, and block nodes are outside
                   1897:   my $next;
                   1898:   for (my $child=$node->firstChild; defined $child; $child=$next) {
                   1899:     $next = $child->nextSibling;
                   1900:     if ($child->nodeType == XML_ELEMENT_NODE && defined $child->firstChild) {
                   1901:       if ($child->nodeName eq 'p') {
                   1902:         fix_paragraph($child, $all_block);
                   1903:       } else {
                   1904:         fix_paragraphs_inside($child, $all_block);
                   1905:       }
                   1906:     }
                   1907:   }
                   1908: }
                   1909: 
                   1910: # returns 1 if a paragraph is needed inside this node (assuming the parent can have paragraphs)
                   1911: sub paragraph_needed {
                   1912:   my ($node) = @_;
                   1913:   for (my $child=$node->firstChild; defined $child; $child=$child->nextSibling) {
                   1914:     if (($child->nodeType == XML_TEXT_NODE && $child->nodeValue !~ /^\s*$/) ||
                   1915:         ($child->nodeType == XML_ELEMENT_NODE && !string_in_array(\@inline_like_block, $child->nodeName)) ||
                   1916:         $child->nodeType == XML_CDATA_SECTION_NODE ||
                   1917:         $child->nodeType == XML_ENTITY_NODE || $child->nodeType == XML_ENTITY_REF_NODE) {
                   1918:       return(1);
                   1919:     }
                   1920:   }
                   1921:   return(0);
                   1922: }
                   1923: 
                   1924: # returns 1 if there is a paragraph or br in a child of this node, or inside an inline child
                   1925: sub paragraph_inside {
                   1926:   my ($node) = @_;
                   1927:   # inline elements that can be split in half if there is a paragraph inside (currently all HTML):
                   1928:   # (also used in first_block below)
                   1929:   my @splitable_inline = ('span', 'a', 'strong', 'em' , 'b', 'i', 'sup', 'sub', 'code', 'kbd', 'samp', 'tt', 'ins', 'del', 'var', 'small', 'big', 'font', 'u');
                   1930:   for (my $child=$node->firstChild; defined $child; $child=$child->nextSibling) {
                   1931:     if ($child->nodeType == XML_ELEMENT_NODE) {
                   1932:       my $name = $child->nodeName;
                   1933:       if ($name eq 'p' || $name eq 'br') {
                   1934:         return(1);
                   1935:       } elsif (string_in_array(\@splitable_inline, $name)) {
                   1936:         if (paragraph_inside($child)) {
                   1937:           return(1);
                   1938:         }
                   1939:       }
                   1940:     }
                   1941:   }
                   1942:   return(0);
                   1943: }
                   1944: 
                   1945: # fixes paragraphs inside paragraphs (without a block in-between)
                   1946: sub fix_paragraph {
                   1947:   my ($p, $all_block) = @_;
                   1948:   my $loop_right = 1; # this loops is to avoid out of memory errors with recurse, see below
                   1949:   while ($loop_right) {
                   1950:     $loop_right = 0;
                   1951:     my $block = find_first_block($p, $all_block);
                   1952:     if (defined $block) {
                   1953:       my $trees = clone_ancestor_around_node($p, $block);
                   1954:       my $doc = $p->ownerDocument;
                   1955:       my $replacement = $doc->createDocumentFragment();
                   1956:       my $left = $trees->{'left'};
                   1957:       my $middle = $trees->{'middle'};
                   1958:       my $right = $trees->{'right'};
                   1959:       my $left_needs_p = 0; # 1 if it needs a paragraph (used to replace br later)
                   1960:       
                   1961:       if (defined $left) {
                   1962:         # fix paragraphs inside, in case one of the descendants can have paragraphs inside (like numericalresponse/hintgroup):
                   1963:         for (my $child=$left->firstChild; defined $child; $child=$child->nextSibling) {
                   1964:           if ($child->nodeType == XML_ELEMENT_NODE) {
                   1965:             fix_paragraphs_inside($child, $all_block);
                   1966:           }
                   1967:         }
                   1968:         if (!paragraph_needed($left)) {
                   1969:           # this was just blank text, comments or inline responses, it should not create a new paragraph
                   1970:           my $next;
                   1971:           for (my $child=$left->firstChild; defined $child; $child=$next) {
                   1972:             $next = $child->nextSibling;
                   1973:             $left->removeChild($child);
                   1974:             $replacement->appendChild($child);
                   1975:           }
                   1976:         } else {
                   1977:           $left_needs_p = 1;
                   1978:           $replacement->appendChild($left);
                   1979:         }
                   1980:       }
                   1981:       
                   1982:       my $n = $middle->firstChild;
                   1983:       while (defined $n) {
                   1984:         if ($n->nodeType == XML_ELEMENT_NODE && (string_in_array($all_block, $n->nodeName) || $n->nodeName eq 'br')) {
                   1985:           if ($n->nodeName eq 'p') {
                   1986:             my $parent = $n->parentNode;
                   1987:             # first apply recursion
                   1988:             fix_paragraph($n, $all_block);
                   1989:             # now the p might have been replaced by several nodes, which should replace the initial p
                   1990:             my $next_block;
                   1991:             for (my $block=$parent->firstChild; defined $block; $block=$next_block) {
                   1992:               $next_block = $block->nextSibling;
                   1993:               if ($block->nodeName eq 'p') {
                   1994:                 $parent->removeChild($block);
                   1995:                 # for each parent before $middle, clone in-between the p and its children (to preserve the styles)
                   1996:                 if (defined $block->firstChild) {
                   1997:                   for (my $p=$parent; $p!=$middle; $p=$p->parentNode) {
                   1998:                     my $newp = $p->cloneNode(0);
                   1999:                     my $next;
                   2000:                     for (my $child=$block->firstChild; defined $child; $child=$next) {
                   2001:                       $next = $child->nextSibling;
                   2002:                       $block->removeChild($child);
                   2003:                       $newp->appendChild($child);
                   2004:                     }
                   2005:                     $block->appendChild($newp);
                   2006:                   }
                   2007:                 }
                   2008:               }
                   2009:               $replacement->appendChild($block);
                   2010:             }
                   2011:           } else {
                   2012:             # replace the whole p by this block, forgetting about intermediate inline elements
                   2013:             $n->parentNode->removeChild($n);
                   2014:             if ($n->nodeName eq 'br') {
                   2015:               # replace a br by a paragraph if there was nothing before in the paragraph,
                   2016:               # otherwise remove it because it already broke the paragraph in half
                   2017:               if (!defined $left || !$left_needs_p) {
                   2018:                 $replacement->appendChild($middle);
                   2019:               }
                   2020:             } else {
                   2021:               fix_paragraphs_inside($n, $all_block);
                   2022:               $replacement->appendChild($n);
                   2023:             }
                   2024:           }
                   2025:           last;
                   2026:         }
                   2027:         $n = $n->firstChild;
                   2028:         if (defined $n && defined $n->nextSibling) {
                   2029:           die "Error in post_xml.fix_paragraph: block not found";
                   2030:         }
                   2031:       }
                   2032:       
                   2033:       if (defined $right) {
                   2034:         if ($block->nodeName eq 'p') {
                   2035:           # remove attributes on the right paragraph
                   2036:           my @attributelist = $right->attributes();
                   2037:           foreach my $att (@attributelist) {
                   2038:             $right->removeAttribute($att->nodeName);
                   2039:           }
                   2040:         }
                   2041:         if ($right->firstChild->nodeType == XML_TEXT_NODE && $right->firstChild->nodeValue =~ /^[ \t\f\n\r]*$/) {
                   2042:           # remove the first text node with whitespace only from the p, it should not trigger the creation of a p
                   2043:           # (but take nbsp into account, so we should not use \s here)
                   2044:           my $first = $right->firstChild;
                   2045:           $right->removeChild($first);
                   2046:           $replacement->appendChild($first);
                   2047:         }
                   2048:         if (defined $right->firstChild) {
                   2049:           if (paragraph_needed($right)) {
                   2050:             $replacement->appendChild($right);
                   2051:             #fix_paragraph($right, $all_block); This is taking way too much memory for blocks with many children
                   2052:             # -> loop instead of recurse
                   2053:             $loop_right = 1;
                   2054:           } else {
                   2055:             # this was just blank text, comments or inline responses, it should not create a new paragraph
                   2056:             my $next;
                   2057:             for (my $child=$right->firstChild; defined $child; $child=$next) {
                   2058:               $next = $child->nextSibling;
                   2059:               $right->removeChild($child);
                   2060:               $replacement->appendChild($child);
                   2061:               # fix paragraphs inside, in case one of the descendants can have paragraphs inside (like numericalresponse/hintgroup):
                   2062:               if ($child->nodeType == XML_ELEMENT_NODE) {
                   2063:                 fix_paragraphs_inside($child, $all_block);
                   2064:               }
                   2065:             }
                   2066:           }
                   2067:         }
                   2068:       }
                   2069:       
                   2070:       $p->parentNode->replaceChild($replacement, $p);
                   2071:       
                   2072:       if ($loop_right) {
                   2073:         $p = $right;
                   2074:       }
                   2075:       
                   2076:     } else {
                   2077:       # fix paragraphs inside, in case one of the descendants can have paragraphs inside (like numericalresponse/hintgroup):
                   2078:       my $next;
                   2079:       for (my $child=$p->firstChild; defined $child; $child=$next) {
                   2080:         $next = $child->nextSibling;
                   2081:         if ($child->nodeType == XML_ELEMENT_NODE) {
                   2082:           fix_paragraphs_inside($child, $all_block);
                   2083:         }
                   2084:       }
                   2085:     }
                   2086:   }
                   2087: }
                   2088: 
                   2089: sub find_first_block {
                   2090:   my ($node, $all_block) = @_;
                   2091:   # inline elements that can be split in half if there is a paragraph inside (currently all HTML):
                   2092:   my @splitable_inline = ('span', 'a', 'strong', 'em' , 'b', 'i', 'sup', 'sub', 'code', 'kbd', 'samp', 'tt', 'ins', 'del', 'var', 'small', 'big', 'font', 'u');
                   2093:   for (my $child=$node->firstChild; defined $child; $child=$child->nextSibling) {
                   2094:     if ($child->nodeType == XML_ELEMENT_NODE) {
                   2095:       if (string_in_array($all_block, $child->nodeName) || $child->nodeName eq 'br') {
                   2096:         return($child);
                   2097:       }
                   2098:       if (string_in_array(\@splitable_inline, $child->nodeName)) {
                   2099:         my $block = find_first_block($child, $all_block);
                   2100:         if (defined $block) {
                   2101:           return($block);
                   2102:         }
                   2103:       }
                   2104:     }
                   2105:   }
                   2106:   return(undef);
                   2107: }
                   2108: 
                   2109: # Creates clones of the ancestor containing the descendants before the node, at the node, and after the node.
                   2110: # returns a hash with: left, middle, right (left and right can be undef)
                   2111: sub clone_ancestor_around_node {
                   2112:   my ($ancestor, $node) = @_;
                   2113:   my $middle_node;
                   2114:   my ($left, $middle, $right);
                   2115:   for (my $child=$ancestor->firstChild; defined $child; $child=$child->nextSibling) {
                   2116:     if ($child == $node || is_ancestor_of($child, $node)) {
                   2117:       $middle_node = $child;
                   2118:       last;
                   2119:     }
                   2120:   }
                   2121:   if (!defined $middle_node) {
                   2122:     die "error in split_ancestor_around_node: middle not found";
                   2123:   }
                   2124:   if (defined $middle_node->previousSibling) {
                   2125:     $left = $ancestor->cloneNode(0);
                   2126:     for (my $child=$ancestor->firstChild; $child != $middle_node; $child=$child->nextSibling) {
                   2127:       $left->appendChild($child->cloneNode(1));
                   2128:     }
                   2129:   }
                   2130:   $middle = $ancestor->cloneNode(0);
                   2131:   if ($middle_node == $node) {
                   2132:     $middle->appendChild($middle_node->cloneNode(1));
                   2133:   } else {
                   2134:     my $subres = clone_ancestor_around_node($middle_node, $node);
                   2135:     my $subleft = $subres->{'left'};
                   2136:     if (defined $subleft) {
                   2137:       if (!defined $left) {
                   2138:         $left = $ancestor->cloneNode(0);
                   2139:       }
                   2140:       $left->appendChild($subleft);
                   2141:     }
                   2142:     $middle->appendChild($subres->{'middle'});
                   2143:     my $subright = $subres->{'right'};
                   2144:     if (defined $subright) {
                   2145:       $right = $ancestor->cloneNode(0);
                   2146:       $right->appendChild($subright);
                   2147:     }
                   2148:   }
                   2149:   if (defined $middle_node->nextSibling) {
                   2150:     if (!defined $right) {
                   2151:       $right = $ancestor->cloneNode(0);
                   2152:     }
                   2153:     for (my $child=$middle_node->nextSibling; defined $child; $child=$child->nextSibling) {
                   2154:       $right->appendChild($child->cloneNode(1));
                   2155:     }
                   2156:   }
                   2157:   my %result = ();
                   2158:   $result{'left'} = $left;
                   2159:   $result{'middle'} = $middle;
                   2160:   $result{'right'} = $right;
                   2161:   return(\%result);
                   2162: }
                   2163: 
                   2164: sub is_ancestor_of {
                   2165:   my ($n1, $n2) = @_;
                   2166:   my $n = $n2->parentNode;
                   2167:   while (defined $n) {
                   2168:     if ($n == $n1) {
                   2169:       return(1);
                   2170:     }
                   2171:     $n = $n->parentNode;
                   2172:   }
                   2173:   return(0);
                   2174: }
                   2175: 
                   2176: # removes empty style elements and replaces the ones with only whitespaces inside by their content
                   2177: # also remove hints that have become empty after empty style removal.
                   2178: sub remove_empty_style {
                   2179:   my ($root) = @_;
                   2180:   # actually, preserve some elements like ins when they have whitespace, only remove if they are empty
                   2181:   my @remove_if_empty = ('span', 'strong', 'em' , 'b', 'i', 'sup', 'sub', 'code', 'kbd', 'samp', 'tt', 'ins', 'del', 'var', 'small', 'big', 'font', 'u', 'hint');
                   2182:   my @remove_if_blank = ('span', 'strong', 'em' , 'b', 'i', 'sup', 'sub', 'tt', 'var', 'small', 'big', 'font', 'u', 'hint');
                   2183:   foreach my $name (@remove_if_empty) {
                   2184:     my @nodes = $root->getElementsByTagName($name);
                   2185:     while (scalar(@nodes) > 0) {
                   2186:       my $node = pop(@nodes);
                   2187:       if (!defined $node->firstChild) {
                   2188:         my $parent = $node->parentNode;
                   2189:         if (defined $node->previousSibling && $node->previousSibling->nodeType == XML_TEXT_NODE &&
                   2190:             $node->previousSibling->nodeValue =~ /\$\S*$/) {
                   2191:           # case $a<sup></sup>x
                   2192:           my $value = $node->previousSibling->nodeValue;
                   2193:           $value =~ s/\$(\S*)$/\$\{$1\}/;
                   2194:           $node->previousSibling->setData($value);
                   2195:         }
                   2196:         $parent->removeChild($node);
                   2197:         $parent->normalize();
                   2198:         # now that we removed the node, check if the parent has become an empty style, and so on
                   2199:         while (defined $parent && string_in_array(\@remove_if_empty, $parent->nodeName) && !defined $parent->firstChild) {
                   2200:           my $grandparent = $parent->parentNode;
                   2201:           $grandparent->removeChild($parent);
                   2202:           remove_reference_from_array(\@nodes, $parent);
                   2203:           $parent = $grandparent;
                   2204:         }
                   2205:       }
                   2206:     }
                   2207:   }
                   2208:   foreach my $name (@remove_if_blank) {
                   2209:     my @nodes = $root->getElementsByTagName($name);
                   2210:     while (scalar(@nodes) > 0) {
                   2211:       my $node = pop(@nodes);
                   2212:       if (defined $node->firstChild && !defined $node->firstChild->nextSibling && $node->firstChild->nodeType == XML_TEXT_NODE) {
                   2213:         # NOTE: careful, with UTF-8, \s matches non-breaking spaces and we want to preserve these
                   2214:         if ($node->firstChild->nodeValue =~ /^[\t\n\f\r ]*$/) {
                   2215:           my $parent = $node->parentNode;
                   2216:           replace_by_children($node);
                   2217:           $parent->normalize();
                   2218:           # now that we removed the node, check if the parent has become a style with only whitespace, and so on
                   2219:           while (defined $parent && string_in_array(\@remove_if_blank, $parent->nodeName) &&
                   2220:               (!defined $parent->firstChild ||
                   2221:               (!defined $parent->firstChild->nextSibling && $parent->firstChild->nodeType == XML_TEXT_NODE &&
                   2222:               $parent->firstChild->nodeValue =~ /^^[\t\n\f\r ]*$/))) {
                   2223:             my $grandparent = $parent->parentNode;
                   2224:             replace_by_children($parent);
                   2225:             remove_reference_from_array(\@nodes, $parent);
                   2226:             $parent = $grandparent;
                   2227:           }
                   2228:         }
                   2229:       }
                   2230:     }
                   2231:   }
                   2232: }
                   2233: 
                   2234: # remove whitespace inside LON-CAPA elements that have an empty content-model (HTML ones are handled by html_to_xml)
                   2235: sub fix_empty_lc_elements {
                   2236:   my ($node) = @_;
                   2237:   my @lcempty = ('arc','axis','backgroundplot','drawoptionlist','drawvectorsum','fill','functionplotrule','functionplotvectorrule','functionplotvectorsumrule','hiddenline','hiddensubmission','key','line','location','organicstructure','parameter','plotobject','plotvector','responseparam','spline','textline');
                   2238:   if (string_in_array(\@lcempty, $node->nodeName)) {
                   2239:     if (defined $node->firstChild && !defined $node->firstChild->nextSibling &&
                   2240:         $node->firstChild->nodeType == XML_TEXT_NODE && $node->firstChild->nodeValue =~ /^\s*$/) {
                   2241:       $node->removeChild($node->firstChild);
                   2242:     }
                   2243:     if (defined $node->firstChild) {
                   2244:       if ($warnings) {
                   2245:         print "Warning: a ".$node->nodeName." has something inside\n";
                   2246:       }
                   2247:     }
                   2248:     return;
                   2249:   }
                   2250:   for (my $child=$node->firstChild; defined $child; $child=$child->nextSibling) {
                   2251:     if ($child->nodeType == XML_ELEMENT_NODE) {
                   2252:       fix_empty_lc_elements($child);
                   2253:     }
                   2254:   }
                   2255: }
                   2256: 
                   2257: # turn some attribute values into lowercase when they should be
                   2258: sub lowercase_attribute_values {
                   2259:   my ($root) = @_;
                   2260:   my @with_yesno = (['radiobuttonresponse', ['randomize']],
                   2261:                     ['optionresponse', ['randomize']],
                   2262:                     ['matchresponse', ['randomize']],
                   2263:                     ['itemgroup', ['randomize']],
                   2264:                     ['rankresponse', ['randomize']],
                   2265:                     ['functionplotresponse', ['xaxisvisible', 'yaxisvisible', 'gridvisible']],
                   2266:                     ['backgroundplot', ['fixed']],
                   2267:                     ['drawvectorsum', ['showvalue']],
                   2268:                     ['textline', ['readonly']],
                   2269:                     ['hint', ['showoncorrect']],
                   2270:                     ['body', ['dir']],
                   2271:                     ['img', ['encrypturl']],
                   2272:                     ['form', ['method']],
                   2273:                     ['input', ['type']]
                   2274:                    );
                   2275:   foreach my $el_attributes (@with_yesno) {
                   2276:     my $el_name = $el_attributes->[0];
                   2277:     my @elements = $root->getElementsByTagName($el_name);
                   2278:     foreach my $element (@elements) {
                   2279:       my $att_list = $el_attributes->[1];
                   2280:       foreach my $att_name (@$att_list) {
                   2281:         my $att_value = $element->getAttribute($att_name);
                   2282:         if (!defined $att_value) {
                   2283:           next;
                   2284:         }
                   2285:         if ($att_value eq 'yes' || $att_value eq 'no') {
                   2286:           next;
                   2287:         }
                   2288:         if ($att_value =~ /\s*yes\s*/i) {
                   2289:           $element->setAttribute($att_name, 'yes');
                   2290:         } elsif ($att_value =~ /\s*no\s*/i) {
                   2291:           $element->setAttribute($att_name, 'no');
                   2292:         }
                   2293:       }
                   2294:     }
                   2295:   }
                   2296: }
                   2297: 
                   2298: # fixes spelling mistakes for numericalresponse/@unit
                   2299: sub replace_numericalresponse_unit_attribute {
                   2300:   my ($root) = @_;
                   2301:   my @numericalresponses = $root->getElementsByTagName('numericalresponse');
                   2302:   foreach my $numericalresponse (@numericalresponses) {
                   2303:     if (defined $numericalresponse->getAttribute('units') && !defined $numericalresponse->getAttribute('unit')) {
                   2304:       $numericalresponse->setAttribute('unit', $numericalresponse->getAttribute('units'));
                   2305:       $numericalresponse->removeAttribute('units');
                   2306:     }
                   2307:   }
                   2308:   
                   2309: }
                   2310: 
                   2311: # Replaces &format and &prettyprint by <num> whenever possible.
                   2312: # Also replaces &chemparse by <chem>.
                   2313: # If the function call is enclosed in <display>, the <display> element is removed.
                   2314: sub replace_functions_by_elements {
                   2315:   my ($root) = @_;
                   2316:   my $doc = $root->ownerDocument;
                   2317:   my @preserve = ('script','answer','parse','m','tm','dtm','numericalhintscript'); # display is handled later
                   2318:   my @all = $root->getElementsByTagName('*');
                   2319:   foreach my $element (@all) {
                   2320:     if (string_in_array(\@preserve, $element->nodeName)) {
                   2321:       next;
                   2322:     }
                   2323:     my $changed = 0;
                   2324:     my $next;
                   2325:     for (my $child=$element->firstChild; defined $child; $child=$next) {
                   2326:       $next = $child->nextSibling;
                   2327:       if ($child->nodeType == XML_TEXT_NODE) {
                   2328:         my $value = $child->nodeValue;
                   2329:         if ($value =~ /^(.*)&(?:format|prettyprint)\((\$\{?[a-zA-Z0-9]*\}?(?:\[[^\]]*\])?|[0-9.]+)\s?,\s?(["'][,.\$]?[0-9][eEfFgGsS]["']|\$[a-zA-Z0-9]*)\)(.*)$/s) {
                   2330:           # NOTE: we don't check for &prettyprint's 3rd argument (target), but it has not been seen outside of script elements.
                   2331:           # NOTE: the format options ',' and '$' are not supported by &format in current LON-CAPA since rev 1.81 of default_homework.lcpm,
                   2332:           #       but are supported by &prettyprint;
                   2333:           #       if we use (like current LON-CAPA) &prettyprint for <num> implementation, it will change a few resulting documents
                   2334:           #       (by making them display something they were probably intended to display, but which did not).
                   2335:           #       Usage of <num> with &prettyprint instead of &format might also change the display when there is an exponent.
                   2336:           my $before = $1;
                   2337:           my $number = $2;
                   2338:           my $format = $3;
                   2339:           my $after = $4;
                   2340:           $format =~ s/^['"]|['"]$//g;
                   2341:           # do not change this if the parent is <display> and there are other things before or after &format
                   2342:           if ($element->nodeName eq 'display' && (defined $child->previousSibling || defined $next ||
                   2343:               $before !~ /^\s*$/ || $after !~ /^\s*$/)) {
                   2344:             last;
                   2345:           }
                   2346:           my $replacement = $doc->createDocumentFragment();
                   2347:           my $num = $doc->createElement('num');
                   2348:           $num->setAttribute('format', $format);
                   2349:           $num->appendChild($doc->createTextNode($number));
                   2350:           if (length($before) > 0) {
                   2351:             $replacement->appendChild($doc->createTextNode($before));
                   2352:           }
                   2353:           $replacement->appendChild($num);
                   2354:           if (length($after) > 0) {
                   2355:             $replacement->appendChild($doc->createTextNode($after));
                   2356:           }
                   2357:           $element->replaceChild($replacement, $child);
                   2358:           $changed = 1;
                   2359:           $next = $element->firstChild; # start over, there might be another &format in the same text node
                   2360:         } elsif ($value =~ /^(.*)&chemparse\(([^'"()]*|'[^']*'|"[^"]*")\)(.*)$/s) {
                   2361:           my $before = $1;
                   2362:           my $reaction = $2;
                   2363:           my $after = $3;
                   2364:           $reaction =~ s/^'(.*)'$/$1/;
                   2365:           $reaction =~ s/^"(.*)"$/$1/;
                   2366:           if ($element->nodeName eq 'display' && (defined $child->previousSibling || defined $next ||
                   2367:               $before !~ /^\s*$/ || $after !~ /^\s*$/)) {
                   2368:             last;
                   2369:           }
                   2370:           my $replacement = $doc->createDocumentFragment();
                   2371:           my $chem = $doc->createElement('chem');
                   2372:           $chem->appendChild($doc->createTextNode($reaction));
                   2373:           if (length($before) > 0) {
                   2374:             $replacement->appendChild($doc->createTextNode($before));
                   2375:           }
                   2376:           $replacement->appendChild($chem);
                   2377:           if (length($after) > 0) {
                   2378:             $replacement->appendChild($doc->createTextNode($after));
                   2379:           }
                   2380:           $element->replaceChild($replacement, $child);
                   2381:           $changed = 1;
                   2382:           $next = $element->firstChild;
                   2383:         }
                   2384:       }
                   2385:     }
                   2386:     if ($changed && $element->nodeName eq 'display') {
                   2387:       my $first = $element->firstChild;
                   2388:       if ($first->nodeType == XML_ELEMENT_NODE && string_in_array(['num','chem'], $first->nodeName) &&
                   2389:           !defined $first->nextSibling) {
                   2390:         # remove useless display element
                   2391:         replace_by_children($element);
                   2392:       }
                   2393:     }
                   2394:   }
                   2395: }
                   2396: 
                   2397: # pretty-print using im-memory DOM tree
                   2398: sub pretty {
                   2399:   my ($node, $all_block, $indent_level) = @_;
                   2400:   my $doc = $node->ownerDocument;
                   2401:   $indent_level ||= 0;
                   2402:   my $type = $node->nodeType;
                   2403:   if ($type == XML_ELEMENT_NODE) {
                   2404:     my $name = $node->nodeName;
                   2405:     if ((string_in_array($all_block, $name) || string_in_array(\@inline_like_block, $name)) &&
                   2406:         !string_in_array(\@preserve_elements, $name)) {
                   2407:       # make sure there is a newline at the beginning and at the end if there is anything inside
                   2408:       if (defined $node->firstChild && !string_in_array(\@no_newline_inside, $name)) {
                   2409:         my $first = $node->firstChild;
                   2410:         if ($first->nodeType == XML_TEXT_NODE) {
                   2411:           my $text = $first->nodeValue;
                   2412:           if ($text !~ /^ *\n/) {
                   2413:             $first->setData("\n" . $text);
                   2414:           }
                   2415:         } else {
                   2416:           $node->insertBefore($doc->createTextNode("\n"), $first);
                   2417:         }
                   2418:         my $last = $node->lastChild;
                   2419:         if ($last->nodeType == XML_TEXT_NODE) {
                   2420:           my $text = $last->nodeValue;
                   2421:           if ($text !~ /\n *$/) {
                   2422:             $last->setData($text . "\n");
                   2423:           }
                   2424:         } else {
                   2425:           $node->appendChild($doc->createTextNode("\n"));
                   2426:         }
                   2427:       }
                   2428:       
                   2429:       # indent and make sure there is a newline before and after a block element
                   2430:       my $newline_indent = "\n".(' ' x (2*($indent_level + 1)));
                   2431:       my $newline_indent_last = "\n".(' ' x (2*$indent_level));
                   2432:       my $next;
                   2433:       for (my $child=$node->firstChild; defined $child; $child=$next) {
                   2434:         $next = $child->nextSibling;
                   2435:         if ($child->nodeType == XML_ELEMENT_NODE) {
                   2436:           if (string_in_array($all_block, $child->nodeName) || string_in_array(\@inline_like_block, $child->nodeName)) {
                   2437:             # make sure there is a newline before and after a block element
                   2438:             if (defined $child->previousSibling && $child->previousSibling->nodeType == XML_TEXT_NODE) {
                   2439:               my $prev = $child->previousSibling;
                   2440:               my $text = $prev->nodeValue;
                   2441:               if ($text !~ /\n *$/) {
                   2442:                 $prev->setData($text . $newline_indent);
                   2443:               }
                   2444:             } else {
                   2445:               $node->insertBefore($doc->createTextNode($newline_indent), $child);
                   2446:             }
                   2447:             if (defined $next && $next->nodeType == XML_TEXT_NODE) {
                   2448:               my $text = $next->nodeValue;
                   2449:               if ($text !~ /^ *\n/) {
                   2450:                 $next->setData($newline_indent . $text);
                   2451:               }
                   2452:             } else {
                   2453:               $node->insertAfter($doc->createTextNode($newline_indent), $child);
                   2454:             }
                   2455:           }
                   2456:           pretty($child, $all_block, $indent_level+1);
                   2457:         } elsif ($child->nodeType == XML_TEXT_NODE) {
                   2458:           my $text = $child->nodeValue;
                   2459:           # collapse newlines
                   2460:           $text =~ s/\n([\t ]*\n)+/\n/g;
                   2461:           # indent and remove spaces and tabs before newlines
                   2462:           if (defined $next) {
                   2463:             $text =~ s/[\t ]*\n[\t ]*/$newline_indent/ge;
                   2464:           } else {
                   2465:             $text =~ s/[\t ]*\n[\t ]*/$newline_indent/ge;
                   2466:             $text =~ s/[\t ]*\n[\t ]*$/$newline_indent_last/e;
                   2467:           }
                   2468:           $child->setData($text);
                   2469:         }
                   2470:       }
                   2471:       
                   2472:       # removes whitespace at the beginning and end of p td, th and li (except for nbsp at the beginning)
                   2473:       my @to_trim = ('p','td','th','li');
                   2474:       if (string_in_array(\@to_trim, $name) && defined $node->firstChild && $node->firstChild->nodeType == XML_TEXT_NODE) {
                   2475:         my $text = $node->firstChild->nodeValue;
                   2476:         $text =~ s/^[ \t\f\n\r]*//;
                   2477:         if ($text eq '') {
                   2478:           $node->removeChild($node->firstChild);
                   2479:         } else {
                   2480:           $node->firstChild->setData($text);
                   2481:         }
                   2482:       }
                   2483:       if (string_in_array(\@to_trim, $name) && defined $node->lastChild && $node->lastChild->nodeType == XML_TEXT_NODE) {
                   2484:         my $text = $node->lastChild->nodeValue;
                   2485:         $text =~ s/\s*$//;
                   2486:         if ($text eq '') {
                   2487:           $node->removeChild($node->lastChild);
                   2488:         } else {
                   2489:           $node->lastChild->setData($text);
                   2490:         }
                   2491:       }
                   2492:     } elsif (string_in_array(\@preserve_elements, $name)) {
                   2493:       # collapse newlines at the beginning and the end of scripts
                   2494:       if (defined $node->firstChild && $node->firstChild->nodeType == XML_TEXT_NODE) {
                   2495:         my $text = $node->firstChild->nodeValue;
                   2496:         $text =~ s/^\n( *\n)+/\n/;
                   2497:         if ($text eq '') {
                   2498:           $node->removeChild($node->firstChild);
                   2499:         } else {
                   2500:           $node->firstChild->setData($text);
                   2501:         }
                   2502:       }
                   2503:       if (defined $node->lastChild && $node->lastChild->nodeType == XML_TEXT_NODE) {
                   2504:         my $text = $node->lastChild->nodeValue;
                   2505:         $text =~ s/\n( *\n)+$/\n/;
                   2506:         if ($text eq '') {
                   2507:           $node->removeChild($node->lastChild);
                   2508:         } else {
                   2509:           $node->lastChild->setData($text);
                   2510:         }
                   2511:       }
                   2512:     }
                   2513:   }
                   2514: }
                   2515: 
                   2516: sub replace_tm_dtm {
                   2517:   my ($root) = @_;
                   2518:   my $doc = $root->ownerDocument;
                   2519:   my @elements = $root->getElementsByTagName('tm');
                   2520:   push(@elements, $root->getElementsByTagName('dtm'));
                   2521:   foreach my $element (@elements) {
                   2522:     my $first = $element->firstChild;
                   2523:     if (defined $first && $first->nodeType == XML_TEXT_NODE) {
                   2524:       my $text = $first->nodeValue;
                   2525:       if ($element->nodeName eq 'tm') {
                   2526:         $first->setData('$'.$text.'$');
                   2527:       } else {
                   2528:         $first->setData('$$'.$text.'$$');
                   2529:       }
                   2530:     }
                   2531:     $element->setNodeName('m');
                   2532:   }
                   2533: }
                   2534: 
                   2535: 
                   2536: ######## utilities ########
                   2537: 
                   2538: ##
                   2539: # Trims a string (really, this should be built-in in Perl, this is ridiculous, ugly and slow)
                   2540: # @param {string} s - the string to trim
                   2541: # @returns the trimmed string
                   2542: ##
                   2543: sub trim {
                   2544:   my ($s) = @_;
                   2545:   $s =~ s/^\s+//;
                   2546:   $s =~ s/\s+$//;
                   2547:   return($s);
                   2548: }
                   2549: 
                   2550: ##
                   2551: # Tests if a string is in an array (using eq) (to avoid Smartmatch warnings with $value ~~ @array)
                   2552: # @param {Array<string>} array - reference to the array of strings
                   2553: # @param {string} value - the string to look for
                   2554: # @returns 1 if found, 0 otherwise
                   2555: ##
                   2556: sub string_in_array {
                   2557:   my ($array, $value) = @_;
                   2558:   # NOTE: would it be faster to use "any { $value eq $_ } @{$array}" from List::Util ?
                   2559:   foreach my $v (@{$array}) {
                   2560:     if ($v eq $value) {
                   2561:       return 1;
                   2562:     }
                   2563:   }
                   2564:   return 0;
                   2565: }
                   2566: 
                   2567: ##
                   2568: # Tests if an object is in an array (using ==)
                   2569: # @param {Array<Object>} array - reference to the array of references
                   2570: # @param {Object} ref - the reference to look for
                   2571: # @returns 1 if found, 0 otherwise
                   2572: ##
                   2573: sub reference_in_array {
                   2574:   my ($array, $ref) = @_;
                   2575:   foreach my $v (@{$array}) {
                   2576:     if ($v == $ref) {
                   2577:       return 1;
                   2578:     }
                   2579:   }
                   2580:   return 0;
                   2581: }
                   2582: 
                   2583: ##
                   2584: # returns the index of a string in an array
                   2585: # @param {Array<Object>} array - reference to the array of strings
                   2586: # @param {string} s - the string to look for (using eq)
                   2587: # @returns the index if found, -1 otherwise
                   2588: ##
                   2589: sub index_of_string {
                   2590:   my ($array, $s) = @_;
                   2591:   for (my $i=0; $i<scalar(@{$array}); $i++) {
                   2592:     if ($array->[$i] eq $s) {
                   2593:       return $i;
                   2594:     }
                   2595:   }
                   2596:   return -1;
                   2597: }
                   2598: 
                   2599: ##
                   2600: # returns the index of a reference in an array
                   2601: # @param {Array<Object>} array - reference to the array of references
                   2602: # @param {Object} ref - the reference to look for
                   2603: # @returns the index if found, -1 otherwise
                   2604: ##
                   2605: sub index_of_reference {
                   2606:   my ($array, $ref) = @_;
                   2607:   for (my $i=0; $i<scalar(@{$array}); $i++) {
                   2608:     if ($array->[$i] == $ref) {
                   2609:       return $i;
                   2610:     }
                   2611:   }
                   2612:   return -1;
                   2613: }
                   2614: 
                   2615: ##
                   2616: # if found, removes a string from an array, otherwise do nothing
                   2617: # @param {Array<string>} array - reference to the array of string
                   2618: # @param {string} s - the string to look for (using eq)
                   2619: ##
                   2620: sub remove_string_from_array {
                   2621:   my ($array, $s) = @_;
                   2622:   my $index = index_of_string($array, $s);
                   2623:   if ($index != -1) {
                   2624:     splice(@$array, $index, 1);
                   2625:   }
                   2626: }
                   2627: 
                   2628: ##
                   2629: # if found, removes a reference from an array, otherwise do nothing
                   2630: # @param {Array<Object>} array - reference to the array of references
                   2631: # @param {Object} ref - the reference to look for
                   2632: ##
                   2633: sub remove_reference_from_array {
                   2634:   my ($array, $ref) = @_;
                   2635:   my $index = index_of_reference($array, $ref);
                   2636:   if ($index != -1) {
                   2637:     splice(@$array, $index, 1);
                   2638:   }
                   2639: }
                   2640: 
                   2641: ##
                   2642: # replaces a node by its children
                   2643: # @param {Node} node - the DOM node
                   2644: ##
                   2645: sub replace_by_children {
                   2646:   my ($node) = @_;
                   2647:   my $parent = $node->parentNode;
                   2648:   my $next;
                   2649:   my $previous;
                   2650:   for (my $child=$node->firstChild; defined $child; $child=$next) {
                   2651:     $next = $child->nextSibling;
                   2652:     if ((!defined $previous || !defined $next) &&
                   2653:         $child->nodeType == XML_TEXT_NODE && $child->nodeValue =~ /^\s*$/) {
                   2654:       next; # do not keep first and last whitespace nodes
                   2655:     } else {
                   2656:       if (!defined $previous && $child->nodeType == XML_TEXT_NODE) {
                   2657:         # remove whitespace at the beginning
                   2658:         my $value = $child->nodeValue;
                   2659:         $value =~ s/^\s+//;
                   2660:         $child->setData($value);
                   2661:       }
                   2662:       if (!defined $next && $child->nodeType == XML_TEXT_NODE) {
                   2663:         # and at the end
                   2664:         my $value = $child->nodeValue;
                   2665:         $value =~ s/\s+$//;
                   2666:         $child->setData($value);
                   2667:       }
                   2668:     }
                   2669:     $node->removeChild($child);
                   2670:     $parent->insertBefore($child, $node);
                   2671:     $previous = $child;
                   2672:   }
                   2673:   $parent->removeChild($node);
                   2674: }
                   2675: 
                   2676: ##
                   2677: # returns the trimmed attribute value if the attribute exists and is not blank, undef otherwise
                   2678: # @param {Node} node - the DOM node
                   2679: # @param {string} attribute_name - the attribute name
                   2680: ##
                   2681: sub get_non_empty_attribute {
                   2682:   my ($node, $attribute_name) = @_;
                   2683:   my $value = $node->getAttribute($attribute_name);
                   2684:   if (defined $value && $value !~ /^\s*$/) {
                   2685:     $value = trim($value);
                   2686:     return($value);
                   2687:   }
                   2688:   return(undef);
                   2689: }
                   2690: 
                   2691: ##
                   2692: # Returns a CSS property value from the style attribute of the element, or undef if not defined
                   2693: # @param {Element} el - the DOM element
                   2694: # @param {string} property_name - the CSS property name
                   2695: ##
                   2696: sub get_css_property {
                   2697:   my ($el, $property_name) = @_;
                   2698:   my $style = $el->getAttribute('style');
                   2699:   if (defined $style) {
                   2700:     $style =~ s/^\s*;\s*//;
                   2701:     $style =~ s/\s*;\s*$//;
                   2702:   } else {
                   2703:     $style = '';
                   2704:   }
                   2705:   my @pairs = split(';', $style);
                   2706:   foreach my $pair (@pairs) {
                   2707:     my @name_value = split(':', $pair);
                   2708:     if (scalar(@name_value) != 2) {
                   2709:       next;
                   2710:     }
                   2711:     my $name = trim($name_value[0]);
                   2712:     my $value = trim($name_value[1]);
                   2713:     if (lc($name) eq $property_name) {
                   2714:       return($value); # return the first one found
                   2715:     }
                   2716:   }
                   2717:   return(undef);
                   2718: }
                   2719: 
                   2720: ##
                   2721: # Returns the reference to a hash CSS property name => value from the style attribute of the element.
                   2722: # Returns an empty list if the style attribute is not defined,
                   2723: # @param {Element} el - the DOM element
                   2724: # @return {Hash<string, string>} reference to the hash  property name => property value
                   2725: ##
                   2726: sub get_css_properties {
                   2727:   my ($el) = @_;
                   2728:   my $style = $el->getAttribute('style');
                   2729:   if (defined $style) {
                   2730:     $style =~ s/^\s*;\s*//;
                   2731:     $style =~ s/\s*;\s*$//;
                   2732:   } else {
                   2733:     $style = '';
                   2734:   }
                   2735:   my @pairs = split(';', $style);
                   2736:   tie (my %hash, 'Tie::IxHash', ());
                   2737:   foreach my $pair (@pairs) {
                   2738:     my @name_value = split(':', $pair);
                   2739:     if (scalar(@name_value) != 2) {
                   2740:       next;
                   2741:     }
                   2742:     my $name = trim($name_value[0]);
                   2743:     my $value = trim($name_value[1]);
                   2744:     if (defined $hash{$name}) {
                   2745:       # duplicate property in the style attribute: keep only the last one
                   2746:       delete $hash{$name};
                   2747:     }
                   2748:     $hash{$name} = $value;
                   2749:   }
                   2750:   return(\%hash);
                   2751: }
                   2752: 
                   2753: ##
                   2754: # Sets a CSS property in the style attribute of an element
                   2755: # @param {Element} el - the DOM element
                   2756: # @param {string} property_name - the CSS property name
                   2757: # @param {string} property_value - the CSS property value
                   2758: ##
                   2759: sub set_css_property {
                   2760:   my ($el, $property_name, $property_value) = @_;
                   2761:   my $hash_ref = { $property_name => $property_value };
                   2762:   set_css_properties($el, $hash_ref);
                   2763: }
                   2764: 
                   2765: ##
                   2766: # Sets several CSS properties in the style attribute of an element
                   2767: # @param {Element} el - the DOM element
                   2768: # @param {Hash<string, string>} properties - reference to the hash property name => property value
                   2769: ##
                   2770: sub set_css_properties {
                   2771:   my ($el, $properties) = @_;
                   2772:   my $hash = get_css_properties($el);
                   2773:   foreach my $property_name (keys %$properties) {
                   2774:     my $property_value = $properties->{$property_name};
                   2775:     if (defined $hash->{$property_name}) {
                   2776:       delete $hash->{$property_name}; # to add the new one at the end
                   2777:     }
                   2778:     $hash->{$property_name} = $property_value;
                   2779:   }
                   2780:   my $style = '';
                   2781:   foreach my $key (keys %$hash) {
                   2782:     $style .= $key.':'.$hash->{$key}.'; ';
                   2783:   }
                   2784:   $style =~ s/; $//;
                   2785:   $el->setAttribute('style', $style);
                   2786: }
                   2787: 
                   2788: 1;
                   2789: __END__

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>