Diff for /loncom/homework/cleanxml/xml_to_loncapa.pm between versions 1.3 and 1.10

version 1.3, 2015/12/23 20:33:10 version 1.10, 2016/11/10 21:53:56
Line 38  use warnings; Line 38  use warnings;
 use XML::LibXML;  use XML::LibXML;
   
   
 my @loncapa_block = ('parameter','location','answer','foil','image','polygon','rectangle','text','conceptgroup','itemgroup','item','label','data','function','array','unit','answergroup','functionplotresponse','functionplotruleset','functionplotelements','functionplotcustomrule','essayresponse','hintpart','formulahint','numericalhint','reactionhint','organichint','optionhint','radiobuttonhint','stringhint','customhint','mathhint','formulahintcondition','numericalhintcondition','reactionhintcondition','organichintcondition','optionhintcondition','radiobuttonhintcondition','stringhintcondition','customhintcondition','mathhintcondition','imageresponse','foilgroup','datasubmission','textfield','hiddensubmission','radiobuttonresponse','rankresponse','matchresponse','import','style','script','window','block','library','notsolved','part','postanswerdate','preduedate','problem','problemtype','randomlabel','bgimg','labelgroup','randomlist','solved','while','tex','print','web','gnuplot','curve','Task','IntroParagraph','ClosingParagraph','Question','QuestionText','Setup','Instance','InstanceText','Criteria','CriteriaText','GraderNote','languageblock','translated','lang','instructorcomment','dataresponse','togglebox','standalone','comment','drawimage','allow','displayduedate','displaytitle','responseparam','organicstructure','scriptlib','parserlib','drawoptionlist','spline','backgroundplot','plotobject','plotvector','drawvectorsum','functionplotrule','functionplotvectorrule','functionplotvectorsumrule','axis','key','xtics','ytics','title','xlabel','ylabel','hiddenline','dtm','stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse', 'hint', 'hintgroup');  # LON-CAPA block elements that cannot be found within startouttext/endouttext
   my @loncapa_block = ('parameter','location','answer','foil','image','polygon','rectangle','text','conceptgroup','itemgroup','item','label','data','function','array','unit','answergroup','functionplotresponse','functionplotruleset','functionplotelements','functionplotcustomrule','essayresponse','hintpart','formulahint','numericalhint','reactionhint','organichint','optionhint','radiobuttonhint','stringhint','customhint','mathhint','formulahintcondition','numericalhintcondition','reactionhintcondition','organichintcondition','optionhintcondition','radiobuttonhintcondition','stringhintcondition','customhintcondition','mathhintcondition','imageresponse','foilgroup','datasubmission','textfield','hiddensubmission','radiobuttonresponse','rankresponse','matchresponse','import','style','script','window','block','library','notsolved','part','postanswerdate','preduedate','problem','problemtype','randomlabel','bgimg','labelgroup','randomlist','solved','while','tex','print','web','gnuplot','curve','Task','IntroParagraph','ClosingParagraph','Question','QuestionText','Setup','Instance','InstanceText','Criteria','CriteriaText','GraderNote','languageblock','instructorcomment','dataresponse','togglebox','standalone','comment','drawimage','allow','displayduedate','displaytitle','responseparam','organicstructure','scriptlib','parserlib','drawoptionlist','spline','backgroundplot','plotobject','plotvector','drawvectorsum','functionplotrule','functionplotvectorrule','functionplotvectorsumrule','axis','key','xtics','ytics','title','xlabel','ylabel','hiddenline','dtm','stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse', 'hint', 'hintgroup');
   
 my @loncapa_inline = ('display','m','lm','chem','num','parse','algebra','displayweight','displaystudentphoto'); # not textline  # LON-CAPA elements that can be found within startouttext/endouttext:
   my @loncapa_in_text = ('display','m','lm','chem','num','parse','algebra','displayweight','displaystudentphoto','translated','lang'); # not textline
   
 # HTML elements that trigger the addition of startouttext/endouttext  # HTML elements that trigger the addition of startouttext/endouttext
 my @html_trigger = ('header','footer','aside','h1','h2','h3','h4','h5','h6','li','dd','dt','tbody','tr','caption','thead','tfoot','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','br','hr','address','blockquote','img','figure','figcaption','object','param','embed','applet','video','source','audio','map','area','canvas','form','input','select','optgroup','option','textarea','fieldset','legend','button','iframe','section','div','p','ul','ol','dl','table');  my @html_trigger = ('header','footer','aside','h1','h2','h3','h4','h5','h6','li','dd','dt','tbody','tr','caption','thead','tfoot','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','br','hr','address','blockquote','figure','figcaption','object','param','embed','applet','video','source','audio','map','area','canvas','form','input','select','optgroup','option','textarea','fieldset','legend','button','iframe','section','div','p','ul','ol','dl','table');
   
 my @simple_data = ('polygon', 'rectangle', 'vector', 'value', 'answer', 'title', 'data', 'function', 'xlabel', 'ylabel', 'tic', 'parserlib', 'scriptlib', 'import', 'tex', 'text', 'image', 'display', 'm', 'lm', 'num', 'algebra', 'chem', 'parse', 'title', 'style', 'script', 'ins', 'del', 'label', 'option', 'textarea', 'legend' );  my @simple_data = ('polygon', 'rectangle', 'vector', 'value', 'answer', 'title', 'data', 'function', 'xlabel', 'ylabel', 'tic', 'parserlib', 'scriptlib', 'import', 'tex', 'text', 'image', 'display', 'm', 'lm', 'num', 'algebra', 'chem', 'parse', 'title', 'style', 'script', 'ins', 'del', 'label', 'option', 'textarea', 'legend','comment');
   
 my @inline_responses = ('stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse');  my @inline_responses = ('stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse');
   
   # see http://www.w3.org/TR/html-polyglot/#empty-elements
   # and http://tiffanybbrown.com/2011/03/23/html5-does-not-allow-self-closing-tags/
   # HTML elements that do not have an empty content, and must never use a self-closing tag:
   my @non_empty_html = ('title','style','script','noscript','body','section','header','footer','article','aside','nav','h1','h2','h3','h4','h5','h6','div','p','li','dt','dd','caption','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','address','blockquote','bdo','ruby','rb','rp','rt','rtc','figure','figcaption','object','applet','video','audio','canvas','label','option','textarea','fieldset','legend','button','iframe');
   
   
 # Converts a file and return the modified contents  # Converts a file and return the modified contents
 sub convert_file {  sub convert_file {
   my ($contents) = @_;    my ($contents) = @_;
   
   my $dom_doc = XML::LibXML->load_xml(string => $contents);    my $dom_doc = XML::LibXML->load_xml(string => $contents);
   add_outtext($dom_doc);    my $root = $dom_doc->documentElement();
     if (defined $root && $root->nodeName ne 'html') {
       add_outtext($dom_doc);
     }
   return node_to_string($dom_doc);    return node_to_string($dom_doc);
 }  }
   
Line 73  sub node_to_string { Line 83  sub node_to_string {
     if (defined $parent->parentNode) {      if (defined $parent->parentNode) {
       $grandparent_name = $parent->parentNode->nodeName;        $grandparent_name = $parent->parentNode->nodeName;
     }      }
     my @no_escape = ('m', 'script', 'display', 'parse', 'answer');      my @no_escape = ('m', 'script', 'style', 'display', 'parse', 'answer');
     if (string_in_array(\@no_escape, $parent_name) &&      if (string_in_array(\@no_escape, $parent_name) &&
         ($parent_name ne 'answer' ||          ($parent_name ne 'answer' ||
         (defined $grandparent_name &&          (defined $grandparent_name &&
Line 95  sub node_to_string { Line 105  sub node_to_string {
       $s .= escape_attribute($attribute->nodeValue);        $s .= escape_attribute($attribute->nodeValue);
       $s .= '"';        $s .= '"';
     }      }
     if ($node->hasChildNodes() || $tag eq 'script') {      if ($node->hasChildNodes() || string_in_array(\@non_empty_html, $tag)) {
       $s .= '>';        $s .= '>';
       foreach my $child ($node->childNodes) {        foreach my $child ($node->childNodes) {
         $s .= node_to_string($child);          $s .= node_to_string($child);
Line 137  sub add_outtext { Line 147  sub add_outtext {
     return;      return;
   }    }
   convert_paragraphs($node);    convert_paragraphs($node);
     if ($node->nodeName eq 'hintgroup' && !defined $node->firstChild) {
       # empty hintgroup: colorful editor needs start/end outtext
       add_endouttext($node, undef);
       add_startouttext($node, $node->firstChild);
     }
   my $next;    my $next;
   my $in_outtext = 0;    my $in_outtext = 0;
   for (my $child=$node->firstChild; defined $child; $child=$next) {    for (my $child=$node->firstChild; defined $child; $child=$next) {
Line 170  sub inside_outtext { Line 185  sub inside_outtext {
       return 1;        return 1;
     }      }
   }    }
   if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@loncapa_inline, $node->nodeName)) {    if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@loncapa_in_text, $node->nodeName)) {
     return 1;      return 1;
   }    }
   return 0;    return 0;
Line 247  sub add_endouttext { Line 262  sub add_endouttext {
   } else {    } else {
     $parent->appendChild($endouttext);      $parent->appendChild($endouttext);
   }    }
     # replace spaces afterwards by a \n + indentation
     my $next = $endouttext->nextSibling;
     if (defined $next && $next->nodeType == XML_TEXT_NODE) {
       my $v = $next->nodeValue;
       if ($v =~ /^ /) {
         $v =~ s/^ +//;
         if ($parent->firstChild->nodeType == XML_TEXT_NODE &&
             $parent->firstChild->nodeValue =~ /^\n +$/) {
           $v = $parent->firstChild->nodeValue.$v;
         } else {
           $v = "\n".$v;
         }
         $next->setData($v);
       }
     }
 }  }
   
 # Convert paragraph children when one contains an inline response into content + <br>  # Convert paragraph children when one contains an inline response into content + <br>
Line 281  sub convert_paragraphs { Line 311  sub convert_paragraphs {
           # we only add a br if there is something after            # we only add a br if there is something after
           my $br = $doc->createElement('br');            my $br = $doc->createElement('br');
           $parent->insertBefore($br, $next);            $parent->insertBefore($br, $next);
             # add another br to make up for the p margin
             $br = $doc->createElement('br');
             $parent->insertBefore($br, $next);
         }          }
       }        }
     }      }

Removed from v.1.3  
changed lines
  Added in v.1.10


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>