Diff for /loncom/homework/cleanxml/xml_to_loncapa.pm between versions 1.1 and 1.8

version 1.1, 2015/12/03 20:40:31 version 1.8, 2016/01/20 21:24:22
Line 38  use warnings; Line 38  use warnings;
 use XML::LibXML;  use XML::LibXML;
   
   
 my @loncapa_block = ('parameter','location','answer','foil','image','polygon','rectangle','text','conceptgroup','itemgroup','item','label','data','function','array','unit','answergroup','functionplotresponse','functionplotruleset','functionplotelements','functionplotcustomrule','essayresponse','hintpart','formulahint','numericalhint','reactionhint','organichint','optionhint','radiobuttonhint','stringhint','customhint','mathhint','formulahintcondition','numericalhintcondition','reactionhintcondition','organichintcondition','optionhintcondition','radiobuttonhintcondition','stringhintcondition','customhintcondition','mathhintcondition','imageresponse','foilgroup','datasubmission','textfield','hiddensubmission','radiobuttonresponse','rankresponse','matchresponse','import','style','script','window','block','library','notsolved','part','postanswerdate','preduedate','problem','problemtype','randomlabel','bgimg','labelgroup','randomlist','solved','while','tex','print','web','gnuplot','curve','Task','IntroParagraph','ClosingParagraph','Question','QuestionText','Setup','Instance','InstanceText','Criteria','CriteriaText','GraderNote','languageblock','translated','lang','instructorcomment','dataresponse','togglebox','standalone','comment','drawimage','allow','displayduedate','displaytitle','responseparam','organicstructure','scriptlib','parserlib','drawoptionlist','spline','backgroundplot','plotobject','plotvector','drawvectorsum','functionplotrule','functionplotvectorrule','functionplotvectorsumrule','axis','key','xtics','ytics','title','xlabel','ylabel','hiddenline','dtm','stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse', 'hint', 'hintgroup');  # LON-CAPA block elements that cannot be found within startouttext/endouttext
   my @loncapa_block = ('parameter','location','answer','foil','image','polygon','rectangle','text','conceptgroup','itemgroup','item','label','data','function','array','unit','answergroup','functionplotresponse','functionplotruleset','functionplotelements','functionplotcustomrule','essayresponse','hintpart','formulahint','numericalhint','reactionhint','organichint','optionhint','radiobuttonhint','stringhint','customhint','mathhint','formulahintcondition','numericalhintcondition','reactionhintcondition','organichintcondition','optionhintcondition','radiobuttonhintcondition','stringhintcondition','customhintcondition','mathhintcondition','imageresponse','foilgroup','datasubmission','textfield','hiddensubmission','radiobuttonresponse','rankresponse','matchresponse','import','style','script','window','block','library','notsolved','part','postanswerdate','preduedate','problem','problemtype','randomlabel','bgimg','labelgroup','randomlist','solved','while','tex','print','web','gnuplot','curve','Task','IntroParagraph','ClosingParagraph','Question','QuestionText','Setup','Instance','InstanceText','Criteria','CriteriaText','GraderNote','languageblock','instructorcomment','dataresponse','togglebox','standalone','comment','drawimage','allow','displayduedate','displaytitle','responseparam','organicstructure','scriptlib','parserlib','drawoptionlist','spline','backgroundplot','plotobject','plotvector','drawvectorsum','functionplotrule','functionplotvectorrule','functionplotvectorsumrule','axis','key','xtics','ytics','title','xlabel','ylabel','hiddenline','dtm','stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse', 'hint', 'hintgroup');
   
 my @loncapa_inline = ('display','m','lm','chem','num','parse','algebra','displayweight','displaystudentphoto'); # not textline  # LON-CAPA elements that can be found within startouttext/endouttext:
   my @loncapa_in_text = ('display','m','lm','chem','num','parse','algebra','displayweight','displaystudentphoto','translated','lang'); # not textline
   
 # HTML elements that trigger the addition of startouttext/endouttext  # HTML elements that trigger the addition of startouttext/endouttext
 my @html_trigger = ('header','footer','aside','h1','h2','h3','h4','h5','h6','li','dd','dt','tbody','tr','caption','thead','tfoot','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','br','hr','address','blockquote','img','figure','figcaption','object','param','embed','applet','video','source','audio','map','area','canvas','form','input','select','optgroup','option','textarea','fieldset','legend','button','iframe','section','div','p','ul','ol','dl','table');  my @html_trigger = ('header','footer','aside','h1','h2','h3','h4','h5','h6','li','dd','dt','tbody','tr','caption','thead','tfoot','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','br','hr','address','blockquote','img','figure','figcaption','object','param','embed','applet','video','source','audio','map','area','canvas','form','input','select','optgroup','option','textarea','fieldset','legend','button','iframe','section','div','p','ul','ol','dl','table');
Line 49  my @simple_data = ('polygon', 'rectangle Line 51  my @simple_data = ('polygon', 'rectangle
   
 my @inline_responses = ('stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse');  my @inline_responses = ('stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse');
   
   # see http://www.w3.org/TR/html-polyglot/#empty-elements
   # and http://tiffanybbrown.com/2011/03/23/html5-does-not-allow-self-closing-tags/
   # HTML elements that do not have an empty content, and must never use a self-closing tag:
   my @non_empty_html = ('title','style','script','noscript','body','section','header','footer','article','aside','nav','h1','h2','h3','h4','h5','h6','div','p','li','dt','dd','caption','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','address','blockquote','bdo','ruby','rb','rp','rt','rtc','figure','figcaption','object','applet','video','audio','canvas','label','option','textarea','fieldset','legend','button','iframe');
   
   
 # Converts a file and return the modified contents  # Converts a file and return the modified contents
 sub convert_file {  sub convert_file {
   my ($contents) = @_;    my ($contents) = @_;
   
   my $dom_doc = XML::LibXML->load_xml(string => $contents);    my $dom_doc = XML::LibXML->load_xml(string => $contents);
   add_outtext($dom_doc);    my $root = $dom_doc->documentElement();
     if (defined $root && $root->nodeName ne 'html') {
       add_outtext($dom_doc);
     }
   return node_to_string($dom_doc);    return node_to_string($dom_doc);
 }  }
   
Line 73  sub node_to_string { Line 83  sub node_to_string {
     if (defined $parent->parentNode) {      if (defined $parent->parentNode) {
       $grandparent_name = $parent->parentNode->nodeName;        $grandparent_name = $parent->parentNode->nodeName;
     }      }
     my @no_escape = ('m', 'script', 'display', 'parse', 'answer');      my @no_escape = ('m', 'script', 'style', 'display', 'parse', 'answer');
     if (string_in_array(\@no_escape, $parent_name) &&      if (string_in_array(\@no_escape, $parent_name) &&
         ($parent_name ne 'answer' ||          ($parent_name ne 'answer' ||
         (defined $grandparent_name &&          (defined $grandparent_name &&
Line 92  sub node_to_string { Line 102  sub node_to_string {
       $s .= ' ';        $s .= ' ';
       $s .= $attribute->nodeName;        $s .= $attribute->nodeName;
       $s .= '="';        $s .= '="';
       $s .= escape($attribute->nodeValue);        $s .= escape_attribute($attribute->nodeValue);
       $s .= '"';        $s .= '"';
     }      }
     if ($node->hasChildNodes()) {      if ($node->hasChildNodes() || string_in_array(\@non_empty_html, $tag)) {
       $s .= '>';        $s .= '>';
       foreach my $child ($node->childNodes) {        foreach my $child ($node->childNodes) {
         $s .= node_to_string($child);          $s .= node_to_string($child);
Line 110  sub node_to_string { Line 120  sub node_to_string {
   }    }
 }  }
   
 # Escapes a string for LON-CAPA output (used for text nodes, not attribute values)  # Escapes an attribute value
 sub escape {  sub escape_attribute {
   my ($s) = @_;    my ($s) = @_;
   $s =~ s/&/&amp;/sg;    # normal XML escapes do not work with LON-CAPA, for instance with reactionresponse
   $s =~ s/</&lt;/sg;    #$s =~ s/&/&amp;/sg;
   $s =~ s/>/&gt;/sg;    #$s =~ s/</&lt;/sg;
   # quot and apos do not need to be escaped outside attribute values    #$s =~ s/>/&gt;/sg;
     $s =~ s/"/&quot;/sg;
   return $s;    return $s;
 }  }
   
Line 136  sub add_outtext { Line 147  sub add_outtext {
     return;      return;
   }    }
   convert_paragraphs($node);    convert_paragraphs($node);
     if ($node->nodeName eq 'hintgroup' && !defined $node->firstChild) {
       # empty hintgroup: colorful editor needs start/end outtext
       add_endouttext($node, undef);
       add_startouttext($node, $node->firstChild);
     }
   my $next;    my $next;
   my $in_outtext = 0;    my $in_outtext = 0;
   for (my $child=$node->firstChild; defined $child; $child=$next) {    for (my $child=$node->firstChild; defined $child; $child=$next) {
Line 169  sub inside_outtext { Line 185  sub inside_outtext {
       return 1;        return 1;
     }      }
   }    }
   if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@loncapa_inline, $node->nodeName)) {    if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@loncapa_in_text, $node->nodeName)) {
     return 1;      return 1;
   }    }
   return 0;    return 0;
Line 246  sub add_endouttext { Line 262  sub add_endouttext {
   } else {    } else {
     $parent->appendChild($endouttext);      $parent->appendChild($endouttext);
   }    }
     # replace spaces afterwards by a \n + indentation
     my $next = $endouttext->nextSibling;
     if (defined $next && $next->nodeType == XML_TEXT_NODE) {
       my $v = $next->nodeValue;
       if ($v =~ /^ /) {
         $v =~ s/^ +//;
         if ($parent->firstChild->nodeType == XML_TEXT_NODE &&
             $parent->firstChild->nodeValue =~ /^\n +$/) {
           $v = $parent->firstChild->nodeValue.$v;
         } else {
           $v = "\n".$v;
         }
         $next->setData($v);
       }
     }
 }  }
   
 # Convert paragraph children when one contains an inline response into content + <br>  # Convert paragraph children when one contains an inline response into content + <br>

Removed from v.1.1  
changed lines
  Added in v.1.8


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>