Diff for /loncom/homework/cleanxml/xml_to_loncapa.pm between versions 1.1 and 1.7

version 1.1, 2015/12/03 20:40:31 version 1.7, 2016/01/20 00:41:31
Line 49  my @simple_data = ('polygon', 'rectangle Line 49  my @simple_data = ('polygon', 'rectangle
   
 my @inline_responses = ('stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse');  my @inline_responses = ('stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse');
   
   # see http://www.w3.org/TR/html-polyglot/#empty-elements
   # and http://tiffanybbrown.com/2011/03/23/html5-does-not-allow-self-closing-tags/
   # HTML elements whose content model is not empty, and which must therefore never use a self-closing tag:
   my @non_empty_html = ('title','style','script','noscript','body','section','header','footer','article','aside','nav','h1','h2','h3','h4','h5','h6','div','p','li','dt','dd','caption','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','address','blockquote','bdo','ruby','rb','rp','rt','rtc','figure','figcaption','object','applet','video','audio','canvas','label','option','textarea','fieldset','legend','button','iframe');
   
   
 # Converts a file and returns the modified contents  # Converts a file and returns the modified contents
 sub convert_file {  sub convert_file {
   my ($contents) = @_;    my ($contents) = @_;
   
   my $dom_doc = XML::LibXML->load_xml(string => $contents);    my $dom_doc = XML::LibXML->load_xml(string => $contents);
   add_outtext($dom_doc);    my $root = $dom_doc->documentElement();
     if (defined $root && $root->nodeName ne 'html') {
       add_outtext($dom_doc);
     }
   return node_to_string($dom_doc);    return node_to_string($dom_doc);
 }  }
   
Line 73  sub node_to_string { Line 81  sub node_to_string {
     if (defined $parent->parentNode) {      if (defined $parent->parentNode) {
       $grandparent_name = $parent->parentNode->nodeName;        $grandparent_name = $parent->parentNode->nodeName;
     }      }
     my @no_escape = ('m', 'script', 'display', 'parse', 'answer');      my @no_escape = ('m', 'script', 'style', 'display', 'parse', 'answer');
     if (string_in_array(\@no_escape, $parent_name) &&      if (string_in_array(\@no_escape, $parent_name) &&
         ($parent_name ne 'answer' ||          ($parent_name ne 'answer' ||
         (defined $grandparent_name &&          (defined $grandparent_name &&
Line 92  sub node_to_string { Line 100  sub node_to_string {
       $s .= ' ';        $s .= ' ';
       $s .= $attribute->nodeName;        $s .= $attribute->nodeName;
       $s .= '="';        $s .= '="';
       $s .= escape($attribute->nodeValue);        $s .= escape_attribute($attribute->nodeValue);
       $s .= '"';        $s .= '"';
     }      }
     if ($node->hasChildNodes()) {      if ($node->hasChildNodes() || string_in_array(\@non_empty_html, $tag)) {
       $s .= '>';        $s .= '>';
       foreach my $child ($node->childNodes) {        foreach my $child ($node->childNodes) {
         $s .= node_to_string($child);          $s .= node_to_string($child);
Line 110  sub node_to_string { Line 118  sub node_to_string {
   }    }
 }  }
   
 # Escapes a string for LON-CAPA output (used for text nodes, not attribute values)  # Escapes an attribute value
 sub escape {  sub escape_attribute {
   my ($s) = @_;    my ($s) = @_;
   $s =~ s/&/&/sg;    # normal XML escapes do not work with LON-CAPA, for instance with reactionresponse
   $s =~ s/</&lt;/sg;    #$s =~ s/&/&amp;/sg;
   $s =~ s/>/&gt;/sg;    #$s =~ s/</&lt;/sg;
   # quot and apos do not need to be escaped outside attribute values    #$s =~ s/>/&gt;/sg;
     $s =~ s/"/&quot;/sg;
   return $s;    return $s;
 }  }
   
Line 246  sub add_endouttext { Line 255  sub add_endouttext {
   } else {    } else {
     $parent->appendChild($endouttext);      $parent->appendChild($endouttext);
   }    }
    # replace the spaces that follow with a \n + indentation
     my $next = $endouttext->nextSibling;
     if (defined $next && $next->nodeType == XML_TEXT_NODE) {
       my $v = $next->nodeValue;
       if ($v =~ /^ /) {
         $v =~ s/^ +//;
         if ($parent->firstChild->nodeType == XML_TEXT_NODE &&
             $parent->firstChild->nodeValue =~ /^\n +$/) {
           $v = $parent->firstChild->nodeValue.$v;
         } else {
           $v = "\n".$v;
         }
         $next->setData($v);
       }
     }
 }  }
   
 # Convert paragraph children when one contains an inline response into content + <br>  # Convert paragraph children when one contains an inline response into content + <br>

Removed from v.1.1  
changed lines
  Added in v.1.7


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>