--- loncom/homework/cleanxml/xml_to_loncapa.pm 2015/12/18 22:08:51 1.2 +++ loncom/homework/cleanxml/xml_to_loncapa.pm 2016/01/08 20:32:42 1.6 @@ -1,7 +1,7 @@ # The LearningOnline Network # convert_file takes a well-formed XML file content and converts it to LON-CAPA syntax. # -# $Id: xml_to_loncapa.pm,v 1.2 2015/12/18 22:08:51 damieng Exp $ +# $Id: xml_to_loncapa.pm,v 1.6 2016/01/08 20:32:42 damieng Exp $ # # Copyright Michigan State University Board of Trustees # @@ -49,13 +49,21 @@ my @simple_data = ('polygon', 'rectangle my @inline_responses = ('stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse'); +# see http://www.w3.org/TR/html-polyglot/#empty-elements +# and http://tiffanybbrown.com/2011/03/23/html5-does-not-allow-self-closing-tags/ +# HTML elements that do not have an empty content, and must never use a self-closing tag: +my @non_empty_html = ('title','style','script','noscript','body','section','header','footer','article','aside','nav','h1','h2','h3','h4','h5','h6','div','p','li','dt','dd','caption','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','address','blockquote','bdo','ruby','rb','rp','rt','rtc','figure','figcaption','object','applet','video','audio','canvas','label','option','textarea','fieldset','legend','button','iframe'); + # Converts a file and return the modified contents sub convert_file { my ($contents) = @_; my $dom_doc = XML::LibXML->load_xml(string => $contents); - add_outtext($dom_doc); + my $root = $dom_doc->documentElement(); + if (defined $root && $root->nodeName ne 'html') { + add_outtext($dom_doc); + } return node_to_string($dom_doc); } @@ -73,7 +81,7 @@ sub node_to_string { if (defined $parent->parentNode) { $grandparent_name = $parent->parentNode->nodeName; } - my @no_escape = ('m', 'script', 'display', 'parse', 'answer'); + my @no_escape = ('m', 'script', 'style', 'display', 'parse', 'answer'); if (string_in_array(\@no_escape, $parent_name) && ($parent_name ne 'answer' || (defined $grandparent_name && @@ -95,7 +103,7 @@ sub node_to_string { $s .= escape_attribute($attribute->nodeValue); $s .= '"'; } - if ($node->hasChildNodes()) { + if ($node->hasChildNodes() || string_in_array(\@non_empty_html, $tag)) { $s .= '>'; foreach my $child ($node->childNodes) { $s .= node_to_string($child);