--- loncom/homework/cleanxml/html_to_xml.pm 2015/12/03 20:40:31 1.1 +++ loncom/homework/cleanxml/html_to_xml.pm 2016/01/06 16:44:32 1.2 @@ -1,7 +1,7 @@ # The LearningOnline Network # Second step to clean a file. # -# $Id: html_to_xml.pm,v 1.1 2015/12/03 20:40:31 damieng Exp $ +# $Id: html_to_xml.pm,v 1.2 2016/01/06 16:44:32 damieng Exp $ # # Copyright Michigan State University Board of Trustees # @@ -51,8 +51,11 @@ my $warnings; # 1 = print warnings # This takes non-well-formed UTF-8 LC+HTML and returns well-formed but non-valid XML LC+XHTML. sub html_to_xml { - my($textref, $warn) = @_; + my($textref, $warn, $case_sensitive) = @_; $warnings = $warn; + if (!defined $case_sensitive) { + $case_sensitive = 0; + } $result = ''; @stack = (); $close_warning = ''; @@ -65,6 +68,10 @@ sub html_to_xml { process_h => [\&process, "token0"], ); # NOTE: by default, the HTML parser turns all attribute and elements names to lowercase + # This is a problem with the Task elements, so it is disabled in that case + if ($case_sensitive) { + $p->case_sensitive(1); + } $p->empty_element_tags(1); $result .= "\n"; $p->parse($$textref);