--- loncom/homework/cleanxml/html_to_xml.pm 2015/12/03 20:40:31 1.1
+++ loncom/homework/cleanxml/html_to_xml.pm 2016/01/06 16:44:32 1.2
@@ -1,7 +1,7 @@
# The LearningOnline Network
# Second step to clean a file.
#
-# $Id: html_to_xml.pm,v 1.1 2015/12/03 20:40:31 damieng Exp $
+# $Id: html_to_xml.pm,v 1.2 2016/01/06 16:44:32 damieng Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -51,8 +51,11 @@ my $warnings; # 1 = print warnings
# This takes non-well-formed UTF-8 LC+HTML and returns well-formed but non-valid XML LC+XHTML.
sub html_to_xml {
- my($textref, $warn) = @_;
+ my($textref, $warn, $case_sensitive) = @_;
$warnings = $warn;
+ if (!defined $case_sensitive) {
+ $case_sensitive = 0;
+ }
$result = '';
@stack = ();
$close_warning = '';
@@ -65,6 +68,10 @@ sub html_to_xml {
process_h => [\&process, "token0"],
);
# NOTE: by default, the HTML parser turns all attribute and elements names to lowercase
+ # This is a problem with the Task elements, so lowercasing is disabled when $case_sensitive is true
+ if ($case_sensitive) {
+ $p->case_sensitive(1);
+ }
$p->empty_element_tags(1);
$result .= "\n";
$p->parse($$textref);