version 1.1, 2015/12/03 20:40:31
|
version 1.2, 2016/01/06 16:44:32
|
Line 51 my $warnings; # 1 = print warnings
|
Line 51 my $warnings; # 1 = print warnings
|
|
|
# This takes non-well-formed UTF-8 LC+HTML and returns well-formed but non-valid XML LC+XHTML. |
# This takes non-well-formed UTF-8 LC+HTML and returns well-formed but non-valid XML LC+XHTML. |
sub html_to_xml { |
sub html_to_xml { |
my($textref, $warn) = @_; |
my($textref, $warn, $case_sensitive) = @_; |
$warnings = $warn; |
$warnings = $warn; |
|
if (!defined $case_sensitive) { |
|
$case_sensitive = 0; |
|
} |
$result = ''; |
$result = ''; |
@stack = (); |
@stack = (); |
$close_warning = ''; |
$close_warning = ''; |
Line 65 sub html_to_xml {
|
Line 68 sub html_to_xml {
|
process_h => [\&process, "token0"], |
process_h => [\&process, "token0"], |
); |
); |
# NOTE: by default, the HTML parser turns all attribute and element names to lowercase |
# NOTE: by default, the HTML parser turns all attribute and element names to lowercase
|
# This is a problem with the Task elements, so the lowercasing is disabled when $case_sensitive is true |
|
if ($case_sensitive) { |
|
$p->case_sensitive(1); |
|
} |
$p->empty_element_tags(1); |
$p->empty_element_tags(1); |
$result .= "<?xml version='1.0' encoding='UTF-8'?>\n"; |
$result .= "<?xml version='1.0' encoding='UTF-8'?>\n"; |
$p->parse($$textref); |
$p->parse($$textref); |