--- doc/otherfiles/perl_modules.txt 2002/03/02 05:17:00 1.11 +++ doc/otherfiles/perl_modules.txt 2002/03/06 22:47:45 1.12 @@ -210,6 +210,147 @@ Gisle Aas [gisle@aas.no] HTML/Parser.pm 1 HTML/TokeParser.pm 1 +Need these patches applied: + +diff -urN HTML-Parser-3.25/hparser.c HTML-Parser-3.25.1/hparser.c +--- HTML-Parser-3.25/hparser.c Thu May 10 15:27:28 2001 ++++ HTML-Parser-3.25.1/hparser.c Wed Feb 20 13:23:34 2002 +@@ -1094,14 +1094,21 @@ + hctype_t tag_name_first, tag_name_char; + hctype_t attr_name_first, attr_name_char; + +- if (p_state->strict_names || p_state->xml_mode) { ++ if (p_state->strict_names) { + tag_name_first = attr_name_first = HCTYPE_NAME_FIRST; + tag_name_char = attr_name_char = HCTYPE_NAME_CHAR; + } + else { +- tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_GT; +- attr_name_first = HCTYPE_NOT_SPACE_GT; +- attr_name_char = HCTYPE_NOT_SPACE_EQ_GT; ++ if (p_state->xml_mode) { ++ tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_SLASH_GT; ++ attr_name_first = HCTYPE_NOT_SPACE_SLASH_GT; ++ attr_name_char = HCTYPE_NOT_SPACE_EQ_GT; ++ } ++ else { ++ tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_GT; ++ attr_name_first = HCTYPE_NOT_SPACE_GT; ++ attr_name_char = HCTYPE_NOT_SPACE_EQ_GT; ++ } + } + + s += 2; +@@ -1158,8 +1165,11 @@ + else { + char *word_start = s; + while (s < end && isHNOT_SPACE_GT(*s)) { +- if (p_state->xml_mode && *s == '/') +- break; ++ if (p_state->xml_mode && *s == '/') { ++ /* look ahead to see if the tag ends */ ++ if ((s+1)==end || *(s+1)=='>') ++ break; ++ } + s++; + } + if (s == end) + +diff -urN HTML-Parser-3.25/Parser.pm HTML-Parser-3.25.2/Parser.pm +--- HTML-Parser-3.25/Parser.pm Fri May 11 13:24:09 2001 ++++ HTML-Parser-3.25.2/Parser.pm Wed Mar 6 16:47:46 2002 +@@ -427,6 +427,11 @@ + There are currently no events associated with the marked section + markup, but the text can be returned as C. + ++=item $p->encoded_entities( [$bool] ) ++ ++By default, attr and @attr decode general entities for attribute values. 
++This turns off that behavior. ++ + =back + + As markup and text is recognized, handlers are invoked. The following +diff -urN HTML-Parser-3.25/Parser.xs HTML-Parser-3.25.2/Parser.xs +--- HTML-Parser-3.25/Parser.xs Thu May 10 15:27:28 2001 ++++ HTML-Parser-3.25.2/Parser.xs Wed Mar 6 16:48:56 2002 +@@ -297,6 +297,7 @@ + HTML::Parser::xml_mode = 3 + HTML::Parser::unbroken_text = 4 + HTML::Parser::marked_sections = 5 ++ HTML::Parser::encoded_entities = 6 + PREINIT: + bool *attr; + CODE: +@@ -311,6 +312,7 @@ + #else + croak("marked sections not supported"); break; + #endif ++ case 6: attr = &pstate->encoded_entities; break; + default: + croak("Unknown boolean attribute (%d)", ix); + } +diff -urN HTML-Parser-3.25/hparser.c HTML-Parser-3.25.2/hparser.c +--- HTML-Parser-3.25/hparser.c Thu May 10 15:27:28 2001 ++++ HTML-Parser-3.25.2/hparser.c Wed Mar 6 16:44:47 2002 +@@ -398,7 +398,8 @@ + beg++; len -= 2; + } + attrval = newSVpvn(beg, len); +- decode_entities(aTHX_ attrval, p_state->entity2char); ++ if (!p_state->encoded_entities) ++ decode_entities(aTHX_ attrval, p_state->entity2char); + } + else { /* boolean */ + if (p_state->bool_attr_val) +diff -urN HTML-Parser-3.25/hparser.h HTML-Parser-3.25.2/hparser.h +--- HTML-Parser-3.25/hparser.h Tue May 8 13:03:27 2001 ++++ HTML-Parser-3.25.2/hparser.h Wed Mar 6 16:48:18 2002 +@@ -99,6 +99,7 @@ + bool strict_names; + bool xml_mode; + bool unbroken_text; ++ bool encoded_entities; + + /* other configuration stuff */ + SV* bool_attr_val; +diff -urN HTML-Parser-3.25/t/encoded-entities.t HTML-Parser-3.25.2/t/encoded-entities.t +--- HTML-Parser-3.25/t/encoded-entities.t Wed Dec 31 19:00:00 1969 ++++ HTML-Parser-3.25.2/t/encoded-entities.t Wed Mar 6 17:13:53 2002 +@@ -0,0 +1,32 @@ ++use strict; ++print "1..2\n"; ++ ++use HTML::Parser (); ++my $p = HTML::Parser->new(); ++$p->encoded_entities(1); ++ ++my $text = ""; ++$p->handler(start => ++ sub { ++ my($tag, $attr) = @_; ++ $text .= "S[$tag"; ++ for my $k (sort keys %$attr) { ++ my $v 
= $attr->{$k}; ++ $text .= " $k=$v"; ++ } ++ $text .= "]"; ++ }, "tagname,attr"); ++ ++my $html = <<'EOT'; ++ ++EOT ++ ++$p->parse($html)->eof; ++ ++print "not " unless $text eq 'S[tag arg=&<>]'; print "ok 1\n"; ++ ++$text = ""; ++$p->encoded_entities(0); ++$p->parse($html)->eof; ++ ++print "not " unless $text eq 'S[tag arg=&<>]'; print "ok 2\n"; + ---------------------------------------------- IO-stringy http://www.cpan.org/authors/id/E/ER/ERYQ/IO-stringy-2.108.tar.gz (needed by MIME-tools)