--- doc/otherfiles/perl_modules.txt 2002/03/06 22:47:45 1.12 +++ doc/otherfiles/perl_modules.txt 2002/03/22 20:26:26 1.13 @@ -205,17 +205,17 @@ Sean M. Burke [sburke@cpan.org] HTML/Parse.pm ---------------------------------------------- HTML-Parser -http://www.cpan.org/authors/id/G/GA/GAAS/HTML-Parser-3.25.tar.gz +http://www.cpan.org/authors/id/G/GA/GAAS/HTML-Parser-3.26.tar.gz Gisle Aas [gisle@aas.no] HTML/Parser.pm 1 HTML/TokeParser.pm 1 -Need these patches applied: +Need this patch applied: -diff -urN HTML-Parser-3.25/hparser.c HTML-Parser-3.25.1/hparser.c ---- HTML-Parser-3.25/hparser.c Thu May 10 15:27:28 2001 -+++ HTML-Parser-3.25.1/hparser.c Wed Feb 20 13:23:34 2002 -@@ -1094,14 +1094,21 @@ +diff -u HTML-Parser-3.26/hparser.c HTML-Parser-3.26.simpleslashfix/hparser.c +--- HTML-Parser-3.26/hparser.c Sun Mar 17 15:07:57 2002 ++++ HTML-Parser-3.26.simpleslashfix/hparser.c Fri Mar 22 13:23:17 2002 +@@ -1101,14 +1101,21 @@ hctype_t tag_name_first, tag_name_char; hctype_t attr_name_first, attr_name_char; @@ -228,129 +228,34 @@ diff -urN HTML-Parser-3.25/hparser.c HTM - tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_GT; - attr_name_first = HCTYPE_NOT_SPACE_GT; - attr_name_char = HCTYPE_NOT_SPACE_EQ_GT; -+ if (p_state->xml_mode) { -+ tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_SLASH_GT; -+ attr_name_first = HCTYPE_NOT_SPACE_SLASH_GT; -+ attr_name_char = HCTYPE_NOT_SPACE_EQ_GT; -+ } -+ else { -+ tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_GT; -+ attr_name_first = HCTYPE_NOT_SPACE_GT; -+ attr_name_char = HCTYPE_NOT_SPACE_EQ_GT; -+ } ++ if (p_state->xml_mode) { ++ tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_SLASH_GT; ++ attr_name_first = HCTYPE_NOT_SPACE_SLASH_GT; ++ attr_name_char = HCTYPE_NOT_SPACE_EQ_GT; ++ } ++ else { ++ tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_GT; ++ attr_name_first = HCTYPE_NOT_SPACE_GT; ++ attr_name_char = HCTYPE_NOT_SPACE_EQ_GT; ++ } } s += 2; -@@ -1158,8 +1165,11 @@ +@@ -1165,8 +1172,11 @@ else { char 
*word_start = s; while (s < end && isHNOT_SPACE_GT(*s)) { - if (p_state->xml_mode && *s == '/') - break; -+ if (p_state->xml_mode && *s == '/') { -+ /* look ahead to see if the tag ends */ -+ if ((s+1)==end || *(s+1)=='>') -+ break; -+ } ++ if (p_state->xml_mode && *s == '/') { ++ /* look ahead to see if the tag ends */ ++ if ((s+1)==end || *(s+1)=='>') ++ break; ++ } s++; } if (s == end) -diff -urN HTML-Parser-3.25/Parser.pm HTML-Parser-3.25.2/Parser.pm ---- HTML-Parser-3.25/Parser.pm Fri May 11 13:24:09 2001 -+++ HTML-Parser-3.25.2/Parser.pm Wed Mar 6 16:47:46 2002 -@@ -427,6 +427,11 @@ - There are currently no events associated with the marked section - markup, but the text can be returned as C<skipped_text>. - -+=item $p->encoded_entities( [$bool] ) -+ -+By default, attr and @attr decode general enitites for attribute values. -+This turns off that behavior. -+ - =back - - As markup and text is recognized, handlers are invoked. The following -diff -urN HTML-Parser-3.25/Parser.xs HTML-Parser-3.25.2/Parser.xs ---- HTML-Parser-3.25/Parser.xs Thu May 10 15:27:28 2001 -+++ HTML-Parser-3.25.2/Parser.xs Wed Mar 6 16:48:56 2002 -@@ -297,6 +297,7 @@ - HTML::Parser::xml_mode = 3 - HTML::Parser::unbroken_text = 4 - HTML::Parser::marked_sections = 5 -+ HTML::Parser::encoded_entities = 6 - PREINIT: - bool *attr; - CODE: -@@ -311,6 +312,7 @@ - #else - croak("marked sections not supported"); break; - #endif -+ case 6: attr = &pstate->encoded_entities; break; - default: - croak("Unknown boolean attribute (%d)", ix); - } -diff -urN HTML-Parser-3.25/hparser.c HTML-Parser-3.25.2/hparser.c ---- HTML-Parser-3.25/hparser.c Thu May 10 15:27:28 2001 -+++ HTML-Parser-3.25.2/hparser.c Wed Mar 6 16:44:47 2002 -@@ -398,7 +398,8 @@ - beg++; len -= 2; - } - attrval = newSVpvn(beg, len); -- decode_entities(aTHX_ attrval, p_state->entity2char); -+ if (!p_state->encoded_entities) -+ decode_entities(aTHX_ attrval, p_state->entity2char); - } - else { /* boolean */ - if (p_state->bool_attr_val) -diff -urN 
HTML-Parser-3.25/hparser.h HTML-Parser-3.25.2/hparser.h ---- HTML-Parser-3.25/hparser.h Tue May 8 13:03:27 2001 -+++ HTML-Parser-3.25.2/hparser.h Wed Mar 6 16:48:18 2002 -@@ -99,6 +99,7 @@ - bool strict_names; - bool xml_mode; - bool unbroken_text; -+ bool encoded_entities; - - /* other configuration stuff */ - SV* bool_attr_val; -diff -urN HTML-Parser-3.25/t/encoded-entities.t HTML-Parser-3.25.2/t/encoded-entities.t ---- HTML-Parser-3.25/t/encoded-entities.t Wed Dec 31 19:00:00 1969 -+++ HTML-Parser-3.25.2/t/encoded-entities.t Wed Mar 6 17:13:53 2002 -@@ -0,0 +1,32 @@ -+use strict; -+print "1..2\n"; -+ -+use HTML::Parser (); -+my $p = HTML::Parser->new(); -+$p->encoded_entities(1); -+ -+my $text = ""; -+$p->handler(start => -+ sub { -+ my($tag, $attr) = @_; -+ $text .= "S[$tag"; -+ for my $k (sort keys %$attr) { -+ my $v = $attr->{$k}; -+ $text .= " $k=$v"; -+ } -+ $text .= "]"; -+ }, "tagname,attr"); -+ -+my $html = <<'EOT'; -+ -+EOT -+ -+$p->parse($html)->eof; -+ -+print "not " unless $text eq 'S[tag arg=&<>]'; print "ok 1\n"; -+ -+$text = ""; -+$p->encoded_entities(0); -+$p->parse($html)->eof; -+ -+print "not " unless $text eq 'S[tag arg=&<>]'; print "ok 2\n"; - ---------------------------------------------- IO-stringy http://www.cpan.org/authors/id/E/ER/ERYQ/IO-stringy-2.108.tar.gz (needed by MIME-tools)