Diff for /doc/otherfiles/perl_modules.txt between versions 1.11 and 1.12

version 1.11, 2002/03/02 05:17:00 version 1.12, 2002/03/06 22:47:45
Line 210  Gisle Aas [gisle@aas.no] Line 210  Gisle Aas [gisle@aas.no]
     HTML/Parser.pm 1      HTML/Parser.pm 1
     HTML/TokeParser.pm 1      HTML/TokeParser.pm 1
   
   HTML-Parser 3.25 needs the following patches applied:
   
   diff -urN HTML-Parser-3.25/hparser.c HTML-Parser-3.25.1/hparser.c
   --- HTML-Parser-3.25/hparser.c Thu May 10 15:27:28 2001
   +++ HTML-Parser-3.25.1/hparser.c Wed Feb 20 13:23:34 2002
   @@ -1094,14 +1094,21 @@
        hctype_t tag_name_first, tag_name_char;
        hctype_t attr_name_first, attr_name_char;
    
   -    if (p_state->strict_names || p_state->xml_mode) {
   +    if (p_state->strict_names) {
     tag_name_first = attr_name_first = HCTYPE_NAME_FIRST;
     tag_name_char  = attr_name_char  = HCTYPE_NAME_CHAR;
        }
        else {
   - tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_GT;
   - attr_name_first = HCTYPE_NOT_SPACE_GT;
   - attr_name_char  = HCTYPE_NOT_SPACE_EQ_GT;
   + if (p_state->xml_mode) {
   +    tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_SLASH_GT;
   +    attr_name_first = HCTYPE_NOT_SPACE_SLASH_GT;
   +    attr_name_char  = HCTYPE_NOT_SPACE_EQ_GT;
   + }
   + else {
   +    tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_GT;
   +    attr_name_first = HCTYPE_NOT_SPACE_GT;
   +    attr_name_char  = HCTYPE_NOT_SPACE_EQ_GT;
   + }
        }
    
        s += 2;
   @@ -1158,8 +1165,11 @@
        else {
     char *word_start = s;
     while (s < end && isHNOT_SPACE_GT(*s)) {
   -    if (p_state->xml_mode && *s == '/')
   - break;
   +    if (p_state->xml_mode && *s == '/') {
   + /* look ahead to see if the tag ends */
   + if ((s+1)==end || *(s+1)=='>')
   +    break;
   +    }
        s++;
     }
     if (s == end)
   
   diff -urN HTML-Parser-3.25/Parser.pm HTML-Parser-3.25.2/Parser.pm
   --- HTML-Parser-3.25/Parser.pm Fri May 11 13:24:09 2001
   +++ HTML-Parser-3.25.2/Parser.pm Wed Mar  6 16:47:46 2002
   @@ -427,6 +427,11 @@
    There are currently no events associated with the marked section
    markup, but the text can be returned as C<skipped_text>.
    
   +=item $p->encoded_entities( [$bool] )
   +
   +By default, attr and @attr decode general entities for attribute values.
   +Setting this option to a true value turns off that decoding.
   +
    =back
    
    As markup and text is recognized, handlers are invoked.  The following
   diff -urN HTML-Parser-3.25/Parser.xs HTML-Parser-3.25.2/Parser.xs
   --- HTML-Parser-3.25/Parser.xs Thu May 10 15:27:28 2001
   +++ HTML-Parser-3.25.2/Parser.xs Wed Mar  6 16:48:56 2002
   @@ -297,6 +297,7 @@
            HTML::Parser::xml_mode = 3
     HTML::Parser::unbroken_text = 4
            HTML::Parser::marked_sections = 5
   +        HTML::Parser::encoded_entities = 6
        PREINIT:
     bool *attr;
        CODE:
   @@ -311,6 +312,7 @@
    #else
             croak("marked sections not supported"); break;
    #endif
   + case  6: attr = &pstate->encoded_entities;     break;
     default:
        croak("Unknown boolean attribute (%d)", ix);
            }
   diff -urN HTML-Parser-3.25/hparser.c HTML-Parser-3.25.2/hparser.c
   --- HTML-Parser-3.25/hparser.c Thu May 10 15:27:28 2001
   +++ HTML-Parser-3.25.2/hparser.c Wed Mar  6 16:44:47 2002
   @@ -398,7 +398,8 @@
        beg++; len -= 2;
     }
     attrval = newSVpvn(beg, len);
   - decode_entities(aTHX_ attrval, p_state->entity2char);
   + if (!p_state->encoded_entities)
   +    decode_entities(aTHX_ attrval, p_state->entity2char);
        }
        else { /* boolean */
     if (p_state->bool_attr_val)
   diff -urN HTML-Parser-3.25/hparser.h HTML-Parser-3.25.2/hparser.h
   --- HTML-Parser-3.25/hparser.h Tue May  8 13:03:27 2001
   +++ HTML-Parser-3.25.2/hparser.h Wed Mar  6 16:48:18 2002
   @@ -99,6 +99,7 @@
        bool strict_names;
        bool xml_mode;
        bool unbroken_text;
   +    bool encoded_entities;
    
        /* other configuration stuff */
        SV* bool_attr_val;
   diff -urN HTML-Parser-3.25/t/encoded-entities.t HTML-Parser-3.25.2/t/encoded-entities.t
   --- HTML-Parser-3.25/t/encoded-entities.t Wed Dec 31 19:00:00 1969
   +++ HTML-Parser-3.25.2/t/encoded-entities.t Wed Mar  6 17:13:53 2002
   @@ -0,0 +1,32 @@
   +use strict;
   +print "1..2\n";
   +
   +use HTML::Parser ();
   +my $p = HTML::Parser->new();
   +$p->encoded_entities(1);
   +
   +my $text = "";
   +$p->handler(start =>
   +    sub {
   + my($tag, $attr) = @_;
   + $text .= "S[$tag";
   + for my $k (sort keys %$attr) {
   +     my $v =  $attr->{$k};
   +     $text .= " $k=$v";
   + }
   + $text .= "]";
   +     }, "tagname,attr");
   +
   +my $html = <<'EOT';
   +<tag arg="&amp;&lt;&gt">
   +EOT
   +
   +$p->parse($html)->eof;
   +
   +print "not " unless $text eq 'S[tag arg=&amp;&lt;&gt]';  print "ok 1\n";
   +
   +$text = "";
   +$p->encoded_entities(0);
   +$p->parse($html)->eof;
   +
   +print "not " unless $text eq 'S[tag arg=&<>]';  print "ok 2\n";
   
 ---------------------------------------------- IO-stringy  ---------------------------------------------- IO-stringy
 http://www.cpan.org/authors/id/E/ER/ERYQ/IO-stringy-2.108.tar.gz  http://www.cpan.org/authors/id/E/ER/ERYQ/IO-stringy-2.108.tar.gz
 (needed by MIME-tools)  (needed by MIME-tools)

Removed from v.1.11  
changed lines
  Added in v.1.12


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>