--- loncom/build/xfml_parse.pl 2002/02/01 10:56:41 1.2 +++ loncom/build/xfml_parse.pl 2002/02/20 00:21:42 1.3 @@ -12,22 +12,15 @@ ## ## ## ORGANIZATION OF THIS PERL SCRIPT ## ## 1. Notes ## -## 2. Get command line arguments ## -## 3. First pass through (grab distribution-specific information) ## -## 4. Second pass through (parse out what is not necessary) ## -## 5. Third pass through (translate markup according to specified mode) ## -## 6. Functions (most all just format contents of different markup tags) ## -## 7. POD (plain old documentation, CPAN style) ## +## 2. Read in filter file ## +## 3. Initialize and clear conditions ## +## 4. Run through and apply clauses ## ## ## ############################################################################### # ----------------------------------------------------------------------- Notes # -# I am using a multiple pass-through approach to parsing -# the xfml file. This saves memory and makes sure the server -# will never be overloaded. -# -# This is meant to parse files meeting the piml document type. +# This is meant to parse files meeting the xfml document type. # See xfml.dtd. XFML=XML Filtering Markup Language. use HTML::TokeParser; @@ -43,26 +36,26 @@ END } my %eh; -my %ih; + +# ---------------------------------------------- Read in filter file from @ARGV my $tofilter=shift @ARGV; -open IN,"<$tofilter"; -my @lines=; my $parsestring=join('',@lines); undef @lines; -close IN; +open IN,"<$tofilter"; my @lines=; +my $parsestring=join('',@lines); undef @lines; close IN; my $parser = HTML::TokeParser->new(\$parsestring) or die('can\'t create TokeParser object'); $parser->xml_mode('1'); -# Define handling methods for mode-dependent text rendering - +# --------------------------------------------- initialize and clear conditions my %conditions; &cc; +# Define handling methods for mode-dependent text rendering $parser->{textify}={ - xfml => \&format_xfml, + 'xfml' => \&format_xfml, 'when:name' => \&format_when_name, 'when:attribute' => \&format_when_attribute, 'when:cdata' => \&format_when_cdata, - 'choice:include' => \&format_choice_include, 'choice:exclude' => \&format_choice_exclude, + 'clause' => \&format_clause, }; my $text; @@ -70,158 +63,156 @@ my $xfml; my $wloc=0; my %eha; -while (my $token = $parser->get_tag('xfml')) { - &format_xfml(@{$token}); - $text = $parser->get_text('/xfml'); - $token = $parser->get_tag('/xfml'); -} - -#open IN,"<$tofilter"; -my @lines2=<>; my $parsestring2=join('',@lines2); undef @lines2; -$parser = HTML::TokeParser->new(\$parsestring2) or +# ----------------------------------------------- Run through and apply clauses +my @lines2=<>; my $output=join('',@lines2); undef @lines2; +my $lparser = HTML::TokeParser->new(\$output) or die('can\'t create TokeParser object'); -$parser->xml_mode('1'); - -my $token; -my $hloc=0; -my %ts; -my $tr; -my $echild=0; -my $exclude=0; -my $excluden=0; -my $excludea=0; -my $et=0; -my $cdata=''; -my $excludenold=0; -my $ign=0; - -while ($token = $parser->get_token()) { - if ($token->[0] eq 'D') { - print $token->[1]; - } - elsif ($token->[0] eq 'C') { - print $token->[1]; - } - elsif ($token->[0] eq 'S') { - $cdata=''; - $hloc++; -# if token can be excluded, then pretend it is until all conditions are -# run (eha); then output during end tag processing -# else, output - -# a token can be excluded when it is an eh key, or a child node of -# an eh key - - if ($eh{$token->[1]}) { - $echild=$token->[1]; +$lparser->xml_mode('1'); +my $parsestring2; +while (my $token = $parser->get_tag('clause')) { + $parsestring2=$output; + $lparser = HTML::TokeParser->new(\$parsestring2); + $lparser->xml_mode('1'); + $output=''; + &format_clause(@{$token}); + $text = $parser->get_text('/clause'); + $token = $parser->get_tag('/clause'); + + my $token=''; + my $ttype=''; + my $excludeflag=0; + my $outcache=''; + while ($token = $lparser->get_token()) { + if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; } + elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1]; } + elsif ($token->[0] eq 'T') { + if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S' + or $ttype eq 'E') { + $output.=$token->[1]; + } + else { + $outcache.=$token->[1]; + } } - if ($echild) { - # run through names for echild - # then attributes and/or values and/or cdata - my $name=$token->[1]; - my @attributes=@{$token->[3]}; - my %atthash=%{$token->[2]}; - foreach my $namemlist (@{$eha{$echild}->{'name'}}) { - foreach my $namematch (@{$namemlist}) { - my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//; - if ($name=~/$nm/) { - $excludenold=$excluden; - $excluden++; - foreach my $attributemlist - (@{$eha{$echild}->{'attribute'}}) { - foreach my $attributematch - (@{$attributemlist}) { - my ($an,$am)= - split(/\=/,$attributematch,2); - $am=~s/^.//; - $am=~s/.$//; - if ($atthash{$an}) { - if ($atthash{$an}=~/$am/) { - $excludea++; - } - } - } - } - } - } + elsif ($token->[0] eq 'S') { + if ($eh{$token->[1]} or $excludeflag==1) { + $ttype=''; + $excludeflag=1; + $outcache.=$token->[4]; + } + else { + $ttype='S'; + $output.=$token->[4]; + } + if ($excludeflag==1) { + } - $tr.=$token->[4]; } - else { - print $token->[4]; + elsif ($token->[0] eq 'E') { + if ($eh{$token->[1]} and $excludeflag==1) { + $ttype='E'; + $excludeflag=0; + $outcache.=$token->[2]; + my $retval=&evalconditions($outcache); + if (&evalconditions($outcache)) { + $output.=$outcache; + } + else { + $output.=''; + } + $outcache=''; + } + elsif ($excludeflag==1) { + $ttype=''; + $outcache.=$token->[2]; + } + else { + $output.=$token->[2]; + $ttype='E'; + } } } - elsif ($token->[0] eq 'E') { - if ($echild) { - $tr.=$token->[2]; - if ($excluden) { - my $i=0; - CDATALOOP: - foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) { - $i++; - my $j; - foreach my $cdatamatch (@{$cdatamlist}) { - $j++; -# print "CDATA: $cdatamatch, $cdata\n"; - my $cm=$cdatamatch; - my $not=0; - if ($cm=~/\!/) { - $not=1; - $cm=~s/^.//; + &cc; +} +print $output; + +# -------------------------------------------------------------- evalconditions +sub evalconditions { + my ($parsetext)=@_; + my $eparser = HTML::TokeParser->new(\$parsetext); + unless (@{$conditions{'name'}} or + @{$conditions{'attribute'}}) { + return 0; + } + my $nameflag=0; + my $cdataflag=0; + my $matchflag=0; + my $Ttoken=''; + while (my $token = $eparser->get_token()) { + if ($token->[0] eq 'S') { + foreach my $name (@{$conditions{'name'}}) { + my $flag=0; + my $match=$name; + if ($match=~/^\!/) { + $match=~s/^\!//g; + $flag=1; + } + $match=~s/^\///g; + $match=~s/\/$//g; + if ((!$flag and $token->[1]=~/$match/) or + ($flag and $token->[1]!~/$match/)) { + $nameflag=1; + } + } + $Ttoken=''; + } + elsif ($token->[0] eq 'E') { + foreach my $name (@{$conditions{'name'}}) { + my $flag=0; + my $match=$name; + if ($match=~/^\!/) { + $match=~s/^\!//g; + $flag=1; + } + $match=~s/^\///g; + $match=~s/\/$//g; + if ((!$flag and $token->[1]=~/$match/) or + ($flag and $token->[1]!~/$match/)) { + foreach my $cdata (@{$conditions{'cdata'}}) { + my $flag=0; + my $match=$cdata; + if ($match=~/^\!/) { + $match=~s/^\!//g; + $flag=1; } - $cm=~s/^.//; $cm=~s/.$//; - if ($not and $cdata=~/$cm/) { - $ign=1; $exclude=0; + $match=~s/^\///g; + $match=~s/\/$//g; + if ((!$flag and $Ttoken=~/$match/) or + ($flag and $Ttoken!~/$match/)) { + $cdataflag=1; } - if ((!$not and $cdata!~/$cm/) - or ($not and $cdata=~/$cm/)) { -# nothing happens -# $exclude=0; + } + if (@{$conditions{'cdata'}}) { + if ($cdataflag) { + return 0; } - elsif (($not and $cdata!~/$cm/) - or (!$not and $cdata=~/$cm/)) { - $exclude++ unless $ign; + } + else { + if ($nameflag) { + return 0; } } + $nameflag=0; } } } - if ($eh{$token->[1]}) { - $ign=0; - $echild=0; - if (!$exclude and !$excludea) { - print $tr; -# print $token->[2]; - $tr=''; - } - elsif ($exclude>0 or $excludea>0) { -# print "EXCLUDING $token->[1] $exclude $excludea $excluden\n"; - $exclude=0; $excluden=0; $excludea=0; - $tr=''; - } - $exclude=0; $excluden=0; $excludea=0; - } - else { - if ($echild) { -# $tr.=$token->[2]; + elsif ($token->[0] eq 'T') { + if ($nameflag) { + $Ttoken.=$token->[1]; } - else { - print $token->[2]; - $tr=''; - } - } - $hloc--; - } - elsif ($token->[0] eq 'T') { - if ($echild) { - $tr.=$token->[1]; - $cdata=$token->[1]; - } - else { - print $token->[1]; - $tr=''; } } + return 1; } # ------------------------------------------------------------ clear conditions @@ -230,6 +221,7 @@ sub cc { @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}}; @{$conditions{'value'}}=(); pop @{$conditions{'value'}}; @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}}; + %eh=(1,1); delete $eh{1}; } # --------------------------------------- remove starting and ending whitespace @@ -239,35 +231,29 @@ sub trim { + # --------------------------------------------------------- Format xfml section sub format_xfml { my (@tokeninfo)=@_; return ''; } +# ------------------------------------------------------- Format clause section +sub format_clause { + my (@tokeninfo)=@_; + return ''; +} + # ---------------------------------------------------- Format when:name section sub format_when_name { my (@tokeninfo)=@_; - $wloc++; +# $wloc++; my $att_match=$tokeninfo[2]->{'match'}; push @{$conditions{'name'}},$att_match; my $text=&trim($parser->get_text('/when:name')); $parser->get_tag('/when:name'); - $wloc--; - &cc unless $wloc; - return ''; -} - -# ----------------------------------------------- Format when:attribute section -sub format_when_attribute { - my (@tokeninfo)=@_; - $wloc++; - my $att_match=$tokeninfo[2]->{'match'}; - push @{$conditions{'attribute'}},$att_match; - my $text=&trim($parser->get_text('/when:attribute')); - $parser->get_tag('/when:attribute'); - $wloc--; - &cc unless $wloc; +# $wloc--; +# &cc unless $wloc; return ''; } @@ -280,16 +266,7 @@ sub format_when_cdata { my $text=&trim($parser->get_text('/when:cdata')); $parser->get_tag('/when:cdata'); $wloc--; - &cc unless $wloc; - return ''; -} - -# ----------------------------------------------- Format choice:include section -sub format_choice_include { - my (@tokeninfo)=@_; - my $text=&trim($parser->get_text('/choice:include')); - $parser->get_tag('/choice:include'); - $ih{$tokeninfo[2]->{'match'}}++; +# &cc unless $wloc; return ''; }