Annotation of loncom/build/xfml_parse.pl, revision 1.7

1.1       harris41    1: #!/usr/bin/perl
                      2: 
1.4       harris41    3: # -------------------------------------------------------- Documentation notice
                      4: # Run "perldoc ./lpml_parse.pl" in order to best view the software
                      5: # documentation internalized in this program.
                      6: 
                      7: # --------------------------------------------------------- License Information
                      8: # The LearningOnline Network with CAPA
                      9: # piml_parse.pl - Linux Packaging Markup Language parser
                     10: #
1.6       matthew    11: # $Id: xfml_parse.pl,v 1.5 2002/04/08 12:51:03 harris41 Exp $
1.4       harris41   12: #
                     13: # Written by Scott Harrison, codeharrison@yahoo.com
                     14: #
                     15: # Copyright Michigan State University Board of Trustees
                     16: #
                     17: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
                     18: #
                     19: # LON-CAPA is free software; you can redistribute it and/or modify
                     20: # it under the terms of the GNU General Public License as published by
                     21: # the Free Software Foundation; either version 2 of the License, or
                     22: # (at your option) any later version.
                     23: #
                     24: # LON-CAPA is distributed in the hope that it will be useful,
                     25: # but WITHOUT ANY WARRANTY; without even the implied warranty of
                     26: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     27: # GNU General Public License for more details.
                     28: #
                     29: # You should have received a copy of the GNU General Public License
                     30: # along with LON-CAPA; if not, write to the Free Software
                     31: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
                     32: #
                     33: # /home/httpd/html/adm/gpl.txt
                     34: #
                     35: # http://www.lon-capa.org/
                     36: #
1.1       harris41   37: # YEAR=2002
1.4       harris41   38: # 1/26,1/27,1/28,1/29,1/30,1/31,2/20,4/8 - Scott Harrison
1.2       harris41   39: #
                     40: ###
1.1       harris41   41: 
                     42: # Read in 2 XML file; first is the filter specification, the second
                     43: # is the XML file to be filtered
                     44: 
1.2       harris41   45: ###############################################################################
                     46: ##                                                                           ##
                     47: ## ORGANIZATION OF THIS PERL SCRIPT                                          ##
                     48: ## 1. Notes                                                                  ##
1.3       harris41   49: ## 2. Read in filter file                                                    ##
                     50: ## 3. Initialize and clear conditions                                        ##
                     51: ## 4. Run through and apply clauses                                          ##
1.2       harris41   52: ##                                                                           ##
                     53: ###############################################################################
                     54: 
                     55: # ----------------------------------------------------------------------- Notes
                     56: #
1.3       harris41   57: # This is meant to parse files meeting the xfml document type.
1.2       harris41   58: # See xfml.dtd.  XFML=XML Filtering Markup Language.
                     59: 
1.1       harris41   60: use HTML::TokeParser;
                     61: use strict;
                     62: 
                     63: unless (@ARGV) {
1.4       harris41   64:     print(<<END);
1.1       harris41   65: Incorrect invocation.
                     66: Example usages:
                     67: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
                     68: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
                     69: END
                     70: }
                     71: 
                     72: my %eh;
1.3       harris41   73: 
                     74: # ---------------------------------------------- Read in filter file from @ARGV
1.1       harris41   75: my $tofilter=shift @ARGV;
1.4       harris41   76: open(IN,"<$tofilter"); my @lines=<IN>;
1.3       harris41   77: my $parsestring=join('',@lines); undef @lines; close IN;
1.1       harris41   78: my $parser = HTML::TokeParser->new(\$parsestring) or
                     79:     die('can\'t create TokeParser object');
                     80: $parser->xml_mode('1');
                     81: 
1.3       harris41   82: # --------------------------------------------- initialize and clear conditions
1.1       harris41   83: my %conditions; &cc;
                     84: 
1.3       harris41   85: # Define handling methods for mode-dependent text rendering
1.1       harris41   86: $parser->{textify}={
1.3       harris41   87:     'xfml' => \&format_xfml,
1.1       harris41   88:     'when:name' => \&format_when_name,
                     89:     'when:attribute' => \&format_when_attribute,
                     90:     'when:cdata' => \&format_when_cdata,
                     91:     'choice:exclude' => \&format_choice_exclude,
1.3       harris41   92:     'clause' => \&format_clause,
1.1       harris41   93:     };
                     94: 
                     95: my $text;
                     96: my $xfml;
                     97: my $wloc=0;
                     98: my %eha;
                     99: 
1.3       harris41  100: # ----------------------------------------------- Run through and apply clauses
                    101: my @lines2=<>; my $output=join('',@lines2); undef @lines2;
                    102: my $lparser = HTML::TokeParser->new(\$output) or
                    103:     die('can\'t create TokeParser object');
                    104: $lparser->xml_mode('1');
                    105: my $parsestring2;
                    106: while (my $token = $parser->get_tag('clause')) {
                    107:     $parsestring2=$output;
                    108:     $lparser = HTML::TokeParser->new(\$parsestring2);
                    109:     $lparser->xml_mode('1');
                    110:     $output='';
                    111:     &format_clause(@{$token});
                    112:     $text = $parser->get_text('/clause');
                    113:     $token = $parser->get_tag('/clause');
                    114: 
                    115:     my $token='';
                    116:     my $ttype='';
                    117:     my $excludeflag=0;
                    118:     my $outcache='';
                    119:     while ($token = $lparser->get_token()) {
                    120: 	if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; }
                    121: 	elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1];	}
                    122: 	elsif ($token->[0] eq 'T') {
                    123: 	    if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S'
                    124: 		or $ttype eq 'E') {
                    125: 		$output.=$token->[1];
                    126: 	    }
                    127: 	    else {
                    128: 		$outcache.=$token->[1];
                    129: 	    }
                    130: 	}
                    131: 	elsif ($token->[0] eq 'S') {
                    132: 	    if ($eh{$token->[1]} or $excludeflag==1) {
                    133: 		$ttype='';
                    134: 		$excludeflag=1;
                    135: 		$outcache.=$token->[4];
                    136: 	    }
                    137: 	    else {
                    138: 		$ttype='S';
                    139: 		$output.=$token->[4];
                    140: 	    }
                    141: 	    if ($excludeflag==1) {
                    142: 		
                    143: 	    }
                    144: 	}
                    145: 	elsif ($token->[0] eq 'E') {
                    146: 	    if ($eh{$token->[1]} and $excludeflag==1) {
                    147: 		$ttype='E';
                    148: 		$excludeflag=0;
                    149: 		$outcache.=$token->[2];
1.7     ! harris41  150: 		my $retval=&evalconditions($outcache);
1.3       harris41  151: 		if (&evalconditions($outcache)) {
1.7     ! harris41  152: 		    $output.=$outcache;
1.3       harris41  153: 		}
                    154: 		else {
1.7     ! harris41  155: 		    $output.='<!-- FILTERED OUT -->';
1.3       harris41  156: 		}
                    157: 		$outcache='';
                    158: 	    }
                    159: 	    elsif ($excludeflag==1) {
                    160: 		$ttype='';
                    161: 		$outcache.=$token->[2];
                    162: 	    }
                    163: 	    else {
                    164: 		$output.=$token->[2];
                    165: 		$ttype='E';
                    166: 	    }
                    167: 	}
                    168:     }
                    169:     &cc;
1.1       harris41  170: }
1.3       harris41  171: print $output;
1.1       harris41  172: 
1.3       harris41  173: # -------------------------------------------------------------- evalconditions
                    174: sub evalconditions {
                    175:     my ($parsetext)=@_;
                    176:     my $eparser = HTML::TokeParser->new(\$parsetext);
                    177:     unless (@{$conditions{'name'}} or
                    178: 	    @{$conditions{'attribute'}}) {
1.7     ! harris41  179: 	return 0;
1.1       harris41  180:     }
1.3       harris41  181:     my $nameflag=0;
                    182:     my $cdataflag=0;
                    183:     my $matchflag=0;
                    184:     my $Ttoken='';
                    185:     while (my $token = $eparser->get_token()) {
                    186: 	if ($token->[0] eq 'S') {
                    187: 	    foreach my $name (@{$conditions{'name'}}) {
                    188: 		my $flag=0;
                    189: 		my $match=$name;
                    190: 		if ($match=~/^\!/) {
                    191: 		    $match=~s/^\!//g;
                    192: 		    $flag=1;
                    193: 		}
                    194: 		$match=~s/^\///g;
                    195: 		$match=~s/\/$//g;
                    196: 		if ((!$flag and $token->[1]=~/$match/) or
                    197: 		    ($flag and $token->[1]!~/$match/)) {
                    198: 		    $nameflag=1;
1.1       harris41  199: 		}
                    200: 	    }
1.3       harris41  201: 	    $Ttoken='';
1.1       harris41  202: 	}
1.3       harris41  203: 	elsif ($token->[0] eq 'E') {
                    204: 	    foreach my $name (@{$conditions{'name'}}) {
                    205: 		my $flag=0;
                    206: 		my $match=$name;
                    207: 		if ($match=~/^\!/) {
                    208: 		    $match=~s/^\!//g;
                    209: 		    $flag=1;
                    210: 		}
                    211: 		$match=~s/^\///g;
                    212: 		$match=~s/\/$//g;
                    213: 		if ((!$flag and $token->[1]=~/$match/) or
                    214: 		    ($flag and $token->[1]!~/$match/)) {
                    215: 		    foreach my $cdata (@{$conditions{'cdata'}}) {
                    216: 			my $flag=0;
                    217: 			my $match=$cdata;
                    218: 			if ($match=~/^\!/) {
                    219: 			    $match=~s/^\!//g;
                    220: 			    $flag=1;
1.1       harris41  221: 			}
1.3       harris41  222: 			$match=~s/^\///g;
                    223: 			$match=~s/\/$//g;
                    224: 			if ((!$flag and $Ttoken=~/$match/) or
                    225: 			    ($flag and $Ttoken!~/$match/)) {
                    226: 			    $cdataflag=1;
1.2       harris41  227: 			}
1.3       harris41  228: 		    }
                    229: 		    if (@{$conditions{'cdata'}}) {
                    230: 			if ($cdataflag) {
                    231: 			    return 0;
1.1       harris41  232: 			}
1.3       harris41  233: 		    }
                    234: 		    else {
                    235: 			if ($nameflag) {
                    236: 			    return 0;
1.1       harris41  237: 			}
                    238: 		    }
1.3       harris41  239: 		    $nameflag=0;
1.1       harris41  240: 		}
                    241: 	    }
                    242: 	}
1.3       harris41  243: 	elsif ($token->[0] eq 'T') {
                    244: 	    if ($nameflag) {
                    245: 		$Ttoken.=$token->[1];
1.1       harris41  246: 	    }
                    247: 	}
                    248:     }
1.3       harris41  249:     return 1;
1.1       harris41  250: }
                    251: 
                    252: # ------------------------------------------------------------ clear conditions
                    253: sub cc {
                    254:     @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
                    255:     @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
                    256:     @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
                    257:     @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
1.3       harris41  258:     %eh=(1,1); delete $eh{1};
1.1       harris41  259: }
                    260: 
                    261: # --------------------------------------- remove starting and ending whitespace
                    262: sub trim {
                    263:     my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
                    264: }
                    265: 
1.2       harris41  266: 
                    267: 
1.3       harris41  268: 
1.1       harris41  269: # --------------------------------------------------------- Format xfml section
                    270: sub format_xfml {
                    271:     my (@tokeninfo)=@_;
                    272:     return '';
                    273: }
                    274: 
1.3       harris41  275: # ------------------------------------------------------- Format clause section
                    276: sub format_clause {
                    277:     my (@tokeninfo)=@_;
                    278:     return '';
                    279: }
                    280: 
1.1       harris41  281: # ---------------------------------------------------- Format when:name section
                    282: sub format_when_name {
                    283:     my (@tokeninfo)=@_;
1.3       harris41  284: #    $wloc++;
1.1       harris41  285:     my $att_match=$tokeninfo[2]->{'match'};
                    286:     push @{$conditions{'name'}},$att_match;
                    287:     my $text=&trim($parser->get_text('/when:name'));
                    288:     $parser->get_tag('/when:name');
1.3       harris41  289: #    $wloc--;
                    290: #    &cc unless $wloc;
1.1       harris41  291:     return '';
                    292: }
                    293: 
                    294: # --------------------------------------------------- Format when:cdata section
                    295: sub format_when_cdata {
                    296:     my (@tokeninfo)=@_;
                    297:     $wloc++;
                    298:     my $att_match=$tokeninfo[2]->{'match'};
                    299:     push @{$conditions{'cdata'}},$att_match;
                    300:     my $text=&trim($parser->get_text('/when:cdata'));
                    301:     $parser->get_tag('/when:cdata');
                    302:     $wloc--;
1.3       harris41  303: #    &cc unless $wloc;
1.1       harris41  304:     return '';
                    305: }
                    306: 
                    307: # ----------------------------------------------- Format choice:exclude section
                    308: sub format_choice_exclude {
                    309:     my (@tokeninfo)=@_;
                    310:     my $text=&trim($parser->get_text('/choice:exclude'));
                    311:     $parser->get_tag('/choice:exclude');
                    312:     $eh{$tokeninfo[2]->{'nodename'}}++;
                    313:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
                    314:          [@{$conditions{'name'}}];
                    315:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
                    316:          [@{$conditions{'attribute'}}];
                    317:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
                    318:          [@{$conditions{'value'}}];
                    319:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
                    320:          [@{$conditions{'cdata'}}];
                    321:     return '';
                    322: }
1.4       harris41  323: 
                    324: # ----------------------------------- POD (plain old documentation, CPAN style)
                    325: 
                    326: =pod
                    327: 
                    328: =head1 NAME
                    329: 
1.5       harris41  330: xfml_parse.pl - This is meant to parse XFML files (XML Filtering Markup Language.)
1.4       harris41  331: 
                    332: =head1 SYNOPSIS
                    333: 
                    334: Usage is for lpml file to come in through standard input.
                    335: 
                    336: =over 4
                    337: 
                    338: =item * 
                    339: 
                    340: 1st argument is name of xfml file.
                    341: 
                    342: =back
                    343: 
                    344: Example:
                    345: 
                    346:  cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
                    347: 
                    348: or
                    349: 
                    350:  perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
                    351: 
                    352: =head1 DESCRIPTION
                    353: 
                    354: I am using a multiple pass-through approach to parsing
                    355: the xfml file.  This saves memory and makes sure the server
                    356: will never be overloaded.
                    357: 
                    358: =head1 README
                    359: 
                    360: I am using a multiple pass-through approach to parsing
                    361: the xfml file.  This saves memory and makes sure the server
                    362: will never be overloaded.
                    363: 
                    364: =head1 PREREQUISITES
                    365: 
                    366: HTML::TokeParser
                    367: 
                    368: =head1 COREQUISITES
                    369: 
                    370: =head1 OSNAMES
                    371: 
                    372: linux
                    373: 
                    374: =head1 SCRIPT CATEGORIES
                    375: 
                    376: Packaging/Administrative
                    377: 
                    378: =head1 AUTHOR
                    379: 
                    380:  Scott Harrison
                    381:  codeharrison@yahoo.com
                    382: 
                    383: Please let me know how/if you are finding this script useful and
                    384: any/all suggestions.  -Scott
                    385: 
                    386: =cut
                    387: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>