Annotation of loncom/build/xfml_parse.pl, revision 1.7

1.1       harris41    1: #!/usr/bin/perl
                      2: 
1.4       harris41    3: # -------------------------------------------------------- Documentation notice
                      4: # Run "perldoc ./lpml_parse.pl" in order to best view the software
                      5: # documentation internalized in this program.
                      6: 
                      7: # --------------------------------------------------------- License Information
                      8: # The LearningOnline Network with CAPA
                      9: # piml_parse.pl - Linux Packaging Markup Language parser
                     10: #
1.6       matthew    11: # $Id: xfml_parse.pl,v 1.5 2002/04/08 12:51:03 harris41 Exp $
1.4       harris41   12: #
                     13: # Written by Scott Harrison, codeharrison@yahoo.com
                     14: #
                     15: # Copyright Michigan State University Board of Trustees
                     16: #
                     17: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
                     18: #
                     19: # LON-CAPA is free software; you can redistribute it and/or modify
                     20: # it under the terms of the GNU General Public License as published by
                     21: # the Free Software Foundation; either version 2 of the License, or
                     22: # (at your option) any later version.
                     23: #
                     24: # LON-CAPA is distributed in the hope that it will be useful,
                     25: # but WITHOUT ANY WARRANTY; without even the implied warranty of
                     26: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     27: # GNU General Public License for more details.
                     28: #
                     29: # You should have received a copy of the GNU General Public License
                     30: # along with LON-CAPA; if not, write to the Free Software
                     31: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
                     32: #
                     33: # /home/httpd/html/adm/gpl.txt
                     34: #
                     35: # http://www.lon-capa.org/
                     36: #
1.1       harris41   37: # YEAR=2002
1.4       harris41   38: # 1/26,1/27,1/28,1/29,1/30,1/31,2/20,4/8 - Scott Harrison
1.2       harris41   39: #
                     40: ###
1.1       harris41   41: 
                     42: # Read in 2 XML file; first is the filter specification, the second
                     43: # is the XML file to be filtered
                     44: 
1.2       harris41   45: ###############################################################################
                     46: ##                                                                           ##
                     47: ## ORGANIZATION OF THIS PERL SCRIPT                                          ##
                     48: ## 1. Notes                                                                  ##
1.3       harris41   49: ## 2. Read in filter file                                                    ##
                     50: ## 3. Initialize and clear conditions                                        ##
                     51: ## 4. Run through and apply clauses                                          ##
1.2       harris41   52: ##                                                                           ##
                     53: ###############################################################################
                     54: 
                     55: # ----------------------------------------------------------------------- Notes
                     56: #
1.3       harris41   57: # This is meant to parse files meeting the xfml document type.
1.2       harris41   58: # See xfml.dtd.  XFML=XML Filtering Markup Language.
                     59: 
1.1       harris41   60: use HTML::TokeParser;
                     61: use strict;
                     62: 
                     63: unless (@ARGV) {
1.4       harris41   64:     print(<<END);
1.1       harris41   65: Incorrect invocation.
                     66: Example usages:
                     67: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
                     68: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
                     69: END
                     70: }
                     71: 
                     72: my %eh;
1.3       harris41   73: 
                     74: # ---------------------------------------------- Read in filter file from @ARGV
1.1       harris41   75: my $tofilter=shift @ARGV;
1.4       harris41   76: open(IN,"<$tofilter"); my @lines=<IN>;
1.3       harris41   77: my $parsestring=join('',@lines); undef @lines; close IN;
1.1       harris41   78: my $parser = HTML::TokeParser->new(\$parsestring) or
                     79:     die('can\'t create TokeParser object');
                     80: $parser->xml_mode('1');
                     81: 
1.3       harris41   82: # --------------------------------------------- initialize and clear conditions
1.1       harris41   83: my %conditions; &cc;
                     84: 
1.3       harris41   85: # Define handling methods for mode-dependent text rendering
1.1       harris41   86: $parser->{textify}={
1.3       harris41   87:     'xfml' => \&format_xfml,
1.1       harris41   88:     'when:name' => \&format_when_name,
                     89:     'when:attribute' => \&format_when_attribute,
                     90:     'when:cdata' => \&format_when_cdata,
                     91:     'choice:exclude' => \&format_choice_exclude,
1.3       harris41   92:     'clause' => \&format_clause,
1.1       harris41   93:     };
                     94: 
                     95: my $text;
                     96: my $xfml;
                     97: my $wloc=0;
                     98: my %eha;
                     99: 
1.3       harris41  100: # ----------------------------------------------- Run through and apply clauses
                    101: my @lines2=<>; my $output=join('',@lines2); undef @lines2;
                    102: my $lparser = HTML::TokeParser->new(\$output) or
                    103:     die('can\'t create TokeParser object');
                    104: $lparser->xml_mode('1');
                    105: my $parsestring2;
                    106: while (my $token = $parser->get_tag('clause')) {
                    107:     $parsestring2=$output;
                    108:     $lparser = HTML::TokeParser->new(\$parsestring2);
                    109:     $lparser->xml_mode('1');
                    110:     $output='';
                    111:     &format_clause(@{$token});
                    112:     $text = $parser->get_text('/clause');
                    113:     $token = $parser->get_tag('/clause');
                    114: 
                    115:     my $token='';
                    116:     my $ttype='';
                    117:     my $excludeflag=0;
                    118:     my $outcache='';
                    119:     while ($token = $lparser->get_token()) {
                    120: 	if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; }
                    121: 	elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1];	}
                    122: 	elsif ($token->[0] eq 'T') {
                    123: 	    if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S'
                    124: 		or $ttype eq 'E') {
                    125: 		$output.=$token->[1];
                    126: 	    }
                    127: 	    else {
                    128: 		$outcache.=$token->[1];
                    129: 	    }
                    130: 	}
                    131: 	elsif ($token->[0] eq 'S') {
                    132: 	    if ($eh{$token->[1]} or $excludeflag==1) {
                    133: 		$ttype='';
                    134: 		$excludeflag=1;
                    135: 		$outcache.=$token->[4];
                    136: 	    }
                    137: 	    else {
                    138: 		$ttype='S';
                    139: 		$output.=$token->[4];
                    140: 	    }
                    141: 	    if ($excludeflag==1) {
                    142: 		
                    143: 	    }
                    144: 	}
                    145: 	elsif ($token->[0] eq 'E') {
                    146: 	    if ($eh{$token->[1]} and $excludeflag==1) {
                    147: 		$ttype='E';
                    148: 		$excludeflag=0;
                    149: 		$outcache.=$token->[2];
1.7     ! harris41  150: 		my $retval=&evalconditions($outcache);
1.3       harris41  151: 		if (&evalconditions($outcache)) {
1.7     ! harris41  152: 		    $output.=$outcache;
1.3       harris41  153: 		}
                    154: 		else {
1.7     ! harris41  155: 		    $output.='<!-- FILTERED OUT -->';
1.3       harris41  156: 		}
                    157: 		$outcache='';
                    158: 	    }
                    159: 	    elsif ($excludeflag==1) {
                    160: 		$ttype='';
                    161: 		$outcache.=$token->[2];
                    162: 	    }
                    163: 	    else {
                    164: 		$output.=$token->[2];
                    165: 		$ttype='E';
                    166: 	    }
                    167: 	}
                    168:     }
                    169:     &cc;
1.1       harris41  170: }
1.3       harris41  171: print $output;
1.1       harris41  172: 
1.3       harris41  173: # -------------------------------------------------------------- evalconditions
                    174: sub evalconditions {
                    175:     my ($parsetext)=@_;
                    176:     my $eparser = HTML::TokeParser->new(\$parsetext);
                    177:     unless (@{$conditions{'name'}} or
                    178: 	    @{$conditions{'attribute'}}) {
1.7     ! harris41  179: 	return 0;
1.1       harris41  180:     }
1.3       harris41  181:     my $nameflag=0;
                    182:     my $cdataflag=0;
                    183:     my $matchflag=0;
                    184:     my $Ttoken='';
                    185:     while (my $token = $eparser->get_token()) {
                    186: 	if ($token->[0] eq 'S') {
                    187: 	    foreach my $name (@{$conditions{'name'}}) {
                    188: 		my $flag=0;
                    189: 		my $match=$name;
                    190: 		if ($match=~/^\!/) {
                    191: 		    $match=~s/^\!//g;
                    192: 		    $flag=1;
                    193: 		}
                    194: 		$match=~s/^\///g;
                    195: 		$match=~s/\/$//g;
                    196: 		if ((!$flag and $token->[1]=~/$match/) or
                    197: 		    ($flag and $token->[1]!~/$match/)) {
                    198: 		    $nameflag=1;
1.1       harris41  199: 		}
                    200: 	    }
1.3       harris41  201: 	    $Ttoken='';
1.1       harris41  202: 	}
1.3       harris41  203: 	elsif ($token->[0] eq 'E') {
                    204: 	    foreach my $name (@{$conditions{'name'}}) {
                    205: 		my $flag=0;
                    206: 		my $match=$name;
                    207: 		if ($match=~/^\!/) {
                    208: 		    $match=~s/^\!//g;
                    209: 		    $flag=1;
                    210: 		}
                    211: 		$match=~s/^\///g;
                    212: 		$match=~s/\/$//g;
                    213: 		if ((!$flag and $token->[1]=~/$match/) or
                    214: 		    ($flag and $token->[1]!~/$match/)) {
                    215: 		    foreach my $cdata (@{$conditions{'cdata'}}) {
                    216: 			my $flag=0;
                    217: 			my $match=$cdata;
                    218: 			if ($match=~/^\!/) {
                    219: 			    $match=~s/^\!//g;
                    220: 			    $flag=1;
1.1       harris41  221: 			}
1.3       harris41  222: 			$match=~s/^\///g;
                    223: 			$match=~s/\/$//g;
                    224: 			if ((!$flag and $Ttoken=~/$match/) or
                    225: 			    ($flag and $Ttoken!~/$match/)) {
                    226: 			    $cdataflag=1;
1.2       harris41  227: 			}
1.3       harris41  228: 		    }
                    229: 		    if (@{$conditions{'cdata'}}) {
                    230: 			if ($cdataflag) {
                    231: 			    return 0;
1.1       harris41  232: 			}
1.3       harris41  233: 		    }
                    234: 		    else {
                    235: 			if ($nameflag) {
                    236: 			    return 0;
1.1       harris41  237: 			}
                    238: 		    }
1.3       harris41  239: 		    $nameflag=0;
1.1       harris41  240: 		}
                    241: 	    }
                    242: 	}
1.3       harris41  243: 	elsif ($token->[0] eq 'T') {
                    244: 	    if ($nameflag) {
                    245: 		$Ttoken.=$token->[1];
1.1       harris41  246: 	    }
                    247: 	}
                    248:     }
1.3       harris41  249:     return 1;
1.1       harris41  250: }
                    251: 
                    252: # ------------------------------------------------------------ clear conditions
                    253: sub cc {
                    254:     @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
                    255:     @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
                    256:     @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
                    257:     @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
1.3       harris41  258:     %eh=(1,1); delete $eh{1};
1.1       harris41  259: }
                    260: 
                    261: # --------------------------------------- remove starting and ending whitespace
                    262: sub trim {
                    263:     my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
                    264: }
                    265: 
1.2       harris41  266: 
                    267: 
1.3       harris41  268: 
1.1       harris41  269: # --------------------------------------------------------- Format xfml section
                    270: sub format_xfml {
                    271:     my (@tokeninfo)=@_;
                    272:     return '';
                    273: }
                    274: 
1.3       harris41  275: # ------------------------------------------------------- Format clause section
                    276: sub format_clause {
                    277:     my (@tokeninfo)=@_;
                    278:     return '';
                    279: }
                    280: 
1.1       harris41  281: # ---------------------------------------------------- Format when:name section
                    282: sub format_when_name {
                    283:     my (@tokeninfo)=@_;
1.3       harris41  284: #    $wloc++;
1.1       harris41  285:     my $att_match=$tokeninfo[2]->{'match'};
                    286:     push @{$conditions{'name'}},$att_match;
                    287:     my $text=&trim($parser->get_text('/when:name'));
                    288:     $parser->get_tag('/when:name');
1.3       harris41  289: #    $wloc--;
                    290: #    &cc unless $wloc;
1.1       harris41  291:     return '';
                    292: }
                    293: 
                    294: # --------------------------------------------------- Format when:cdata section
                    295: sub format_when_cdata {
                    296:     my (@tokeninfo)=@_;
                    297:     $wloc++;
                    298:     my $att_match=$tokeninfo[2]->{'match'};
                    299:     push @{$conditions{'cdata'}},$att_match;
                    300:     my $text=&trim($parser->get_text('/when:cdata'));
                    301:     $parser->get_tag('/when:cdata');
                    302:     $wloc--;
1.3       harris41  303: #    &cc unless $wloc;
1.1       harris41  304:     return '';
                    305: }
                    306: 
                    307: # ----------------------------------------------- Format choice:exclude section
                    308: sub format_choice_exclude {
                    309:     my (@tokeninfo)=@_;
                    310:     my $text=&trim($parser->get_text('/choice:exclude'));
                    311:     $parser->get_tag('/choice:exclude');
                    312:     $eh{$tokeninfo[2]->{'nodename'}}++;
                    313:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
                    314:          [@{$conditions{'name'}}];
                    315:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
                    316:          [@{$conditions{'attribute'}}];
                    317:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
                    318:          [@{$conditions{'value'}}];
                    319:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
                    320:          [@{$conditions{'cdata'}}];
                    321:     return '';
                    322: }
1.4       harris41  323: 
                    324: # ----------------------------------- POD (plain old documentation, CPAN style)
                    325: 
                    326: =pod
                    327: 
                    328: =head1 NAME
                    329: 
1.5       harris41  330: xfml_parse.pl - This is meant to parse XFML files (XML Filtering Markup Language.)
1.4       harris41  331: 
                    332: =head1 SYNOPSIS
                    333: 
                    334: Usage is for lpml file to come in through standard input.
                    335: 
                    336: =over 4
                    337: 
                    338: =item * 
                    339: 
                    340: 1st argument is name of xfml file.
                    341: 
                    342: =back
                    343: 
                    344: Example:
                    345: 
                    346:  cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
                    347: 
                    348: or
                    349: 
                    350:  perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
                    351: 
                    352: =head1 DESCRIPTION
                    353: 
                    354: I am using a multiple pass-through approach to parsing
                    355: the xfml file.  This saves memory and makes sure the server
                    356: will never be overloaded.
                    357: 
                    358: =head1 README
                    359: 
                    360: I am using a multiple pass-through approach to parsing
                    361: the xfml file.  This saves memory and makes sure the server
                    362: will never be overloaded.
                    363: 
                    364: =head1 PREREQUISITES
                    365: 
                    366: HTML::TokeParser
                    367: 
                    368: =head1 COREQUISITES
                    369: 
                    370: =head1 OSNAMES
                    371: 
                    372: linux
                    373: 
                    374: =head1 SCRIPT CATEGORIES
                    375: 
                    376: Packaging/Administrative
                    377: 
                    378: =head1 AUTHOR
                    379: 
                    380:  Scott Harrison
                    381:  codeharrison@yahoo.com
                    382: 
                    383: Please let me know how/if you are finding this script useful and
                    384: any/all suggestions.  -Scott
                    385: 
                    386: =cut
                    387: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>
500 Internal Server Error

Internal Server Error

The server encountered an internal error or misconfiguration and was unable to complete your request.

Please contact the server administrator at root@localhost to inform them of the time this error occurred, and the actions you performed just before this error.

More information about this error may be available in the server error log.