File:  [LON-CAPA] / loncom / build / xfml_parse.pl
Revision 1.6: download - view: text, annotated - select for diffs
Tue May 21 19:13:53 2002 UTC (22 years ago) by matthew
Branches: MAIN
CVS tags: HEAD
Changes by Guy to fix logic that was backward in two places.

    1: #!/usr/bin/perl
    2: 
    3: # -------------------------------------------------------- Documentation notice
    4: # Run "perldoc ./lpml_parse.pl" in order to best view the software
    5: # documentation internalized in this program.
    6: 
    7: # --------------------------------------------------------- License Information
    8: # The LearningOnline Network with CAPA
    9: # piml_parse.pl - Linux Packaging Markup Language parser
   10: #
   11: # $Id: xfml_parse.pl,v 1.6 2002/05/21 19:13:53 matthew Exp $
   12: #
   13: # Written by Scott Harrison, codeharrison@yahoo.com
   14: #
   15: # Copyright Michigan State University Board of Trustees
   16: #
   17: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
   18: #
   19: # LON-CAPA is free software; you can redistribute it and/or modify
   20: # it under the terms of the GNU General Public License as published by
   21: # the Free Software Foundation; either version 2 of the License, or
   22: # (at your option) any later version.
   23: #
   24: # LON-CAPA is distributed in the hope that it will be useful,
   25: # but WITHOUT ANY WARRANTY; without even the implied warranty of
   26: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   27: # GNU General Public License for more details.
   28: #
   29: # You should have received a copy of the GNU General Public License
   30: # along with LON-CAPA; if not, write to the Free Software
   31: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   32: #
   33: # /home/httpd/html/adm/gpl.txt
   34: #
   35: # http://www.lon-capa.org/
   36: #
   37: # YEAR=2002
   38: # 1/26,1/27,1/28,1/29,1/30,1/31,2/20,4/8 - Scott Harrison
   39: #
   40: ###
   41: 
   42: # Read in 2 XML file; first is the filter specification, the second
   43: # is the XML file to be filtered
   44: 
   45: ###############################################################################
   46: ##                                                                           ##
   47: ## ORGANIZATION OF THIS PERL SCRIPT                                          ##
   48: ## 1. Notes                                                                  ##
   49: ## 2. Read in filter file                                                    ##
   50: ## 3. Initialize and clear conditions                                        ##
   51: ## 4. Run through and apply clauses                                          ##
   52: ##                                                                           ##
   53: ###############################################################################
   54: 
   55: # ----------------------------------------------------------------------- Notes
   56: #
   57: # This is meant to parse files meeting the xfml document type.
   58: # See xfml.dtd.  XFML=XML Filtering Markup Language.
   59: 
   60: use HTML::TokeParser;
   61: use strict;
   62: 
   63: unless (@ARGV) {
   64:     print(<<END);
   65: Incorrect invocation.
   66: Example usages:
   67: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
   68: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
   69: END
   70: }
   71: 
   72: my %eh;
   73: 
   74: # ---------------------------------------------- Read in filter file from @ARGV
   75: my $tofilter=shift @ARGV;
   76: open(IN,"<$tofilter"); my @lines=<IN>;
   77: my $parsestring=join('',@lines); undef @lines; close IN;
   78: my $parser = HTML::TokeParser->new(\$parsestring) or
   79:     die('can\'t create TokeParser object');
   80: $parser->xml_mode('1');
   81: 
   82: # --------------------------------------------- initialize and clear conditions
   83: my %conditions; &cc;
   84: 
   85: # Define handling methods for mode-dependent text rendering
   86: $parser->{textify}={
   87:     'xfml' => \&format_xfml,
   88:     'when:name' => \&format_when_name,
   89:     'when:attribute' => \&format_when_attribute,
   90:     'when:cdata' => \&format_when_cdata,
   91:     'choice:exclude' => \&format_choice_exclude,
   92:     'clause' => \&format_clause,
   93:     };
   94: 
   95: my $text;
   96: my $xfml;
   97: my $wloc=0;
   98: my %eha;
   99: 
  100: # ----------------------------------------------- Run through and apply clauses
  101: my @lines2=<>; my $output=join('',@lines2); undef @lines2;
  102: my $lparser = HTML::TokeParser->new(\$output) or
  103:     die('can\'t create TokeParser object');
  104: $lparser->xml_mode('1');
  105: my $parsestring2;
  106: while (my $token = $parser->get_tag('clause')) {
  107:     $parsestring2=$output;
  108:     $lparser = HTML::TokeParser->new(\$parsestring2);
  109:     $lparser->xml_mode('1');
  110:     $output='';
  111:     &format_clause(@{$token});
  112:     $text = $parser->get_text('/clause');
  113:     $token = $parser->get_tag('/clause');
  114: 
  115:     my $token='';
  116:     my $ttype='';
  117:     my $excludeflag=0;
  118:     my $outcache='';
  119:     while ($token = $lparser->get_token()) {
  120: 	if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; }
  121: 	elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1];	}
  122: 	elsif ($token->[0] eq 'T') {
  123: 	    if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S'
  124: 		or $ttype eq 'E') {
  125: 		$output.=$token->[1];
  126: 	    }
  127: 	    else {
  128: 		$outcache.=$token->[1];
  129: 	    }
  130: 	}
  131: 	elsif ($token->[0] eq 'S') {
  132: 	    if ($eh{$token->[1]} or $excludeflag==1) {
  133: 		$ttype='';
  134: 		$excludeflag=1;
  135: 		$outcache.=$token->[4];
  136: 	    }
  137: 	    else {
  138: 		$ttype='S';
  139: 		$output.=$token->[4];
  140: 	    }
  141: 	    if ($excludeflag==1) {
  142: 		
  143: 	    }
  144: 	}
  145: 	elsif ($token->[0] eq 'E') {
  146: 	    if ($eh{$token->[1]} and $excludeflag==1) {
  147: 		$ttype='E';
  148: 		$excludeflag=0;
  149: 		$outcache.=$token->[2];
  150: 		if (&evalconditions($outcache)) {
  151: 		    $output.='<!-- FILTERED OUT -->';
  152: 		}
  153: 		else {
  154: 		    $output.=$outcache;
  155: 		}
  156: 		$outcache='';
  157: 	    }
  158: 	    elsif ($excludeflag==1) {
  159: 		$ttype='';
  160: 		$outcache.=$token->[2];
  161: 	    }
  162: 	    else {
  163: 		$output.=$token->[2];
  164: 		$ttype='E';
  165: 	    }
  166: 	}
  167:     }
  168:     &cc;
  169: }
  170: print $output;
  171: 
  172: # -------------------------------------------------------------- evalconditions
  173: sub evalconditions {
  174:     my ($parsetext)=@_;
  175:     my $eparser = HTML::TokeParser->new(\$parsetext);
  176:     unless (@{$conditions{'name'}} or
  177: 	    @{$conditions{'attribute'}}) {
  178: 	return 1;
  179:     }
  180:     my $nameflag=0;
  181:     my $cdataflag=0;
  182:     my $matchflag=0;
  183:     my $Ttoken='';
  184:     while (my $token = $eparser->get_token()) {
  185: 	if ($token->[0] eq 'S') {
  186: 	    foreach my $name (@{$conditions{'name'}}) {
  187: 		my $flag=0;
  188: 		my $match=$name;
  189: 		if ($match=~/^\!/) {
  190: 		    $match=~s/^\!//g;
  191: 		    $flag=1;
  192: 		}
  193: 		$match=~s/^\///g;
  194: 		$match=~s/\/$//g;
  195: 		if ((!$flag and $token->[1]=~/$match/) or
  196: 		    ($flag and $token->[1]!~/$match/)) {
  197: 		    $nameflag=1;
  198: 		}
  199: 	    }
  200: 	    $Ttoken='';
  201: 	}
  202: 	elsif ($token->[0] eq 'E') {
  203: 	    foreach my $name (@{$conditions{'name'}}) {
  204: 		my $flag=0;
  205: 		my $match=$name;
  206: 		if ($match=~/^\!/) {
  207: 		    $match=~s/^\!//g;
  208: 		    $flag=1;
  209: 		}
  210: 		$match=~s/^\///g;
  211: 		$match=~s/\/$//g;
  212: 		if ((!$flag and $token->[1]=~/$match/) or
  213: 		    ($flag and $token->[1]!~/$match/)) {
  214: 		    foreach my $cdata (@{$conditions{'cdata'}}) {
  215: 			my $flag=0;
  216: 			my $match=$cdata;
  217: 			if ($match=~/^\!/) {
  218: 			    $match=~s/^\!//g;
  219: 			    $flag=1;
  220: 			}
  221: 			$match=~s/^\///g;
  222: 			$match=~s/\/$//g;
  223: 			if ((!$flag and $Ttoken=~/$match/) or
  224: 			    ($flag and $Ttoken!~/$match/)) {
  225: 			    $cdataflag=1;
  226: 			}
  227: 		    }
  228: 		    if (@{$conditions{'cdata'}}) {
  229: 			if ($cdataflag) {
  230: 			    return 0;
  231: 			}
  232: 		    }
  233: 		    else {
  234: 			if ($nameflag) {
  235: 			    return 0;
  236: 			}
  237: 		    }
  238: 		    $nameflag=0;
  239: 		}
  240: 	    }
  241: 	}
  242: 	elsif ($token->[0] eq 'T') {
  243: 	    if ($nameflag) {
  244: 		$Ttoken.=$token->[1];
  245: 	    }
  246: 	}
  247:     }
  248:     return 1;
  249: }
  250: 
  251: # ------------------------------------------------------------ clear conditions
  252: sub cc {
  253:     @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
  254:     @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
  255:     @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
  256:     @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
  257:     %eh=(1,1); delete $eh{1};
  258: }
  259: 
  260: # --------------------------------------- remove starting and ending whitespace
  261: sub trim {
  262:     my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
  263: }
  264: 
  265: 
  266: 
  267: 
  268: # --------------------------------------------------------- Format xfml section
  269: sub format_xfml {
  270:     my (@tokeninfo)=@_;
  271:     return '';
  272: }
  273: 
  274: # ------------------------------------------------------- Format clause section
  275: sub format_clause {
  276:     my (@tokeninfo)=@_;
  277:     return '';
  278: }
  279: 
  280: # ---------------------------------------------------- Format when:name section
  281: sub format_when_name {
  282:     my (@tokeninfo)=@_;
  283: #    $wloc++;
  284:     my $att_match=$tokeninfo[2]->{'match'};
  285:     push @{$conditions{'name'}},$att_match;
  286:     my $text=&trim($parser->get_text('/when:name'));
  287:     $parser->get_tag('/when:name');
  288: #    $wloc--;
  289: #    &cc unless $wloc;
  290:     return '';
  291: }
  292: 
  293: # --------------------------------------------------- Format when:cdata section
  294: sub format_when_cdata {
  295:     my (@tokeninfo)=@_;
  296:     $wloc++;
  297:     my $att_match=$tokeninfo[2]->{'match'};
  298:     push @{$conditions{'cdata'}},$att_match;
  299:     my $text=&trim($parser->get_text('/when:cdata'));
  300:     $parser->get_tag('/when:cdata');
  301:     $wloc--;
  302: #    &cc unless $wloc;
  303:     return '';
  304: }
  305: 
  306: # ----------------------------------------------- Format choice:exclude section
  307: sub format_choice_exclude {
  308:     my (@tokeninfo)=@_;
  309:     my $text=&trim($parser->get_text('/choice:exclude'));
  310:     $parser->get_tag('/choice:exclude');
  311:     $eh{$tokeninfo[2]->{'nodename'}}++;
  312:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
  313:          [@{$conditions{'name'}}];
  314:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
  315:          [@{$conditions{'attribute'}}];
  316:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
  317:          [@{$conditions{'value'}}];
  318:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
  319:          [@{$conditions{'cdata'}}];
  320:     return '';
  321: }
  322: 
  323: # ----------------------------------- POD (plain old documentation, CPAN style)
  324: 
  325: =pod
  326: 
  327: =head1 NAME
  328: 
  329: xfml_parse.pl - This is meant to parse XFML files (XML Filtering Markup Language.)
  330: 
  331: =head1 SYNOPSIS
  332: 
  333: Usage is for lpml file to come in through standard input.
  334: 
  335: =over 4
  336: 
  337: =item * 
  338: 
  339: 1st argument is name of xfml file.
  340: 
  341: =back
  342: 
  343: Example:
  344: 
  345:  cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
  346: 
  347: or
  348: 
  349:  perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
  350: 
  351: =head1 DESCRIPTION
  352: 
  353: I am using a multiple pass-through approach to parsing
  354: the xfml file.  This saves memory and makes sure the server
  355: will never be overloaded.
  356: 
  357: =head1 README
  358: 
  359: I am using a multiple pass-through approach to parsing
  360: the xfml file.  This saves memory and makes sure the server
  361: will never be overloaded.
  362: 
  363: =head1 PREREQUISITES
  364: 
  365: HTML::TokeParser
  366: 
  367: =head1 COREQUISITES
  368: 
  369: =head1 OSNAMES
  370: 
  371: linux
  372: 
  373: =head1 SCRIPT CATEGORIES
  374: 
  375: Packaging/Administrative
  376: 
  377: =head1 AUTHOR
  378: 
  379:  Scott Harrison
  380:  codeharrison@yahoo.com
  381: 
  382: Please let me know how/if you are finding this script useful and
  383: any/all suggestions.  -Scott
  384: 
  385: =cut
  386: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>