File:  [LON-CAPA] / loncom / build / xfml_parse.pl
Revision 1.7: download - view: text, annotated - select for diffs
Wed May 22 17:07:50 2002 UTC (21 years, 11 months ago) by harris41
Branches: MAIN
CVS tags: version_2_9_X, version_2_9_99_0, version_2_9_1, version_2_9_0, version_2_8_X, version_2_8_99_1, version_2_8_99_0, version_2_8_2, version_2_8_1, version_2_8_0, version_2_7_X, version_2_7_99_1, version_2_7_99_0, version_2_7_1, version_2_7_0, version_2_6_X, version_2_6_99_1, version_2_6_99_0, version_2_6_3, version_2_6_2, version_2_6_1, version_2_6_0, version_2_5_X, version_2_5_99_1, version_2_5_99_0, version_2_5_2, version_2_5_1, version_2_5_0, version_2_4_X, version_2_4_99_0, version_2_4_2, version_2_4_1, version_2_4_0, version_2_3_X, version_2_3_99_0, version_2_3_2, version_2_3_1, version_2_3_0, version_2_2_X, version_2_2_99_1, version_2_2_99_0, version_2_2_2, version_2_2_1, version_2_2_0, version_2_1_X, version_2_1_99_3, version_2_1_99_2, version_2_1_99_1, version_2_1_99_0, version_2_1_3, version_2_1_2, version_2_1_1, version_2_1_0, version_2_12_X, version_2_11_X, version_2_11_4_uiuc, version_2_11_4_msu, version_2_11_4, version_2_11_3_uiuc, version_2_11_3_msu, version_2_11_3, version_2_11_2_uiuc, version_2_11_2_msu, version_2_11_2_educog, version_2_11_2, version_2_11_1, version_2_11_0_RC3, version_2_11_0_RC2, version_2_11_0_RC1, version_2_11_0, version_2_10_X, version_2_10_1, version_2_10_0_RC2, version_2_10_0_RC1, version_2_10_0, version_2_0_X, version_2_0_99_1, version_2_0_2, version_2_0_1, version_2_0_0, version_1_99_3, version_1_99_2, version_1_99_1_tmcc, version_1_99_1, version_1_99_0_tmcc, version_1_99_0, version_1_3_X, version_1_3_3, version_1_3_2, version_1_3_1, version_1_3_0, version_1_2_X, version_1_2_99_1, version_1_2_99_0, version_1_2_1, version_1_2_0, version_1_1_X, version_1_1_99_5, version_1_1_99_4, version_1_1_99_3, version_1_1_99_2, version_1_1_99_1, version_1_1_99_0, version_1_1_3, version_1_1_2, version_1_1_1, version_1_1_0, version_1_0_99_3, version_1_0_99_2, version_1_0_99_1, version_1_0_99, version_1_0_3, version_1_0_2, version_1_0_1, version_1_0_0, version_0_99_5, version_0_99_4, version_0_99_3, version_0_99_2, version_0_99_1, version_0_99_0, version_0_6_2, version_0_6, version_0_5_1, version_0_5, version_0_4, stable_2002_july, loncapaMITrelate_1, language_hyphenation_merge, language_hyphenation, conference_2003, bz6209-base, bz6209, bz5969, bz5610, bz2851, STABLE, PRINT_INCOMPLETE_base, PRINT_INCOMPLETE, HEAD, GCI_3, GCI_2, GCI_1, BZ5971-printing-apage, BZ5434-fox, BZ4492-merge, BZ4492-feature_horizontal_radioresponse, BZ4492-feature_Support_horizontal_radioresponse, BZ4492-Support_horizontal_radioresponse
xfml_parse.pl already has been well-tested.  the changes in 1.6 are incorrect
and are now reversed.

    1: #!/usr/bin/perl
    2: 
    3: # -------------------------------------------------------- Documentation notice
    4: # Run "perldoc ./lpml_parse.pl" in order to best view the software
    5: # documentation internalized in this program.
    6: 
    7: # --------------------------------------------------------- License Information
    8: # The LearningOnline Network with CAPA
    9: # piml_parse.pl - Linux Packaging Markup Language parser
   10: #
   11: # $Id: xfml_parse.pl,v 1.7 2002/05/22 17:07:50 harris41 Exp $
   12: #
   13: # Written by Scott Harrison, codeharrison@yahoo.com
   14: #
   15: # Copyright Michigan State University Board of Trustees
   16: #
   17: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
   18: #
   19: # LON-CAPA is free software; you can redistribute it and/or modify
   20: # it under the terms of the GNU General Public License as published by
   21: # the Free Software Foundation; either version 2 of the License, or
   22: # (at your option) any later version.
   23: #
   24: # LON-CAPA is distributed in the hope that it will be useful,
   25: # but WITHOUT ANY WARRANTY; without even the implied warranty of
   26: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   27: # GNU General Public License for more details.
   28: #
   29: # You should have received a copy of the GNU General Public License
   30: # along with LON-CAPA; if not, write to the Free Software
   31: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   32: #
   33: # /home/httpd/html/adm/gpl.txt
   34: #
   35: # http://www.lon-capa.org/
   36: #
   37: # YEAR=2002
   38: # 1/26,1/27,1/28,1/29,1/30,1/31,2/20,4/8 - Scott Harrison
   39: #
   40: ###
   41: 
   42: # Read in 2 XML file; first is the filter specification, the second
   43: # is the XML file to be filtered
   44: 
   45: ###############################################################################
   46: ##                                                                           ##
   47: ## ORGANIZATION OF THIS PERL SCRIPT                                          ##
   48: ## 1. Notes                                                                  ##
   49: ## 2. Read in filter file                                                    ##
   50: ## 3. Initialize and clear conditions                                        ##
   51: ## 4. Run through and apply clauses                                          ##
   52: ##                                                                           ##
   53: ###############################################################################
   54: 
   55: # ----------------------------------------------------------------------- Notes
   56: #
   57: # This is meant to parse files meeting the xfml document type.
   58: # See xfml.dtd.  XFML=XML Filtering Markup Language.
   59: 
   60: use HTML::TokeParser;
   61: use strict;
   62: 
   63: unless (@ARGV) {
   64:     print(<<END);
   65: Incorrect invocation.
   66: Example usages:
   67: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
   68: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
   69: END
   70: }
   71: 
   72: my %eh;
   73: 
   74: # ---------------------------------------------- Read in filter file from @ARGV
   75: my $tofilter=shift @ARGV;
   76: open(IN,"<$tofilter"); my @lines=<IN>;
   77: my $parsestring=join('',@lines); undef @lines; close IN;
   78: my $parser = HTML::TokeParser->new(\$parsestring) or
   79:     die('can\'t create TokeParser object');
   80: $parser->xml_mode('1');
   81: 
   82: # --------------------------------------------- initialize and clear conditions
   83: my %conditions; &cc;
   84: 
   85: # Define handling methods for mode-dependent text rendering
   86: $parser->{textify}={
   87:     'xfml' => \&format_xfml,
   88:     'when:name' => \&format_when_name,
   89:     'when:attribute' => \&format_when_attribute,
   90:     'when:cdata' => \&format_when_cdata,
   91:     'choice:exclude' => \&format_choice_exclude,
   92:     'clause' => \&format_clause,
   93:     };
   94: 
   95: my $text;
   96: my $xfml;
   97: my $wloc=0;
   98: my %eha;
   99: 
  100: # ----------------------------------------------- Run through and apply clauses
  101: my @lines2=<>; my $output=join('',@lines2); undef @lines2;
  102: my $lparser = HTML::TokeParser->new(\$output) or
  103:     die('can\'t create TokeParser object');
  104: $lparser->xml_mode('1');
  105: my $parsestring2;
  106: while (my $token = $parser->get_tag('clause')) {
  107:     $parsestring2=$output;
  108:     $lparser = HTML::TokeParser->new(\$parsestring2);
  109:     $lparser->xml_mode('1');
  110:     $output='';
  111:     &format_clause(@{$token});
  112:     $text = $parser->get_text('/clause');
  113:     $token = $parser->get_tag('/clause');
  114: 
  115:     my $token='';
  116:     my $ttype='';
  117:     my $excludeflag=0;
  118:     my $outcache='';
  119:     while ($token = $lparser->get_token()) {
  120: 	if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; }
  121: 	elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1];	}
  122: 	elsif ($token->[0] eq 'T') {
  123: 	    if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S'
  124: 		or $ttype eq 'E') {
  125: 		$output.=$token->[1];
  126: 	    }
  127: 	    else {
  128: 		$outcache.=$token->[1];
  129: 	    }
  130: 	}
  131: 	elsif ($token->[0] eq 'S') {
  132: 	    if ($eh{$token->[1]} or $excludeflag==1) {
  133: 		$ttype='';
  134: 		$excludeflag=1;
  135: 		$outcache.=$token->[4];
  136: 	    }
  137: 	    else {
  138: 		$ttype='S';
  139: 		$output.=$token->[4];
  140: 	    }
  141: 	    if ($excludeflag==1) {
  142: 		
  143: 	    }
  144: 	}
  145: 	elsif ($token->[0] eq 'E') {
  146: 	    if ($eh{$token->[1]} and $excludeflag==1) {
  147: 		$ttype='E';
  148: 		$excludeflag=0;
  149: 		$outcache.=$token->[2];
  150: 		my $retval=&evalconditions($outcache);
  151: 		if (&evalconditions($outcache)) {
  152: 		    $output.=$outcache;
  153: 		}
  154: 		else {
  155: 		    $output.='<!-- FILTERED OUT -->';
  156: 		}
  157: 		$outcache='';
  158: 	    }
  159: 	    elsif ($excludeflag==1) {
  160: 		$ttype='';
  161: 		$outcache.=$token->[2];
  162: 	    }
  163: 	    else {
  164: 		$output.=$token->[2];
  165: 		$ttype='E';
  166: 	    }
  167: 	}
  168:     }
  169:     &cc;
  170: }
  171: print $output;
  172: 
  173: # -------------------------------------------------------------- evalconditions
  174: sub evalconditions {
  175:     my ($parsetext)=@_;
  176:     my $eparser = HTML::TokeParser->new(\$parsetext);
  177:     unless (@{$conditions{'name'}} or
  178: 	    @{$conditions{'attribute'}}) {
  179: 	return 0;
  180:     }
  181:     my $nameflag=0;
  182:     my $cdataflag=0;
  183:     my $matchflag=0;
  184:     my $Ttoken='';
  185:     while (my $token = $eparser->get_token()) {
  186: 	if ($token->[0] eq 'S') {
  187: 	    foreach my $name (@{$conditions{'name'}}) {
  188: 		my $flag=0;
  189: 		my $match=$name;
  190: 		if ($match=~/^\!/) {
  191: 		    $match=~s/^\!//g;
  192: 		    $flag=1;
  193: 		}
  194: 		$match=~s/^\///g;
  195: 		$match=~s/\/$//g;
  196: 		if ((!$flag and $token->[1]=~/$match/) or
  197: 		    ($flag and $token->[1]!~/$match/)) {
  198: 		    $nameflag=1;
  199: 		}
  200: 	    }
  201: 	    $Ttoken='';
  202: 	}
  203: 	elsif ($token->[0] eq 'E') {
  204: 	    foreach my $name (@{$conditions{'name'}}) {
  205: 		my $flag=0;
  206: 		my $match=$name;
  207: 		if ($match=~/^\!/) {
  208: 		    $match=~s/^\!//g;
  209: 		    $flag=1;
  210: 		}
  211: 		$match=~s/^\///g;
  212: 		$match=~s/\/$//g;
  213: 		if ((!$flag and $token->[1]=~/$match/) or
  214: 		    ($flag and $token->[1]!~/$match/)) {
  215: 		    foreach my $cdata (@{$conditions{'cdata'}}) {
  216: 			my $flag=0;
  217: 			my $match=$cdata;
  218: 			if ($match=~/^\!/) {
  219: 			    $match=~s/^\!//g;
  220: 			    $flag=1;
  221: 			}
  222: 			$match=~s/^\///g;
  223: 			$match=~s/\/$//g;
  224: 			if ((!$flag and $Ttoken=~/$match/) or
  225: 			    ($flag and $Ttoken!~/$match/)) {
  226: 			    $cdataflag=1;
  227: 			}
  228: 		    }
  229: 		    if (@{$conditions{'cdata'}}) {
  230: 			if ($cdataflag) {
  231: 			    return 0;
  232: 			}
  233: 		    }
  234: 		    else {
  235: 			if ($nameflag) {
  236: 			    return 0;
  237: 			}
  238: 		    }
  239: 		    $nameflag=0;
  240: 		}
  241: 	    }
  242: 	}
  243: 	elsif ($token->[0] eq 'T') {
  244: 	    if ($nameflag) {
  245: 		$Ttoken.=$token->[1];
  246: 	    }
  247: 	}
  248:     }
  249:     return 1;
  250: }
  251: 
  252: # ------------------------------------------------------------ clear conditions
  253: sub cc {
  254:     @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
  255:     @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
  256:     @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
  257:     @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
  258:     %eh=(1,1); delete $eh{1};
  259: }
  260: 
  261: # --------------------------------------- remove starting and ending whitespace
  262: sub trim {
  263:     my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
  264: }
  265: 
  266: 
  267: 
  268: 
  269: # --------------------------------------------------------- Format xfml section
  270: sub format_xfml {
  271:     my (@tokeninfo)=@_;
  272:     return '';
  273: }
  274: 
  275: # ------------------------------------------------------- Format clause section
  276: sub format_clause {
  277:     my (@tokeninfo)=@_;
  278:     return '';
  279: }
  280: 
  281: # ---------------------------------------------------- Format when:name section
  282: sub format_when_name {
  283:     my (@tokeninfo)=@_;
  284: #    $wloc++;
  285:     my $att_match=$tokeninfo[2]->{'match'};
  286:     push @{$conditions{'name'}},$att_match;
  287:     my $text=&trim($parser->get_text('/when:name'));
  288:     $parser->get_tag('/when:name');
  289: #    $wloc--;
  290: #    &cc unless $wloc;
  291:     return '';
  292: }
  293: 
  294: # --------------------------------------------------- Format when:cdata section
  295: sub format_when_cdata {
  296:     my (@tokeninfo)=@_;
  297:     $wloc++;
  298:     my $att_match=$tokeninfo[2]->{'match'};
  299:     push @{$conditions{'cdata'}},$att_match;
  300:     my $text=&trim($parser->get_text('/when:cdata'));
  301:     $parser->get_tag('/when:cdata');
  302:     $wloc--;
  303: #    &cc unless $wloc;
  304:     return '';
  305: }
  306: 
  307: # ----------------------------------------------- Format choice:exclude section
  308: sub format_choice_exclude {
  309:     my (@tokeninfo)=@_;
  310:     my $text=&trim($parser->get_text('/choice:exclude'));
  311:     $parser->get_tag('/choice:exclude');
  312:     $eh{$tokeninfo[2]->{'nodename'}}++;
  313:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
  314:          [@{$conditions{'name'}}];
  315:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
  316:          [@{$conditions{'attribute'}}];
  317:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
  318:          [@{$conditions{'value'}}];
  319:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
  320:          [@{$conditions{'cdata'}}];
  321:     return '';
  322: }
  323: 
  324: # ----------------------------------- POD (plain old documentation, CPAN style)
  325: 
  326: =pod
  327: 
  328: =head1 NAME
  329: 
  330: xfml_parse.pl - This is meant to parse XFML files (XML Filtering Markup Language.)
  331: 
  332: =head1 SYNOPSIS
  333: 
  334: Usage is for lpml file to come in through standard input.
  335: 
  336: =over 4
  337: 
  338: =item * 
  339: 
  340: 1st argument is name of xfml file.
  341: 
  342: =back
  343: 
  344: Example:
  345: 
  346:  cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
  347: 
  348: or
  349: 
  350:  perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
  351: 
  352: =head1 DESCRIPTION
  353: 
  354: I am using a multiple pass-through approach to parsing
  355: the xfml file.  This saves memory and makes sure the server
  356: will never be overloaded.
  357: 
  358: =head1 README
  359: 
  360: I am using a multiple pass-through approach to parsing
  361: the xfml file.  This saves memory and makes sure the server
  362: will never be overloaded.
  363: 
  364: =head1 PREREQUISITES
  365: 
  366: HTML::TokeParser
  367: 
  368: =head1 COREQUISITES
  369: 
  370: =head1 OSNAMES
  371: 
  372: linux
  373: 
  374: =head1 SCRIPT CATEGORIES
  375: 
  376: Packaging/Administrative
  377: 
  378: =head1 AUTHOR
  379: 
  380:  Scott Harrison
  381:  codeharrison@yahoo.com
  382: 
  383: Please let me know how/if you are finding this script useful and
  384: any/all suggestions.  -Scott
  385: 
  386: =cut
  387: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>