Annotation of nsdl/build/xfml_parse.pl, revision 1.1

1.1     ! harris41    1: #!/usr/bin/perl
        !             2: 
        !             3: # -------------------------------------------------------- Documentation notice
        !             4: # Run "perldoc ./lpml_parse.pl" in order to best view the software
        !             5: # documentation internalized in this program.
        !             6: 
        !             7: # --------------------------------------------------------- License Information
        !             8: # The LearningOnline Network with CAPA
        !             9: # piml_parse.pl - Linux Packaging Markup Language parser
        !            10: #
        !            11: # $Id: xfml_parse.pl,v 1.5 2002/04/08 12:51:03 harris41 Exp $
        !            12: #
        !            13: # Written by Scott Harrison, codeharrison@yahoo.com
        !            14: #
        !            15: # Copyright Michigan State University Board of Trustees
        !            16: #
        !            17: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
        !            18: #
        !            19: # LON-CAPA is free software; you can redistribute it and/or modify
        !            20: # it under the terms of the GNU General Public License as published by
        !            21: # the Free Software Foundation; either version 2 of the License, or
        !            22: # (at your option) any later version.
        !            23: #
        !            24: # LON-CAPA is distributed in the hope that it will be useful,
        !            25: # but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            26: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        !            27: # GNU General Public License for more details.
        !            28: #
        !            29: # You should have received a copy of the GNU General Public License
        !            30: # along with LON-CAPA; if not, write to the Free Software
        !            31: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
        !            32: #
        !            33: # /home/httpd/html/adm/gpl.txt
        !            34: #
        !            35: # http://www.lon-capa.org/
        !            36: #
        !            37: # YEAR=2002
        !            38: # 1/26,1/27,1/28,1/29,1/30,1/31,2/20,4/8 - Scott Harrison
        !            39: #
        !            40: ###
        !            41: 
        !            42: # Read in 2 XML file; first is the filter specification, the second
        !            43: # is the XML file to be filtered
        !            44: 
        !            45: ###############################################################################
        !            46: ##                                                                           ##
        !            47: ## ORGANIZATION OF THIS PERL SCRIPT                                          ##
        !            48: ## 1. Notes                                                                  ##
        !            49: ## 2. Read in filter file                                                    ##
        !            50: ## 3. Initialize and clear conditions                                        ##
        !            51: ## 4. Run through and apply clauses                                          ##
        !            52: ##                                                                           ##
        !            53: ###############################################################################
        !            54: 
        !            55: # ----------------------------------------------------------------------- Notes
        !            56: #
        !            57: # This is meant to parse files meeting the xfml document type.
        !            58: # See xfml.dtd.  XFML=XML Filtering Markup Language.
        !            59: 
        !            60: use HTML::TokeParser;
        !            61: use strict;
        !            62: 
        !            63: unless (@ARGV) {
        !            64:     print(<<END);
        !            65: Incorrect invocation.
        !            66: Example usages:
        !            67: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
        !            68: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
        !            69: END
        !            70: }
        !            71: 
        !            72: my %eh;
        !            73: 
        !            74: # ---------------------------------------------- Read in filter file from @ARGV
        !            75: my $tofilter=shift @ARGV;
        !            76: open(IN,"<$tofilter"); my @lines=<IN>;
        !            77: my $parsestring=join('',@lines); undef @lines; close IN;
        !            78: my $parser = HTML::TokeParser->new(\$parsestring) or
        !            79:     die('can\'t create TokeParser object');
        !            80: $parser->xml_mode('1');
        !            81: 
        !            82: # --------------------------------------------- initialize and clear conditions
        !            83: my %conditions; &cc;
        !            84: 
        !            85: # Define handling methods for mode-dependent text rendering
        !            86: $parser->{textify}={
        !            87:     'xfml' => \&format_xfml,
        !            88:     'when:name' => \&format_when_name,
        !            89:     'when:attribute' => \&format_when_attribute,
        !            90:     'when:cdata' => \&format_when_cdata,
        !            91:     'choice:exclude' => \&format_choice_exclude,
        !            92:     'clause' => \&format_clause,
        !            93:     };
        !            94: 
        !            95: my $text;
        !            96: my $xfml;
        !            97: my $wloc=0;
        !            98: my %eha;
        !            99: 
        !           100: # ----------------------------------------------- Run through and apply clauses
        !           101: my @lines2=<>; my $output=join('',@lines2); undef @lines2;
        !           102: my $lparser = HTML::TokeParser->new(\$output) or
        !           103:     die('can\'t create TokeParser object');
        !           104: $lparser->xml_mode('1');
        !           105: my $parsestring2;
        !           106: while (my $token = $parser->get_tag('clause')) {
        !           107:     $parsestring2=$output;
        !           108:     $lparser = HTML::TokeParser->new(\$parsestring2);
        !           109:     $lparser->xml_mode('1');
        !           110:     $output='';
        !           111:     &format_clause(@{$token});
        !           112:     $text = $parser->get_text('/clause');
        !           113:     $token = $parser->get_tag('/clause');
        !           114: 
        !           115:     my $token='';
        !           116:     my $ttype='';
        !           117:     my $excludeflag=0;
        !           118:     my $outcache='';
        !           119:     while ($token = $lparser->get_token()) {
        !           120: 	if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; }
        !           121: 	elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1];	}
        !           122: 	elsif ($token->[0] eq 'T') {
        !           123: 	    if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S'
        !           124: 		or $ttype eq 'E') {
        !           125: 		$output.=$token->[1];
        !           126: 	    }
        !           127: 	    else {
        !           128: 		$outcache.=$token->[1];
        !           129: 	    }
        !           130: 	}
        !           131: 	elsif ($token->[0] eq 'S') {
        !           132: 	    if ($eh{$token->[1]} or $excludeflag==1) {
        !           133: 		$ttype='';
        !           134: 		$excludeflag=1;
        !           135: 		$outcache.=$token->[4];
        !           136: 	    }
        !           137: 	    else {
        !           138: 		$ttype='S';
        !           139: 		$output.=$token->[4];
        !           140: 	    }
        !           141: 	    if ($excludeflag==1) {
        !           142: 		
        !           143: 	    }
        !           144: 	}
        !           145: 	elsif ($token->[0] eq 'E') {
        !           146: 	    if ($eh{$token->[1]} and $excludeflag==1) {
        !           147: 		$ttype='E';
        !           148: 		$excludeflag=0;
        !           149: 		$outcache.=$token->[2];
        !           150: 		my $retval=&evalconditions($outcache);
        !           151: 		if (&evalconditions($outcache)) {
        !           152: 		    $output.=$outcache;
        !           153: 		}
        !           154: 		else {
        !           155: 		    $output.='<!-- FILTERED OUT -->';
        !           156: 		}
        !           157: 		$outcache='';
        !           158: 	    }
        !           159: 	    elsif ($excludeflag==1) {
        !           160: 		$ttype='';
        !           161: 		$outcache.=$token->[2];
        !           162: 	    }
        !           163: 	    else {
        !           164: 		$output.=$token->[2];
        !           165: 		$ttype='E';
        !           166: 	    }
        !           167: 	}
        !           168:     }
        !           169:     &cc;
        !           170: }
        !           171: print $output;
        !           172: 
        !           173: # -------------------------------------------------------------- evalconditions
        !           174: sub evalconditions {
        !           175:     my ($parsetext)=@_;
        !           176:     my $eparser = HTML::TokeParser->new(\$parsetext);
        !           177:     unless (@{$conditions{'name'}} or
        !           178: 	    @{$conditions{'attribute'}}) {
        !           179: 	return 0;
        !           180:     }
        !           181:     my $nameflag=0;
        !           182:     my $cdataflag=0;
        !           183:     my $matchflag=0;
        !           184:     my $Ttoken='';
        !           185:     while (my $token = $eparser->get_token()) {
        !           186: 	if ($token->[0] eq 'S') {
        !           187: 	    foreach my $name (@{$conditions{'name'}}) {
        !           188: 		my $flag=0;
        !           189: 		my $match=$name;
        !           190: 		if ($match=~/^\!/) {
        !           191: 		    $match=~s/^\!//g;
        !           192: 		    $flag=1;
        !           193: 		}
        !           194: 		$match=~s/^\///g;
        !           195: 		$match=~s/\/$//g;
        !           196: 		if ((!$flag and $token->[1]=~/$match/) or
        !           197: 		    ($flag and $token->[1]!~/$match/)) {
        !           198: 		    $nameflag=1;
        !           199: 		}
        !           200: 	    }
        !           201: 	    $Ttoken='';
        !           202: 	}
        !           203: 	elsif ($token->[0] eq 'E') {
        !           204: 	    foreach my $name (@{$conditions{'name'}}) {
        !           205: 		my $flag=0;
        !           206: 		my $match=$name;
        !           207: 		if ($match=~/^\!/) {
        !           208: 		    $match=~s/^\!//g;
        !           209: 		    $flag=1;
        !           210: 		}
        !           211: 		$match=~s/^\///g;
        !           212: 		$match=~s/\/$//g;
        !           213: 		if ((!$flag and $token->[1]=~/$match/) or
        !           214: 		    ($flag and $token->[1]!~/$match/)) {
        !           215: 		    foreach my $cdata (@{$conditions{'cdata'}}) {
        !           216: 			my $flag=0;
        !           217: 			my $match=$cdata;
        !           218: 			if ($match=~/^\!/) {
        !           219: 			    $match=~s/^\!//g;
        !           220: 			    $flag=1;
        !           221: 			}
        !           222: 			$match=~s/^\///g;
        !           223: 			$match=~s/\/$//g;
        !           224: 			if ((!$flag and $Ttoken=~/$match/) or
        !           225: 			    ($flag and $Ttoken!~/$match/)) {
        !           226: 			    $cdataflag=1;
        !           227: 			}
        !           228: 		    }
        !           229: 		    if (@{$conditions{'cdata'}}) {
        !           230: 			if ($cdataflag) {
        !           231: 			    return 0;
        !           232: 			}
        !           233: 		    }
        !           234: 		    else {
        !           235: 			if ($nameflag) {
        !           236: 			    return 0;
        !           237: 			}
        !           238: 		    }
        !           239: 		    $nameflag=0;
        !           240: 		}
        !           241: 	    }
        !           242: 	}
        !           243: 	elsif ($token->[0] eq 'T') {
        !           244: 	    if ($nameflag) {
        !           245: 		$Ttoken.=$token->[1];
        !           246: 	    }
        !           247: 	}
        !           248:     }
        !           249:     return 1;
        !           250: }
        !           251: 
        !           252: # ------------------------------------------------------------ clear conditions
        !           253: sub cc {
        !           254:     @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
        !           255:     @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
        !           256:     @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
        !           257:     @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
        !           258:     %eh=(1,1); delete $eh{1};
        !           259: }
        !           260: 
        !           261: # --------------------------------------- remove starting and ending whitespace
        !           262: sub trim {
        !           263:     my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
        !           264: }
        !           265: 
        !           266: 
        !           267: 
        !           268: 
        !           269: # --------------------------------------------------------- Format xfml section
        !           270: sub format_xfml {
        !           271:     my (@tokeninfo)=@_;
        !           272:     return '';
        !           273: }
        !           274: 
        !           275: # ------------------------------------------------------- Format clause section
        !           276: sub format_clause {
        !           277:     my (@tokeninfo)=@_;
        !           278:     return '';
        !           279: }
        !           280: 
        !           281: # ---------------------------------------------------- Format when:name section
        !           282: sub format_when_name {
        !           283:     my (@tokeninfo)=@_;
        !           284: #    $wloc++;
        !           285:     my $att_match=$tokeninfo[2]->{'match'};
        !           286:     push @{$conditions{'name'}},$att_match;
        !           287:     my $text=&trim($parser->get_text('/when:name'));
        !           288:     $parser->get_tag('/when:name');
        !           289: #    $wloc--;
        !           290: #    &cc unless $wloc;
        !           291:     return '';
        !           292: }
        !           293: 
        !           294: # --------------------------------------------------- Format when:cdata section
        !           295: sub format_when_cdata {
        !           296:     my (@tokeninfo)=@_;
        !           297:     $wloc++;
        !           298:     my $att_match=$tokeninfo[2]->{'match'};
        !           299:     push @{$conditions{'cdata'}},$att_match;
        !           300:     my $text=&trim($parser->get_text('/when:cdata'));
        !           301:     $parser->get_tag('/when:cdata');
        !           302:     $wloc--;
        !           303: #    &cc unless $wloc;
        !           304:     return '';
        !           305: }
        !           306: 
        !           307: # ----------------------------------------------- Format choice:exclude section
        !           308: sub format_choice_exclude {
        !           309:     my (@tokeninfo)=@_;
        !           310:     my $text=&trim($parser->get_text('/choice:exclude'));
        !           311:     $parser->get_tag('/choice:exclude');
        !           312:     $eh{$tokeninfo[2]->{'nodename'}}++;
        !           313:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
        !           314:          [@{$conditions{'name'}}];
        !           315:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
        !           316:          [@{$conditions{'attribute'}}];
        !           317:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
        !           318:          [@{$conditions{'value'}}];
        !           319:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
        !           320:          [@{$conditions{'cdata'}}];
        !           321:     return '';
        !           322: }
        !           323: 
        !           324: # ----------------------------------- POD (plain old documentation, CPAN style)
        !           325: 
        !           326: =pod
        !           327: 
        !           328: =head1 NAME
        !           329: 
        !           330: xfml_parse.pl - This is meant to parse XFML files (XML Filtering Markup Language.)
        !           331: 
        !           332: =head1 SYNOPSIS
        !           333: 
        !           334: Usage is for lpml file to come in through standard input.
        !           335: 
        !           336: =over 4
        !           337: 
        !           338: =item * 
        !           339: 
        !           340: 1st argument is name of xfml file.
        !           341: 
        !           342: =back
        !           343: 
        !           344: Example:
        !           345: 
        !           346:  cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
        !           347: 
        !           348: or
        !           349: 
        !           350:  perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
        !           351: 
        !           352: =head1 DESCRIPTION
        !           353: 
        !           354: I am using a multiple pass-through approach to parsing
        !           355: the xfml file.  This saves memory and makes sure the server
        !           356: will never be overloaded.
        !           357: 
        !           358: =head1 README
        !           359: 
        !           360: I am using a multiple pass-through approach to parsing
        !           361: the xfml file.  This saves memory and makes sure the server
        !           362: will never be overloaded.
        !           363: 
        !           364: =head1 PREREQUISITES
        !           365: 
        !           366: HTML::TokeParser
        !           367: 
        !           368: =head1 COREQUISITES
        !           369: 
        !           370: =head1 OSNAMES
        !           371: 
        !           372: linux
        !           373: 
        !           374: =head1 SCRIPT CATEGORIES
        !           375: 
        !           376: Packaging/Administrative
        !           377: 
        !           378: =head1 AUTHOR
        !           379: 
        !           380:  Scott Harrison
        !           381:  codeharrison@yahoo.com
        !           382: 
        !           383: Please let me know how/if you are finding this script useful and
        !           384: any/all suggestions.  -Scott
        !           385: 
        !           386: =cut
        !           387: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>