nsdl/build/xfml_parse.pl - view

File: [LON-CAPA] / nsdl / build / xfml_parse.pl
Revision 1.1: download - view: text, annotated - select for diffs
Sun May 5 02:44:57 2002 UTC (21 years, 11 months ago) by harris41
Branches: MAIN
CVS tags: HEAD

setting up the LPML toolset from http://lpml.sourceforge.net/

#!/usr/bin/perl # -------------------------------------------------------- Documentation notice # Run "perldoc ./lpml_parse.pl" in order to best view the software # documentation internalized in this program. # --------------------------------------------------------- License Information # The LearningOnline Network with CAPA # piml_parse.pl - Linux Packaging Markup Language parser # # $Id: xfml_parse.pl,v 1.1 2002/05/05 02:44:57 harris41 Exp $ # # Written by Scott Harrison, codeharrison@yahoo.com # # Copyright Michigan State University Board of Trustees # # This file is part of the LearningOnline Network with CAPA (LON-CAPA). # # LON-CAPA is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # LON-CAPA is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with LON-CAPA; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # /home/httpd/html/adm/gpl.txt # # http://www.lon-capa.org/ # # YEAR=2002 # 1/26,1/27,1/28,1/29,1/30,1/31,2/20,4/8 - Scott Harrison # ### # Read in 2 XML file; first is the filter specification, the second # is the XML file to be filtered ############################################################################### ## ## ## ORGANIZATION OF THIS PERL SCRIPT ## ## 1. Notes ## ## 2. Read in filter file ## ## 3. Initialize and clear conditions ## ## 4. Run through and apply clauses ## ## ## ############################################################################### # ----------------------------------------------------------------------- Notes # # This is meant to parse files meeting the xfml document type. # See xfml.dtd. XFML=XML Filtering Markup Language. use HTML::TokeParser; use strict; unless (@ARGV) { print(<<END); Incorrect invocation. Example usages: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml END } my %eh; # ---------------------------------------------- Read in filter file from @ARGV my $tofilter=shift @ARGV; open(IN,"<$tofilter"); my @lines=<IN>; my $parsestring=join('',@lines); undef @lines; close IN; my $parser = HTML::TokeParser->new(\$parsestring) or die('can\'t create TokeParser object'); $parser->xml_mode('1'); # --------------------------------------------- initialize and clear conditions my %conditions; &cc; # Define handling methods for mode-dependent text rendering $parser->{textify}={ 'xfml' => \&format_xfml, 'when:name' => \&format_when_name, 'when:attribute' => \&format_when_attribute, 'when:cdata' => \&format_when_cdata, 'choice:exclude' => \&format_choice_exclude, 'clause' => \&format_clause, }; my $text; my $xfml; my $wloc=0; my %eha; # ----------------------------------------------- Run through and apply clauses my @lines2=<>; my $output=join('',@lines2); undef @lines2; my $lparser = HTML::TokeParser->new(\$output) or die('can\'t create TokeParser object'); $lparser->xml_mode('1'); my $parsestring2; while (my $token = $parser->get_tag('clause')) { $parsestring2=$output; $lparser = HTML::TokeParser->new(\$parsestring2); $lparser->xml_mode('1'); $output=''; &format_clause(@{$token}); $text = $parser->get_text('/clause'); $token = $parser->get_tag('/clause'); my $token=''; my $ttype=''; my $excludeflag=0; my $outcache=''; while ($token = $lparser->get_token()) { if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; } elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1]; } elsif ($token->[0] eq 'T') { if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S' or $ttype eq 'E') { $output.=$token->[1]; } else { $outcache.=$token->[1]; } } elsif ($token->[0] eq 'S') { if ($eh{$token->[1]} or $excludeflag==1) { $ttype=''; $excludeflag=1; $outcache.=$token->[4]; } else { $ttype='S'; $output.=$token->[4]; } if ($excludeflag==1) { } } elsif ($token->[0] eq 'E') { if ($eh{$token->[1]} and $excludeflag==1) { $ttype='E'; $excludeflag=0; $outcache.=$token->[2]; my $retval=&evalconditions($outcache); if (&evalconditions($outcache)) { $output.=$outcache; } else { $output.=''; } $outcache=''; } elsif ($excludeflag==1) { $ttype=''; $outcache.=$token->[2]; } else { $output.=$token->[2]; $ttype='E'; } } } &cc; } print $output; # -------------------------------------------------------------- evalconditions sub evalconditions { my ($parsetext)=@_; my $eparser = HTML::TokeParser->new(\$parsetext); unless (@{$conditions{'name'}} or @{$conditions{'attribute'}}) { return 0; } my $nameflag=0; my $cdataflag=0; my $matchflag=0; my $Ttoken=''; while (my $token = $eparser->get_token()) { if ($token->[0] eq 'S') { foreach my $name (@{$conditions{'name'}}) { my $flag=0; my $match=$name; if ($match=~/^\!/) { $match=~s/^\!//g; $flag=1; } $match=~s/^\///g; $match=~s/\/$//g; if ((!$flag and $token->[1]=~/$match/) or ($flag and $token->[1]!~/$match/)) { $nameflag=1; } } $Ttoken=''; } elsif ($token->[0] eq 'E') { foreach my $name (@{$conditions{'name'}}) { my $flag=0; my $match=$name; if ($match=~/^\!/) { $match=~s/^\!//g; $flag=1; } $match=~s/^\///g; $match=~s/\/$//g; if ((!$flag and $token->[1]=~/$match/) or ($flag and $token->[1]!~/$match/)) { foreach my $cdata (@{$conditions{'cdata'}}) { my $flag=0; my $match=$cdata; if ($match=~/^\!/) { $match=~s/^\!//g; $flag=1; } $match=~s/^\///g; $match=~s/\/$//g; if ((!$flag and $Ttoken=~/$match/) or ($flag and $Ttoken!~/$match/)) { $cdataflag=1; } } if (@{$conditions{'cdata'}}) { if ($cdataflag) { return 0; } } else { if ($nameflag) { return 0; } } $nameflag=0; } } } elsif ($token->[0] eq 'T') { if ($nameflag) { $Ttoken.=$token->[1]; } } } return 1; } # ------------------------------------------------------------ clear conditions sub cc { @{$conditions{'name'}}=(); pop @{$conditions{'name'}}; @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}}; @{$conditions{'value'}}=(); pop @{$conditions{'value'}}; @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}}; %eh=(1,1); delete $eh{1}; } # --------------------------------------- remove starting and ending whitespace sub trim { my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s; } # --------------------------------------------------------- Format xfml section sub format_xfml { my (@tokeninfo)=@_; return ''; } # ------------------------------------------------------- Format clause section sub format_clause { my (@tokeninfo)=@_; return ''; } # ---------------------------------------------------- Format when:name section sub format_when_name { my (@tokeninfo)=@_; # $wloc++; my $att_match=$tokeninfo[2]->{'match'}; push @{$conditions{'name'}},$att_match; my $text=&trim($parser->get_text('/when:name')); $parser->get_tag('/when:name'); # $wloc--; # &cc unless $wloc; return ''; } # --------------------------------------------------- Format when:cdata section sub format_when_cdata { my (@tokeninfo)=@_; $wloc++; my $att_match=$tokeninfo[2]->{'match'}; push @{$conditions{'cdata'}},$att_match; my $text=&trim($parser->get_text('/when:cdata')); $parser->get_tag('/when:cdata'); $wloc--; # &cc unless $wloc; return ''; } # ----------------------------------------------- Format choice:exclude section sub format_choice_exclude { my (@tokeninfo)=@_; my $text=&trim($parser->get_text('/choice:exclude')); $parser->get_tag('/choice:exclude'); $eh{$tokeninfo[2]->{'nodename'}}++; push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}}, [@{$conditions{'name'}}]; push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}}, [@{$conditions{'attribute'}}]; push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}}, [@{$conditions{'value'}}]; push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}}, [@{$conditions{'cdata'}}]; return ''; } # ----------------------------------- POD (plain old documentation, CPAN style) =pod =head1 NAME xfml_parse.pl - This is meant to parse XFML files (XML Filtering Markup Language.) =head1 SYNOPSIS Usage is for lpml file to come in through standard input. =over 4 =item * 1st argument is name of xfml file. =back Example: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml or perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml =head1 DESCRIPTION I am using a multiple pass-through approach to parsing the xfml file. This saves memory and makes sure the server will never be overloaded. =head1 README I am using a multiple pass-through approach to parsing the xfml file. This saves memory and makes sure the server will never be overloaded. =head1 PREREQUISITES HTML::TokeParser =head1 COREQUISITES =head1 OSNAMES linux =head1 SCRIPT CATEGORIES Packaging/Administrative =head1 AUTHOR Scott Harrison codeharrison@yahoo.com Please let me know how/if you are finding this script useful and any/all suggestions. -Scott =cut