loncom/build/xfml_parse.pl - view

File: [LON-CAPA] / loncom / build / xfml_parse.pl
Revision 1.7: download - view: text, annotated - select for diffs
Wed May 22 17:07:50 2002 UTC (21 years, 11 months ago) by harris41
Branches: MAIN
CVS tags: version_2_9_X, version_2_9_99_0, version_2_9_1, version_2_9_0, version_2_8_X, version_2_8_99_1, version_2_8_99_0, version_2_8_2, version_2_8_1, version_2_8_0, version_2_7_X, version_2_7_99_1, version_2_7_99_0, version_2_7_1, version_2_7_0, version_2_6_X, version_2_6_99_1, version_2_6_99_0, version_2_6_3, version_2_6_2, version_2_6_1, version_2_6_0, version_2_5_X, version_2_5_99_1, version_2_5_99_0, version_2_5_2, version_2_5_1, version_2_5_0, version_2_4_X, version_2_4_99_0, version_2_4_2, version_2_4_1, version_2_4_0, version_2_3_X, version_2_3_99_0, version_2_3_2, version_2_3_1, version_2_3_0, version_2_2_X, version_2_2_99_1, version_2_2_99_0, version_2_2_2, version_2_2_1, version_2_2_0, version_2_1_X, version_2_1_99_3, version_2_1_99_2, version_2_1_99_1, version_2_1_99_0, version_2_1_3, version_2_1_2, version_2_1_1, version_2_1_0, version_2_12_X, version_2_11_X, version_2_11_4_uiuc, version_2_11_4_msu, version_2_11_4, version_2_11_3_uiuc, version_2_11_3_msu, version_2_11_3, version_2_11_2_uiuc, version_2_11_2_msu, version_2_11_2_educog, version_2_11_2, version_2_11_1, version_2_11_0_RC3, version_2_11_0_RC2, version_2_11_0_RC1, version_2_11_0, version_2_10_X, version_2_10_1, version_2_10_0_RC2, version_2_10_0_RC1, version_2_10_0, version_2_0_X, version_2_0_99_1, version_2_0_2, version_2_0_1, version_2_0_0, version_1_99_3, version_1_99_2, version_1_99_1_tmcc, version_1_99_1, version_1_99_0_tmcc, version_1_99_0, version_1_3_X, version_1_3_3, version_1_3_2, version_1_3_1, version_1_3_0, version_1_2_X, version_1_2_99_1, version_1_2_99_0, version_1_2_1, version_1_2_0, version_1_1_X, version_1_1_99_5, version_1_1_99_4, version_1_1_99_3, version_1_1_99_2, version_1_1_99_1, version_1_1_99_0, version_1_1_3, version_1_1_2, version_1_1_1, version_1_1_0, version_1_0_99_3, version_1_0_99_2, version_1_0_99_1, version_1_0_99, version_1_0_3, version_1_0_2, version_1_0_1, version_1_0_0, version_0_99_5, version_0_99_4, version_0_99_3, version_0_99_2, version_0_99_1, version_0_99_0, version_0_6_2, version_0_6, version_0_5_1, version_0_5, version_0_4, stable_2002_july, loncapaMITrelate_1, language_hyphenation_merge, language_hyphenation, conference_2003, bz6209-base, bz6209, bz5969, bz5610, bz2851, STABLE, PRINT_INCOMPLETE_base, PRINT_INCOMPLETE, HEAD, GCI_3, GCI_2, GCI_1, BZ5971-printing-apage, BZ5434-fox, BZ4492-merge, BZ4492-feature_horizontal_radioresponse, BZ4492-feature_Support_horizontal_radioresponse, BZ4492-Support_horizontal_radioresponse

xfml_parse.pl already has been well-tested.  the changes in 1.6 are incorrect
and are now reversed.

1: #!/usr/bin/perl 2: 3: # -------------------------------------------------------- Documentation notice 4: # Run "perldoc ./lpml_parse.pl" in order to best view the software 5: # documentation internalized in this program. 6: 7: # --------------------------------------------------------- License Information 8: # The LearningOnline Network with CAPA 9: # piml_parse.pl - Linux Packaging Markup Language parser 10: # 11: # $Id: xfml_parse.pl,v 1.7 2002/05/22 17:07:50 harris41 Exp $ 12: # 13: # Written by Scott Harrison, codeharrison@yahoo.com 14: # 15: # Copyright Michigan State University Board of Trustees 16: # 17: # This file is part of the LearningOnline Network with CAPA (LON-CAPA). 18: # 19: # LON-CAPA is free software; you can redistribute it and/or modify 20: # it under the terms of the GNU General Public License as published by 21: # the Free Software Foundation; either version 2 of the License, or 22: # (at your option) any later version. 23: # 24: # LON-CAPA is distributed in the hope that it will be useful, 25: # but WITHOUT ANY WARRANTY; without even the implied warranty of 26: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 27: # GNU General Public License for more details. 28: # 29: # You should have received a copy of the GNU General Public License 30: # along with LON-CAPA; if not, write to the Free Software 31: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 32: # 33: # /home/httpd/html/adm/gpl.txt 34: # 35: # http://www.lon-capa.org/ 36: # 37: # YEAR=2002 38: # 1/26,1/27,1/28,1/29,1/30,1/31,2/20,4/8 - Scott Harrison 39: # 40: ### 41: 42: # Read in 2 XML file; first is the filter specification, the second 43: # is the XML file to be filtered 44: 45: ############################################################################### 46: ## ## 47: ## ORGANIZATION OF THIS PERL SCRIPT ## 48: ## 1. Notes ## 49: ## 2. Read in filter file ## 50: ## 3. Initialize and clear conditions ## 51: ## 4. Run through and apply clauses ## 52: ## ## 53: ############################################################################### 54: 55: # ----------------------------------------------------------------------- Notes 56: # 57: # This is meant to parse files meeting the xfml document type. 58: # See xfml.dtd. XFML=XML Filtering Markup Language. 59: 60: use HTML::TokeParser; 61: use strict; 62: 63: unless (@ARGV) { 64: print(<<END); 65: Incorrect invocation. 66: Example usages: 67: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml 68: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml 69: END 70: } 71: 72: my %eh; 73: 74: # ---------------------------------------------- Read in filter file from @ARGV 75: my $tofilter=shift @ARGV; 76: open(IN,"<$tofilter"); my @lines=<IN>; 77: my $parsestring=join('',@lines); undef @lines; close IN; 78: my $parser = HTML::TokeParser->new(\$parsestring) or 79: die('can\'t create TokeParser object'); 80: $parser->xml_mode('1'); 81: 82: # --------------------------------------------- initialize and clear conditions 83: my %conditions; &cc; 84: 85: # Define handling methods for mode-dependent text rendering 86: $parser->{textify}={ 87: 'xfml' => \&format_xfml, 88: 'when:name' => \&format_when_name, 89: 'when:attribute' => \&format_when_attribute, 90: 'when:cdata' => \&format_when_cdata, 91: 'choice:exclude' => \&format_choice_exclude, 92: 'clause' => \&format_clause, 93: }; 94: 95: my $text; 96: my $xfml; 97: my $wloc=0; 98: my %eha; 99: 100: # ----------------------------------------------- Run through and apply clauses 101: my @lines2=<>; my $output=join('',@lines2); undef @lines2; 102: my $lparser = HTML::TokeParser->new(\$output) or 103: die('can\'t create TokeParser object'); 104: $lparser->xml_mode('1'); 105: my $parsestring2; 106: while (my $token = $parser->get_tag('clause')) { 107: $parsestring2=$output; 108: $lparser = HTML::TokeParser->new(\$parsestring2); 109: $lparser->xml_mode('1'); 110: $output=''; 111: &format_clause(@{$token}); 112: $text = $parser->get_text('/clause'); 113: $token = $parser->get_tag('/clause'); 114: 115: my $token=''; 116: my $ttype=''; 117: my $excludeflag=0; 118: my $outcache=''; 119: while ($token = $lparser->get_token()) { 120: if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; } 121: elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1]; } 122: elsif ($token->[0] eq 'T') { 123: if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S' 124: or $ttype eq 'E') { 125: $output.=$token->[1]; 126: } 127: else { 128: $outcache.=$token->[1]; 129: } 130: } 131: elsif ($token->[0] eq 'S') { 132: if ($eh{$token->[1]} or $excludeflag==1) { 133: $ttype=''; 134: $excludeflag=1; 135: $outcache.=$token->[4]; 136: } 137: else { 138: $ttype='S'; 139: $output.=$token->[4]; 140: } 141: if ($excludeflag==1) { 142: 143: } 144: } 145: elsif ($token->[0] eq 'E') { 146: if ($eh{$token->[1]} and $excludeflag==1) { 147: $ttype='E'; 148: $excludeflag=0; 149: $outcache.=$token->[2]; 150: my $retval=&evalconditions($outcache); 151: if (&evalconditions($outcache)) { 152: $output.=$outcache; 153: } 154: else { 155: $output.=''; 156: } 157: $outcache=''; 158: } 159: elsif ($excludeflag==1) { 160: $ttype=''; 161: $outcache.=$token->[2]; 162: } 163: else { 164: $output.=$token->[2]; 165: $ttype='E'; 166: } 167: } 168: } 169: &cc; 170: } 171: print $output; 172: 173: # -------------------------------------------------------------- evalconditions 174: sub evalconditions { 175: my ($parsetext)=@_; 176: my $eparser = HTML::TokeParser->new(\$parsetext); 177: unless (@{$conditions{'name'}} or 178: @{$conditions{'attribute'}}) { 179: return 0; 180: } 181: my $nameflag=0; 182: my $cdataflag=0; 183: my $matchflag=0; 184: my $Ttoken=''; 185: while (my $token = $eparser->get_token()) { 186: if ($token->[0] eq 'S') { 187: foreach my $name (@{$conditions{'name'}}) { 188: my $flag=0; 189: my $match=$name; 190: if ($match=~/^\!/) { 191: $match=~s/^\!//g; 192: $flag=1; 193: } 194: $match=~s/^\///g; 195: $match=~s/\/$//g; 196: if ((!$flag and $token->[1]=~/$match/) or 197: ($flag and $token->[1]!~/$match/)) { 198: $nameflag=1; 199: } 200: } 201: $Ttoken=''; 202: } 203: elsif ($token->[0] eq 'E') { 204: foreach my $name (@{$conditions{'name'}}) { 205: my $flag=0; 206: my $match=$name; 207: if ($match=~/^\!/) { 208: $match=~s/^\!//g; 209: $flag=1; 210: } 211: $match=~s/^\///g; 212: $match=~s/\/$//g; 213: if ((!$flag and $token->[1]=~/$match/) or 214: ($flag and $token->[1]!~/$match/)) { 215: foreach my $cdata (@{$conditions{'cdata'}}) { 216: my $flag=0; 217: my $match=$cdata; 218: if ($match=~/^\!/) { 219: $match=~s/^\!//g; 220: $flag=1; 221: } 222: $match=~s/^\///g; 223: $match=~s/\/$//g; 224: if ((!$flag and $Ttoken=~/$match/) or 225: ($flag and $Ttoken!~/$match/)) { 226: $cdataflag=1; 227: } 228: } 229: if (@{$conditions{'cdata'}}) { 230: if ($cdataflag) { 231: return 0; 232: } 233: } 234: else { 235: if ($nameflag) { 236: return 0; 237: } 238: } 239: $nameflag=0; 240: } 241: } 242: } 243: elsif ($token->[0] eq 'T') { 244: if ($nameflag) { 245: $Ttoken.=$token->[1]; 246: } 247: } 248: } 249: return 1; 250: } 251: 252: # ------------------------------------------------------------ clear conditions 253: sub cc { 254: @{$conditions{'name'}}=(); pop @{$conditions{'name'}}; 255: @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}}; 256: @{$conditions{'value'}}=(); pop @{$conditions{'value'}}; 257: @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}}; 258: %eh=(1,1); delete $eh{1}; 259: } 260: 261: # --------------------------------------- remove starting and ending whitespace 262: sub trim { 263: my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s; 264: } 265: 266: 267: 268: 269: # --------------------------------------------------------- Format xfml section 270: sub format_xfml { 271: my (@tokeninfo)=@_; 272: return ''; 273: } 274: 275: # ------------------------------------------------------- Format clause section 276: sub format_clause { 277: my (@tokeninfo)=@_; 278: return ''; 279: } 280: 281: # ---------------------------------------------------- Format when:name section 282: sub format_when_name { 283: my (@tokeninfo)=@_; 284: # $wloc++; 285: my $att_match=$tokeninfo[2]->{'match'}; 286: push @{$conditions{'name'}},$att_match; 287: my $text=&trim($parser->get_text('/when:name')); 288: $parser->get_tag('/when:name'); 289: # $wloc--; 290: # &cc unless $wloc; 291: return ''; 292: } 293: 294: # --------------------------------------------------- Format when:cdata section 295: sub format_when_cdata { 296: my (@tokeninfo)=@_; 297: $wloc++; 298: my $att_match=$tokeninfo[2]->{'match'}; 299: push @{$conditions{'cdata'}},$att_match; 300: my $text=&trim($parser->get_text('/when:cdata')); 301: $parser->get_tag('/when:cdata'); 302: $wloc--; 303: # &cc unless $wloc; 304: return ''; 305: } 306: 307: # ----------------------------------------------- Format choice:exclude section 308: sub format_choice_exclude { 309: my (@tokeninfo)=@_; 310: my $text=&trim($parser->get_text('/choice:exclude')); 311: $parser->get_tag('/choice:exclude'); 312: $eh{$tokeninfo[2]->{'nodename'}}++; 313: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}}, 314: [@{$conditions{'name'}}]; 315: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}}, 316: [@{$conditions{'attribute'}}]; 317: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}}, 318: [@{$conditions{'value'}}]; 319: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}}, 320: [@{$conditions{'cdata'}}]; 321: return ''; 322: } 323: 324: # ----------------------------------- POD (plain old documentation, CPAN style) 325: 326: =pod 327: 328: =head1 NAME 329: 330: xfml_parse.pl - This is meant to parse XFML files (XML Filtering Markup Language.) 331: 332: =head1 SYNOPSIS 333: 334: Usage is for lpml file to come in through standard input. 335: 336: =over 4 337: 338: =item * 339: 340: 1st argument is name of xfml file. 341: 342: =back 343: 344: Example: 345: 346: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml 347: 348: or 349: 350: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml 351: 352: =head1 DESCRIPTION 353: 354: I am using a multiple pass-through approach to parsing 355: the xfml file. This saves memory and makes sure the server 356: will never be overloaded. 357: 358: =head1 README 359: 360: I am using a multiple pass-through approach to parsing 361: the xfml file. This saves memory and makes sure the server 362: will never be overloaded. 363: 364: =head1 PREREQUISITES 365: 366: HTML::TokeParser 367: 368: =head1 COREQUISITES 369: 370: =head1 OSNAMES 371: 372: linux 373: 374: =head1 SCRIPT CATEGORIES 375: 376: Packaging/Administrative 377: 378: =head1 AUTHOR 379: 380: Scott Harrison 381: codeharrison@yahoo.com 382: 383: Please let me know how/if you are finding this script useful and 384: any/all suggestions. -Scott 385: 386: =cut 387: