File:  [LON-CAPA] / nsdl / build / xfml_parse.pl
Revision 1.1: download - view: text, annotated - select for diffs
Sun May 5 02:44:57 2002 UTC (21 years, 10 months ago) by harris41
Branches: MAIN
CVS tags: HEAD
setting up the LPML toolset from http://lpml.sourceforge.net/

#!/usr/bin/perl

# -------------------------------------------------------- Documentation notice
# Run "perldoc ./lpml_parse.pl" in order to best view the software
# documentation internalized in this program.

# --------------------------------------------------------- License Information
# The LearningOnline Network with CAPA
# piml_parse.pl - Linux Packaging Markup Language parser
#
# $Id: xfml_parse.pl,v 1.1 2002/05/05 02:44:57 harris41 Exp $
#
# Written by Scott Harrison, codeharrison@yahoo.com
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
# YEAR=2002
# 1/26,1/27,1/28,1/29,1/30,1/31,2/20,4/8 - Scott Harrison
#
###

# Read in 2 XML file; first is the filter specification, the second
# is the XML file to be filtered

###############################################################################
##                                                                           ##
## ORGANIZATION OF THIS PERL SCRIPT                                          ##
## 1. Notes                                                                  ##
## 2. Read in filter file                                                    ##
## 3. Initialize and clear conditions                                        ##
## 4. Run through and apply clauses                                          ##
##                                                                           ##
###############################################################################

# ----------------------------------------------------------------------- Notes
#
# This is meant to parse files meeting the xfml document type.
# See xfml.dtd.  XFML=XML Filtering Markup Language.

use HTML::TokeParser;
use strict;

unless (@ARGV) {
    print(<<END);
Incorrect invocation.
Example usages:
cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
END
}

my %eh;

# ---------------------------------------------- Read in filter file from @ARGV
my $tofilter=shift @ARGV;
open(IN,"<$tofilter"); my @lines=<IN>;
my $parsestring=join('',@lines); undef @lines; close IN;
my $parser = HTML::TokeParser->new(\$parsestring) or
    die('can\'t create TokeParser object');
$parser->xml_mode('1');

# --------------------------------------------- initialize and clear conditions
my %conditions; &cc;

# Define handling methods for mode-dependent text rendering
$parser->{textify}={
    'xfml' => \&format_xfml,
    'when:name' => \&format_when_name,
    'when:attribute' => \&format_when_attribute,
    'when:cdata' => \&format_when_cdata,
    'choice:exclude' => \&format_choice_exclude,
    'clause' => \&format_clause,
    };

my $text;
my $xfml;
my $wloc=0;
my %eha;

# ----------------------------------------------- Run through and apply clauses
my @lines2=<>; my $output=join('',@lines2); undef @lines2;
my $lparser = HTML::TokeParser->new(\$output) or
    die('can\'t create TokeParser object');
$lparser->xml_mode('1');
my $parsestring2;
while (my $token = $parser->get_tag('clause')) {
    $parsestring2=$output;
    $lparser = HTML::TokeParser->new(\$parsestring2);
    $lparser->xml_mode('1');
    $output='';
    &format_clause(@{$token});
    $text = $parser->get_text('/clause');
    $token = $parser->get_tag('/clause');

    my $token='';
    my $ttype='';
    my $excludeflag=0;
    my $outcache='';
    while ($token = $lparser->get_token()) {
	if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; }
	elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1];	}
	elsif ($token->[0] eq 'T') {
	    if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S'
		or $ttype eq 'E') {
		$output.=$token->[1];
	    }
	    else {
		$outcache.=$token->[1];
	    }
	}
	elsif ($token->[0] eq 'S') {
	    if ($eh{$token->[1]} or $excludeflag==1) {
		$ttype='';
		$excludeflag=1;
		$outcache.=$token->[4];
	    }
	    else {
		$ttype='S';
		$output.=$token->[4];
	    }
	    if ($excludeflag==1) {
		
	    }
	}
	elsif ($token->[0] eq 'E') {
	    if ($eh{$token->[1]} and $excludeflag==1) {
		$ttype='E';
		$excludeflag=0;
		$outcache.=$token->[2];
		my $retval=&evalconditions($outcache);
		if (&evalconditions($outcache)) {
		    $output.=$outcache;
		}
		else {
		    $output.='<!-- FILTERED OUT -->';
		}
		$outcache='';
	    }
	    elsif ($excludeflag==1) {
		$ttype='';
		$outcache.=$token->[2];
	    }
	    else {
		$output.=$token->[2];
		$ttype='E';
	    }
	}
    }
    &cc;
}
print $output;

# -------------------------------------------------------------- evalconditions
sub evalconditions {
    my ($parsetext)=@_;
    my $eparser = HTML::TokeParser->new(\$parsetext);
    unless (@{$conditions{'name'}} or
	    @{$conditions{'attribute'}}) {
	return 0;
    }
    my $nameflag=0;
    my $cdataflag=0;
    my $matchflag=0;
    my $Ttoken='';
    while (my $token = $eparser->get_token()) {
	if ($token->[0] eq 'S') {
	    foreach my $name (@{$conditions{'name'}}) {
		my $flag=0;
		my $match=$name;
		if ($match=~/^\!/) {
		    $match=~s/^\!//g;
		    $flag=1;
		}
		$match=~s/^\///g;
		$match=~s/\/$//g;
		if ((!$flag and $token->[1]=~/$match/) or
		    ($flag and $token->[1]!~/$match/)) {
		    $nameflag=1;
		}
	    }
	    $Ttoken='';
	}
	elsif ($token->[0] eq 'E') {
	    foreach my $name (@{$conditions{'name'}}) {
		my $flag=0;
		my $match=$name;
		if ($match=~/^\!/) {
		    $match=~s/^\!//g;
		    $flag=1;
		}
		$match=~s/^\///g;
		$match=~s/\/$//g;
		if ((!$flag and $token->[1]=~/$match/) or
		    ($flag and $token->[1]!~/$match/)) {
		    foreach my $cdata (@{$conditions{'cdata'}}) {
			my $flag=0;
			my $match=$cdata;
			if ($match=~/^\!/) {
			    $match=~s/^\!//g;
			    $flag=1;
			}
			$match=~s/^\///g;
			$match=~s/\/$//g;
			if ((!$flag and $Ttoken=~/$match/) or
			    ($flag and $Ttoken!~/$match/)) {
			    $cdataflag=1;
			}
		    }
		    if (@{$conditions{'cdata'}}) {
			if ($cdataflag) {
			    return 0;
			}
		    }
		    else {
			if ($nameflag) {
			    return 0;
			}
		    }
		    $nameflag=0;
		}
	    }
	}
	elsif ($token->[0] eq 'T') {
	    if ($nameflag) {
		$Ttoken.=$token->[1];
	    }
	}
    }
    return 1;
}

# ------------------------------------------------------------ clear conditions
sub cc {
    @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
    @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
    @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
    @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
    %eh=(1,1); delete $eh{1};
}

# --------------------------------------- remove starting and ending whitespace
sub trim {
    my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
}




# --------------------------------------------------------- Format xfml section
sub format_xfml {
    my (@tokeninfo)=@_;
    return '';
}

# ------------------------------------------------------- Format clause section
sub format_clause {
    my (@tokeninfo)=@_;
    return '';
}

# ---------------------------------------------------- Format when:name section
sub format_when_name {
    my (@tokeninfo)=@_;
#    $wloc++;
    my $att_match=$tokeninfo[2]->{'match'};
    push @{$conditions{'name'}},$att_match;
    my $text=&trim($parser->get_text('/when:name'));
    $parser->get_tag('/when:name');
#    $wloc--;
#    &cc unless $wloc;
    return '';
}

# --------------------------------------------------- Format when:cdata section
sub format_when_cdata {
    my (@tokeninfo)=@_;
    $wloc++;
    my $att_match=$tokeninfo[2]->{'match'};
    push @{$conditions{'cdata'}},$att_match;
    my $text=&trim($parser->get_text('/when:cdata'));
    $parser->get_tag('/when:cdata');
    $wloc--;
#    &cc unless $wloc;
    return '';
}

# ----------------------------------------------- Format choice:exclude section
sub format_choice_exclude {
    my (@tokeninfo)=@_;
    my $text=&trim($parser->get_text('/choice:exclude'));
    $parser->get_tag('/choice:exclude');
    $eh{$tokeninfo[2]->{'nodename'}}++;
    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
         [@{$conditions{'name'}}];
    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
         [@{$conditions{'attribute'}}];
    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
         [@{$conditions{'value'}}];
    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
         [@{$conditions{'cdata'}}];
    return '';
}

# ----------------------------------- POD (plain old documentation, CPAN style)

=pod

=head1 NAME

xfml_parse.pl - This is meant to parse XFML files (XML Filtering Markup Language.)

=head1 SYNOPSIS

Usage is for lpml file to come in through standard input.

=over 4

=item * 

1st argument is name of xfml file.

=back

Example:

 cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml

or

 perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml

=head1 DESCRIPTION

I am using a multiple pass-through approach to parsing
the xfml file.  This saves memory and makes sure the server
will never be overloaded.

=head1 README

I am using a multiple pass-through approach to parsing
the xfml file.  This saves memory and makes sure the server
will never be overloaded.

=head1 PREREQUISITES

HTML::TokeParser

=head1 COREQUISITES

=head1 OSNAMES

linux

=head1 SCRIPT CATEGORIES

Packaging/Administrative

=head1 AUTHOR

 Scott Harrison
 codeharrison@yahoo.com

Please let me know how/if you are finding this script useful and
any/all suggestions.  -Scott

=cut


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>