#!/usr/bin/perl # -------------------------------------------------------- Documentation notice # Run "perldoc ./lpml_parse.pl" in order to best view the software # documentation internalized in this program. # --------------------------------------------------------- License Information # The LearningOnline Network with CAPA # piml_parse.pl - Linux Packaging Markup Language parser # # $Id: piml_parse.pl,v 1.1 2002/05/05 02:44:57 harris41 Exp $ # # Written by Scott Harrison, codeharrison@yahoo.com # # Copyright Michigan State University Board of Trustees # # This file is part of the LearningOnline Network with CAPA (LON-CAPA). # # LON-CAPA is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # LON-CAPA is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with LON-CAPA; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # /home/httpd/html/adm/gpl.txt # # http://www.lon-capa.org/ # # YEAR=2002 # 1/28,1/29,1/30,1/31,2/5,4/8 - Scott Harrison # ### ############################################################################### ## ## ## ORGANIZATION OF THIS PERL SCRIPT ## ## 1. Notes ## ## 2. Get command line arguments ## ## 3. First pass through (grab distribution-specific information) ## ## 4. Second pass through (parse out what is not necessary) ## ## 5. Third pass through (translate markup according to specified mode) ## ## 6. Functions (most all just format contents of different markup tags) ## ## 7. POD (plain old documentation, CPAN style) ## ## ## ############################################################################### # ----------------------------------------------------------------------- Notes # # I am using a multiple pass-through approach to parsing # the piml file. This saves memory and makes sure the server # will never be overloaded. # # This is meant to parse files meeting the piml document type. # See piml.dtd. PIML=Post Installation Markup Language. use HTML::TokeParser; my $usage=(<){} # throw away the input to avoid broken pipes print $usage; exit -1; # exit with error status } my $categorytype; if (@ARGV) { $categorytype = shift @ARGV; } my $dist; if (@ARGV) { $dist = shift @ARGV; } my $targetroot; my $targetrootarg; if (@ARGV) { $targetroot = shift @ARGV; } $targetroot=~s/\/$//; $targetrootarg=$targetroot; my $logcmd='| tee -a WARNINGS'; my $invocation; # --------------------------------------------------- Record program invocation if ($mode eq 'install' or $mode eq 'configinstall' or $mode eq 'build') { $invocation=(<; my $parsestring = join('',@parsecontents); my $outstring=''; # Need to make a pass through and figure out what defaults are # overrided. Top-down overriding strategy (leaves don't know # about distant leaves). my @hierarchy; $hierarchy[0]=0; my $hloc=0; my $token=''; $parser = HTML::TokeParser->new(\$parsestring) or die('can\'t create TokeParser object'); $parser->xml_mode('1'); my %hash; my $key; while ($token = $parser->get_token()) { if ($token->[0] eq 'S') { $hloc++; $hierarchy[$hloc]++; $key=$token->[1].join(',',@hierarchy[0..($hloc-1)]); my $thisdist=' '.$token->[2]{'dist'}.' '; if ($thisdist eq ' default ') { $hash{$key}=1; # there is a default setting for this key } elsif ($dist && $hash{$key}==1 && $thisdist=~/\s$dist\s/) { $hash{$key}=2; # disregard default setting for this key if # there is a directly requested distribution match } } if ($token->[0] eq 'E') { $hloc--; } } # --------------------------------------------------- Start second pass through undef $hloc; undef @hierarchy; undef $parser; $hierarchy[0]=0; $parser = HTML::TokeParser->new(\$parsestring) or die('can\'t create TokeParser object'); $parser->xml_mode('1'); my $cleanstring; while ($token = $parser->get_token()) { if ($token->[0] eq 'S') { $hloc++; $hierarchy[$hloc]++; $key=$token->[1].join(',',@hierarchy[0..($hloc-1)]); my $thisdist=' '.$token->[2]{'dist'}.' '; # This conditional clause is set up to ignore two sets # of invalid conditions before accepting entry into # the cleanstring. if ($hash{$key}==2 and !($thisdist eq ' ' or $thisdist =~/\s$dist\s/)) { if ($token->[4]!~/\/>$/) { $parser->get_tag('/'.$token->[1]); $hloc--; } } elsif ($thisdist ne ' ' and $thisdist!~/\s$dist\s/ and !($thisdist eq ' default ' and $hash{$key}!=2)) { if ($token->[4]!~/\/>$/) { $parser->get_tag('/'.$token->[1]); $hloc--; } } else { $cleanstring.=$token->[4]; } if ($token->[4]=~/\/>$/) { # $hloc--; } } if ($token->[0] eq 'E') { $cleanstring.=$token->[2]; $hloc--; } if ($token->[0] eq 'T') { $cleanstring.=$token->[1]; } } $cleanstring=&trim($cleanstring); $cleanstring=~s/\>\s*\n\s*\\new(\$cleanstring) or die('can\'t create TokeParser object'); $parser->xml_mode('1'); # Define handling methods for mode-dependent text rendering $parser->{textify}={ specialnotices => \&format_specialnotices, specialnotice => \&format_specialnotice, targetroot => \&format_targetroot, categories => \&format_categories, category => \&format_category, abbreviation => \&format_abbreviation, chown => \&format_chown, chmod => \&format_chmod, categoryname => \&format_categoryname, files => \&format_files, file => \&format_file, target => \&format_target, note => \&format_note, build => \&format_build, dependencies => \&format_dependencies, filenames => \&format_filenames, perlscript => \&format_perlscript, TARGET => \&format_TARGET, }; my $text; my $token; undef($hloc); undef(@hierarchy); my $hloc; my @hierarchy2; while ($token = $parser->get_tag('piml')) { &format_piml(@{$token}); $text = &trim($parser->get_text('/piml')); $token = $parser->get_tag('/piml'); print($piml); print("\n"); print($text); print("\n"); print(&end()); } exit(0); # ---------- Functions (most all just format contents of different markup tags) # ------------------------ Final output at end of markup parsing and formatting sub end { } # ----------------------- Take in string to parse and the separation expression sub extract_array { my ($stringtoparse,$sepexp) = @_; my @a=split(/$sepexp/,$stringtoparse); return \@a; } # --------------------------------------------------------- Format piml section sub format_piml { my (@tokeninfo)=@_; my $date=`date`; chop $date; $piml=<get_text('/targetroot')); $text=$targetroot if $targetroot; $parser->get_tag('/targetroot'); return('# TARGET INSTALL LOCATION is "'.$targetroot."\"\n"); } # -------------------------------------------------- Format perl script section sub format_perlscript { my (@tokeninfo)=@_; $mode=$tokeninfo[2]->{'mode'}; my $text=$parser->get_text('/perlscript'); $parser->get_tag('/perlscript'); if ($mode eq 'bg') { open(OUT,">/tmp/piml$$.pl"); print(OUT $text); close(OUT); return(<get_tag('/TARGET'); return($target); } # --------------------------------------------------- Format categories section sub format_categories { my $text=&trim($parser->get_text('/categories')); $parser->get_tag('/categories'); return('# CATEGORIES'."\n".$text); } # --------------------------------------------------- Format categories section sub format_category { my (@tokeninfo)=@_; $category_att_name=$tokeninfo[2]->{'name'}; $category_att_type=$tokeninfo[2]->{'type'}; $abbreviation=''; $chmod='';$chown=''; $parser->get_text('/category'); $parser->get_tag('/category'); $fab{$category_att_name}=$abbreviation; if ($category_att_type eq $categorytype) { my ($user,$group)=split(/\:/,$chown); $categoryhash{$category_att_name}='-o '.$user.' -g '.$group. ' -m '.$chmod; } return(''); } # --------------------------------------------------- Format categories section sub format_abbreviation { my @tokeninfo=@_; $abbreviation=''; my $text=&trim($parser->get_text('/abbreviation')); if ($text) { $parser->get_tag('/abbreviation'); $abbreviation=$text; } return(''); } # -------------------------------------------------------- Format chown section sub format_chown { my @tokeninfo=@_; $chown=''; my $text=&trim($parser->get_text('/chown')); if ($text) { $parser->get_tag('/chown'); $chown=$text; } return(''); } # -------------------------------------------------------- Format chmod section sub format_chmod { my @tokeninfo=@_; $chmod=''; my $text=&trim($parser->get_text('/chmod')); if ($text) { $parser->get_tag('/chmod'); $chmod=$text; } return(''); } # ------------------------------------------------- Format categoryname section sub format_categoryname { my @tokeninfo=@_; $categoryname=''; my $text=&trim($parser->get_text('/categoryname')); if ($text) { $parser->get_tag('/categoryname'); $categoryname=$text; } return(''); } # -------------------------------------------------------- Format files section sub format_files { my $text=$parser->get_text('/files'); $parser->get_tag('/files'); return("\n".'# There are '.$file_count.' files this script works on'. "\n\n".$text); } # --------------------------------------------------------- Format file section sub format_file { my @tokeninfo=@_; $file=''; $source=''; $target=''; $categoryname=''; $description=''; $note=''; $build=''; $status=''; $dependencies=''; my $text=&trim($parser->get_text('/file')); $file_count++; $categorycount{$categoryname}++; $parser->get_tag('/file'); return("# File: $target\n". "$text\n"); } # ------------------------------------------------------- Format target section sub format_target { my @tokeninfo=@_; $target=''; my $text=&trim($parser->get_text('/target')); if ($text) { $parser->get_tag('/target'); $target=$targetrootarg.$text; } return(''); } # --------------------------------------------------------- Format note section sub format_note { my @tokeninfo=@_; $note=''; my $aref; my $text; while ($aref=$parser->get_token()) { if ($aref->[0] eq 'E' && $aref->[1] eq 'note') { last; } elsif ($aref->[0] eq 'S') { $text.=$aref->[4]; } elsif ($aref->[0] eq 'E') { $text.=$aref->[2]; } else { $text.=$aref->[1]; } } if ($text) { $note=$text; } return(''); } # ------------------------------------------------- Format dependencies section sub format_dependencies { my @tokeninfo=@_; $dependencies=''; my $text=&trim($parser->get_text('/dependencies')); if ($text) { $parser->get_tag('/dependencies'); $dependencies=join(';', (map {s/^\s*//;s/\s$//;$_} split(/\;/,$text))); } return(''); } # ------------------------------------------------ Format specialnotice section sub format_specialnotices { $parser->get_tag('/specialnotices'); return(''); } # ------------------------------------------------ Format specialnotice section sub format_specialnotice { $parser->get_tag('/specialnotice'); return(''); } # ------------------------------------- Render less-than and greater-than signs sub htmlsafe { my $text=@_[0]; $text =~ s//>/g; return($text); } # --------------------------------------- remove starting and ending whitespace sub trim { my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return($s); } # ----------------------------------- POD (plain old documentation, CPAN style) =pod =head1 NAME piml_parse.pl - This is meant to parse piml files (Post Installation Markup Language) =head1 SYNOPSIS Usage is for piml file to come in through standard input. =over 4 =item * 1st argument is the category permissions to use (runtime or development) =item * 2nd argument is the distribution (default,redhat6.2,debian2.2,redhat7.1,etc). =item * 3rd argument is to manually specify a targetroot. =back Only the 1st argument is mandatory for the program to run. Example: cat ../../doc/loncapafiles.piml |\\ perl piml_parse.pl html default /home/sherbert/loncapa /tmp/install =head1 DESCRIPTION I am using a multiple pass-through approach to parsing the piml file. This saves memory and makes sure the server will never be overloaded. =head1 README I am using a multiple pass-through approach to parsing the piml file. This saves memory and makes sure the server will never be overloaded. =head1 PREREQUISITES HTML::TokeParser =head1 COREQUISITES =head1 OSNAMES linux =head1 SCRIPT CATEGORIES Packaging/Administrative =head1 AUTHOR Scott Harrison codeharrison@yahoo.com Please let me know how/if you are finding this script useful and any/all suggestions. -Scott =cut