Annotation of loncom/build/piml_parse.pl, revision 1.6

1.1       harris41    1: #!/usr/bin/perl
                      2: 
1.6     ! harris41    3: # -------------------------------------------------------- Documentation notice
        !             4: # Run "perldoc ./lpml_parse.pl" in order to best view the software
        !             5: # documentation internalized in this program.
        !             6: 
        !             7: # --------------------------------------------------------- License Information
1.1       harris41    8: # The LearningOnline Network with CAPA
                      9: # piml_parse.pl - Linux Packaging Markup Language parser
                     10: #
1.6     ! harris41   11: # $Id: piml_parse.pl,v 1.5 2002/02/05 01:49:39 harris41 Exp $
1.1       harris41   12: #
1.6     ! harris41   13: # Written by Scott Harrison, codeharrison@yahoo.com
1.1       harris41   14: #
                     15: # Copyright Michigan State University Board of Trustees
                     16: #
                     17: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
                     18: #
                     19: # LON-CAPA is free software; you can redistribute it and/or modify
                     20: # it under the terms of the GNU General Public License as published by
                     21: # the Free Software Foundation; either version 2 of the License, or
                     22: # (at your option) any later version.
                     23: #
                     24: # LON-CAPA is distributed in the hope that it will be useful,
                     25: # but WITHOUT ANY WARRANTY; without even the implied warranty of
                     26: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     27: # GNU General Public License for more details.
                     28: #
                     29: # You should have received a copy of the GNU General Public License
                     30: # along with LON-CAPA; if not, write to the Free Software
                     31: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
                     32: #
                     33: # /home/httpd/html/adm/gpl.txt
                     34: #
                     35: # http://www.lon-capa.org/
                     36: #
                     37: # YEAR=2002
1.6     ! harris41   38: # 1/28,1/29,1/30,1/31,2/5,4/8 - Scott Harrison
1.1       harris41   39: #
                     40: ###
                     41: 
                     42: ###############################################################################
                     43: ##                                                                           ##
                     44: ## ORGANIZATION OF THIS PERL SCRIPT                                          ##
                     45: ## 1. Notes                                                                  ##
                     46: ## 2. Get command line arguments                                             ##
                     47: ## 3. First pass through (grab distribution-specific information)            ##
                     48: ## 4. Second pass through (parse out what is not necessary)                  ##
                     49: ## 5. Third pass through (translate markup according to specified mode)      ##
                     50: ## 6. Functions (most all just format contents of different markup tags)     ##
                     51: ## 7. POD (plain old documentation, CPAN style)                              ##
                     52: ##                                                                           ##
                     53: ###############################################################################
                     54: 
                     55: # ----------------------------------------------------------------------- Notes
                     56: #
                     57: # I am using a multiple pass-through approach to parsing
                     58: # the piml file.  This saves memory and makes sure the server
                     59: # will never be overloaded.
                     60: #
                     61: # This is meant to parse files meeting the piml document type.
                     62: # See piml.dtd.  PIML=Post Installation Markup Language.
                     63: 
                     64: use HTML::TokeParser;
                     65: 
1.6     ! harris41   66: my $usage=(<<END);
1.1       harris41   67: **** ERROR ERROR ERROR ERROR ****
                     68: Usage is for piml file to come in through standard input.
1.2       harris41   69: 1st argument is the category permissions to use (runtime or development)
                     70: 2nd argument is the distribution (default,redhat6.2,debian2.2,redhat7.1,etc).
                     71: 3rd argument is to manually specify a targetroot
1.1       harris41   72: 
                     73: Only the 1st argument is mandatory for the program to run.
                     74: 
                     75: Example:
                     76: 
                     77: cat ../../doc/loncapafiles.piml |\\
                     78: perl piml_parse.pl html development default /home/sherbert/loncapa /tmp/install
                     79: END
                     80: 
                     81: # ------------------------------------------------- Grab command line arguments
                     82: 
                     83: my $mode;
1.3       harris41   84: if (@ARGV==3) {
1.1       harris41   85:     $mode = shift @ARGV;
                     86: }
                     87: else {
                     88:     @ARGV=();shift @ARGV;
                     89:     while(<>){} # throw away the input to avoid broken pipes
                     90:     print $usage;
                     91:     exit -1; # exit with error status
                     92: }
                     93: 
                     94: my $categorytype;
                     95: if (@ARGV) {
                     96:     $categorytype = shift @ARGV;
                     97: }
                     98: 
                     99: my $dist;
                    100: if (@ARGV) {
                    101:     $dist = shift @ARGV;
                    102: }
                    103: 
                    104: my $targetroot;
                    105: my $targetrootarg;
                    106: if (@ARGV) {
                    107:     $targetroot = shift @ARGV;
                    108: }
                    109: $targetroot=~s/\/$//;
                    110: $targetrootarg=$targetroot;
                    111: 
                    112: my $logcmd='| tee -a WARNINGS';
                    113: 
                    114: my $invocation;
                    115: # --------------------------------------------------- Record program invocation
                    116: if ($mode eq 'install' or $mode eq 'configinstall' or $mode eq 'build') {
                    117:     $invocation=(<<END);
                    118: # Invocation: STDINPUT | piml_parse.pl
1.2       harris41  119: #             1st argument (category type) is: $categorytype
                    120: #             2nd argument (distribution) is: $dist
                    121: #             3rd argument (targetroot) is: described below
1.1       harris41  122: END
                    123: }
                    124: 
                    125: # ---------------------------------------------------- Start first pass through
                    126: my @parsecontents = <>;
                    127: my $parsestring = join('',@parsecontents);
1.6     ! harris41  128: my $outstring='';
1.1       harris41  129: 
                    130: # Need to make a pass through and figure out what defaults are
                    131: # overrided.  Top-down overriding strategy (leaves don't know
                    132: # about distant leaves).
                    133: 
                    134: my @hierarchy;
                    135: $hierarchy[0]=0;
                    136: my $hloc=0;
1.6     ! harris41  137: my $token='';
1.1       harris41  138: $parser = HTML::TokeParser->new(\$parsestring) or
                    139:     die('can\'t create TokeParser object');
                    140: $parser->xml_mode('1');
                    141: my %hash;
                    142: my $key;
                    143: while ($token = $parser->get_token()) {
                    144:     if ($token->[0] eq 'S') {
                    145: 	$hloc++;
                    146: 	$hierarchy[$hloc]++;
                    147: 	$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
                    148: 	my $thisdist=' '.$token->[2]{'dist'}.' ';
                    149: 	if ($thisdist eq ' default ') {
                    150: 	    $hash{$key}=1; # there is a default setting for this key
                    151: 	}
                    152: 	elsif ($dist && $hash{$key}==1 && $thisdist=~/\s$dist\s/) {
                    153: 	    $hash{$key}=2; # disregard default setting for this key if
                    154: 	                   # there is a directly requested distribution match
                    155: 	}
                    156:     }
                    157:     if ($token->[0] eq 'E') {
                    158: 	$hloc--;
                    159:     }
                    160: }
                    161: 
                    162: # --------------------------------------------------- Start second pass through
                    163: undef $hloc;
                    164: undef @hierarchy;
                    165: undef $parser;
                    166: $hierarchy[0]=0;
                    167: $parser = HTML::TokeParser->new(\$parsestring) or
                    168:     die('can\'t create TokeParser object');
                    169: $parser->xml_mode('1');
                    170: my $cleanstring;
                    171: while ($token = $parser->get_token()) {
                    172:     if ($token->[0] eq 'S') {
                    173: 	$hloc++;
                    174: 	$hierarchy[$hloc]++;
                    175: 	$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
                    176: 	my $thisdist=' '.$token->[2]{'dist'}.' ';
                    177: 	# This conditional clause is set up to ignore two sets
                    178: 	# of invalid conditions before accepting entry into
                    179: 	# the cleanstring.
                    180: 	if ($hash{$key}==2 and
                    181: 	    !($thisdist eq '  ' or $thisdist =~/\s$dist\s/)) {
                    182: 	    if ($token->[4]!~/\/>$/) {
                    183: 		$parser->get_tag('/'.$token->[1]);
                    184: 		$hloc--;
                    185: 	    }
                    186: 	}
                    187: 	elsif ($thisdist ne '  ' and $thisdist!~/\s$dist\s/ and
                    188: 	       !($thisdist eq ' default ' and $hash{$key}!=2)) {
                    189: 	    if ($token->[4]!~/\/>$/) {
                    190: 		$parser->get_tag('/'.$token->[1]);
                    191: 		$hloc--;
                    192: 	    }
                    193: 	}
                    194: 	else {
                    195: 	    $cleanstring.=$token->[4];
                    196: 	}
                    197: 	if ($token->[4]=~/\/>$/) {
1.4       harris41  198: #	    $hloc--;
1.1       harris41  199: 	}
                    200:     }
                    201:     if ($token->[0] eq 'E') {
                    202: 	$cleanstring.=$token->[2];
                    203: 	$hloc--;
                    204:     }
                    205:     if ($token->[0] eq 'T') {
                    206: 	$cleanstring.=$token->[1];
                    207:     }
                    208: }
                    209: $cleanstring=&trim($cleanstring);
                    210: $cleanstring=~s/\>\s*\n\s*\</\>\</g;
                    211: 
                    212: # ---------------------------------------------------- Start final pass through
                    213: 
                    214: # storage variables
                    215: my $piml;
                    216: my $categories;
                    217: my @categorynamelist;
                    218: my $category;
                    219: my $category_att_name;
                    220: my $category_att_type;
                    221: my $chown;
                    222: my $chmod;
                    223: my $abbreviation; # space-free abbreviation; esp. for image names
                    224: my $categoryname;
                    225: my $description;
                    226: my $files;
                    227: my $file;
                    228: my $target;
                    229: my $note;
                    230: my $commands;
                    231: my $command;
                    232: my $dependencies;
                    233: my @links;
                    234: my %categoryhash;
                    235: my $dpathlength;
                    236: my %fab; # file category abbreviation
                    237: my $directory_count;
                    238: my $file_count;
                    239: my $link_count;
                    240: my $fileglob_count;
                    241: my $fileglobnames_count;
                    242: my %categorycount;
                    243: 
1.4       harris41  244: my $mode;
                    245: 
1.1       harris41  246: my @buildall;
                    247: my @buildinfo;
                    248: 
                    249: my @configall;
                    250: 
                    251: # Make new parser with distribution specific input
1.6     ! harris41  252: undef($parser);
1.1       harris41  253: $parser = HTML::TokeParser->new(\$cleanstring) or
                    254:     die('can\'t create TokeParser object');
                    255: $parser->xml_mode('1');
                    256: 
                    257: # Define handling methods for mode-dependent text rendering
                    258: 
                    259: $parser->{textify}={
                    260:     specialnotices => \&format_specialnotices,
                    261:     specialnotice => \&format_specialnotice,
                    262:     targetroot => \&format_targetroot,
                    263:     categories => \&format_categories,
                    264:     category => \&format_category,
                    265:     abbreviation => \&format_abbreviation,
                    266:     chown => \&format_chown,
                    267:     chmod => \&format_chmod,
                    268:     categoryname => \&format_categoryname,
                    269:     files => \&format_files,
                    270:     file => \&format_file,
                    271:     target => \&format_target,
                    272:     note => \&format_note,
                    273:     build => \&format_build,
                    274:     dependencies => \&format_dependencies,
                    275:     filenames => \&format_filenames,
1.2       harris41  276:     perlscript => \&format_perlscript,
                    277:     TARGET => \&format_TARGET,
1.1       harris41  278:     };
                    279: 
                    280: my $text;
                    281: my $token;
1.6     ! harris41  282: undef($hloc);
        !           283: undef(@hierarchy);
1.1       harris41  284: my $hloc;
                    285: my @hierarchy2;
                    286: while ($token = $parser->get_tag('piml')) {
                    287:     &format_piml(@{$token});
                    288:     $text = &trim($parser->get_text('/piml'));
                    289:     $token = $parser->get_tag('/piml');
1.6     ! harris41  290:     print($piml); 
        !           291:     print("\n");
        !           292:     print($text);
        !           293:     print("\n");
        !           294:     print(&end());
1.1       harris41  295: }
1.6     ! harris41  296: exit(0);
1.1       harris41  297: 
                    298: # ---------- Functions (most all just format contents of different markup tags)
                    299: 
                    300: # ------------------------ Final output at end of markup parsing and formatting
                    301: sub end {
1.2       harris41  302: 
1.1       harris41  303: }
                    304: 
                    305: # ----------------------- Take in string to parse and the separation expression
                    306: sub extract_array {
                    307:     my ($stringtoparse,$sepexp) = @_;
                    308:     my @a=split(/$sepexp/,$stringtoparse);
                    309:     return \@a;
                    310: }
                    311: 
                    312: # --------------------------------------------------------- Format piml section
                    313: sub format_piml {
                    314:     my (@tokeninfo)=@_;
                    315:     my $date=`date`; chop $date;
1.2       harris41  316:     $piml=<<END;
                    317: #!/usr/bin/perl
1.1       harris41  318: 
1.2       harris41  319: # Generated from a PIML (Post Installation Markup Language) document
1.1       harris41  320: 
                    321: END
                    322: }
                    323: # --------------------------------------------------- Format targetroot section
                    324: sub format_targetroot {
                    325:     my $text=&trim($parser->get_text('/targetroot'));
                    326:     $text=$targetroot if $targetroot;
                    327:     $parser->get_tag('/targetroot');
1.6     ! harris41  328:     return('# TARGET INSTALL LOCATION is "'.$targetroot."\"\n");
1.2       harris41  329: }
                    330: # -------------------------------------------------- Format perl script section
                    331: sub format_perlscript {
                    332:     my (@tokeninfo)=@_;
1.5       harris41  333:     $mode=$tokeninfo[2]->{'mode'};
1.2       harris41  334:     my $text=$parser->get_text('/perlscript');
                    335:     $parser->get_tag('/perlscript');
1.5       harris41  336:     if ($mode eq 'bg') {
1.6     ! harris41  337: 	open(OUT,">/tmp/piml$$.pl");
        !           338: 	print(OUT $text);
        !           339: 	close(OUT);
        !           340: 	return(<<END);
1.5       harris41  341: 	# launch background process for $target
                    342: 	system("perl /tmp/piml$$.pl &");
                    343: END
                    344:     }
                    345:     else {
1.6     ! harris41  346: 	return($text);
1.5       harris41  347:     }
1.1       harris41  348: }
1.2       harris41  349: # --------------------------------------------------------------- Format TARGET
                    350: sub format_TARGET {
                    351:     my (@tokeninfo)=@_;
                    352:     $parser->get_tag('/TARGET');
1.6     ! harris41  353:     return($target);
1.1       harris41  354: }
                    355: # --------------------------------------------------- Format categories section
                    356: sub format_categories {
                    357:     my $text=&trim($parser->get_text('/categories'));
                    358:     $parser->get_tag('/categories');
1.6     ! harris41  359:     return('# CATEGORIES'."\n".$text);
1.1       harris41  360: }
                    361: # --------------------------------------------------- Format categories section
                    362: sub format_category {
                    363:     my (@tokeninfo)=@_;
                    364:     $category_att_name=$tokeninfo[2]->{'name'};
                    365:     $category_att_type=$tokeninfo[2]->{'type'};
                    366:     $abbreviation=''; $chmod='';$chown='';
                    367:     $parser->get_text('/category');
                    368:     $parser->get_tag('/category');
                    369:     $fab{$category_att_name}=$abbreviation;
1.2       harris41  370:     if ($category_att_type eq $categorytype) {
                    371: 	my ($user,$group)=split(/\:/,$chown);
                    372: 	$categoryhash{$category_att_name}='-o '.$user.' -g '.$group.
                    373: 	    ' -m '.$chmod;
1.1       harris41  374:     }
1.6     ! harris41  375:     return('');
1.1       harris41  376: }
                    377: # --------------------------------------------------- Format categories section
                    378: sub format_abbreviation {
                    379:     my @tokeninfo=@_;
                    380:     $abbreviation='';
                    381:     my $text=&trim($parser->get_text('/abbreviation'));
                    382:     if ($text) {
                    383: 	$parser->get_tag('/abbreviation');
                    384: 	$abbreviation=$text;
                    385:     }
1.6     ! harris41  386:     return('');
1.1       harris41  387: }
                    388: # -------------------------------------------------------- Format chown section
                    389: sub format_chown {
                    390:     my @tokeninfo=@_;
                    391:     $chown='';
                    392:     my $text=&trim($parser->get_text('/chown'));
                    393:     if ($text) {
                    394: 	$parser->get_tag('/chown');
                    395: 	$chown=$text;
                    396:     }
1.6     ! harris41  397:     return('');
1.1       harris41  398: }
                    399: # -------------------------------------------------------- Format chmod section
                    400: sub format_chmod {
                    401:     my @tokeninfo=@_;
                    402:     $chmod='';
                    403:     my $text=&trim($parser->get_text('/chmod'));
                    404:     if ($text) {
                    405: 	$parser->get_tag('/chmod');
                    406: 	$chmod=$text;
                    407:     }
1.6     ! harris41  408:     return('');
1.1       harris41  409: }
                    410: # ------------------------------------------------- Format categoryname section
                    411: sub format_categoryname {
                    412:     my @tokeninfo=@_;
                    413:     $categoryname='';
                    414:     my $text=&trim($parser->get_text('/categoryname'));
                    415:     if ($text) {
                    416: 	$parser->get_tag('/categoryname');
                    417: 	$categoryname=$text;
                    418:     }
1.6     ! harris41  419:     return('');
1.1       harris41  420: }
                    421: # -------------------------------------------------------- Format files section
                    422: sub format_files {
                    423:     my $text=$parser->get_text('/files');
                    424:     $parser->get_tag('/files');
1.6     ! harris41  425:     return("\n".'# There are '.$file_count.' files this script works on'.
        !           426: 	"\n\n".$text);
1.1       harris41  427: }
                    428: # --------------------------------------------------------- Format file section
                    429: sub format_file {
                    430:     my @tokeninfo=@_;
                    431:     $file=''; $source=''; $target=''; $categoryname=''; $description='';
                    432:     $note=''; $build=''; $status=''; $dependencies='';
                    433:     my $text=&trim($parser->get_text('/file'));
                    434:     $file_count++;
                    435:     $categorycount{$categoryname}++;
1.2       harris41  436:     $parser->get_tag('/file');
1.6     ! harris41  437:     return("# File: $target\n".
        !           438: 	"$text\n");
1.1       harris41  439: }
                    440: # ------------------------------------------------------- Format target section
                    441: sub format_target {
                    442:     my @tokeninfo=@_;
                    443:     $target='';
                    444:     my $text=&trim($parser->get_text('/target'));
                    445:     if ($text) {
                    446: 	$parser->get_tag('/target');
1.2       harris41  447: 	$target=$targetrootarg.$text;
1.1       harris41  448:     }
1.6     ! harris41  449:     return('');
1.1       harris41  450: }
                    451: # --------------------------------------------------------- Format note section
                    452: sub format_note {
                    453:     my @tokeninfo=@_;
                    454:     $note='';
                    455:     my $aref;
                    456:     my $text;
                    457:     while ($aref=$parser->get_token()) {
                    458: 	if ($aref->[0] eq 'E' && $aref->[1] eq 'note') {
                    459: 	    last;
                    460: 	}
                    461: 	elsif ($aref->[0] eq 'S') {
                    462: 	    $text.=$aref->[4];
                    463: 	}
                    464: 	elsif ($aref->[0] eq 'E') {
                    465: 	    $text.=$aref->[2];
                    466: 	}
                    467: 	else {
                    468: 	    $text.=$aref->[1];
                    469: 	}
                    470:     }
                    471:     if ($text) {
                    472: 	$note=$text;
                    473:     }
1.6     ! harris41  474:     return('');
1.1       harris41  475: }
                    476: # ------------------------------------------------- Format dependencies section
                    477: sub format_dependencies {
                    478:     my @tokeninfo=@_;
                    479:     $dependencies='';
                    480:     my $text=&trim($parser->get_text('/dependencies'));
                    481:     if ($text) {
                    482: 	$parser->get_tag('/dependencies');
                    483: 	$dependencies=join(';',
                    484: 			      (map {s/^\s*//;s/\s$//;$_} split(/\;/,$text)));
                    485:     }
1.6     ! harris41  486:     return('');
1.1       harris41  487: }
                    488: # ------------------------------------------------ Format specialnotice section
                    489: sub format_specialnotices {
                    490:     $parser->get_tag('/specialnotices');
1.6     ! harris41  491:     return('');
1.1       harris41  492: }
                    493: # ------------------------------------------------ Format specialnotice section
                    494: sub format_specialnotice {
                    495:     $parser->get_tag('/specialnotice');
1.6     ! harris41  496:     return('');
1.1       harris41  497: }
                    498: # ------------------------------------- Render less-than and greater-than signs
                    499: sub htmlsafe {
                    500:     my $text=@_[0];
                    501:     $text =~ s/</&lt;/g;
                    502:     $text =~ s/>/&gt;/g;
1.6     ! harris41  503:     return($text);
1.1       harris41  504: }
                    505: # --------------------------------------- remove starting and ending whitespace
                    506: sub trim {
1.6     ! harris41  507:     my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return($s);
        !           508: }
1.1       harris41  509: 
                    510: # ----------------------------------- POD (plain old documentation, CPAN style)
                    511: 
1.6     ! harris41  512: =pod
        !           513: 
1.1       harris41  514: =head1 NAME
                    515: 
                    516: piml_parse.pl - This is meant to parse files meeting the piml document type.
1.2       harris41  517: See piml.dtd.  PIML=Post Installation Markup Language.
1.1       harris41  518: 
                    519: =head1 SYNOPSIS
                    520: 
                    521: Usage is for piml file to come in through standard input.
                    522: 
                    523: =over 4
                    524: 
                    525: =item * 
                    526: 
1.2       harris41  527: 1st argument is the category permissions to use (runtime or development)
1.1       harris41  528: 
                    529: =item *
                    530: 
1.2       harris41  531: 2nd argument is the distribution
1.1       harris41  532: (default,redhat6.2,debian2.2,redhat7.1,etc).
                    533: 
                    534: =item *
                    535: 
1.2       harris41  536: 3rd argument is to manually specify a targetroot.
1.1       harris41  537: 
                    538: =back
                    539: 
                    540: Only the 1st argument is mandatory for the program to run.
                    541: 
                    542: Example:
                    543: 
                    544: cat ../../doc/loncapafiles.piml |\\
                    545: perl piml_parse.pl html default /home/sherbert/loncapa /tmp/install
                    546: 
                    547: =head1 DESCRIPTION
                    548: 
                    549: I am using a multiple pass-through approach to parsing
                    550: the piml file.  This saves memory and makes sure the server
                    551: will never be overloaded.
                    552: 
                    553: =head1 README
                    554: 
                    555: I am using a multiple pass-through approach to parsing
                    556: the piml file.  This saves memory and makes sure the server
                    557: will never be overloaded.
                    558: 
                    559: =head1 PREREQUISITES
                    560: 
                    561: HTML::TokeParser
                    562: 
                    563: =head1 COREQUISITES
                    564: 
                    565: =head1 OSNAMES
                    566: 
                    567: linux
                    568: 
                    569: =head1 SCRIPT CATEGORIES
                    570: 
                    571: Packaging/Administrative
1.6     ! harris41  572: 
        !           573: =head1 AUTHOR
        !           574: 
        !           575:  Scott Harrison
        !           576:  codeharrison@yahoo.com
        !           577: 
        !           578: Please let me know how/if you are finding this script useful and
        !           579: any/all suggestions.  -Scott
1.1       harris41  580: 
                    581: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>