Annotation of loncom/build/piml_parse.pl, revision 1.2

1.1       harris41    1: #!/usr/bin/perl
                      2: 
                      3: # The LearningOnline Network with CAPA
                      4: # piml_parse.pl - Linux Packaging Markup Language parser
                      5: #
1.2     ! harris41    6: # $Id: piml_parse.pl,v 1.1 2002/01/29 10:43:02 harris41 Exp $
1.1       harris41    7: #
                      8: # Written by Scott Harrison, harris41@msu.edu
                      9: #
                     10: # Copyright Michigan State University Board of Trustees
                     11: #
                     12: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
                     13: #
                     14: # LON-CAPA is free software; you can redistribute it and/or modify
                     15: # it under the terms of the GNU General Public License as published by
                     16: # the Free Software Foundation; either version 2 of the License, or
                     17: # (at your option) any later version.
                     18: #
                     19: # LON-CAPA is distributed in the hope that it will be useful,
                     20: # but WITHOUT ANY WARRANTY; without even the implied warranty of
                     21: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     22: # GNU General Public License for more details.
                     23: #
                     24: # You should have received a copy of the GNU General Public License
                     25: # along with LON-CAPA; if not, write to the Free Software
                     26: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
                     27: #
                     28: # /home/httpd/html/adm/gpl.txt
                     29: #
                     30: # http://www.lon-capa.org/
                     31: #
                     32: # YEAR=2002
                     33: # 1/28 - Scott Harrison
                     34: #
                     35: ###
                     36: 
                     37: ###############################################################################
                     38: ##                                                                           ##
                     39: ## ORGANIZATION OF THIS PERL SCRIPT                                          ##
                     40: ## 1. Notes                                                                  ##
                     41: ## 2. Get command line arguments                                             ##
                     42: ## 3. First pass through (grab distribution-specific information)            ##
                     43: ## 4. Second pass through (parse out what is not necessary)                  ##
                     44: ## 5. Third pass through (translate markup according to specified mode)      ##
                     45: ## 6. Functions (most all just format contents of different markup tags)     ##
                     46: ## 7. POD (plain old documentation, CPAN style)                              ##
                     47: ##                                                                           ##
                     48: ###############################################################################
                     49: 
                     50: # ----------------------------------------------------------------------- Notes
                     51: #
                     52: # I am using a multiple pass-through approach to parsing
                     53: # the piml file.  This saves memory and makes sure the server
                     54: # will never be overloaded.
                     55: #
                     56: # This is meant to parse files meeting the piml document type.
                     57: # See piml.dtd.  PIML=Post Installation Markup Language.
                     58: 
                     59: use HTML::TokeParser;
                     60: 
                     61: my $usage=<<END;
                     62: **** ERROR ERROR ERROR ERROR ****
                     63: Usage is for piml file to come in through standard input.
1.2     ! harris41   64: 1st argument is the category permissions to use (runtime or development)
        !            65: 2nd argument is the distribution (default,redhat6.2,debian2.2,redhat7.1,etc).
        !            66: 3rd argument is to manually specify a targetroot
1.1       harris41   67: 
                     68: Only the 1st argument is mandatory for the program to run.
                     69: 
                     70: Example:
                     71: 
                     72: cat ../../doc/loncapafiles.piml |\\
                     73: perl piml_parse.pl html development default /home/sherbert/loncapa /tmp/install
                     74: END
                     75: 
                     76: # ------------------------------------------------- Grab command line arguments
                     77: 
                     78: my $mode;
                     79: if (@ARGV==4) {
                     80:     $mode = shift @ARGV;
                     81: }
                     82: else {
                     83:     @ARGV=();shift @ARGV;
                     84:     while(<>){} # throw away the input to avoid broken pipes
                     85:     print $usage;
                     86:     exit -1; # exit with error status
                     87: }
                     88: 
                     89: my $categorytype;
                     90: if (@ARGV) {
                     91:     $categorytype = shift @ARGV;
                     92: }
                     93: 
                     94: my $dist;
                     95: if (@ARGV) {
                     96:     $dist = shift @ARGV;
                     97: }
                     98: 
                     99: my $targetroot;
                    100: my $targetrootarg;
                    101: if (@ARGV) {
                    102:     $targetroot = shift @ARGV;
                    103: }
                    104: $targetroot=~s/\/$//;
                    105: $targetrootarg=$targetroot;
                    106: 
                    107: my $logcmd='| tee -a WARNINGS';
                    108: 
                    109: my $invocation;
                    110: # --------------------------------------------------- Record program invocation
                    111: if ($mode eq 'install' or $mode eq 'configinstall' or $mode eq 'build') {
                    112:     $invocation=(<<END);
                    113: # Invocation: STDINPUT | piml_parse.pl
1.2     ! harris41  114: #             1st argument (category type) is: $categorytype
        !           115: #             2nd argument (distribution) is: $dist
        !           116: #             3rd argument (targetroot) is: described below
1.1       harris41  117: END
                    118: }
                    119: 
                    120: # ---------------------------------------------------- Start first pass through
                    121: my @parsecontents = <>;
                    122: my $parsestring = join('',@parsecontents);
                    123: my $outstring;
                    124: 
                    125: # Need to make a pass through and figure out what defaults are
                    126: # overrided.  Top-down overriding strategy (leaves don't know
                    127: # about distant leaves).
                    128: 
                    129: my @hierarchy;
                    130: $hierarchy[0]=0;
                    131: my $hloc=0;
                    132: my $token;
                    133: $parser = HTML::TokeParser->new(\$parsestring) or
                    134:     die('can\'t create TokeParser object');
                    135: $parser->xml_mode('1');
                    136: my %hash;
                    137: my $key;
                    138: while ($token = $parser->get_token()) {
                    139:     if ($token->[0] eq 'S') {
                    140: 	$hloc++;
                    141: 	$hierarchy[$hloc]++;
                    142: 	$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
                    143: 	my $thisdist=' '.$token->[2]{'dist'}.' ';
                    144: 	if ($thisdist eq ' default ') {
                    145: 	    $hash{$key}=1; # there is a default setting for this key
                    146: 	}
                    147: 	elsif ($dist && $hash{$key}==1 && $thisdist=~/\s$dist\s/) {
                    148: 	    $hash{$key}=2; # disregard default setting for this key if
                    149: 	                   # there is a directly requested distribution match
                    150: 	}
                    151:     }
                    152:     if ($token->[0] eq 'E') {
                    153: 	$hloc--;
                    154:     }
                    155: }
                    156: 
                    157: # --------------------------------------------------- Start second pass through
                    158: undef $hloc;
                    159: undef @hierarchy;
                    160: undef $parser;
                    161: $hierarchy[0]=0;
                    162: $parser = HTML::TokeParser->new(\$parsestring) or
                    163:     die('can\'t create TokeParser object');
                    164: $parser->xml_mode('1');
                    165: my $cleanstring;
                    166: while ($token = $parser->get_token()) {
                    167:     if ($token->[0] eq 'S') {
                    168: 	$hloc++;
                    169: 	$hierarchy[$hloc]++;
                    170: 	$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
                    171: 	my $thisdist=' '.$token->[2]{'dist'}.' ';
                    172: 	# This conditional clause is set up to ignore two sets
                    173: 	# of invalid conditions before accepting entry into
                    174: 	# the cleanstring.
                    175: 	if ($hash{$key}==2 and
                    176: 	    !($thisdist eq '  ' or $thisdist =~/\s$dist\s/)) {
                    177: 	    if ($token->[4]!~/\/>$/) {
                    178: 		$parser->get_tag('/'.$token->[1]);
                    179: 		$hloc--;
                    180: 	    }
                    181: 	}
                    182: 	elsif ($thisdist ne '  ' and $thisdist!~/\s$dist\s/ and
                    183: 	       !($thisdist eq ' default ' and $hash{$key}!=2)) {
                    184: 	    if ($token->[4]!~/\/>$/) {
                    185: 		$parser->get_tag('/'.$token->[1]);
                    186: 		$hloc--;
                    187: 	    }
                    188: 	}
                    189: 	else {
                    190: 	    $cleanstring.=$token->[4];
                    191: 	}
                    192: 	if ($token->[4]=~/\/>$/) {
                    193: 	    $hloc--;
                    194: 	}
                    195:     }
                    196:     if ($token->[0] eq 'E') {
                    197: 	$cleanstring.=$token->[2];
                    198: 	$hloc--;
                    199:     }
                    200:     if ($token->[0] eq 'T') {
                    201: 	$cleanstring.=$token->[1];
                    202:     }
                    203: }
                    204: $cleanstring=&trim($cleanstring);
                    205: $cleanstring=~s/\>\s*\n\s*\</\>\</g;
                    206: 
                    207: # ---------------------------------------------------- Start final pass through
                    208: 
                    209: # storage variables
                    210: my $piml;
                    211: my $categories;
                    212: my @categorynamelist;
                    213: my $category;
                    214: my $category_att_name;
                    215: my $category_att_type;
                    216: my $chown;
                    217: my $chmod;
                    218: my $abbreviation; # space-free abbreviation; esp. for image names
                    219: my $categoryname;
                    220: my $description;
                    221: my $files;
                    222: my $file;
                    223: my $target;
                    224: my $note;
                    225: my $commands;
                    226: my $command;
                    227: my $dependencies;
                    228: my @links;
                    229: my %categoryhash;
                    230: my $dpathlength;
                    231: my %fab; # file category abbreviation
                    232: my $directory_count;
                    233: my $file_count;
                    234: my $link_count;
                    235: my $fileglob_count;
                    236: my $fileglobnames_count;
                    237: my %categorycount;
                    238: 
                    239: my @buildall;
                    240: my @buildinfo;
                    241: 
                    242: my @configall;
                    243: 
                    244: # Make new parser with distribution specific input
                    245: undef $parser;
                    246: $parser = HTML::TokeParser->new(\$cleanstring) or
                    247:     die('can\'t create TokeParser object');
                    248: $parser->xml_mode('1');
                    249: 
                    250: # Define handling methods for mode-dependent text rendering
                    251: 
                    252: $parser->{textify}={
                    253:     specialnotices => \&format_specialnotices,
                    254:     specialnotice => \&format_specialnotice,
                    255:     targetroot => \&format_targetroot,
                    256:     categories => \&format_categories,
                    257:     category => \&format_category,
                    258:     abbreviation => \&format_abbreviation,
                    259:     chown => \&format_chown,
                    260:     chmod => \&format_chmod,
                    261:     categoryname => \&format_categoryname,
                    262:     files => \&format_files,
                    263:     file => \&format_file,
                    264:     target => \&format_target,
                    265:     note => \&format_note,
                    266:     build => \&format_build,
                    267:     dependencies => \&format_dependencies,
                    268:     filenames => \&format_filenames,
1.2     ! harris41  269:     perlscript => \&format_perlscript,
        !           270:     TARGET => \&format_TARGET,
1.1       harris41  271:     };
                    272: 
                    273: my $text;
                    274: my $token;
                    275: undef $hloc;
                    276: undef @hierarchy;
                    277: my $hloc;
                    278: my @hierarchy2;
                    279: while ($token = $parser->get_tag('piml')) {
                    280:     &format_piml(@{$token});
                    281:     $text = &trim($parser->get_text('/piml'));
                    282:     $token = $parser->get_tag('/piml');
                    283:     print $piml; 
                    284:     print "\n";
                    285:     print $text;
                    286:     print "\n";
                    287:     print &end();
                    288: }
                    289: exit;
                    290: 
                    291: # ---------- Functions (most all just format contents of different markup tags)
                    292: 
                    293: # ------------------------ Final output at end of markup parsing and formatting
                    294: sub end {
1.2     ! harris41  295: 
1.1       harris41  296: }
                    297: 
                    298: # ----------------------- Take in string to parse and the separation expression
                    299: sub extract_array {
                    300:     my ($stringtoparse,$sepexp) = @_;
                    301:     my @a=split(/$sepexp/,$stringtoparse);
                    302:     return \@a;
                    303: }
                    304: 
                    305: # --------------------------------------------------------- Format piml section
                    306: sub format_piml {
                    307:     my (@tokeninfo)=@_;
                    308:     my $date=`date`; chop $date;
1.2     ! harris41  309:     $piml=<<END;
        !           310: #!/usr/bin/perl
1.1       harris41  311: 
1.2     ! harris41  312: # Generated from a PIML (Post Installation Markup Language) document
1.1       harris41  313: 
                    314: END
                    315: }
                    316: # --------------------------------------------------- Format targetroot section
                    317: sub format_targetroot {
                    318:     my $text=&trim($parser->get_text('/targetroot'));
                    319:     $text=$targetroot if $targetroot;
                    320:     $parser->get_tag('/targetroot');
1.2     ! harris41  321:     return '# TARGET INSTALL LOCATION is "'.$targetroot."\"\n";
        !           322: }
        !           323: # -------------------------------------------------- Format perl script section
        !           324: sub format_perlscript {
        !           325:     my (@tokeninfo)=@_;
        !           326:     my $text=$parser->get_text('/perlscript');
        !           327:     $parser->get_tag('/perlscript');
        !           328:     return $text;
1.1       harris41  329: }
1.2     ! harris41  330: # --------------------------------------------------------------- Format TARGET
        !           331: sub format_TARGET {
        !           332:     my (@tokeninfo)=@_;
        !           333:     $parser->get_tag('/TARGET');
        !           334:     return $target;
1.1       harris41  335: }
                    336: # --------------------------------------------------- Format categories section
                    337: sub format_categories {
                    338:     my $text=&trim($parser->get_text('/categories'));
                    339:     $parser->get_tag('/categories');
1.2     ! harris41  340:     return '# CATEGORIES'."\n".$text;
1.1       harris41  341: }
                    342: # --------------------------------------------------- Format categories section
                    343: sub format_category {
                    344:     my (@tokeninfo)=@_;
                    345:     $category_att_name=$tokeninfo[2]->{'name'};
                    346:     $category_att_type=$tokeninfo[2]->{'type'};
                    347:     $abbreviation=''; $chmod='';$chown='';
                    348:     $parser->get_text('/category');
                    349:     $parser->get_tag('/category');
                    350:     $fab{$category_att_name}=$abbreviation;
1.2     ! harris41  351:     if ($category_att_type eq $categorytype) {
        !           352: 	my ($user,$group)=split(/\:/,$chown);
        !           353: 	$categoryhash{$category_att_name}='-o '.$user.' -g '.$group.
        !           354: 	    ' -m '.$chmod;
1.1       harris41  355:     }
1.2     ! harris41  356:     return '';
1.1       harris41  357: }
                    358: # --------------------------------------------------- Format categories section
                    359: sub format_abbreviation {
                    360:     my @tokeninfo=@_;
                    361:     $abbreviation='';
                    362:     my $text=&trim($parser->get_text('/abbreviation'));
                    363:     if ($text) {
                    364: 	$parser->get_tag('/abbreviation');
                    365: 	$abbreviation=$text;
                    366:     }
                    367:     return '';
                    368: }
                    369: # -------------------------------------------------------- Format chown section
                    370: sub format_chown {
                    371:     my @tokeninfo=@_;
                    372:     $chown='';
                    373:     my $text=&trim($parser->get_text('/chown'));
                    374:     if ($text) {
                    375: 	$parser->get_tag('/chown');
                    376: 	$chown=$text;
                    377:     }
                    378:     return '';
                    379: }
                    380: # -------------------------------------------------------- Format chmod section
                    381: sub format_chmod {
                    382:     my @tokeninfo=@_;
                    383:     $chmod='';
                    384:     my $text=&trim($parser->get_text('/chmod'));
                    385:     if ($text) {
                    386: 	$parser->get_tag('/chmod');
                    387: 	$chmod=$text;
                    388:     }
                    389:     return '';
                    390: }
                    391: # ------------------------------------------------- Format categoryname section
                    392: sub format_categoryname {
                    393:     my @tokeninfo=@_;
                    394:     $categoryname='';
                    395:     my $text=&trim($parser->get_text('/categoryname'));
                    396:     if ($text) {
                    397: 	$parser->get_tag('/categoryname');
                    398: 	$categoryname=$text;
                    399:     }
                    400:     return '';
                    401: }
                    402: # -------------------------------------------------------- Format files section
                    403: sub format_files {
                    404:     my $text=$parser->get_text('/files');
                    405:     $parser->get_tag('/files');
1.2     ! harris41  406:     return "\n".'# There are '.$file_count.' files this script works on'.
        !           407: 	"\n\n".$text;
1.1       harris41  408: }
                    409: # --------------------------------------------------------- Format file section
                    410: sub format_file {
                    411:     my @tokeninfo=@_;
                    412:     $file=''; $source=''; $target=''; $categoryname=''; $description='';
                    413:     $note=''; $build=''; $status=''; $dependencies='';
                    414:     my $text=&trim($parser->get_text('/file'));
                    415:     $file_count++;
                    416:     $categorycount{$categoryname}++;
1.2     ! harris41  417:     $parser->get_tag('/file');
        !           418:     return "# File: $target\n".
        !           419: 	"$text\n";
1.1       harris41  420:     return '';
                    421: }
                    422: # ------------------------------------------------------- Format target section
                    423: sub format_target {
                    424:     my @tokeninfo=@_;
                    425:     $target='';
                    426:     my $text=&trim($parser->get_text('/target'));
                    427:     if ($text) {
                    428: 	$parser->get_tag('/target');
1.2     ! harris41  429: 	$target=$targetrootarg.$text;
1.1       harris41  430:     }
                    431:     return '';
                    432: }
                    433: # --------------------------------------------------------- Format note section
                    434: sub format_note {
                    435:     my @tokeninfo=@_;
                    436:     $note='';
                    437:     my $aref;
                    438:     my $text;
                    439:     while ($aref=$parser->get_token()) {
                    440: 	if ($aref->[0] eq 'E' && $aref->[1] eq 'note') {
                    441: 	    last;
                    442: 	}
                    443: 	elsif ($aref->[0] eq 'S') {
                    444: 	    $text.=$aref->[4];
                    445: 	}
                    446: 	elsif ($aref->[0] eq 'E') {
                    447: 	    $text.=$aref->[2];
                    448: 	}
                    449: 	else {
                    450: 	    $text.=$aref->[1];
                    451: 	}
                    452:     }
                    453:     if ($text) {
                    454: 	$note=$text;
                    455:     }
                    456:     return '';
                    457: 
                    458: }
                    459: # ------------------------------------------------- Format dependencies section
                    460: sub format_dependencies {
                    461:     my @tokeninfo=@_;
                    462:     $dependencies='';
                    463:     my $text=&trim($parser->get_text('/dependencies'));
                    464:     if ($text) {
                    465: 	$parser->get_tag('/dependencies');
                    466: 	$dependencies=join(';',
                    467: 			      (map {s/^\s*//;s/\s$//;$_} split(/\;/,$text)));
                    468:     }
                    469:     return '';
                    470: }
                    471: # ------------------------------------------------ Format specialnotice section
                    472: sub format_specialnotices {
                    473:     $parser->get_tag('/specialnotices');
                    474:     return '';
                    475: }
                    476: # ------------------------------------------------ Format specialnotice section
                    477: sub format_specialnotice {
                    478:     $parser->get_tag('/specialnotice');
                    479:     return '';
                    480: }
                    481: # ------------------------------------- Render less-than and greater-than signs
                    482: sub htmlsafe {
                    483:     my $text=@_[0];
                    484:     $text =~ s/</&lt;/g;
                    485:     $text =~ s/>/&gt;/g;
                    486:     return $text;
                    487: }
                    488: # --------------------------------------- remove starting and ending whitespace
                    489: sub trim {
                    490:     my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
                    491: } 
                    492: 
                    493: # ----------------------------------- POD (plain old documentation, CPAN style)
                    494: 
                    495: =head1 NAME
                    496: 
                    497: piml_parse.pl - This is meant to parse files meeting the piml document type.
1.2     ! harris41  498: See piml.dtd.  PIML=Post Installation Markup Language.
1.1       harris41  499: 
                    500: =head1 SYNOPSIS
                    501: 
                    502: Usage is for piml file to come in through standard input.
                    503: 
                    504: =over 4
                    505: 
                    506: =item * 
                    507: 
1.2     ! harris41  508: 1st argument is the category permissions to use (runtime or development)
1.1       harris41  509: 
                    510: =item *
                    511: 
1.2     ! harris41  512: 2nd argument is the distribution
1.1       harris41  513: (default,redhat6.2,debian2.2,redhat7.1,etc).
                    514: 
                    515: =item *
                    516: 
1.2     ! harris41  517: 3rd argument is to manually specify a targetroot.
1.1       harris41  518: 
                    519: =back
                    520: 
                    521: Only the 1st argument is mandatory for the program to run.
                    522: 
                    523: Example:
                    524: 
                    525: cat ../../doc/loncapafiles.piml |\\
                    526: perl piml_parse.pl html default /home/sherbert/loncapa /tmp/install
                    527: 
                    528: =head1 DESCRIPTION
                    529: 
                    530: I am using a multiple pass-through approach to parsing
                    531: the piml file.  This saves memory and makes sure the server
                    532: will never be overloaded.
                    533: 
                    534: =head1 README
                    535: 
                    536: I am using a multiple pass-through approach to parsing
                    537: the piml file.  This saves memory and makes sure the server
                    538: will never be overloaded.
                    539: 
                    540: =head1 PREREQUISITES
                    541: 
                    542: HTML::TokeParser
                    543: 
                    544: =head1 COREQUISITES
                    545: 
                    546: =head1 OSNAMES
                    547: 
                    548: linux
                    549: 
                    550: =head1 SCRIPT CATEGORIES
                    551: 
                    552: Packaging/Administrative
                    553: 
                    554: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>