Annotation of loncom/build/piml_parse.pl, revision 1.4

1.1       harris41    1: #!/usr/bin/perl
                      2: 
                      3: # The LearningOnline Network with CAPA
                      4: # piml_parse.pl - Linux Packaging Markup Language parser
                      5: #
1.4     ! harris41    6: # $Id: piml_parse.pl,v 1.3 2002/01/31 17:37:19 harris41 Exp $
1.1       harris41    7: #
                      8: # Written by Scott Harrison, harris41@msu.edu
                      9: #
                     10: # Copyright Michigan State University Board of Trustees
                     11: #
                     12: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
                     13: #
                     14: # LON-CAPA is free software; you can redistribute it and/or modify
                     15: # it under the terms of the GNU General Public License as published by
                     16: # the Free Software Foundation; either version 2 of the License, or
                     17: # (at your option) any later version.
                     18: #
                     19: # LON-CAPA is distributed in the hope that it will be useful,
                     20: # but WITHOUT ANY WARRANTY; without even the implied warranty of
                     21: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     22: # GNU General Public License for more details.
                     23: #
                     24: # You should have received a copy of the GNU General Public License
                     25: # along with LON-CAPA; if not, write to the Free Software
                     26: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
                     27: #
                     28: # /home/httpd/html/adm/gpl.txt
                     29: #
                     30: # http://www.lon-capa.org/
                     31: #
                     32: # YEAR=2002
1.3       harris41   33: # 1/28,1/29,1/30,1/31 - Scott Harrison
1.1       harris41   34: #
                     35: ###
                     36: 
                     37: ###############################################################################
                     38: ##                                                                           ##
                     39: ## ORGANIZATION OF THIS PERL SCRIPT                                          ##
                     40: ## 1. Notes                                                                  ##
                     41: ## 2. Get command line arguments                                             ##
                     42: ## 3. First pass through (grab distribution-specific information)            ##
                     43: ## 4. Second pass through (parse out what is not necessary)                  ##
                     44: ## 5. Third pass through (translate markup according to specified mode)      ##
                     45: ## 6. Functions (most all just format contents of different markup tags)     ##
                     46: ## 7. POD (plain old documentation, CPAN style)                              ##
                     47: ##                                                                           ##
                     48: ###############################################################################
                     49: 
                     50: # ----------------------------------------------------------------------- Notes
                     51: #
                     52: # I am using a multiple pass-through approach to parsing
                     53: # the piml file.  This saves memory and makes sure the server
                     54: # will never be overloaded.
                     55: #
                     56: # This is meant to parse files meeting the piml document type.
                     57: # See piml.dtd.  PIML=Post Installation Markup Language.
                     58: 
                     59: use HTML::TokeParser;
                     60: 
                     61: my $usage=<<END;
                     62: **** ERROR ERROR ERROR ERROR ****
                     63: Usage is for piml file to come in through standard input.
1.2       harris41   64: 1st argument is the category permissions to use (runtime or development)
                     65: 2nd argument is the distribution (default,redhat6.2,debian2.2,redhat7.1,etc).
                     66: 3rd argument is to manually specify a targetroot
1.1       harris41   67: 
                     68: Only the 1st argument is mandatory for the program to run.
                     69: 
                     70: Example:
                     71: 
                     72: cat ../../doc/loncapafiles.piml |\\
                     73: perl piml_parse.pl html development default /home/sherbert/loncapa /tmp/install
                     74: END
                     75: 
                     76: # ------------------------------------------------- Grab command line arguments
                     77: 
                     78: my $mode;
1.3       harris41   79: if (@ARGV==3) {
1.1       harris41   80:     $mode = shift @ARGV;
                     81: }
                     82: else {
                     83:     @ARGV=();shift @ARGV;
                     84:     while(<>){} # throw away the input to avoid broken pipes
                     85:     print $usage;
                     86:     exit -1; # exit with error status
                     87: }
                     88: 
                     89: my $categorytype;
                     90: if (@ARGV) {
                     91:     $categorytype = shift @ARGV;
                     92: }
                     93: 
                     94: my $dist;
                     95: if (@ARGV) {
                     96:     $dist = shift @ARGV;
                     97: }
                     98: 
                     99: my $targetroot;
                    100: my $targetrootarg;
                    101: if (@ARGV) {
                    102:     $targetroot = shift @ARGV;
                    103: }
                    104: $targetroot=~s/\/$//;
                    105: $targetrootarg=$targetroot;
                    106: 
                    107: my $logcmd='| tee -a WARNINGS';
                    108: 
                    109: my $invocation;
                    110: # --------------------------------------------------- Record program invocation
                    111: if ($mode eq 'install' or $mode eq 'configinstall' or $mode eq 'build') {
                    112:     $invocation=(<<END);
                    113: # Invocation: STDINPUT | piml_parse.pl
1.2       harris41  114: #             1st argument (category type) is: $categorytype
                    115: #             2nd argument (distribution) is: $dist
                    116: #             3rd argument (targetroot) is: described below
1.1       harris41  117: END
                    118: }
                    119: 
                    120: # ---------------------------------------------------- Start first pass through
                    121: my @parsecontents = <>;
                    122: my $parsestring = join('',@parsecontents);
                    123: my $outstring;
                    124: 
                    125: # Need to make a pass through and figure out what defaults are
                    126: # overrided.  Top-down overriding strategy (leaves don't know
                    127: # about distant leaves).
                    128: 
                    129: my @hierarchy;
                    130: $hierarchy[0]=0;
                    131: my $hloc=0;
                    132: my $token;
                    133: $parser = HTML::TokeParser->new(\$parsestring) or
                    134:     die('can\'t create TokeParser object');
                    135: $parser->xml_mode('1');
                    136: my %hash;
                    137: my $key;
                    138: while ($token = $parser->get_token()) {
                    139:     if ($token->[0] eq 'S') {
                    140: 	$hloc++;
                    141: 	$hierarchy[$hloc]++;
                    142: 	$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
                    143: 	my $thisdist=' '.$token->[2]{'dist'}.' ';
                    144: 	if ($thisdist eq ' default ') {
                    145: 	    $hash{$key}=1; # there is a default setting for this key
                    146: 	}
                    147: 	elsif ($dist && $hash{$key}==1 && $thisdist=~/\s$dist\s/) {
                    148: 	    $hash{$key}=2; # disregard default setting for this key if
                    149: 	                   # there is a directly requested distribution match
                    150: 	}
                    151:     }
                    152:     if ($token->[0] eq 'E') {
                    153: 	$hloc--;
                    154:     }
                    155: }
                    156: 
                    157: # --------------------------------------------------- Start second pass through
                    158: undef $hloc;
                    159: undef @hierarchy;
                    160: undef $parser;
                    161: $hierarchy[0]=0;
                    162: $parser = HTML::TokeParser->new(\$parsestring) or
                    163:     die('can\'t create TokeParser object');
                    164: $parser->xml_mode('1');
                    165: my $cleanstring;
                    166: while ($token = $parser->get_token()) {
                    167:     if ($token->[0] eq 'S') {
                    168: 	$hloc++;
                    169: 	$hierarchy[$hloc]++;
                    170: 	$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
                    171: 	my $thisdist=' '.$token->[2]{'dist'}.' ';
                    172: 	# This conditional clause is set up to ignore two sets
                    173: 	# of invalid conditions before accepting entry into
                    174: 	# the cleanstring.
                    175: 	if ($hash{$key}==2 and
                    176: 	    !($thisdist eq '  ' or $thisdist =~/\s$dist\s/)) {
                    177: 	    if ($token->[4]!~/\/>$/) {
                    178: 		$parser->get_tag('/'.$token->[1]);
                    179: 		$hloc--;
                    180: 	    }
                    181: 	}
                    182: 	elsif ($thisdist ne '  ' and $thisdist!~/\s$dist\s/ and
                    183: 	       !($thisdist eq ' default ' and $hash{$key}!=2)) {
                    184: 	    if ($token->[4]!~/\/>$/) {
                    185: 		$parser->get_tag('/'.$token->[1]);
                    186: 		$hloc--;
                    187: 	    }
                    188: 	}
                    189: 	else {
                    190: 	    $cleanstring.=$token->[4];
                    191: 	}
                    192: 	if ($token->[4]=~/\/>$/) {
1.4     ! harris41  193: #	    $hloc--;
1.1       harris41  194: 	}
                    195:     }
                    196:     if ($token->[0] eq 'E') {
                    197: 	$cleanstring.=$token->[2];
                    198: 	$hloc--;
                    199:     }
                    200:     if ($token->[0] eq 'T') {
                    201: 	$cleanstring.=$token->[1];
                    202:     }
                    203: }
                    204: $cleanstring=&trim($cleanstring);
                    205: $cleanstring=~s/\>\s*\n\s*\</\>\</g;
                    206: 
                    207: # ---------------------------------------------------- Start final pass through
                    208: 
                    209: # storage variables
                    210: my $piml;
                    211: my $categories;
                    212: my @categorynamelist;
                    213: my $category;
                    214: my $category_att_name;
                    215: my $category_att_type;
                    216: my $chown;
                    217: my $chmod;
                    218: my $abbreviation; # space-free abbreviation; esp. for image names
                    219: my $categoryname;
                    220: my $description;
                    221: my $files;
                    222: my $file;
                    223: my $target;
                    224: my $note;
                    225: my $commands;
                    226: my $command;
                    227: my $dependencies;
                    228: my @links;
                    229: my %categoryhash;
                    230: my $dpathlength;
                    231: my %fab; # file category abbreviation
                    232: my $directory_count;
                    233: my $file_count;
                    234: my $link_count;
                    235: my $fileglob_count;
                    236: my $fileglobnames_count;
                    237: my %categorycount;
                    238: 
1.4     ! harris41  239: my $mode;
        !           240: 
1.1       harris41  241: my @buildall;
                    242: my @buildinfo;
                    243: 
                    244: my @configall;
                    245: 
                    246: # Make new parser with distribution specific input
                    247: undef $parser;
                    248: $parser = HTML::TokeParser->new(\$cleanstring) or
                    249:     die('can\'t create TokeParser object');
                    250: $parser->xml_mode('1');
                    251: 
                    252: # Define handling methods for mode-dependent text rendering
                    253: 
                    254: $parser->{textify}={
                    255:     specialnotices => \&format_specialnotices,
                    256:     specialnotice => \&format_specialnotice,
                    257:     targetroot => \&format_targetroot,
                    258:     categories => \&format_categories,
                    259:     category => \&format_category,
                    260:     abbreviation => \&format_abbreviation,
                    261:     chown => \&format_chown,
                    262:     chmod => \&format_chmod,
                    263:     categoryname => \&format_categoryname,
                    264:     files => \&format_files,
                    265:     file => \&format_file,
                    266:     target => \&format_target,
                    267:     note => \&format_note,
                    268:     build => \&format_build,
                    269:     dependencies => \&format_dependencies,
                    270:     filenames => \&format_filenames,
1.2       harris41  271:     perlscript => \&format_perlscript,
                    272:     TARGET => \&format_TARGET,
1.1       harris41  273:     };
                    274: 
                    275: my $text;
                    276: my $token;
                    277: undef $hloc;
                    278: undef @hierarchy;
                    279: my $hloc;
                    280: my @hierarchy2;
                    281: while ($token = $parser->get_tag('piml')) {
                    282:     &format_piml(@{$token});
                    283:     $text = &trim($parser->get_text('/piml'));
                    284:     $token = $parser->get_tag('/piml');
                    285:     print $piml; 
                    286:     print "\n";
                    287:     print $text;
                    288:     print "\n";
                    289:     print &end();
                    290: }
                    291: exit;
                    292: 
                    293: # ---------- Functions (most all just format contents of different markup tags)
                    294: 
                    295: # ------------------------ Final output at end of markup parsing and formatting
                    296: sub end {
1.2       harris41  297: 
1.1       harris41  298: }
                    299: 
                    300: # ----------------------- Take in string to parse and the separation expression
                    301: sub extract_array {
                    302:     my ($stringtoparse,$sepexp) = @_;
                    303:     my @a=split(/$sepexp/,$stringtoparse);
                    304:     return \@a;
                    305: }
                    306: 
                    307: # --------------------------------------------------------- Format piml section
                    308: sub format_piml {
                    309:     my (@tokeninfo)=@_;
                    310:     my $date=`date`; chop $date;
1.2       harris41  311:     $piml=<<END;
                    312: #!/usr/bin/perl
1.1       harris41  313: 
1.2       harris41  314: # Generated from a PIML (Post Installation Markup Language) document
1.1       harris41  315: 
                    316: END
                    317: }
                    318: # --------------------------------------------------- Format targetroot section
                    319: sub format_targetroot {
                    320:     my $text=&trim($parser->get_text('/targetroot'));
                    321:     $text=$targetroot if $targetroot;
                    322:     $parser->get_tag('/targetroot');
1.2       harris41  323:     return '# TARGET INSTALL LOCATION is "'.$targetroot."\"\n";
                    324: }
                    325: # -------------------------------------------------- Format perl script section
                    326: sub format_perlscript {
                    327:     my (@tokeninfo)=@_;
1.4     ! harris41  328:     $mode=$tokeninfo->[2]{'mode'};
1.2       harris41  329:     my $text=$parser->get_text('/perlscript');
                    330:     $parser->get_tag('/perlscript');
                    331:     return $text;
1.1       harris41  332: }
1.2       harris41  333: # --------------------------------------------------------------- Format TARGET
                    334: sub format_TARGET {
                    335:     my (@tokeninfo)=@_;
                    336:     $parser->get_tag('/TARGET');
                    337:     return $target;
1.1       harris41  338: }
                    339: # --------------------------------------------------- Format categories section
                    340: sub format_categories {
                    341:     my $text=&trim($parser->get_text('/categories'));
                    342:     $parser->get_tag('/categories');
1.2       harris41  343:     return '# CATEGORIES'."\n".$text;
1.1       harris41  344: }
                    345: # --------------------------------------------------- Format categories section
                    346: sub format_category {
                    347:     my (@tokeninfo)=@_;
                    348:     $category_att_name=$tokeninfo[2]->{'name'};
                    349:     $category_att_type=$tokeninfo[2]->{'type'};
                    350:     $abbreviation=''; $chmod='';$chown='';
                    351:     $parser->get_text('/category');
                    352:     $parser->get_tag('/category');
                    353:     $fab{$category_att_name}=$abbreviation;
1.2       harris41  354:     if ($category_att_type eq $categorytype) {
                    355: 	my ($user,$group)=split(/\:/,$chown);
                    356: 	$categoryhash{$category_att_name}='-o '.$user.' -g '.$group.
                    357: 	    ' -m '.$chmod;
1.1       harris41  358:     }
1.2       harris41  359:     return '';
1.1       harris41  360: }
                    361: # --------------------------------------------------- Format categories section
                    362: sub format_abbreviation {
                    363:     my @tokeninfo=@_;
                    364:     $abbreviation='';
                    365:     my $text=&trim($parser->get_text('/abbreviation'));
                    366:     if ($text) {
                    367: 	$parser->get_tag('/abbreviation');
                    368: 	$abbreviation=$text;
                    369:     }
                    370:     return '';
                    371: }
                    372: # -------------------------------------------------------- Format chown section
                    373: sub format_chown {
                    374:     my @tokeninfo=@_;
                    375:     $chown='';
                    376:     my $text=&trim($parser->get_text('/chown'));
                    377:     if ($text) {
                    378: 	$parser->get_tag('/chown');
                    379: 	$chown=$text;
                    380:     }
                    381:     return '';
                    382: }
                    383: # -------------------------------------------------------- Format chmod section
                    384: sub format_chmod {
                    385:     my @tokeninfo=@_;
                    386:     $chmod='';
                    387:     my $text=&trim($parser->get_text('/chmod'));
                    388:     if ($text) {
                    389: 	$parser->get_tag('/chmod');
                    390: 	$chmod=$text;
                    391:     }
                    392:     return '';
                    393: }
                    394: # ------------------------------------------------- Format categoryname section
                    395: sub format_categoryname {
                    396:     my @tokeninfo=@_;
                    397:     $categoryname='';
                    398:     my $text=&trim($parser->get_text('/categoryname'));
                    399:     if ($text) {
                    400: 	$parser->get_tag('/categoryname');
                    401: 	$categoryname=$text;
                    402:     }
                    403:     return '';
                    404: }
                    405: # -------------------------------------------------------- Format files section
                    406: sub format_files {
                    407:     my $text=$parser->get_text('/files');
                    408:     $parser->get_tag('/files');
1.2       harris41  409:     return "\n".'# There are '.$file_count.' files this script works on'.
                    410: 	"\n\n".$text;
1.1       harris41  411: }
                    412: # --------------------------------------------------------- Format file section
                    413: sub format_file {
                    414:     my @tokeninfo=@_;
                    415:     $file=''; $source=''; $target=''; $categoryname=''; $description='';
                    416:     $note=''; $build=''; $status=''; $dependencies='';
                    417:     my $text=&trim($parser->get_text('/file'));
                    418:     $file_count++;
                    419:     $categorycount{$categoryname}++;
1.2       harris41  420:     $parser->get_tag('/file');
                    421:     return "# File: $target\n".
                    422: 	"$text\n";
1.1       harris41  423:     return '';
                    424: }
                    425: # ------------------------------------------------------- Format target section
                    426: sub format_target {
                    427:     my @tokeninfo=@_;
                    428:     $target='';
                    429:     my $text=&trim($parser->get_text('/target'));
                    430:     if ($text) {
                    431: 	$parser->get_tag('/target');
1.2       harris41  432: 	$target=$targetrootarg.$text;
1.1       harris41  433:     }
                    434:     return '';
                    435: }
                    436: # --------------------------------------------------------- Format note section
                    437: sub format_note {
                    438:     my @tokeninfo=@_;
                    439:     $note='';
                    440:     my $aref;
                    441:     my $text;
                    442:     while ($aref=$parser->get_token()) {
                    443: 	if ($aref->[0] eq 'E' && $aref->[1] eq 'note') {
                    444: 	    last;
                    445: 	}
                    446: 	elsif ($aref->[0] eq 'S') {
                    447: 	    $text.=$aref->[4];
                    448: 	}
                    449: 	elsif ($aref->[0] eq 'E') {
                    450: 	    $text.=$aref->[2];
                    451: 	}
                    452: 	else {
                    453: 	    $text.=$aref->[1];
                    454: 	}
                    455:     }
                    456:     if ($text) {
                    457: 	$note=$text;
                    458:     }
                    459:     return '';
                    460: 
                    461: }
                    462: # ------------------------------------------------- Format dependencies section
                    463: sub format_dependencies {
                    464:     my @tokeninfo=@_;
                    465:     $dependencies='';
                    466:     my $text=&trim($parser->get_text('/dependencies'));
                    467:     if ($text) {
                    468: 	$parser->get_tag('/dependencies');
                    469: 	$dependencies=join(';',
                    470: 			      (map {s/^\s*//;s/\s$//;$_} split(/\;/,$text)));
                    471:     }
                    472:     return '';
                    473: }
                    474: # ------------------------------------------------ Format specialnotice section
                    475: sub format_specialnotices {
                    476:     $parser->get_tag('/specialnotices');
                    477:     return '';
                    478: }
                    479: # ------------------------------------------------ Format specialnotice section
                    480: sub format_specialnotice {
                    481:     $parser->get_tag('/specialnotice');
                    482:     return '';
                    483: }
                    484: # ------------------------------------- Render less-than and greater-than signs
                    485: sub htmlsafe {
                    486:     my $text=@_[0];
                    487:     $text =~ s/</&lt;/g;
                    488:     $text =~ s/>/&gt;/g;
                    489:     return $text;
                    490: }
                    491: # --------------------------------------- remove starting and ending whitespace
                    492: sub trim {
                    493:     my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
                    494: } 
                    495: 
                    496: # ----------------------------------- POD (plain old documentation, CPAN style)
                    497: 
                    498: =head1 NAME
                    499: 
                    500: piml_parse.pl - This is meant to parse files meeting the piml document type.
1.2       harris41  501: See piml.dtd.  PIML=Post Installation Markup Language.
1.1       harris41  502: 
                    503: =head1 SYNOPSIS
                    504: 
                    505: Usage is for piml file to come in through standard input.
                    506: 
                    507: =over 4
                    508: 
                    509: =item * 
                    510: 
1.2       harris41  511: 1st argument is the category permissions to use (runtime or development)
1.1       harris41  512: 
                    513: =item *
                    514: 
1.2       harris41  515: 2nd argument is the distribution
1.1       harris41  516: (default,redhat6.2,debian2.2,redhat7.1,etc).
                    517: 
                    518: =item *
                    519: 
1.2       harris41  520: 3rd argument is to manually specify a targetroot.
1.1       harris41  521: 
                    522: =back
                    523: 
                    524: Only the 1st argument is mandatory for the program to run.
                    525: 
                    526: Example:
                    527: 
                    528: cat ../../doc/loncapafiles.piml |\\
                    529: perl piml_parse.pl html default /home/sherbert/loncapa /tmp/install
                    530: 
                    531: =head1 DESCRIPTION
                    532: 
                    533: I am using a multiple pass-through approach to parsing
                    534: the piml file.  This saves memory and makes sure the server
                    535: will never be overloaded.
                    536: 
                    537: =head1 README
                    538: 
                    539: I am using a multiple pass-through approach to parsing
                    540: the piml file.  This saves memory and makes sure the server
                    541: will never be overloaded.
                    542: 
                    543: =head1 PREREQUISITES
                    544: 
                    545: HTML::TokeParser
                    546: 
                    547: =head1 COREQUISITES
                    548: 
                    549: =head1 OSNAMES
                    550: 
                    551: linux
                    552: 
                    553: =head1 SCRIPT CATEGORIES
                    554: 
                    555: Packaging/Administrative
                    556: 
                    557: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>