nsdl/build/piml_parse.pl - view

File: [LON-CAPA] / nsdl / build / piml_parse.pl
Revision 1.1: download - view: text, annotated - select for diffs
Sun May 5 02:44:57 2002 UTC (22 years ago) by harris41
Branches: MAIN
CVS tags: HEAD

setting up the LPML toolset from http://lpml.sourceforge.net/

1: #!/usr/bin/perl 2: 3: # -------------------------------------------------------- Documentation notice 4: # Run "perldoc ./lpml_parse.pl" in order to best view the software 5: # documentation internalized in this program. 6: 7: # --------------------------------------------------------- License Information 8: # The LearningOnline Network with CAPA 9: # piml_parse.pl - Linux Packaging Markup Language parser 10: # 11: # $Id: piml_parse.pl,v 1.1 2002/05/05 02:44:57 harris41 Exp $ 12: # 13: # Written by Scott Harrison, codeharrison@yahoo.com 14: # 15: # Copyright Michigan State University Board of Trustees 16: # 17: # This file is part of the LearningOnline Network with CAPA (LON-CAPA). 18: # 19: # LON-CAPA is free software; you can redistribute it and/or modify 20: # it under the terms of the GNU General Public License as published by 21: # the Free Software Foundation; either version 2 of the License, or 22: # (at your option) any later version. 23: # 24: # LON-CAPA is distributed in the hope that it will be useful, 25: # but WITHOUT ANY WARRANTY; without even the implied warranty of 26: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 27: # GNU General Public License for more details. 28: # 29: # You should have received a copy of the GNU General Public License 30: # along with LON-CAPA; if not, write to the Free Software 31: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 32: # 33: # /home/httpd/html/adm/gpl.txt 34: # 35: # http://www.lon-capa.org/ 36: # 37: # YEAR=2002 38: # 1/28,1/29,1/30,1/31,2/5,4/8 - Scott Harrison 39: # 40: ### 41: 42: ############################################################################### 43: ## ## 44: ## ORGANIZATION OF THIS PERL SCRIPT ## 45: ## 1. Notes ## 46: ## 2. Get command line arguments ## 47: ## 3. First pass through (grab distribution-specific information) ## 48: ## 4. Second pass through (parse out what is not necessary) ## 49: ## 5. Third pass through (translate markup according to specified mode) ## 50: ## 6. Functions (most all just format contents of different markup tags) ## 51: ## 7. POD (plain old documentation, CPAN style) ## 52: ## ## 53: ############################################################################### 54: 55: # ----------------------------------------------------------------------- Notes 56: # 57: # I am using a multiple pass-through approach to parsing 58: # the piml file. This saves memory and makes sure the server 59: # will never be overloaded. 60: # 61: # This is meant to parse files meeting the piml document type. 62: # See piml.dtd. PIML=Post Installation Markup Language. 63: 64: use HTML::TokeParser; 65: 66: my $usage=(<<END); 67: **** ERROR ERROR ERROR ERROR **** 68: Usage is for piml file to come in through standard input. 69: 1st argument is the category permissions to use (runtime or development) 70: 2nd argument is the distribution (default,redhat6.2,debian2.2,redhat7.1,etc). 71: 3rd argument is to manually specify a targetroot 72: 73: Only the 1st argument is mandatory for the program to run. 74: 75: Example: 76: 77: cat ../../doc/loncapafiles.piml |\\ 78: perl piml_parse.pl html development default /home/sherbert/loncapa /tmp/install 79: END 80: 81: # ------------------------------------------------- Grab command line arguments 82: 83: my $mode; 84: if (@ARGV==3) { 85: $mode = shift @ARGV; 86: } 87: else { 88: @ARGV=();shift @ARGV; 89: while(<>){} # throw away the input to avoid broken pipes 90: print $usage; 91: exit -1; # exit with error status 92: } 93: 94: my $categorytype; 95: if (@ARGV) { 96: $categorytype = shift @ARGV; 97: } 98: 99: my $dist; 100: if (@ARGV) { 101: $dist = shift @ARGV; 102: } 103: 104: my $targetroot; 105: my $targetrootarg; 106: if (@ARGV) { 107: $targetroot = shift @ARGV; 108: } 109: $targetroot=~s/\/$//; 110: $targetrootarg=$targetroot; 111: 112: my $logcmd='| tee -a WARNINGS'; 113: 114: my $invocation; 115: # --------------------------------------------------- Record program invocation 116: if ($mode eq 'install' or $mode eq 'configinstall' or $mode eq 'build') { 117: $invocation=(<<END); 118: # Invocation: STDINPUT | piml_parse.pl 119: # 1st argument (category type) is: $categorytype 120: # 2nd argument (distribution) is: $dist 121: # 3rd argument (targetroot) is: described below 122: END 123: } 124: 125: # ---------------------------------------------------- Start first pass through 126: my @parsecontents = <>; 127: my $parsestring = join('',@parsecontents); 128: my $outstring=''; 129: 130: # Need to make a pass through and figure out what defaults are 131: # overrided. Top-down overriding strategy (leaves don't know 132: # about distant leaves). 133: 134: my @hierarchy; 135: $hierarchy[0]=0; 136: my $hloc=0; 137: my $token=''; 138: $parser = HTML::TokeParser->new(\$parsestring) or 139: die('can\'t create TokeParser object'); 140: $parser->xml_mode('1'); 141: my %hash; 142: my $key; 143: while ($token = $parser->get_token()) { 144: if ($token->[0] eq 'S') { 145: $hloc++; 146: $hierarchy[$hloc]++; 147: $key=$token->[1].join(',',@hierarchy[0..($hloc-1)]); 148: my $thisdist=' '.$token->[2]{'dist'}.' '; 149: if ($thisdist eq ' default ') { 150: $hash{$key}=1; # there is a default setting for this key 151: } 152: elsif ($dist && $hash{$key}==1 && $thisdist=~/\s$dist\s/) { 153: $hash{$key}=2; # disregard default setting for this key if 154: # there is a directly requested distribution match 155: } 156: } 157: if ($token->[0] eq 'E') { 158: $hloc--; 159: } 160: } 161: 162: # --------------------------------------------------- Start second pass through 163: undef $hloc; 164: undef @hierarchy; 165: undef $parser; 166: $hierarchy[0]=0; 167: $parser = HTML::TokeParser->new(\$parsestring) or 168: die('can\'t create TokeParser object'); 169: $parser->xml_mode('1'); 170: my $cleanstring; 171: while ($token = $parser->get_token()) { 172: if ($token->[0] eq 'S') { 173: $hloc++; 174: $hierarchy[$hloc]++; 175: $key=$token->[1].join(',',@hierarchy[0..($hloc-1)]); 176: my $thisdist=' '.$token->[2]{'dist'}.' '; 177: # This conditional clause is set up to ignore two sets 178: # of invalid conditions before accepting entry into 179: # the cleanstring. 180: if ($hash{$key}==2 and 181: !($thisdist eq ' ' or $thisdist =~/\s$dist\s/)) { 182: if ($token->[4]!~/\/>$/) { 183: $parser->get_tag('/'.$token->[1]); 184: $hloc--; 185: } 186: } 187: elsif ($thisdist ne ' ' and $thisdist!~/\s$dist\s/ and 188: !($thisdist eq ' default ' and $hash{$key}!=2)) { 189: if ($token->[4]!~/\/>$/) { 190: $parser->get_tag('/'.$token->[1]); 191: $hloc--; 192: } 193: } 194: else { 195: $cleanstring.=$token->[4]; 196: } 197: if ($token->[4]=~/\/>$/) { 198: # $hloc--; 199: } 200: } 201: if ($token->[0] eq 'E') { 202: $cleanstring.=$token->[2]; 203: $hloc--; 204: } 205: if ($token->[0] eq 'T') { 206: $cleanstring.=$token->[1]; 207: } 208: } 209: $cleanstring=&trim($cleanstring); 210: $cleanstring=~s/\>\s*\n\s*\</\>\</g; 211: 212: # ---------------------------------------------------- Start final pass through 213: 214: # storage variables 215: my $piml; 216: my $categories; 217: my @categorynamelist; 218: my $category; 219: my $category_att_name; 220: my $category_att_type; 221: my $chown; 222: my $chmod; 223: my $abbreviation; # space-free abbreviation; esp. for image names 224: my $categoryname; 225: my $description; 226: my $files; 227: my $file; 228: my $target; 229: my $note; 230: my $commands; 231: my $command; 232: my $dependencies; 233: my @links; 234: my %categoryhash; 235: my $dpathlength; 236: my %fab; # file category abbreviation 237: my $directory_count; 238: my $file_count; 239: my $link_count; 240: my $fileglob_count; 241: my $fileglobnames_count; 242: my %categorycount; 243: 244: my $mode; 245: 246: my @buildall; 247: my @buildinfo; 248: 249: my @configall; 250: 251: # Make new parser with distribution specific input 252: undef($parser); 253: $parser = HTML::TokeParser->new(\$cleanstring) or 254: die('can\'t create TokeParser object'); 255: $parser->xml_mode('1'); 256: 257: # Define handling methods for mode-dependent text rendering 258: 259: $parser->{textify}={ 260: specialnotices => \&format_specialnotices, 261: specialnotice => \&format_specialnotice, 262: targetroot => \&format_targetroot, 263: categories => \&format_categories, 264: category => \&format_category, 265: abbreviation => \&format_abbreviation, 266: chown => \&format_chown, 267: chmod => \&format_chmod, 268: categoryname => \&format_categoryname, 269: files => \&format_files, 270: file => \&format_file, 271: target => \&format_target, 272: note => \&format_note, 273: build => \&format_build, 274: dependencies => \&format_dependencies, 275: filenames => \&format_filenames, 276: perlscript => \&format_perlscript, 277: TARGET => \&format_TARGET, 278: }; 279: 280: my $text; 281: my $token; 282: undef($hloc); 283: undef(@hierarchy); 284: my $hloc; 285: my @hierarchy2; 286: while ($token = $parser->get_tag('piml')) { 287: &format_piml(@{$token}); 288: $text = &trim($parser->get_text('/piml')); 289: $token = $parser->get_tag('/piml'); 290: print($piml); 291: print("\n"); 292: print($text); 293: print("\n"); 294: print(&end()); 295: } 296: exit(0); 297: 298: # ---------- Functions (most all just format contents of different markup tags) 299: 300: # ------------------------ Final output at end of markup parsing and formatting 301: sub end { 302: 303: } 304: 305: # ----------------------- Take in string to parse and the separation expression 306: sub extract_array { 307: my ($stringtoparse,$sepexp) = @_; 308: my @a=split(/$sepexp/,$stringtoparse); 309: return \@a; 310: } 311: 312: # --------------------------------------------------------- Format piml section 313: sub format_piml { 314: my (@tokeninfo)=@_; 315: my $date=`date`; chop $date; 316: $piml=<<END; 317: #!/usr/bin/perl 318: 319: # Generated from a PIML (Post Installation Markup Language) document 320: 321: END 322: } 323: # --------------------------------------------------- Format targetroot section 324: sub format_targetroot { 325: my $text=&trim($parser->get_text('/targetroot')); 326: $text=$targetroot if $targetroot; 327: $parser->get_tag('/targetroot'); 328: return('# TARGET INSTALL LOCATION is "'.$targetroot."\"\n"); 329: } 330: # -------------------------------------------------- Format perl script section 331: sub format_perlscript { 332: my (@tokeninfo)=@_; 333: $mode=$tokeninfo[2]->{'mode'}; 334: my $text=$parser->get_text('/perlscript'); 335: $parser->get_tag('/perlscript'); 336: if ($mode eq 'bg') { 337: open(OUT,">/tmp/piml$$.pl"); 338: print(OUT $text); 339: close(OUT); 340: return(<<END); 341: # launch background process for $target 342: system("perl /tmp/piml$$.pl &"); 343: END 344: } 345: else { 346: return($text); 347: } 348: } 349: # --------------------------------------------------------------- Format TARGET 350: sub format_TARGET { 351: my (@tokeninfo)=@_; 352: $parser->get_tag('/TARGET'); 353: return($target); 354: } 355: # --------------------------------------------------- Format categories section 356: sub format_categories { 357: my $text=&trim($parser->get_text('/categories')); 358: $parser->get_tag('/categories'); 359: return('# CATEGORIES'."\n".$text); 360: } 361: # --------------------------------------------------- Format categories section 362: sub format_category { 363: my (@tokeninfo)=@_; 364: $category_att_name=$tokeninfo[2]->{'name'}; 365: $category_att_type=$tokeninfo[2]->{'type'}; 366: $abbreviation=''; $chmod='';$chown=''; 367: $parser->get_text('/category'); 368: $parser->get_tag('/category'); 369: $fab{$category_att_name}=$abbreviation; 370: if ($category_att_type eq $categorytype) { 371: my ($user,$group)=split(/\:/,$chown); 372: $categoryhash{$category_att_name}='-o '.$user.' -g '.$group. 373: ' -m '.$chmod; 374: } 375: return(''); 376: } 377: # --------------------------------------------------- Format categories section 378: sub format_abbreviation { 379: my @tokeninfo=@_; 380: $abbreviation=''; 381: my $text=&trim($parser->get_text('/abbreviation')); 382: if ($text) { 383: $parser->get_tag('/abbreviation'); 384: $abbreviation=$text; 385: } 386: return(''); 387: } 388: # -------------------------------------------------------- Format chown section 389: sub format_chown { 390: my @tokeninfo=@_; 391: $chown=''; 392: my $text=&trim($parser->get_text('/chown')); 393: if ($text) { 394: $parser->get_tag('/chown'); 395: $chown=$text; 396: } 397: return(''); 398: } 399: # -------------------------------------------------------- Format chmod section 400: sub format_chmod { 401: my @tokeninfo=@_; 402: $chmod=''; 403: my $text=&trim($parser->get_text('/chmod')); 404: if ($text) { 405: $parser->get_tag('/chmod'); 406: $chmod=$text; 407: } 408: return(''); 409: } 410: # ------------------------------------------------- Format categoryname section 411: sub format_categoryname { 412: my @tokeninfo=@_; 413: $categoryname=''; 414: my $text=&trim($parser->get_text('/categoryname')); 415: if ($text) { 416: $parser->get_tag('/categoryname'); 417: $categoryname=$text; 418: } 419: return(''); 420: } 421: # -------------------------------------------------------- Format files section 422: sub format_files { 423: my $text=$parser->get_text('/files'); 424: $parser->get_tag('/files'); 425: return("\n".'# There are '.$file_count.' files this script works on'. 426: "\n\n".$text); 427: } 428: # --------------------------------------------------------- Format file section 429: sub format_file { 430: my @tokeninfo=@_; 431: $file=''; $source=''; $target=''; $categoryname=''; $description=''; 432: $note=''; $build=''; $status=''; $dependencies=''; 433: my $text=&trim($parser->get_text('/file')); 434: $file_count++; 435: $categorycount{$categoryname}++; 436: $parser->get_tag('/file'); 437: return("# File: $target\n". 438: "$text\n"); 439: } 440: # ------------------------------------------------------- Format target section 441: sub format_target { 442: my @tokeninfo=@_; 443: $target=''; 444: my $text=&trim($parser->get_text('/target')); 445: if ($text) { 446: $parser->get_tag('/target'); 447: $target=$targetrootarg.$text; 448: } 449: return(''); 450: } 451: # --------------------------------------------------------- Format note section 452: sub format_note { 453: my @tokeninfo=@_; 454: $note=''; 455: my $aref; 456: my $text; 457: while ($aref=$parser->get_token()) { 458: if ($aref->[0] eq 'E' && $aref->[1] eq 'note') { 459: last; 460: } 461: elsif ($aref->[0] eq 'S') { 462: $text.=$aref->[4]; 463: } 464: elsif ($aref->[0] eq 'E') { 465: $text.=$aref->[2]; 466: } 467: else { 468: $text.=$aref->[1]; 469: } 470: } 471: if ($text) { 472: $note=$text; 473: } 474: return(''); 475: } 476: # ------------------------------------------------- Format dependencies section 477: sub format_dependencies { 478: my @tokeninfo=@_; 479: $dependencies=''; 480: my $text=&trim($parser->get_text('/dependencies')); 481: if ($text) { 482: $parser->get_tag('/dependencies'); 483: $dependencies=join(';', 484: (map {s/^\s*//;s/\s$//;$_} split(/\;/,$text))); 485: } 486: return(''); 487: } 488: # ------------------------------------------------ Format specialnotice section 489: sub format_specialnotices { 490: $parser->get_tag('/specialnotices'); 491: return(''); 492: } 493: # ------------------------------------------------ Format specialnotice section 494: sub format_specialnotice { 495: $parser->get_tag('/specialnotice'); 496: return(''); 497: } 498: # ------------------------------------- Render less-than and greater-than signs 499: sub htmlsafe { 500: my $text=@_[0]; 501: $text =~ s/</</g; 502: $text =~ s/>/>/g; 503: return($text); 504: } 505: # --------------------------------------- remove starting and ending whitespace 506: sub trim { 507: my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return($s); 508: } 509: 510: # ----------------------------------- POD (plain old documentation, CPAN style) 511: 512: =pod 513: 514: =head1 NAME 515: 516: piml_parse.pl - This is meant to parse piml files (Post Installation Markup Language) 517: 518: =head1 SYNOPSIS 519: 520: Usage is for piml file to come in through standard input. 521: 522: =over 4 523: 524: =item * 525: 526: 1st argument is the category permissions to use (runtime or development) 527: 528: =item * 529: 530: 2nd argument is the distribution 531: (default,redhat6.2,debian2.2,redhat7.1,etc). 532: 533: =item * 534: 535: 3rd argument is to manually specify a targetroot. 536: 537: =back 538: 539: Only the 1st argument is mandatory for the program to run. 540: 541: Example: 542: 543: cat ../../doc/loncapafiles.piml |\\ 544: perl piml_parse.pl html default /home/sherbert/loncapa /tmp/install 545: 546: =head1 DESCRIPTION 547: 548: I am using a multiple pass-through approach to parsing 549: the piml file. This saves memory and makes sure the server 550: will never be overloaded. 551: 552: =head1 README 553: 554: I am using a multiple pass-through approach to parsing 555: the piml file. This saves memory and makes sure the server 556: will never be overloaded. 557: 558: =head1 PREREQUISITES 559: 560: HTML::TokeParser 561: 562: =head1 COREQUISITES 563: 564: =head1 OSNAMES 565: 566: linux 567: 568: =head1 SCRIPT CATEGORIES 569: 570: Packaging/Administrative 571: 572: =head1 AUTHOR 573: 574: Scott Harrison 575: codeharrison@yahoo.com 576: 577: Please let me know how/if you are finding this script useful and 578: any/all suggestions. -Scott 579: 580: =cut