Diff for /loncom/build/lpml_parse.pl between versions 1.2 and 1.25

version 1.2, 2001/06/20 12:32:54 version 1.25, 2001/12/01 16:51:07
Line 1 Line 1
 #!/usr/bin/perl  #!/usr/bin/perl
   
 # Scott Harrison  # Scott Harrison
   # YEAR=2001
 # May 2001  # May 2001
 # 06/19/2001 - Scott Harrison  # 06/19/2001,06/20,06/24 - Scott Harrison
   # 9/5/2001,9/6,9/7,9/8 - Scott Harrison
   # 9/17,9/18 - Scott Harrison
   # 11/4,11/5,11/6,11/7,11/16,11/17 - Scott Harrison
   #
   # $Id$
   ###
   
   ###############################################################################
   ##                                                                           ##
   ## ORGANIZATION OF THIS PERL SCRIPT                                          ##
   ## 1. Notes                                                                  ##
   ## 2. Get command line arguments                                             ##
   ## 3. First pass through (grab distribution-specific information)            ##
   ## 4. Second pass through (parse out what is not necessary)                  ##
   ## 5. Third pass through (translate markup according to specified mode)      ##
   ## 6. Functions (most all just format contents of different markup tags)     ##
   ## 7. POD (plain old documentation, CPAN style)                              ##
   ##                                                                           ##
   ###############################################################################
   
   # ----------------------------------------------------------------------- Notes
   #
   # I am using a multiple pass-through approach to parsing
   # the lpml file.  This saves memory and makes sure the server
   # will never be overloaded.
   #
   # This is meant to parse files meeting the lpml document type.
   # See lpml.dtd.  LPML=Linux Packaging Markup Language.
   
 use HTML::TokeParser;  use HTML::TokeParser;
   
 my $target = shift @ARGV;  my $usage=<<END;
 my $dist = shift @ARGV;  **** ERROR ERROR ERROR ERROR ****
   Usage is for lpml file to come in through standard input.
   1st argument is the mode of parsing.
   2nd argument is the category permissions to use (runtime or development)
   3rd argument is the distribution (default,redhat6.2,debian2.2,redhat7.1,etc).
   4th argument is to manually specify a sourceroot.
   5th argument is to manually specify a targetroot.
   
   Only the 1st argument is mandatory for the program to run.
   
   Example:
   
   cat ../../doc/loncapafiles.lpml |\\
   perl lpml_parse.pl html development default /home/sherbert/loncapa /tmp/install
   END
   
   # ------------------------------------------------- Grab command line arguments
   
   my $mode;
   if (@ARGV==5) {
       $mode = shift @ARGV;
   }
   else {
       @ARGV=();shift @ARGV;
       while(<>){} # throw away the input to avoid broken pipes
       print $usage;
       exit -1; # exit with error status
   }
   
   my $categorytype;
   if (@ARGV) {
       $categorytype = shift @ARGV;
   }
   
   my $dist;
   if (@ARGV) {
       $dist = shift @ARGV;
   }
   
   my $targetroot;
   my $sourceroot;
   if (@ARGV) {
       $sourceroot = shift @ARGV;
   }
   if (@ARGV) {
       $targetroot = shift @ARGV;
   }
   $sourceroot=~s/\/$//;
   $targetroot=~s/\/$//;
   
   my $logcmd='| tee -a WARNINGS';
   
   my $invocation;
   # --------------------------------------------------- Record program invocation
   if ($mode eq 'install' or $mode eq 'configinstall' or $mode eq 'build') {
       $invocation=(<<END);
   # Invocation: STDINPUT | lpml_parse.pl
   #             1st argument (mode) is: $mode
   #             2nd argument (category type) is: $categorytype
   #             3rd argument (distribution) is: $dist
   #             4th argument (targetroot) is: described below
   #             5th argument (sourceroot) is: described below
   END
   }
   
   # ---------------------------------------------------- Start first pass through
 my @parsecontents = <>;  my @parsecontents = <>;
 my $parsestring = join('',@parsecontents);  my $parsestring = join('',@parsecontents);
 my $outstring;  my $outstring;
   
 $outstring = &xmlparse($parsestring,$target,$dist);  # Need to make a pass through and figure out what defaults are
 print $outstring;  # overrided.  Top-down overriding strategy (leaves don't know
   # about distant leaves).
   
   my @hierarchy;
   $hierarchy[0]=0;
   my $hloc=0;
   my $token;
   $parser = HTML::TokeParser->new(\$parsestring) or
       die('can\'t create TokeParser object');
   $parser->xml_mode('1');
   my %hash;
   my $key;
   while ($token = $parser->get_token()) {
       if ($token->[0] eq 'S') {
    $hloc++;
    $hierarchy[$hloc]++;
    $key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
    my $thisdist=' '.$token->[2]{'dist'}.' ';
    if ($thisdist eq ' default ') {
       $hash{$key}=1; # there is a default setting for this key
    }
    elsif ($dist && $hash{$key}==1 && $thisdist=~/\s$dist\s/) {
       $hash{$key}=2; # disregard default setting for this key if
                      # there is a directly requested distribution match
    }
       }
       if ($token->[0] eq 'E') {
    $hloc--;
       }
   }
   
   # --------------------------------------------------- Start second pass through
   undef $hloc;
   undef @hierarchy;
   undef $parser;
   $hierarchy[0]=0;
   $parser = HTML::TokeParser->new(\$parsestring) or
       die('can\'t create TokeParser object');
   $parser->xml_mode('1');
   my $cleanstring;
   while ($token = $parser->get_token()) {
       if ($token->[0] eq 'S') {
    $hloc++;
    $hierarchy[$hloc]++;
    $key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
    my $thisdist=' '.$token->[2]{'dist'}.' ';
    # This conditional clause is set up to ignore two sets
    # of invalid conditions before accepting entry into
    # the cleanstring.
    if ($hash{$key}==2 and
       !($thisdist eq '  ' or $thisdist =~/\s$dist\s/)) {
       if ($token->[4]!~/\/>$/) {
    $parser->get_tag('/'.$token->[1]);
    $hloc--;
       }
    }
    elsif ($thisdist ne '  ' and $thisdist!~/\s$dist\s/ and
          !($thisdist eq ' default ' and $hash{$key}!=2)) {
       if ($token->[4]!~/\/>$/) {
    $parser->get_tag('/'.$token->[1]);
    $hloc--;
       }
    }
    else {
       $cleanstring.=$token->[4];
    }
    if ($token->[4]=~/\/>$/) {
       $hloc--;
    }
       }
       if ($token->[0] eq 'E') {
    $cleanstring.=$token->[2];
    $hloc--;
       }
       if ($token->[0] eq 'T') {
    $cleanstring.=$token->[1];
       }
   }
   $cleanstring=&trim($cleanstring);
   $cleanstring=~s/\>\s*\n\s*\</\>\</g;
   
   # ---------------------------------------------------- Start final pass through
   
   # storage variables
   my $lpml;
   my $categories;
   my $category;
   my $category_att_name;
   my $category_att_type;
   my $chown;
   my $chmod;
   my $abbreviation; # space-free abbreviation; esp. for image names
   my $rpm;
   my $rpmSummary;
   my $rpmName;
   my $rpmVersion;
   my $rpmRelease;
   my $rpmVendor;
   my $rpmBuildRoot;
   my $rpmCopyright;
   my $rpmGroup;
   my $rpmSource;
   my $rpmAutoReqProv;
   my $rpmdescription;
   my $rpmpre;
   my $directories;
   my $directory;
   my $targetdirs;
   my $targetdir;
   my $categoryname;
   my $description;
   my $files;
   my $fileglobs;
   my $links;
   my $file;
   my $link;
   my $fileglob;
   my $sourcedir;
   my $targets;
   my $target;
   my $source;
   my $note;
   my $build;
   my $buildlink;
   my $commands;
   my $command;
   my $status;
   my $dependencies;
   my $dependency;
   my @links;
   my %categoryhash;
   
   my @buildall;
   my @buildinfo;
   
   my @configall;
   
   # Make new parser with distribution specific input
   undef $parser;
   $parser = HTML::TokeParser->new(\$cleanstring) or
       die('can\'t create TokeParser object');
   $parser->xml_mode('1');
   
   # Define handling methods for mode-dependent text rendering
   $parser->{textify}={
       targetroot => \&format_targetroot,
       sourceroot => \&format_sourceroot,
       categories => \&format_categories,
       category => \&format_category,
       abbreviation => \&format_abbreviation,
       targetdir => \&format_targetdir,
       chown => \&format_chown,
       chmod => \&format_chmod,
       rpm => \&format_rpm,
       rpmSummary => \&format_rpmSummary,
       rpmName => \&format_rpmName,
       rpmVersion => \&format_rpmVersion,
       rpmRelease => \&format_rpmRelease,
       rpmVendor => \&format_rpmVendor,
       rpmBuildRoot => \&format_rpmBuildRoot,
       rpmCopyright => \&format_rpmCopyright,
       rpmGroup => \&format_rpmGroup,
       rpmSource => \&format_rpmSource,
       rpmAutoReqProv => \&format_rpmAutoReqProv,
       rpmdescription => \&format_rpmdescription,
       rpmpre => \&format_rpmpre,
       directories => \&format_directories,
       directory => \&format_directory,
       categoryname => \&format_categoryname,
       description => \&format_description,
       files => \&format_files,
       file => \&format_file,
       fileglob => \&format_fileglob,
       links => \&format_links,
       link => \&format_link,
       linkto => \&format_linkto,
       source => \&format_source,
       target => \&format_target,
       note => \&format_note,
       build => \&format_build,
       status => \&format_status,
       dependencies => \&format_dependencies,
       buildlink => \&format_buildlink,
       glob => \&format_glob,
       sourcedir => \&format_sourcedir,
       filenames => \&format_filenames,
       };
   
   my $text;
   my $token;
   undef $hloc;
   undef @hierarchy;
   my $hloc;
   my @hierarchy2;
   while ($token = $parser->get_tag('lpml')) {
       &format_lpml(@{$token});
       $text = &trim($parser->get_text('/lpml'));
       $token = $parser->get_tag('/lpml');
       print $lpml; 
       print "\n";
   #    $text=~s/\s*\n\s*\n\s*/\n/g;
       print $text;
       print "\n";
       print &end();
   }
   exit;
   
   # ---------- Functions (most all just format contents of different markup tags)
   
   # ------------------------ Final output at end of markup parsing and formatting
   sub end {
       if ($mode eq 'html') {
    return "</body></html>\n";
       }
       if ($mode eq 'install') {
    return '';
       }
   }
   
   # ----------------------- Take in string to parse and the separation expression
   sub extract_array {
       my ($stringtoparse,$sepexp) = @_;
       my @a=split(/$sepexp/,$stringtoparse);
       return \@a;
   }
   
   # --------------------------------------------------------- Format lpml section
   sub format_lpml {
       my (@tokeninfo)=@_;
       my $date=`date`; chop $date;
       if ($mode eq 'html') {
    $lpml=<<END;
   <html>
   <head>
   <title>LPML Description Page
   (dist=$dist, categorytype=$categorytype, $date)</title>
   </head>
   <body>
   END
    $lpml .= "<br /><font size='+2'>LPML Description Page (dist=$dist, ".
       "categorytype=$categorytype, $date)".
       "</font>";
    $lpml .=<<END;
   <ul>
   <li><a href='#about'>About this file</a></li>
   <li><a href='#ownperms'>File Type Ownership and Permissions
   Descriptions</a></li>
   <li><a href='#package'>Software Package Description</a></li>
   <li><a href='#directories'>Directory Structure</a></li>
   <li><a href='#files'>File and Directory Structure</a></li>
   </ul>
   END
           $lpml .=<<END;
   <br />&nbsp;<br /><a name='about' />
   <font size='+2'>About this file</font>
   <p>
   This file is generated dynamically by <tt>lpml_parse.pl</tt> as
   part of a development compilation process.  Author: Scott
   Harrison (harris41\@msu.edu).
   </p>
   END
       }
       elsif ($mode eq 'text') {
    $lpml = "LPML Description Page (dist=$dist, $date)";
    $lpml .=<<END;
   
   * About this file
   * Software Package Description
   * Directory Structure
   * File Type Ownership and Permissions
   * File and Directory Structure
   END
           $lpml .=<<END;
   
   About this file
   
   This file is generated dynamically by lpml_parse.pl as
   part of a development compilation process.  Author: Scott
   Harrison (harris41\@msu.edu).
   
   END
       }
       elsif ($mode eq 'install') {
    print '# LPML install targets. Linux Packaging Markup Language,';
    print ' by Scott Harrison 2001'."\n";
    print '# This file was automatically generated on '.`date`;
    print "\n".$invocation;
    $lpml .= "SHELL=\"/bin/bash\"\n\n";
       }
       elsif ($mode eq 'configinstall') {
    print '# LPML configuration file targets (configinstall).'."\n";
    print '# Linux Packaging Markup Language,';
    print ' by Scott Harrison 2001'."\n";
    print '# This file was automatically generated on '.`date`;
    print "\n".$invocation;
    $lpml .= "SHELL=\"/bin/bash\"\n\n";
       }
       elsif ($mode eq 'build') {
    $lpml = "# LPML build targets. Linux Packaging Markup Language,";
    $lpml .= ' by Scott Harrison 2001'."\n";
    $lpml .= '# This file was automatically generated on '.`date`;
    $lpml .= "\n".$invocation;
    $lpml .= "SHELL=\"/bin/sh\"\n\n";
       }
       else {
    return '';
       }
   }
   # --------------------------------------------------- Format targetroot section
   sub format_targetroot {
       my $text=&trim($parser->get_text('/targetroot'));
       $text=$targetroot if $targetroot;
       $parser->get_tag('/targetroot');
       if ($mode eq 'html') {
    return $targetroot="\n<br />TARGETROOT: $text";
       }
       elsif ($mode eq 'install' or $mode eq 'build' or
      $mode eq 'configinstall') {
    return '# TARGET INSTALL LOCATION is "'.$targetroot."\"\n";
       }
       else {
    return '';
       }
   }
   # --------------------------------------------------- Format sourceroot section
   sub format_sourceroot {
       my $text=&trim($parser->get_text('/sourceroot'));
       $text=$sourceroot if $sourceroot;
       $parser->get_tag('/sourceroot');
       if ($mode eq 'html') {
    return $sourceroot="\n<br />SOURCEROOT: $text";
       }
       elsif ($mode eq 'install' or $mode eq 'build' or
      $mode eq 'configinstall') {
    return '# SOURCE CODE LOCATION IS "'.$sourceroot."\"\n";;
       }
       else {
    return '';
       }
   }
   # --------------------------------------------------- Format categories section
   sub format_categories {
       my $text=&trim($parser->get_text('/categories'));
       $parser->get_tag('/categories');
       if ($mode eq 'html') {
    return $categories="\n<br />&nbsp;<br />".
       "\n<a name='ownperms'>".
       "\n<font size='+2'>File Type Ownership and Permissions".
       " Descriptions</font>".
       "\n<p>This table shows what permissions and ownership settings ".
       "correspond to each category.</p>".
       "\n<table border='1' cellpadding='5' width='60%'>\n".
       "<tr>".
       "<th align='left' bgcolor='#ffffff'>Icon</th>".
       "<th align='left' bgcolor='#ffffff'>Category Name</th>".
       "<th align='left' bgcolor='#ffffff'>Permissions ".
       "($categorytype)</th>".
       "</tr>".
       "\n$text\n".
       "</table>\n";
       }
       elsif ($mode eq 'text') {
    return $categories="\n".
       "\nFile Type Ownership and Permissions".
       " Descriptions".
       "\n$text".
       "\n";
       }
       else {
    return '';
       }
   }
   # --------------------------------------------------- Format categories section
   sub format_category {
       my (@tokeninfo)=@_;
       $category_att_name=$tokeninfo[2]->{'name'};
       $category_att_type=$tokeninfo[2]->{'type'};
       $abbreviation=''; $chmod='';$chown='';
       $parser->get_text('/category');
       $parser->get_tag('/category');
       if ($mode eq 'html') {
    if ($category_att_type eq $categorytype) {
       return $category="<tr>".
    "<td><img src='$abbreviation.gif' ".
              "alt='${category_att_name}' /></td>\n".
    "<td>${category_att_name}</td>\n".
    "<td>$chmod $chown</td>\n".
    "</tr>".
    "\n";
   # return $category="\n<br />CATEGORY $category_att_name ".
   #    "$category_att_type $chmod $chown";
    }
       }
       else {
    if ($category_att_type eq $categorytype) {
       my ($user,$group)=split(/\:/,$chown);
       $categoryhash{$category_att_name}='-o '.$user.' -g '.$group.
    ' -m '.$chmod;
    }
    return '';
       }
   }
   # --------------------------------------------------- Format categories section
   sub format_abbreviation {
       my @tokeninfo=@_;
       $abbreviation='';
       my $text=&trim($parser->get_text('/abbreviation'));
       if ($text) {
    $parser->get_tag('/abbreviation');
    $abbreviation=$text;
       }
       return '';
   }
   # -------------------------------------------------------- Format chown section
   sub format_chown {
       my @tokeninfo=@_;
       $chown='';
       my $text=&trim($parser->get_text('/chown'));
       if ($text) {
    $parser->get_tag('/chown');
    $chown=$text;
       }
       return '';
   }
   # -------------------------------------------------------- Format chmod section
   sub format_chmod {
       my @tokeninfo=@_;
       $chmod='';
       my $text=&trim($parser->get_text('/chmod'));
       if ($text) {
    $parser->get_tag('/chmod');
    $chmod=$text;
       }
       return '';
   }
   # ---------------------------------------------------------- Format rpm section
   sub format_rpm {
       my $text=&trim($parser->get_text('/rpm'));
       $parser->get_tag('/rpm');
       if ($mode eq 'html') {
    return $rpm=<<END;
   <br />&nbsp;<br />
   <a name='package' />
   <font size='+2'>Software Package Description</font>
   <p>
   <table bgcolor='#ffffff' border='0' cellpadding='10' cellspacing='0'>
   <tr><td><pre>
   $text
   </pre></td></tr>
   </table>
   END
       }
       elsif ($mode eq 'text') {
    return $rpm=<<END;
   Software Package Description
   
   $text
   END
       }
       else {
    return '';
       }
   }
   # --------------------------------------------------- Format rpmSummary section
   sub format_rpmSummary {
       my $text=&trim($parser->get_text('/rpmSummary'));
       $parser->get_tag('/rpmSummary');
       if ($mode eq 'html') {
    return $rpmSummary="\nSummary     : $text";
       }
       elsif ($mode eq 'text') {
    return $rpmSummary="\nSummary     : $text";
       }
       else {
    return '';
       }
   }
   # ------------------------------------------------------ Format rpmName section
   sub format_rpmName {
       my $text=&trim($parser->get_text('/rpmName'));
       $parser->get_tag('/rpmName');
       if ($mode eq 'html') {
    return $rpmName="\nName        : $text";
       }
       elsif ($mode eq 'text') {
    return $rpmName="\nName        : $text";
       }
       else {
    return '';
       }
   }
   # --------------------------------------------------- Format rpmVersion section
   sub format_rpmVersion {
       my $text=$parser->get_text('/rpmVersion');
       $parser->get_tag('/rpmVersion');
       if ($mode eq 'html') {
    return $rpmVersion="\nVersion     : $text";
       }
       elsif ($mode eq 'text') {
    return $rpmVersion="\nVersion     : $text";
       }
       else {
    return '';
       }
   }
   # --------------------------------------------------- Format rpmRelease section
   sub format_rpmRelease {
       my $text=$parser->get_text('/rpmRelease');
       $parser->get_tag('/rpmRelease');
       if ($mode eq 'html') {
    return $rpmRelease="\nRelease     : $text";
       }
       elsif ($mode eq 'text') {
    return $rpmRelease="\nRelease     : $text";
       }
       else {
    return '';
       }
   }
   # ---------------------------------------------------- Format rpmVendor section
   sub format_rpmVendor {
       my $text=$parser->get_text('/rpmVendor');
       $parser->get_tag('/rpmVendor');
       if ($mode eq 'html') {
    return $rpmVendor="\nVendor      : $text";
       }
       elsif ($mode eq 'text') {
    return $rpmVendor="\nVendor      : $text";
       }
       else {
    return '';
       }
   }
   # ------------------------------------------------- Format rpmBuildRoot section
   sub format_rpmBuildRoot {
       my $text=$parser->get_text('/rpmBuildRoot');
       $parser->get_tag('/rpmBuildRoot');
       if ($mode eq 'html') {
    return $rpmBuildRoot="\nBuild Root  : $text";
       }
       elsif ($mode eq 'text') {
    return $rpmBuildRoot="\nBuild Root  : $text";
       }
       else {
    return '';
       }
   }
   # ------------------------------------------------- Format rpmCopyright section
   sub format_rpmCopyright {
       my $text=$parser->get_text('/rpmCopyright');
       $parser->get_tag('/rpmCopyright');
       if ($mode eq 'html') {
    return $rpmCopyright="\nLicense     : $text";
       }
       elsif ($mode eq 'text') {
    return $rpmCopyright="\nLicense     : $text";
       }
       else {
    return '';
       }
   }
   # ----------------------------------------------------- Format rpmGroup section
   sub format_rpmGroup {
       my $text=$parser->get_text('/rpmGroup');
       $parser->get_tag('/rpmGroup');
       if ($mode eq 'html') {
    return $rpmGroup="\nGroup       : $text";
       }
       elsif ($mode eq 'text') {
    return $rpmGroup="\nGroup       : $text";
       }
       else {
    return '';
       }
   }
   # ---------------------------------------------------- Format rpmSource section
   sub format_rpmSource {
       my $text=$parser->get_text('/rpmSource');
       $parser->get_tag('/rpmSource');
       if ($mode eq 'html') {
    return $rpmSource="\nSource      : $text";
       }
       elsif ($mode eq 'text') {
    return $rpmSource="\nSource      : $text";
       }
       else {
    return '';
       }
   }
   # ----------------------------------------------- Format rpmAutoReqProv section
   sub format_rpmAutoReqProv {
       my $text=$parser->get_text('/rpmAutoReqProv');
       $parser->get_tag('/rpmAutoReqProv');
       if ($mode eq 'html') {
    return $rpmAutoReqProv="\nAutoReqProv : $text";
       }
       if ($mode eq 'text') {
    return $rpmAutoReqProv="\nAutoReqProv : $text";
       }
       else {
    return '';
       }
   }
   # ----------------------------------------------- Format rpmdescription section
   sub format_rpmdescription {
       my $text=$parser->get_text('/rpmdescription');
       $parser->get_tag('/rpmdescription');
       if ($mode eq 'html') {
    $text=~s/\n//g;
    $text=~s/\\n/\n/g;
    return $rpmdescription="\nDescription : $text";
       }
       elsif ($mode eq 'text') {
    $text=~s/\n//g;
    $text=~s/\\n/\n/g;
    return $rpmdescription="\nDescription : $text";
       }
       else {
    return '';
       }
   }
   # ------------------------------------------------------- Format rpmpre section
   sub format_rpmpre {
       my $text=$parser->get_text('/rpmpre');
       $parser->get_tag('/rpmpre');
       if ($mode eq 'html') {
   # return $rpmpre="\n<br />RPMPRE $text";
    return '';
       }
       else {
    return '';
       }
   }
   # -------------------------------------------------- Format directories section
   sub format_directories {
       my $text=$parser->get_text('/directories');
       $parser->get_tag('/directories');
       if ($mode eq 'html') {
    return $directories="\n<br />&nbsp;<br />".
       "<a name='directories' />".
       "<font size='+2'>Directory Structure</font>".
       "\n$text\n<br />".
       "\n";
       }
       elsif ($mode eq 'text') {
    return $directories="\nDirectory Structure\n$text\n".
       "\n";
       }
       elsif ($mode eq 'install') {
    return "\n".'directories:'."\n".$text;
      }
       else {
    return '';
       }
   }
   # ---------------------------------------------------- Format directory section
   sub format_directory {
       my (@tokeninfo)=@_;
       $targetdir='';$categoryname='';$description='';
       $parser->get_text('/directory');
       $parser->get_tag('/directory');
       if ($mode eq 'html') {
    return $directory="\n<br />DIRECTORY $targetdir $categoryname ".
       "$description";
       }
       elsif ($mode eq 'install') {
    return "\t".'install '.$categoryhash{$categoryname}.' -d '.
       $targetroot.'/'.$targetdir."\n";
       }
       else {
    return '';
       }
   }
   # ---------------------------------------------------- Format targetdir section
   sub format_targetdir {
       my @tokeninfo=@_;
       $targetdir='';
       my $text=&trim($parser->get_text('/targetdir'));
       if ($text) {
    $parser->get_tag('/targetdir');
    $targetdir=$text;
       }
       return '';
   }
   # ------------------------------------------------- Format categoryname section
   sub format_categoryname {
       my @tokeninfo=@_;
       $categoryname='';
       my $text=&trim($parser->get_text('/categoryname'));
       if ($text) {
    $parser->get_tag('/categoryname');
    $categoryname=$text;
       }
       return '';
   }
   # -------------------------------------------------- Format description section
   sub format_description {
       my @tokeninfo=@_;
       $description='';
       my $text=&htmlsafe(&trim($parser->get_text('/description')));
       if ($text) {
    $parser->get_tag('/description');
    $description=$text;
       }
       return '';
   }
   # -------------------------------------------------------- Format files section
   sub format_files {
       my $text=$parser->get_text('/files');
       $parser->get_tag('/files');
       if ($mode eq 'html') {
    return $directories="\n<br />&nbsp;<br />".
       "<a name='files' />".
       "<font size='+2'>File and Directory Structure</font>".
       "\n$text\n<br />".
       "\n";
       }
       elsif ($mode eq 'text') {
    return $directories="\n".
       "File and Directory Structure".
       "\n$text\n".
       "\n";
       }
       elsif ($mode eq 'install') {
    return "\n".'files:'."\n".$text.
       "\n".'links:'."\n".join('',@links);
       }
       elsif ($mode eq 'configinstall') {
    return "\n".'configfiles: '.
    join(' ',@configall).
    "\n\n".$text.
    "\n\nalwaysrun:\n\n";
       }
       elsif ($mode eq 'build') {
    my $binfo;
    my $tword;
    my $command2;
    my @deps;
    foreach my $bi (@buildinfo) {
       my ($target,$source,$command,$trigger,@deps)=split(/\;/,$bi);
       $tword=''; $tword=' alwaysrun' if $trigger eq 'always run'; 
       $command=~s/\/([^\/]*)$//;
       $command2="cd $command; sh ./$1;\\";
       my $depstring;
       my $depstring2="\t\t\@echo '';\\\n";
       my $olddep;
       foreach my $dep (@deps) {
    unless ($olddep) {
       $olddep=$deps[$#deps];
    }
    $depstring.="\telif !(test -r $command/$dep);\\\n";
    $depstring.="\t\tthen echo ".
    "\"**** WARNING **** missing the file: ".
            "$command/$dep\"$logcmd;\\\n";
    $depstring.="\t\ttest -e $source || test -e $target || echo ".
       "'**** ERROR **** neither source=$source nor target=".
       "$target exist and they cannot be built'$logcmd;\\\n";
    $depstring.="\t\tmake -f Makefile.build ${source}___DEPS;\\\n";
    if ($olddep) {
       $depstring2.="\t\tECODE=0;\\\n";
       $depstring2.="\t\t! test -e $source && test -r $command/$olddep &&".
    " { perl filecompare.pl -b2 $command/$olddep $target ||  ECODE=\$\$?; } && { [ \$\$ECODE != \"2\" ] || echo \"**** WARNING **** dependency $command/$olddep is newer than target file $target; SOMETHING MAY BE WRONG\"$logcmd; };\\\n";
    }
    $olddep=$dep;
       }
       $binfo.="$source: $tword\n".
    "\t\@if !(echo \"\");\\\n\t\tthen echo ".
    "\"**** WARNING **** Strange shell. ".
            "Check your path settings.\"$logcmd;\\\n".
    $depstring.
    "\telse \\\n\t\t$command2\n\tfi\n\n";
       $binfo.="${source}___DEPS:\n".$depstring2."\t\tECODE=0;\n\n";
    }
    return 'all: '.join(' ',@buildall)."\n\n".
            $text.
    $binfo."\n".
    "alwaysrun:\n\n";
       }
       else {
    return '';
       }
   }
   # ---------------------------------------------------- Format fileglobs section
   sub format_fileglobs {
   
   }
   # -------------------------------------------------------- Format links section
   # deprecated.. currently <link></link>'s are included in <files></files>
   sub format_links {
       my $text=$parser->get_text('/links');
       $parser->get_tag('/links');
       if ($mode eq 'html') {
    return $links="\n<br />BEGIN LINKS\n$text\n<br />END LINKS\n";
       }
       elsif ($mode eq 'install') {
    return "\n".'links:'."\n\t".$text;
       }
       else {
    return '';
       }
   }
   # --------------------------------------------------------- Format file section
   sub format_file {
       my @tokeninfo=@_;
       $file=''; $source=''; $target=''; $categoryname=''; $description='';
       $note=''; $build=''; $status=''; $dependencies='';
       my $text=&trim($parser->get_text('/file'));
       my $buildtest;
       if ($source) {
    $parser->get_tag('/file');
    if ($mode eq 'html') {
       return ($file="\n<br />BEGIN FILE\n".
    "$source $target $categoryname $description $note " .
    "$build $status $dependencies" .
    "\nEND FILE");
    }
    elsif ($mode eq 'install' && $categoryname ne 'conf') {
       if ($build) {
    my $bi=$sourceroot.'/'.$source.';'.$build.';'.
       $dependencies;
    my ($source2,$command,$trigger,@deps)=split(/\;/,$bi);
    $tword=''; $tword=' alwaysrun' if $trigger eq 'always run'; 
    $command=~s/\/([^\/]*)$//;
    $command2="cd $command; sh ./$1;\\";
    my $depstring;
    foreach my $dep (@deps) {
       $depstring.=<<END;
    ECODE=0; DEP=''; \\
    test -e $command/$dep || (echo '**** WARNING **** cannot evaluate status of dependency $command/$dep (for building ${sourceroot}/${source} with)'$logcmd); DEP="1"; \\
    [ -n DEP ] && { perl filecompare.pl -b2 $command/$dep ${targetroot}/${target} || ECODE=\$\$?; } || DEP="1"; \\
    case "\$\$ECODE" in \\
    2) echo "**** WARNING **** dependency $command/$dep is newer than target file ${targetroot}/${target}; you may want to run make build"$logcmd;; \\
    esac; \\
   END
    }
                   chomp $depstring;
    $buildtest=<<END;
    \@if !(test -e "${sourceroot}/${source}") && !(test -e "${targetroot}/${target}"); then \\
    echo "**** ERROR **** ${sourceroot}/${source} is missing and is also not present at target location ${targetroot}/${target}; you must run make build"$logcmd; exit; \\
   END
                   $buildtest.=<<END if $depstring;
    elif !(test -e "${sourceroot}/${source}"); then \\
   $depstring
   END
                   $buildtest.=<<END;
    fi
   END
       }
               my $bflag='-b1';
               $bflag='-b3' if $dependencies or $buildlink;
       return <<END;
   $buildtest \@if !(test -e "${sourceroot}/${source}") && !(test -e "${targetroot}/${target}"); then \\
    echo "**** ERROR **** CVS source file does not exist: ${sourceroot}/${source} and neither does target: ${targetroot}/${target}"$logcmd; \\
    elif !(test -e "${sourceroot}/${source}"); then \\
    echo "**** WARNING **** CVS source file does not exist: ${sourceroot}/${source}"$logcmd; \\
    perl verifymodown.pl ${targetroot}/${target} "$categoryhash{$categoryname}"$logcmd; \\
    else \\
    ECODE=0; \\
    perl filecompare.pl $bflag ${sourceroot}/${source} ${targetroot}/${target} || ECODE=\$\$?; \\
    case "\$\$ECODE" in \\
    1) echo "${targetroot}/${target} is unchanged";; \\
    2) echo "**** WARNING **** target file ${targetroot}/${target} is newer than CVS source; saving current (old) target file to ${targetroot}/${target}.lpmlsave and then overwriting"$logcmd && install -o www -g www -m 0600 ${targetroot}/${target} ${targetroot}/${target}.lpmlsave && install $categoryhash{$categoryname} ${sourceroot}/${source} ${targetroot}/${target};; \\
    0) echo "install $categoryhash{$categorname} ${sourceroot}/${source} ${targetroot}/${target}" && install $categoryhash{$categoryname} ${sourceroot}/${source} ${targetroot}/${target};; \\
    esac; \\
    perl verifymodown.pl ${targetroot}/${target} "$categoryhash{$categoryname}"$logcmd; \\
    fi
   END
   #    return "\t".'@test -e '.$sourceroot.'/'.$source.
   # ' && perl filecompare.pl -b '.$sourceroot.'/'.$source.' '.
   # $targetroot.'/'.$target.
   # ' && install '.
   # $categoryhash{$categoryname}.' '.
   # $sourceroot.'/'.$source.' '.
   # $targetroot.'/'.$target.
   # ' || echo "**** WARNING '.
   # '**** CVS source file does not exist: '.$sourceroot.'/'.
   # $source.'"'."\n";
    }
    elsif ($mode eq 'configinstall' && $categoryname eq 'conf') {
       push @configall,$targetroot.'/'.$target;
       return $targetroot.'/'.$target.': alwaysrun'."\n".
    "\t".'@echo -n ""; ECODE=0 && { perl filecompare.pl -b4 '.
    $sourceroot.'/'.$source.' '.$targetroot.'/'.$target.
    ' || ECODE=$$?; } && '.
    '{ [ $$ECODE != "2" ] || (install '.
                   $categoryhash{$categoryname}.' '.
    $sourceroot.'/'.$source.' '.
    $targetroot.'/'.$target.'.lpmlnew'.
    ' && echo "**** NOTE: CONFIGURATION FILE CHANGE ****"'.
    $logcmd.' && echo "'.
    'You likely need to compare contents of '.
    ''.$targetroot.'/'.$target.' with the new '.
                   ''.$targetroot.'/'.$target.'.lpmlnew"'.
    "$logcmd); } && ".
    '{ [ $$ECODE != "3" ] || (install '.
                   $categoryhash{$categoryname}.' '.
    $sourceroot.'/'.$source.' '.
    $targetroot.'/'.$target.''.
    ' && echo "**** WARNING: NEW CONFIGURATION FILE ADDED ****"'.
    $logcmd.' && echo "'.
    'You likely need to review the contents of '.
    ''.$targetroot.'/'.$target.' to make sure its '.
                   'settings are compatible with your overall system"'.
    "$logcmd); } && ".
    '{ [ $$ECODE != "1" ] || ('.
    'echo "**** ERROR ****"'.
    $logcmd.' && echo "'.
    'Configuration source file does not exist '.
    ''.$sourceroot.'/'.$source.'"'.
    "$logcmd); } && perl verifymodown.pl ${targetroot}/${target} \"$categoryhash{$categoryname}\"$logcmd;\n\n";
    }
    elsif ($mode eq 'build' && $build) {
       push @buildall,$sourceroot.'/'.$source;
       push @buildinfo,$targetroot.'/'.$target.';'.$sourceroot.'/'.
    $source.';'.$build.';'.
    $dependencies;
   #    return '# need to build '.$source.";
    }
    else {
       return '';
    }
       }
       return '';
   }
   # --------------------------------------------------------- Format link section
   sub format_link {
       my @tokeninfo=@_;
       $link=''; $linkto=''; $target=''; $categoryname=''; $description='';
       $note=''; $build=''; $status=''; $dependencies='';
       my $text=&trim($parser->get_text('/link'));
       if ($linkto) {
    $parser->get_tag('/link');
    if ($mode eq 'html') {
       return $link="\n<br />BEGIN LINK\n".
    "$linkto $target $categoryname $description $note " .
    "$build $status $dependencies" .
       "\nEND LINK";
    }
    elsif ($mode eq 'install') {
       my @targets=map {s/^\s*//;s/\s$//;$_} split(/\;/,$target);
       foreach my $tgt (@targets) {
    push @links,"\t".'ln -fs /'.$linkto.' /'.$targetroot.$tgt.
       "\n";
       }
       return '';
    }
    else {
       return '';
    }
       }
       return '';
   }
   # ----------------------------------------------------- Format fileglob section
   sub format_fileglob {
       my @tokeninfo=@_;
       $fileglob=''; $glob=''; $sourcedir='';
       $targetdir=''; $categoryname=''; $description='';
       $note=''; $build=''; $status=''; $dependencies='';
       $filenames='';
       my $text=&trim($parser->get_text('/fileglob'));
       if ($sourcedir) {
    $parser->get_tag('/fileglob');
    if ($mode eq 'html') {
       return $fileglob="\n<br />BEGIN FILEGLOB\n".
    "$glob sourcedir $targetdir $categoryname $description $note ".
    "$build $status $dependencies $filenames" .
       "\nEND FILEGLOB";
    }
    elsif ($mode eq 'install') {
       return "\t".'install '.
    $categoryhash{$categoryname}.' '.
    $sourceroot.'/'.$sourcedir.'[^C][^V][^S]'.$glob.' '.
    $targetroot.'/'.$targetdir.'.'."\n";
    }
    else {
       return '';
    }
       }
       return '';
   }
   # ---------------------------------------------------- Format sourcedir section
   sub format_sourcedir {
       my @tokeninfo=@_;
       $sourcedir='';
       my $text=&trim($parser->get_text('/sourcedir'));
       if ($text) {
    $parser->get_tag('/sourcedir');
    $sourcedir=$text;
       }
       return '';
   }
   # ------------------------------------------------------- Format target section
   sub format_target {
       my @tokeninfo=@_;
       $target='';
       my $text=&trim($parser->get_text('/target'));
       if ($text) {
    $parser->get_tag('/target');
    $target=$text;
       }
       return '';
   }
   # ------------------------------------------------------- Format source section
   sub format_source {
       my @tokeninfo=@_;
       $source='';
       my $text=&trim($parser->get_text('/source'));
       if ($text) {
    $parser->get_tag('/source');
    $source=$text;
       }
       return '';
   }
   # --------------------------------------------------------- Format note section
   sub format_note {
       my @tokeninfo=@_;
       $note='';
       my $text=&trim($parser->get_text('/note'));
       if ($text) {
    $parser->get_tag('/note');
    $note=$text;
       }
       return '';
   
 # -------------------------- Parse for an input string and specific target mode  
 sub xmlparse {  
     my ($parsestring,$target,$dist)=@_;  
     my $outtext = '';  
     my $parser = HTML::TokeParser->new($parsestring);  
     $parser->xml_mode('1');  
     # strategy: grab first and pass well-parsed information to info-handler subroutines  
     # There should be errors if bad file format.  
     # Unlike other XML-handling strategies we use, this script should  
     # terminate if the XML is bad.  
     # grab first (and only) lpml section  
     # grab target(s) root  
     # grab source root  
     # grab categories  
        # foreach category  
        # attributes: name and type  
        # grab chown  
        # grab chmod  
          # parse user name and group name  
     # grab rpm (remember to replace \n with real new lines)  
        # grab rpmSummary  
        # grab rpmName  
        # grab rpmVersion  
        # grab rpmRelease  
        # grab rpmVendor  
        # grab rpmBuildRoot  
        # grab rpmCopyright  
        # grab rpmGroup  
        # grab rpmSource  
        # grab rpmAutoReqProv  
        # grab rpmdescription  
        # grab rpmpre  
     # grab directories  
        # foreach directory  
        # grab targetdir(s)  
        # grab categoryname  
        # grab (optional) description  
     # grab files  
        # foreach file|link|link|fileglob  
        # grab source  
        # grab target(s)  
        # grab categoryname  
        # grab description  
        # grab note  
 }  
   
 __END__  
   
 while (my $token = $p->get_tag("category")) {  
     my $url = $token->[1]{name} . $token->[1]{type};  
     my $chmodtoken=$p->get_tag("chmod");  
     my $text = $p->get_trimmed_text("/chmod");  
     print "CHMOD: $text\n";  
     my $text = $p->get_trimmed_text("/category");  
     print "$url\t$text\t".join(" ",@{$token->[2]})."\n";  
 }  }
   # -------------------------------------------------------- Format build section
   sub format_build {
       my @tokeninfo=@_;
       $build='';
       my $text=&trim($parser->get_text('/build'));
       if ($text) {
    $parser->get_tag('/build');
    $build=$sourceroot.'/'.$text.';'.$tokeninfo[2]{'trigger'};
       }
       return '';
   }
   # -------------------------------------------------------- Format build section
   sub format_buildlink {
       my @tokeninfo=@_;
       $buildlink='';
       my $text=&trim($parser->get_text('/buildlink'));
       if ($text) {
    $parser->get_tag('/buildlink');
    $buildlink=$sourceroot.'/'.$text;
       }
       return '';
   }
   # ------------------------------------------------------- Format status section
   sub format_status {
       my @tokeninfo=@_;
       $status='';
       my $text=&trim($parser->get_text('/status'));
       if ($text) {
    $parser->get_tag('/status');
    $status=$text;
       }
       return '';
   }
   # ------------------------------------------------- Format dependencies section
   sub format_dependencies {
       my @tokeninfo=@_;
       $dependencies='';
       my $text=&trim($parser->get_text('/dependencies'));
       if ($text) {
    $parser->get_tag('/dependencies');
    $dependencies=join(';',
         (map {s/^\s*//;s/\s$//;$_} split(/\;/,$text)));
       }
       return '';
   }
   # --------------------------------------------------------- Format glob section
   sub format_glob {
       my @tokeninfo=@_;
       $glob='';
       my $text=&trim($parser->get_text('/glob'));
       if ($text) {
    $parser->get_tag('/glob');
    $glob=$text;
       }
       return '';
   }
   # ---------------------------------------------------- Format filenames section
   sub format_filenames {
       my @tokeninfo=@_;
       my $text=&trim($parser->get_text('/filenames'));
       if ($text) {
    $parser->get_tag('/filenames');
    $filenames=$text;
       }
       return '';
   }
   # ------------------------------------------------------- Format linkto section
   sub format_linkto {
       my @tokeninfo=@_;
       my $text=&trim($parser->get_text('/linkto'));
       if ($text) {
    $parser->get_tag('/linkto');
    $linkto=$text;
       }
       return '';
   }
   # ------------------------------------- Render less-than and greater-than signs
   sub htmlsafe {
       my $text=@_[0];
       $text =~ s/</&lt;/g;
       $text =~ s/>/&gt;/g;
       return $text;
   }
   # --------------------------------------- remove starting and ending whitespace
   sub trim {
       my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
   } 
   
   # ----------------------------------- POD (plain old documentation, CPAN style)
   
   =head1 NAME
   
   lpml_parse.pl - This is meant to parse files meeting the lpml document type.
   See lpml.dtd.  LPML=Linux Packaging Markup Language.
   
   =head1 SYNOPSIS
   
   Usage is for lpml file to come in through standard input.
   
   =over 4
   
   =item *
   
   1st argument is the mode of parsing.
   
   =item * 
   
   2nd argument is the category permissions to use (runtime or development)
   
   =item *
   
   3rd argument is the distribution
   (default,redhat6.2,debian2.2,redhat7.1,etc).
   
   =item *
   
   4th argument is to manually specify a sourceroot.
   
   =item *
   
   5th argument is to manually specify a targetroot.
   
   =back
   
   Only the 1st argument is mandatory for the program to run.
   
   Example:
   
   cat ../../doc/loncapafiles.lpml |\\
   perl lpml_parse.pl html default /home/sherbert/loncapa /tmp/install
   
   =head1 DESCRIPTION
   
   I am using a multiple pass-through approach to parsing
   the lpml file.  This saves memory and makes sure the server
   will never be overloaded.
   
   =head1 README
   
   I am using a multiple pass-through approach to parsing
   the lpml file.  This saves memory and makes sure the server
   will never be overloaded.
   
   =head1 PREREQUISITES
   
   HTML::TokeParser
   
   =head1 COREQUISITES
   
   =head1 OSNAMES
   
   linux
   
   =head1 SCRIPT CATEGORIES
   
   Packaging/Administrative
   
   =cut

Removed from v.1.2  
changed lines
  Added in v.1.25


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>