--- loncom/build/lpml_parse.pl 2001/06/20 12:32:54 1.2 +++ loncom/build/lpml_parse.pl 2002/04/21 23:36:17 1.46 @@ -1,74 +1,1724 @@ #!/usr/bin/perl -# Scott Harrison +# -------------------------------------------------------- Documentation notice +# Run "perldoc ./lpml_parse.pl" in order to best view the software +# documentation internalized in this program. + +# --------------------------------------------------------- Distribution notice +# This script is distributed with the LPML software project available at +# http://lpml.sourceforge.net + +# --------------------------------------------------------- License Information +# The LearningOnline Network with CAPA +# lpml_parse.pl - Linux Packaging Markup Language parser +# +# $Id: lpml_parse.pl,v 1.46 2002/04/21 23:36:17 harris41 Exp $ +# +# Written by Scott Harrison, codeharrison@yahoo.com +# +# Copyright Michigan State University Board of Trustees +# +# This file is part of the LearningOnline Network with CAPA (LON-CAPA). +# +# LON-CAPA is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# LON-CAPA is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with LON-CAPA; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# /home/httpd/html/adm/gpl.txt +# +# http://www.lon-capa.org/ +# +# YEAR=2001 # May 2001 -# 06/19/2001 - Scott Harrison +# 06/19/2001,06/20,06/24 - Scott Harrison +# 9/5/2001,9/6,9/7,9/8 - Scott Harrison +# 9/17,9/18 - Scott Harrison +# 11/4,11/5,11/6,11/7,11/16,11/17 - Scott Harrison +# 12/2,12/3,12/4,12/5,12/6,12/13,12/19,12/29 - Scott Harrison +# YEAR=2002 +# 1/8,1/9,1/29,1/31,2/5,3/21,4/8,4/12 - Scott Harrison +# +### + +############################################################################### +## ## +## ORGANIZATION OF THIS PERL SCRIPT ## +## 1. Notes ## +## 2. Get command line arguments ## +## 3. First pass through (grab distribution-specific information) ## +## 4. Second pass through (parse out what is not necessary) ## +## 5. Third pass through (translate markup according to specified mode) ## +## 6. Functions (most all just format contents of different markup tags) ## +## 7. POD (plain old documentation, CPAN style) ## +## ## +############################################################################### + +# ----------------------------------------------------------------------- Notes +# +# I am using a multiple pass-through approach to parsing +# the lpml file. This saves memory and makes sure the server +# will never be overloaded. +# +# This is meant to parse files meeting the lpml document type. +# See lpml.dtd. LPML=Linux Packaging Markup Language. use HTML::TokeParser; -my $target = shift @ARGV; -my $dist = shift @ARGV; +my $usage=<){} # throw away the input to avoid broken pipes + print $usage; + exit -1; # exit with error status +} + +my $categorytype=''; +if (@ARGV) { + $categorytype = shift @ARGV; +} + +my $dist=''; +if (@ARGV) { + $dist = shift @ARGV; +} + +my $targetroot=''; +my $sourceroot=''; +my $targetrootarg=''; +my $sourcerootarg=''; +if (@ARGV) { + $sourceroot = shift @ARGV; +} +if (@ARGV) { + $targetroot = shift @ARGV; +} +$sourceroot=~s/\/$//; # remove trailing directory slash +$targetroot=~s/\/$//; # remove trailing directory slash +$sourcerootarg=$sourceroot; +$targetrootarg=$targetroot; + +my $logcmd='| tee -a WARNINGS'; + +my $invocation; # Record how the program was invoked +# --------------------------------------------------- Record program invocation +if ($mode eq 'install' or $mode eq 'configinstall' or $mode eq 'build') { + $invocation=(<; +my $parsestring=join('',@parsecontents); + +# Need to make a pass through and figure out what defaults are +# overrided. Top-down overriding strategy (tree leaves don't know +# about distant tree leaves). + +my @hierarchy; +$hierarchy[0]=0; +my $hloc=0; +my $token; +$parser = HTML::TokeParser->new(\$parsestring) or + die('can\'t create TokeParser object'); +$parser->xml_mode('1'); +my %setting; + +# Values for the %setting hash +my $defaultset=1; # a default setting exists for a key +my $distset=2; # a distribution setting exists for a key + # (overrides default setting) + +my $key=''; # this is a unique key identifier (the token name with its + # coordinates inside the hierarchy) +while ($token = $parser->get_token()) { # navigate through $parsestring + if ($token->[0] eq 'S') { + $hloc++; + $hierarchy[$hloc]++; + $key=$token->[1].join(',',@hierarchy[0..($hloc-1)]); + my $thisdist=' '.$token->[2]{'dist'}.' '; + if ($thisdist eq ' default ') { + $setting{$key}=$defaultset; + } + elsif (length($dist)>0 && + $setting{$key}==$defaultset && + $thisdist=~/\s$dist\s/) { + $setting{$key}=$distset; + # disregard default setting for this key if + # there is a directly requested distribution match + # (in other words, there must first be a default + # setting for a key in order for it to be overridden) + } + } + if ($token->[0] eq 'E') { + $hloc--; + } +} + +# - Start second pass through (clean up the string to allow for easy rendering) + +# The string is cleaned up so that there is no white-space surrounding any +# XML tag. White-space inside text 'T' elements is preserved. + +# Clear up memory +undef($hloc); +undef(@hierarchy); +undef($parser); +$hierarchy[0]=0; # initialize hierarchy +$parser = HTML::TokeParser->new(\$parsestring) or + die('can\'t create TokeParser object'); +$parser->xml_mode('1'); +my $cleanstring; # contains the output of the second step +while ($token = $parser->get_token()) { # navigate through $parsestring + if ($token->[0] eq 'S') { # a start tag + $hloc++; + $hierarchy[$hloc]++; + $key=$token->[1].join(',',@hierarchy[0..($hloc-1)]); + + # Surround tagdist (the dist attribute of an XML tag) + # with white-space to allow for uniform searching a few + # lines below here. + my $tagdist=' '.$token->[2]{'dist'}.' '; + + # This conditional clause is set up to ignore two sets + # of invalid conditions before accepting entry into + # $cleanstring. + + # Condition #1: Ignore this part of the string if the tag + # has a superior distribution-specific setting and the tag + # being evaluated has a dist setting something other than + # blank or $dist. + if ($setting{$key}==$distset and + !($tagdist eq ' ' or $tagdist =~/\s$dist\s/)) { + if ($token->[4]!~/\/>$/) { + $parser->get_tag('/'.$token->[1]); + $hloc--; + } + } + # Condition #2: Ignore this part of the string if the tag has + # is not blank and does not equal dist and + # either does not equal default or it has a prior $dist-specific + # setting. + elsif ($tagdist ne ' ' and $tagdist!~/\s$dist\s/ and + !($tagdist eq ' default ' and $setting{$key}!=$distset)) { + if ($token->[4]!~/\/>$/) { + $parser->get_tag('/'.$token->[1]); + $hloc--; + } + } + # In other words, output to $cleanstring if the tag is dist=default + # or if the tag is set to dist=$dist for the first time. And, always + # output when dist='' is not present. + else { + $cleanstring.=$token->[4]; + } + } + # Note: this loop DOES work with style markup as well as + # style markup since I always check for $token->[4] ending + # with "/>". + if ($token->[0] eq 'E') { # an end tag + $cleanstring.=$token->[2]; + $hloc--; + } + if ($token->[0] eq 'T') { # text contents inside tags + $cleanstring.=$token->[1]; + } +} +$cleanstring=&trim($cleanstring); +$cleanstring=~s/\>\s*\n\s*\\new(\$cleanstring) or + die('can\'t create TokeParser object'); +$parser->xml_mode('1'); + +# Define handling methods for mode-dependent text rendering + +$parser->{textify}={ + specialnotices => \&format_specialnotices, + specialnotice => \&format_specialnotice, + targetroot => \&format_targetroot, + sourceroot => \&format_sourceroot, + categories => \&format_categories, + category => \&format_category, + abbreviation => \&format_abbreviation, + targetdir => \&format_targetdir, + chown => \&format_chown, + chmod => \&format_chmod, + rpm => \&format_rpm, + rpmSummary => \&format_rpmSummary, + rpmName => \&format_rpmName, + rpmVersion => \&format_rpmVersion, + rpmRelease => \&format_rpmRelease, + rpmVendor => \&format_rpmVendor, + rpmBuildRoot => \&format_rpmBuildRoot, + rpmCopyright => \&format_rpmCopyright, + rpmGroup => \&format_rpmGroup, + rpmSource => \&format_rpmSource, + rpmAutoReqProv => \&format_rpmAutoReqProv, + rpmdescription => \&format_rpmdescription, + rpmpre => \&format_rpmpre, + rpmRequires => \&format_rpmRequires, + directories => \&format_directories, + directory => \&format_directory, + categoryname => \&format_categoryname, + description => \&format_description, + files => \&format_files, + file => \&format_file, + fileglob => \&format_fileglob, + links => \&format_links, + link => \&format_link, + linkto => \&format_linkto, + source => \&format_source, + target => \&format_target, + note => \&format_note, + build => \&format_build, + status => \&format_status, + dependencies => \&format_dependencies, + buildlink => \&format_buildlink, + glob => \&format_glob, + sourcedir => \&format_sourcedir, + filenames => \&format_filenames, + }; + +my $text; +my $token; +undef $hloc; +undef @hierarchy; +my $hloc; +my @hierarchy2; +while ($token = $parser->get_tag('lpml')) { + &format_lpml(@{$token}); + $text = &trim($parser->get_text('/lpml')); + $token = $parser->get_tag('/lpml'); + print $lpml; + print "\n"; +# $text=~s/\s*\n\s*\n\s*/\n/g; + print $text; + print "\n"; + print &end(); +} +exit; + +# ---------- Functions (most all just format contents of different markup tags) + +# ------------------------ Final output at end of markup parsing and formatting +sub end { + if ($mode eq 'html') { + return "
 
". + "Summary of Source Repository". + "". + "
 
". + "". + "". + "". + "". + "". + "". + "". + "". + "". + "". + "". + "". + "". + "". + "". + "". + "". + "". + "". + "
Files, Directories, and Symbolic Links
Files (not referenced by globs)$file_count
Files (referenced by globs)$fileglobnames_count
Total Files".($fileglobnames_count+$file_count)."
File globs".$fileglob_count."
Directories".$directory_count."
Symbolic links".$link_count."
". + "". + "". + "". + "". + "". + join("\n",(map {"". + "". + ""} + @categorynamelist)). + "
File Category Count
IconNameNumber of OccurrencesNumber of Incorrect Counts
$_$categorycount{$_}
". + "\n"; + + } + if ($mode eq 'install') { + return ''; + } +} + +# ----------------------- Take in string to parse and the separation expression +sub extract_array { + my ($stringtoparse,$sepexp) = @_; + my @a=split(/$sepexp/,$stringtoparse); + return \@a; +} + +# --------------------------------------------------------- Format lpml section +sub format_lpml { + my (@tokeninfo)=@_; + my $date=`date`; chop $date; + if ($mode eq 'html') { + $lpml=< + +LPML Description Page +(dist=$dist, categorytype=$categorytype, $date) + + +END + $lpml .= "
LPML Description Page (dist=$dist, ". + "categorytype=$categorytype, $date)". + ""; + $lpml .=< +
  • About this file
  • +
  • File Type Ownership and Permissions +Descriptions
  • +
  • Software Package Description
  • +
  • Directory Structure
  • +
  • Files
  • +
  • Summary of Source Repository
  • + +END + $lpml .=< 
    +About this file +

    +This file is generated dynamically by lpml_parse.pl as +part of a development compilation process.

    +

    LPML written by Scott Harrison (harris41\@msu.edu). +

    +END + } + elsif ($mode eq 'text') { + $lpml = "LPML Description Page (dist=$dist, $date)"; + $lpml .=<get_text('/targetroot')); + $text=$targetroot if $targetroot; + $parser->get_tag('/targetroot'); + if ($mode eq 'html') { + return $targetroot="\n
    TARGETROOT: $text"; + } + elsif ($mode eq 'install' or $mode eq 'build' or + $mode eq 'configinstall') { + return '# TARGET INSTALL LOCATION is "'.$targetroot."\"\n"; + } + else { + return ''; + } +} +# --------------------------------------------------- Format sourceroot section +sub format_sourceroot { + my $text=&trim($parser->get_text('/sourceroot')); + $text=$sourceroot if $sourceroot; + $parser->get_tag('/sourceroot'); + if ($mode eq 'html') { + return $sourceroot="\n
    SOURCEROOT: $text"; + } + elsif ($mode eq 'install' or $mode eq 'build' or + $mode eq 'configinstall') { + return '# SOURCE CODE LOCATION IS "'.$sourceroot."\"\n";; + } + else { + return ''; + } +} +# --------------------------------------------------- Format categories section +sub format_categories { + my $text=&trim($parser->get_text('/categories')); + $parser->get_tag('/categories'); + if ($mode eq 'html') { + return $categories="\n
     
    ". + "\n
    ". + "\nFile Type Ownership and Permissions". + " Descriptions". + "\n

    This table shows what permissions and ownership settings ". + "correspond to each category.

    ". + "\n\n". + "". + "". + "". + "". + "". + "\n$text\n". + "
    IconCategory NamePermissions ". + "($categorytype)
    \n"; + } + elsif ($mode eq 'text') { + return $categories="\n". + "\nFile Type Ownership and Permissions". + " Descriptions". + "\n$text". + "\n"; + } + else { + return ''; + } +} +# --------------------------------------------------- Format categories section +sub format_category { + my (@tokeninfo)=@_; + $category_att_name=$tokeninfo[2]->{'name'}; + $category_att_type=$tokeninfo[2]->{'type'}; + $abbreviation=''; $chmod='';$chown=''; + $parser->get_text('/category'); + $parser->get_tag('/category'); + $fab{$category_att_name}=$abbreviation; + if ($mode eq 'html') { + if ($category_att_type eq $categorytype) { + push @categorynamelist,$category_att_name; + $categoryhash{$category_att_name}="$chmod $chown"; + return $category="". + "\n". + "${category_att_name}\n". + "$chmod $chown\n". + "". + "\n"; +# return $category="\n
    CATEGORY $category_att_name ". +# "$category_att_type $chmod $chown"; + } + } + else { + if ($category_att_type eq $categorytype) { + my ($user,$group)=split(/\:/,$chown); + $categoryhash{$category_att_name}='-o '.$user.' -g '.$group. + ' -m '.$chmod; + } + return ''; + } +} +# --------------------------------------------------- Format categories section +sub format_abbreviation { + my @tokeninfo=@_; + $abbreviation=''; + my $text=&trim($parser->get_text('/abbreviation')); + if ($text) { + $parser->get_tag('/abbreviation'); + $abbreviation=$text; + } + return ''; +} +# -------------------------------------------------------- Format chown section +sub format_chown { + my @tokeninfo=@_; + $chown=''; + my $text=&trim($parser->get_text('/chown')); + if ($text) { + $parser->get_tag('/chown'); + $chown=$text; + } + return ''; +} +# -------------------------------------------------------- Format chmod section +sub format_chmod { + my @tokeninfo=@_; + $chmod=''; + my $text=&trim($parser->get_text('/chmod')); + if ($text) { + $parser->get_tag('/chmod'); + $chmod=$text; + } + return ''; +} +# ---------------------------------------------------------- Format rpm section +sub format_rpm { + my $text=&trim($parser->get_text('/rpm')); + $parser->get_tag('/rpm'); + if ($mode eq 'html') { + return $rpm=< 
    +
    +Software Package Description +

    + + +
    +$text
    +
    +END + } + elsif ($mode eq 'make_rpm') { + return $text; + } + elsif ($mode eq 'text') { + return $rpm=<get_text('/rpmSummary')); + $parser->get_tag('/rpmSummary'); + if ($mode eq 'html') { + return $rpmSummary="\nSummary : $text"; + } + elsif ($mode eq 'text') { + return $rpmSummary="\nSummary : $text"; + } + elsif ($mode eq 'make_rpm') { + return <$text +END + } + else { + return ''; + } +} +# ------------------------------------------------------ Format rpmName section +sub format_rpmName { + my $text=&trim($parser->get_text('/rpmName')); + $parser->get_tag('/rpmName'); + if ($mode eq 'html') { + return $rpmName="\nName : $text"; + } + elsif ($mode eq 'text') { + return $rpmName="\nName : $text"; + } + elsif ($mode eq 'make_rpm') { + return <$text +END + } + else { + return ''; + } +} +# --------------------------------------------------- Format rpmVersion section +sub format_rpmVersion { + my $text=$parser->get_text('/rpmVersion'); + $parser->get_tag('/rpmVersion'); + if ($mode eq 'html') { + return $rpmVersion="\nVersion : $text"; + } + elsif ($mode eq 'text') { + return $rpmVersion="\nVersion : $text"; + } + else { + return ''; + } +} +# --------------------------------------------------- Format rpmRelease section +sub format_rpmRelease { + my $text=$parser->get_text('/rpmRelease'); + $parser->get_tag('/rpmRelease'); + if ($mode eq 'html') { + return $rpmRelease="\nRelease : $text"; + } + elsif ($mode eq 'text') { + return $rpmRelease="\nRelease : $text"; + } + else { + return ''; + } +} +# ---------------------------------------------------- Format rpmVendor section +sub format_rpmVendor { + my $text=$parser->get_text('/rpmVendor'); + $parser->get_tag('/rpmVendor'); + if ($mode eq 'html') { + return $rpmVendor="\nVendor : $text"; + } + elsif ($mode eq 'text') { + return $rpmVendor="\nVendor : $text"; + } + elsif ($mode eq 'make_rpm') { + return <$text +END + } + else { + return ''; + } +} +# ------------------------------------------------- Format rpmBuildRoot section +sub format_rpmBuildRoot { + my $text=$parser->get_text('/rpmBuildRoot'); + $parser->get_tag('/rpmBuildRoot'); + if ($mode eq 'html') { + return $rpmBuildRoot="\nBuild Root : $text"; + } + elsif ($mode eq 'text') { + return $rpmBuildRoot="\nBuild Root : $text"; + } + else { + return ''; + } +} +# ------------------------------------------------- Format rpmCopyright section +sub format_rpmCopyright { + my $text=$parser->get_text('/rpmCopyright'); + $parser->get_tag('/rpmCopyright'); + if ($mode eq 'html') { + return $rpmCopyright="\nLicense : $text"; + } + elsif ($mode eq 'text') { + return $rpmCopyright="\nLicense : $text"; + } + elsif ($mode eq 'make_rpm') { + return <$text +END + } + else { + return ''; + } +} +# ----------------------------------------------------- Format rpmGroup section +sub format_rpmGroup { + my $text=$parser->get_text('/rpmGroup'); + $parser->get_tag('/rpmGroup'); + if ($mode eq 'html') { + return $rpmGroup="\nGroup : $text"; + } + elsif ($mode eq 'text') { + return $rpmGroup="\nGroup : $text"; + } + elsif ($mode eq 'make_rpm') { + return <Utilities/System +END + } + else { + return ''; + } +} +# ---------------------------------------------------- Format rpmSource section +sub format_rpmSource { + my $text=$parser->get_text('/rpmSource'); + $parser->get_tag('/rpmSource'); + if ($mode eq 'html') { + return $rpmSource="\nSource : $text"; + } + elsif ($mode eq 'text') { + return $rpmSource="\nSource : $text"; + } + else { + return ''; + } +} +# ----------------------------------------------- Format rpmAutoReqProv section +sub format_rpmAutoReqProv { + my $text=$parser->get_text('/rpmAutoReqProv'); + $parser->get_tag('/rpmAutoReqProv'); + if ($mode eq 'html') { + return $rpmAutoReqProv="\nAutoReqProv : $text"; + } + elsif ($mode eq 'text') { + return $rpmAutoReqProv="\nAutoReqProv : $text"; + } + elsif ($mode eq 'make_rpm') { + return <$text +END + } + else { + return ''; + } +} +# ----------------------------------------------- Format rpmdescription section +sub format_rpmdescription { + my $text=$parser->get_text('/rpmdescription'); + $parser->get_tag('/rpmdescription'); + if ($mode eq 'html') { + $text=~s/\n//g; + $text=~s/\\n/\n/g; + return $rpmdescription="\nDescription : $text"; + } + elsif ($mode eq 'text') { + $text=~s/\n//g; + $text=~s/\\n/\n/g; + return $rpmdescription="\nDescription : $text"; + } + elsif ($mode eq 'make_rpm') { + $text=~s/\n//g; + $text=~s/\\n/\n/g; + return <$text +END + } + else { + return ''; + } +} +# ------------------------------------------------------- Format rpmpre section +sub format_rpmpre { + my $text=$parser->get_text('/rpmpre'); + $parser->get_tag('/rpmpre'); + if ($mode eq 'html') { +# return $rpmpre="\n
    RPMPRE $text"; + return ''; + } + elsif ($mode eq 'make_rpm') { + return <$text +END + } + else { + return ''; + } +} +# -------------------------------------------------- Format requires section +sub format_rpmRequires { + my @tokeninfo=@_; + my $aref; + my $text; + if ($mode eq 'make_rpm') { + while ($aref=$parser->get_token()) { + if ($aref->[0] eq 'E' && $aref->[1] eq 'rpmRequires') { + last; + } + elsif ($aref->[0] eq 'S') { + $text.=$aref->[4]; + } + elsif ($aref->[0] eq 'E') { + $text.=$aref->[2]; + } + else { + $text.=$aref->[1]; + } + } + } + else { + $parser->get_tag('/rpmRequires'); + return ''; + } + return ''.$text.''; +} +# -------------------------------------------------- Format directories section +sub format_directories { + my $text=$parser->get_text('/directories'); + $parser->get_tag('/directories'); + if ($mode eq 'html') { + $text=~s/\[\{\{\{\{\{DPATHLENGTH\}\}\}\}\}\]/$dpathlength/g; + return $directories="\n
     
    ". + "
    ". + "Directory Structure". + "\n
     
    ". + "\n". + "". + "\n". + "\n". + "\n". + "\n$text\n
    CategoryStatusExpected Permissions & OwnershipTarget Directory ". + "Path

    "."\n"; + } + elsif ($mode eq 'text') { + return $directories="\nDirectory Structure\n$text\n". + "\n"; + } + elsif ($mode eq 'install') { + return "\n".'directories:'."\n".$text; + } + elsif ($mode eq 'rpm_file_list') { + return $text; + } + else { + return ''; + } +} +# ---------------------------------------------------- Format directory section +sub format_directory { + my (@tokeninfo)=@_; + $targetdir='';$categoryname='';$description=''; + $parser->get_text('/directory'); + $parser->get_tag('/directory'); + $directory_count++; + $categorycount{$categoryname}++; + if ($mode eq 'html') { + my @a; + @a=($targetdir=~/\//g); + my $d=scalar(@a)+1; + $dpathlength=$d if $d>$dpathlength; + my $thtml=$targetdir; + $thtml=~s/\//\<\/td\>\/g; + my ($chmod,$chown)=split(/\s/,$categoryhash{$categoryname}); + return $directory="\n". + "$categoryname". + "". + " ". + "$chmod
    $chown". + "$thtml". + "". + "$description"; + } + if ($mode eq 'text') { + return $directory="\nDIRECTORY $targetdir $categoryname ". + "$description"; + } + elsif ($mode eq 'install') { + return "\t".'install '.$categoryhash{$categoryname}.' -d '. + $targetroot.'/'.$targetdir."\n"; + } + elsif ($mode eq 'rpm_file_list') { + return $targetroot.'/'.$targetdir."\n"; + } + else { + return ''; + } +} +# ---------------------------------------------------- Format targetdir section +sub format_targetdir { + my @tokeninfo=@_; + $targetdir=''; + my $text=&trim($parser->get_text('/targetdir')); + if ($text) { + $parser->get_tag('/targetdir'); + $targetdir=$text; + } + return ''; +} +# ------------------------------------------------- Format categoryname section +sub format_categoryname { + my @tokeninfo=@_; + $categoryname=''; + my $text=&trim($parser->get_text('/categoryname')); + if ($text) { + $parser->get_tag('/categoryname'); + $categoryname=$text; + } + return ''; +} +# -------------------------------------------------- Format description section +sub format_description { + my @tokeninfo=@_; + $description=''; + my $text=&htmlsafe(&trim($parser->get_text('/description'))); + if ($text) { + $parser->get_tag('/description'); + $description=$text; + } + return ''; +} +# -------------------------------------------------------- Format files section +sub format_files { + my $text=$parser->get_text('/files'); + $parser->get_tag('/files'); + if ($mode eq 'MANIFEST') { + return $text; + } + elsif ($mode eq 'html') { + return $directories="\n
     
    ". + "
    ". + "Files
     
    ". + "

    All source and target locations are relative to the ". + "sourceroot and targetroot values at the beginning of this ". + "document.

    ". + "\n". + "". + "". + "". + "$text
    StatusCategoryName/LocationDescriptionNotes
    \n". + "\n"; + } + elsif ($mode eq 'text') { + return $directories="\n". + "File and Directory Structure". + "\n$text\n". + "\n"; + } + elsif ($mode eq 'install') { + return "\n".'files:'."\n".$text. + "\n".'links:'."\n".join('',@links); + } + elsif ($mode eq 'configinstall') { + return "\n".'configfiles: '. + join(' ',@configall). + "\n\n".$text. + "\n\nalwaysrun:\n\n"; + } + elsif ($mode eq 'build') { + my $binfo; + my $tword; + my $command2; + my @deps; + foreach my $bi (@buildinfo) { + my ($target,$source,$command,$trigger,@deps)=split(/\;/,$bi); + $tword=''; $tword=' alwaysrun' if $trigger eq 'always run'; + if ($command!~/\s/) { + $command=~s/\/([^\/]*)$//; + $command2="cd $command; sh ./$1;\\"; + } + else { + $command=~s/(.*?\/)([^\/]+\s+.*)$/$1/; + $command2="cd $command; sh ./$2;\\"; + } + my $depstring; + my $depstring2="\t\t\@echo '';\\\n"; + my $olddep; + foreach my $dep (@deps) { + unless ($olddep) { + $olddep=$deps[$#deps]; + } + $depstring.="\telif !(test -r $command/$dep);\\\n"; + $depstring.="\t\tthen echo ". + "\"**** WARNING **** missing the file: ". + "$command/$dep\"$logcmd;\\\n"; + $depstring.="\t\ttest -e $source || test -e $target || echo ". + "'**** ERROR **** neither source=$source nor target=". + "$target exist and they cannot be built'$logcmd;\\\n"; + $depstring.="\t\tmake -f Makefile.build ${source}___DEPS;\\\n"; + if ($olddep) { + $depstring2.="\t\tECODE=0;\\\n"; + $depstring2.="\t\t! test -e $source && test -r $command/$olddep &&". + " { perl filecompare.pl -b2 $command/$olddep $target || ECODE=\$\$?; } && { [ \$\$ECODE != \"2\" ] || echo \"**** WARNING **** dependency $command/$olddep is newer than target file $target; SOMETHING MAY BE WRONG\"$logcmd; };\\\n"; + } + $olddep=$dep; + } + $binfo.="$source: $tword\n". + "\t\@if !(echo \"\");\\\n\t\tthen echo ". + "\"**** WARNING **** Strange shell. ". + "Check your path settings.\"$logcmd;\\\n". + $depstring. + "\telse \\\n\t\t$command2\n\tfi\n\n"; + $binfo.="${source}___DEPS:\n".$depstring2."\t\tECODE=0;\n\n"; + } + return 'all: '.join(' ',@buildall)."\n\n". + $text. + $binfo."\n". + "alwaysrun:\n\n"; + } + elsif ($mode eq 'rpm_file_list') { + return $text; + } + else { + return ''; + } +} +# ---------------------------------------------------- Format fileglobs section +sub format_fileglobs { + +} +# -------------------------------------------------------- Format links section +# deprecated.. currently 's are included in +sub format_links { + my $text=$parser->get_text('/links'); + $parser->get_tag('/links'); + if ($mode eq 'html') { + return $links="\n
    BEGIN LINKS\n$text\n
    END LINKS\n"; + } + elsif ($mode eq 'install') { + return "\n".'links:'."\n\t".$text; + } + else { + return ''; + } +} +# --------------------------------------------------------- Format file section +sub format_file { + my @tokeninfo=@_; + $file=''; $source=''; $target=''; $categoryname=''; $description=''; + $note=''; $build=''; $status=''; $dependencies=''; + my $text=&trim($parser->get_text('/file')); + my $buildtest; + $file_count++; + $categorycount{$categoryname}++; + if ($source) { + $parser->get_tag('/file'); + if ($mode eq 'MANIFEST') { + return $source."\n"; + } + elsif ($mode eq 'html') { + return ($file="\n". + "". + " ". + "". + "$categoryname
    ". + $categoryhash{$categoryname}."". + "SOURCE: $source
    TARGET: $target". + "$description". + "$note". + ""); +# return ($file="\n
    BEGIN FILE\n". +# "$source $target $categoryname $description $note " . +# "$build $status $dependencies" . +# "\nEND FILE"); + } + elsif ($mode eq 'install' && $categoryname ne 'conf') { + if ($build) { + my $bi=$sourceroot.'/'.$source.';'.$build.';'. + $dependencies; + my ($source2,$command,$trigger,@deps)=split(/\;/,$bi); + $tword=''; $tword=' alwaysrun' if $trigger eq 'always run'; + $command=~s/\/([^\/]*)$//; + $command2="cd $command; sh ./$1;\\"; + my $depstring; + foreach my $dep (@deps) { + $depstring.=<get_text('/link')); + if ($linkto) { + $parser->get_tag('/link'); + if ($mode eq 'html') { + my @targets=map {s/^\s*//;s/\s$//;$_} split(/\;/,$target); + $link_count+=scalar(@targets); + foreach my $tgt (@targets) { + $categorycount{$categoryname}++; + push @links,("\n". + "". + " ". + "". + "$categoryname". + "LINKTO: $linkto
    TARGET: $tgt". + "$description". + "$note". + ""); +# push @links,"\t".'ln -fs /'.$linkto.' /'.$targetroot.$tgt. +# "\n"; + } + return join('',@links); +# return ($link="\n". +# "". +# " ". +# "$categoryname". +# "LINKTO: $linkto
    TARGET: $target". +# "$description". +# "$note". +# ""); +# return $link="\nBEGIN LINK\n". +# "$linkto $target $categoryname $description $note " . +# "$build $status $dependencies" . +# "\nEND LINK"; + } + elsif ($mode eq 'install') { + my @targets=map {s/^\s*//;s/\s$//;$_} split(/\;/,$target); + foreach my $tgt (@targets) { + push @links,"\t".'ln -fs /'.$linkto.' '.$targetroot.'/'.$tgt. + "\n"; + } +# return join('',@links); + return ''; + } + elsif ($mode eq 'rpm_file_list') { + my @linklocs; + my @targets=map {s/^\s*//;s/\s$//;$_} split(/\;/,$target); + foreach my $tgt (@targets) { + push @linklocs,''.$targetroot.'/'.$tgt."\n"; + } + return join('',@linklocs); + } + else { + return ''; + } + } + return ''; +} +# ----------------------------------------------------- Format fileglob section +sub format_fileglob { + my @tokeninfo=@_; + $fileglob=''; $glob=''; $sourcedir=''; + $targetdir=''; $categoryname=''; $description=''; + $note=''; $build=''; $status=''; $dependencies=''; + $filenames=''; + my $text=&trim($parser->get_text('/fileglob')); + my $filenames2=$filenames;$filenames2=~s/\s//g; + $fileglob_count++; + my @semi=($filenames2=~/(\;)/g); + $fileglobnames_count+=scalar(@semi)+1; + $categorycount{$categoryname}+=scalar(@semi)+1; + if ($sourcedir) { + $parser->get_tag('/fileglob'); + if ($mode eq 'MANIFEST') { + return join("\n",(map {"$sourcedir$_"} split(/\;/,$filenames2)))."\n"; + } + elsif ($mode eq 'html') { + return $fileglob="\n". + " ". + ""."". + "$categoryname
    ". + "".$categoryhash{$categoryname}."". + "SOURCEDIR: $sourcedir
    ". + "TARGETDIR: $targetdir
    ". + "GLOB: $glob
    ". + "FILENAMES: $filenames". + "". + "$description". + "$note". + ""; +# return $fileglob="\nBEGIN FILEGLOB\n". +# "$glob sourcedir $targetdir $categoryname $description $note ". +# "$build $status $dependencies $filenames" . +# "\nEND FILEGLOB"; + } + elsif ($mode eq 'install') { + my $eglob=$glob; + if ($glob eq '*') { + $eglob='[^C][^V][^S]'.$glob; + } + return "\t".'install '. + $categoryhash{$categoryname}.' '. + $sourceroot.'/'.$sourcedir.$eglob.' '. + $targetroot.'/'.$targetdir.'.'."\n"; + } + elsif ($mode eq 'rpm_file_list') { + my $eglob=$glob; + if ($glob eq '*') { + $eglob='[^C][^V][^S]'.$glob; + } + my $targetdir2=$targetdir;$targetdir2=~s/\/$//; + my @gfiles=map {s/^.*\///;"$targetroot/$targetdir2/$_\n"} + glob("$sourceroot/$sourcedir/$eglob"); + return join('',@gfiles); + } + else { + return ''; + } + } + return ''; +} +# ---------------------------------------------------- Format sourcedir section +sub format_sourcedir { + my @tokeninfo=@_; + $sourcedir=''; + my $text=&trim($parser->get_text('/sourcedir')); + if ($text) { + $parser->get_tag('/sourcedir'); + $sourcedir=$text; + } + return ''; +} +# ------------------------------------------------------- Format target section +sub format_target { + my @tokeninfo=@_; + $target=''; + my $text=&trim($parser->get_text('/target')); + if ($text) { + $parser->get_tag('/target'); + $target=$text; + } + return ''; +} +# ------------------------------------------------------- Format source section +sub format_source { + my @tokeninfo=@_; + $source=''; + my $text=&trim($parser->get_text('/source')); + if ($text) { + $parser->get_tag('/source'); + $source=$text; + } + return ''; +} +# --------------------------------------------------------- Format note section +sub format_note { + my @tokeninfo=@_; + $note=''; +# my $text=&trim($parser->get_text('/note')); + my $aref; + my $text; + while ($aref=$parser->get_token()) { + if ($aref->[0] eq 'E' && $aref->[1] eq 'note') { + last; + } + elsif ($aref->[0] eq 'S') { + $text.=$aref->[4]; + } + elsif ($aref->[0] eq 'E') { + $text.=$aref->[2]; + } + else { + $text.=$aref->[1]; + } + } + if ($text) { +# $parser->get_tag('/note'); + $note=$text; + } + return ''; -my @parsecontents = <>; -my $parsestring = join('',@parsecontents); -my $outstring; - -$outstring = &xmlparse($parsestring,$target,$dist); -print $outstring; - -# -------------------------- Parse for an input string and specific target mode -sub xmlparse { - my ($parsestring,$target,$dist)=@_; - my $outtext = ''; - my $parser = HTML::TokeParser->new($parsestring); - $parser->xml_mode('1'); - # strategy: grab first and pass well-parsed information to info-handler subroutines - # There should be errors if bad file format. - # Unlike other XML-handling strategies we use, this script should - # terminate if the XML is bad. - # grab first (and only) lpml section - # grab target(s) root - # grab source root - # grab categories - # foreach category - # attributes: name and type - # grab chown - # grab chmod - # parse user name and group name - # grab rpm (remember to replace \n with real new lines) - # grab rpmSummary - # grab rpmName - # grab rpmVersion - # grab rpmRelease - # grab rpmVendor - # grab rpmBuildRoot - # grab rpmCopyright - # grab rpmGroup - # grab rpmSource - # grab rpmAutoReqProv - # grab rpmdescription - # grab rpmpre - # grab directories - # foreach directory - # grab targetdir(s) - # grab categoryname - # grab (optional) description - # grab files - # foreach file|link|link|fileglob - # grab source - # grab target(s) - # grab categoryname - # grab description - # grab note -} - -__END__ - -while (my $token = $p->get_tag("category")) { - my $url = $token->[1]{name} . $token->[1]{type}; - my $chmodtoken=$p->get_tag("chmod"); - my $text = $p->get_trimmed_text("/chmod"); - print "CHMOD: $text\n"; - my $text = $p->get_trimmed_text("/category"); - print "$url\t$text\t".join(" ",@{$token->[2]})."\n"; } +# -------------------------------------------------------- Format build section +sub format_build { + my @tokeninfo=@_; + $build=''; + my $text=&trim($parser->get_text('/build')); + if ($text) { + $parser->get_tag('/build'); + $build=$sourceroot.'/'.$text.';'.$tokeninfo[2]{'trigger'}; + $build=~s/([^\\])\\\s+/$1/g; # allow for lines split onto new lines + } + return ''; +} +# -------------------------------------------------------- Format build section +sub format_buildlink { + my @tokeninfo=@_; + $buildlink=''; + my $text=&trim($parser->get_text('/buildlink')); + if ($text) { + $parser->get_tag('/buildlink'); + $buildlink=$sourceroot.'/'.$text; + } + return ''; +} +# ------------------------------------------------------- Format status section +sub format_status { + my @tokeninfo=@_; + $status=''; + my $text=&trim($parser->get_text('/status')); + if ($text) { + $parser->get_tag('/status'); + $status=$text; + } + return ''; +} +# ------------------------------------------------- Format dependencies section +sub format_dependencies { + my @tokeninfo=@_; + $dependencies=''; + my $text=&trim($parser->get_text('/dependencies')); + if ($text) { + $parser->get_tag('/dependencies'); + $dependencies=join(';', + (map {s/^\s*//;s/\s$//;$_} split(/\;/,$text))); + } + return ''; +} +# --------------------------------------------------------- Format glob section +sub format_glob { + my @tokeninfo=@_; + $glob=''; + my $text=&trim($parser->get_text('/glob')); + if ($text) { + $parser->get_tag('/glob'); + $glob=$text; + } + return ''; +} +# ---------------------------------------------------- Format filenames section +sub format_filenames { + my @tokeninfo=@_; + my $text=&trim($parser->get_text('/filenames')); + if ($text) { + $parser->get_tag('/filenames'); + $filenames=$text; + } + return ''; +} +# ----------------------------------------------- Format specialnotices section +sub format_specialnotices { + $parser->get_tag('/specialnotices'); + return ''; +} +# ------------------------------------------------ Format specialnotice section +sub format_specialnotice { + $parser->get_tag('/specialnotice'); + return ''; +} +# ------------------------------------------------------- Format linkto section +sub format_linkto { + my @tokeninfo=@_; + my $text=&trim($parser->get_text('/linkto')); + if ($text) { + $parser->get_tag('/linkto'); + $linkto=$text; + } + return ''; +} +# ------------------------------------- Render less-than and greater-than signs +sub htmlsafe { + my $text=@_[0]; + $text =~ s//>/g; + return $text; +} +# --------------------------------------- remove starting and ending whitespace +sub trim { + my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s; +} + +# ----------------------------------- POD (plain old documentation, CPAN style) + +=pod + +=head1 NAME + +lpml_parse.pl - This is meant to parse files meeting the lpml document type. + +=head1 SYNOPSIS + + | perl lpml_parse.pl + +Usage is for the lpml file to come in through standard input. + +=over 4 + +=item * + +1st argument is the mode of parsing. + +=item * + +2nd argument is the category permissions to use (runtime or development) + +=item * + +3rd argument is the distribution +(default,redhat6.2,debian2.2,redhat7.1,etc). + +=item * + +4th argument is to manually specify a sourceroot. + +=item * + +5th argument is to manually specify a targetroot. + +=back + +Only the 1st argument is mandatory for the program to run. + +Example: + +cat ../../doc/loncapafiles.lpml |\\ +perl lpml_parse.pl html runtime default /home/sherbert/loncapa /tmp/install + +=head1 DESCRIPTION + +The general flow of the script is to get command line arguments, run through +the XML document three times, and output according to any desired mode: +install, configinstall, build, rpm, dpkg, htmldoc, textdoc, and status. + +A number of coding decisions are made according to the following principle: +installation software must be stand-alone. Therefore, for instance, I try +not to use the GetOpt::Long module or any other perl modules. (I do however +use HTML::TokeParser.) I also have tried to keep all the MODES of +parsing inside this file. Therefore, format_TAG subroutines are fairly +lengthy with their conditional logic. A more "elegant" solution might +be to dynamically register the parsing mode and subroutines, or maybe even work +with stylesheets. However, in order to make this the installation back-bone +of choice, there are advantages for HAVING EVERYTHING IN ONE FILE. +This way, the LPML installation software does not have to rely on OTHER +installation software (a chicken versus the egg problem). Besides, I would +suggest the modes of parsing are fairly constant: install, configinstall, +build, rpm, dpkg, htmldoc, textdoc, and status. + +Another coding decision is about using a multiple pass-through approach to +parsing the lpml file. This saves memory and makes sure the server will never +be overloaded. During the first pass-through, the script gathers information +specific as to resolving what tags with what 'dist=' attributes are to be used. +During the second pass-through, the script cleans up white-space surrounding +the XML tags, and filters through the tags based on information regarding the +'dist=' attributes (information gathered in the first pass-through). +The third and final pass-through involves formatting and rendering the XML +into whatever XML mode is chosen: install, configinstall, build, rpm, dpkg, +htmldoc, textdoc, and status. + +The hierarchy mandated by the DTD does not always correspond to the hierarchy +that is sensible for a Makefile. For instance, in a Makefile it is sensible +that soft-links are installed after files. However, in an LPML document, it +is sensible that files and links be considered together and the writer of the +LPML document should be free to place things in whatever order makes best +sense in terms of LOOKING at the information. The complication that arises +is that the parser needs to have a memory for passing values from +leaves on the XML tree to higher-up branches. Currently, this memory is +hard-coded (like with the @links array), but it may benefit from a more +formal approach in the future. + +=head1 README + +This parses an LPML file to generate information useful for +source to target installation, compilation, filesystem status +checking, RPM and Debian software packaging, and documentation. + +More information on LPML is available at http://lpml.sourceforge.net. + +=head1 PREREQUISITES + +HTML::TokeParser + +=head1 COREQUISITES + +=head1 OSNAMES + +linux + +=head1 SCRIPT CATEGORIES + +UNIX/System_administration + +=head1 AUTHOR + + Scott Harrison + codeharrison@yahoo.com + +Please let me know how/if you are finding this script useful and +any/all suggestions. -Scott + +=cut