--- loncom/build/lpml_parse.pl	2001/12/19 21:26:52	1.33
+++ loncom/build/lpml_parse.pl	2002/04/26 15:53:00	1.47
@@ -1,11 +1,20 @@
 #!/usr/bin/perl
 
+# -------------------------------------------------------- Documentation notice
+# Run "perldoc ./lpml_parse.pl" in order to best view the software
+# documentation internalized in this program.
+
+# --------------------------------------------------------- Distribution notice
+# This script is distributed with the LPML software project available at
+# http://lpml.sourceforge.net
+
+# --------------------------------------------------------- License Information
 # The LearningOnline Network with CAPA
 # lpml_parse.pl - Linux Packaging Markup Language parser
 #
-# $Id: lpml_parse.pl,v 1.33 2001/12/19 21:26:52 harris41 Exp $
+# $Id: lpml_parse.pl,v 1.47 2002/04/26 15:53:00 harris41 Exp $
 #
-# Written by Scott Harrison, harris41@msu.edu
+# Written by Scott Harrison, codeharrison@yahoo.com
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -35,7 +44,9 @@
 # 9/5/2001,9/6,9/7,9/8 - Scott Harrison
 # 9/17,9/18 - Scott Harrison
 # 11/4,11/5,11/6,11/7,11/16,11/17 - Scott Harrison
-# 12/2,12/3,12/4,12/5,12/6,12/13 - Scott Harrison
+# 12/2,12/3,12/4,12/5,12/6,12/13,12/19,12/29 - Scott Harrison
+# YEAR=2002
+# 1/8,1/9,1/29,1/31,2/5,3/21,4/8,4/12 - Scott Harrison
 #
 ###
 
@@ -66,9 +77,12 @@ use HTML::TokeParser;
 my $usage=<<END;
 **** ERROR ERROR ERROR ERROR ****
 Usage is for lpml file to come in through standard input.
-1st argument is the mode of parsing.
-2nd argument is the category permissions to use (runtime or development)
-3rd argument is the distribution (default,redhat6.2,debian2.2,redhat7.1,etc).
+1st argument is the mode of parsing:
+    install,configinstall,build,rpm,dpkg,htmldoc,textdoc,status
+2nd argument is the category permissions to use:
+    typical choices: runtime,development
+3rd argument is the distribution:
+    typical choices: default,redhat6.2,debian2.2,redhat7
 4th argument is to manually specify a sourceroot.
 5th argument is to manually specify a targetroot.
 
@@ -78,11 +92,13 @@ Example:
 
 cat ../../doc/loncapafiles.lpml |\\
 perl lpml_parse.pl html development default /home/sherbert/loncapa /tmp/install
+
+For more information, type "perldoc lpml_parse.pl".
 END
 
 # ------------------------------------------------- Grab command line arguments
 
-my $mode;
+my $mode='';
 if (@ARGV==5) {
     $mode = shift @ARGV;
 }
@@ -93,34 +109,34 @@ else {
     exit -1; # exit with error status
 }
 
-my $categorytype;
+my $categorytype='';
 if (@ARGV) {
     $categorytype = shift @ARGV;
 }
 
-my $dist;
+my $dist='';
 if (@ARGV) {
     $dist = shift @ARGV;
 }
 
-my $targetroot;
-my $sourceroot;
-my $targetrootarg;
-my $sourcerootarg;
+my $targetroot='';
+my $sourceroot='';
+my $targetrootarg='';
+my $sourcerootarg='';
 if (@ARGV) {
     $sourceroot = shift @ARGV;
 }
 if (@ARGV) {
     $targetroot = shift @ARGV;
 }
-$sourceroot=~s/\/$//;
-$targetroot=~s/\/$//;
+$sourceroot=~s/\/$//; # remove trailing directory slash
+$targetroot=~s/\/$//; # remove trailing directory slash
 $sourcerootarg=$sourceroot;
 $targetrootarg=$targetroot;
 
 my $logcmd='| tee -a WARNINGS';
 
-my $invocation;
+my $invocation; # Record how the program was invoked
 # --------------------------------------------------- Record program invocation
 if ($mode eq 'install' or $mode eq 'configinstall' or $mode eq 'build') {
     $invocation=(<<END);
@@ -128,19 +144,18 @@ if ($mode eq 'install' or $mode eq 'conf
 #             1st argument (mode) is: $mode
 #             2nd argument (category type) is: $categorytype
 #             3rd argument (distribution) is: $dist
-#             4th argument (targetroot) is: described below
-#             5th argument (sourceroot) is: described below
+#             4th argument (sourceroot) is: described below
+#             5th argument (targetroot) is: described below
 END
 }
 
-# ---------------------------------------------------- Start first pass through
-my @parsecontents = <>;
-my $parsestring = join('',@parsecontents);
-my $outstring;
+# -------------------------- Start first pass through (just gather information)
+my @parsecontents=<>;
+my $parsestring=join('',@parsecontents);
 
 # Need to make a pass through and figure out what defaults are
-# overrided.  Top-down overriding strategy (leaves don't know
-# about distant leaves).
+# overridden.  Top-down overriding strategy (tree leaves don't know
+# about distant tree leaves).
 
 my @hierarchy;
 $hierarchy[0]=0;
@@ -149,20 +164,32 @@ my $token;
 $parser = HTML::TokeParser->new(\$parsestring) or
     die('can\'t create TokeParser object');
 $parser->xml_mode('1');
-my %hash;
-my $key;
-while ($token = $parser->get_token()) {
+my %setting;
+
+# Values for the %setting hash
+my $defaultset=1; # a default setting exists for a key
+my $distset=2; # a distribution setting exists for a key
+               # (overrides default setting)
+
+my $key=''; # this is a unique key identifier (the token name with its
+            # coordinates inside the hierarchy)
+while ($token = $parser->get_token()) { # navigate through $parsestring
     if ($token->[0] eq 'S') {
 	$hloc++;
 	$hierarchy[$hloc]++;
 	$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
 	my $thisdist=' '.$token->[2]{'dist'}.' ';
 	if ($thisdist eq ' default ') {
-	    $hash{$key}=1; # there is a default setting for this key
+	    $setting{$key}=$defaultset;
 	}
-	elsif ($dist && $hash{$key}==1 && $thisdist=~/\s$dist\s/) {
-	    $hash{$key}=2; # disregard default setting for this key if
-	                   # there is a directly requested distribution match
+	elsif (length($dist)>0 &&
+	       $setting{$key}==$defaultset &&
+	       $thisdist=~/\s$dist\s/) {
+	    $setting{$key}=$distset;
+                   # disregard default setting for this key if
+                   # there is a directly requested distribution match
+                   # (in other words, there must first be a default
+	           # setting for a key in order for it to be overridden)
 	}
     }
     if ($token->[0] eq 'E') {
@@ -170,57 +197,79 @@ while ($token = $parser->get_token()) {
     }
 }
 
-# --------------------------------------------------- Start second pass through
-undef $hloc;
-undef @hierarchy;
-undef $parser;
-$hierarchy[0]=0;
+# - Start second pass through (clean up the string to allow for easy rendering)
+
+# The string is cleaned up so that there is no white-space surrounding any
+# XML tag.  White-space inside text 'T' elements is preserved.
+
+# Clear up memory
+undef($hloc);
+undef(@hierarchy);
+undef($parser);
+$hierarchy[0]=0; # initialize hierarchy
 $parser = HTML::TokeParser->new(\$parsestring) or
     die('can\'t create TokeParser object');
 $parser->xml_mode('1');
-my $cleanstring;
-while ($token = $parser->get_token()) {
-    if ($token->[0] eq 'S') {
+my $cleanstring; # contains the output of the second step
+while ($token = $parser->get_token()) { # navigate through $parsestring
+    if ($token->[0] eq 'S') { # a start tag
 	$hloc++;
 	$hierarchy[$hloc]++;
 	$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
-	my $thisdist=' '.$token->[2]{'dist'}.' ';
+
+	# Surround tagdist (the dist attribute of an XML tag)
+	# with white-space to allow for uniform searching a few
+	# lines below here.
+	my $tagdist=' '.$token->[2]{'dist'}.' ';
+
 	# This conditional clause is set up to ignore two sets
 	# of invalid conditions before accepting entry into
-	# the cleanstring.
-	if ($hash{$key}==2 and
-	    !($thisdist eq '  ' or $thisdist =~/\s$dist\s/)) {
+	# $cleanstring.
+
+	# Condition #1: Ignore this part of the string if the tag
+	# has a superior distribution-specific setting and the tag
+	# being evaluated has a dist attribute that is neither
+	# blank nor a match for $dist.
+	if ($setting{$key}==$distset and
+	    !($tagdist eq '  ' or $tagdist =~/\s$dist\s/)) {
 	    if ($token->[4]!~/\/>$/) {
 		$parser->get_tag('/'.$token->[1]);
 		$hloc--;
 	    }
 	}
-	elsif ($thisdist ne '  ' and $thisdist!~/\s$dist\s/ and
-	       !($thisdist eq ' default ' and $hash{$key}!=2)) {
+	# Condition #2: Ignore this part of the string if the tag's dist
+	# attribute is not blank and does not match $dist, and the attribute
+	# either is not 'default' or the key already has a $dist-specific
+	# setting.
+	elsif ($tagdist ne '  ' and $tagdist!~/\s$dist\s/ and
+	       !($tagdist eq ' default ' and $setting{$key}!=$distset)) {
 	    if ($token->[4]!~/\/>$/) {
 		$parser->get_tag('/'.$token->[1]);
 		$hloc--;
 	    }
 	}
+	# In other words, output to $cleanstring if the tag is dist=default
+	# (and not overridden for $dist) or if the tag's dist matches $dist.
+	# Also, always output when the tag has no dist attribute.
 	else {
 	    $cleanstring.=$token->[4];
 	}
-	if ($token->[4]=~/\/>$/) {
-	    $hloc--;
-	}
     }
-    if ($token->[0] eq 'E') {
+    # Note: this loop DOES work with <tag /> style markup as well as
+    # <tag></tag> style markup since I always check for $token->[4] ending
+    # with "/>".
+    if ($token->[0] eq 'E') { # an end tag
 	$cleanstring.=$token->[2];
 	$hloc--;
     }
-    if ($token->[0] eq 'T') {
+    if ($token->[0] eq 'T') { # text contents inside tags
 	$cleanstring.=$token->[1];
     }
 }
 $cleanstring=&trim($cleanstring);
 $cleanstring=~s/\>\s*\n\s*\</\>\</g;
 
-# ---------------------------------------------------- Start final pass through
+# -------------------------------------------- Start final (third) pass through
 
 # storage variables
 my $lpml;
@@ -279,10 +328,6 @@ my $link_count;
 my $fileglob_count;
 my $fileglobnames_count;
 my %categorycount;
-# START TEMP WAY
-#my %bytecount;  # TEMP WAY TO COUNT INFORMATION
-#my %linecount;  # TEMP WAY TO COUNT INFORMATION
-# END TEMP WAY
 
 my @buildall;
 my @buildinfo;
@@ -321,6 +366,7 @@ $parser->{textify}={
     rpmAutoReqProv => \&format_rpmAutoReqProv,
     rpmdescription => \&format_rpmdescription,
     rpmpre => \&format_rpmpre,
+    rpmRequires => \&format_rpmRequires,
     directories => \&format_directories,
     directory => \&format_directory,
     categoryname => \&format_categoryname,
@@ -367,13 +413,6 @@ exit;
 # ------------------------ Final output at end of markup parsing and formatting
 sub end {
     if ($mode eq 'html') {
-	# START TEMP WAY
-#	my $totallinecount;
-#	my $totalbytecount;
-#	map {$totallinecount+=$linecount{$_};
-#	     $totalbytecount+=$bytecount{$_}}
-# 	  @categorynamelist;
-        # END TEMP WAY
 	return "<br />&nbsp;<br />".
 	    "<a name='summary' /><font size='+2'>Summary of Source Repository".
 	    "</font>".
@@ -411,16 +450,6 @@ sub end {
 	    "</table>".
 	    "</body></html>\n";
 
-# START TEMP WAY
-#	    join("\n",(map {"<tr><td><img src='$fab{$_}.gif' ".
-#		 "alt='$_ icon' /></td>".
-# 	         "<td>$_</td><td>$categorycount{$_}</td><td>$linecount{$_}</td><td>$bytecount{$_}</td></tr>"}
-#		@categorynamelist)).
-#	    "<br />&nbsp;<br />".
-#	    "Total Lines of Code: $totallinecount".
-#	    "<br />&nbsp;<br />".
-#	    "Total Bytes: $totalbytecount".
-# END TEMP WAY
     }
     if ($mode eq 'install') {
 	return '';
@@ -664,6 +693,9 @@ $text
 </table>
 END
     }
+    elsif ($mode eq 'make_rpm') {
+	return $text;
+    }
     elsif ($mode eq 'text') {
 	return $rpm=<<END;
 Software Package Description
@@ -685,6 +717,11 @@ sub format_rpmSummary {
     elsif ($mode eq 'text') {
 	return $rpmSummary="\nSummary     : $text";
     }
+    elsif ($mode eq 'make_rpm') {
+	return <<END;
+<summary>$text</summary>
+END
+    }
     else {
 	return '';
     }
@@ -699,6 +736,11 @@ sub format_rpmName {
     elsif ($mode eq 'text') {
 	return $rpmName="\nName        : $text";
     }
+    elsif ($mode eq 'make_rpm') {
+	return <<END;
+<name>$text</name>
+END
+    }
     else {
 	return '';
     }
@@ -741,6 +783,11 @@ sub format_rpmVendor {
     elsif ($mode eq 'text') {
 	return $rpmVendor="\nVendor      : $text";
     }
+    elsif ($mode eq 'make_rpm') {
+	return <<END;
+<vendor>$text</vendor>
+END
+    }
     else {
 	return '';
     }
@@ -769,6 +816,11 @@ sub format_rpmCopyright {
     elsif ($mode eq 'text') {
 	return $rpmCopyright="\nLicense     : $text";
     }
+    elsif ($mode eq 'make_rpm') {
+	return <<END;
+<copyright>$text</copyright>
+END
+    }
     else {
 	return '';
     }
@@ -783,6 +835,11 @@ sub format_rpmGroup {
     elsif ($mode eq 'text') {
 	return $rpmGroup="\nGroup       : $text";
     }
+    elsif ($mode eq 'make_rpm') {
+	return <<END;
+<group>Utilities/System</group>
+END
+    }
     else {
 	return '';
     }
@@ -808,9 +865,14 @@ sub format_rpmAutoReqProv {
     if ($mode eq 'html') {
 	return $rpmAutoReqProv="\nAutoReqProv : $text";
     }
-    if ($mode eq 'text') {
+    elsif ($mode eq 'text') {
 	return $rpmAutoReqProv="\nAutoReqProv : $text";
     }
+    elsif ($mode eq 'make_rpm') {
+	return <<END;
+<AutoReqProv>$text</AutoReqProv>
+END
+    }
     else {
 	return '';
     }
@@ -829,6 +891,13 @@ sub format_rpmdescription {
 	$text=~s/\\n/\n/g;
 	return $rpmdescription="\nDescription : $text";
     }
+    elsif ($mode eq 'make_rpm') {
+	$text=~s/\n//g;
+	$text=~s/\\n/\n/g;
+	return <<END;
+<description>$text</description>
+END
+    }
     else {
 	return '';
     }
@@ -841,10 +910,42 @@ sub format_rpmpre {
 #	return $rpmpre="\n<br />RPMPRE $text";
 	return '';
     }
+    elsif ($mode eq 'make_rpm') {
+	return <<END;
+<pre>$text</pre>
+END
+    }
     else {
 	return '';
     }
 }
+# -------------------------------------------------- Format requires section
+sub format_rpmRequires { # textify handler registered for rpmRequires
+    my @tokeninfo=@_; # handler arguments; not used below
+    my $aref; # current token (array reference) from $parser->get_token()
+    my $text; # accumulates the markup found inside <rpmRequires>
+    if ($mode eq 'make_rpm') {
+	while ($aref=$parser->get_token()) {
+	    if ($aref->[0] eq 'E' && $aref->[1] eq 'rpmRequires') {
+		last; # closing rpmRequires end tag reached; stop collecting
+	    }
+	    elsif ($aref->[0] eq 'S') {
+		$text.=$aref->[4]; # start tag: append element 4 of the token
+	    }
+	    elsif ($aref->[0] eq 'E') {
+		$text.=$aref->[2]; # nested end tag: append element 2 of the token
+	    }
+	    else {
+		$text.=$aref->[1]; # any other token type: append element 1
+	    }
+	}
+    }
+    else {
+	$parser->get_tag('/rpmRequires'); # other modes: skip to the end tag
+	return ''; # and contribute nothing to the output
+    }
+    return '<rpmRequires>'.$text.'</rpmRequires>'; # re-wrap collected markup
+}
 # -------------------------------------------------- Format directories section
 sub format_directories {
     my $text=$parser->get_text('/directories');
@@ -869,7 +970,10 @@ sub format_directories {
     }
     elsif ($mode eq 'install') {
 	return "\n".'directories:'."\n".$text;
-   }
+    }
+    elsif ($mode eq 'rpm_file_list') {
+	return $text;
+    }
     else {
 	return '';
     }
@@ -892,7 +996,9 @@ sub format_directory {
 	my ($chmod,$chown)=split(/\s/,$categoryhash{$categoryname});
 	return $directory="\n<tr><td rowspan='2' bgcolor='#ffffff'>".
 	    "$categoryname</td>".
-	    "<td rowspan='2' bgcolor='#ffffff'><!-- POSTEVAL [$categoryname] verify.pl directory /$targetdir $categoryhash{$categoryname} -->&nbsp;</td>".
+	    "<td rowspan='2' bgcolor='#ffffff'><!-- POSTEVAL [$categoryname] ".
+	    "verify.pl directory /$targetdir $categoryhash{$categoryname} -->".
+	    "&nbsp;</td>".
 	    "<td rowspan='2' bgcolor='#ffffff'>$chmod<br />$chown</td>".
 	    "<td bgcolor='#ffffff'>$thtml</td></tr>".
 	    "<tr><td bgcolor='#ffffff' colspan='[{{{{{DPATHLENGTH}}}}}]'>".
@@ -906,6 +1012,9 @@ sub format_directory {
 	return "\t".'install '.$categoryhash{$categoryname}.' -d '.
 	    $targetroot.'/'.$targetdir."\n";
     }
+    elsif ($mode eq 'rpm_file_list') {
+	return $targetroot.'/'.$targetdir."\n";
+    }
     else {
 	return '';
     }
@@ -947,7 +1056,10 @@ sub format_description {
 sub format_files {
     my $text=$parser->get_text('/files');
     $parser->get_tag('/files');
-    if ($mode eq 'html') {
+    if ($mode eq 'MANIFEST') {
+	return $text;
+    }
+    elsif ($mode eq 'html') {
 	return $directories="\n<br />&nbsp;<br />".
 	    "<a name='files' />".
 	    "<font size='+2'>Files</font><br />&nbsp;<br />".
@@ -1028,6 +1140,9 @@ sub format_files {
 		$binfo."\n".
 		"alwaysrun:\n\n";
     }
+    elsif ($mode eq 'rpm_file_list') {
+	return $text;
+    }
     else {
 	return '';
     }
@@ -1060,19 +1175,26 @@ sub format_file {
     my $buildtest;
     $file_count++;
     $categorycount{$categoryname}++;
-    # START TEMP WAY
-#    if (-T "$sourcerootarg/$source") {
-#	$linecount{$categoryname}+=`wc -l $sourcerootarg/$source`;
-#    }
-#    my $bytesize=(-s "$sourcerootarg/$source");
-#    $bytecount{$categoryname}+=$bytesize;
-    # END TEMP WAY
     if ($source) {
 	$parser->get_tag('/file');
-	if ($mode eq 'html') {
+	if ($mode eq 'MANIFEST') {
+	    my $command=$build;
+	    if ($command!~/\s/) {
+		$command=~s/\/([^\/]*)$//;
+	    }
+	    else {
+		$command=~s/(.*?\/)([^\/]+\s+.*)$/$1/;
+	    }
+	    $command=~s/^$sourceroot\///;
+	    my (@deps)=split(/\;/,$dependencies);
+	    my $retval=join("\n",($source,
+		       (map {"$command$_"} @deps)));
+	    return $retval."\n";
+	}
+	elsif ($mode eq 'html') {
 	    return ($file="\n<!-- FILESORT:$target -->".
 		    "<tr>".
-		    "<td><!-- POSTEVAL [$categoryname] verify.pl file '$sourcerootarg' ".
+          "<td><!-- POSTEVAL [$categoryname] verify.pl file '$sourcerootarg' ".
 		    "'$targetrootarg' ".
 		    "'$source' '$target' ".
 		    "$categoryhash{$categoryname} -->&nbsp;</td><td>".
@@ -1101,10 +1223,10 @@ sub format_file {
 		foreach my $dep (@deps) {
 		    $depstring.=<<END;
 		ECODE=0; DEP=''; \\
-		test -e $command/$dep || (echo '**** WARNING **** cannot evaluate status of dependency $command/$dep (for building ${sourceroot}/${source} with)'$logcmd); DEP="1"; \\
-		[ -n DEP ] && { perl filecompare.pl -b2 $command/$dep ${targetroot}/${target} || ECODE=\$\$?; } || DEP="1"; \\
+		test -e $dep || (echo '**** WARNING **** cannot evaluate status of dependency $dep (for building ${sourceroot}/${source} with)'$logcmd); DEP="1"; \\
+		[ -n DEP ] && { perl filecompare.pl -b2 $dep ${targetroot}/${target} || ECODE=\$\$?; } || DEP="1"; \\
 		case "\$\$ECODE" in \\
-			2) echo "**** WARNING **** dependency $command/$dep is newer than target file ${targetroot}/${target}; you may want to run make build"$logcmd;; \\
+			2) echo "**** WARNING **** dependency $dep is newer than target file ${targetroot}/${target}; you may want to run make build"$logcmd;; \\
 		esac; \\
 END
 		}
@@ -1172,7 +1294,8 @@ END
 		$logcmd.' && echo "'.
 		'Configuration source file does not exist '.
 		''.$sourceroot.'/'.$source.'"'.
-		"$logcmd); } && perl verifymodown.pl ${targetroot}/${target} \"$categoryhash{$categoryname}\"$logcmd;\n\n";
+	      "$logcmd); } && perl verifymodown.pl ${targetroot}/${target} \"".
+		"$categoryhash{$categoryname}\"$logcmd;\n\n";
 	}
 	elsif ($mode eq 'build' && $build) {
 	    push @buildall,$sourceroot.'/'.$source;
@@ -1181,6 +1304,17 @@ END
 		$dependencies;
 #	    return '# need to build '.$source.";
 	}
+        elsif ($mode eq 'rpm_file_list') {
+	    if ($categoryname eq 'doc') {
+		return $targetroot.'/'.$target.' # doc'."\n";
+	    }
+	    elsif ($categoryname eq 'conf') {
+		return $targetroot.'/'.$target.' # config'."\n";
+	    }
+	    else {
+		return $targetroot.'/'.$target."\n";
+	    }
+	}
 	else {
 	    return '';
 	}
@@ -1193,7 +1327,6 @@ sub format_link {
     $link=''; $linkto=''; $source=''; $target=''; $categoryname=''; 
     $description=''; $note=''; $build=''; $status=''; $dependencies='';
     my $text=&trim($parser->get_text('/link'));
-    my @links;
     if ($linkto) {
 	$parser->get_tag('/link');
 	if ($mode eq 'html') {
@@ -1234,11 +1367,20 @@ sub format_link {
 	elsif ($mode eq 'install') {
 	    my @targets=map {s/^\s*//;s/\s$//;$_} split(/\;/,$target);
 	    foreach my $tgt (@targets) {
-		push @links,"\t".'ln -fs /'.$linkto.' /'.$targetroot.$tgt.
+		push @links,"\t".'ln -fs /'.$linkto.' '.$targetroot.'/'.$tgt.
 		    "\n";
 	    }
+#	    return join('',@links);
 	    return '';
 	}
+	elsif ($mode eq 'rpm_file_list') {
+	    my @linklocs;
+	    my @targets=map {s/^\s*//;s/\s$//;$_} split(/\;/,$target);
+	    foreach my $tgt (@targets) {
+		push @linklocs,''.$targetroot.'/'.$tgt."\n";
+	    }
+	    return join('',@linklocs);
+	}
 	else {
 	    return '';
 	}
@@ -1258,23 +1400,14 @@ sub format_fileglob {
     my @semi=($filenames2=~/(\;)/g);
     $fileglobnames_count+=scalar(@semi)+1;
     $categorycount{$categoryname}+=scalar(@semi)+1;
-    # START TEMP WAY
-#    for my $f (split(/\;/,$filenames2)) {
-#	if (-T "$sourcerootarg/$sourcedir/$f") {
-#	    $linecount{$categoryname}+=`wc -l $sourcerootarg/$sourcedir/$f`;
-#	    open OUT,">>/tmp/junk123";
-#	    print OUT "$linecount{$categoryname} $categoryname $sourcerootarg/$sourcedir/$f\n";
-#	    close OUT;
-#	}
-#	my $bytesize=(-s "$sourcerootarg/$sourcedir/$f");
-#	$bytecount{$categoryname}+=$bytesize;
-#    }
-    # END TEMP WAY
     if ($sourcedir) {
 	$parser->get_tag('/fileglob');
-	if ($mode eq 'html') {
+	if ($mode eq 'MANIFEST') {
+         return join("\n",(map {"$sourcedir$_"} split(/\;/,$filenames2)))."\n";
+	}
+	elsif ($mode eq 'html') {
 	    return $fileglob="\n<tr>".
-		"<td><!-- POSTEVAL [$categoryname] verify.pl fileglob '$sourcerootarg' ".
+      "<td><!-- POSTEVAL [$categoryname] verify.pl fileglob '$sourcerootarg' ".
 		"'$targetrootarg' ".
 		"'$glob' '$sourcedir' '$filenames2' '$targetdir' ".
 		"$categoryhash{$categoryname} -->&nbsp;</td>".
@@ -1305,6 +1438,16 @@ sub format_fileglob {
 		$sourceroot.'/'.$sourcedir.$eglob.' '.
 		$targetroot.'/'.$targetdir.'.'."\n";
 	}
+	elsif ($mode eq 'rpm_file_list') {
+	    my $eglob=$glob;
+	    if ($glob eq '*') {
+		$eglob='[^C][^V][^S]'.$glob;
+	    }
+	    my $targetdir2=$targetdir;$targetdir2=~s/\/$//;
+	    my @gfiles=map {s/^.*\///;"$targetroot/$targetdir2/$_\n"}
+	               glob("$sourceroot/$sourcedir/$eglob");
+	    return join('',@gfiles);
+	}
 	else {
 	    return '';
 	}
@@ -1380,6 +1523,7 @@ sub format_build {
     if ($text) {
 	$parser->get_tag('/build');
 	$build=$sourceroot.'/'.$text.';'.$tokeninfo[2]{'trigger'};
+	$build=~s/([^\\])\\\s+/$1/g; # allow for lines split onto new lines
     }
     return '';
 }
@@ -1438,7 +1582,7 @@ sub format_filenames {
     }
     return '';
 }
-# ------------------------------------------------ Format specialnotice section
+# ----------------------------------------------- Format specialnotices section
 sub format_specialnotices {
     $parser->get_tag('/specialnotices');
     return '';
@@ -1472,14 +1616,17 @@ sub trim {
 
 # ----------------------------------- POD (plain old documentation, CPAN style)
 
+=pod
+
 =head1 NAME
 
 lpml_parse.pl - This is meant to parse files meeting the lpml document type.
-See lpml.dtd.  LPML=Linux Packaging Markup Language.
 
 =head1 SYNOPSIS
 
-Usage is for lpml file to come in through standard input.
+<STDIN> | perl lpml_parse.pl <MODE> <CATEGORY> <DIST> <SOURCE> <TARGET>
+
+Usage is for the lpml file to come in through standard input.
 
 =over 4
 
@@ -1511,19 +1658,57 @@ Only the 1st argument is mandatory for t
 Example:
 
 cat ../../doc/loncapafiles.lpml |\\
-perl lpml_parse.pl html default /home/sherbert/loncapa /tmp/install
+perl lpml_parse.pl html runtime default /home/sherbert/loncapa /tmp/install
 
 =head1 DESCRIPTION
 
-I am using a multiple pass-through approach to parsing
-the lpml file.  This saves memory and makes sure the server
-will never be overloaded.
+The general flow of the script is to get command line arguments, run through
+the XML document three times, and output according to any desired mode:
+install, configinstall, build, rpm, dpkg, htmldoc, textdoc, and status.
+
+A number of coding decisions are made according to the following principle:
+installation software must be stand-alone.  Therefore, for instance, I try
+not to use the Getopt::Long module or any other perl modules.  (I do however
+use HTML::TokeParser.)  I also have tried to keep all the MODES of
+parsing inside this file.  Therefore, format_TAG subroutines are fairly
+lengthy with their conditional logic.  A more "elegant" solution might
+be to dynamically register the parsing mode and subroutines, or maybe even work
+with stylesheets.  However, in order to make this the installation back-bone
+of choice, there are advantages for HAVING EVERYTHING IN ONE FILE.
+This way, the LPML installation software does not have to rely on OTHER
+installation software (a chicken versus the egg problem).  Besides, I would
+suggest the modes of parsing are fairly constant: install, configinstall,
+build, rpm, dpkg, htmldoc, textdoc, and status.
+
+Another coding decision is about using a multiple pass-through approach to
+parsing the lpml file.  This saves memory and makes sure the server will never
+be overloaded.  During the first pass-through, the script gathers the
+information needed to resolve which tags (by 'dist=' attribute) are to be used.
+During the second pass-through, the script cleans up white-space surrounding
+the XML tags, and filters through the tags based on information regarding the
+'dist=' attributes (information gathered in the first pass-through).
+The third and final pass-through involves formatting and rendering the XML
+into whatever XML mode is chosen: install, configinstall, build, rpm, dpkg,
+htmldoc, textdoc, and status.
+
+The hierarchy mandated by the DTD does not always correspond to the hierarchy
+that is sensible for a Makefile.  For instance, in a Makefile it is sensible
+that soft-links are installed after files.  However, in an LPML document, it
+is sensible that files and links be considered together and the writer of the
+LPML document should be free to place things in whatever order makes best
+sense in terms of LOOKING at the information.  The complication that arises
+is that the parser needs to have a memory for passing values from
+leaves on the XML tree to higher-up branches.  Currently, this memory is
+hard-coded (like with the @links array), but it may benefit from a more
+formal approach in the future.
 
 =head1 README
 
-I am using a multiple pass-through approach to parsing
-the lpml file.  This saves memory and makes sure the server
-will never be overloaded.
+This parses an LPML file to generate information useful for
+source to target installation, compilation, filesystem status
+checking, RPM and Debian software packaging, and documentation.
+
+More information on LPML is available at http://lpml.sourceforge.net.
 
 =head1 PREREQUISITES
 
@@ -1537,6 +1722,14 @@ linux
 
 =head1 SCRIPT CATEGORIES
 
-Packaging/Administrative
+UNIX/System_administration
+
+=head1 AUTHOR
+
+ Scott Harrison
+ codeharrison@yahoo.com
+
+Please let me know how/if you are finding this script useful and
+any/all suggestions.  -Scott
 
 =cut