--- loncom/build/lpml_parse.pl	2001/06/20 12:32:54	1.2
+++ loncom/build/lpml_parse.pl	2001/06/24 23:00:32	1.3
@@ -2,73 +2,740 @@
 
 # Scott Harrison
 # May 2001
-# 06/19/2001 - Scott Harrison
+# 06/19/2001,06/20,06/24 - Scott Harrison
+
+# I am using a multiple pass-through approach to parsing
+# the lpml file.  This saves memory and makes sure the server
+# will never be overloaded.
 
 use HTML::TokeParser;
 
-my $target = shift @ARGV;
-my $dist = shift @ARGV;
+my $usage=<<END;
+**** ERROR ERROR ERROR ERROR ****
+Usage is for lpml file to come in through standard input.
+1st argument is the mode of parsing.
+2nd argument is the distribution (default,redhat6.2,debian2.2,redhat7.1,etc).
+3rd argument is to manually specify a sourceroot.
+4th argument is to manually specify a targetroot.
+
+Only the 1st argument is mandatory for the program to run.
+
+Example:
+
+cat ../../doc/loncapafiles.lpml |\\
+perl lpml_parse.pl html default /home/sherbert/loncapa /tmp/install
+END
+
+# ------------------------------------------------- Grab command line arguments
+
+my $mode;
+if (@ARGV) {
+    $mode = shift @ARGV;
+}
+else {
+    while(<>){} # throw away the input to avoid broken pipes
+    print $usage;
+    exit -1; # exit with error status
+}
+
+my $dist;
+if (@ARGV) {
+    $dist = shift @ARGV;
+}
 
+my $targetroot;
+my $sourceroot;
+if (@ARGV) {
+    $targetroot = shift @ARGV;
+}
+if (@ARGV) {
+    $sourceroot = shift @ARGV;
+}
+
+# ---------------------------------------------------- Start first pass through
 my @parsecontents = <>;
 my $parsestring = join('',@parsecontents);
 my $outstring;
 
-$outstring = &xmlparse($parsestring,$target,$dist);
-print $outstring;
+# Need to make a pass through and figure out what defaults are
+# overrided.  Top-down overriding strategy (leaves don't know
+# about distant leaves).
+
+my @hierarchy;
+$hierarchy[0]=0;
+my $hloc=0;
+my $token;
+$parser = HTML::TokeParser->new(\$parsestring) or
+    die('can\'t create TokeParser object');
+$parser->xml_mode('1');
+my %hash;
+my $key;
+while ($token = $parser->get_token()) {
+    if ($token->[0] eq 'S') {
+	$hloc++;
+	$hierarchy[$hloc]++;
+	$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
+	my $thisdist=' '.$token->[2]{'dist'}.' ';
+	if ($thisdist eq ' default ') {
+	    $hash{$key}=1; # there is a default setting for this key
+	}
+	elsif ($dist && $hash{$key}==1 && $thisdist=~/\s$dist\s/) {
+	    $hash{$key}=2; # disregard default setting for this key if
+	                   # there is a directly requested distribution match
+	}
+    }
+    if ($token->[0] eq 'E') {
+	$hloc--;
+    }
+}
+
+# --------------------------------------------------- Start second pass through
+undef $hloc;
+undef @hierarchy;
+undef $parser;
+$hierarchy[0]=0;
+$parser = HTML::TokeParser->new(\$parsestring) or
+    die('can\'t create TokeParser object');
+$parser->xml_mode('1');
+my $cleanstring;
+while ($token = $parser->get_token()) {
+    if ($token->[0] eq 'S') {
+	$hloc++;
+	$hierarchy[$hloc]++;
+	$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
+	my $thisdist=' '.$token->[2]{'dist'}.' ';
+	if ($hash{$key}==2 and
+	    !($thisdist eq '  ' or $thisdist =~/\s$dist\s/)) {
+	    if ($token->[4]!~/\/>$/) {
+		$parser->get_tag('/'.$token->[1]);
+		$hloc--;
+	    }
+	}
+	elsif ($thisdist ne '  ' and $thisdist!~/\s$dist\s/ and
+	       !($thisdist eq ' default ' and $hash{$key}!=2)) {
+	    if ($token->[4]!~/\/>$/) {
+		$parser->get_tag('/'.$token->[1]);
+		$hloc--;
+	    }
+	}
+	else {
+	    $cleanstring.=$token->[4];
+	}
+	if ($token->[4]=~/\/>$/) {
+	    $hloc--;
+	}
+    }
+    if ($token->[0] eq 'E') {
+	$cleanstring.=$token->[2];
+	$hloc--;
+    }
+    if ($token->[0] eq 'T') {
+	$cleanstring.=$token->[1];
+    }
+}
+$cleanstring=&trim($cleanstring);
+
+# ---------------------------------------------------- Start final pass through
+
+# storage variables
+my $lpml;
+my $categories;
+my $category;
+my $category_att_name;
+my $category_att_type;
+my $chown;
+my $chmod;
+my $rpm;
+my $rpmSummary;
+my $rpmName;
+my $rpmVersion;
+my $rpmRelease;
+my $rpmVendor;
+my $rpmBuildRoot;
+my $rpmCopyright;
+my $rpmGroup;
+my $rpmSource;
+my $rpmAutoReqProv;
+my $rpmdescription;
+my $rpmpre;
+my $directories;
+my $directory;
+my $targetdirs;
+my $targetdir;
+my $categoryname;
+my $description;
+my $files;
+my $fileglobs;
+my $links;
+my $file;
+my $link;
+my $fileglob;
+my $sourcedir;
+my $targets;
+my $target;
+my $source;
+my $note;
+my $build;
+my $commands;
+my $command;
+my $status;
+my $dependencies;
+my $dependency;
+
+# Make new parser with distribution specific input
+undef $parser;
+$parser = HTML::TokeParser->new(\$cleanstring) or
+    die('can\'t create TokeParser object');
+$parser->xml_mode('1');
+
+# Define handling methods for mode-dependent text rendering
+$parser->{textify}={
+    targetroot => \&format_targetroot,
+    sourceroot => \&format_sourceroot,
+    categories => \&format_categories,
+    category => \&format_category,
+    targetdir => \&format_targetdir,
+    chown => \&format_chown,
+    chmod => \&format_chmod,
+    rpm => \&format_rpm,
+    rpmSummary => \&format_rpmSummary,
+    rpmName => \&format_rpmName,
+    rpmVersion => \&format_rpmVersion,
+    rpmRelease => \&format_rpmRelease,
+    rpmVendor => \&format_rpmVendor,
+    rpmBuildRoot => \&format_rpmBuildRoot,
+    rpmCopyright => \&format_rpmCopyright,
+    rpmGroup => \&format_rpmGroup,
+    rpmSource => \&format_rpmSource,
+    rpmAutoReqProv => \&format_rpmAutoReqProv,
+    rpmdescription => \&format_rpmdescription,
+    rpmpre => \&format_rpmpre,
+    directories => \&format_directories,
+    directory => \&format_directory,
+    categoryname => \&format_categoryname,
+    description => \&format_description,
+    files => \&format_files,
+    file => \&format_file,
+    fileglob => \&format_fileglob,
+    link => \&format_link,
+    linkto => \&format_linkto,
+    source => \&format_source,
+    target => \&format_target,
+    note => \&format_note,
+    build => \&format_build,
+    status => \&format_status,
+    dependencies => \&format_dependencies,
+    glob => \&format_glob,
+    sourcedir => \&format_sourcedir,
+    filenames => \&format_filenames,
+    };
+
+my $text;
+my $token;
+undef $hloc;
+undef @hierarchy;
+my $hloc;
+my @hierarchy2;
+while ($token = $parser->get_tag('lpml')) {
+    &format_lpml(@{$token});
+    $text = &trim($parser->get_text('/lpml'));
+    $token = $parser->get_tag('/lpml');
+    print $lpml; 
+    print "\n";
+    $text=~s/\s*\n\s*\n\s*/\n/g;
+    print $text;
+    print "\n";
+    print &end();
+}
+exit;
+
+sub end {
+    if ($mode eq 'html') {
+	return "THE END\n";
+    }
+}
+
+# ----------------------- Take in string to parse and the separation expression
+sub extract_array {
+    my ($stringtoparse,$sepexp) = @_;
+    my @a=split(/$sepexp/,$stringtoparse);
+    return \@a;
+}
+
+# --------------------------------------------------------- Format lpml section
+sub format_lpml {
+    my (@tokeninfo)=@_;
+    my $date=`date`; chop $date;
+    if ($mode eq 'html') {
+	$lpml = "LPML BEGINNING: $date";
+    }
+}
+# --------------------------------------------------- Format targetroot section
+sub format_targetroot {
+    my $text=&trim($parser->get_text('/targetroot'));
+    $text=$targetroot if $targetroot;
+    $parser->get_tag('/targetroot');
+    if ($mode eq 'html') {
+	return $targetroot="\nTARGETROOT: $text";
+    }
+    else {
+	return '';
+    }
+}
+# --------------------------------------------------- Format sourceroot section
+sub format_sourceroot {
+    my $text=&trim($parser->get_text('/sourceroot'));
+    $text=$sourceroot if $sourceroot;
+    $parser->get_tag('/sourceroot');
+    if ($mode eq 'html') {
+	return $sourceroot="\nSOURCEROOT: $text";
+    }
+    else {
+	return '';
+    }
+}
+# --------------------------------------------------- Format categories section
+sub format_categories {
+    my $text=&trim($parser->get_text('/categories'));
+    $parser->get_tag('/categories');
+    if ($mode eq 'html') {
+	return $categories="\nBEGIN CATEGORIES\n$text\nEND CATEGORIES\n";
+    }
+    else {
+	return '';
+    }
+}
+# --------------------------------------------------- Format categories section
+sub format_category {
+    my (@tokeninfo)=@_;
+    $category_att_name=$tokeninfo[2]->{'name'};
+    $category_att_type=$tokeninfo[2]->{'type'};
+    $chmod='';$chown='';
+    $parser->get_text('/category');
+    $parser->get_tag('/category');
+    if ($mode eq 'html') {
+	return $category="\nCATEGORY $category_att_name $category_att_type ".
+	    "$chmod $chown";
+    }
+    else {
+	return '';
+    }
+}
+# -------------------------------------------------------- Format chown section
+sub format_chown {
+    my @tokeninfo=@_;
+    $chown='';
+    my $text=&trim($parser->get_text('/chown'));
+    if ($text) {
+	$parser->get_tag('/chown');
+	$chown=$text;
+    }
+    return '';
+}
+# -------------------------------------------------------- Format chmod section
+sub format_chmod {
+    my @tokeninfo=@_;
+    $chmod='';
+    my $text=&trim($parser->get_text('/chmod'));
+    if ($text) {
+	$parser->get_tag('/chmod');
+	$chmod=$text;
+    }
+    return '';
+}
+# ---------------------------------------------------------- Format rpm section
+sub format_rpm {
+    my $text=&trim($parser->get_text('/rpm'));
+    $parser->get_tag('/rpm');
+    if ($mode eq 'html') {
+	return $rpm="\nBEGIN RPM\n$text\nEND RPM";
+    }
+    else {
+	return '';
+    }
+}
+# --------------------------------------------------- Format rpmSummary section
+sub format_rpmSummary {
+    my $text=&trim($parser->get_text('/rpmSummary'));
+    $parser->get_tag('/rpmSummary');
+    if ($mode eq 'html') {
+	return $rpmSummary="\nRPMSUMMARY $text";
+    }
+    else {
+	return '';
+    }
+}
+# ------------------------------------------------------ Format rpmName section
+sub format_rpmName {
+    my $text=&trim($parser->get_text('/rpmName'));
+    $parser->get_tag('/rpmName');
+    if ($mode eq 'html') {
+	return $rpmName="\nRPMNAME $text";
+    }
+    else {
+	return '';
+    }
+}
+# --------------------------------------------------- Format rpmVersion section
+sub format_rpmVersion {
+    my $text=$parser->get_text('/rpmVersion');
+    $parser->get_tag('/rpmVersion');
+    if ($mode eq 'html') {
+	return $rpmVersion="\nRPMVERSION $text";
+    }
+    else {
+	return '';
+    }
+}
+# --------------------------------------------------- Format rpmRelease section
+sub format_rpmRelease {
+    my $text=$parser->get_text('/rpmRelease');
+    $parser->get_tag('/rpmRelease');
+    if ($mode eq 'html') {
+	return $rpmRelease="\nRPMRELEASE $text";
+    }
+    else {
+	return '';
+    }
+}
+# ---------------------------------------------------- Format rpmVendor section
+sub format_rpmVendor {
+    my $text=$parser->get_text('/rpmVendor');
+    $parser->get_tag('/rpmVendor');
+    if ($mode eq 'html') {
+	return $rpmVendor="\nRPMVENDOR $text";
+    }
+    else {
+	return '';
+    }
+}
+# ------------------------------------------------- Format rpmBuildRoot section
+sub format_rpmBuildRoot {
+    my $text=$parser->get_text('/rpmBuildRoot');
+    $parser->get_tag('/rpmBuildRoot');
+    if ($mode eq 'html') {
+	return $rpmBuildRoot="\nRPMBUILDROOT $text";
+    }
+    else {
+	return '';
+    }
+}
+# ------------------------------------------------- Format rpmCopyright section
+sub format_rpmCopyright {
+    my $text=$parser->get_text('/rpmCopyright');
+    $parser->get_tag('/rpmCopyright');
+    if ($mode eq 'html') {
+	return $rpmCopyright="\nRPMCOPYRIGHT $text";
+    }
+    else {
+	return '';
+    }
+}
+# ----------------------------------------------------- Format rpmGroup section
+sub format_rpmGroup {
+    my $text=$parser->get_text('/rpmGroup');
+    $parser->get_tag('/rpmGroup');
+    if ($mode eq 'html') {
+	return $rpmGroup="\nRPMGROUP $text";
+    }
+    else {
+	return '';
+    }
+}
+# ---------------------------------------------------- Format rpmSource section
+sub format_rpmSource {
+    my $text=$parser->get_text('/rpmSource');
+    $parser->get_tag('/rpmSource');
+    if ($mode eq 'html') {
+	return $rpmSource="\nRPMSOURCE $text";
+    }
+    else {
+	return '';
+    }
+}
+# ----------------------------------------------- Format rpmAutoReqProv section
+sub format_rpmAutoReqProv {
+    my $text=$parser->get_text('/rpmAutoReqProv');
+    $parser->get_tag('/rpmAutoReqProv');
+    if ($mode eq 'html') {
+	return $rpmAutoReqProv="\nRPMAUTOREQPROV $text";
+    }
+    else {
+	return '';
+    }
+}
+# ----------------------------------------------- Format rpmdescription section
+sub format_rpmdescription {
+    my $text=$parser->get_text('/rpmdescription');
+    $parser->get_tag('/rpmdescription');
+    if ($mode eq 'html') {
+	return $rpmdescription="\nRPMDESCRIPTION $text";
+    }
+    else {
+	return '';
+    }
+}
+# ------------------------------------------------------- Format rpmpre section
+sub format_rpmpre {
+    my $text=$parser->get_text('/rpmpre');
+    $parser->get_tag('/rpmpre');
+    if ($mode eq 'html') {
+	return $rpmpre="\nRPMPRE $text";
+    }
+    else {
+	return '';
+    }
+}
+# -------------------------------------------------- Format directories section
+sub format_directories {
+    my $text=&trim($parser->get_text('/directories'));
+    $parser->get_tag('/directories');
+    if ($mode eq 'html') {
+	return $directories="\nBEGIN DIRECTORIES\n$text\nEND DIRECTORIES\n";
+    }
+    else {
+	return '';
+    }
+}
+# ---------------------------------------------------- Format directory section
+sub format_directory {
+    my (@tokeninfo)=@_;
+    $targetdir='';$categoryname='';$description='';
+    $parser->get_text('/directory');
+    $parser->get_tag('/directory');
+    if ($mode eq 'html') {
+	return $directory="\nDIRECTORY $targetdir $categoryname $description";
+    }
+    else {
+	return '';
+    }
+}
+# ---------------------------------------------------- Format targetdir section
+sub format_targetdir {
+    my @tokeninfo=@_;
+    $targetdir='';
+    my $text=&trim($parser->get_text('/targetdir'));
+    if ($text) {
+	$parser->get_tag('/targetdir');
+	$targetdir=$text;
+    }
+    return '';
+}
+# ------------------------------------------------- Format categoryname section
+sub format_categoryname {
+    my @tokeninfo=@_;
+    $categoryname='';
+    my $text=&trim($parser->get_text('/categoryname'));
+    if ($text) {
+	$parser->get_tag('/categoryname');
+	$categoryname=$text;
+    }
+    return '';
+}
+# -------------------------------------------------- Format description section
+sub format_description {
+    my @tokeninfo=@_;
+    $description='';
+    my $text=&trim($parser->get_text('/description'));
+    if ($text) {
+	$parser->get_tag('/description');
+	$description=$text;
+    }
+    return '';
+}
+# -------------------------------------------------------- Format files section
+sub format_files {
+    my $text=&trim($parser->get_text('/files'));
+    $parser->get_tag('/files');
+    if ($mode eq 'html') {
+	return $directories="\nBEGIN FILES\n$text\nEND FILES\n";
+    }
+    else {
+	return '';
+    }
+}
+# ---------------------------------------------------- Format fileglobs section
+sub format_fileglobs {
+
+}
+# -------------------------------------------------------- Format links section
+sub format_links {
 
-# -------------------------- Parse for an input string and specific target mode
-sub xmlparse {
-    my ($parsestring,$target,$dist)=@_;
-    my $outtext = '';
-    my $parser = HTML::TokeParser->new($parsestring);
-    $parser->xml_mode('1');
-    # strategy: grab first and pass well-parsed information to info-handler subroutines
-    # There should be errors if bad file format.
-    # Unlike other XML-handling strategies we use, this script should
-    # terminate if the XML is bad.
-    # grab first (and only) lpml section
-    # grab target(s) root
-    # grab source root
-    # grab categories
-       # foreach category
-       # attributes: name and type
-       # grab chown
-       # grab chmod
-         # parse user name and group name
-    # grab rpm (remember to replace \n with real new lines)
-       # grab rpmSummary
-       # grab rpmName
-       # grab rpmVersion
-       # grab rpmRelease
-       # grab rpmVendor
-       # grab rpmBuildRoot
-       # grab rpmCopyright
-       # grab rpmGroup
-       # grab rpmSource
-       # grab rpmAutoReqProv
-       # grab rpmdescription
-       # grab rpmpre
-    # grab directories
-       # foreach directory
-       # grab targetdir(s)
-       # grab categoryname
-       # grab (optional) description
-    # grab files
-       # foreach file|link|link|fileglob
-       # grab source
-       # grab target(s)
-       # grab categoryname
-       # grab description
-       # grab note
-}
-
-__END__
-
-while (my $token = $p->get_tag("category")) {
-    my $url = $token->[1]{name} . $token->[1]{type};
-    my $chmodtoken=$p->get_tag("chmod");
-    my $text = $p->get_trimmed_text("/chmod");
-    print "CHMOD: $text\n";
-    my $text = $p->get_trimmed_text("/category");
-    print "$url\t$text\t".join(" ",@{$token->[2]})."\n";
 }
+# --------------------------------------------------------- Format file section
+sub format_file {
+    my @tokeninfo=@_;
+    $file=''; $source=''; $target=''; $categoryname=''; $description='';
+    $note=''; $build=''; $status=''; $dependencies='';
+    my $text=&trim($parser->get_text('/file'));
+    if ($source) {
+	$parser->get_tag('/file');
+	if ($mode eq 'html') {
+	    return ($file="\nBEGIN FILE\n".
+		"$source $target $categoryname $description $note " .
+		"$build $status $dependencies" .
+		"\nEND FILE");
+	}
+	else {
+	    return '';
+	}
+    }
+    return '';
+}
+# --------------------------------------------------------- Format link section
+sub format_link {
+    my @tokeninfo=@_;
+    $link=''; $linkto=''; $target=''; $categoryname=''; $description='';
+    $note=''; $build=''; $status=''; $dependencies='';
+    my $text=&trim($parser->get_text('/link'));
+    if ($linkto) {
+	$parser->get_tag('/link');
+	if ($mode eq 'html') {
+	    return $link="\nBEGIN LINK\n".
+		"$linkto $target $categoryname $description $note " .
+		"$build $status $dependencies" .
+		    "\nEND LINK";
+	}
+	else {
+	    return '';
+	}
+    }
+    return '';
+}
+# ----------------------------------------------------- Format fileglob section
+sub format_fileglob {
+    my @tokeninfo=@_;
+    $fileglob=''; $glob=''; $sourcedir='';
+    $targetdir=''; $categoryname=''; $description='';
+    $note=''; $build=''; $status=''; $dependencies='';
+    $filenames='';
+    my $text=&trim($parser->get_text('/fileglob'));
+    if ($sourcedir) {
+	$parser->get_tag('/fileglob');
+	if ($mode eq 'html') {
+	    return $fileglob="\nBEGIN FILEGLOB\n".
+		"$glob sourcedir $targetdir $categoryname $description $note ".
+		"$build $status $dependencies $filenames" .
+		    "\nEND FILEGLOB";
+	}
+	else {
+	    return '';
+	}
+    }
+    return '';
+}
+# ---------------------------------------------------- Format sourcedir section
+sub format_sourcedir {
+    my @tokeninfo=@_;
+    $sourcedir='';
+    my $text=&trim($parser->get_text('/sourcedir'));
+    if ($text) {
+	$parser->get_tag('/sourcedir');
+	$sourcedir=$text;
+    }
+    return '';
+}
+# ------------------------------------------------------- Format target section
+sub format_target {
+    my @tokeninfo=@_;
+    $target='';
+    my $text=&trim($parser->get_text('/target'));
+    if ($text) {
+	$parser->get_tag('/target');
+	$target=$text;
+    }
+    return '';
+}
+# ------------------------------------------------------- Format source section
+sub format_source {
+    my @tokeninfo=@_;
+    $source='';
+    my $text=&trim($parser->get_text('/source'));
+    if ($text) {
+	$parser->get_tag('/source');
+	$source=$text;
+    }
+    return '';
+}
+# --------------------------------------------------------- Format note section
+sub format_note {
+    my @tokeninfo=@_;
+    $note='';
+    my $text=&trim($parser->get_text('/note'));
+    if ($text) {
+	$parser->get_tag('/note');
+	$note=$text;
+    }
+    return '';
+
+}
+# -------------------------------------------------------- Format build section
+sub format_build {
+    my @tokeninfo=@_;
+    $build='';
+    my $text=&trim($parser->get_text('/build'));
+    if ($text) {
+	$parser->get_tag('/build');
+	$build=$text;
+    }
+    return '';
+}
+# ------------------------------------------------------- Format status section
+sub format_status {
+    my @tokeninfo=@_;
+    $status='';
+    my $text=&trim($parser->get_text('/status'));
+    if ($text) {
+	$parser->get_tag('/status');
+	$status=$text;
+    }
+    return '';
+}
+# ------------------------------------------------- Format dependencies section
+sub format_dependencies {
+    my @tokeninfo=@_;
+    $dependencies='';
+    my $text=&trim($parser->get_text('/dependencies'));
+    if ($text) {
+	$parser->get_tag('/dependencies');
+	$dependencies=$text;
+    }
+    return '';
+}
+# --------------------------------------------------------- Format glob section
+sub format_glob {
+    my @tokeninfo=@_;
+    $glob='';
+    my $text=&trim($parser->get_text('/glob'));
+    if ($text) {
+	$parser->get_tag('/glob');
+	$glob=$text;
+    }
+    return '';
+}
+# ---------------------------------------------------- Format filenames section
+sub format_filenames {
+    my @tokeninfo=@_;
+    $glob='';
+    my $text=&trim($parser->get_text('/filenames'));
+    if ($text) {
+	$parser->get_tag('/filenames');
+	$filenames=$text;
+    }
+    return '';
+}
+# ------------------------------------------------------- Format linkto section
+sub format_linkto {
+    my @tokeninfo=@_;
+    $glob='';
+    my $text=&trim($parser->get_text('/linkto'));
+    if ($text) {
+	$parser->get_tag('/linkto');
+	$linkto=$text;
+    }
+    return '';
+}
+# --------------------------------------- remove starting and ending whitespace
+sub trim {
+    my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
+}