--- loncom/build/lpml_parse.pl 2001/06/20 12:32:54 1.2 +++ loncom/build/lpml_parse.pl 2001/06/24 23:00:32 1.3 @@ -2,73 +2,740 @@ # Scott Harrison # May 2001 -# 06/19/2001 - Scott Harrison +# 06/19/2001,06/20,06/24 - Scott Harrison + +# I am using a multiple pass-through approach to parsing +# the lpml file. This saves memory and makes sure the server +# will never be overloaded. use HTML::TokeParser; -my $target = shift @ARGV; -my $dist = shift @ARGV; +my $usage=<){} # throw away the input to avoid broken pipes + print $usage; + exit -1; # exit with error status +} + +my $dist; +if (@ARGV) { + $dist = shift @ARGV; +} +my $targetroot; +my $sourceroot; +if (@ARGV) { + $targetroot = shift @ARGV; +} +if (@ARGV) { + $sourceroot = shift @ARGV; +} + +# ---------------------------------------------------- Start first pass through my @parsecontents = <>; my $parsestring = join('',@parsecontents); my $outstring; -$outstring = &xmlparse($parsestring,$target,$dist); -print $outstring; +# Need to make a pass through and figure out what defaults are +# overrided. Top-down overriding strategy (leaves don't know +# about distant leaves). + +my @hierarchy; +$hierarchy[0]=0; +my $hloc=0; +my $token; +$parser = HTML::TokeParser->new(\$parsestring) or + die('can\'t create TokeParser object'); +$parser->xml_mode('1'); +my %hash; +my $key; +while ($token = $parser->get_token()) { + if ($token->[0] eq 'S') { + $hloc++; + $hierarchy[$hloc]++; + $key=$token->[1].join(',',@hierarchy[0..($hloc-1)]); + my $thisdist=' '.$token->[2]{'dist'}.' '; + if ($thisdist eq ' default ') { + $hash{$key}=1; # there is a default setting for this key + } + elsif ($dist && $hash{$key}==1 && $thisdist=~/\s$dist\s/) { + $hash{$key}=2; # disregard default setting for this key if + # there is a directly requested distribution match + } + } + if ($token->[0] eq 'E') { + $hloc--; + } +} + +# --------------------------------------------------- Start second pass through +undef $hloc; +undef @hierarchy; +undef $parser; +$hierarchy[0]=0; +$parser = HTML::TokeParser->new(\$parsestring) or + die('can\'t create TokeParser object'); +$parser->xml_mode('1'); +my $cleanstring; +while ($token = $parser->get_token()) { + if ($token->[0] eq 'S') { + $hloc++; + $hierarchy[$hloc]++; + $key=$token->[1].join(',',@hierarchy[0..($hloc-1)]); + my $thisdist=' '.$token->[2]{'dist'}.' '; + if ($hash{$key}==2 and + !($thisdist eq ' ' or $thisdist =~/\s$dist\s/)) { + if ($token->[4]!~/\/>$/) { + $parser->get_tag('/'.$token->[1]); + $hloc--; + } + } + elsif ($thisdist ne ' ' and $thisdist!~/\s$dist\s/ and + !($thisdist eq ' default ' and $hash{$key}!=2)) { + if ($token->[4]!~/\/>$/) { + $parser->get_tag('/'.$token->[1]); + $hloc--; + } + } + else { + $cleanstring.=$token->[4]; + } + if ($token->[4]=~/\/>$/) { + $hloc--; + } + } + if ($token->[0] eq 'E') { + $cleanstring.=$token->[2]; + $hloc--; + } + if ($token->[0] eq 'T') { + $cleanstring.=$token->[1]; + } +} +$cleanstring=&trim($cleanstring); + +# ---------------------------------------------------- Start final pass through + +# storage variables +my $lpml; +my $categories; +my $category; +my $category_att_name; +my $category_att_type; +my $chown; +my $chmod; +my $rpm; +my $rpmSummary; +my $rpmName; +my $rpmVersion; +my $rpmRelease; +my $rpmVendor; +my $rpmBuildRoot; +my $rpmCopyright; +my $rpmGroup; +my $rpmSource; +my $rpmAutoReqProv; +my $rpmdescription; +my $rpmpre; +my $directories; +my $directory; +my $targetdirs; +my $targetdir; +my $categoryname; +my $description; +my $files; +my $fileglobs; +my $links; +my $file; +my $link; +my $fileglob; +my $sourcedir; +my $targets; +my $target; +my $source; +my $note; +my $build; +my $commands; +my $command; +my $status; +my $dependencies; +my $dependency; + +# Make new parser with distribution specific input +undef $parser; +$parser = HTML::TokeParser->new(\$cleanstring) or + die('can\'t create TokeParser object'); +$parser->xml_mode('1'); + +# Define handling methods for mode-dependent text rendering +$parser->{textify}={ + targetroot => \&format_targetroot, + sourceroot => \&format_sourceroot, + categories => \&format_categories, + category => \&format_category, + targetdir => \&format_targetdir, + chown => \&format_chown, + chmod => \&format_chmod, + rpm => \&format_rpm, + rpmSummary => \&format_rpmSummary, + rpmName => \&format_rpmName, + rpmVersion => \&format_rpmVersion, + rpmRelease => \&format_rpmRelease, + rpmVendor => \&format_rpmVendor, + rpmBuildRoot => \&format_rpmBuildRoot, + rpmCopyright => \&format_rpmCopyright, + rpmGroup => \&format_rpmGroup, + rpmSource => \&format_rpmSource, + rpmAutoReqProv => \&format_rpmAutoReqProv, + rpmdescription => \&format_rpmdescription, + rpmpre => \&format_rpmpre, + directories => \&format_directories, + directory => \&format_directory, + categoryname => \&format_categoryname, + description => \&format_description, + files => \&format_files, + file => \&format_file, + fileglob => \&format_fileglob, + link => \&format_link, + linkto => \&format_linkto, + source => \&format_source, + target => \&format_target, + note => \&format_note, + build => \&format_build, + status => \&format_status, + dependencies => \&format_dependencies, + glob => \&format_glob, + sourcedir => \&format_sourcedir, + filenames => \&format_filenames, + }; + +my $text; +my $token; +undef $hloc; +undef @hierarchy; +my $hloc; +my @hierarchy2; +while ($token = $parser->get_tag('lpml')) { + &format_lpml(@{$token}); + $text = &trim($parser->get_text('/lpml')); + $token = $parser->get_tag('/lpml'); + print $lpml; + print "\n"; + $text=~s/\s*\n\s*\n\s*/\n/g; + print $text; + print "\n"; + print &end(); +} +exit; + +sub end { + if ($mode eq 'html') { + return "THE END\n"; + } +} + +# ----------------------- Take in string to parse and the separation expression +sub extract_array { + my ($stringtoparse,$sepexp) = @_; + my @a=split(/$sepexp/,$stringtoparse); + return \@a; +} + +# --------------------------------------------------------- Format lpml section +sub format_lpml { + my (@tokeninfo)=@_; + my $date=`date`; chop $date; + if ($mode eq 'html') { + $lpml = "LPML BEGINNING: $date"; + } +} +# --------------------------------------------------- Format targetroot section +sub format_targetroot { + my $text=&trim($parser->get_text('/targetroot')); + $text=$targetroot if $targetroot; + $parser->get_tag('/targetroot'); + if ($mode eq 'html') { + return $targetroot="\nTARGETROOT: $text"; + } + else { + return ''; + } +} +# --------------------------------------------------- Format sourceroot section +sub format_sourceroot { + my $text=&trim($parser->get_text('/sourceroot')); + $text=$sourceroot if $sourceroot; + $parser->get_tag('/sourceroot'); + if ($mode eq 'html') { + return $sourceroot="\nSOURCEROOT: $text"; + } + else { + return ''; + } +} +# --------------------------------------------------- Format categories section +sub format_categories { + my $text=&trim($parser->get_text('/categories')); + $parser->get_tag('/categories'); + if ($mode eq 'html') { + return $categories="\nBEGIN CATEGORIES\n$text\nEND CATEGORIES\n"; + } + else { + return ''; + } +} +# --------------------------------------------------- Format categories section +sub format_category { + my (@tokeninfo)=@_; + $category_att_name=$tokeninfo[2]->{'name'}; + $category_att_type=$tokeninfo[2]->{'type'}; + $chmod='';$chown=''; + $parser->get_text('/category'); + $parser->get_tag('/category'); + if ($mode eq 'html') { + return $category="\nCATEGORY $category_att_name $category_att_type ". + "$chmod $chown"; + } + else { + return ''; + } +} +# -------------------------------------------------------- Format chown section +sub format_chown { + my @tokeninfo=@_; + $chown=''; + my $text=&trim($parser->get_text('/chown')); + if ($text) { + $parser->get_tag('/chown'); + $chown=$text; + } + return ''; +} +# -------------------------------------------------------- Format chmod section +sub format_chmod { + my @tokeninfo=@_; + $chmod=''; + my $text=&trim($parser->get_text('/chmod')); + if ($text) { + $parser->get_tag('/chmod'); + $chmod=$text; + } + return ''; +} +# ---------------------------------------------------------- Format rpm section +sub format_rpm { + my $text=&trim($parser->get_text('/rpm')); + $parser->get_tag('/rpm'); + if ($mode eq 'html') { + return $rpm="\nBEGIN RPM\n$text\nEND RPM"; + } + else { + return ''; + } +} +# --------------------------------------------------- Format rpmSummary section +sub format_rpmSummary { + my $text=&trim($parser->get_text('/rpmSummary')); + $parser->get_tag('/rpmSummary'); + if ($mode eq 'html') { + return $rpmSummary="\nRPMSUMMARY $text"; + } + else { + return ''; + } +} +# ------------------------------------------------------ Format rpmName section +sub format_rpmName { + my $text=&trim($parser->get_text('/rpmName')); + $parser->get_tag('/rpmName'); + if ($mode eq 'html') { + return $rpmName="\nRPMNAME $text"; + } + else { + return ''; + } +} +# --------------------------------------------------- Format rpmVersion section +sub format_rpmVersion { + my $text=$parser->get_text('/rpmVersion'); + $parser->get_tag('/rpmVersion'); + if ($mode eq 'html') { + return $rpmVersion="\nRPMVERSION $text"; + } + else { + return ''; + } +} +# --------------------------------------------------- Format rpmRelease section +sub format_rpmRelease { + my $text=$parser->get_text('/rpmRelease'); + $parser->get_tag('/rpmRelease'); + if ($mode eq 'html') { + return $rpmRelease="\nRPMRELEASE $text"; + } + else { + return ''; + } +} +# ---------------------------------------------------- Format rpmVendor section +sub format_rpmVendor { + my $text=$parser->get_text('/rpmVendor'); + $parser->get_tag('/rpmVendor'); + if ($mode eq 'html') { + return $rpmVendor="\nRPMVENDOR $text"; + } + else { + return ''; + } +} +# ------------------------------------------------- Format rpmBuildRoot section +sub format_rpmBuildRoot { + my $text=$parser->get_text('/rpmBuildRoot'); + $parser->get_tag('/rpmBuildRoot'); + if ($mode eq 'html') { + return $rpmBuildRoot="\nRPMBUILDROOT $text"; + } + else { + return ''; + } +} +# ------------------------------------------------- Format rpmCopyright section +sub format_rpmCopyright { + my $text=$parser->get_text('/rpmCopyright'); + $parser->get_tag('/rpmCopyright'); + if ($mode eq 'html') { + return $rpmCopyright="\nRPMCOPYRIGHT $text"; + } + else { + return ''; + } +} +# ----------------------------------------------------- Format rpmGroup section +sub format_rpmGroup { + my $text=$parser->get_text('/rpmGroup'); + $parser->get_tag('/rpmGroup'); + if ($mode eq 'html') { + return $rpmGroup="\nRPMGROUP $text"; + } + else { + return ''; + } +} +# ---------------------------------------------------- Format rpmSource section +sub format_rpmSource { + my $text=$parser->get_text('/rpmSource'); + $parser->get_tag('/rpmSource'); + if ($mode eq 'html') { + return $rpmSource="\nRPMSOURCE $text"; + } + else { + return ''; + } +} +# ----------------------------------------------- Format rpmAutoReqProv section +sub format_rpmAutoReqProv { + my $text=$parser->get_text('/rpmAutoReqProv'); + $parser->get_tag('/rpmAutoReqProv'); + if ($mode eq 'html') { + return $rpmAutoReqProv="\nRPMAUTOREQPROV $text"; + } + else { + return ''; + } +} +# ----------------------------------------------- Format rpmdescription section +sub format_rpmdescription { + my $text=$parser->get_text('/rpmdescription'); + $parser->get_tag('/rpmdescription'); + if ($mode eq 'html') { + return $rpmdescription="\nRPMDESCRIPTION $text"; + } + else { + return ''; + } +} +# ------------------------------------------------------- Format rpmpre section +sub format_rpmpre { + my $text=$parser->get_text('/rpmpre'); + $parser->get_tag('/rpmpre'); + if ($mode eq 'html') { + return $rpmpre="\nRPMPRE $text"; + } + else { + return ''; + } +} +# -------------------------------------------------- Format directories section +sub format_directories { + my $text=&trim($parser->get_text('/directories')); + $parser->get_tag('/directories'); + if ($mode eq 'html') { + return $directories="\nBEGIN DIRECTORIES\n$text\nEND DIRECTORIES\n"; + } + else { + return ''; + } +} +# ---------------------------------------------------- Format directory section +sub format_directory { + my (@tokeninfo)=@_; + $targetdir='';$categoryname='';$description=''; + $parser->get_text('/directory'); + $parser->get_tag('/directory'); + if ($mode eq 'html') { + return $directory="\nDIRECTORY $targetdir $categoryname $description"; + } + else { + return ''; + } +} +# ---------------------------------------------------- Format targetdir section +sub format_targetdir { + my @tokeninfo=@_; + $targetdir=''; + my $text=&trim($parser->get_text('/targetdir')); + if ($text) { + $parser->get_tag('/targetdir'); + $targetdir=$text; + } + return ''; +} +# ------------------------------------------------- Format categoryname section +sub format_categoryname { + my @tokeninfo=@_; + $categoryname=''; + my $text=&trim($parser->get_text('/categoryname')); + if ($text) { + $parser->get_tag('/categoryname'); + $categoryname=$text; + } + return ''; +} +# -------------------------------------------------- Format description section +sub format_description { + my @tokeninfo=@_; + $description=''; + my $text=&trim($parser->get_text('/description')); + if ($text) { + $parser->get_tag('/description'); + $description=$text; + } + return ''; +} +# -------------------------------------------------------- Format files section +sub format_files { + my $text=&trim($parser->get_text('/files')); + $parser->get_tag('/files'); + if ($mode eq 'html') { + return $directories="\nBEGIN FILES\n$text\nEND FILES\n"; + } + else { + return ''; + } +} +# ---------------------------------------------------- Format fileglobs section +sub format_fileglobs { + +} +# -------------------------------------------------------- Format links section +sub format_links { -# -------------------------- Parse for an input string and specific target mode -sub xmlparse { - my ($parsestring,$target,$dist)=@_; - my $outtext = ''; - my $parser = HTML::TokeParser->new($parsestring); - $parser->xml_mode('1'); - # strategy: grab first and pass well-parsed information to info-handler subroutines - # There should be errors if bad file format. - # Unlike other XML-handling strategies we use, this script should - # terminate if the XML is bad. - # grab first (and only) lpml section - # grab target(s) root - # grab source root - # grab categories - # foreach category - # attributes: name and type - # grab chown - # grab chmod - # parse user name and group name - # grab rpm (remember to replace \n with real new lines) - # grab rpmSummary - # grab rpmName - # grab rpmVersion - # grab rpmRelease - # grab rpmVendor - # grab rpmBuildRoot - # grab rpmCopyright - # grab rpmGroup - # grab rpmSource - # grab rpmAutoReqProv - # grab rpmdescription - # grab rpmpre - # grab directories - # foreach directory - # grab targetdir(s) - # grab categoryname - # grab (optional) description - # grab files - # foreach file|link|link|fileglob - # grab source - # grab target(s) - # grab categoryname - # grab description - # grab note -} - -__END__ - -while (my $token = $p->get_tag("category")) { - my $url = $token->[1]{name} . $token->[1]{type}; - my $chmodtoken=$p->get_tag("chmod"); - my $text = $p->get_trimmed_text("/chmod"); - print "CHMOD: $text\n"; - my $text = $p->get_trimmed_text("/category"); - print "$url\t$text\t".join(" ",@{$token->[2]})."\n"; } +# --------------------------------------------------------- Format file section +sub format_file { + my @tokeninfo=@_; + $file=''; $source=''; $target=''; $categoryname=''; $description=''; + $note=''; $build=''; $status=''; $dependencies=''; + my $text=&trim($parser->get_text('/file')); + if ($source) { + $parser->get_tag('/file'); + if ($mode eq 'html') { + return ($file="\nBEGIN FILE\n". + "$source $target $categoryname $description $note " . + "$build $status $dependencies" . + "\nEND FILE"); + } + else { + return ''; + } + } + return ''; +} +# --------------------------------------------------------- Format link section +sub format_link { + my @tokeninfo=@_; + $link=''; $linkto=''; $target=''; $categoryname=''; $description=''; + $note=''; $build=''; $status=''; $dependencies=''; + my $text=&trim($parser->get_text('/link')); + if ($linkto) { + $parser->get_tag('/link'); + if ($mode eq 'html') { + return $link="\nBEGIN LINK\n". + "$linkto $target $categoryname $description $note " . + "$build $status $dependencies" . + "\nEND LINK"; + } + else { + return ''; + } + } + return ''; +} +# ----------------------------------------------------- Format fileglob section +sub format_fileglob { + my @tokeninfo=@_; + $fileglob=''; $glob=''; $sourcedir=''; + $targetdir=''; $categoryname=''; $description=''; + $note=''; $build=''; $status=''; $dependencies=''; + $filenames=''; + my $text=&trim($parser->get_text('/fileglob')); + if ($sourcedir) { + $parser->get_tag('/fileglob'); + if ($mode eq 'html') { + return $fileglob="\nBEGIN FILEGLOB\n". + "$glob sourcedir $targetdir $categoryname $description $note ". + "$build $status $dependencies $filenames" . + "\nEND FILEGLOB"; + } + else { + return ''; + } + } + return ''; +} +# ---------------------------------------------------- Format sourcedir section +sub format_sourcedir { + my @tokeninfo=@_; + $sourcedir=''; + my $text=&trim($parser->get_text('/sourcedir')); + if ($text) { + $parser->get_tag('/sourcedir'); + $sourcedir=$text; + } + return ''; +} +# ------------------------------------------------------- Format target section +sub format_target { + my @tokeninfo=@_; + $target=''; + my $text=&trim($parser->get_text('/target')); + if ($text) { + $parser->get_tag('/target'); + $target=$text; + } + return ''; +} +# ------------------------------------------------------- Format source section +sub format_source { + my @tokeninfo=@_; + $source=''; + my $text=&trim($parser->get_text('/source')); + if ($text) { + $parser->get_tag('/source'); + $source=$text; + } + return ''; +} +# --------------------------------------------------------- Format note section +sub format_note { + my @tokeninfo=@_; + $note=''; + my $text=&trim($parser->get_text('/note')); + if ($text) { + $parser->get_tag('/note'); + $note=$text; + } + return ''; + +} +# -------------------------------------------------------- Format build section +sub format_build { + my @tokeninfo=@_; + $build=''; + my $text=&trim($parser->get_text('/build')); + if ($text) { + $parser->get_tag('/build'); + $build=$text; + } + return ''; +} +# ------------------------------------------------------- Format status section +sub format_status { + my @tokeninfo=@_; + $status=''; + my $text=&trim($parser->get_text('/status')); + if ($text) { + $parser->get_tag('/status'); + $status=$text; + } + return ''; +} +# ------------------------------------------------- Format dependencies section +sub format_dependencies { + my @tokeninfo=@_; + $dependencies=''; + my $text=&trim($parser->get_text('/dependencies')); + if ($text) { + $parser->get_tag('/dependencies'); + $dependencies=$text; + } + return ''; +} +# --------------------------------------------------------- Format glob section +sub format_glob { + my @tokeninfo=@_; + $glob=''; + my $text=&trim($parser->get_text('/glob')); + if ($text) { + $parser->get_tag('/glob'); + $glob=$text; + } + return ''; +} +# ---------------------------------------------------- Format filenames section +sub format_filenames { + my @tokeninfo=@_; + $glob=''; + my $text=&trim($parser->get_text('/filenames')); + if ($text) { + $parser->get_tag('/filenames'); + $filenames=$text; + } + return ''; +} +# ------------------------------------------------------- Format linkto section +sub format_linkto { + my @tokeninfo=@_; + $glob=''; + my $text=&trim($parser->get_text('/linkto')); + if ($text) { + $parser->get_tag('/linkto'); + $linkto=$text; + } + return ''; +} +# --------------------------------------- remove starting and ending whitespace +sub trim { + my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s; +}