#!/usr/bin/perl # Scott Harrison # May 2001 # 06/19/2001,06/20,06/24 - Scott Harrison # I am using a multiple pass-through approach to parsing # the lpml file. This saves memory and makes sure the server # will never be overloaded. use HTML::TokeParser; my $usage=<){} # throw away the input to avoid broken pipes print $usage; exit -1; # exit with error status } my $dist; if (@ARGV) { $dist = shift @ARGV; } my $targetroot; my $sourceroot; if (@ARGV) { $targetroot = shift @ARGV; } if (@ARGV) { $sourceroot = shift @ARGV; } # ---------------------------------------------------- Start first pass through my @parsecontents = <>; my $parsestring = join('',@parsecontents); my $outstring; # Need to make a pass through and figure out what defaults are # overrided. Top-down overriding strategy (leaves don't know # about distant leaves). my @hierarchy; $hierarchy[0]=0; my $hloc=0; my $token; $parser = HTML::TokeParser->new(\$parsestring) or die('can\'t create TokeParser object'); $parser->xml_mode('1'); my %hash; my $key; while ($token = $parser->get_token()) { if ($token->[0] eq 'S') { $hloc++; $hierarchy[$hloc]++; $key=$token->[1].join(',',@hierarchy[0..($hloc-1)]); my $thisdist=' '.$token->[2]{'dist'}.' '; if ($thisdist eq ' default ') { $hash{$key}=1; # there is a default setting for this key } elsif ($dist && $hash{$key}==1 && $thisdist=~/\s$dist\s/) { $hash{$key}=2; # disregard default setting for this key if # there is a directly requested distribution match } } if ($token->[0] eq 'E') { $hloc--; } } # --------------------------------------------------- Start second pass through undef $hloc; undef @hierarchy; undef $parser; $hierarchy[0]=0; $parser = HTML::TokeParser->new(\$parsestring) or die('can\'t create TokeParser object'); $parser->xml_mode('1'); my $cleanstring; while ($token = $parser->get_token()) { if ($token->[0] eq 'S') { $hloc++; $hierarchy[$hloc]++; $key=$token->[1].join(',',@hierarchy[0..($hloc-1)]); my $thisdist=' '.$token->[2]{'dist'}.' '; if ($hash{$key}==2 and !($thisdist eq ' ' or $thisdist =~/\s$dist\s/)) { if ($token->[4]!~/\/>$/) { $parser->get_tag('/'.$token->[1]); $hloc--; } } elsif ($thisdist ne ' ' and $thisdist!~/\s$dist\s/ and !($thisdist eq ' default ' and $hash{$key}!=2)) { if ($token->[4]!~/\/>$/) { $parser->get_tag('/'.$token->[1]); $hloc--; } } else { $cleanstring.=$token->[4]; } if ($token->[4]=~/\/>$/) { $hloc--; } } if ($token->[0] eq 'E') { $cleanstring.=$token->[2]; $hloc--; } if ($token->[0] eq 'T') { $cleanstring.=$token->[1]; } } $cleanstring=&trim($cleanstring); # ---------------------------------------------------- Start final pass through # storage variables my $lpml; my $categories; my $category; my $category_att_name; my $category_att_type; my $chown; my $chmod; my $rpm; my $rpmSummary; my $rpmName; my $rpmVersion; my $rpmRelease; my $rpmVendor; my $rpmBuildRoot; my $rpmCopyright; my $rpmGroup; my $rpmSource; my $rpmAutoReqProv; my $rpmdescription; my $rpmpre; my $directories; my $directory; my $targetdirs; my $targetdir; my $categoryname; my $description; my $files; my $fileglobs; my $links; my $file; my $link; my $fileglob; my $sourcedir; my $targets; my $target; my $source; my $note; my $build; my $commands; my $command; my $status; my $dependencies; my $dependency; # Make new parser with distribution specific input undef $parser; $parser = HTML::TokeParser->new(\$cleanstring) or die('can\'t create TokeParser object'); $parser->xml_mode('1'); # Define handling methods for mode-dependent text rendering $parser->{textify}={ targetroot => \&format_targetroot, sourceroot => \&format_sourceroot, categories => \&format_categories, category => \&format_category, targetdir => \&format_targetdir, chown => \&format_chown, chmod => \&format_chmod, rpm => \&format_rpm, rpmSummary => \&format_rpmSummary, rpmName => \&format_rpmName, rpmVersion => \&format_rpmVersion, rpmRelease => \&format_rpmRelease, rpmVendor => \&format_rpmVendor, rpmBuildRoot => \&format_rpmBuildRoot, rpmCopyright => \&format_rpmCopyright, rpmGroup => \&format_rpmGroup, rpmSource => \&format_rpmSource, rpmAutoReqProv => \&format_rpmAutoReqProv, rpmdescription => \&format_rpmdescription, rpmpre => \&format_rpmpre, directories => \&format_directories, directory => \&format_directory, categoryname => \&format_categoryname, description => \&format_description, files => \&format_files, file => \&format_file, fileglob => \&format_fileglob, link => \&format_link, linkto => \&format_linkto, source => \&format_source, target => \&format_target, note => \&format_note, build => \&format_build, status => \&format_status, dependencies => \&format_dependencies, glob => \&format_glob, sourcedir => \&format_sourcedir, filenames => \&format_filenames, }; my $text; my $token; undef $hloc; undef @hierarchy; my $hloc; my @hierarchy2; while ($token = $parser->get_tag('lpml')) { &format_lpml(@{$token}); $text = &trim($parser->get_text('/lpml')); $token = $parser->get_tag('/lpml'); print $lpml; print "\n"; $text=~s/\s*\n\s*\n\s*/\n/g; print $text; print "\n"; print &end(); } exit; sub end { if ($mode eq 'html') { return "THE END\n"; } } # ----------------------- Take in string to parse and the separation expression sub extract_array { my ($stringtoparse,$sepexp) = @_; my @a=split(/$sepexp/,$stringtoparse); return \@a; } # --------------------------------------------------------- Format lpml section sub format_lpml { my (@tokeninfo)=@_; my $date=`date`; chop $date; if ($mode eq 'html') { $lpml = "LPML BEGINNING: $date"; } } # --------------------------------------------------- Format targetroot section sub format_targetroot { my $text=&trim($parser->get_text('/targetroot')); $text=$targetroot if $targetroot; $parser->get_tag('/targetroot'); if ($mode eq 'html') { return $targetroot="\nTARGETROOT: $text"; } else { return ''; } } # --------------------------------------------------- Format sourceroot section sub format_sourceroot { my $text=&trim($parser->get_text('/sourceroot')); $text=$sourceroot if $sourceroot; $parser->get_tag('/sourceroot'); if ($mode eq 'html') { return $sourceroot="\nSOURCEROOT: $text"; } else { return ''; } } # --------------------------------------------------- Format categories section sub format_categories { my $text=&trim($parser->get_text('/categories')); $parser->get_tag('/categories'); if ($mode eq 'html') { return $categories="\nBEGIN CATEGORIES\n$text\nEND CATEGORIES\n"; } else { return ''; } } # --------------------------------------------------- Format categories section sub format_category { my (@tokeninfo)=@_; $category_att_name=$tokeninfo[2]->{'name'}; $category_att_type=$tokeninfo[2]->{'type'}; $chmod='';$chown=''; $parser->get_text('/category'); $parser->get_tag('/category'); if ($mode eq 'html') { return $category="\nCATEGORY $category_att_name $category_att_type ". "$chmod $chown"; } else { return ''; } } # -------------------------------------------------------- Format chown section sub format_chown { my @tokeninfo=@_; $chown=''; my $text=&trim($parser->get_text('/chown')); if ($text) { $parser->get_tag('/chown'); $chown=$text; } return ''; } # -------------------------------------------------------- Format chmod section sub format_chmod { my @tokeninfo=@_; $chmod=''; my $text=&trim($parser->get_text('/chmod')); if ($text) { $parser->get_tag('/chmod'); $chmod=$text; } return ''; } # ---------------------------------------------------------- Format rpm section sub format_rpm { my $text=&trim($parser->get_text('/rpm')); $parser->get_tag('/rpm'); if ($mode eq 'html') { return $rpm="\nBEGIN RPM\n$text\nEND RPM"; } else { return ''; } } # --------------------------------------------------- Format rpmSummary section sub format_rpmSummary { my $text=&trim($parser->get_text('/rpmSummary')); $parser->get_tag('/rpmSummary'); if ($mode eq 'html') { return $rpmSummary="\nRPMSUMMARY $text"; } else { return ''; } } # ------------------------------------------------------ Format rpmName section sub format_rpmName { my $text=&trim($parser->get_text('/rpmName')); $parser->get_tag('/rpmName'); if ($mode eq 'html') { return $rpmName="\nRPMNAME $text"; } else { return ''; } } # --------------------------------------------------- Format rpmVersion section sub format_rpmVersion { my $text=$parser->get_text('/rpmVersion'); $parser->get_tag('/rpmVersion'); if ($mode eq 'html') { return $rpmVersion="\nRPMVERSION $text"; } else { return ''; } } # --------------------------------------------------- Format rpmRelease section sub format_rpmRelease { my $text=$parser->get_text('/rpmRelease'); $parser->get_tag('/rpmRelease'); if ($mode eq 'html') { return $rpmRelease="\nRPMRELEASE $text"; } else { return ''; } } # ---------------------------------------------------- Format rpmVendor section sub format_rpmVendor { my $text=$parser->get_text('/rpmVendor'); $parser->get_tag('/rpmVendor'); if ($mode eq 'html') { return $rpmVendor="\nRPMVENDOR $text"; } else { return ''; } } # ------------------------------------------------- Format rpmBuildRoot section sub format_rpmBuildRoot { my $text=$parser->get_text('/rpmBuildRoot'); $parser->get_tag('/rpmBuildRoot'); if ($mode eq 'html') { return $rpmBuildRoot="\nRPMBUILDROOT $text"; } else { return ''; } } # ------------------------------------------------- Format rpmCopyright section sub format_rpmCopyright { my $text=$parser->get_text('/rpmCopyright'); $parser->get_tag('/rpmCopyright'); if ($mode eq 'html') { return $rpmCopyright="\nRPMCOPYRIGHT $text"; } else { return ''; } } # ----------------------------------------------------- Format rpmGroup section sub format_rpmGroup { my $text=$parser->get_text('/rpmGroup'); $parser->get_tag('/rpmGroup'); if ($mode eq 'html') { return $rpmGroup="\nRPMGROUP $text"; } else { return ''; } } # ---------------------------------------------------- Format rpmSource section sub format_rpmSource { my $text=$parser->get_text('/rpmSource'); $parser->get_tag('/rpmSource'); if ($mode eq 'html') { return $rpmSource="\nRPMSOURCE $text"; } else { return ''; } } # ----------------------------------------------- Format rpmAutoReqProv section sub format_rpmAutoReqProv { my $text=$parser->get_text('/rpmAutoReqProv'); $parser->get_tag('/rpmAutoReqProv'); if ($mode eq 'html') { return $rpmAutoReqProv="\nRPMAUTOREQPROV $text"; } else { return ''; } } # ----------------------------------------------- Format rpmdescription section sub format_rpmdescription { my $text=$parser->get_text('/rpmdescription'); $parser->get_tag('/rpmdescription'); if ($mode eq 'html') { return $rpmdescription="\nRPMDESCRIPTION $text"; } else { return ''; } } # ------------------------------------------------------- Format rpmpre section sub format_rpmpre { my $text=$parser->get_text('/rpmpre'); $parser->get_tag('/rpmpre'); if ($mode eq 'html') { return $rpmpre="\nRPMPRE $text"; } else { return ''; } } # -------------------------------------------------- Format directories section sub format_directories { my $text=&trim($parser->get_text('/directories')); $parser->get_tag('/directories'); if ($mode eq 'html') { return $directories="\nBEGIN DIRECTORIES\n$text\nEND DIRECTORIES\n"; } else { return ''; } } # ---------------------------------------------------- Format directory section sub format_directory { my (@tokeninfo)=@_; $targetdir='';$categoryname='';$description=''; $parser->get_text('/directory'); $parser->get_tag('/directory'); if ($mode eq 'html') { return $directory="\nDIRECTORY $targetdir $categoryname $description"; } else { return ''; } } # ---------------------------------------------------- Format targetdir section sub format_targetdir { my @tokeninfo=@_; $targetdir=''; my $text=&trim($parser->get_text('/targetdir')); if ($text) { $parser->get_tag('/targetdir'); $targetdir=$text; } return ''; } # ------------------------------------------------- Format categoryname section sub format_categoryname { my @tokeninfo=@_; $categoryname=''; my $text=&trim($parser->get_text('/categoryname')); if ($text) { $parser->get_tag('/categoryname'); $categoryname=$text; } return ''; } # -------------------------------------------------- Format description section sub format_description { my @tokeninfo=@_; $description=''; my $text=&trim($parser->get_text('/description')); if ($text) { $parser->get_tag('/description'); $description=$text; } return ''; } # -------------------------------------------------------- Format files section sub format_files { my $text=&trim($parser->get_text('/files')); $parser->get_tag('/files'); if ($mode eq 'html') { return $directories="\nBEGIN FILES\n$text\nEND FILES\n"; } else { return ''; } } # ---------------------------------------------------- Format fileglobs section sub format_fileglobs { } # -------------------------------------------------------- Format links section sub format_links { } # --------------------------------------------------------- Format file section sub format_file { my @tokeninfo=@_; $file=''; $source=''; $target=''; $categoryname=''; $description=''; $note=''; $build=''; $status=''; $dependencies=''; my $text=&trim($parser->get_text('/file')); if ($source) { $parser->get_tag('/file'); if ($mode eq 'html') { return ($file="\nBEGIN FILE\n". "$source $target $categoryname $description $note " . "$build $status $dependencies" . "\nEND FILE"); } else { return ''; } } return ''; } # --------------------------------------------------------- Format link section sub format_link { my @tokeninfo=@_; $link=''; $linkto=''; $target=''; $categoryname=''; $description=''; $note=''; $build=''; $status=''; $dependencies=''; my $text=&trim($parser->get_text('/link')); if ($linkto) { $parser->get_tag('/link'); if ($mode eq 'html') { return $link="\nBEGIN LINK\n". "$linkto $target $categoryname $description $note " . "$build $status $dependencies" . "\nEND LINK"; } else { return ''; } } return ''; } # ----------------------------------------------------- Format fileglob section sub format_fileglob { my @tokeninfo=@_; $fileglob=''; $glob=''; $sourcedir=''; $targetdir=''; $categoryname=''; $description=''; $note=''; $build=''; $status=''; $dependencies=''; $filenames=''; my $text=&trim($parser->get_text('/fileglob')); if ($sourcedir) { $parser->get_tag('/fileglob'); if ($mode eq 'html') { return $fileglob="\nBEGIN FILEGLOB\n". "$glob sourcedir $targetdir $categoryname $description $note ". "$build $status $dependencies $filenames" . "\nEND FILEGLOB"; } else { return ''; } } return ''; } # ---------------------------------------------------- Format sourcedir section sub format_sourcedir { my @tokeninfo=@_; $sourcedir=''; my $text=&trim($parser->get_text('/sourcedir')); if ($text) { $parser->get_tag('/sourcedir'); $sourcedir=$text; } return ''; } # ------------------------------------------------------- Format target section sub format_target { my @tokeninfo=@_; $target=''; my $text=&trim($parser->get_text('/target')); if ($text) { $parser->get_tag('/target'); $target=$text; } return ''; } # ------------------------------------------------------- Format source section sub format_source { my @tokeninfo=@_; $source=''; my $text=&trim($parser->get_text('/source')); if ($text) { $parser->get_tag('/source'); $source=$text; } return ''; } # --------------------------------------------------------- Format note section sub format_note { my @tokeninfo=@_; $note=''; my $text=&trim($parser->get_text('/note')); if ($text) { $parser->get_tag('/note'); $note=$text; } return ''; } # -------------------------------------------------------- Format build section sub format_build { my @tokeninfo=@_; $build=''; my $text=&trim($parser->get_text('/build')); if ($text) { $parser->get_tag('/build'); $build=$text; } return ''; } # ------------------------------------------------------- Format status section sub format_status { my @tokeninfo=@_; $status=''; my $text=&trim($parser->get_text('/status')); if ($text) { $parser->get_tag('/status'); $status=$text; } return ''; } # ------------------------------------------------- Format dependencies section sub format_dependencies { my @tokeninfo=@_; $dependencies=''; my $text=&trim($parser->get_text('/dependencies')); if ($text) { $parser->get_tag('/dependencies'); $dependencies=$text; } return ''; } # --------------------------------------------------------- Format glob section sub format_glob { my @tokeninfo=@_; $glob=''; my $text=&trim($parser->get_text('/glob')); if ($text) { $parser->get_tag('/glob'); $glob=$text; } return ''; } # ---------------------------------------------------- Format filenames section sub format_filenames { my @tokeninfo=@_; $glob=''; my $text=&trim($parser->get_text('/filenames')); if ($text) { $parser->get_tag('/filenames'); $filenames=$text; } return ''; } # ------------------------------------------------------- Format linkto section sub format_linkto { my @tokeninfo=@_; $glob=''; my $text=&trim($parser->get_text('/linkto')); if ($text) { $parser->get_tag('/linkto'); $linkto=$text; } return ''; } # --------------------------------------- remove starting and ending whitespace sub trim { my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s; }