#!/usr/bin/perl

# Scott Harrison
# May 2001
# 06/19/2001 - Scott Harrison

# Reads an XML document from the files named on the command line (after the
# first two arguments) or from standard input, hands it to xmlparse(), and
# prints whatever xmlparse() returns.
#
# Command-line arguments, in order:
#   1. target - target mode, passed through to xmlparse()
#   2. dist   - passed through to xmlparse()

use strict;
use warnings;

use HTML::TokeParser;

my $target = shift @ARGV;
my $dist   = shift @ARGV;

# Slurp every remaining input line into a single string.
my $parsestring = join '', <>;

my $outstring = xmlparse($parsestring, $target, $dist);

print $outstring;

# -------------------------- Parse for an input string and specific target mode
#
# Parameters:
#   $parsestring - the complete XML text to parse
#   $target      - target mode (not yet consulted by the code below)
#   $dist        - not yet consulted by the code below
#
# Returns the accumulated output text.  The extraction steps listed in the
# outline below are not implemented yet, so this is currently always the
# empty string.
#
# Dies if the tokenizer cannot be constructed.
sub xmlparse {
    my ($parsestring, $target, $dist) = @_;

    my $outtext = '';

    # HTML::TokeParser treats a plain scalar argument as a file name; a
    # reference to a scalar is required to parse the text held in memory.
    my $parser = HTML::TokeParser->new(\$parsestring)
        or die "Unable to create HTML::TokeParser for input\n";
    $parser->xml_mode(1);

    # strategy: grab first and pass well-parsed information to
    # info-handler subroutines
    # There should be errors if bad file format.
    # Unlike other XML-handling strategies we use, this script should
    # terminate if the XML is bad.

    # grab first (and only) lpml section

    # grab target(s) root
    # grab source root

    # grab categories
    #   foreach category
    #     attributes: name and type
    #     grab chown
    #     grab chmod
    #     parse user name and group name

    # grab rpm (remember to replace \n with real new lines)
    #   grab rpmSummary
    #   grab rpmName
    #   grab rpmVersion
    #   grab rpmRelease
    #   grab rpmVendor
    #   grab rpmBuildRoot
    #   grab rpmCopyright
    #   grab rpmGroup
    #   grab rpmSource
    #   grab rpmAutoReqProv
    #   grab rpmdescription
    #   grab rpmpre

    # grab directories
    #   foreach directory
    #     grab targetdir(s)
    #     grab categoryname
    #     grab (optional) description

    # grab files
    #   foreach file|link|link|fileglob
    #     grab source
    #     grab target(s)
    #     grab categoryname
    #     grab description
    #     grab note

    # Return the collected text explicitly; without this the sub would
    # return the result of the xml_mode() call above.
    return $outtext;
}

__END__

# Everything below __END__ is ignored by perl; this loop is an unexecuted
# sketch kept for reference.

while (my $token = $p->get_tag("category")) {
    my $url = $token->[1]{name} . $token->[1]{type};
    my $chmodtoken=$p->get_tag("chmod");
    my $text = $p->get_trimmed_text("/chmod");
    print "CHMOD: $text\n";
    my $text = $p->get_trimmed_text("/category");
    print "$url\t$text\t".join(" ",@{$token->[2]})."\n";
}