File:  [LON-CAPA] / loncom / build / lpml_parse.pl
Revision 1.3: download - view: text, annotated - select for diffs
Sun Jun 24 23:00:32 2001 UTC (22 years, 10 months ago) by harris41
Branches: MAIN
CVS tags: HEAD
it now works with a plaintext output

#!/usr/bin/perl

# Scott Harrison
# May 2001
# 06/19/2001,06/20,06/24 - Scott Harrison

# I am using a multiple pass-through approach to parsing
# the lpml file.  This saves memory and makes sure the server
# will never be overloaded.

use HTML::TokeParser;

my $usage=<<END;
**** ERROR ERROR ERROR ERROR ****
Usage is for lpml file to come in through standard input.
1st argument is the mode of parsing.
2nd argument is the distribution (default,redhat6.2,debian2.2,redhat7.1,etc).
3rd argument is to manually specify a sourceroot.
4th argument is to manually specify a targetroot.

Only the 1st argument is mandatory for the program to run.

Example:

cat ../../doc/loncapafiles.lpml |\\
perl lpml_parse.pl html default /home/sherbert/loncapa /tmp/install
END

# ------------------------------------------------- Grab command line arguments

my $mode;
if (@ARGV) {
    $mode = shift @ARGV;
}
else {
    while(<>){} # throw away the input to avoid broken pipes
    print $usage;
    exit -1; # exit with error status
}

my $dist;
if (@ARGV) {
    $dist = shift @ARGV;
}

my $targetroot;
my $sourceroot;
if (@ARGV) {
    $targetroot = shift @ARGV;
}
if (@ARGV) {
    $sourceroot = shift @ARGV;
}

# ---------------------------------------------------- Start first pass through
my @parsecontents = <>;
my $parsestring = join('',@parsecontents);
my $outstring;

# Need to make a pass through and figure out what defaults are
# overrided.  Top-down overriding strategy (leaves don't know
# about distant leaves).

my @hierarchy;
$hierarchy[0]=0;
my $hloc=0;
my $token;
$parser = HTML::TokeParser->new(\$parsestring) or
    die('can\'t create TokeParser object');
$parser->xml_mode('1');
my %hash;
my $key;
while ($token = $parser->get_token()) {
    if ($token->[0] eq 'S') {
	$hloc++;
	$hierarchy[$hloc]++;
	$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
	my $thisdist=' '.$token->[2]{'dist'}.' ';
	if ($thisdist eq ' default ') {
	    $hash{$key}=1; # there is a default setting for this key
	}
	elsif ($dist && $hash{$key}==1 && $thisdist=~/\s$dist\s/) {
	    $hash{$key}=2; # disregard default setting for this key if
	                   # there is a directly requested distribution match
	}
    }
    if ($token->[0] eq 'E') {
	$hloc--;
    }
}

# --------------------------------------------------- Start second pass through
undef $hloc;
undef @hierarchy;
undef $parser;
$hierarchy[0]=0;
$parser = HTML::TokeParser->new(\$parsestring) or
    die('can\'t create TokeParser object');
$parser->xml_mode('1');
my $cleanstring;
while ($token = $parser->get_token()) {
    if ($token->[0] eq 'S') {
	$hloc++;
	$hierarchy[$hloc]++;
	$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
	my $thisdist=' '.$token->[2]{'dist'}.' ';
	if ($hash{$key}==2 and
	    !($thisdist eq '  ' or $thisdist =~/\s$dist\s/)) {
	    if ($token->[4]!~/\/>$/) {
		$parser->get_tag('/'.$token->[1]);
		$hloc--;
	    }
	}
	elsif ($thisdist ne '  ' and $thisdist!~/\s$dist\s/ and
	       !($thisdist eq ' default ' and $hash{$key}!=2)) {
	    if ($token->[4]!~/\/>$/) {
		$parser->get_tag('/'.$token->[1]);
		$hloc--;
	    }
	}
	else {
	    $cleanstring.=$token->[4];
	}
	if ($token->[4]=~/\/>$/) {
	    $hloc--;
	}
    }
    if ($token->[0] eq 'E') {
	$cleanstring.=$token->[2];
	$hloc--;
    }
    if ($token->[0] eq 'T') {
	$cleanstring.=$token->[1];
    }
}
$cleanstring=&trim($cleanstring);

# ---------------------------------------------------- Start final pass through

# storage variables
my $lpml;
my $categories;
my $category;
my $category_att_name;
my $category_att_type;
my $chown;
my $chmod;
my $rpm;
my $rpmSummary;
my $rpmName;
my $rpmVersion;
my $rpmRelease;
my $rpmVendor;
my $rpmBuildRoot;
my $rpmCopyright;
my $rpmGroup;
my $rpmSource;
my $rpmAutoReqProv;
my $rpmdescription;
my $rpmpre;
my $directories;
my $directory;
my $targetdirs;
my $targetdir;
my $categoryname;
my $description;
my $files;
my $fileglobs;
my $links;
my $file;
my $link;
my $fileglob;
my $sourcedir;
my $targets;
my $target;
my $source;
my $note;
my $build;
my $commands;
my $command;
my $status;
my $dependencies;
my $dependency;

# Make new parser with distribution specific input
undef $parser;
$parser = HTML::TokeParser->new(\$cleanstring) or
    die('can\'t create TokeParser object');
$parser->xml_mode('1');

# Define handling methods for mode-dependent text rendering
$parser->{textify}={
    targetroot => \&format_targetroot,
    sourceroot => \&format_sourceroot,
    categories => \&format_categories,
    category => \&format_category,
    targetdir => \&format_targetdir,
    chown => \&format_chown,
    chmod => \&format_chmod,
    rpm => \&format_rpm,
    rpmSummary => \&format_rpmSummary,
    rpmName => \&format_rpmName,
    rpmVersion => \&format_rpmVersion,
    rpmRelease => \&format_rpmRelease,
    rpmVendor => \&format_rpmVendor,
    rpmBuildRoot => \&format_rpmBuildRoot,
    rpmCopyright => \&format_rpmCopyright,
    rpmGroup => \&format_rpmGroup,
    rpmSource => \&format_rpmSource,
    rpmAutoReqProv => \&format_rpmAutoReqProv,
    rpmdescription => \&format_rpmdescription,
    rpmpre => \&format_rpmpre,
    directories => \&format_directories,
    directory => \&format_directory,
    categoryname => \&format_categoryname,
    description => \&format_description,
    files => \&format_files,
    file => \&format_file,
    fileglob => \&format_fileglob,
    link => \&format_link,
    linkto => \&format_linkto,
    source => \&format_source,
    target => \&format_target,
    note => \&format_note,
    build => \&format_build,
    status => \&format_status,
    dependencies => \&format_dependencies,
    glob => \&format_glob,
    sourcedir => \&format_sourcedir,
    filenames => \&format_filenames,
    };

my $text;
my $token;
undef $hloc;
undef @hierarchy;
my $hloc;
my @hierarchy2;
while ($token = $parser->get_tag('lpml')) {
    &format_lpml(@{$token});
    $text = &trim($parser->get_text('/lpml'));
    $token = $parser->get_tag('/lpml');
    print $lpml; 
    print "\n";
    $text=~s/\s*\n\s*\n\s*/\n/g;
    print $text;
    print "\n";
    print &end();
}
exit;

sub end {
    if ($mode eq 'html') {
	return "THE END\n";
    }
}

# ----------------------- Take in string to parse and the separation expression
sub extract_array {
    my ($stringtoparse,$sepexp) = @_;
    my @a=split(/$sepexp/,$stringtoparse);
    return \@a;
}

# --------------------------------------------------------- Format lpml section
sub format_lpml {
    my (@tokeninfo)=@_;
    my $date=`date`; chop $date;
    if ($mode eq 'html') {
	$lpml = "LPML BEGINNING: $date";
    }
}
# --------------------------------------------------- Format targetroot section
sub format_targetroot {
    my $text=&trim($parser->get_text('/targetroot'));
    $text=$targetroot if $targetroot;
    $parser->get_tag('/targetroot');
    if ($mode eq 'html') {
	return $targetroot="\nTARGETROOT: $text";
    }
    else {
	return '';
    }
}
# --------------------------------------------------- Format sourceroot section
sub format_sourceroot {
    my $text=&trim($parser->get_text('/sourceroot'));
    $text=$sourceroot if $sourceroot;
    $parser->get_tag('/sourceroot');
    if ($mode eq 'html') {
	return $sourceroot="\nSOURCEROOT: $text";
    }
    else {
	return '';
    }
}
# --------------------------------------------------- Format categories section
sub format_categories {
    my $text=&trim($parser->get_text('/categories'));
    $parser->get_tag('/categories');
    if ($mode eq 'html') {
	return $categories="\nBEGIN CATEGORIES\n$text\nEND CATEGORIES\n";
    }
    else {
	return '';
    }
}
# --------------------------------------------------- Format categories section
sub format_category {
    my (@tokeninfo)=@_;
    $category_att_name=$tokeninfo[2]->{'name'};
    $category_att_type=$tokeninfo[2]->{'type'};
    $chmod='';$chown='';
    $parser->get_text('/category');
    $parser->get_tag('/category');
    if ($mode eq 'html') {
	return $category="\nCATEGORY $category_att_name $category_att_type ".
	    "$chmod $chown";
    }
    else {
	return '';
    }
}
# -------------------------------------------------------- Format chown section
sub format_chown {
    my @tokeninfo=@_;
    $chown='';
    my $text=&trim($parser->get_text('/chown'));
    if ($text) {
	$parser->get_tag('/chown');
	$chown=$text;
    }
    return '';
}
# -------------------------------------------------------- Format chmod section
sub format_chmod {
    my @tokeninfo=@_;
    $chmod='';
    my $text=&trim($parser->get_text('/chmod'));
    if ($text) {
	$parser->get_tag('/chmod');
	$chmod=$text;
    }
    return '';
}
# ---------------------------------------------------------- Format rpm section
sub format_rpm {
    my $text=&trim($parser->get_text('/rpm'));
    $parser->get_tag('/rpm');
    if ($mode eq 'html') {
	return $rpm="\nBEGIN RPM\n$text\nEND RPM";
    }
    else {
	return '';
    }
}
# --------------------------------------------------- Format rpmSummary section
sub format_rpmSummary {
    my $text=&trim($parser->get_text('/rpmSummary'));
    $parser->get_tag('/rpmSummary');
    if ($mode eq 'html') {
	return $rpmSummary="\nRPMSUMMARY $text";
    }
    else {
	return '';
    }
}
# ------------------------------------------------------ Format rpmName section
sub format_rpmName {
    my $text=&trim($parser->get_text('/rpmName'));
    $parser->get_tag('/rpmName');
    if ($mode eq 'html') {
	return $rpmName="\nRPMNAME $text";
    }
    else {
	return '';
    }
}
# --------------------------------------------------- Format rpmVersion section
sub format_rpmVersion {
    my $text=$parser->get_text('/rpmVersion');
    $parser->get_tag('/rpmVersion');
    if ($mode eq 'html') {
	return $rpmVersion="\nRPMVERSION $text";
    }
    else {
	return '';
    }
}
# --------------------------------------------------- Format rpmRelease section
sub format_rpmRelease {
    my $text=$parser->get_text('/rpmRelease');
    $parser->get_tag('/rpmRelease');
    if ($mode eq 'html') {
	return $rpmRelease="\nRPMRELEASE $text";
    }
    else {
	return '';
    }
}
# ---------------------------------------------------- Format rpmVendor section
sub format_rpmVendor {
    my $text=$parser->get_text('/rpmVendor');
    $parser->get_tag('/rpmVendor');
    if ($mode eq 'html') {
	return $rpmVendor="\nRPMVENDOR $text";
    }
    else {
	return '';
    }
}
# ------------------------------------------------- Format rpmBuildRoot section
sub format_rpmBuildRoot {
    my $text=$parser->get_text('/rpmBuildRoot');
    $parser->get_tag('/rpmBuildRoot');
    if ($mode eq 'html') {
	return $rpmBuildRoot="\nRPMBUILDROOT $text";
    }
    else {
	return '';
    }
}
# ------------------------------------------------- Format rpmCopyright section
sub format_rpmCopyright {
    my $text=$parser->get_text('/rpmCopyright');
    $parser->get_tag('/rpmCopyright');
    if ($mode eq 'html') {
	return $rpmCopyright="\nRPMCOPYRIGHT $text";
    }
    else {
	return '';
    }
}
# ----------------------------------------------------- Format rpmGroup section
sub format_rpmGroup {
    my $text=$parser->get_text('/rpmGroup');
    $parser->get_tag('/rpmGroup');
    if ($mode eq 'html') {
	return $rpmGroup="\nRPMGROUP $text";
    }
    else {
	return '';
    }
}
# ---------------------------------------------------- Format rpmSource section
sub format_rpmSource {
    my $text=$parser->get_text('/rpmSource');
    $parser->get_tag('/rpmSource');
    if ($mode eq 'html') {
	return $rpmSource="\nRPMSOURCE $text";
    }
    else {
	return '';
    }
}
# ----------------------------------------------- Format rpmAutoReqProv section
sub format_rpmAutoReqProv {
    my $text=$parser->get_text('/rpmAutoReqProv');
    $parser->get_tag('/rpmAutoReqProv');
    if ($mode eq 'html') {
	return $rpmAutoReqProv="\nRPMAUTOREQPROV $text";
    }
    else {
	return '';
    }
}
# ----------------------------------------------- Format rpmdescription section
sub format_rpmdescription {
    my $text=$parser->get_text('/rpmdescription');
    $parser->get_tag('/rpmdescription');
    if ($mode eq 'html') {
	return $rpmdescription="\nRPMDESCRIPTION $text";
    }
    else {
	return '';
    }
}
# ------------------------------------------------------- Format rpmpre section
sub format_rpmpre {
    my $text=$parser->get_text('/rpmpre');
    $parser->get_tag('/rpmpre');
    if ($mode eq 'html') {
	return $rpmpre="\nRPMPRE $text";
    }
    else {
	return '';
    }
}
# -------------------------------------------------- Format directories section
sub format_directories {
    my $text=&trim($parser->get_text('/directories'));
    $parser->get_tag('/directories');
    if ($mode eq 'html') {
	return $directories="\nBEGIN DIRECTORIES\n$text\nEND DIRECTORIES\n";
    }
    else {
	return '';
    }
}
# ---------------------------------------------------- Format directory section
sub format_directory {
    my (@tokeninfo)=@_;
    $targetdir='';$categoryname='';$description='';
    $parser->get_text('/directory');
    $parser->get_tag('/directory');
    if ($mode eq 'html') {
	return $directory="\nDIRECTORY $targetdir $categoryname $description";
    }
    else {
	return '';
    }
}
# ---------------------------------------------------- Format targetdir section
sub format_targetdir {
    my @tokeninfo=@_;
    $targetdir='';
    my $text=&trim($parser->get_text('/targetdir'));
    if ($text) {
	$parser->get_tag('/targetdir');
	$targetdir=$text;
    }
    return '';
}
# ------------------------------------------------- Format categoryname section
sub format_categoryname {
    my @tokeninfo=@_;
    $categoryname='';
    my $text=&trim($parser->get_text('/categoryname'));
    if ($text) {
	$parser->get_tag('/categoryname');
	$categoryname=$text;
    }
    return '';
}
# -------------------------------------------------- Format description section
sub format_description {
    my @tokeninfo=@_;
    $description='';
    my $text=&trim($parser->get_text('/description'));
    if ($text) {
	$parser->get_tag('/description');
	$description=$text;
    }
    return '';
}
# -------------------------------------------------------- Format files section
sub format_files {
    my $text=&trim($parser->get_text('/files'));
    $parser->get_tag('/files');
    if ($mode eq 'html') {
	return $directories="\nBEGIN FILES\n$text\nEND FILES\n";
    }
    else {
	return '';
    }
}
# ---------------------------------------------------- Format fileglobs section
sub format_fileglobs {

}
# -------------------------------------------------------- Format links section
sub format_links {

}
# --------------------------------------------------------- Format file section
sub format_file {
    my @tokeninfo=@_;
    $file=''; $source=''; $target=''; $categoryname=''; $description='';
    $note=''; $build=''; $status=''; $dependencies='';
    my $text=&trim($parser->get_text('/file'));
    if ($source) {
	$parser->get_tag('/file');
	if ($mode eq 'html') {
	    return ($file="\nBEGIN FILE\n".
		"$source $target $categoryname $description $note " .
		"$build $status $dependencies" .
		"\nEND FILE");
	}
	else {
	    return '';
	}
    }
    return '';
}
# --------------------------------------------------------- Format link section
sub format_link {
    my @tokeninfo=@_;
    $link=''; $linkto=''; $target=''; $categoryname=''; $description='';
    $note=''; $build=''; $status=''; $dependencies='';
    my $text=&trim($parser->get_text('/link'));
    if ($linkto) {
	$parser->get_tag('/link');
	if ($mode eq 'html') {
	    return $link="\nBEGIN LINK\n".
		"$linkto $target $categoryname $description $note " .
		"$build $status $dependencies" .
		    "\nEND LINK";
	}
	else {
	    return '';
	}
    }
    return '';
}
# ----------------------------------------------------- Format fileglob section
sub format_fileglob {
    my @tokeninfo=@_;
    $fileglob=''; $glob=''; $sourcedir='';
    $targetdir=''; $categoryname=''; $description='';
    $note=''; $build=''; $status=''; $dependencies='';
    $filenames='';
    my $text=&trim($parser->get_text('/fileglob'));
    if ($sourcedir) {
	$parser->get_tag('/fileglob');
	if ($mode eq 'html') {
	    return $fileglob="\nBEGIN FILEGLOB\n".
		"$glob sourcedir $targetdir $categoryname $description $note ".
		"$build $status $dependencies $filenames" .
		    "\nEND FILEGLOB";
	}
	else {
	    return '';
	}
    }
    return '';
}
# ---------------------------------------------------- Format sourcedir section
sub format_sourcedir {
    my @tokeninfo=@_;
    $sourcedir='';
    my $text=&trim($parser->get_text('/sourcedir'));
    if ($text) {
	$parser->get_tag('/sourcedir');
	$sourcedir=$text;
    }
    return '';
}
# ------------------------------------------------------- Format target section
sub format_target {
    my @tokeninfo=@_;
    $target='';
    my $text=&trim($parser->get_text('/target'));
    if ($text) {
	$parser->get_tag('/target');
	$target=$text;
    }
    return '';
}
# ------------------------------------------------------- Format source section
sub format_source {
    my @tokeninfo=@_;
    $source='';
    my $text=&trim($parser->get_text('/source'));
    if ($text) {
	$parser->get_tag('/source');
	$source=$text;
    }
    return '';
}
# --------------------------------------------------------- Format note section
sub format_note {
    my @tokeninfo=@_;
    $note='';
    my $text=&trim($parser->get_text('/note'));
    if ($text) {
	$parser->get_tag('/note');
	$note=$text;
    }
    return '';

}
# -------------------------------------------------------- Format build section
sub format_build {
    my @tokeninfo=@_;
    $build='';
    my $text=&trim($parser->get_text('/build'));
    if ($text) {
	$parser->get_tag('/build');
	$build=$text;
    }
    return '';
}
# ------------------------------------------------------- Format status section
sub format_status {
    my @tokeninfo=@_;
    $status='';
    my $text=&trim($parser->get_text('/status'));
    if ($text) {
	$parser->get_tag('/status');
	$status=$text;
    }
    return '';
}
# ------------------------------------------------- Format dependencies section
sub format_dependencies {
    my @tokeninfo=@_;
    $dependencies='';
    my $text=&trim($parser->get_text('/dependencies'));
    if ($text) {
	$parser->get_tag('/dependencies');
	$dependencies=$text;
    }
    return '';
}
# --------------------------------------------------------- Format glob section
sub format_glob {
    my @tokeninfo=@_;
    $glob='';
    my $text=&trim($parser->get_text('/glob'));
    if ($text) {
	$parser->get_tag('/glob');
	$glob=$text;
    }
    return '';
}
# ---------------------------------------------------- Format filenames section
sub format_filenames {
    my @tokeninfo=@_;
    $glob='';
    my $text=&trim($parser->get_text('/filenames'));
    if ($text) {
	$parser->get_tag('/filenames');
	$filenames=$text;
    }
    return '';
}
# ------------------------------------------------------- Format linkto section
sub format_linkto {
    my @tokeninfo=@_;
    $glob='';
    my $text=&trim($parser->get_text('/linkto'));
    if ($text) {
	$parser->get_tag('/linkto');
	$linkto=$text;
    }
    return '';
}
# --------------------------------------- remove starting and ending whitespace
sub trim {
    my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
} 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>