#!/usr/bin/perl

# -------------------------------------------------------- Documentation notice
# Run "perldoc ./lpml_parse.pl" in order to best view the software
# documentation internalized in this program.

# --------------------------------------------------------- License Information
# The LearningOnline Network with CAPA
# piml_parse.pl - Linux Packaging Markup Language parser
#
# $Id: piml_parse.pl,v 1.1 2002/05/05 02:44:57 harris41 Exp $
#
# Written by Scott Harrison, codeharrison@yahoo.com
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
# YEAR=2002
# 1/28,1/29,1/30,1/31,2/5,4/8 - Scott Harrison
#
###

###############################################################################
##                                                                           ##
## ORGANIZATION OF THIS PERL SCRIPT                                          ##
## 1. Notes                                                                  ##
## 2. Get command line arguments                                             ##
## 3. First pass through (grab distribution-specific information)            ##
## 4. Second pass through (parse out what is not necessary)                  ##
## 5. Third pass through (translate markup according to specified mode)      ##
## 6. Functions (most all just format contents of different markup tags)     ##
## 7. POD (plain old documentation, CPAN style)                              ##
##                                                                           ##
###############################################################################

# ----------------------------------------------------------------------- Notes
#
# I am using a multiple pass-through approach to parsing
# the piml file.  This saves memory and makes sure the server
# will never be overloaded.
#
# This is meant to parse files meeting the piml document type.
# See piml.dtd.  PIML=Post Installation Markup Language.

use HTML::TokeParser;

my $usage=(<<END);
**** ERROR ERROR ERROR ERROR ****
Usage is for piml file to come in through standard input.
1st argument is the category permissions to use (runtime or development)
2nd argument is the distribution (default,redhat6.2,debian2.2,redhat7.1,etc).
3rd argument is to manually specify a targetroot

Only the 1st argument is mandatory for the program to run.

Example:

cat ../../doc/loncapafiles.piml |\\
perl piml_parse.pl html development default /home/sherbert/loncapa /tmp/install
END

# ------------------------------------------------- Grab command line arguments

my $mode;
if (@ARGV==3) {
    $mode = shift @ARGV;
}
else {
    @ARGV=();shift @ARGV;
    while(<>){} # throw away the input to avoid broken pipes
    print $usage;
    exit -1; # exit with error status
}

my $categorytype;
if (@ARGV) {
    $categorytype = shift @ARGV;
}

my $dist;
if (@ARGV) {
    $dist = shift @ARGV;
}

my $targetroot;
my $targetrootarg;
if (@ARGV) {
    $targetroot = shift @ARGV;
}
$targetroot=~s/\/$//;
$targetrootarg=$targetroot;

my $logcmd='| tee -a WARNINGS';

my $invocation;
# --------------------------------------------------- Record program invocation
if ($mode eq 'install' or $mode eq 'configinstall' or $mode eq 'build') {
    $invocation=(<<END);
# Invocation: STDINPUT | piml_parse.pl
#             1st argument (category type) is: $categorytype
#             2nd argument (distribution) is: $dist
#             3rd argument (targetroot) is: described below
END
}

# ---------------------------------------------------- Start first pass through
my @parsecontents = <>;
my $parsestring = join('',@parsecontents);
my $outstring='';

# Need to make a pass through and figure out what defaults are
# overrided.  Top-down overriding strategy (leaves don't know
# about distant leaves).

my @hierarchy;
$hierarchy[0]=0;
my $hloc=0;
my $token='';
$parser = HTML::TokeParser->new(\$parsestring) or
    die('can\'t create TokeParser object');
$parser->xml_mode('1');
my %hash;
my $key;
while ($token = $parser->get_token()) {
    if ($token->[0] eq 'S') {
	$hloc++;
	$hierarchy[$hloc]++;
	$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
	my $thisdist=' '.$token->[2]{'dist'}.' ';
	if ($thisdist eq ' default ') {
	    $hash{$key}=1; # there is a default setting for this key
	}
	elsif ($dist && $hash{$key}==1 && $thisdist=~/\s$dist\s/) {
	    $hash{$key}=2; # disregard default setting for this key if
	                   # there is a directly requested distribution match
	}
    }
    if ($token->[0] eq 'E') {
	$hloc--;
    }
}

# --------------------------------------------------- Start second pass through
undef $hloc;
undef @hierarchy;
undef $parser;
$hierarchy[0]=0;
$parser = HTML::TokeParser->new(\$parsestring) or
    die('can\'t create TokeParser object');
$parser->xml_mode('1');
my $cleanstring;
while ($token = $parser->get_token()) {
    if ($token->[0] eq 'S') {
	$hloc++;
	$hierarchy[$hloc]++;
	$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
	my $thisdist=' '.$token->[2]{'dist'}.' ';
	# This conditional clause is set up to ignore two sets
	# of invalid conditions before accepting entry into
	# the cleanstring.
	if ($hash{$key}==2 and
	    !($thisdist eq '  ' or $thisdist =~/\s$dist\s/)) {
	    if ($token->[4]!~/\/>$/) {
		$parser->get_tag('/'.$token->[1]);
		$hloc--;
	    }
	}
	elsif ($thisdist ne '  ' and $thisdist!~/\s$dist\s/ and
	       !($thisdist eq ' default ' and $hash{$key}!=2)) {
	    if ($token->[4]!~/\/>$/) {
		$parser->get_tag('/'.$token->[1]);
		$hloc--;
	    }
	}
	else {
	    $cleanstring.=$token->[4];
	}
	if ($token->[4]=~/\/>$/) {
#	    $hloc--;
	}
    }
    if ($token->[0] eq 'E') {
	$cleanstring.=$token->[2];
	$hloc--;
    }
    if ($token->[0] eq 'T') {
	$cleanstring.=$token->[1];
    }
}
$cleanstring=&trim($cleanstring);
$cleanstring=~s/\>\s*\n\s*\</\>\</g;

# ---------------------------------------------------- Start final pass through

# storage variables
my $piml;
my $categories;
my @categorynamelist;
my $category;
my $category_att_name;
my $category_att_type;
my $chown;
my $chmod;
my $abbreviation; # space-free abbreviation; esp. for image names
my $categoryname;
my $description;
my $files;
my $file;
my $target;
my $note;
my $commands;
my $command;
my $dependencies;
my @links;
my %categoryhash;
my $dpathlength;
my %fab; # file category abbreviation
my $directory_count;
my $file_count;
my $link_count;
my $fileglob_count;
my $fileglobnames_count;
my %categorycount;

my $mode;

my @buildall;
my @buildinfo;

my @configall;

# Make new parser with distribution specific input
undef($parser);
$parser = HTML::TokeParser->new(\$cleanstring) or
    die('can\'t create TokeParser object');
$parser->xml_mode('1');

# Define handling methods for mode-dependent text rendering

$parser->{textify}={
    specialnotices => \&format_specialnotices,
    specialnotice => \&format_specialnotice,
    targetroot => \&format_targetroot,
    categories => \&format_categories,
    category => \&format_category,
    abbreviation => \&format_abbreviation,
    chown => \&format_chown,
    chmod => \&format_chmod,
    categoryname => \&format_categoryname,
    files => \&format_files,
    file => \&format_file,
    target => \&format_target,
    note => \&format_note,
    build => \&format_build,
    dependencies => \&format_dependencies,
    filenames => \&format_filenames,
    perlscript => \&format_perlscript,
    TARGET => \&format_TARGET,
    };

my $text;
my $token;
undef($hloc);
undef(@hierarchy);
my $hloc;
my @hierarchy2;
while ($token = $parser->get_tag('piml')) {
    &format_piml(@{$token});
    $text = &trim($parser->get_text('/piml'));
    $token = $parser->get_tag('/piml');
    print($piml); 
    print("\n");
    print($text);
    print("\n");
    print(&end());
}
exit(0);

# ---------- Functions (most all just format contents of different markup tags)

# ------------------------ Final output at end of markup parsing and formatting
sub end {

}

# ----------------------- Take in string to parse and the separation expression
sub extract_array {
    my ($stringtoparse,$sepexp) = @_;
    my @a=split(/$sepexp/,$stringtoparse);
    return \@a;
}

# --------------------------------------------------------- Format piml section
sub format_piml {
    my (@tokeninfo)=@_;
    my $date=`date`; chop $date;
    $piml=<<END;
#!/usr/bin/perl

# Generated from a PIML (Post Installation Markup Language) document

END
}
# --------------------------------------------------- Format targetroot section
sub format_targetroot {
    my $text=&trim($parser->get_text('/targetroot'));
    $text=$targetroot if $targetroot;
    $parser->get_tag('/targetroot');
    return('# TARGET INSTALL LOCATION is "'.$targetroot."\"\n");
}
# -------------------------------------------------- Format perl script section
sub format_perlscript {
    my (@tokeninfo)=@_;
    $mode=$tokeninfo[2]->{'mode'};
    my $text=$parser->get_text('/perlscript');
    $parser->get_tag('/perlscript');
    if ($mode eq 'bg') {
	open(OUT,">/tmp/piml$$.pl");
	print(OUT $text);
	close(OUT);
	return(<<END);
	# launch background process for $target
	system("perl /tmp/piml$$.pl &");
END
    }
    else {
	return($text);
    }
}
# --------------------------------------------------------------- Format TARGET
sub format_TARGET {
    my (@tokeninfo)=@_;
    $parser->get_tag('/TARGET');
    return($target);
}
# --------------------------------------------------- Format categories section
sub format_categories {
    my $text=&trim($parser->get_text('/categories'));
    $parser->get_tag('/categories');
    return('# CATEGORIES'."\n".$text);
}
# --------------------------------------------------- Format categories section
sub format_category {
    my (@tokeninfo)=@_;
    $category_att_name=$tokeninfo[2]->{'name'};
    $category_att_type=$tokeninfo[2]->{'type'};
    $abbreviation=''; $chmod='';$chown='';
    $parser->get_text('/category');
    $parser->get_tag('/category');
    $fab{$category_att_name}=$abbreviation;
    if ($category_att_type eq $categorytype) {
	my ($user,$group)=split(/\:/,$chown);
	$categoryhash{$category_att_name}='-o '.$user.' -g '.$group.
	    ' -m '.$chmod;
    }
    return('');
}
# --------------------------------------------------- Format categories section
sub format_abbreviation {
    my @tokeninfo=@_;
    $abbreviation='';
    my $text=&trim($parser->get_text('/abbreviation'));
    if ($text) {
	$parser->get_tag('/abbreviation');
	$abbreviation=$text;
    }
    return('');
}
# -------------------------------------------------------- Format chown section
sub format_chown {
    my @tokeninfo=@_;
    $chown='';
    my $text=&trim($parser->get_text('/chown'));
    if ($text) {
	$parser->get_tag('/chown');
	$chown=$text;
    }
    return('');
}
# -------------------------------------------------------- Format chmod section
sub format_chmod {
    my @tokeninfo=@_;
    $chmod='';
    my $text=&trim($parser->get_text('/chmod'));
    if ($text) {
	$parser->get_tag('/chmod');
	$chmod=$text;
    }
    return('');
}
# ------------------------------------------------- Format categoryname section
sub format_categoryname {
    my @tokeninfo=@_;
    $categoryname='';
    my $text=&trim($parser->get_text('/categoryname'));
    if ($text) {
	$parser->get_tag('/categoryname');
	$categoryname=$text;
    }
    return('');
}
# -------------------------------------------------------- Format files section
sub format_files {
    my $text=$parser->get_text('/files');
    $parser->get_tag('/files');
    return("\n".'# There are '.$file_count.' files this script works on'.
	"\n\n".$text);
}
# --------------------------------------------------------- Format file section
sub format_file {
    my @tokeninfo=@_;
    $file=''; $source=''; $target=''; $categoryname=''; $description='';
    $note=''; $build=''; $status=''; $dependencies='';
    my $text=&trim($parser->get_text('/file'));
    $file_count++;
    $categorycount{$categoryname}++;
    $parser->get_tag('/file');
    return("# File: $target\n".
	"$text\n");
}
# ------------------------------------------------------- Format target section
sub format_target {
    my @tokeninfo=@_;
    $target='';
    my $text=&trim($parser->get_text('/target'));
    if ($text) {
	$parser->get_tag('/target');
	$target=$targetrootarg.$text;
    }
    return('');
}
# --------------------------------------------------------- Format note section
sub format_note {
    my @tokeninfo=@_;
    $note='';
    my $aref;
    my $text;
    while ($aref=$parser->get_token()) {
	if ($aref->[0] eq 'E' && $aref->[1] eq 'note') {
	    last;
	}
	elsif ($aref->[0] eq 'S') {
	    $text.=$aref->[4];
	}
	elsif ($aref->[0] eq 'E') {
	    $text.=$aref->[2];
	}
	else {
	    $text.=$aref->[1];
	}
    }
    if ($text) {
	$note=$text;
    }
    return('');
}
# ------------------------------------------------- Format dependencies section
sub format_dependencies {
    my @tokeninfo=@_;
    $dependencies='';
    my $text=&trim($parser->get_text('/dependencies'));
    if ($text) {
	$parser->get_tag('/dependencies');
	$dependencies=join(';',
			      (map {s/^\s*//;s/\s$//;$_} split(/\;/,$text)));
    }
    return('');
}
# ------------------------------------------------ Format specialnotice section
sub format_specialnotices {
    $parser->get_tag('/specialnotices');
    return('');
}
# ------------------------------------------------ Format specialnotice section
sub format_specialnotice {
    $parser->get_tag('/specialnotice');
    return('');
}
# ------------------------------------- Render less-than and greater-than signs
sub htmlsafe {
    my $text=@_[0];
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return($text);
}
# --------------------------------------- remove starting and ending whitespace
sub trim {
    my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return($s);
}

# ----------------------------------- POD (plain old documentation, CPAN style)

=pod

=head1 NAME

piml_parse.pl - This is meant to parse piml files (Post Installation Markup Language)

=head1 SYNOPSIS

Usage is for piml file to come in through standard input.

=over 4

=item * 

1st argument is the category permissions to use (runtime or development)

=item *

2nd argument is the distribution
(default,redhat6.2,debian2.2,redhat7.1,etc).

=item *

3rd argument is to manually specify a targetroot.

=back

Only the 1st argument is mandatory for the program to run.

Example:

cat ../../doc/loncapafiles.piml |\\
perl piml_parse.pl html default /home/sherbert/loncapa /tmp/install

=head1 DESCRIPTION

I am using a multiple pass-through approach to parsing
the piml file.  This saves memory and makes sure the server
will never be overloaded.

=head1 README

I am using a multiple pass-through approach to parsing
the piml file.  This saves memory and makes sure the server
will never be overloaded.

=head1 PREREQUISITES

HTML::TokeParser

=head1 COREQUISITES

=head1 OSNAMES

linux

=head1 SCRIPT CATEGORIES

Packaging/Administrative

=head1 AUTHOR

 Scott Harrison
 codeharrison@yahoo.com

Please let me know how/if you are finding this script useful and
any/all suggestions.  -Scott

=cut