--- loncom/publisher/lonpublisher.pm 2002/07/26 19:35:20 1.85 +++ loncom/publisher/lonpublisher.pm 2003/03/14 16:12:14 1.118 @@ -1,7 +1,7 @@ # The LearningOnline Network with CAPA # Publication Handler # -# $Id: lonpublisher.pm,v 1.85 2002/07/26 19:35:20 albertel Exp $ +# $Id: lonpublisher.pm,v 1.118 2003/03/14 16:12:14 albertel Exp $ # # Copyright Michigan State University Board of Trustees # @@ -33,18 +33,14 @@ # 11/28,11/29,11/30,12/01,12/02,12/04,12/23 Gerd Kortemeyer # 03/23 Guy Albertelli # 03/24,03/29,04/03 Gerd Kortemeyer -# 04/16/2001 Scott Harrison # 05/03,05/05,05/07 Gerd Kortemeyer -# 05/28/2001 Scott Harrison # 06/23,08/07,08/11,8/13,8/17,8/18,8/24,9/26,10/16 Gerd Kortemeyer # 12/04,12/05 Guy Albertelli # 12/05 Gerd Kortemeyer # 12/05 Guy Albertelli # 12/06,12/07 Gerd Kortemeyer -# 12/15,12/16 Scott Harrison # 12/25 Gerd Kortemeyer # YEAR=2002 -# 1/16,1/17 Scott Harrison # 1/17 Gerd Kortemeyer # ### @@ -61,6 +57,57 @@ ## ## ############################################################################### + +###################################################################### +###################################################################### + +=pod + +=head1 NAME + +lonpublisher - LON-CAPA publishing handler + +=head1 SYNOPSIS + +B is used by B inside B. This is the +invocation by F: + + + PerlAccessHandler Apache::lonacc + SetHandler perl-script + PerlHandler Apache::lonpublisher + ErrorDocument 403 /adm/login + ErrorDocument 404 /adm/notfound.html + ErrorDocument 406 /adm/unauthorized.html + ErrorDocument 500 /adm/errorhandler + + +=head1 DESCRIPTION + +B takes the proper steps to add resources to the LON-CAPA +digital library. This includes updating the metadata table in the +LON-CAPA database. + +B is many things to many people. + +This module publishes a file. This involves gathering metadata, +versioning the file, copying file from construction space to +publication space, and copying metadata from construction space +to publication space. + +=head2 SUBROUTINES + +Many of the undocumented subroutines implement various magical +parsing shortcuts. + +=over 4 + +=cut + +###################################################################### +###################################################################### + + package Apache::lonpublisher; # ------------------------------------------------- modules used by this module @@ -70,24 +117,48 @@ use File::Copy; use Apache::Constants qw(:common :http :methods); use HTML::LCParser; use Apache::lonxml; -use Apache::lonhomework; use Apache::loncacc; use DBI; use Apache::lonnet(); use Apache::loncommon(); +use Apache::lonmysql; +use vars qw(%metadatafields %metadatakeys); my %addid; my %nokey; -my %metadatafields; -my %metadatakeys; - my $docroot; my $cuname; my $cudom; -# ----------------------------------------------- Evaluate string with metadata +=pod + +=item B + +Evaluates a string that contains metadata. This subroutine +stores values inside I<%metadatafields> and I<%metadatakeys>. +The hash key is a I<$unikey> corresponding to a unique id +that is descriptive of the parser location inside the XML tree. + +Parameters: + +=over 4 + +=item I<$metastring> + +A string that contains metadata. + +=back + +Returns: + +nothing + +=cut + +######################################### +######################################### sub metaeval { my $metastring=shift; @@ -130,14 +201,50 @@ sub metaeval { } } -# -------------------------------------------------------- Read a metadata file +######################################### +######################################### + +=pod + +=item B + +Read a metadata file + +Parameters: + +=over + +=item I<$logfile> + +File output stream to output errors and warnings to. + +=item I<$fn> + +File name (including path). + +=back + +Returns: + +=over 4 + +=item Scalar string (if successful) + +XHTML text that indicates successful reading of the metadata. + +=back + +=cut + +######################################### +######################################### sub metaread { my ($logfile,$fn)=@_; unless (-e $fn) { - print $logfile 'No file '.$fn."\n"; + print($logfile 'No file '.$fn."\n"); return '
No file: '.$fn.''; } - print $logfile 'Processing '.$fn."\n"; + print($logfile 'Processing '.$fn."\n"); my $metastring; { my $metafh=Apache::File->new($fn); @@ -147,26 +254,63 @@ sub metaread { return '
Processed file: '.$fn.''; } -# ---------------------------- convert 'time' format into a datetime sql format -sub sqltime { - my $timef=shift @_; - my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = - localtime($timef); - $mon++; $year+=1900; - return "$year-$mon-$mday $hour:$min:$sec"; +######################################### +######################################### + +sub coursedependencies { + my $url=&Apache::lonnet::declutter(shift); + $url=~s/\.meta$//; + my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//); + my $regexp=$url; + $regexp=~s/(\W)/\\$1/g; + $regexp='___'.$regexp.'___course'; + my %evaldata=&Apache::lonnet::dump('nohist_resevaldata',$adomain, + $aauthor,$regexp); + my %courses=(); + foreach (keys %evaldata) { + if ($_=~/^([a-zA-Z0-9]+_[a-zA-Z0-9]+)___.+___course$/) { + $courses{$1}=1; + } + } + return %courses; } +######################################### +######################################### + -# --------------------------------------------------------- Various form fields +=pod +=item Form-field-generating subroutines. + +For input parameters, these subroutines take in values +such as I<$name>, I<$value> and other form field metadata. +The output (scalar string that is returned) is an XHTML +string which presents the form field (foreseeably inside +
tags). + +=over 4 + +=item B + +=item B + +=item B + +=back + +=cut + +######################################### +######################################### sub textfield { my ($title,$name,$value)=@_; return "\n

$title:
". - ''; + ''; } sub hiddenfield { my ($name,$value)=@_; - return "\n".''; + return "\n".''; } sub selectbox { @@ -184,8 +328,19 @@ sub selectbox { return $selout.''; } -# -------------------------------------------------------- Publication Step One +######################################### +######################################### + +=pod + +=item B + +Fix up a url? First step of publication + +=cut +######################################### +######################################### sub urlfixup { my ($url,$target)=@_; unless ($url) { return ''; } @@ -206,7 +361,19 @@ sub urlfixup { return $url; } +######################################### +######################################### +=pod + +=item B + +Currently undocumented. + +=cut + +######################################### +######################################### sub absoluteurl { my ($url,$target)=@_; unless ($url) { return ''; } @@ -217,6 +384,19 @@ sub absoluteurl { return $url; } +######################################### +######################################### + +=pod + +=item B + +Currently undocumented + +=cut + +######################################### +######################################### sub set_allow { my ($allow,$logfile,$target,$tag,$oldurl)=@_; my $newurl=&urlfixup($oldurl,$target); @@ -235,6 +415,19 @@ sub set_allow { return $return_url } +######################################### +######################################### + +=pod + +=item B + +Currently undocumented + +=cut + +######################################### +######################################### sub get_subscribed_hosts { my ($target)=@_; my @subscribed; @@ -243,9 +436,10 @@ sub get_subscribed_hosts { my $srcf=$2; opendir(DIR,$1); while ($filename=readdir(DIR)) { - if ($filename=~/$srcf\.(\w+)$/) { + if ($filename=~/\Q$srcf\E\.(\w+)$/) { my $subhost=$1; - if ($subhost ne 'meta' && $subhost ne 'subscription') { + if (($subhost ne 'meta' && $subhost ne 'subscription') && + ($subhost ne $Apache::lonnet::perlvar{'lonHostID'})) { push(@subscribed,$subhost); } } @@ -256,30 +450,366 @@ sub get_subscribed_hosts { &Apache::lonnet::logthis("opened $target.subscription"); while (my $subline=<$sh>) { &Apache::lonnet::logthis("Trying $subline"); - if ($subline =~ /(^\w+):/) { push(@subscribed,$1); } else { + if ($subline =~ /(^\w+):/) { + if ($1 ne $Apache::lonnet::perlvar{'lonHostID'}) { + push(@subscribed,$1); + } + } else { &Apache::lonnet::logthis("No Match for $subline"); } } } else { - &Apache::lonnet::logthis("Un able to open $target.subscription"); + &Apache::lonnet::logthis("Unable to open $target.subscription"); } - &Apache::lonnet::logthis("Got list of ".join(':',@subscribed)); return @subscribed; } + +######################################### +######################################### + +=pod + +=item B + +Currently undocumented + +=cut + +######################################### +######################################### +sub get_max_ids_indices { + my ($content)=@_; + my $maxindex=10; + my $maxid=10; + my $needsfixup=0; + my $duplicateids=0; + + my %allids; + my %duplicatedids; + + my $parser=HTML::LCParser->new($content); + my $token; + while ($token=$parser->get_token) { + if ($token->[0] eq 'S') { + my $counter; + if ($counter=$addid{$token->[1]}) { + if ($counter eq 'id') { + if (defined($token->[2]->{'id'})) { + $maxid=($token->[2]->{'id'}>$maxid)?$token->[2]->{'id'}:$maxid; + if (exists($allids{$token->[2]->{'id'}})) { + $duplicateids=1; + $duplicatedids{$token->[2]->{'id'}}=1; + } else { + $allids{$token->[2]->{'id'}}=1; + } + } else { + $needsfixup=1; + } + } else { + if (defined($token->[2]->{'index'})) { + $maxindex=($token->[2]->{'index'}>$maxindex)?$token->[2]->{'index'}:$maxindex; + } else { + $needsfixup=1; + } + } + } + } + } + return ($needsfixup,$maxid,$maxindex,$duplicateids, + (keys(%duplicatedids))); +} + +######################################### +######################################### + +=pod + +=item B + +Currently undocumented + +=cut + +######################################### +######################################### +sub get_all_text_unbalanced { + #there is a copy of this in lonxml.pm + my($tag,$pars)= @_; + my $token; + my $result=''; + $tag='<'.$tag.'>'; + while ($token = $$pars[-1]->get_token) { + if (($token->[0] eq 'T')||($token->[0] eq 'C')||($token->[0] eq 'D')) { + $result.=$token->[1]; + } elsif ($token->[0] eq 'PI') { + $result.=$token->[2]; + } elsif ($token->[0] eq 'S') { + $result.=$token->[4]; + } elsif ($token->[0] eq 'E') { + $result.=$token->[2]; + } + if ($result =~ /(.*)\Q$tag\E(.*)/s) { + #&Apache::lonnet::logthis('Got a winner with leftovers ::'.$2); + #&Apache::lonnet::logthis('Result is :'.$1); + $result=$1; + my $redo=$tag.$2; + push (@$pars,HTML::LCParser->new(\$redo)); + $$pars[-1]->xml_mode('1'); + last; + } + } + return $result +} + +######################################### +######################################### + +=pod + +=item B + +Currently undocumented + +=cut + +######################################### +######################################### +#Arguably this should all be done as a lonnet::ssi instead +sub fix_ids_and_indices { + my ($logfile,$source,$target)=@_; + + my %allow; + my $content; + { + my $org=Apache::File->new($source); + $content=join('',<$org>); + } + + my ($needsfixup,$maxid,$maxindex,$duplicateids,@duplicatedids)= + &get_max_ids_indices(\$content); + + print $logfile ("Got $needsfixup,$maxid,$maxindex,$duplicateids--". + join(', ',@duplicatedids)); + if ($duplicateids) { + print $logfile "Duplicate ID(s) exist, ".join(', ',@duplicatedids)."\n"; + my $outstring='Unable to publish file, it contains duplicated ID(s), ID(s) need to be unique. The duplicated ID(s) are: '.join(', ',@duplicatedids).''; + return ($outstring,1); + } + if ($needsfixup) { + print $logfile "Needs ID and/or index fixup\n". + "Max ID : $maxid (min 10)\n". + "Max Index: $maxindex (min 10)\n"; + } + my $outstring=''; + my @parser; + $parser[0]=HTML::LCParser->new(\$content); + $parser[-1]->xml_mode(1); + my $token; + while (@parser) { + while ($token=$parser[-1]->get_token) { + if ($token->[0] eq 'S') { + my $counter; + my $tag=$token->[1]; + my $lctag=lc($tag); + if ($lctag eq 'allow') { + $allow{$token->[2]->{'src'}}=1; + next; + } + my %parms=%{$token->[2]}; + $counter=$addid{$tag}; + if (!$counter) { $counter=$addid{$lctag}; } + if ($counter) { + if ($counter eq 'id') { + unless (defined($parms{'id'})) { + $maxid++; + $parms{'id'}=$maxid; + print $logfile 'ID: '.$tag.':'.$maxid."\n"; + } + } elsif ($counter eq 'index') { + unless (defined($parms{'index'})) { + $maxindex++; + $parms{'index'}=$maxindex; + print $logfile 'Index: '.$tag.':'.$maxindex."\n"; + } + } + } + foreach my $type ('src','href','background','bgimg') { + foreach my $key (keys(%parms)) { + if ($key =~ /^$type$/i) { + $parms{$key}=&set_allow(\%allow,$logfile, + $target,$tag, + $parms{$key}); + } + } + } + # probably a image type

Dependencies

'; @@ -463,15 +851,16 @@ sub publish { if ( &Apache::lonnet::getfile($Apache::lonnet::perlvar{'lonDocRoot'}.'/'. $thisdep.'.meta') eq '-1') { - $scrout.= - ' - Currently not available'; + $scrout.= ' - Currently not available'. + ''; } else { my %temphash=(&Apache::lonnet::declutter($target).'___'. &Apache::lonnet::declutter($thisdep).'___usage' => time); $thisdep=~/^\/res\/(\w+)\/(\w+)\//; if ((defined($1)) && (defined($2))) { - &Apache::lonnet::put('resevaldata',\%temphash,$1,$2); + &Apache::lonnet::put('nohist_resevaldata',\%temphash, + $1,$2); } } } @@ -480,40 +869,34 @@ sub publish { #Encode any High ASCII characters $outstring=&HTML::Entities::encode($outstring,"\200-\377"); -# ------------------------------------------------------------- Write modified +# ------------------------------------------------------------- Write modified. { my $org; unless ($org=Apache::File->new('>'.$source)) { print $logfile "No write permit to $source\n"; - return - "No write permission to $source, FAIL"; + return ('No write permission to '.$source. + ', FAIL',1); } - print $org $outstring; + print($org $outstring); } $content=$outstring; - if ($needsfixup) { - print $logfile "End of ID and/or index fixup\n". - "Max ID : $maxid (min 10)\n". - "Max Index: $maxindex (min 10)\n"; - } else { - print $logfile "Does not need ID and/or index fixup\n"; - } } -# --------------------------------------------- Initial step done, now metadata +# -------------------------------------------- Initial step done, now metadata. -# ---------------------------------------- Storage for metadata keys and fields +# --------------------------------------- Storage for metadata keys and fields. %metadatafields=(); %metadatakeys=(); my %oldparmstores=(); - + unless ($batch) { $scrout.='

Metadata Information ' . Apache::loncommon::help_open_topic("Metadata_Description") . '

'; + } # ------------------------------------------------ First, check out environment unless (-e $source.'.meta') { @@ -574,63 +957,87 @@ sub publish { } # ---------------- Find and document discrepancies in the parameters and stores - my $chparms=''; - foreach (sort keys %metadatafields) { - if (($_=~/^parameter/) || ($_=~/^stores/)) { - unless ($_=~/\.\w+$/) { - unless ($oldparmstores{$_}) { - print $logfile 'New: '.$_."\n"; - $chparms.=$_.' '; - } - } - } - } - if ($chparms) { - $scrout.='

New parameters or stored values: '. - $chparms; - } + my $chparms=''; + foreach (sort keys %metadatafields) { + if (($_=~/^parameter/) || ($_=~/^stores/)) { + unless ($_=~/\.\w+$/) { + unless ($oldparmstores{$_}) { + print $logfile 'New: '.$_."\n"; + $chparms.=$_.' '; + } + } + } + } + if ($chparms) { + $scrout.='

New parameters or stored values: '.$chparms; + } - $chparms=''; - foreach (sort keys %oldparmstores) { - if (($_=~/^parameter/) || ($_=~/^stores/)) { - unless (($metadatafields{$_.'.name'}) || - ($metadatafields{$_.'.package'}) || ($_=~/\.\w+$/)) { - print $logfile 'Obsolete: '.$_."\n"; - $chparms.=$_.' '; - } - } - } - if ($chparms) { - $scrout.='

Obsolete parameters or stored values: '. - $chparms; - } + $chparms=''; + foreach (sort keys %oldparmstores) { + if (($_=~/^parameter/) || ($_=~/^stores/)) { + unless (($metadatafields{$_.'.name'}) || + ($metadatafields{$_.'.package'}) || ($_=~/\.\w+$/)) { + print $logfile 'Obsolete: '.$_."\n"; + $chparms.=$_.' '; + } + } + } + if ($chparms) { + $scrout.='

Obsolete parameters or stored values: '. + $chparms; + } # ------------------------------------------------------- Now have all metadata + my %keywords=(); + + if (length($content)<500000) { + my $textonly=$content; + $textonly=~s/\//g; + $textonly=~s/\[^\<]+\<\/m\>//g; + $textonly=~s/\<[^\>]*\>//g; + $textonly=~tr/A-Z/a-z/; + $textonly=~s/[\$\&][a-z]\w*//g; + $textonly=~s/[^a-z\s]//g; + + foreach ($textonly=~m/(\w+)/g) { + unless ($nokey{$_}) { + $keywords{$_}=1; + } + } + } + + + foreach (split(/\W+/,$metadatafields{'keywords'})) { + $keywords{$_}=1; + } +# --------------------------------------------------- Now we also have keywords +# ============================================================================= +# INTERACTIVE MODE +# + unless ($batch) { $scrout.= - '

'. - '

'. - &hiddenfield('phase','two'). - &hiddenfield('filename',$ENV{'form.filename'}). - &hiddenfield('allmeta',&Apache::lonnet::escape($allmeta)). - &hiddenfield('dependencies',join(',',keys %allow)). - &textfield('Title','title',$metadatafields{'title'}). - &textfield('Author(s)','author',$metadatafields{'author'}). - &textfield('Subject','subject',$metadatafields{'subject'}); + ''. + '

'. + &hiddenfield('phase','two'). + &hiddenfield('filename',$ENV{'form.filename'}). + &hiddenfield('allmeta',&Apache::lonnet::escape($allmeta)). + &hiddenfield('dependencies',join(',',keys %allow)). + &textfield('Title','title',$metadatafields{'title'}). + &textfield('Author(s)','author',$metadatafields{'author'}). + &textfield('Subject','subject',$metadatafields{'subject'}); # --------------------------------------------------- Scan content for keywords my $keywords_help = Apache::loncommon::help_open_topic("Publishing_Keywords"); my $keywordout=<<"END";