--- loncom/publisher/lonpublisher.pm 2000/11/30 23:01:41 1.7 +++ loncom/publisher/lonpublisher.pm 2009/11/24 00:57:11 1.249.2.1 @@ -1,461 +1,2256 @@ # The LearningOnline Network with CAPA # Publication Handler -# -# (TeX Content Handler # -# 05/29/00,05/30,10/11 Gerd Kortemeyer) +# $Id: lonpublisher.pm,v 1.249.2.1 2009/11/24 00:57:11 raeburn Exp $ # -# 11/28,11/29,11/30 Gerd Kortemeyer +# Copyright Michigan State University Board of Trustees +# +# This file is part of the LearningOnline Network with CAPA (LON-CAPA). +# +# LON-CAPA is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# LON-CAPA is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with LON-CAPA; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# /home/httpd/html/adm/gpl.txt +# +# http://www.lon-capa.org/ +# +### + +############################################################################### +## ## +## ORGANIZATION OF THIS PERL MODULE ## +## ## +## 1. Modules used by this module ## +## 2. Various subroutines ## +## 3. Publication Step One ## +## 4. Phase Two ## +## 5. Main Handler ## +## ## +############################################################################### + + +###################################################################### +###################################################################### + +=pod + +=head1 NAME + +lonpublisher - LON-CAPA publishing handler + +=head1 SYNOPSIS + +B is used by B inside B. This is the +invocation by F: + + + PerlAccessHandler Apache::lonacc + SetHandler perl-script + PerlHandler Apache::lonpublisher + ErrorDocument 403 /adm/login + ErrorDocument 404 /adm/notfound.html + ErrorDocument 406 /adm/unauthorized.html + ErrorDocument 500 /adm/errorhandler + + +=head1 OVERVIEW + +Authors can only write-access the C space. They can +copy resources into the resource area through the publication step, +and move them back through a recover step. Authors do not have direct +write-access to their resource space. + +During the publication step, several events will be +triggered. Metadata is gathered, where a wizard manages default +entries on a hierarchical per-directory base: The wizard imports the +metadata (including access privileges and royalty information) from +the most recent published resource in the current directory, and if +that is not available, from the next directory above, etc. The Network +keeps all previous versions of a resource and makes them available by +an explicit version number, which is inserted between the file name +and extension, for example C, while the most recent +version does not carry a version number (C). Servers +subscribing to a changed resource are notified that a new version is +available. + +=head1 DESCRIPTION + +B takes the proper steps to add resources to the LON-CAPA +digital library. This includes updating the metadata table in the +LON-CAPA database. + +B is many things to many people. + +This module publishes a file. This involves gathering metadata, +versioning the file, copying file from construction space to +publication space, and copying metadata from construction space +to publication space. + +=head2 SUBROUTINES + +Many of the undocumented subroutines implement various magical +parsing shortcuts. + +=over 4 + +=cut + +###################################################################### +###################################################################### + package Apache::lonpublisher; +# ------------------------------------------------- modules used by this module use strict; use Apache::File; +use File::Copy; use Apache::Constants qw(:common :http :methods); -use HTML::TokeParser; +use HTML::LCParser; +use HTML::Entities; +use Encode::Encoder; use Apache::lonxml; -use Apache::lonhomework; +use Apache::loncacc; +use DBI; +use Apache::lonnet; +use Apache::loncommon(); +use Apache::lonhtmlcommon; +use Apache::lonmysql; +use Apache::lonlocal; +use Apache::loncfile; +use LONCAPA::lonmetadata; +use Apache::lonmsg; +use vars qw(%metadatafields %metadatakeys); +use LONCAPA qw(:DEFAULT :match); + my %addid; my %nokey; -my %metadatafields; -my %metadatakeys; +my $docroot; + +my $cuname; +my $cudom; + +my $registered_cleanup; +my $modified_urls; + +my $lock; + +=pod + +=item B + +Evaluates a string that contains metadata. This subroutine +stores values inside I<%metadatafields> and I<%metadatakeys>. +The hash key is a I<$unikey> corresponding to a unique id +that is descriptive of the parser location inside the XML tree. + +Parameters: + +=over 4 + +=item I<$metastring> + +A string that contains metadata. + +=back + +Returns: + +nothing + +=cut + +######################################### +######################################### +# +# Modifies global %metadatafields %metadatakeys +# sub metaeval { - my $metastring=shift; + my ($metastring,$prefix)=@_; - my $parser=HTML::TokeParser->new(\$metastring); - my $token; - while ($token=$parser->get_token) { - if ($token->[0] eq 'S') { - my $entry=$token->[1]; - my $unikey=$entry; - if (defined($token->[2]->{'part'})) { - $unikey.='_'.$token->[2]->{'part'}; - } - if (defined($token->[2]->{'name'})) { - $unikey.='_'.$token->[2]->{'name'}; - } - map { - $metadatafields{$unikey.'.'.$_}=$token->[2]->{$_}; - if ($metadatakeys{$unikey}) { - $metadatakeys{$unikey}.=','.$_; - } else { - $metadatakeys{$unikey}=$_; - } - } @{$token->[3]}; - if ($metadatafields{$unikey}) { - $metadatafields{$unikey}.=','.$parser->get_text('/'.$entry); - } else { - $metadatafields{$unikey}=$parser->get_text('/'.$entry); - } - } - } + my $parser=HTML::LCParser->new(\$metastring); + my $token; + while ($token=$parser->get_token) { + if ($token->[0] eq 'S') { + my $entry=$token->[1]; + my $unikey=$entry; + next if ($entry =~ m/^(?:parameter|stores)_/); + if (defined($token->[2]->{'package'})) { + $unikey.="\0package\0".$token->[2]->{'package'}; + } + if (defined($token->[2]->{'part'})) { + $unikey.="\0".$token->[2]->{'part'}; + } + if (defined($token->[2]->{'id'})) { + $unikey.="\0".$token->[2]->{'id'}; + } + if (defined($token->[2]->{'name'})) { + $unikey.="\0".$token->[2]->{'name'}; + } + foreach (@{$token->[3]}) { + $metadatafields{$unikey.'.'.$_}=$token->[2]->{$_}; + if ($metadatakeys{$unikey}) { + $metadatakeys{$unikey}.=','.$_; + } else { + $metadatakeys{$unikey}=$_; + } + } + my $newentry=$parser->get_text('/'.$entry); + if (($entry eq 'customdistributionfile') || + ($entry eq 'sourcerights')) { + $newentry=~s/^\s*//; + if ($newentry !~m|^/res|) { $newentry=$prefix.$newentry; } + } +# actually store + if ( $entry eq 'rule' && exists($metadatafields{$unikey})) { + $metadatafields{$unikey}.=','.$newentry; + } else { + $metadatafields{$unikey}=$newentry; + } + } + } } +######################################### +######################################### + +=pod + +=item B + +Read a metadata file + +Parameters: + +=over + +=item I<$logfile> + +File output stream to output errors and warnings to. + +=item I<$fn> + +File name (including path). + +=back + +Returns: + +=over 4 + +=item Scalar string (if successful) + +XHTML text that indicates successful reading of the metadata. + +=back + +=cut + +######################################### +######################################### sub metaread { - my ($logfile,$fn)=@_; + my ($logfile,$fn,$prefix)=@_; unless (-e $fn) { - print $logfile 'No file '.$fn."\n"; - return '
No file: '.$fn.''; + print($logfile 'No file '.$fn."\n"); + return '
' + .&mt('No file: [_1]' + ,' '.&Apache::loncfile::display($fn).'
'); } - print $logfile 'Processing '.$fn."\n"; + print($logfile 'Processing '.$fn."\n"); my $metastring; { - my $metafh=Apache::File->new($fn); - $metastring=join('',<$metafh>); + my $metafh=Apache::File->new($fn); + $metastring=join('',<$metafh>); + } + &metaeval($metastring,$prefix); + return '
' + .&mt('Processed file: [_1]' + ,' '.&Apache::loncfile::display($fn).'
'); +} + +######################################### +######################################### + +sub coursedependencies { + my $url=&Apache::lonnet::declutter(shift); + $url=~s/\.meta$//; + my ($adomain,$aauthor)=($url=~ m{^($match_domain)/($match_username)/}); + my $regexp=quotemeta($url); + $regexp='___'.$regexp.'___course'; + my %evaldata=&Apache::lonnet::dump('nohist_resevaldata',$adomain, + $aauthor,$regexp); + my %courses=(); + foreach (keys %evaldata) { + if ($_=~/^([a-zA-Z0-9]+_[a-zA-Z0-9]+)___.+___course$/) { + $courses{$1}=1; + } + } + return %courses; +} +######################################### +######################################### + + +=pod + +=item Form-field-generating subroutines. + +For input parameters, these subroutines take in values +such as I<$name>, I<$value> and other form field metadata. +The output (scalar string that is returned) is an XHTML +string which presents the form field (foreseeably inside +
tags). + +=over 4 + +=item B + +=item B + +=item B + +=back + +=cut + +######################################### +######################################### +sub textfield { + my ($title,$name,$value,$noline)=@_; + $value=~s/^\s+//gs; + $value=~s/\s+$//gs; + $value=~s/\s+/ /gs; + $title=&mt($title); + $env{'form.'.$name}=$value; + return "\n".&Apache::lonhtmlcommon::row_title($title) + .'' + .&Apache::lonhtmlcommon::row_closure($noline); +} + +sub text_with_browse_field { + my ($title,$name,$value,$restriction,$noline)=@_; + $value=~s/^\s+//gs; + $value=~s/\s+$//gs; + $value=~s/\s+/ /gs; + $title=&mt($title); + $env{'form.'.$name}=$value; + return "\n".&Apache::lonhtmlcommon::row_title($title) + .'' + .'
' + .'' + .&mt('Select') + .' ' + .'' + .&mt('Search') + .'' + .&Apache::lonhtmlcommon::row_closure($noline); +} + +sub hiddenfield { + my ($name,$value)=@_; + $env{'form.'.$name}=$value; + return "\n".''; +} + +sub checkbox { + my ($name,$text)=@_; + return "\n
"; +} + +sub selectbox { + my ($title,$name,$value,$functionref,@idlist)=@_; + $title=&mt($title); + $value=(split(/\s*,\s*/,$value))[-1]; + if (defined($value)) { + $env{'form.'.$name}=$value; + } else { + $env{'form.'.$name}=$idlist[0]; } - &metaeval($metastring); - return '
Processed file: '.$fn.''; + my $selout="\n".&Apache::lonhtmlcommon::row_title($title) + .''.&Apache::lonhtmlcommon::row_closure(); + return $selout; } +sub select_level_form { + my ($value,$name)=@_; + $env{'form.'.$name}=$value; + if (!defined($value)) { $env{'form.'.$name}=0; } + return &Apache::loncommon::select_level_form($value,$name); +} +######################################### +######################################### + +=pod + +=item B + +Fix up a url? First step of publication + +=cut + +######################################### +######################################### +sub urlfixup { + my ($url,$target)=@_; + unless ($url) { return ''; } + #javascript code needs no fixing + if ($url =~ /^javascript:/i) { return $url; } + if ($url =~ /^mailto:/i) { return $url; } + #internal document links need no fixing + if ($url =~ /^\#/) { return $url; } + my ($host)=($url=~m{(?:(?:http|https|ftp)://)*([^/]+)}); + my @lonids = &Apache::lonnet::machine_ids($host); + if (@lonids) { + $url=~s{^(?:http|https|ftp)://}{}; + $url=~s/^\Q$host\E//; + } + if ($url=~m{^(?:http|https|ftp)://}) { return $url; } + $url=~s{\Q~$cuname\E}{res/$cudom/$cuname}; + return $url; +} + +######################################### +######################################### + +=pod + +=item B + +Currently undocumented. + +=cut + +######################################### +######################################### +sub absoluteurl { + my ($url,$target)=@_; + unless ($url) { return ''; } + if ($target) { + $target=~s/\/[^\/]+$//; + $url=&Apache::lonnet::hreflocation($target,$url); + } + return $url; +} + +######################################### +######################################### + +=pod + +=item B + +Currently undocumented + +=cut + +######################################### +######################################### +sub set_allow { + my ($allow,$logfile,$target,$tag,$oldurl)=@_; + my $newurl=&urlfixup($oldurl,$target); + my $return_url=$oldurl; + print $logfile 'GUYURL: '.$tag.':'.$oldurl.' - '.$newurl."\n"; + if ($newurl ne $oldurl) { + $return_url=$newurl; + print $logfile 'URL: '.$tag.':'.$oldurl.' - '.$newurl."\n"; + } + if (($newurl !~ /^javascript:/i) && + ($newurl !~ /^mailto:/i) && + ($newurl !~ /^(?:http|https|ftp):/i) && + ($newurl !~ /^\#/)) { + $$allow{&absoluteurl($newurl,$target)}=1; + } + return $return_url; +} + +######################################### +######################################### + +=pod + +=item B + +Currently undocumented + +=cut + +######################################### +######################################### +sub get_subscribed_hosts { + my ($target)=@_; + my @subscribed; + my $filename; + $target=~/(.*)\/([^\/]+)$/; + my $srcf=$2; + opendir(DIR,$1); + # cycle through listed files, subscriptions used to exist + # as "filename.lonid" + while ($filename=readdir(DIR)) { + if ($filename=~/\Q$srcf\E\.($match_lonid)$/) { + my $subhost=$1; + if (($subhost ne 'meta' + && $subhost ne 'subscription' + && $subhost ne 'meta.subscription' + && $subhost ne 'tmp') && + ($subhost ne $Apache::lonnet::perlvar{'lonHostID'})) { + push(@subscribed,$subhost); + } + } + } + closedir(DIR); + my $sh; + if ( $sh=Apache::File->new("$target.subscription") ) { + while (my $subline=<$sh>) { + if ($subline =~ /^($match_lonid):/) { + if ($1 ne $Apache::lonnet::perlvar{'lonHostID'}) { + push(@subscribed,$1); + } + } + } + } + return @subscribed; +} + + +######################################### +######################################### + +=pod + +=item B + +Currently undocumented + +=cut + +######################################### +######################################### +sub get_max_ids_indices { + my ($content)=@_; + my $maxindex=10; + my $maxid=10; + my $needsfixup=0; + my $duplicateids=0; + + my %allids; + my %duplicatedids; + + my $parser=HTML::LCParser->new($content); + $parser->xml_mode(1); + my $token; + while ($token=$parser->get_token) { + if ($token->[0] eq 'S') { + my $counter; + if ($counter=$addid{$token->[1]}) { + if ($counter eq 'id') { + if (defined($token->[2]->{'id'}) && + $token->[2]->{'id'} !~ /^\s*$/) { + $maxid=($token->[2]->{'id'}>$maxid)?$token->[2]->{'id'}:$maxid; + if (exists($allids{$token->[2]->{'id'}})) { + $duplicateids=1; + $duplicatedids{$token->[2]->{'id'}}=1; + } else { + $allids{$token->[2]->{'id'}}=1; + } + } else { + $needsfixup=1; + } + } else { + if (defined($token->[2]->{'index'}) && + $token->[2]->{'index'} !~ /^\s*$/) { + $maxindex=($token->[2]->{'index'}>$maxindex)?$token->[2]->{'index'}:$maxindex; + } else { + $needsfixup=1; + } + } + } + } + } + return ($needsfixup,$maxid,$maxindex,$duplicateids, + (keys(%duplicatedids))); +} + +######################################### +######################################### + +=pod + +=item B + +Currently undocumented + +=cut + +######################################### +######################################### +sub get_all_text_unbalanced { + #there is a copy of this in lonxml.pm + my($tag,$pars)= @_; + my $token; + my $result=''; + $tag='<'.$tag.'>'; + while ($token = $$pars[-1]->get_token) { + if (($token->[0] eq 'T')||($token->[0] eq 'C')||($token->[0] eq 'D')) { + $result.=$token->[1]; + } elsif ($token->[0] eq 'PI') { + $result.=$token->[2]; + } elsif ($token->[0] eq 'S') { + $result.=$token->[4]; + } elsif ($token->[0] eq 'E') { + $result.=$token->[2]; + } + if ($result =~ /\Q$tag\E/s) { + ($result,my $redo)=$result =~ /(.*)\Q$tag\E(.*)/is; + #&Apache::lonnet::logthis('Got a winner with leftovers ::'.$2); + #&Apache::lonnet::logthis('Result is :'.$1); + $redo=$tag.$redo; + push (@$pars,HTML::LCParser->new(\$redo)); + $$pars[-1]->xml_mode('1'); + last; + } + } + return $result +} + +######################################### +######################################### + +=pod + +=item B + +Currently undocumented + +=cut + +######################################### +######################################### +#Arguably this should all be done as a lonnet::ssi instead +sub fix_ids_and_indices { + my ($logfile,$source,$target)=@_; + + my %allow; + my $content; + { + my $org=Apache::File->new($source); + $content=join('',<$org>); + } + + my ($needsfixup,$maxid,$maxindex,$duplicateids,@duplicatedids)= + &get_max_ids_indices(\$content); + + print $logfile ("Got $needsfixup,$maxid,$maxindex,$duplicateids--". + join(', ',@duplicatedids)); + if ($duplicateids) { + print $logfile "Duplicate ID(s) exist, ".join(', ',@duplicatedids)."\n"; + my $outstring=''.&mt('Unable to publish file, it contains duplicated ID(s), ID(s) need to be unique. The duplicated ID(s) are').': '.join(', ',@duplicatedids).''; + return ($outstring,1); + } + if ($needsfixup) { + print $logfile "Needs ID and/or index fixup\n". + "Max ID : $maxid (min 10)\n". + "Max Index: $maxindex (min 10)\n"; + } + my $outstring=''; + my $responsecounter=1; + my @parser; + $parser[0]=HTML::LCParser->new(\$content); + $parser[-1]->xml_mode(1); + my $token; + while (@parser) { + while ($token=$parser[-1]->get_token) { + if ($token->[0] eq 'S') { + my $counter; + my $tag=$token->[1]; + my $lctag=lc($tag); + if ($lctag eq 'allow') { + $allow{$token->[2]->{'src'}}=1; + next; + } + if ($lctag eq 'base') { next; } + if (($lctag eq 'part') || ($lctag eq 'problem')) { + $responsecounter=0; + } + if ($lctag=~/response$/) { $responsecounter++; } + if ($lctag eq 'import') { $responsecounter++; } + my %parms=%{$token->[2]}; + $counter=$addid{$tag}; + if (!$counter) { $counter=$addid{$lctag}; } + if ($counter) { + if ($counter eq 'id') { + unless (defined($parms{'id'}) && + $parms{'id'}!~/^\s*$/) { + $maxid++; + $parms{'id'}=$maxid; + print $logfile 'ID(new) : '.$tag.':'.$maxid."\n"; + } else { + print $logfile 'ID(kept): '.$tag.':'.$parms{'id'}."\n"; + } + } elsif ($counter eq 'index') { + unless (defined($parms{'index'}) && + $parms{'index'}!~/^\s*$/) { + $maxindex++; + $parms{'index'}=$maxindex; + print $logfile 'Index: '.$tag.':'.$maxindex."\n"; + } + } + } + unless ($parms{'type'} eq 'zombie') { + foreach my $type ('src','href','background','bgimg') { + foreach my $key (keys(%parms)) { + if ($key =~ /^$type$/i) { + $parms{$key}=&set_allow(\%allow,$logfile, + $target,$tag, + $parms{$key}); + } + } + } + } + # probably a image type