File:  [LON-CAPA] / loncom / publisher / loncleanup.pm
Revision 1.6: download - view: text, annotated - select for diffs
Wed Jul 13 21:43:02 2005 UTC (18 years, 10 months ago) by www
Branches: MAIN
CVS tags: version_2_1_X, version_2_1_3, version_2_1_2, version_2_1_1, version_2_1_0, version_2_0_X, version_2_0_99_1, version_2_0_2, version_2_0_1, version_2_0_0, version_1_99_3, version_1_99_2, HEAD
Was not finding <img> if there was a slash in the URL (argh).

    1: # The LearningOnline Network with CAPA
    2: # Handler to cleanup XML files
    3: #
    4: # $Id: loncleanup.pm,v 1.6 2005/07/13 21:43:02 www Exp $
    5: #
    6: # Copyright Michigan State University Board of Trustees
    7: #
    8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
    9: #
   10: # LON-CAPA is free software; you can redistribute it and/or modify
   11: # it under the terms of the GNU General Public License as published by
   12: # the Free Software Foundation; either version 2 of the License, or
   13: # (at your option) any later version.
   14: #
   15: # LON-CAPA is distributed in the hope that it will be useful,
   16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
   17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   18: # GNU General Public License for more details.
   19: #
   20: # You should have received a copy of the GNU General Public License
   21: # along with LON-CAPA; if not, write to the Free Software
   22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   23: #
   24: # /home/httpd/html/adm/gpl.txt
   25: #
   26: # http://www.lon-capa.org/
   27: #
   28: #
   29: ###
   30: 
   31: package Apache::loncleanup;
   32: 
   33: use strict;
   34: use Apache::File;
   35: use File::Copy;
   36: use Apache::Constants qw(:common :http :methods);
   37: use Apache::loncacc;
   38: use Apache::loncommon();
   39: use Apache::lonlocal;
   40: use Apache::lonnet;
   41: 
   # Translate text that was typeset in the Symbol font into LaTeX markup.
   #
   # Argument:
   #   $symbolfont - text taken from inside a symbol-font element; it may mix
   #                 single characters with entities such as "&#177;".
   # Returns:
   #   The text with every character or complete entity that has an entry in
   #   the table below replaced by that entry.  Anything without an entry
   #   (including unknown entities such as "&amp;") is copied through unchanged.
   #   An ampersand that never reaches a terminating ';' is treated as an
   #   ordinary character, so the text following it is kept and translated
   #   instead of being dropped.
   #
   # NOTE(review): several table values (\Alpha, \copy, \sigmaf, ...) look like
   # HTML entity names rather than standard LaTeX macros; they are left as they
   # were - confirm against whatever renders the <m> blocks.
   sub latextrans {
       my $symbolfont=shift;
       my %latexsymb=(
           '&#177;' => '\pm',
           '&#180;' => '\times',
           '&#184;' => '\div',
           '&#210;' => '(R)',
           '&#211;' => '\copy',
           '&#216;' => '\neg',
           '&#226;' => '(R)',
           '&#227;' => '\copy',
           '&#166;' => 'f',
           'A' => '\Alpha',
           'B' => '\Beta',
           'G' => '\Gamma',
           'D' => '\Delta',
           'E' => '\Epsilon',
           'Z' => '\Zeta',
           'H' => '\Eta',
           'Q' => '\Theta',
           'I' => '\Iota',
           'K' => '\Kappa',
           'L' => '\Lambda',
           'M' => '\Mu',
           'N' => '\Nu',
           'X' => '\Xi',
           'O' => '\Omicron',
           'P' => '\Pi',
           'R' => '\Rho',
           'S' => '\Sigma',
           'T' => '\Tau',
           'U' => 'Y',
           'F' => '\Phi',
           'C' => '\Chi',
           'Y' => '\Psi',
           'W' => '\Omega',
           'a' => '\alpha',
           'b' => '\beta',
           'g' => '\gamma',
           'd' => '\delta',
           'e' => '\epsilon',
           'z' => '\zeta',
           'h' => '\eta',
           'q' => '\theta',
           'i' => '\iota',
           'k' => '\kappa',
           'l' => '\lambda',
           'm' => '\mu',
           'n' => '\nu',
           'x' => '\xi',
           'o' => '\omicron',
           'p' => '\pi',
           'r' => '\rho',
           'V' => '\sigmaf',
           's' => '\sigma',
           't' => '\tau',
           'u' => '\upsilon',
           'f' => '\phi',
           'c' => '\chi',
           'y' => '\psi',
           'w' => '\omega',
           'J' => '\vartheta',
           'j' => '\varphi',
           'v' => '\varpi',
           '&#161;' => '\Upsilon',
           '&#162;' => "'",
           '&#164;' => '/',
           '&#178;' => '"',
           '&#188;' => '\ldots',
           '&#192;' => '\aleph',
           '&#193;' => '\Im',
           '&#194;' => '\Re',
           '&#195;' => '\wp',
           '&#212;' => '^{TM}',
           '&#228;' => '^{TM}',
           '&#240;' => 'EUR',
           '&#171;' => '\leftrightarrow',
           '&#172;' => '\leftarrow',
           '&#173;' => '\uparrow',
           '&#174;' => '\rightarrow',
           '&#175;' => '\downarrow',       # was misspelled '\downarraw'
           '&#191;' => '\hookleftarrow',
           '&#219;' => '\Leftrightarrow',
           '&#220;' => '\Leftarrow',
           '&#221;' => '\Uparrow',
           '&#222;' => '\Rightarrow',
           '&#223;' => '\Downarrow',
           '&#34;' => '\forall',
           '&#36;' => '\exists',
           '&#39;' => '\ni',
           '&#42;' => '\ast',
           '&#45;' => '-',
           '&#64;' => '\cong',
           '&#92;' => '\therefore',
           '&#94;' => '\perp',
           '&#126;' => '\sim',
           '&#163;' => '\leq',
           '&#165;' => '\infty',
           '&#179;' => '\geq',
           '&#181;' => '\propto',
           '&#182;' => '\partial',
           '&#183;' => '\cdot',
           '&#185;' => '\not=',
           '&#186;' => '\equiv',
           '&#187;' => '\approx',
           '&#196;' => '\otimes',
           '&#197;' => '\oplus',
           '&#198;' => '\emptyset',
           '&#199;' => '\cap',
           '&#200;' => '\cup',
           '&#201;' => '\supset',
           '&#202;' => '\supseteq',
           '&#203;' => '\not\subset',
           '&#204;' => '\subset',
           '&#205;' => '\subseteq',
           '&#206;' => '\in',
           '&#207;' => '\not\in',
           '&#208;' => '\angle',
           '&#209;' => '\nabla',
           '&#213;' => '\prod',
           '&#214;' => '\surd',
           '&#215;' => '\cdot',
           '&#217;' => '\wedge',
           '&#218;' => '\vee',             # was misspelled '\wee'
           '&#229;' => '\sum',
           '&#242;' => '\int',
           '&#225;' => '\langle',
           '&#241;' => '\rangle',
           '&#224;' => '\diamondsuit',
           '&#167;' => '\clubsuit',
           '&#168;' => '\diamondsuit',
           '&#169;' => '\heartsuit',
           '&#170;' => '\spadesuit'
       );
       return '' unless (defined($symbolfont));
       # A token is either a complete entity ("&" up to the next ";" with no
       # other "&" in between) or, failing that, one single character.  The /s
       # flag lets "." consume newlines so nothing is skipped.
       $symbolfont=~s/(&[^&;]*;|.)/exists($latexsymb{$1}) ? $latexsymb{$1} : $1/ges;
       return $symbolfont;
   }
  195: 
  # Build the replacement for one symbol-font element.
  #
  # Arguments (in order): the attribute text that preceded the face attribute,
  # the attribute text that followed it, and the element content.
  # Returns: an opening <font> tag carrying only those remaining attributes,
  # followed by the content run through latextrans() and wrapped in <m>$...$</m>.
  sub insidetrans {
      my ($before_face,$after_face,$content)=@_;
      my $latex=&latextrans($content);
      return join('','<font',$before_face,$after_face,'><m>$',$latex,'$</m>');
  }
  200: 
  # Replace the content of every <font ... face="symbol" ...> element in $text
  # with its LaTeX translation.
  #
  # The text is cut at each closing </font> tag (matched case-insensitively);
  # inside each piece, a font tag whose face attribute starts with "symbol" is
  # rewritten by insidetrans() together with everything that follows it in
  # that piece.  The pieces are then re-joined with a lowercase '</font>'.
  #
  # Argument: $text - the document text.
  # Returns:  the rewritten text.
  sub symbolfontreplace {
      my $text=shift;
      # A negative limit keeps trailing empty fields.  Without it, text ending
      # in "</font>" would lose its closing tag(s) when re-joined below.
      my @fragments=split(/\<\/font\>/si,$text,-1);
      foreach my $fragment (@fragments) {
          $fragment=~s/\<font([^\>]*)\s+face=[\"\']*symbol[\"\']*([^\>]*)\>(.*)$/&insidetrans($1,$2,$3)/gsie;
      }
      return join('</font>',@fragments);
  }
  209: 
  # Apply a series of textual cleanups to an HTML/XML document.
  #
  # Arguments:
  #   $raw              - the document text
  #   $full             - if false, bare <html>, <head> and <body> tags (opening
  #                       or closing, without attributes) are stripped
  #   $blocklinefeed    - if true, skip line-ending normalisation
  #   $blockemptytags   - if true, skip empty-tag generation (this also skips
  #                       the stripping controlled by $full)
  #   $blocklowercasing - if true, skip lowercasing of standard tag names
  #   $blockdesymboling - if true, skip symbol-font replacement
  # Returns: the cleaned-up text.
  sub htmlclean {
      my ($raw,$full,$blocklinefeed,$blockemptytags,$blocklowercasing,$blockdesymboling)=@_;
  # Take care of CRLF etc
      unless ($blocklinefeed) {
          # Two-character combinations first, so that each pair collapses into
          # a single newline; the order of these statements matters.
          $raw=~s/\r\f/\n/gs; $raw=~s/\f\r/\n/gs;
          $raw=~s/\r\n/\n/gs; $raw=~s/\n\r/\n/gs;
          $raw=~s/\f/\n/gs; $raw=~s/\r/\n/gs;
          $raw=~s/\&\#10\;/\n/gs; $raw=~s/\&\#13\;/\n/gs;
      }
  # Generate empty tags, remove wrong end tags
      unless ($blockemptytags) {
          # The lookahead requires the tag name to end right here, so tags whose
          # names merely start with one of these words (e.g. <metadata>) are
          # left alone.
          $raw=~s/\<(br|hr|img|meta|embed|allow|basefont)(?=[\s\/\>])([^\>]*?)\>/\<$1$2 \/\>/gis;
          $raw=~s/\<\/(br|hr|img|meta|embed|allow|basefont)\>//gis;
          # Tags that were already self-closing now end in "/ />"; undo that.
          $raw=~s/\/ \/\>/\/\>/gs;
          unless ($full) {
              $raw=~s/\<[\/]*(body|head|html)\>//gis;
          }
      }
  # Make standard tags lowercase
      unless ($blocklowercasing) {
          foreach my $tag ('html','body','head','meta','h1','h2','h3','h4','b','i','m',
                           'table','tr','td','th','p','br','hr','img','embed','font',
                           'a','strong','center','title','basefont','li','ol','ul',
                           'input','select','form','option','script','pre') {
              $raw=~s/\<$tag\s*\>/\<$tag\>/gis;
              $raw=~s/\<\/$tag\s*\>/<\/$tag\>/gis;
              $raw=~s/\<$tag\s([^\>]*)\>/<$tag $1\>/gis;
          }
      }
  # Replace <font face="symbol">
      unless ($blockdesymboling) {
          $raw=&symbolfontreplace($raw);
      }
      return $raw;
  }
  245: 
  # Phase one: print the form body that lets the author choose which cleanup
  # actions to attempt.  All four checkboxes are pre-checked and a hidden
  # "phase" field set to "two" is included.
  #
  # Arguments: $r (request object used for output), $fn, $uname, $udom.
  # Only $r is used here; the others are accepted for a uniform phase signature.
  sub phaseone {
      my ($r,$fn,$uname,$udom)=@_;
      $r->print(&mt('Select actions to attempt:').
                '<br /><input type="checkbox" name="linefeed" checked="checked" /> '.
                &mt('Linefeeds, formfeeds, and carriage returns').
                '<br /><input type="checkbox" name="empty" checked="checked" /> '.
                &mt('Empty tags').
                '<br /><input type="checkbox" name="lower" checked="checked" /> '.
                &mt('Lower casing').
                # Attributes must be separated by whitespace; the space before
                # "checked" was missing on this checkbox.
                '<br /><input type="checkbox" name="symbol" checked="checked" /> '.
                &mt('Symbol font').
                '<input type="hidden" name="phase" value="two" />'.
                '<p><input type="submit" value="'.&mt('Cleanup').'" /></p>');
  }
  260: 
  # Phase two: run the selected cleanups on the file, write the result next to
  # the original as "<name>_Auto_Cleaned_Up.<ext>", and report the error and
  # warning counts of both versions together with links to view and diff the
  # result and Accept/Reject buttons.
  #
  # Arguments:
  #   $r     - request object used for output
  #   $fn    - file path below the author's public_html directory
  #   $uname - author's user name (used to build /home/<uname>/public_html)
  #   $udom  - author's domain (not used here)
  #
  # If the original cannot be read or the cleaned-up copy cannot be written, a
  # message is printed and the phase stops without offering Accept/Reject.
  sub phasetwo {
      my ($r,$fn,$uname,$udom)=@_;
      my $docroot='/home/'.$uname.'/public_html';

      # Slurp the original file.
      my $text='';
      if (open(my $in,'<',$docroot.'/'.$fn)) {
          local $/;
          $text=<$in>;
          $text='' unless (defined($text));
          close($in);
      } else {
          $r->print(&mt('Could not read original file'));
          return;
      }

      my $uri='/~'.$uname.$fn;
      my $result=&Apache::lonnet::ssi_body($uri,
                                           ('grade_target'=>'web',
                                            'return_only_error_and_warning_counts' => 1));
      my ($errorcount,$warningcount)=split(':',$result);
      $r->print(&mt('Original file').': '.
                $errorcount.' '.&mt('error(s)').', '.
                $warningcount.' '.&mt('warning(s)'));

      # A checkbox that was left unchecked blocks the corresponding action.
      $text=&htmlclean($text,1,
                       ($env{'form.linefeed'} ne 'on'),
                       ($env{'form.empty'} ne 'on'),
                       ($env{'form.lower'} ne 'on'),
                       ($env{'form.symbol'} ne 'on'));

      my ($main,$ext)=($fn=~/^(.*)\.(\w+)/);
      my $newfn=$main.'_Auto_Cleaned_Up.'.$ext;
      my $written=0;
      if (open(my $out,'>',$docroot.$newfn)) {
          # Buffered write errors only surface at close, so check both.
          my $printed=print {$out} $text;
          my $closed=close($out);
          $written=($printed && $closed);
      }
      unless ($written) {
          $r->print('<br />'.&mt('Could not write cleaned up file'));
          return;
      }

      my $newuri='/~'.$uname.$newfn;
      $result=&Apache::lonnet::ssi_body($newuri,
                                        ('grade_target'=>'web',
                                         'return_only_error_and_warning_counts' => 1));
      ($errorcount,$warningcount)=split(':',$result);
      $r->print('<br />'.&mt('Cleaned up file').': '.
                $errorcount.' '.&mt('error(s)').', '.
                $warningcount.' '.&mt('warning(s)').
                '<br /><a href="'.$newuri.'" target="prev">'.
                &mt('Open (and edit) cleaned up file in new window').'</a>'.
                '<br /><a href="/adm/diff?filename='.&Apache::lonnet::escape($uri).
                # Ampersands inside an attribute value must be written as &amp;
                '&amp;versionone=priv&amp;filetwo='.
                &Apache::lonnet::escape($newuri).'" target="prev">'.
                &mt('Show diffs in new window').'</a><br />'.
                '<input type="hidden" name="phase" value="three" />'.
                '<input type="submit" name="accept" value="'.&mt('Accept Result').'" />'.
                '<input type="submit" name="reject" value="'.&mt('Reject Result').'" />'
                );
  }
  306: 
  # Phase three: act on the author's decision about the cleaned-up copy.
  # If the "accept" form field is set, the cleaned-up file is moved over the
  # original; otherwise the cleaned-up file is deleted.
  #
  # Arguments:
  #   $r     - request object used for output
  #   $fn    - file path below the author's public_html directory
  #   $uname - author's user name (used to build /home/<uname>/public_html)
  #   $udom  - author's domain (not used here)
  sub phasethree {
      my ($r,$fn,$uname,$udom)=@_;
      my $old='/home/'.$uname.'/public_html/'.$fn;
      my ($main,$ext)=($fn=~/^(.*)\.(\w+)/);
      my $newfn=$main.'_Auto_Cleaned_Up.'.$ext;
      my $new='/home/'.$uname.'/public_html'.$newfn;
      if ($env{'form.accept'}) {
          $r->print(&mt('Accepting changes'));
          # move() returns false on failure; tell the author instead of
          # silently leaving the original untouched.
          unless (move($new,$old)) {
              $r->print('<br />'.&mt('Could not replace original file'));
          }
      } else {
          $r->print(&mt('Rejecting changes'));
          # Only complain if the file is really there and could not be removed.
          if ((-e $new) && (!unlink($new))) {
              $r->print('<br />'.&mt('Could not remove cleaned up file'));
          }
      }
  }
  321: 
  322: # ---------------------------------------------------------------- Main Handler
  # Request handler for the cleanup page (the form posts back to /adm/cleanup).
  #
  # Reads the "filename" form/query parameter, checks access through
  # Apache::loncacc::constructaccess, prints the page frame and dispatches to
  # phaseone/phasetwo/phasethree depending on the "phase" form field.
  #
  # Argument: $r - the request object.
  # Returns:  HTTP_NOT_FOUND when no usable filename was given,
  #           HTTP_NOT_ACCEPTABLE when constructaccess yields no user/domain,
  #           OK otherwise.
  sub handler {

      my $r=shift;
      my $fn='';

      # The filename comes from the request, so it must be escaped before it
      # is written into HTML text or attribute values.
      my $htmlescape=sub {
          my $value=shift;
          $value='' unless (defined($value));
          $value=~s/\&/\&amp;/g;
          $value=~s/\</\&lt;/g;
          $value=~s/\>/\&gt;/g;
          $value=~s/\"/\&quot;/g;
          return $value;
      };

  # Get query string for limited number of parameters

      &Apache::loncommon::get_unprocessed_cgi($ENV{'QUERY_STRING'},
                                              ['filename']);

      if ($env{'form.filename'}) {
          $fn=$env{'form.filename'};
          # Strip a leading scheme and host, for http as well as https.
          $fn=~s/^https?\:\/\/[^\/]+//;
      } else {
          $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
                         ' unspecified filename for cleanup', $r->filename);
          return HTTP_NOT_FOUND;
      }

      unless ($fn) {
          $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
                         ' trying to cleanup non-existing file', $r->filename);
          return HTTP_NOT_FOUND;
      }

  # ----------------------------------------------------------- Start page output
      my $uname;
      my $udom;

      ($uname,$udom)=
          &Apache::loncacc::constructaccess($fn,$r->dir_config('lonDefDomain'));
      unless (($uname) && ($udom)) {
          $r->log_reason($uname.' at '.$udom.
                         ' trying to cleanup file '.$env{'form.filename'}.
                         ' ('.$fn.') - not authorized',
                         $r->filename);
          return HTTP_NOT_ACCEPTABLE;
      }

      # Drop the first "/~username" part; what remains is the path that the
      # phase subs append to the author's public_html directory.
      $fn=~s/\/\~(\w+)//;

      &Apache::loncommon::content_type($r,'text/html');
      $r->send_http_header;

      my $html=&Apache::lonxml::xmlbegin();
      $r->print($html.'<head><title>LON-CAPA Construction Space</title></head>');

      $r->print(&Apache::loncommon::bodytag('Cleanup XML Document'));
      $r->print('<h2>'.$htmlescape->($fn).'</h2>'.
                '<form action="/adm/cleanup" method="post">'.
                '<input type="hidden" name="filename" value="'.
                $htmlescape->($env{'form.filename'}).'" />');
      unless ($fn=~/\.(problem|exam|quiz|assess|survey|form|library|xml|html|htm|xhtml|xhtm|sty)$/) {
          $r->print(&mt('Cannot cleanup this filetype'));
      } else {
          if ($env{'form.phase'} eq 'three') {
              &phasethree($r,$fn,$uname,$udom);
          } elsif ($env{'form.phase'} eq 'two') {
              &phasetwo($r,$fn,$uname,$udom);
          } else {
              &phaseone($r,$fn,$uname,$udom);
          }
      }
      # Directory part of the path: everything up to and including the last slash.
      my $dir=$fn;
      $dir=~s/\/[^\/]+$/\//;
      $r->print('</form>'.
                '<br /><a href="/priv/'.$htmlescape->($uname).'/'.$htmlescape->($fn).'">'.
                &mt('Back to Source File').'</a>'.
                '<br /><a href="/priv/'.$htmlescape->($uname).'/'.$htmlescape->($dir).'">'.
                &mt('Back to Source Directory').'</a>'.
                '</body></html>');
      return OK;
  }
  393: 
  394: 1;
  395: __END__

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>