File:  [LON-CAPA] / loncom / publisher / loncleanup.pm
Revision 1.9: download - view: text, annotated - select for diffs
Wed Dec 20 22:41:08 2006 UTC (17 years, 4 months ago) by albertel
Branches: MAIN
CVS tags: version_2_7_X, version_2_7_1, version_2_7_0, version_2_6_X, version_2_6_99_1, version_2_6_99_0, version_2_6_3, version_2_6_2, version_2_6_1, version_2_6_0, version_2_5_X, version_2_5_99_1, version_2_5_99_0, version_2_5_2, version_2_5_1, version_2_5_0, version_2_4_X, version_2_4_99_0, version_2_4_2, version_2_4_1, version_2_4_0, version_2_3_X, version_2_3_99_0, version_2_3_2, version_2_3_1, version_2_3_0, version_2_2_99_1, HEAD
- bunch of \w replacements

    1: # The LearningOnline Network with CAPA
    2: # Handler to cleanup XML files
    3: #
    4: # $Id: loncleanup.pm,v 1.9 2006/12/20 22:41:08 albertel Exp $
    5: #
    6: # Copyright Michigan State University Board of Trustees
    7: #
    8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
    9: #
   10: # LON-CAPA is free software; you can redistribute it and/or modify
   11: # it under the terms of the GNU General Public License as published by
   12: # the Free Software Foundation; either version 2 of the License, or
   13: # (at your option) any later version.
   14: #
   15: # LON-CAPA is distributed in the hope that it will be useful,
   16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
   17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   18: # GNU General Public License for more details.
   19: #
   20: # You should have received a copy of the GNU General Public License
   21: # along with LON-CAPA; if not, write to the Free Software
   22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   23: #
   24: # /home/httpd/html/adm/gpl.txt
   25: #
   26: # http://www.lon-capa.org/
   27: #
   28: #
   29: ###
   30: 
   31: package Apache::loncleanup;
   32: 
   33: use strict;
   34: use Apache::File;
   35: use File::Copy;
   36: use Apache::Constants qw(:common :http :methods);
   37: use Apache::loncacc;
   38: use Apache::loncommon();
   39: use Apache::lonlocal;
   40: use Apache::lonnet;
   41: use lib '/home/httpd/lib/perl/';
   42: use LONCAPA;
   43:  
   44: 
   45: sub latextrans {
   46:     my $symbolfont=shift;
   47:     my %latexsymb=(
   48: 		   '±' => '\pm',
   49: 		   '´' => '\times',
   50: 		   '¸' => '\div',
   51: 		   'Ò' => '(R)',
   52: 		   'Ó' => '\copy',
   53: 		   'Ø' => '\neg',
   54: 		   'â' => '(R)',
   55: 		   'ã' => '\copy',
   56: 		   '¦' => 'f',
   57: 		   'A' => '\Alpha',
   58: 		   'B' => '\Beta',
   59: 		   'G' => '\Gamma',
   60: 		   'D' => '\Delta',
   61: 		   'E' => '\Epsilon',
   62: 		   'Z' => '\Zeta',
   63: 		   'H' => '\Eta',
   64: 		   'Q' => '\Theta',
   65: 		   'I' => '\Iota',
   66: 		   'K' => '\Kappa',
   67: 		   'L' => '\Lambda',
   68: 		   'M' => '\Mu',
   69: 		   'N' => '\Nu',
   70: 		   'X' => '\Xi',
   71: 		   'O' => '\Omicron',
   72: 		   'P' => '\Pi',
   73: 		   'R' => '\Rho',
   74: 		   'S' => '\Sigma',
   75: 		   'T' => '\Tau',
   76: 		   'U' => 'Y',
   77: 		   'F' => '\Phi',
   78: 		   'C' => '\Chi',
   79: 		   'Y' => '\Psi',
   80: 		   'W' => '\Omega',
   81: 		   'a' => '\alpha',
   82: 		   'b' => '\beta',
   83: 		   'g' => '\gamma',
   84: 		   'd' => '\delta',
   85: 		   'e' => '\epsilon',
   86: 		   'z' => '\zeta',
   87: 		   'h' => '\eta',
   88: 		   'q' => '\theta',
   89: 		   'i' => '\iota',
   90: 		   'k' => '\kappa',
   91: 		   'l' => '\lambda',
   92: 		   'm' => '\mu',
   93: 		   'n' => '\nu',
   94: 		   'x' => '\xi',
   95: 		   'o' => '\omicron',
   96: 		   'p' => '\pi',
   97: 		   'r' => '\rho',
   98: 		   'V' => '\sigmaf',
   99: 		   's' => '\sigma',
  100: 		   't' => '\tau',
  101: 		   'u' => '\upsilon',
  102: 		   'f' => '\phi',
  103: 		   'c' => '\chi',
  104: 		   'y' => '\psi',
  105: 		   'w' => '\omega',
  106: 		   'J' => '\vartheta',
  107: 		   'j' => '\varphi',
  108: 		   'v' => '\varpi',
  109: 		   '¡' => '\Upsilon',
  110: 		   '¢' => "'",
  111: 		   '¤' => '/',
  112: 		   '²' => '"',
  113: 		   '¼' => '\ldots',
  114: 		   'À' => '\aleph',
  115: 		   'Á' => '\Im',
  116: 		   'Â' => '\Re',
  117: 		   'Ã' => '\wp',
  118: 		   'Ô' => '^{TM}',
  119: 		   'ä' => '^{TM}',
  120: 		   'ð' => 'EUR',
  121: 		   '«' => '\leftrightarrow',
  122: 		   '¬' => '\leftarrow',
  123: 		   '­' => '\uparrow',
  124: 		   '®' => '\rightarrow',
  125: 		   '¯' => '\downarraw',
  126: 		   '¿' => '\hookleftarrow',
  127: 		   'Û' => '\Leftrightarrow',
  128: 		   'Ü' => '\Leftarrow',
  129: 		   'Ý' => '\Uparrow',
  130: 		   'Þ' => '\Rightarrow',
  131: 		   'ß' => '\Downarrow',
  132: 		   '"' => '\forall',
  133: 		   '$' => '\exists',
  134: 		   ''' => '\ni',
  135: 		   '*' => '\ast',
  136: 		   '-' => '-',
  137: 		   '@' => '\cong',
  138: 		   '\' => '\therefore',
  139: 		   '^' => '\perp',
  140: 		   '~' => '\sim',
  141: 		   '£' => '\leq',
  142: 		   '¥' => '\infty',
  143: 		   '³' => '\geq',
  144: 		   'µ' => '\propto',
  145: 		   '¶' => '\partial',
  146: 		   '·' => '\cdot',
  147: 		   '¹' => '\not=',
  148: 		   'º' => '\equiv',
  149: 		   '»' => '\approx',
  150: 		   'Ä' => '\otimes',
  151: 		   'Å' => '\oplus',
  152: 		   'Æ' => '\emptyset',
  153: 		   'Ç' => '\cap',
  154: 		   'È' => '\cup',
  155: 		   'É' => '\supset',
  156: 		   'Ê' => '\supseteq',
  157: 		   'Ë' => '\not\subset',
  158: 		   'Ì' => '\subset',
  159: 		   'Í' => '\subseteq',
  160: 		   'Î' => '\in',
  161: 		   'Ï' => '\not\in',
  162: 		   'Ð' => '\angle',
  163: 		   'Ñ' => '\nabla',
  164: 		   'Õ' => '\prod',
  165: 		   'Ö' => '\surd',
  166: 		   '×' => '\cdot',
  167: 		   'Ù' => '\wedge',
  168: 		   'Ú' => '\wee',
  169: 		   'å' => '\sum',
  170: 		   'ò' => '\int',
  171: 		   'á' => '\langle',
  172: 		   'ñ' => '\rangle',
  173: 		   'à' => '\diamondsuit',
  174: 		   '§' => '\clubsuit',
  175: 		   '¨' => '\diamondsuit',
  176: 		   '©' => '\heartsuit',
  177: 		   'ª' => '\spadesuit'
  178: 		   );
  179:     my $output='';
  180:     my $char='';
  181:     my $entitymode=0;
  182:     for (my $i=0; $i<length($symbolfont); $i++) {
  183:         my $newchar=substr($symbolfont,$i,1);
  184:         $char.=$newchar;
  185:         if ($newchar eq '&') { $entitymode=1; }
  186:         if (($entitymode) && ($newchar ne ';')) { next; }
  187:         my $latex=$latexsymb{$char};
  188: 	if ($latex) {
  189: 	    $output.=$latex;
  190: 	} else {
  191: 	    $output.=$char;
  192: 	}
  193:         $char='';
  194:         $entitymode=0;
  195:     }
  196:     return $output;
  197: }
  198: 
  199: sub insidetrans {
  200:     my @args=@_;
  201:     return '<font'.$args[0].$args[1].'><m>$'.&latextrans($args[2]).'$</m>';
  202: }
  203: 
  204: sub symbolfontreplace {
  205:     my $text=shift;
  206:     my @fragments=split(/\<\/font\>/si,$text);
  207:     for (my $i=0; $i<=$#fragments;$i++) {
  208: 	$fragments[$i]=~s/\<font([^\>]*)\s+face=[\"\']*symbol[\"\']*([^\>]*)\>(.*)$/&insidetrans($1,$2,$3)/gsie;
  209:     }
  210:     return join('</font>',@fragments);
  211: }
  212: 
  213: sub htmlclean {
  214:     my ($raw,$full,$blocklinefeed,$blockemptytags,$blocklowercasing,$blockdesymboling)=@_;
  215: # Take care of CRLF etc
  216:     unless ($blocklinefeed) {
  217: 	$raw=~s/\r\f/\n/gs; $raw=~s/\f\r/\n/gs;
  218: 	$raw=~s/\r\n/\n/gs; $raw=~s/\n\r/\n/gs;
  219: 	$raw=~s/\f/\n/gs; $raw=~s/\r/\n/gs;
  220: 	$raw=~s/\&\#10\;/\n/gs; $raw=~s/\&\#13\;/\n/gs;
  221:     }
  222: # Generate empty tags, remove wrong end tags
  223:     unless ($blockemptytags) {
  224: 	$raw=~s/\<(br|hr|img|meta|embed|allow|basefont)([^\>]*?)\>/\<$1$2 \/\>/gis;
  225: 	$raw=~s/\<\/(br|hr|img|meta|embed|allow|basefont)\>//gis;
  226: 	$raw=~s/\/ \/\>/\/\>/gs;
  227: 	unless ($full) {
  228: 	    $raw=~s/\<[\/]*(body|head|html)\>//gis;
  229: 	}
  230:     }
  231: # Make standard tags lowercase
  232:     unless ($blocklowercasing) {
  233: 	foreach ('html','body','head','meta','h1','h2','h3','h4','b','i','m',
  234: 		 'table','tr','td','th','p','br','hr','img','embed','font',
  235: 		 'a','strong','center','title','basefont','li','ol','ul',
  236: 		 'input','select','form','option','script','pre') {
  237: 	    $raw=~s/\<$_\s*\>/\<$_\>/gis;
  238: 	    $raw=~s/\<\/$_\s*\>/<\/$_\>/gis;
  239: 	    $raw=~s/\<$_\s([^\>]*)\>/<$_ $1\>/gis;
  240: 	}
  241:     }
  242: # Replace <font face="symbol">
  243:     unless ($blockdesymboling) {
  244: 	$raw=&symbolfontreplace($raw);
  245:     }
  246:     return $raw;
  247: }
  248: 
  249: sub phaseone {
  250:     my ($r,$fn,$uname,$udom)=@_;
  251:     $r->print(&mt('Select actions to attempt:').
  252: 	      '<br /><input type="checkbox" name="linefeed" checked="checked" /> '.
  253: 	      &mt('Linefeeds, formfeeds, and carriage returns').
  254: 	      '<br /><input type="checkbox" name="empty" checked="checked" /> '.
  255: 	      &mt('Empty tags').
  256: 	      '<br /><input type="checkbox" name="lower" checked="checked" /> '.
  257: 	      &mt('Lower casing').
  258: 	      '<br /><input type="checkbox" name="symbol"checked="checked" /> '.
  259: 	      &mt('Symbol font').
  260: 	      '<input type="hidden" name="phase" value="two" />'.
  261: 	      '<p><input type="submit" value="'.&mt('Cleanup').'" /></p>');
  262: }
  263: 
  264: sub phasetwo {
  265:     my ($r,$fn,$uname,$udom)=@_;
  266:     open(IN,'/home/'.$uname.'/public_html/'.$fn);
  267:     my $text='';
  268:     while (my $line=<IN>) {
  269: 	$text.=$line;
  270:     }
  271:     close(IN);
  272:     my $uri='/~'.$uname.$fn;
  273:     my $result=&Apache::lonnet::ssi_body($uri,
  274: 					 ('grade_target'=>'web',
  275: 					  'return_only_error_and_warning_counts' => 1));
  276:     my ($errorcount,$warningcount)=split(':',$result);
  277:     $r->print(&mt('Original file').': '.
  278: 	      $errorcount.' '.&mt('error(s)').', '.
  279: 	      $warningcount.' '.&mt('warning(s)'));
  280:     $text=&htmlclean($text,1,
  281:                ($env{'form.linefeed'} ne 'on'),
  282:                ($env{'form.empty'} ne 'on'),
  283:                ($env{'form.lower'} ne 'on'),
  284:                ($env{'form.symbol'} ne 'on'));
  285:     my ($main,$ext)=($fn=~/^(.*)\.(\w+)/);
  286:     my $newfn=$main.'_Auto_Cleaned_Up.'.$ext;
  287:     open(OUT,'>/home/'.$uname.'/public_html'.$newfn);
  288:     print OUT $text;
  289:     close(OUT);
  290:     my $newuri='/~'.$uname.$newfn;
  291:     $result=&Apache::lonnet::ssi_body($newuri,
  292: 					 ('grade_target'=>'web',
  293: 					  'return_only_error_and_warning_counts' => 1));
  294:     ($errorcount,$warningcount)=split(':',$result);
  295:     $r->print('<br />'.&mt('Cleaned up file').': '.
  296: 	      $errorcount.' '.&mt('error(s)').', '.
  297: 	      $warningcount.' '.&mt('warning(s)').
  298:               '<br /><a href="'.$newuri.'" target="prev">'.
  299: 	      &mt('Open (and edit) cleaned up file in new window').'</a>'.
  300:               '<br /><a href="/adm/diff?filename='.&escape($uri).
  301: 	      '&versionone=priv&filetwo='.
  302: 	      &escape($newuri).'" target="prev">'.
  303: 	      &mt('Show diffs in new window').'</a><br />'.
  304: 	      '<input type="hidden" name="phase" value="three" />'.
  305: 	      '<input type="submit" name="accept" value="'.&mt('Accept Result').'" />'.
  306: 	      '<input type="submit" name="reject" value="'.&mt('Reject Result').'" />'
  307: 	      );
  308: }
  309: 
  310: sub phasethree {
  311:     my ($r,$fn,$uname,$udom)=@_;
  312:     my $old='/home/'.$uname.'/public_html/'.$fn;
  313:     my ($main,$ext)=($fn=~/^(.*)\.(\w+)/);
  314:     my $newfn=$main.'_Auto_Cleaned_Up.'.$ext;
  315:     my $new='/home/'.$uname.'/public_html'.$newfn;
  316:     if ($env{'form.accept'}) {
  317: 	$r->print(&mt('Accepting changes'));
  318:         move($new,$old);
  319:     } else {
  320: 	$r->print(&mt('Rejeting changes'));
  321:         unlink($new);
  322:     }
  323: }
  324: 
  325: # ---------------------------------------------------------------- Main Handler
  326: sub handler {
  327: 
  328:     my $r=shift;
  329:     my $fn='';
  330: 
  331: # Get query string for limited number of parameters
  332: 
  333:     &Apache::loncommon::get_unprocessed_cgi($ENV{'QUERY_STRING'},
  334: 					    ['filename']);
  335: 
  336:     if ($env{'form.filename'}) {
  337: 	$fn=$env{'form.filename'};
  338: 	$fn=~s/^http\:\/\/[^\/]+//;
  339:     } else {
  340: 	$r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
  341: 		       ' unspecified filename for cleanup', $r->filename); 
  342: 	return HTTP_NOT_FOUND;
  343:     }
  344: 
  345:     unless ($fn) { 
  346: 	$r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
  347: 		       ' trying to cleanup non-existing file', $r->filename); 
  348: 	return HTTP_NOT_FOUND;
  349:     } 
  350: 
  351: # ----------------------------------------------------------- Start page output
  352:     my $uname;
  353:     my $udom;
  354: 
  355:     ($uname,$udom)=
  356: 	&Apache::loncacc::constructaccess($fn,$r->dir_config('lonDefDomain'));
  357:     unless (($uname) && ($udom)) {
  358: 	$r->log_reason($uname.' at '.$udom.
  359: 		       ' trying to cleanup file '.$env{'form.filename'}.
  360: 		       ' ('.$fn.') - not authorized', 
  361: 		       $r->filename); 
  362: 	return HTTP_NOT_ACCEPTABLE;
  363:     }
  364: 
  365:     $fn=~s{/~($LONCAPA::username_re)}{};
  366: 
  367:     &Apache::loncommon::content_type($r,'text/html');
  368:     $r->send_http_header;
  369: 
  370:     $r->print(&Apache::loncommon::start_page('Cleanup XML Document'));
  371:     $r->print('<h2>'.$fn.'</h2>'.
  372:               '<form action="/adm/cleanup" method="post">'.
  373:               '<input type="hidden" name="filename" value="'.$env{'form.filename'}.'" />');
  374:     unless ($fn=~/\.(problem|exam|quiz|assess|survey|form|library|xml|html|htm|xhtml|xhtm|sty)$/) {
  375: 	$r->print(&mt('Cannot cleanup this filetype'));
  376:     } else {
  377: 	if ($env{'form.phase'} eq 'three') {
  378: 	    &phasethree($r,$fn,$uname,$udom);
  379: 	} elsif ($env{'form.phase'} eq 'two') {
  380: 	    &phasetwo($r,$fn,$uname,$udom);
  381: 	} else {
  382: 	    &phaseone($r,$fn,$uname,$udom);
  383: 	}
  384:     }
  385:     my $dir=$fn;
  386:     $dir=~s/\/[^\/]+$/\//;
  387:     $r->print('</form>'.
  388: 	      '<br /><a href="/priv/'.$uname.'/'.$fn.'">'.&mt('Back to Source File').'</a>'.
  389:               '<br /><a href="/priv/'.$uname.'/'.$dir.'">'.&mt('Back to Source Directory').'</a>'.
  390: 	      &Apache::loncommon::end_page());
  391:     return OK;  
  392: }
  393: 
  394: 1;
  395: __END__

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>