File:  [LON-CAPA] / loncom / publisher / loncleanup.pm
Revision 1.1: download - view: text, annotated - select for diffs
Sat May 28 01:32:33 2005 UTC (18 years, 11 months ago) by www
Branches: MAIN
CVS tags: HEAD
Put all XML/HTML cleanup into separate handler

# The LearningOnline Network with CAPA
# Handler to cleanup XML files
#
# $Id: loncleanup.pm,v 1.1 2005/05/28 01:32:33 www Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
#
###

package Apache::loncleanup;

use strict;
use Apache::File;
use File::Copy;
use Apache::Constants qw(:common :http :methods);
use Apache::loncacc;
use Apache::loncommon();
use Apache::lonlocal;
use Apache::lonnet;

sub latextrans {
    my $symbolfont=shift;
    my %latexsymb=(
		   '±' => '\pm',
		   '´' => '\times',
		   '¸' => '\div',
		   'Ò' => '(R)',
		   'Ó' => '\copy',
		   'Ø' => '\neg',
		   'â' => '(R)',
		   'ã' => '\copy',
		   '¦' => 'f',
		   'A' => '\Alpha',
		   'B' => '\Beta',
		   'G' => '\Gamma',
		   'D' => '\Delta',
		   'E' => '\Epsilon',
		   'Z' => '\Zeta',
		   'H' => '\Eta',
		   'Q' => '\Theta',
		   'I' => '\Iota',
		   'K' => '\Kappa',
		   'L' => '\Lambda',
		   'M' => '\Mu',
		   'N' => '\Nu',
		   'X' => '\Xi',
		   'O' => '\Omicron',
		   'P' => '\Pi',
		   'R' => '\Rho',
		   'S' => '\Sigma',
		   'T' => '\Tau',
		   'U' => 'Y',
		   'F' => '\Phi',
		   'C' => '\Chi',
		   'Y' => '\Psi',
		   'W' => '\Omega',
		   'a' => '\alpha',
		   'b' => '\beta',
		   'g' => '\gamma',
		   'd' => '\delta',
		   'e' => '\epsilon',
		   'z' => '\zeta',
		   'h' => '\eta',
		   'q' => '\theta',
		   'i' => '\iota',
		   'k' => '\kappa',
		   'l' => '\lambda',
		   'm' => '\mu',
		   'n' => '\nu',
		   'x' => '\xi',
		   'o' => '\omicron',
		   'p' => '\pi',
		   'r' => '\rho',
		   'V' => '\sigmaf',
		   's' => '\sigma',
		   't' => '\tau',
		   'u' => '\upsilon',
		   'f' => '\phi',
		   'c' => '\chi',
		   'y' => '\psi',
		   'w' => '\omega',
		   'J' => '\vartheta',
		   'j' => '\varphi',
		   'v' => '\varpi',
		   '¡' => '\Upsilon',
		   '¢' => "'",
		   '¤' => '/',
		   '²' => '"',
		   '¼' => '\ldots',
		   'À' => '\aleph',
		   'Á' => '\Im',
		   'Â' => '\Re',
		   'Ã' => '\wp',
		   'Ô' => '^{TM}',
		   'ä' => '^{TM}',
		   'ð' => 'EUR',
		   '«' => '\leftrightarrow',
		   '¬' => '\leftarrow',
		   '­' => '\uparrow',
		   '®' => '\rightarrow',
		   '¯' => '\downarraw',
		   '¿' => '\hookleftarrow',
		   'Û' => '\Leftrightarrow',
		   'Ü' => '\Leftarrow',
		   'Ý' => '\Uparrow',
		   'Þ' => '\Rightarrow',
		   'ß' => '\Downarrow',
		   '"' => '\forall',
		   '$' => '\exists',
		   ''' => '\ni',
		   '*' => '\ast',
		   '-' => '-',
		   '@' => '\cong',
		   '\' => '\therefore',
		   '^' => '\perp',
		   '~' => '\sim',
		   '£' => '\leq',
		   '¥' => '\infty',
		   '³' => '\geq',
		   'µ' => '\propto',
		   '¶' => '\partial',
		   '·' => '\cdot',
		   '¹' => '\not=',
		   'º' => '\equiv',
		   '»' => '\approx',
		   'Ä' => '\otimes',
		   'Å' => '\oplus',
		   'Æ' => '\emptyset',
		   'Ç' => '\cap',
		   'È' => '\cup',
		   'É' => '\supset',
		   'Ê' => '\supseteq',
		   'Ë' => '\not\subset',
		   'Ì' => '\subset',
		   'Í' => '\subseteq',
		   'Î' => '\in',
		   'Ï' => '\not\in',
		   'Ð' => '\angle',
		   'Ñ' => '\nabla',
		   'Õ' => '\prod',
		   'Ö' => '\surd',
		   '×' => '\cdot',
		   'Ù' => '\wedge',
		   'Ú' => '\wee',
		   'å' => '\sum',
		   'ò' => '\int',
		   'á' => '\langle',
		   'ñ' => '\rangle',
		   'à' => '\diamondsuit',
		   '§' => '\clubsuit',
		   '¨' => '\diamondsuit',
		   '©' => '\heartsuit',
		   'ª' => '\spadesuit'
		   );
    my $output='';
    my $char='';
    my $entitymode=0;
    for (my $i=0; $i<length($symbolfont); $i++) {
        my $newchar=substr($symbolfont,$i,1);
        $char.=$newchar;
        if ($newchar eq '&') { $entitymode=1; }
        if (($entitymode) && ($newchar ne ';')) { next; }
        my $latex=$latexsymb{$char};
	if ($latex) {
	    $output.=$latex;
	} else {
	    $output.=$char;
	}
        $char='';
        $entitymode=0;
    }
    return $output;
}

sub insidetrans {
    my @args=@_;
    return '<font'.$args[0].$args[1].'><m>$'.&latextrans($args[2]).'$</m>';
}

sub symbolfontreplace {
    my $text=shift;
    my @fragments=split(/\<\/font\>/si,$text);
    for (my $i=0; $i<=$#fragments;$i++) {
	$fragments[$i]=~s/\<font([^\>]*)\s+face=[\"\']*symbol[\"\']*([^\>]*)\>(.*)$/&insidetrans($1,$2,$3)/gsie;
    }
    return join('</font>',@fragments);
}

sub htmlclean {
    my ($raw,$full)=@_;
# Take care of CRLF etc

    $raw=~s/\r\f/\n/gs; $raw=~s/\f\r/\n/gs;
    $raw=~s/\r\n/\n/gs; $raw=~s/\n\r/\n/gs;
    $raw=~s/\f/\n/gs; $raw=~s/\r/\n/gs;
    $raw=~s/\&\#10\;/\n/gs; $raw=~s/\&\#13\;/\n/gs;

# Generate empty tags, remove wrong end tags
    $raw=~s/\<(br|hr|img|meta|allow|basefont)([^\>\/]*?)\>/\<$1$2 \/\>/gis;
    $raw=~s/\<\/(br|hr|img|meta|allow|basefont)\>//gis;
    unless ($full) {
       $raw=~s/\<[\/]*(body|head|html)\>//gis;
    }
# Make standard tags lowercase
    foreach ('html','body','head','meta','h1','h2','h3','h4','b','i','m',
             'table','tr','td','th','p','br','hr','img','embed','font',
             'a','strong','center','title','basefont','li','ol','ul',
             'input','select','form','option','script','pre') {
	$raw=~s/\<$_\s*\>/\<$_\>/gis;
        $raw=~s/\<\/$_\s*\>/<\/$_\>/gis;
        $raw=~s/\<$_\s([^\>]*)\>/<$_ $1\>/gis;
    }
    return $raw;
}

sub phaseone {
}

sub phasetwo {
}

# ---------------------------------------------------------------- Main Handler
sub handler {

  my $r=shift;


# Get query string for limited number of parameters

  &Apache::loncommon::get_unprocessed_cgi($ENV{'QUERY_STRING'},
					  ['filename']);

  if ($env{'form.filename'}) {
      $fn=$env{'form.filename'};
      $fn=~s/^http\:\/\/[^\/]+//;
  } else {
     $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
         ' unspecified filename for cleanup', $r->filename); 
     return HTTP_NOT_FOUND;
  }

  unless ($fn) { 
     $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
         ' trying to cleanup non-existing file', $r->filename); 
     return HTTP_NOT_FOUND;
  } 

# ----------------------------------------------------------- Start page output
  my $uname;
  my $udom;

  ($uname,$udom)=
    &Apache::loncacc::constructaccess($fn,$r->dir_config('lonDefDomain'));
  unless (($uname) && ($udom)) {
     $r->log_reason($uname.' at '.$udom.
         ' trying to cleanup file '.$env{'form.filename'}.
         ' ('.$fn.') - not authorized', 
         $r->filename); 
     return HTTP_NOT_ACCEPTABLE;
  }

  $fn=~s/\/\~(\w+)//;

  &Apache::loncommon::content_type($r,'text/html');
  $r->send_http_header;

  $r->print('<html><head><title>LON-CAPA Construction Space</title></head>');

  $r->print(&Apache::loncommon::bodytag('Cleanup XML Document'));

  if ($env{'form.phase'} eq 'two') {
      &phasetwo($r,$fn,$uname,$udom);
  } else {
      &phaseone($r,$fn,$uname,$udom);
  }

  $r->print('</body></html>');
  return OK;  
}

1;
__END__

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>