--- loncom/xml/lonxml.pm 2005/01/28 09:49:52 1.352 +++ loncom/xml/lonxml.pm 2005/01/30 23:31:12 1.355 @@ -1,7 +1,7 @@ # The LearningOnline Network with CAPA # XML Parser Module # -# $Id: lonxml.pm,v 1.352 2005/01/28 09:49:52 albertel Exp $ +# $Id: lonxml.pm,v 1.355 2005/01/30 23:31:12 www Exp $ # # Copyright Michigan State University Board of Trustees # @@ -368,23 +368,29 @@ sub xmlparse { sub htmlclean { my ($raw,$full)=@_; +# Take care of CRLF etc - my $tree = HTML::TreeBuilder->new; - $tree->ignore_unknown(0); - - $tree->parse($raw); - - my $output= $tree->as_HTML(undef,' '); - - $output=~s/\<(br|hr|img|meta|allow)(.*?)\>/\<$1$2 \/\>/gis; - $output=~s/\<\/(br|hr|img|meta|allow)\>//gis; + $raw=~s/\r\f/\n/gs; $raw=~s/\f\r/\n/gs; + $raw=~s/\r\n/\n/gs; $raw=~s/\n\r/\n/gs; + $raw=~s/\f/\n/gs; $raw=~s/\r/\n/gs; + $raw=~s/\&\#10\;/\n/gs; $raw=~s/\&\#13\;/\n/gs; + +# Generate empty tags, remove wrong end tags + $raw=~s/\<(br|hr|img|meta|allow|basefont)([^\>\/]*?)\>/\<$1$2 \/\>/gis; + $raw=~s/\<\/(br|hr|img|meta|allow|basefont)\>//gis; unless ($full) { - $output=~s/\<[\/]*(body|head|html)\>//gis; + $raw=~s/\<[\/]*(body|head|html)\>//gis; } - - $tree = $tree->delete; - - return $output; +# Make standard tags lowercase + foreach ('html','body','head','meta','h1','h2','h3','h4','b','i','m', + 'table','tr','td','th','p','br','hr','img','embed','font', + 'a','strong','center','title','basefont','li','ol','ul', + 'input','select','form','option','script','pre') { + $raw=~s/\<$_\s*\>/\<$_\>/gis; + $raw=~s/\<\/$_\s*\>/<\/$_\>/gis; + $raw=~s/\<$_\s([^\>]*)\>/<$_ $1\>/gis; + } + return $raw; } sub latex_special_symbols { @@ -1262,7 +1268,7 @@ sub handler { } &Apache::loncommon::no_cache($request); $request->set_last_modified(&Apache::lonnet::metadata($request->uri, - 'lastrevisiondate'); + 'lastrevisiondate')); $request->send_http_header; return OK if $request->header_only;