# Normalize an HTML fragment into tidier, more XML-like markup.
#
# The text is parsed with HTML::TreeBuilder (unknown tags are kept because
# ignore_unknown is switched off), serialized again with as_HTML, and the
# result is post-processed so that
#   - <br>, <hr> and <img> start tags are written self-closing (<br />),
#   - any </br>, </hr> or </img> end tags are removed,
#   - attribute-less <html>, <head> and <body> tags (start and end) are
#     removed.
#
# Parameter: $raw - HTML text to clean; undef is treated as an empty string
# Returns:   the cleaned markup as a string
sub htmlclean {
    my $raw=shift;
    # Treat a missing argument as empty input instead of handing undef
    # to the parser.
    $raw='' unless defined($raw);

    my $tree = HTML::TreeBuilder->new;
    $tree->ignore_unknown(0);

    $tree->parse($raw);
    # parse() only feeds a chunk; the TreeBuilder documentation requires
    # eof() afterwards so pending input is flushed and the tree finalized.
    $tree->eof;

    # Arguments: default entity encoding, one space per indent level, and
    # an empty optional-end-tag map so that as_HTML does not leave out
    # end tags it would otherwise consider optional.
    my $output = $tree->as_HTML(undef,' ',{});

    # Self-close the empty elements. The lookahead keeps longer tag names
    # that merely start with br/hr/img from matching, and the attribute
    # part may contain '/' (e.g. src="/res/a.gif"); an already present
    # trailing '/' is absorbed so the rewrite is idempotent.
    $output=~s{<(br|hr|img)(?=[\s/>])([^>]*?)\s*/?>}{<$1$2 />}gi;
    $output=~s{</(?:br|hr|img)>}{}gi;
    # NOTE(review): wrapper tags that carry attributes (<body bgcolor=...>)
    # are not matched here and stay in the output while their end tags are
    # removed - confirm whether that is intended.
    $output=~s{</*(?:body|head|html)>}{}gi;

    # Release the tree explicitly.
    $tree->delete;

    return $output;
}