Annotation of loncom/interface/lonhtmlgateway.pm, revision 1.1

1.1     ! faziophi    1: # The LearningOnline Network with CAPA
        !             2: # gateway for html input/output to be properly parsed and handled
        !             3: #
        !             4: # $Id:$
        !             5: #
        !             6: # Copyright Michigan State University Board of Trustees
        !             7: #
        !             8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
        !             9: #
        !            10: # LON-CAPA is free software; you can redistribute it and/or modify
        !            11: # it under the terms of the GNU General Public License as published by
        !            12: # the Free Software Foundation; either version 2 of the License, or
        !            13: # (at your option) any later version.
        !            14: #
        !            15: # LON-CAPA is distributed in the hope that it will be useful,
        !            16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        !            18: # GNU General Public License for more details.
        !            19: #
        !            20: # You should have received a copy of the GNU General Public License
        !            21: # along with LON-CAPA; if not, write to the Free Software
        !            22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
        !            23: #
        !            24: # /home/httpd/html/adm/gpl.txt
        !            25: #
        !            26: # http://www.lon-capa.org/
        !            27: #
        !            28: ######################################################################
        !            29: ######################################################################
        !            30: 
        !            31: =pod
        !            32: 
        !            33: =head1 NAME
        !            34: 
        !            35: Apache::lonhtmlgateway - properly parse and handle HTML input and output
        !            36: 
        !            37: =head1 SYNOPSIS
        !            38: 
        !            39: This is part of the LearningOnline Network with CAPA project
        !            40: described at http://www.lon-capa.org.
        !            41: 
        !            42: =head1 INTRODUCTION
        !            43: 
        !            44: lonhtmlgateway is an object-oriented module used to parse and correct
        !            45: malformed HTML input from the client, as well as to perform processing
        !            46: of custom LON-CAPA HTML output before it is sent along to the end-user.
        !            47: It replaces a number of subroutines in various modules, and adds new
        !            48: code to tidy and process malformed HTML using XML::LibXML. 
        !            49: 
        !            50: This module is intended to be used for all non-authoring perspectives
        !            51: in the system.
        !            52: 
        !            53: New to LON-CAPA version 3.0.
        !            54: 
        !            55: =head2 Example Usage
        !            56: 
        !            57: Below is intended code to be invoked and called for use outside 
        !            58: of this module:
        !            59: 
        !            60:     $gateway = Apache::lonhtmlgateway->new();
        !            61:     $gateway = Apache::lonhtmlgateway->new($target);
        !            62:     
        !            63:     $xhtml = $gateway->process_incoming_html($html);
        !            64:     $xhtml = $gateway->process_incoming_html($html, $legacy);
        !            65:     
        !            66:     $xml = $gateway->process_html_to_xml($html);
        !            67:     $xhtml = $gateway->process_xml_to_html($xml); 
        !            68:     
        !            69:     $bool = Apache::lonhtmlgateway->contains_block_level_tags($input);
        !            70:     
        !            71: =head1 GLOBAL VARIABLES
        !            72: 
        !            73: =over 4
        !            74: 
        !            75: =cut 
        !            76: 
        !            77: ######################################################################
        !            78: ######################################################################
        !            79: 
        !            80: package Apache::lonhtmlgateway;
        !            81: 
        !            82: use strict;
        !            83: use utf8;
        !            84: use Time::Local;
        !            85: use Time::HiRes;
        !            86: use Apache::lonlocal;
        !            87: use Apache::lonnet;
        !            88: use Apache::lonhtmlcommon;
        !            89: use Apache::lonxml;
        !            90: use Apache::lontexconvert;
        !            91: use lib '/home/httpd/lib/perl/';
        !            92: use LONCAPA;
        !            93: use XML::LibXML;
        !            94: use Encode;
        !            95: use HTML::Entities;
        !            96: use HTML::LCParser();
        !            97: use Safe();
        !            98: 
# Serialization switches for XML::LibXML.
# NOTE(review): these are `local` assignments made at file scope.  Per
# perlsub, `local` only lasts until the enclosing block/file finishes
# executing, so these values presumably revert once this module has been
# loaded and would not be in effect when the methods below run later --
# confirm before relying on them.
local $XML::LibXML::skipXMLDeclaration = 1;
local $XML::LibXML::skipDTD = 1;
local $XML::LibXML::setTagCompression = 1;
        !           102: 
        !           103: ##############################################
        !           104: ##############################################
        !           105: 
        !           106: =item %LONCAPA_ALLOWED_STANDARD_TAGS
        !           107: 
        !           108: This is a hash of all tags, both HTML and custom LON-CAPA tags that
        !           109: are allowed in non-authoring spaces.  Examples of this include
        !           110: course documents, bulletin boards, discussion posts, templated pages,
        !           111: etc.  In addition, in the event of rich text editing, the WYSIWYG
        !           112: editor needs to know how to display LON-CAPA custom tags as either
        !           113: inline-level (<span>) or block-level (<div>). Therefore, the hash is
        !           114: set up with uppercase tag names as keys ("H1"), and the corresponding
        !           115: entry an integer constant indicating that tag's role or purpose:
        !           116: 
        !           117: =over 4
        !           118: 
        !           119: =item 0 =
        !           120: 
        !           121: Tag is explicitly not allowed.  Currently not used anywhere in this
        !           122: module, but reserved for the future in case certain tags would like
        !           123: to be explicitly blacklisted.
        !           124: 
        !           125: =item 1 =
        !           126: 
        !           127: Tag is allowed, and in cases where it is unclear, is rendered as an
        !           128: inline-level element.  Example: <algebra> should be rendered as an 
        !           129: inline element.
        !           130: 
        !           131: =item 2 =
        !           132: 
        !           133: Tag is allowed, and in cases where it is unclear, is rendered as a
        !           134: block-level element.  Example: <md> should be rendered as a block
        !           135: element.
        !           136: 
        !           137: =back
        !           138: 
        !           139: =back
        !           140: 
        !           141: =cut
        !           142: 
        !           143: ##############################################
        !           144: ##############################################
        !           145: 
        !           146: our %LONCAPA_ALLOWED_STANDARD_TAGS = (
        !           147:     # standard html header tags
        !           148:     H1 => 2, H2 => 2, H3 => 2, H4 => 2, H5 => 2, H6 => 2,
        !           149:     # basic inline formatting and phrases
        !           150:     B => 1, I => 1, U => 1, STRONG => 1, EM => 1, STRIKE => 1,
        !           151:     BIG => 1, SMALL => 1, INS => 1, DEL => 1, S => 1,
        !           152:     Q => 1, DFN => 1, CODE => 1, SAMP => 1, KBD => 1, VAR => 1,
        !           153:     SUB => 1, SUP => 1,
        !           154:     # linking and embedding
        !           155:     A => 1, IMG => 1, 
        !           156:     # block level tags
        !           157:     P => 2, DIV => 2, OL => 2, UL => 2, LI => 2, ADDRESS => 2,
        !           158:     BR => 2, HR => 2, BLOCKQUOTE => 2, PRE => 2, 
        !           159:     # table-related tags
        !           160:     TABLE => 2, CAPTION => 2, TBODY => 2, TR => 2, TD => 2,
        !           161:     TH => 2, 
        !           162:     # LON-CAPA custom tags
        !           163:     M => 1, MI => 1, MD => 2, ALGEBRA => 1,
        !           164:     CHEM => 1
        !           165: );
        !           166: 
        !           167: ##############################################
        !           168: ##############################################
        !           169: 
        !           170: =head1 PARSING LON-CAPA CUSTOM TAGS
        !           171: 
        !           172: This module maintains a hash %custom_tag_parsers, containing 
        !           173: lowercase tag names as keys and function references as entries.
        !           174: Convention used here names the actual parsing function whose
        !           175: reference is stored here to be of the name &parse_tagname_tag().
        !           176: These functions are called during the processing of outgoing 
        !           177: HTML output in the &process_outgoing_html() function.
        !           178: 
        !           179: Each of these functions is passed the following arguments:
        !           180: 
        !           181: =over 4
        !           182: 
        !           183: =item self
        !           184: 
        !           185: Reference to Apache::lonhtmlgateway object calling the function.
        !           186: 
        !           187: =item input
        !           188: 
        !           189: Textual context extracted between the <tag> and </tag> tags.
        !           190: Note that this text I<could> contain HTML entities.  Thus, for 
        !           191: functions that cannot handle entitized input, 
        !           192: &HTML::Entities::decode_entities() should be called on this data
        !           193: before further handing it off.
        !           194: 
        !           195: =back
        !           196: 
        !           197: Example hash entry:
        !           198: 
        !           199:     mi => \&parse_mi_tag,
        !           200: 
        !           201: =head2 Currently Supported Custom Tags
        !           202: 
        !           203: =over 4
        !           204: 
        !           205: =item <algebra>
        !           206: 
        !           207: Intended to convert and simplify simple algebraic functions into
        !           208: readable output.  Corrects cases such as double negatives or 
        !           209: eliminates coefficients of 1 where appropriate.  The actual
        !           210: handling of content contained in this tag takes place inside
        !           211: L<Apache::lontexconvert>, which in turn uses the AlgParser 
        !           212: module to actually process the input.
        !           213: 
        !           214: Usage:
        !           215:     <algebra>2*x+(-5)</algebra>
        !           216: 
        !           217: =item <chem>
        !           218: 
        !           219: Formatter for chemical equations, adding superscripts, subscripts,
        !           220: and arrow characters where appropriate.  This parser is
        !           221: wholly contained inside this module, but is a copy of a routine
        !           222: found in homework/default_homework.lcpm.
        !           223: 
        !           224: Usage:
        !           225:     <chem>CH3CO2H + H2O <=> CH3CO2- + H3O+</chem>
        !           226: 
        !           227: =back
        !           228: 
        !           229: =head3 Math Mode Tags
        !           230: 
        !           231: These tags are intended for LaTeX math mode input, in order to
        !           232: produce complex mathematical and scientific constructs, which
        !           233: normal HTML cannot produce.  The output is later rendered by
        !           234: a user-defined TeX engine in web target, or handled directly
        !           235: in the case of tex target.  The only difference between the tags
        !           236: below is determining the author's intent on how to appropriately
        !           237: render the contents within the tag - this intent is
        !           238: important in preserving the What You See Is What You Get philosophy
        !           239: of the rich text editor.
        !           240: 
        !           241: =over 4
        !           242: 
        !           243: =item <mi>
        !           244: 
        !           245: Inline math mode tag.  Content is surrounded by "$" characters and
        !           246: passed to the parser for the <m> tag.
        !           247: 
        !           248: I<New for LON-CAPA 3.0>.
        !           249: 
        !           250: =item <md>
        !           251: 
        !           252: Display block math mode tag.  Content is surrounded by "\[" and 
        !           253: "\]" characters and passed to the parser for the <m> tag.
        !           254: 
        !           255: I<New for LON-CAPA 3.0>.
        !           256: 
        !           257: =item <m>
        !           258: 
        !           259: Math mode tag.  Allows author to fully specify the display of their
        !           260: TeX input, and contain mixed inline-and-block content within a single
        !           261: tag.  
        !           262: 
        !           263: Due to tools such as the rich text editor needing to know whether a
        !           264: custom tag is block-level or inline-level on render, the use of this
        !           265: tag is discouraged starting with LON-CAPA 3.0 although it will continue
        !           266: to function.  Fully compatible with legacy LON-CAPA 2.x content.
        !           267: 
        !           268: =back
        !           269: 
        !           270: =cut
        !           271: 
        !           272: ##############################################
        !           273: ##############################################
        !           274: 
        !           275: my %custom_tag_parsers = (
        !           276:     mi => \&parse_mi_tag,
        !           277:     md => \&parse_md_tag,
        !           278:     m => \&parse_m_tag,
        !           279:     algebra => \&parse_algebra_tag,
        !           280:     chem => \&parse_chem_tag
        !           281: );
        !           282: 
        !           283: ##############################################
        !           284: ##############################################
        !           285: 
        !           286: =head1 CLASS OBJECT CONSTRUCTOR
        !           287: 
        !           288: =over 4
        !           289: 
        !           290: =item new
        !           291: 
        !           292:     $gateway = Apache::lonhtmlgateway->new();
        !           293:     $gateway = Apache::lonhtmlgateway->new($target);
        !           294: 
        !           295: Constructs and returns a new gateway object.  An optional argument
        !           296: specifies the target of the output; it defaults to 'web'.
        !           297: Behind the scenes, a single XML::LibXML parser object is created
        !           298: for the gateway.  When the gateway object is destroyed, this
        !           299: parser object is released as well.
        !           300: 
        !           301: =back
        !           302: 
        !           303: =cut
        !           304: 
        !           305: ##############################################
        !           306: ##############################################
        !           307: 
        !           308: sub new {
        !           309:     my $invocant = shift;
        !           310:     my $class = ref($invocant) || $invocant;
        !           311:     my $target = shift;
        !           312:     # create a new parser instance for libxml
        !           313:     my $self = {
        !           314:         parser => XML::LibXML->new(),
        !           315:         target => ($target) ? $target : 'web'
        !           316:     };
        !           317:     # options for the libxml parser
        !           318:     $self->{parser}->recover(1);
        !           319:     $self->{parser}->recover_silently(1);
        !           320:     bless($self, $class);  # bless = pray that it works
        !           321:     return $self;
        !           322: }
        !           323: 
        !           324: sub DESTROY {
        !           325:     my $self = shift;
        !           326:     my $parser = $self->{parser};
        !           327:     undef $parser;  # destroy the parser instance
        !           328: }
        !           329: 
        !           330: ##############################################
        !           331: ##############################################
        !           332: 
        !           333: =head1 PUBLIC OBJECT METHODS
        !           334: 
        !           335: =over 4
        !           336: 
        !           337: =item process_html_to_xml
        !           338: 
        !           339:     $xml = $gateway->process_html_to_xml($html);
        !           340: 
        !           341: Takes presumably-malformed HTML, encodes ampersand characters,
        !           342: and passes the result to the XML::LibXML parser, which creates
        !           343: a DOM tree in memory of the content.  This parse is as error-tolerant
        !           344: as can be set, and libxml attempts to recover from any errors as much
        !           345: as possible. This DOM tree is then taken and serialized,
        !           346: eliminating unbalanced and malformed tags along the way. This
        !           347: XML code (without any header tags) is then returned to the caller.
        !           348: 
        !           349: =cut
        !           350: 
        !           351: ##############################################
        !           352: ##############################################
        !           353: 
        !           354: sub process_html_to_xml {
        !           355:     my $self = shift;
        !           356:     my $input = shift;
        !           357:     my $parser = $self->{parser};
        !           358: 
        !           359:     if (length($input) < 1) { return ""; }
        !           360:     
        !           361:     # only encode ampersands -- brackets may be valid tags
        !           362:     my $encoded = &HTML::Entities::encode_entities($input, '&');
        !           363:     
        !           364:     # for the <chem> tag, we want the strings "<=>", "<-", "->" to be properly
        !           365:     # entitized so the parser doesn't destroy it
        !           366:     $encoded =~ s/(\<\s*chem\s*>.*)\<\=\>(.*\<\s*\/chem\s*>)/$1\&lt\;\&\#61\;\&gt\;$2/gi;
        !           367:     $encoded =~ s/(\<\s*chem\s*>.*)\-\>(.*\<\s*\/chem\s*>)/$1\-\&gt\;$2/gi;
        !           368:     $encoded =~ s/(\<\s*chem\s*>.*)\<\-(.*\<\s*\/chem\s*>)/$1\&lt\;\-$2/gi;
        !           369:     
        !           370:     # parse into libXML to tidy tags, we suppress any errors
        !           371:     # because otherwise the parser complains about non-HTML
        !           372:     # tags to STDERR and the Apache error logs
        !           373:     my $dom = $parser->parse_html_string($encoded,
        !           374:         {
        !           375:             suppress_errors => 1,
        !           376:             suppress_warnings => 1,
        !           377:             recover => 2
        !           378:         }
        !           379:     );
        !           380:     # the dom returns a full <html> structure, so just get
        !           381:     # all the child nodes of the <body> tag and put them together
        !           382:     my @body_nodes = $dom->findnodes('/html/body');
        !           383:     my @body_children = $body_nodes[0]->childNodes;
        !           384:     my $xml = "";
        !           385:     foreach my $child (@body_children) {
        !           386:         $xml .= $child->toString();
        !           387:     }
        !           388:     # entities passed into $input are in the form of '&amp;lt;'
        !           389:     # they are double entities
        !           390:     return $xml;    
        !           391: }
        !           392: 
        !           393: ##############################################
        !           394: ##############################################
        !           395: 
        !           396: =item process_xml_to_html
        !           397: 
        !           398:     $xhtml = $gateway->process_xml_to_html($xml);
        !           399: 
        !           400: Takes XML input, decodes its HTML entities (reversing the ampersand
        !           401: encoding applied by process_html_to_xml), and returns the result.
        !           402: 
        !           403: =cut
        !           404: 
        !           405: ##############################################
        !           406: ##############################################
        !           407: 
        !           408: sub process_xml_to_html {
        !           409:     my $self = shift;
        !           410:     my $input = shift;
        !           411:     # decode one level of entities (XML) such that the
        !           412:     # output is returned to the original level of entities
        !           413:     # $input "&lt;" --> $xml "&amp;lt;" --> "&lt;"
        !           414:     my $xhtml = &HTML::Entities::decode_entities($input);
        !           415:     # now we have valid XHTML that can be stored and parsed
        !           416:     return $xhtml;
        !           417: }
        !           418: 
        !           419: ##############################################
        !           420: ##############################################
        !           421: 
        !           422: =item process_incoming_html
        !           423: 
        !           424:     $xhtml = $gateway->process_incoming_html($html);
        !           425:     $xhtml = $gateway->process_incoming_html($html, $legacy);
        !           426: 
        !           427: Designed to be called for all raw HTML inputs from the client
        !           428: side before storing or rendering data.  Decodes UTF-8 data,
        !           429: trims leading and trailing "\n" and "<br />" tags.  Processes
        !           430: the result through the XML parser, converts this back to
        !           431: balanced well-formed XHTML, re-encodes the result as UTF-8,
        !           432: and returns the result to the caller.
        !           433: 
        !           434: =over 4
        !           435: 
        !           436: =item legacy
        !           437: 
        !           438:     $legacy = 0;
        !           439:     $legacy = 1; 
        !           440: 
        !           441: I<(optional)> If true, adds additional processing intended
        !           442: to emulate LON-CAPA 2.x parsing of the content.
        !           443: 
        !           444: =back
        !           445: 
        !           446: =cut
        !           447: 
        !           448: ##############################################
        !           449: ##############################################
        !           450: 
        !           451: sub process_incoming_html {
        !           452:     # this should be called by all HTML inputs before storing
        !           453:     # data --> for consistency's sake, call process_html_to_xml
        !           454:     # afterwards if you need to embed this in XML later on
        !           455:     my $self = shift;
        !           456:     my $input = shift;
        !           457:     my $legacy = shift;
        !           458:     
        !           459:     # no idea why i have to call this to get unicode characters
        !           460:     # working, but i do, so here it is.
        !           461:     $input = &Encode::decode_utf8($input);
        !           462:     
        !           463:     # trim leading and trailing whitespace and HTML breaks
        !           464:     chomp($input);
        !           465:     $input =~ s/\s+$//s;
        !           466:     $input =~ s/^\s+//s;
        !           467:     $input =~ s/\<br\s*\/*\>$//s;
        !           468:     my $no_p_input = (length($input) > 0 && $input !~ m/.*\<[\s]*p[\s]*\>.*/is);
        !           469:     my $xml = $self->process_html_to_xml($input);
        !           470:     if ($legacy && !&contains_block_level_tags($input)) {
        !           471:         # the xml returns content inside a <p> tag
        !           472:         # if there are no block tags... thus to preserve
        !           473:         # old behavior, we strip out that <p></p>
        !           474:         if ($no_p_input) {
        !           475:             $xml =~ s/^\<p\>(.*)\<\/p\>/$1/si;
        !           476:         }
        !           477:     }
        !           478:     my $xhtml = $self->process_xml_to_html($xml);
        !           479:     # see above unicode encoding comment
        !           480:     $xhtml = &Encode::encode_utf8($xhtml);
        !           481:     return $xhtml;
        !           482: }
        !           483: 
        !           484: ##############################################
        !           485: ##############################################
        !           486: 
        !           487: =item process_outgoing_html
        !           488: 
        !           489:     $html = $gateway->process_outgoing_html($xhtml);
        !           490:     $html = $gateway->process_outgoing_html($xhtml, $legacy);
        !           491: 
        !           492: Designed to be called for all HTML outputs to the client
        !           493: side before rendering data.  This entitizes all non-allowed
        !           494: tags, as was previously done in Apache::lonfeedback, and
        !           495: processes and converts all LON-CAPA supported custom tags (see
        !           496: above) to their respective output HTML.
        !           497: 
        !           498: =over 4
        !           499: 
        !           500: =item legacy
        !           501: 
        !           502:     $legacy = 0;
        !           503:     $legacy = 1; 
        !           504: 
        !           505: I<(optional)> If true, adds additional processing intended
        !           506: to emulate LON-CAPA 2.x parsing of the content.  This includes
        !           507: behavior to convert "\n" to "<br />" if there are no block-level
        !           508: tags detected in the input.  In addition, raw URLs are converted
        !           509: automatically to <a> links.
        !           510: 
        !           511: =back
        !           512: 
        !           513: =back
        !           514: 
        !           515: =cut
        !           516: 
        !           517: ##############################################
        !           518: ##############################################
        !           519: 
        !           520: sub process_outgoing_html {
        !           521:     # this should be called on all HTML outputs before displaying
        !           522:     # because it will filter out all non-HTML+LONCAPA tags.
        !           523:     # tags are not filtered at input stage for greater backwards
        !           524:     # compatibility.  note that this disregards course preference.
        !           525:     my $self = shift;
        !           526:     my $input = shift;
        !           527:     my $legacy = shift;
        !           528:     
        !           529:     my %html = %Apache::lonhtmlgateway::LONCAPA_ALLOWED_STANDARD_TAGS;
        !           530:     # entitize all tags that are not explicitly allowed
        !           531:     $input =~ s/\<(\/?\s*(\w+)[^\>\<]*)/
        !           532:         {($html{uc($2)}&&(length($1)<1000))?"\<$1":"\&lt;$1"}/ge;
        !           533:     $input =~ s/(\<?\s*(\w+)[^\<\>]*)\>/
        !           534:         {($html{uc($2)}&&(length($1)<1000))?"$1\>":"$1\&gt;"}/ge;
        !           535:     if ($legacy) {
        !           536:         unless (&contains_block_level_tags($self, $input)) {
        !           537:             $input = $self->legacy_newline_to_br($input); 
        !           538:         }
        !           539:         $input = $self->legacy_raw_href_to_link($input);
        !           540:     }
        !           541:     # at this point, we need to convert our own custom tags
        !           542:     # into the appropriate output
        !           543:     # see above for supported tags
        !           544:     my $output = "";
        !           545:     my $parser = HTML::LCParser->new(\$input);
        !           546:     while (my $token = $parser->get_token()) {
        !           547:     	if ($token->[0] eq 'T') {
        !           548:     	    $output .= $token->[1];
        !           549:     	} elsif ($token->[0] eq 'D' || $token->[0] eq 'C') {
        !           550:     	    $output .= $token->[1];
        !           551:     	} elsif ($token->[0] eq 'PI' || $token->[0] eq 'E') {
        !           552:     	    $output .= $token->[2];
        !           553:     	} elsif ($token->[0] eq 'S') {
        !           554:     	    my $tag = lc($token->[1]);
        !           555:     	    if (exists($custom_tag_parsers{$tag})) {
        !           556:     	        my $text = $parser->get_text();
        !           557:     	        $output .= $custom_tag_parsers{$tag}(
        !           558:     	            $self, $text, $self->{target});
        !           559:     	    } else {
        !           560:     	        $output .= $token->[4];
        !           561:     	    }
        !           562:     	}
        !           563:     }
        !           564: 
        !           565:     return $output;
        !           566: }
        !           567: 
        !           568: ##############################################
        !           569: ##############################################
        !           570: 
        !           571: =head1 STATIC CLASS METHODS
        !           572: 
        !           573: The following are static class methods that can be called
        !           574: by any object.
        !           575: 
        !           576: =over 4
        !           577: 
        !           578: =item contains_block_level_tags
        !           579: 
        !           580:     $bool = Apache::lonhtmlgateway->contains_block_level_tags($input);
        !           581:     
        !           582: Uses a regular expression to find, in the input data, any tags 
        !           583: described in %LONCAPA_ALLOWED_STANDARD_TAGS as block-level.
        !           584: Returns 1 if true, 0 if false. 
        !           585: 
        !           586: =cut
        !           587: 
        !           588: ##############################################
        !           589: ##############################################
        !           590: 
        !           591: sub contains_block_level_tags {
        !           592:     my $class = shift;
        !           593:     my $input = shift;
        !           594:     my @block_level_tags = @{&get_block_level_tags($class)};
        !           595:     foreach my $tag (@block_level_tags) {
        !           596:         if ($input =~ m/\<\/?\s*$tag[^\>\<]*/gi) {
        !           597:             # if your input loves this regular expression
        !           598:             # as much as i do, then return true.
        !           599:             # it searches for either a <tag> or <tag />
        !           600:             return 1;
        !           601:         }
        !           602:     }
        !           603:     return 0;
        !           604: }
        !           605: 
        !           606: ##############################################
        !           607: ##############################################
        !           608: 
        !           609: =item get_block_level_tags
        !           610: 
        !           611:     $tags = Apache::lonhtmlgateway->get_block_level_tags();
        !           612:     
        !           613: Returns a reference to an array of any tags described in 
        !           614: %LONCAPA_ALLOWED_STANDARD_TAGS as block-level. Note that these
        !           615: tags are returned in no particular order, and the tag names
        !           616: are returned in uppercase.
        !           617: 
        !           618: 
        !           619: =cut
        !           620: 
        !           621: ##############################################
        !           622: ##############################################
        !           623: 
        !           624: sub get_block_level_tags {
        !           625:     my $class = shift;
        !           626:     my %html = %Apache::lonhtmlgateway::LONCAPA_ALLOWED_STANDARD_TAGS;
        !           627:     my @block = [];
        !           628:     foreach my $tag (keys(%html)) {
        !           629:         if ($html{$tag} == 2) {
        !           630:             push(@block, $tag);
        !           631:         }
        !           632:     }
        !           633:     return \@block;
        !           634: }
        !           635: 
        !           636: ##############################################
        !           637: ##############################################
        !           638: 
        !           639: =head2 Legacy Functions
        !           640: 
        !           641: These functions are intended to process input in the same or
        !           642: a similar way to how it was processed in LON-CAPA 2.x.
        !           643: 
        !           644: =item legacy_newline_to_br
        !           645: 
        !           646: I<(formerly Apache::lonfeedback::newline_to_br)>
        !           647: 
        !           648:     $converted = Apache::lonhtmlgateway->legacy_newline_to_br($input);
        !           649:     
        !           650: Parse the input using HTML::LCParser, and in any text nodes
        !           651: which contain "\n" characters, replace those characters with
        !           652: an HTML "<br />" tag.
        !           653: 
        !           654: =cut
        !           655: 
        !           656: ##############################################
        !           657: ##############################################
        !           658: 
        !           659: sub legacy_newline_to_br {
        !           660:     my $class = shift;
        !           661:     my $input = shift;
        !           662:     my $output;
        !           663:     my $parser = HTML::LCParser->new(\$input);
        !           664:     while (my $token = $parser->get_token()) {
        !           665:     	if ($token->[0] eq 'T') {
        !           666:     	    my $text = $token->[1];
        !           667:     	    $text =~ s/\n/\<br \/\>/g;
        !           668:     	    $output .= $text;
        !           669:     	} elsif ($token->[0] eq 'D' || $token->[0] eq 'C') {
        !           670:     	    $output .= $token->[1];
        !           671:     	} elsif ($token->[0] eq 'PI' || $token->[0] eq 'E') {
        !           672:     	    $output .= $token->[2];
        !           673:     	} elsif ($token->[0] eq 'S') {
        !           674:     	    $output .= $token->[4];
        !           675:     	}
        !           676:     }
        !           677:     return $output;
        !           678: }
        !           679: 
        !           680: ##############################################
        !           681: ##############################################
        !           682: 
        !           683: =item legacy_raw_href_to_link
        !           684: 
        !           685: I<(formerly Apache::lonhtmlcommon::raw_href_to_link)>
        !           686:     
        !           687:     $converted = Apache::lonhtmlgateway->legacy_raw_href_to_link($input);
        !           688:     
        !           689: Search for any links/URLs within the input text, and convert them
        !           690: to <a> tags whose content is embedded inside a <tt> tag.
        !           691: 
        !           692: =back
        !           693: 
        !           694: =cut
        !           695: 
        !           696: ##############################################
        !           697: ##############################################
        !           698: 
        !           699: sub legacy_raw_href_to_link {
        !           700:     my $class = shift;
        !           701:     my $input = shift;
        !           702:     $input =~ s/(https?\:\/\/[^\s\'\"\<]+)([\s\<]|$)/<a href="$1"><tt>$1<\/tt><\/a>$2/gi;
        !           703:     return $input;
        !           704: }
        !           705: 
        !           706: sub parse_algebra_tag {
        !           707:     my $self = shift;
        !           708:     my $input = shift;
        !           709:     # the <algebra> parser does NOT handle entities,
        !           710:     # unlike the general <m> parser; thus we run
        !           711:     # the content of this tag through HTML::Entities,
        !           712:     # decoding it first
        !           713:     $input = &HTML::Entities::decode($input);
        !           714:     return &Apache::lontexconvert::algebra($input,$self->{target});
        !           715: }
        !           716: 
        !           717: sub parse_mi_tag {
        !           718:     my $self = shift;
        !           719:     my $input = shift;
        !           720:     return &parse_m_tag($self, '$'.$input.'$');
        !           721: }
        !           722: 
        !           723: sub parse_md_tag {
        !           724:     my $self = shift;
        !           725:     my $input = shift;
        !           726:     return &parse_m_tag($self, '\['.$input.'\]');
        !           727: }
        !           728: 
        !           729: sub parse_m_tag {
        !           730:     my $self = shift;
        !           731:     my $input = shift;
        !           732:     return &Apache::lontexconvert::to_convert($input, $self->{target});
        !           733: }
        !           734: 
        !           735: sub parse_chem_tag {
        !           736:     my $self = shift;
        !           737:     my $input = shift;
        !           738:     my $target = $self->{target};
        !           739:     # as with the <algebra> tag, some portions of the
        !           740:     # <chem> input may be coming in encoded, especially
        !           741:     # arrows -- so decode it in HTML::Entities
        !           742:     $input = &HTML::Entities::decode($input);
        !           743:     my @tokens = split(/(\s\+|\->|<=>|<\-|\.)/,$input);
        !           744:     my $formula = '';
        !           745:     foreach my $token (@tokens) {
        !           746:     	if ($token eq '->' ) {
        !           747:     	    if ($target eq 'web') {
        !           748:     	        $formula .= '&#8594; ';
        !           749:     	    } else {
        !           750:     	        $formula .= '<m>\ensuremath{\rightarrow}</m> ';
        !           751:     	    }
        !           752:     	    next;
        !           753:     	}
        !           754:     	if ($token eq '<-' ) {
        !           755:     	    if ($target eq 'web') {
        !           756:     	        $formula .= '&#8592; ';
        !           757:     	    } else {
        !           758:     	        $formula .= '<m>\ensuremath{\leftarrow}</m> ';
        !           759:     	    }
        !           760:     	    next;
        !           761:     	}  
        !           762:     	if ($token eq '<=>') {
        !           763:     	    if ($target eq 'web') {
        !           764:     		$formula .= '&#8652; ';
        !           765:     	    } else {
        !           766:     		$formula .= '<m>\ensuremath{\rightleftharpoons}</m> ';
        !           767:     	    }
        !           768:     	    next;
        !           769:     	}
        !           770:     	if ($token eq '.') {
        !           771:     	  $formula =~ s/(\&nbsp\;| )$//;
        !           772:     	  $formula .= '&middot;';
        !           773:     	  next;
        !           774:     	}
        !           775:     	$token =~ /^\s*([\d|\/]*(?:&frac\d\d)?)(.*)/;
        !           776:             $formula .= $1 if ($1 ne '1');  # stoichiometric coefficient
        !           777:     	my $molecule = $2;
        !           778:     	# subscripts
        !           779:     	$molecule =~ s|(?<=[a-zA-Z\)\]\s])(\d+)|<sub>$1</sub>|g;
        !           780:     	# superscripts
        !           781:     	$molecule =~ s|\^(\d*[+\-]*)|<sup>$1</sup>|g;
        !           782:     	# strip whitespace
        !           783:     	$molecule =~ s/\s*//g;
        !           784:     	# forced space
        !           785:     	$molecule =~ s/_/ /g;
        !           786:     	$molecule =~ s/-/&minus;/g;
        !           787:     	$formula .= $molecule.'&nbsp;';
        !           788:     }
        !           789:     # get rid of trailing space
        !           790:     $formula =~ s/(\&nbsp\;| )$//;
        !           791:     return $formula;
        !           792: }
        !           793: 
        !           794: ##############################################
        !           795: ##############################################
        !           796: 
        !           797: =head1 AUTHORS
        !           798: 
        !           799: Phil Fazio
        !           800: 
        !           801: =head1 VERSION
        !           802: 
        !           803: $Id:$
        !           804: 
        !           805: =cut
        !           806: 
        !           807: ##############################################
        !           808: ##############################################

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>