Annotation of doc/gutshtml/SessionFou1.html, revision 1.1

1.1     ! www         1: <html>
        !             2: <head>
        !             3: <meta name=Title
        !             4: content="Session Four: XML Handler (Simple tags, Globals, Multiple Targets, Style Files) (Guy)">
        !             5: <meta http-equiv=Content-Type content="text/html; charset=macintosh">
        !             6: <link rel=Edit-Time-Data href="Session%20Fou1_files/editdata.mso">
        !             7: <title>Session Four: XML Handler (Simple tags, Globals, Multiple Targets, Style 
        !             8: Files) (Guy)</title>
        !             9: <style><!--
        !            10: .MsoHeader
        !            11: 	{tab-stops:center 3.0in right 6.0in;
        !            12: 	font-size:10.0pt;
        !            13: 	font-family:"Times New Roman";}
        !            14: .MsoPlainText
        !            15: 	{font-size:10.0pt;
        !            16: 	font-family:"Courier New";}
        !            17: .Section1
        !            18: 	{page:Section1;}
        !            19: .Section2
        !            20: 	{page:Section2;}
        !            21: -->
        !            22: </style>
        !            23: </head>
        !            24: <body bgcolor=#FFFFFF link=blue vlink=purple class="Normal" lang=EN-US>
        !            25: <div class=Section1> 
        !            26:   <h2>Session Four: XML Handler (Simple tags, Globals, Multiple Targets, Style 
        !            27:     Files) (Guy)</h2>
        !            28:   <h3><a name="_Toc421867121">XML Files</a></h3>
        !            29:   <p><span style='color:black'>All HTML / XML files are run through the lonxml 
        !            30:     handler before being served to a user. This allows us to rewrite many portions 
        !            31:     of a document and to support serverside tags. There are 2 ways to add new 
        !            32:     tags to the xml parsing engine, either through LON-CAPA style files or by 
        !            33:     writing Perl tag handlers for the desired tags. </span></p>
        !            34:   <p><span style='color:black'><b>Global Variables</b></span></p>
        !            35:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !            36:     <i>$Apache::lonxml::debug</i></span><span
        !            37: style='color:black'> - debugging control </span></p>
        !            38:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !            39:     <i>@Apache::lonxml::pwd</i></span><span
        !            40: style='color:black'> - path to the directory containing the file currently being 
        !            41:     processed </span></p>
        !            42:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !            43:     <i>@Apache::lonxml::outputstack</i></span><span
        !            44: style='color:black'> </span></p>
        !            45:   <p><span style='color:black'><i>$Apache::lonxml::redirection</i></span><span
        !            46: style='color:black'> - these two are used for capturing a subset of the output 
        !            47:     for later processing; don't touch them directly, use &amp;startredirection 
        !            48:     and &amp;endredirection </span></p>
        !            49:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !            50:     <i>$Apache::lonxml::import</i></span><span
        !            51: style='color:black'> - controls whether the &lt;import&gt; tag actually does anything 
        !            52:     </span></p>
        !            53:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !            54:     <i>@Apache::lonxml::extlinks</i></span><span
        !            55: style='color:black'> - a list of URLs that the user is allowed to look at because 
        !            56:     of the current resource (images, and links) </span></p>
        !            57:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !            58:     <i>$Apache::lonxml::metamode</i></span><span
        !            59: style='color:black'> - some output is turned off, the meta target wants a specific 
        !            60:     subset, use &lt;output&gt; to guarantee that the contained data will be in 
        !            61:     the parsing output </span></p>
        !            62:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !            63:     <i>$Apache::lonxml::evaluate</i></span><span
        !            64: style='color:black'> - controls whether run::evaluate actually dereferences variable 
        !            65:     references </span></p>
        !            66:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !            67:     <i>%Apache::lonxml::insertlist</i></span><span
        !            68: style='color:black'> - data structure for edit mode, determines what tags can 
        !            69:     go into what other tags </span></p>
        !            70:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !            71:     <i>@Apache::lonxml::namespace</i></span><span
        !            72: style='color:black'> - stores the list of tag namespaces used in the insertlist.tab 
        !            73:     file that are currently active, used only in edit mode. </span></p>
        !            74:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !            75:     <i>$Apache::lonxml::registered</i></span><span
        !            76: style='color:black'> - set to 1 once the remote has been updated to know what 
        !            77:     resource we are looking at. </span></p>
        !            78:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !            79:     <i>$Apache::lonxml::request</i></span><span
        !            80: style='color:black'> - current Apache request object, or undef </span></p>
        !            81:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !            82:     <i>$Apache::lonxml::curdepth</i></span><span
        !            83: style='color:black'> - current depth of the overall parse depth. Will be a string 
        !            84:     like: 2_3_1 (first tag in the third second level tag in the second toplevel 
        !            85:     tag). It gets set by callsub, and can be used in Perl tag implementations. 
        !            86:     It relies upon the internal globals: <i>@Apache::lonxml::depthcounter</i></span><span
        !            87: style='color:black'>, <i>$Apache::lonxml::depth</i></span><span
        !            88: style='color:black'>, <i>$Apache::lonxml::olddepth</i></span><span
        !            89: style='color:black'> </span></p>
        !            90:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !            91:     <i>$Apache::lonxml::prevent_entity_encode</i></span><span
        !            92: style='color:black'> - By default the xmlparser will try to re-encode any 8-bit 
        !            93:     characters into HTMLEntity Codes. If this is set to a true value it will be 
        !            94:     prevented. </span></p>
        !            95:   <p><span style='color:black'>In common usage, <i>$Apache::lonxml::prevent_entity_encode</i></span><span
        !            96: style='color:black'>, <i>$Apache::lonxml::evaluate</i></span><span
        !            97: style='color:black'>, <i>$Apache::lonxml::metamode</i></span><span
        !            98: style='color:black'>, <i>$Apache::lonxml::import</i></span><span
        !            99: style='color:black'>, should never be set to a value directly, but rather incremented 
        !           100:     when you want the effect on, and decremented when you want the effect off. 
        !           101:     </span></p>
        !           102:   <p><span style='color:black'><b>Notable Perl subroutines</b></span></p>
        !           103:   <p><span style='color:black'>If not specified these functions are in Apache::lonxml 
        !           104:     </span></p>
        !           105:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           106:     <i>xmlparse</i></span><span
        !           107: style='color:black'> - see the XMLPARSE figure - also not callable from inside 
        !           108:     a tag, if one needs to restart parsing, either add a new LCParser to 
        !           109:     the parser stack using the newparser function, or call inner_xmlparse, 
        !           110:     see the xmlparse function in scripttag.pm </span></p>
        !           111:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           112:     <i>recurse</i></span><span
        !           113: style='color:black'> - acts just like <i>xmlparse</i></span><span
        !           114: style='color:black'>, except it doesn't do the style definition check; it always 
        !           115:     calls <i>callsub</i></span><span style='color:black'> </span></p>
        !           116:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           117:     <i>callsub</i></span><span
        !           118: style='color:black'> - callsub checks whether a Perl subroutine is defined for the current 
        !           119:     tag and calls it. Otherwise it just returns the tag as it was read in. It also 
        !           120:     will throw on a default editing interface unless the tag has a defined subroutine 
        !           121:     that either returns something or requests that call sub not add the editing 
        !           122:     interface. </span></p>
        !           123:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           124:     <i>afterburn</i></span><span
        !           125: style='color:black'> - called on the output of xmlparse, it can add highlights, 
        !           126:     anchors, and links to regular expression matches to the output. </span></p>
        !           127:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           128:     <i>register_insert</i></span><span
        !           129: style='color:black'> - builds the %Apache::lonxml::insertlist structure of what 
        !           130:     tags can have what other tags inside. </span></p>
        !           131:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           132:     <i>whichuser</i></span><span
        !           133: style='color:black'> - returns a list of $symb, $courseid, $domain, $name that 
        !           134:     is correct for calls to lonnet functions for this setup. Uses form.grade_ 
        !           135:     parameters, if the user is allowed to mgr in the course </span></p>
        !           136:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           137:     <i>setup_globals</i></span><span
        !           138: style='color:black'> - initializes all lonxml globals when xmlparse is called. 
        !           139:     If you intend to create a new target you will likely need to tweak how the 
        !           140:     globals are setup upon start up. </span></p>
        !           141:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           142:     <i>init_safespace</i></span><span
        !           143: style='color:black'> - creates Holes to external functions, creates some global 
        !           144:     variables, and sets the permitted operators of the global Safespace interpreter. 
        !           145:     </span></p>
        !           146:   <p><span style='color:black'><b>Functions Tag Handlers can use</b></span></p>
        !           147:   <p><span style='color:black'>If not specified these functions are in Apache::lonxml 
        !           148:     </span></p>
        !           149:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           150:     <i>debug</i></span><span
        !           151: style='color:black'> - a function to call to printout debugging messages. Will 
        !           152:     only print when Apache::lonxml::debug is set to 1 </span></p>
        !           153:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           154:     <i>warning</i></span><span
        !           155: style='color:black'> - a function to use for warning messages. The message will 
        !           156:     appear at the top of a resource when it is viewed in construction space only. 
        !           157:     </span></p>
        !           158:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           159:     <i>error</i></span><span
        !           160: style='color:black'> - a function to use for error messages. The message will 
        !           161:     appear at the top of a resource when it is viewed in construction space, and 
        !           162:     will message the resource author and course instructor, while informing the 
        !           163:     student that an error has occurred otherwise. </span></p>
        !           164:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           165:     <i>get_all_text</i></span><span
        !           166: style='color:black'> - 2 args, tag to look for (need to use /tag to look for an 
        !           167:     end tag) and a HTML::TokeParser reference, it will repeatedly get text from 
        !           168:     the TokeParser until the requested tag is found. It will return all of the 
        !           169:     document it pulled from the TokeParser. (See Apache::scripttag::start_script 
        !           170:     for an example of usage.) </span></p>
        !           171:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           172:     <i>get_param</i></span><span
        !           173: style='color:black'> - 4 arguments, first is a scalar string of the argument needed, 
        !           174:     second is a reference to the parser arguments stack, third is a reference 
        !           175:     to the Safe space, and fourth is an optional &quot;context&quot; value. This 
        !           176:     subroutine allows a tag to get a tag argument, after being interpolated inside 
        !           177:     the Safe space. This should be used if the tag might use a safe space variable 
        !           178:     reference for the tag argument. (See Apache::scripttag::start_script for an 
        !           179:     example.) This version only handles scalar variables. </span></p>
        !           180:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           181:     <i>get_param_var</i></span><span
        !           182: style='color:black'> - 4 arguments, first is a scalar string of the argument needed, 
        !           183:     second is a reference to the parser arguments stack, third is a reference 
        !           184:     to the Safe space, and fourth is an optional &quot;context&quot; value. This 
        !           185:     subroutine allows a tag to get a tag argument, after being interpolated inside 
        !           186:     the Safe space. This should be used if the tag might use a safe space variable 
        !           187:     reference for the tag argument. (See Apache::scripttag::start_script for an 
        !           188:     example.) This version can handle list or hash variables properly. </span></p>
        !           189:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           190:     <i>description</i></span><span
        !           191: style='color:black'> - 1 argument, the token object. This will return the textual 
        !           192:     description of the current tag from the insertlist.tab file. </span></p>
        !           193:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           194:     <i>whichuser</i></span><span
        !           195: style='color:black'> - 0 arguments. This will take a look at the current environment 
        !           196:     setting and return the current $symb, $courseid, $udom, $uname. You should 
        !           197:     always use this function if you want to determine who the current user is. 
        !           198:     (Since an instructor might be trying to view a student's version of a resource.) 
        !           199:     </span></p>
        !           200:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           201:     <i>inner_xmlparse</i></span><span
        !           202: style='color:black'> - 6 arguments, the target, an array pointer to the current 
        !           203:     stack of tags, an array pointer to the current stack of tag arguments, an 
        !           204:     array pointer to the current stack of LCParser's, a pointer to the current 
        !           205:     Safe space, a pointer to the hash of current style definitions </span></p>
        !           206:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           207:     <i>newparser</i></span><span
        !           208: style='color:black'> - 3 args, first is a reference to the parser stack, second 
        !           209:     should be a reference to a string scalar containing the text the newparser should 
        !           210:     run over, third should be a scalar of the directory path the file the parser 
        !           211:     is parsing was in. (See Apache::scripttag::start_import for an example.) </span></p>
        !           212:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           213:     <i>register</i></span><span
        !           214: style='color:black'> - should be called in a file's BEGIN block. 2 arguments, 
        !           215:     a scalar string, and a list of strings. This allows a file to register what 
        !           216:     tags it handles, and what the namespace of those tags are. Example: </span></p>
        !           217:   <p><span style='font-family:"Courier New";color:black'>sub BEGIN {</span></p>
        !           218:   <p><span style='font-family:"Courier New";color:black'>&nbsp; &amp;Apache::lonxml::register('Apache::scripttag',('script','display'));</span></p>
        !           219:   <p><span style='font-family:"Courier New";color:black'>}</span></p>
        !           220:   <p><span style='color:black'>Would tell xmlparse that in Apache::scripttag it 
        !           221:     can find handlers for &lt;script&gt; and &lt;display&gt;, if one registers 
        !           222:     a tag that was already registered the previous one is remembered and will 
        !           223:     be restored on a deregister. </span></p>
        !           224:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           225:     <i>deregister</i></span><span
        !           226: style='color:black'> - used to remove a previously registered tag implementation. 
        !           227:     It will restore the previous registration if there was one. </span></p>
        !           228:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           229:     <i>startredirection</i></span><span
        !           230: style='color:black'> - used when a tag wants to save a portion of the document 
        !           231:     for its end tag to use, but wants the intervening document to be normally 
        !           232:     processed. (See Apache::scripttag::start_window for an example.) </span></p>
        !           233:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           234:     <i>endredirection</i></span><span
        !           235: style='color:black'> - used to stop preventing xmlparse from hiding output. The 
        !           236:     return value is everything that xmlparse has processed since the corresponding 
        !           237:     startredirection. (See Apache::scripttag::end_window for an example.) </span></p>
        !           238:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           239:     <i>Apache::run::evaluate</i></span><span
        !           240: style='color:black'> - 3 args, first a string, second a reference to the Safe 
        !           241:     space, 3 a string to be evaluated before the first arg. This subroutine will 
        !           242:     do variable interpolation and simple function interpolations on the first 
        !           243:     argument. (See Apache::lonxml::inner_xmlparse for an example.) </span></p>
        !           244:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           245:     <i>Apache::run::run</i></span><span
        !           246: style='color:black'> - 2 args, first a string, second a reference to the Safe 
        !           247:     space. This handles passing the passed string into the Safe space for evaluation 
        !           248:     and then returns the result. (See Apache::scripttag::start_script for an example.)</span></p>
        !           249:   <h3><a name="_Toc421867122">Style Files</a></h3>
        !           250:   <p><span style='color:black'> <img width=432 height=255
        !           251: src="Session%20Fou1_files/image002.jpg" v:shapes="_x0000_i1025"> </span></p>
        !           252:   <p><span style='font-size:14.0pt;color:black'><b>Fig. 2.4.1</b></span><span
        !           253: style='font-size:14.0pt;color:black'> &ndash; Using a style file</span></p>
        !           254:   <p><span style='color:black'><b>Style File specific tags</b></span></p>
        !           255:   <p><span style='color:black'><b>&lt;definetag&gt;</b></span><span
        !           256: style='color:black'> - 2 arguments, <i>name</i></span><span style='color:black'> 
        !           257:     name of new tag being defined, if preceded by a / defining an end tag, 
        !           258:     required; <i>parms</i></span><span style='color:black'> parameters of the 
        !           259:     new tag, the value of these parameters can be accessed by $parametername. </span></p>
        !           260:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           261:     <b>&lt;render&gt;</b></span><span
        !           262: style='color:black'> - define what the new tag does for a non meta target </span></p>
        !           263:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           264:     <b>&lt;meta&gt;</b></span><span
        !           265: style='color:black'> - define what the new tag does for a meta target </span></p>
        !           266:   <p><span style='color:black'>*&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 
        !           267:     <b>&lt;tex&gt; / &lt;web&gt; / &lt;latexsource&gt;</b></span><span style='color:black'> 
        !           268:     - define what a new tag does for a specific non-meta target; all data inside 
        !           269:     a &lt;render&gt; is rendered to all targets except when surrounded by specific 
        !           270:     target tags.</span><span style='font-size:16.0pt;color:black'> </span></p>
        !           271:   <p class=MsoHeader> <img width=432 height=243
        !           272: src="Session%20Fou1_files/image005.png" v:shapes="_x0000_i1026"> </p>
        !           273:   <p><span style='font-size:14.0pt'><b>Fig. 2.4.2</b></span><span
        !           274: style='font-size:14.0pt'> &ndash; The parser</span></p>
        !           275:   <h3><a name="_Toc421867123">HTML::LCParser - Alternative HTML::Parser interface</a></h3>
        !           276:   <p class=MsoPlainText>SYNOPSIS</p>
        !           277:   <p class=MsoPlainText>&nbsp;require HTML::LCParser;</p>
        !           278:   <p class=MsoPlainText>&nbsp;$p = HTML::LCParser-&gt;new(&quot;index.html&quot;) 
        !           279:     || die &quot;Can't open: $!&quot;;</p>
        !           280:   <p class=MsoPlainText>&nbsp;while (my $token = $p-&gt;get_token) {</p>
        !           281:   <p class=MsoPlainText>&nbsp;&nbsp;&nbsp;&nbsp; #...</p>
        !           282:   <p class=MsoPlainText>&nbsp;}</p>
        !           283:   <p class=MsoPlainText>DESCRIPTION</p>
        !           284:   <p class=MsoPlainText>The C&lt;HTML::LCParser&gt; is an alternative interface 
        !           285:     to the</p>
        !           286:   <p class=MsoPlainText>C&lt;HTML::Parser&gt; class.&nbsp; It is an C&lt;HTML::PullParser&gt; 
        !           287:     subclass.</p>
        !           288:   <p class=MsoPlainText>The following methods are available:</p>
        !           289:   <p class=MsoPlainText>* $p = HTML::LCParser-&gt;new( $file_or_doc );</p>
        !           290:   <p class=MsoPlainText>The object constructor argument is either a file name, 
        !           291:     a file handle</p>
        !           292:   <p class=MsoPlainText>object, or the complete document to be parsed.</p>
        !           293:   <p class=MsoPlainText>If the argument is a plain scalar, then it is taken as 
        !           294:     the name of a</p>
        !           295:   <p class=MsoPlainText>file to be opened and parsed.&nbsp; If the file can't 
        !           296:     be opened for</p>
        !           297:   <p class=MsoPlainText>reading, then the constructor will return an undefined 
        !           298:     value and $!</p>
        !           299:   <p class=MsoPlainText>will tell you why it failed.</p>
        !           300:   <p class=MsoPlainText>If the argument is a reference to a plain scalar, then 
        !           301:     this scalar is</p>
        !           302:   <p class=MsoPlainText>taken to be the literal document to parse.&nbsp; The value 
        !           303:     of this</p>
        !           304:   <p class=MsoPlainText>scalar should not be changed before all tokens have been 
        !           305:     extracted.</p>
        !           306:   <p class=MsoPlainText>Otherwise the argument is taken to be some object that 
        !           307:     the</p>
        !           308:   <p class=MsoPlainText>C&lt;HTML::LCParser&gt; can read() from when it needs 
        !           309:     more data.&nbsp; Typically</p>
        !           310:   <p class=MsoPlainText>it will be a filehandle of some kind.&nbsp; The stream 
        !           311:     will be read() until</p>
        !           312:   <p class=MsoPlainText>EOF, but not closed.</p>
        !           313:   <p class=MsoPlainText>It also will turn attr_encoded on by default.</p>
        !           314:   <p class=MsoPlainText>* $p-&gt;get_token</p>
        !           315:   <p class=MsoPlainText>This method will return the next I&lt;token&gt; found 
        !           316:     in the HTML document,</p>
        !           317:   <p class=MsoPlainText>or C&lt;undef&gt; at the end of the document.&nbsp; The 
        !           318:     token is returned as an</p>
        !           319:   <p class=MsoPlainText>array reference.&nbsp; The first element of the array 
        !           320:     will be a (mostly)</p>
        !           321:   <p class=MsoPlainText>single character string denoting the type of this token: 
        !           322:     &quot;S&quot; for start</p>
        !           323:   <p class=MsoPlainText>tag, &quot;E&quot; for end tag, &quot;T&quot; for text, 
        !           324:     &quot;C&quot; for comment, &quot;D&quot; for</p>
        !           325:   <p class=MsoPlainText>declaration, and &quot;PI&quot; for process instructions.&nbsp; 
        !           326:     The rest of the array</p>
        !           327:   <p class=MsoPlainText>is the same as the arguments passed to the corresponding 
        !           328:     HTML::Parser</p>
        !           329:   <p class=MsoPlainText>v2 compatible callbacks (see L&lt;HTML::Parser&gt;).&nbsp; 
        !           330:     In summary, returned</p>
        !           331:   <p class=MsoPlainText>tokens look like this:</p>
        !           332:   <p class=MsoPlainText>&nbsp; [&quot;S&quot;,&nbsp; $tag, $attr, $attrseq, $text, 
        !           333:     $line]</p>
        !           334:   <p class=MsoPlainText>&nbsp; [&quot;E&quot;,&nbsp; $tag, $text, $line]</p>
        !           335:   <p class=MsoPlainText>&nbsp; [&quot;T&quot;,&nbsp; $text, $is_data, $line]</p>
        !           336:   <p class=MsoPlainText>&nbsp; [&quot;C&quot;,&nbsp; $text, $line]</p>
        !           337:   <p class=MsoPlainText>&nbsp; [&quot;D&quot;,&nbsp; $text, $line]</p>
        !           338:   <p class=MsoPlainText>&nbsp; [&quot;PI&quot;, $token0, $text, $line]</p>
        !           339:   <p class=MsoPlainText>where $attr is a hash reference, $attrseq is an array 
        !           340:     reference and</p>
        !           341:   <p class=MsoPlainText>the rest are plain scalars.</p>
        !           342:   <p class=MsoPlainText>* $p-&gt;unget_token($token,...)</p>
        !           343:   <p class=MsoPlainText>If you find out you have read too many tokens you can 
        !           344:     push them back,</p>
        !           345:   <p class=MsoPlainText>so that they are returned the next time $p-&gt;get_token 
        !           346:     is called.</p>
        !           347:   <p class=MsoPlainText>* $p-&gt;get_tag( [$tag, ...] )</p>
        !           348:   <p class=MsoPlainText>This method returns the next start or end tag (skipping 
        !           349:     any other</p>
        !           350:   <p class=MsoPlainText>tokens), or C&lt;undef&gt; if there are no more tags in 
        !           351:     the document.&nbsp; If</p>
        !           352:   <p class=MsoPlainText>one or more arguments are given, then we skip tokens until 
        !           353:     one of the</p>
        !           354:   <p class=MsoPlainText>specified tag types is found.&nbsp; For example:</p>
        !           355:   <p class=MsoPlainText>&nbsp;&nbsp; $p-&gt;get_tag(&quot;font&quot;, &quot;/font&quot;);</p>
        !           356:   <p class=MsoPlainText>will find the next start or end tag for a font-element.</p>
        !           357:   <p class=MsoPlainText>The tag information is returned as an array reference 
        !           358:     in the same form</p>
        !           359:   <p class=MsoPlainText>as for $p-&gt;get_token above, but the type code (first 
        !           360:     element) is</p>
        !           361:   <p class=MsoPlainText>missing. A start tag will be returned like this:</p>
        !           362:   <p class=MsoPlainText>&nbsp; [$tag, $attr, $attrseq, $text]</p>
        !           363:   <p class=MsoPlainText>The tag names of end tags are prefixed with &quot;/&quot;, 
        !           364:     i.e. an end tag is</p>
        !           365:   <p class=MsoPlainText>returned like this:</p>
        !           366:   <p class=MsoPlainText>&nbsp; [&quot;/$tag&quot;, $text]</p>
        !           367:   <p class=MsoPlainText>* $p-&gt;get_text( [$endtag] )</p>
        !           368:   <p class=MsoPlainText>This method returns all text found at the current position. 
        !           369:     It will</p>
        !           370:   <p class=MsoPlainText>return a zero length string if the next token is not text.&nbsp; 
        !           371:     The</p>
        !           372:   <p class=MsoPlainText>optional $endtag argument specifies that any text occurring 
        !           373:     before the</p>
        !           374:   <p class=MsoPlainText>given tag is to be returned. All entities are unmodified.</p>
        !           375:   <p class=MsoPlainText>The $p-&gt;{textify} attribute is a hash that defines 
        !           376:     how certain tags can</p>
        !           377:   <p class=MsoPlainText>be treated as text.&nbsp; If the name of a start tag matches 
        !           378:     a key in this</p>
        !           379:   <p class=MsoPlainText>hash then this tag is converted to text.&nbsp; The hash 
        !           380:     value is used to</p>
        !           381:   <p class=MsoPlainText>specify which tag attribute to obtain the text from.&nbsp; 
        !           382:     If this tag</p>
        !           383:   <p class=MsoPlainText>attribute is missing, then the upper case name of the 
        !           384:     tag enclosed in</p>
        !           385:   <p class=MsoPlainText>brackets is returned, e.g. &quot;[IMG]&quot;.&nbsp; The 
        !           386:     hash value can also be a</p>
        !           387:   <p class=MsoPlainText>subroutine reference.&nbsp; In this case the routine is 
        !           388:     called with the</p>
        !           389:   <p class=MsoPlainText>start tag token content as its argument and the return 
        !           390:     value is treated</p>
        !           391:   <p class=MsoPlainText>as the text.</p>
        !           392:   <p class=MsoPlainText>The default $p-&gt;{textify} value is:</p>
        !           393:   <p class=MsoPlainText>&nbsp; {img =&gt; &quot;alt&quot;, applet =&gt; &quot;alt&quot;}</p>
        !           394:   <p class=MsoPlainText>This means that &lt;IMG&gt; and &lt;APPLET&gt; tags are 
        !           395:     treated as text, and that</p>
        !           396:   <p class=MsoPlainText>the text to substitute can be found in the ALT attribute.</p>
        !           397:   <p class=MsoPlainText>* $p-&gt;get_trimmed_text( [$endtag] )</p>
        !           398:   <p class=MsoPlainText>Same as $p-&gt;get_text above, but will collapse any sequences 
        !           399:     of white</p>
        !           400:   <p class=MsoPlainText>space to a single space character.&nbsp; Leading and trailing 
        !           401:     white space is</p>
        !           402:   <p class=MsoPlainText>removed.</p>
        !           403:   <p class=MsoPlainText>EXAMPLES</p>
        !           404:   <p class=MsoPlainText>This example extracts all links from a document.&nbsp; 
        !           405:     It will print one</p>
        !           406:   <p class=MsoPlainText>line for each link, containing the URL and the textual 
        !           407:     description</p>
        !           408:   <p class=MsoPlainText>between the &lt;A&gt;...&lt;/A&gt; tags:</p>
        !           409:   <p class=MsoPlainText>&nbsp; use HTML::LCParser;</p>
        !           410:   <p class=MsoPlainText>&nbsp; $p = HTML::LCParser-&gt;new(shift||&quot;index.html&quot;);</p>
        !           411:   <p class=MsoPlainText>&nbsp; while (my $token = $p-&gt;get_tag(&quot;a&quot;)) 
        !           412:     {</p>
        !           413:   <p class=MsoPlainText>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; my $url = $token-&gt;[1]{href} 
        !           414:     || &quot;-&quot;;</p>
        !           415:   <p class=MsoPlainText>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; my $text = $p-&gt;get_trimmed_text(&quot;/a&quot;);</p>
        !           416:   <p class=MsoPlainText>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; print &quot;$url\t$text\n&quot;;</p>
        !           417:   <p class=MsoPlainText>&nbsp; }</p>
        !           418:   <p class=MsoPlainText>This example extracts the &lt;TITLE&gt; from the document:</p>
        !           419:   <p class=MsoPlainText>&nbsp; use HTML::LCParser;</p>
        !           420:   <p class=MsoPlainText>&nbsp; $p = HTML::LCParser-&gt;new(shift||&quot;index.html&quot;);</p>
        !           421:   <p class=MsoPlainText>&nbsp; if ($p-&gt;get_tag(&quot;title&quot;)) {</p>
        !           422:   <p class=MsoPlainText>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; my $title = $p-&gt;get_trimmed_text;</p>
        !           423:   <p class=MsoPlainText>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; print &quot;Title: $title\n&quot;;</p>
        !           424:   <p class=MsoPlainText>&nbsp; }</p>
        !           425: </div>
        !           426: <br
        !           427: clear=ALL style='page-break-before:always;'>
        !           428: <div class=Section2> </div>
        !           429: </body>
        !           430: </html>

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>