### Annotation of doc/help/texxml2latex.pl, revision 1.12

1.1       bowersj2    1: #!/usr/bin/perl
2:
1.2       bowersj2    3: # The LearningOnline Network with CAPA
4: # Converts a texxml file into a single tex file
5: #
6: # Copyright Michigan State University Board of Trustees
7: #
8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
9: #
10: # LON-CAPA is free software; you can redistribute it and/or modify
11: # it under the terms of the GNU General Public License as published by
12: # the Free Software Foundation; either version 2 of the License, or
13: # (at your option) any later version.
14: #
15: # LON-CAPA is distributed in the hope that it will be useful,
16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18: # GNU General Public License for more details.
19: #
20: # You should have received a copy of the GNU General Public License
21: # along with LON-CAPA; if not, write to the Free Software
22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23: #
25: #
26: # http://www.lon-capa.org/
27: #
28: # 7-16-2002 Jeremy Bowers
29:
1.1       bowersj2   30: use strict;
31: use HTML::TokeParser;
32: use GDBM_File;
1.5       bowersj2   33: use File::Temp;
1.1       bowersj2   34:
# texxml2latex.pl main script.
#
# Reads the texxml document named by the first command-line argument (not
# standard input) and writes one assembled LaTeX document to standard
# output: Latex_Header.tex, then the output of every start tag in document
# order, then Latex_Footer.tex.
#
# Usage: perl texxml2latex.pl FILE.texxml [--with-filenames]
#
# With --with-filenames as the second argument, the name of every included
# LaTeX fragment is printed in front of its contents.
#
# Any failure terminates the build with exit status 1.

use warnings;
use Cwd qw(getcwd);

# Abort the build with exit status 1.  die() uses a non-zero $! as the exit
# status, so $! is forced to 1 to get the same status for every failure.
sub terminate_build {
    my ($message) = @_;
    $! = 1;
    die $message;
}

# Open $path for reading and return the handle; terminate the build with
# $message when the file cannot be opened.
sub open_for_reading {
    my ($path, $message) = @_;
    open(my $fh, '<', $path) or terminate_build($message);
    return $fh;
}

# Copy everything readable from $fh to standard output, line by line, and
# close the handle afterwards.
sub print_handle {
    my ($fh) = @_;
    while (defined(my $line = <$fh>)) {
        print $line;
    }
    close($fh);
    return;
}

# Run @command (list form, so no shell is involved) with $directory as the
# working directory, then return to the directory we started from.  The
# relative paths used by the rest of the script depend on getting back.
# Returns the status reported by system().
sub run_in_directory {
    my ($directory, @command) = @_;
    my $start_dir = getcwd();
    defined $start_dir
        or terminate_build("Can't determine the current directory: $!");
    chdir($directory)
        or terminate_build("Can't enter directory $directory: $!");
    my $status = system(@command);
    chdir($start_dir)
        or terminate_build("Can't return to directory $start_dir: $!");
    return $status;
}

# Escape characters that are special to LaTeX so that $string can be used
# as plain text (e.g. inside \section{...}).  An undefined argument is
# treated as the empty string.  Returns the escaped string.
sub escape_latex {
    my ($string) = @_;
    $string = '' unless defined $string;
    # Backslashes go first, so the backslashes inserted by the rules below
    # are not themselves rewritten.
    $string =~ s/\\/\\ensuremath{\\backslash}/g;
    $string =~ s/([^\\]|^)\%/$1\\\%/g;
    $string =~ s/([^\\]|^)\$/$1\\\$/g;
    # "|^" so that an underscore at the very start is escaped as well, like
    # every other special character handled here.
    $string =~ s/([^\\]|^)\_/${1}\\_/g;
    # The rules above consume the character in front of the one they
    # escape, so the second character of a doubled pair is missed; these
    # three rules catch it.
    $string =~ s/\$\$/\$\\\$/g;
    $string =~ s/\_\_/\_\\\_/g;
    $string =~ s/\#\#/\#\\\#/g;
    $string =~ s/([^\\]|^)(\~|\^)/$1\\$2\\strut /g;
    $string =~ s/(>|<)/\\ensuremath\{$1\}/g; #more or less
    # any & or # leftover should be safe to just escape
    $string =~ s/([^\\]|^)\&/$1\\\&/g;
    $string =~ s/([^\\]|^)\#/$1\\\#/g;
    $string =~ s/\|/\$\\mid\$/g;
    return $string;
}

my $texxml_file = $ARGV[0];
defined $texxml_file
    or terminate_build("Usage: perl texxml2latex.pl FILE.texxml [--with-filenames]\n");
# The constructor returns undef (with $! set) when the file can't be read.
my $p = HTML::TokeParser->new($texxml_file)
    or terminate_build("Can't open texxml file $texxml_file: $!\n");

my $dirprefix = "../../loncom/html/adm/help/tex/";

my $include_filenames =
    (defined $ARGV[1] && $ARGV[1] eq '--with-filenames');

# Make myself a temp dir for processing POD.  CLEANUP => 1 removes it when
# the script exits, including when the build is terminated by die().
my $tmpdir = File::Temp::tempdir('loncapahelpgenXXXXXXX',
                                 TMPDIR => 1, CLEANUP => 1);

# Print the header
print_handle(open_for_reading(
    $dirprefix . "Latex_Header.tex",
    "Can't find LaTeX file $dirprefix/Latex_Header.tex; terminating build."));

# Tags that map directly onto the LaTeX sectioning command of the same name.
my %is_heading = map { $_ => 1 } qw(section subsection subsubsection);

while (my $token = $p->get_token()) {
    my $type = $token->[0];
    next unless $type eq 'S';       # only start tags produce output

    my $tag  = $token->[1];
    my $attr = $token->[2];

    if ($is_heading{$tag}) {
        print "\\", $tag, "{", escape_latex($attr->{'name'}), "}\n\n";
    }
    elsif ($tag eq 'file') {
        # Include a LaTeX fragment from the fragment directory verbatim.
        my $file = defined $attr->{'name'} ? $attr->{'name'} : '';
        my $fragment = open_for_reading(
            $dirprefix . $file,
            "Can't find LaTeX file $dirprefix/$file; terminating build.");
        if ($include_filenames) {
            print "\\textrm{File: \\bf " . escape_latex($file) . "}\\\\\n";
        }
        print_handle($fragment);
        print "\n\n";
    }
    elsif ($tag eq 'tex') {
        # Literal LaTeX given in the attribute, set off as its own paragraph.
        my $content = defined $attr->{'content'} ? $attr->{'content'} : '';
        print "\n\n";
        print $content;
        print "\n\n";
    }
    elsif ($tag eq 'pod') {
        my $pod_file = defined $attr->{'file'} ? $attr->{'file'} : '';
        my $section  = $attr->{'section'};
        my $h1level  = defined $attr->{'h1level'} ? $attr->{'h1level'} : '2';

        my $source   = '../../loncom/' . $pod_file;
        my $filename = substr($source, rindex($source, '/') + 1);

        # cp (rather than a plain content copy) keeps the permission bits.
        system('cp', $source, $tmpdir) == 0
            or terminate_build(
                "Can't copy POD source $source into $tmpdir; terminating build.");

        if (index($filename, '.') == -1) {
            # pod2latex *insists* that either the extension of the
            # file be .pl|.pm|.pod or that it be executable. Some
            # extension-less files like "lonsql" are none-of-the-above.
            rename("$tmpdir/$filename", "$tmpdir/$filename.pm")
                or terminate_build(
                    "Can't rename $filename to $filename.pm in $tmpdir: $!");
            $filename .= ".pm";
            print STDERR $filename . "\n";
        }

        # By default pod2latex writes its output next to the input, so it
        # is run from inside the temp directory.
        my @command = ('pod2latex', '-h1level', $h1level);
        push @command, '-section', $section if defined $section;
        push @command, $filename;
        # The status is not inspected; a failed conversion is caught just
        # below, when the expected .tex file turns out to be missing.
        run_in_directory($tmpdir, @command);

        my $latexFile = substr($filename, 0, rindex($filename, '.')) . '.tex';
        my $converted = open_for_reading(
            $tmpdir . '/' . $latexFile,
            "Latex file $latexFile not found while trying to use pod2latex, " .
            "terminating build");

        # pod2latex inserts \labels and \indexs for every section,
        # which is horrible because the section names tend to get
        # reused a lot. This filters those out, so we need to
        # create our own indexes.
        while (defined(my $line = <$converted>)) {
            $line =~ s/\\([^{]*)(section|paragraph)(\*?)\{([^\\]+)\\label\{[^\\]+\}\\index\{([^\\]+)\}\}/\\${1}${2}${3}\{${4}\}/g;
            print $line;
        }
        close($converted);
        print "\n\n";
    }
}

# Print out the footer.
print_handle(open_for_reading(
    $dirprefix . "Latex_Footer.tex",
    "Can't find LaTeX file $dirprefix/Latex_Footer.tex; terminating build."));

# The temp directory is removed automatically at exit (CLEANUP => 1 above).
1.8       bowersj2  149:
150: __END__
151:
152: =pod
153:
154: =head1 NAME
155:
156: texxml2latex.pl - core script that drives the help file assembly
157:   applications
158:
159: =head1 SYNOPSIS
160:
161: LON-CAPA's help system is based on assembling various pieces into
162: LaTeX files for conversion into printed documents. The various pieces
164:
166:
167: X<help system, overview>LON-CAPA's help system is based on the idea of
168: assembling various pieces as needed to create documents for printing,
169: and using these various pieces for online help. LaTeX is the primary
170: language of the help system, because we can easily convert it to HTML,
171: and it makes the nicest printed documents.
172:
173: The scripts for the help system are stored in /doc/help in the CVS
174: repository.
175:
177:
178: The help system can draw from the following sources to create help
179: documents:
180:
181: =over 4
182:
183: =item * B<LaTeX fragments>: LaTeX fragments stored in
184: C</loncom/html/adm/help/tex> in the CVS repository (which end up in
185: C</home/httpd/html/adm/help/tex>). A "LaTeX fragment" is a file that
186: contains LaTeX-style markup, but is not a complete LaTeX file with
188:
189: =item * B<perl POD documentation>: POD documentation may be extracted
190: from perl modules used in LON-CAPA, using the syntax described in
191: podselect's man page.
192:
193: =back
194:
196:
197: The online aspect of the help system is covered in the documentation
198: for loncommon.pm; see L<Apache::loncommon>, look for
199: C<help_open_topic>.
200:
202:
204: rendering the help files structure in a way that allows the user to
205: click through to the underlying help files; see
207: example. It's not very good, but it's marginally better than nothing.
208:
210:
211: Offline documents are generated from XML documents which tell a
212: rendering script how to assemble the various LaTeX fragments into a
213: single LaTeX file, which is then rendered into PostScript and PDF
215:
216: =head1 texxml And Rendering texxml
217:
219:
220: X<texxml>
221: texxml is a little XML file format used to specify to the texxml2*.pl
222: scripts how to assemble the input sources into LaTeX documents. texxml
223: files end in the .texxml extension, and there is one texxml file per
224: final rendered document.
225:
226: The texxml format is as follows: There is a root <texxml> element,
227: with no attributes and the following children:
228:
229: =over 4
230:
231: =item * B<title>: The B<name> attribute of this tag is used as the
232:    title of the document in texxml2index.pl; it is ignored in
233:    texxml2latex.pl. If you don't intend to offer online-access
234:    to the rendered documents this may be skipped.
235:
236: =item * B<section>, B<subsection>, and B<subsubsection>: These create
237:    the corresponding environments in the output file. The B<name>
238:    attribute is used to determine the name of the section.
239:
240: =item * B<file>: The C<name> attribute specifies a LaTeX fragment by
241:    filename. The file is assumed to be located in the
242:    C<loncom/html/adm/help/tex/> directory in the CVS repository. The
243:    C<.tex> is required.
244:
245: =item * B<tex>: The contents of the B<content> attribute are directly
246:    inserted into the rendered LaTeX file, followed by a paragraph
247:    break. This is generally used for little connective paragraphs in
248:    the documentation that don't make sense in the online help. See
249:    C<author.manual.texxml> for several example usages.
250:
251: =item * B<pod>: The B<file> attribute specifies a file to draw the POD
252:    documentation out of. The B<section> attribute is a section
253:    specification matching the format specified in the man page of
254:    podselect. By default, all POD will be included. The file is
255:    assumed to be relative to the C<loncom> directory in the CVS
256:    repository; you are allowed to escape from that with .. if
257:    necessary. The B<h1level> attribute can be used to change
258:    the default depth of the headings; by default, this is set to 2,
259:    which makes =head1 a "subsection". Setting this higher can allow
260:    you to bundle several related pod files together; see
261:    developer.manual.texxml for examples.
262:
263: =back
264:
265: texxml2latex.pl will automatically include C<Latex_Header.tex> at the
266: beginning and C<Latex_Footer.tex> at the end, to make a complete
267: LaTeX document.
268:
1.9       bowersj2  269: =head2 Rendering texxml
1.8       bowersj2  270:
1.8       bowersj2  272:
1.9       bowersj2  273: X<texxml, rendering>X<render.texxml.pl>The C<render.texxml.pl> script
274: takes a .texxml file, and produces PostScript and PDF files. The LaTeX
276: C</loncom/html/adm/help/eps/> directory while rendering. Call it as
277: follows, from the C<doc/help> directory:
1.8       bowersj2  278:
279:  perl render.texxml.pl -- author.manual.texxml
280:
281: substituting the appropriate texxml file.
282:
1.8       bowersj2  284:
1.9       bowersj2  285: X<texxml2latex.pl>texxml2latex.pl is a perl script that takes texxml in and assembles
1.8       bowersj2  286: the final LaTeX file, outputting it on stdout. Invoke it as follows:
287:
288:  perl texxml2latex.pl author.manual.texxml
289:
290: Note that error handling is minimal: if the script can not open a
291: .tex file named by a B<file> tag, it terminates the build with an
292: error message. Generally this means the file either could not be found, or you do not have
293: sufficient permissions with the current user to read it.
294:
1.8       bowersj2  296:
1.9       bowersj2  297: X<texxml2index.pl>texxml2index.pl is a perl script that takes texxml in and assembles a
1.8       bowersj2  298: file that can be used online to access all the .tex files that are
299: specified in the .texxml file. For an example of how this looks
300: online, see
302:
304:
305: There are a couple of scripts that you may find useful for creating
306: texxml-based help:
307:
1.8       bowersj2  309:
1.9       bowersj2  310: X<latexSplitter.py>latexSplitter.py is a Python script that helps you separate a
1.8       bowersj2  311: monolithic .tex file into the small pieces LON-CAPA's help system
312: expects. Invoke it like this:
313:
314:  python latexSplitter.py monolithic.tex
315:
316: where C<monolithic.tex> is the .tex file you want to split into
317: pieces. This requires Python 2.1 or greater (2.0 may work); on many
318: modern RedHat installs this is installed by default under the
319: executable name C<python2>.
320:
321: Use the program by highlighting the desired section, give it a file
322: name in the textbox near the bottom, and hit the bottom button. The
323: program will remove that text from the textbox, and create a file in
324: the C<loncom/html/adm/help/tex/> directory containing that LaTeX. For
325: consistency, you should use underscores rather than spaces in the
326: filename, and note there are a few naming conventions for the .tex
327: files, which you can see just by listing the
329:
330: The idea behind this program is that if you are writing a big document
331: from scratch, you can use a "real" program like LyX to create the .tex
332: file, then easily split it with this program.
333:
1.8       bowersj2  335:
1.9       bowersj2  336: X<simpleEdit.py>simpleEdit.py is a python script that takes a .texxml file and shows
1.8       bowersj2  337: all the tex files that went into it in sequence, allowing you to "edit"
338: the entire document as one entity. Note this is intended for simple
339: typo corrections and such in context, not major modification of the
340: document. Invoke it with
341:
342:  python simpleEdit.py author.manual.texxml
343:
344: Make your changes, and hit the "Save" button to save them.
345:
346: =head2 texxml LaTeX Feature Support
347:
349:
350: LaTeX has a cross-referencing system built around labeling points in
351: the document with \label, and referencing those labels with \ref. In a
352: complete LaTeX document, there's no problem because all \refs and
354: that is not in the current LaTeX fragment causes a TTH error when it
355: can't find the crossreference.
356:
357: The solution is to do the cross-references for TTH. When LON-CAPA is
358: installed, the C<rebuildLabelHash.pl>X<rebuildLabelHash.pl> script
359: is executed, which extracts all the labels from the LaTeX fragments
360: and stores them in the C<fragmentLabels.gdbm>X<fragmentLabels.gdbm> hash.
361: The C<lonhelp.pm> handler then replaces \refs with appropriate
362: HTML to provide a link to the referenced help file while online. Thus,
364:
366:
367: LaTeX has a popular index making package called MakeIndex. LON-CAPA's
368: help system supports this, so you can create indices using the \index
369: LaTeX command. In perl POD files, use the X command. Note that in both
1.9       bowersj2  370: cases the index text is not included in the render, so the index must
371: be included in addition to the indexed text, and need not match the
372: indexed text precisely.
1.8       bowersj2  373:
375:
376: Adopting a little bit from everybody who has included POD in their
377: documents to date, the help system is going to expect the following
378: format for POD documentation.
379:
381: as shown). The following paragraph should extremely briefly describe
382: what the module does and contains. Example:
383:
385:
386:  Apache::lonflunkstudent - provides interface to set all
387:    student assessments point score to 0
388:
389: Next should be a C<head1> titled C<SYNOPSIS> which contains a
390: paragraph or two description of the module.
391:
393:
394:  lonflunkstudent provides a handler to select a student and set all
395:  assignment values to zero, thereby flunking the student.
396:
397:  Routines for setting all assessments to some value are provided by
398:  this module, as well as some useful student taunting routines.
399:
400: Optionally, an C<OVERVIEW> section can be included. This can then be
401: extracted by the help system for the LON-CAPA subsystems overview
402: chapter. The overview should be a relatively high-level, but still
403: technical, overview of the module, sufficient to give the reader
404: enough context to understand what the module does, what it might be
405: useful for in other contexts, and what is going on in the code when it
407:
408: The remainder should be formatted as appropriate for the file, such
409: that discarding the NAME, SYNOPSIS, and OVERVIEW sections provides a
1.9       bowersj2  410: useful API overview of the module. This may be anything from an
411: elaborate discussion of the data structures, algorithms, and design
412: principles that went into the module, or a simple listing of
413: what functions exist, how to call them, and what they return, as
414: appropriate.
1.8       bowersj2  415:
416: Routines that are private to the module should B<not> be documented;
417: document them in perl comments, or, as is the style of the time, not
418: at all, as is appropriate.
419:
420: Method and function names should be bolded when being
1.9       bowersj2  421: documented.
422:
423: Literal strings such as filenames should be enclosed in
1.8       bowersj2  424: the C command, like this: C</home/httpd/lonTabs/>.
1.9       bowersj2  425:
426: Indexing can be done with the X command in perldoc, and should be used
427: as appropriate. Do not include X commands in the headings; the output
428: from pod2latex screws up some regexes in texxml2latex.pl.
1.8       bowersj2  429:
430: =cut


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>