Annotation of doc/help/texxml2latex.pl, revision 1.12

1.1       bowersj2    1: #!/usr/bin/perl
                      2: 
1.2       bowersj2    3: # The LearningOnline Network with CAPA
                      4: # Converts a texxml file into a single tex file
                      5: #
                      6: # Copyright Michigan State University Board of Trustees
                      7: #
                      8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
                      9: #
                     10: # LON-CAPA is free software; you can redistribute it and/or modify
                     11: # it under the terms of the GNU General Public License as published by
                     12: # the Free Software Foundation; either version 2 of the License, or
                     13: # (at your option) any later version.
                     14: #
                     15: # LON-CAPA is distributed in the hope that it will be useful,
                     16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
                     17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     18: # GNU General Public License for more details.
                     19: #
                     20: # You should have received a copy of the GNU General Public License
                     21: # along with LON-CAPA; if not, write to the Free Software
                     22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
                     23: #
                     24: # /home/httpd/html/adm/gpl.txt
                     25: #
                     26: # http://www.lon-capa.org/
                     27: #
                     28: # 7-16-2002 Jeremy Bowers
                     29: 
1.1       bowersj2   30: use strict;
                     31: use HTML::TokeParser;
                     32: use GDBM_File;
1.5       bowersj2   33: use File::Temp;
1.1       bowersj2   34: 
                     35: # accept texxml document on standard in
                     36: my $p = HTML::TokeParser->new( $ARGV[0] );
1.4       albertel   37: my $dirprefix = "../../loncom/html/adm/help/tex/";
1.1       bowersj2   38: 
1.10      albertel   39: my $include_filenames = ($ARGV[1] eq '--with-filenames');
1.5       bowersj2   40: # Make myself a temp dir for processing POD
                     41: my $tmpdir = File::Temp::tempdir('loncapahelpgenXXXXXXX', TMPDIR => 1);
                     42: 
1.1       bowersj2   43: # Print the header
                     44: open (LATEX_FILE, $dirprefix . "Latex_Header.tex");
                     45: print <LATEX_FILE>;
                     46: 
# Escape a plain-text string so it can be placed in LaTeX running text
# (section titles, file names).
#
# Takes one string and returns a copy in which each of the characters
#   \ % $ _ # & ~ ^ < > |
# is replaced by a LaTeX sequence that typesets it literally. All other
# characters are passed through unchanged. An undefined argument yields
# the empty string.
#
# The replacement is done in a single pass over the input, so every
# occurrence is escaped regardless of position (start of string, or
# directly after another special character) and the backslashes introduced
# by one replacement are never re-escaped by another.
sub escape_latex {
    my ($string)=@_;
    return '' unless defined($string);

    my %replacement_for = (
        '\\' => '\\ensuremath{\\backslash}',
        '%'  => '\\%',
        '$'  => '\\$',
        '_'  => '\\_',
        '#'  => '\\#',
        '&'  => '\\&',
        # \~ and \^ are accent commands; the \strut gives them something
        # to sit on so the character itself is printed.
        '~'  => '\\~\\strut ',
        '^'  => '\\^\\strut ',
        '>'  => '\\ensuremath{>}',
        '<'  => '\\ensuremath{<}',
        '|'  => '$\\mid$',
    );

    $string =~ s/([\\\$%&#_~^<>|])/$replacement_for{$1}/g;
    return $string;
}
                     65: 
# Walk the texxml token stream and emit LaTeX on standard output for each
# recognised start tag:
#   section / subsection / subsubsection - a heading taken from "name"
#   file - the contents of the LaTeX fragment "name" under $dirprefix
#   tex  - the literal text of the "content" attribute
#   pod  - the POD of "file" (relative to ../../loncom/), converted with
#          pod2latex; "section" and "h1level" are passed through to it
# Every other token is ignored.
while (my $token = $p->get_token())
{
    my $type = $token->[0];
    next unless $type eq 'S';

    my $tag = $token->[1];
    my $attr = $token->[2];

    if ($tag eq 'section' || $tag eq 'subsection' || $tag eq 'subsubsection') {
        # The tag name doubles as the LaTeX sectioning command.
        my $title = $attr->{'name'};
        print "\\" . $tag . "{" . escape_latex($title) . "}\n\n";
    }
    elsif ($tag eq 'file') {
        my $file = $attr->{'name'};
        # Setting $! to 1 before die makes the script exit with status 1.
        open(my $fragment_fh, '<', $dirprefix . $file) or
            ($! = 1, die "Can't find LaTeX file $dirprefix/$file; terminating build.");
        if ($include_filenames) {
            print "\\textrm{File: \\bf " . escape_latex($file) . "}\\\\\n";
        }
        while (my $line = <$fragment_fh>) {
            print $line;
        }
        close($fragment_fh);
        print "\n\n";
    }
    elsif ($tag eq 'tex') {
        print "\n\n";
        print $attr->{'content'};
        print "\n\n";
    }
    elsif ($tag eq 'pod') {
        my $file = $attr->{'file'};
        my @section_args = defined($attr->{'section'})
            ? ('-section', $attr->{'section'})
            : ();
        my $h1level = defined($attr->{'h1level'}) ? $attr->{'h1level'} : '2';
        $file = '../../loncom/' . $file;
        my $filename = substr($file, rindex($file, '/') + 1);

        # List-form system() bypasses the shell, so unusual characters in
        # the path cannot be misinterpreted.
        system('cp', $file, $tmpdir) == 0 or
            ($! = 1, die "Can't copy POD source $file to $tmpdir; terminating build.");

        if (index($filename, '.') == -1) {
            # pod2latex *insists* that either the extension of the
            # file be .pl|.pm|.pod or that it be executable. Some
            # extension-less files like "lonsql" are none-of-the-above.
            rename("$tmpdir/$filename", "$tmpdir/$filename.pm") or
                ($! = 1, die "Can't rename $filename to $filename.pm in $tmpdir; terminating build.");
            $filename .= ".pm";
            print STDERR $filename . "\n";
        }

        # pod2latex writes its output next to its input, so run it from
        # inside the temp dir. The shell only performs the cd; the directory
        # arrives as $0 and the pod2latex arguments as "$@", so none of them
        # is re-parsed by the shell.
        system('sh', '-c', 'cd "$0" && exec pod2latex "$@"',
               $tmpdir, '-h1level', $h1level, @section_args, $filename) == 0
            or warn "pod2latex reported a failure for $filename\n";

        my $latexFile = substr($filename, 0, rindex($filename, '.')) . '.tex';
        open(my $pod_fh, '<', $tmpdir . '/' . $latexFile) or
            ($! = 1, die "Latex file $latexFile not found while trying to use pod2latex, ".
             "terminating build");

        # pod2latex inserts \labels and \indexs for every section,
        # which is horrible because the section names tend to get
        # reused a lot. This filters those out, so we need to
        # create our own indexes.
        while (my $line = <$pod_fh>) {
            $line =~ s/\\([^{]*)(section|paragraph)(\*?)\{([^\\]+)\\label\{[^\\]+\}\\index\{([^\\]+)\}\}/\\${1}${2}${3}\{${4}\}/g;
            print $line;
        }
        close($pod_fh);
        print "\n\n";
    }
}
                    142: 
                    143: # Print out the footer.
                    144: open (LATEX_FILE, $dirprefix . "Latex_Footer.tex");
                    145: print <LATEX_FILE>;
1.5       bowersj2  146: 
                    147: # Remove the temp directory
                    148: system ("rm -rf $tmpdir");
1.8       bowersj2  149: 
                    150: __END__
                    151: 
                    152: =pod
                    153: 
                    154: =head1 NAME
                    155: 
                    156: texxml2latex.pl - core script that drives the help file assembly
                    157:   applications
                    158: 
                    159: =head1 SYNOPSIS
                    160: 
                    161: LON-CAPA's help system is based on assembling various pieces into
                    162: LaTeX files for conversion into printed documents. The various pieces
                    163: can also be used as online help.
                    164: 
                    165: =head1 OVERVIEW
                    166: 
                    167: X<help system, overview>LON-CAPA's help system is based on the idea of
                    168: assembling various pieces as needed to create documents for printing,
                    169: and using these various pieces for online help. LaTeX is the primary
                    170: language of the help system, because we can easily convert it to HTML,
                    171: and it makes the nicest printed documents.
                    172: 
                    173: The scripts for the help system are stored in /doc/help in the CVS
                    174: repository.
                    175: 
                    176: =head2 Data Sources
                    177: 
                    178: The help system can draw from the following sources to create help
                    179: documents:
                    180: 
                    181: =over 4
                    182: 
                    183: =item * B<LaTeX fragments>: LaTeX fragments stored in
                    184: C</loncom/html/adm/help/tex> in the CVS repository (which end up in
                    185: C</home/httpd/html/adm/help/tex>). A "LaTeX fragment" is a file that
                    186: contains LaTeX-style markup, but is not a complete LaTeX file with
                    187: header and footer.
                    188: 
                    189: =item * B<perl POD documentation>: POD documentation may be extracted
                    190: from perl modules used in LON-CAPA, using the syntax described in
                    191: podselect's man page.
                    192: 
                    193: =back
                    194: 
                    195: =head2 Online Help
                    196: 
                    197: The online aspect of the help system is covered in the documentation
                    198: for loncommon.pm; see L<Apache::loncommon>, look for
                    199: C<help_open_topic>.
                    200: 
                    201: Online help can only come from LaTeX fragments.
                    202: 
                    203: Access to the printed documents is partially provided online by
                    204: rendering the help files structure in a way that allows the user to
                    205: click through to the underlying help files; see 
                    206: L<http://msu.loncapa.org/adm/help/author.manual.access.hlp> for an
                    207: example. It's not very good, but it's marginally better than nothing.
                    208: 
                    209: =head2 Offline Documents 
                    210: 
                    211: Offline documents are generated from XML documents which tell a
                    212: rendering script how to assemble the various LaTeX fragments into a
                    213: single LaTeX file, which is then rendered into PostScript and PDF
                    214: files, suitable for download and printing. 
                    215: 
                    216: =head1 texxml And Rendering texxml
                    217: 
                    218: =head2 texxml 
                    219: 
                    220: X<texxml>
                    221: texxml is a little XML file format used to specify to the texxml2*.pl
                    222: scripts how to assemble the input sources into LaTeX documents. texxml
                    223: files end in the .texxml extension, and there is one texxml file per
                    224: final rendered document.
                    225: 
                    226: The texxml format is as follows: There is a root <texxml> element,
                    227: with no attributes and the following children:
                    228: 
                    229: =over 4
                    230: 
                    231: =item * B<title>: The B<name> attribute of this tag is used as the
                    232:    title of the document in texxml2index.pl; it is ignored in 
                    233:    texxml2latex.pl. If you don't intend to offer online-access
                    234:    to the rendered documents this may be skipped.
                    235: 
                    236: =item * B<section>, B<subsection>, and B<subsubsection>: These create
                    237:    the corresponding environments in the output file. The B<name>
                    238:    attribute is used to determine the name of the section.
                    239: 
                    240: =item * B<file>: The C<name> attribute specifies a LaTeX fragment by
                    241:    filename. The file is assumed to be located in the
                    242:    C<loncom/html/adm/help/tex/> directory in the CVS repository. The
                    243:    C<.tex> extension is required.
                    244: 
                    245: =item * B<tex>: The contents of the B<content> attribute are directly
                    246:    inserted into the rendered LaTeX file, followed by a paragraph
                    247:    break. This is generally used for little connective paragraphs in
                    248:    the documentation that don't make sense in the online help. See
                    249:    C<author.manual.texxml> for several example usages.
                    250: 
                    251: =item * B<pod>: The B<file> attribute specifies a file to draw the POD
                    252:    documentation out of. The B<section> attribute is a section
                    253:    specification matching the format specified in the man page of
                    254:    podselect. By default, all POD will be included. The file is
                    255:    assumed to be relative to the C<loncom> directory in the CVS
                    256:    repository; you are allowed to escape from that with .. if
                    257:    necessary. The B<h1level> attribute can be used to change 
                    258:    the default depth of the headings; by default, this is set to 2,
                    259:    which makes =head1 a "subsection". Setting this higher can allow
                    260:    you to bundle several related pod files together; see 
                    261:    developer.manual.texxml for examples.
                    262: 
                    263: =back
                    264: 
                    265: texxml2latex.pl will automatically include C<Latex_Header.tex> at the
                    266: beginning and C<Latex_Footer.tex> at the end, to make a complete
                    267: LaTeX document.
                    268: 
1.9       bowersj2  269: =head2 Rendering texxml 
1.8       bowersj2  270: 
1.9       bowersj2  271: =head3 render.texxml.pl 
1.8       bowersj2  272: 
1.9       bowersj2  273: X<texxml, rendering>X<render.texxml.pl>The C<render.texxml.pl> script
                    274: takes a .texxml file, and produces PostScript and PDF files. The LaTeX
                    275: files will be given access to .eps files in the
                    276: C</loncom/html/adm/help/eps/> directory while rendering. Call it as
                    277: follows, from the C<doc/help> directory:
1.8       bowersj2  278: 
                    279:  perl render.texxml.pl -- author.manual.texxml
                    280: 
                    281: substituting the appropriate texxml file.
                    282: 
1.9       bowersj2  283: =head3 texxml2latex.pl 
1.8       bowersj2  284: 
1.9       bowersj2  285: X<texxml2latex.pl>texxml2latex.pl is a perl script that takes texxml in and assembles
1.8       bowersj2  286: the final LaTeX file, outputting it on stdout. Invoke it as follows:
                    287: 
                    288:  perl texxml2latex.pl author.manual.texxml
                    289: 
                    290: Note that error handling is minimal; if the script can not open a
                    291: .tex file named in a B<file> tag, it terminates the build. Generally,
                    292: this means the file either could not be found, or you do not have
                    293: sufficient permissions with the current user to read it.
                    294: 
1.9       bowersj2  295: =head3 texxml2index.pl 
1.8       bowersj2  296: 
1.9       bowersj2  297: X<texxml2index.pl>texxml2index.pl is a perl script that takes texxml in and assembles a
1.8       bowersj2  298: file that can be used online to access all the .tex files that are
                    299: specified in the .texxml file. For an example of how this looks
                    300: online, see
                    301: C<http://msu.loncapa.org/adm/help/author.manual.access.hlp>.
                    302: 
                    303: =head2 texxml support
                    304: 
                    305: There are a couple of scripts that you may find useful for creating
                    306: texxml-based help:
                    307: 
1.9       bowersj2  308: =head3 latexSplitter.py 
1.8       bowersj2  309: 
1.9       bowersj2  310: X<latexSplitter.py>latexSplitter.py is a Python script that helps you separate a
1.8       bowersj2  311: monolithic .tex file into the small pieces LON-CAPA's help system
                    312: expects. Invoke it like this:
                    313: 
                    314:  python latexSplitter.py monolithic.tex
                    315: 
                    316: where C<monolithic.tex> is the .tex file you want to split into
                    317: pieces. This requires Python 2.1 or greater (2.0 may work); on many
                    318: modern RedHat installs this is installed by default under the
                    319: executable name C<python2>.
                    320: 
                    321: Use the program by highlighting the desired section, give it a file
                    322: name in the textbox near the bottom, and hit the bottom button. The
                    323: program will remove that text from the textbox, and create a file in
                    324: the C<loncom/html/adm/help/tex/> directory containing that LaTeX. For
                    325: consistency, you should use underscores rather than spaces in the
                    326: filename, and note there are a few naming conventions for the .tex
                    327: files, which you can see just by listing the
                    328: C<loncom/html/adm/help/tex/> directory.
                    329: 
                    330: The idea behind this program is that if you are writing a big document
                    331: from scratch, you can use a "real" program like LyX to create the .tex
                    332: file, then easily split it with this program.
                    333: 
1.9       bowersj2  334: =head3 simpleEdit.py 
1.8       bowersj2  335: 
1.9       bowersj2  336: X<simpleEdit.py>simpleEdit.py is a python script that takes a .texxml file and shows
1.8       bowersj2  337: all the tex files that went into it in sequence, allowing you to "edit"
                    338: the entire document as one entity. Note this is intended for simple
                    339: typo corrections and such in context, not major modification of the
                    340: document. Invoke it with 
                    341: 
                    342:  python simpleEdit.py author.manual.texxml
                    343: 
                    344: Make your changes, and hit the "Save" button to save them.
                    345: 
                    346: =head2 texxml LaTeX Feature Support
                    347: 
                    348: =head3 Cross-referencing
                    349: 
                    350: LaTeX has a cross-referencing system built around labeling points in
                    351: the document with \label, and referencing those labels with \ref. In a
                    352: complete LaTeX document, there's no problem because all \refs and
                    353: \labels are present. However, for the online help, \ref'ing something
                    354: that is not in the current LaTeX fragment causes a TTH error when it
                    355: can't find the crossreference.
                    356: 
                    357: The solution is to do the cross-references for TTH. When LON-CAPA is
                    358: installed, the C<rebuildLabelHash.pl>X<rebuildLabelHash.pl> script
                    359: is executed, which extracts all the labels from the LaTeX fragments
                    360: and stores them in the C<fragmentLabels.gdbm>X<fragmentLabels.gdbm> hash. 
                    361: The C<lonhelp.pm> handler then replaces \refs with appropriate
                    362: HTML to provide a link to the referenced help file while online. Thus,
                    363: you can freely use references, even in online help.
                    364: 
                    365: =head3 Indexing
                    366: 
                    367: LaTeX has a popular index making package called MakeIndex. LON-CAPA's
                    368: help system supports this, so you can create indices using the \index
                    369: LaTeX command. In perl POD files, use the X command. Note that in both
1.9       bowersj2  370: cases the index text is not included in the render, so the index must 
                    371: be included in addition to the indexed text, and need not match the 
                    372: indexed text precisely.
1.8       bowersj2  373: 
                    374: =head1 Writing POD: Style
                    375: 
                    376: Adopting a little bit from everybody who has included POD in their
                    377: documents to date, the help system is going to expect the following
                    378: format for POD documentation.
                    379: 
                    380: The POD should start with a C<=head1> with the title C<NAME> (in caps
                    381: as shown). The following paragraph should extremely briefly describe
                    382: what the module does and contains. Example:
                    383: 
                    384:  =head1 NAME
                    385: 
                    386:  Apache::lonflunkstudent - provides interface to set all
                    387:    student assessments point score to 0
                    388: 
                    389: Next should be a C<head1> titled C<SYNOPSIS> which contains a
                    390: paragraph or two description of the module.
                    391: 
                    392:  =head1 SYNOPSIS
                    393: 
                    394:  lonflunkstudent provides a handler to select a student and set all
                    395:  assignment values to zero, thereby flunking the student.
                    396: 
                    397:  Routines for setting all assessments to some value are provided by
                    398:  this module, as well as some useful student taunting routines.
                    399: 
                    400: Optionally, an C<OVERVIEW> section can be included. This can then be
                    401: extracted by the help system for the LON-CAPA subsystems overview
                    402: chapter. The overview should be a relatively high-level, but still
                    403: technical, overview of the module, sufficient to give the reader
                    404: enough context to understand what the module does, what it might be
                    405: useful for in other contexts, and what is going on in the code when it
                    406: is read.
                    407: 
                    408: The remainder should be formatted as appropriate for the file, such
                    409: that discarding the NAME, SYNOPSIS, and OVERVIEW sections provides a
1.9       bowersj2  410: useful API overview of the module. This may be anything from an 
                    411: elaborate discussion of the data structures, algorithms, and design 
                    412: principles that went into the module, to a simple listing of 
                    413: what functions exist, how to call them, and what they return, as
                    414: appropriate.
1.8       bowersj2  415: 
                    416: Routines that are private to the module should B<not> be documented;
                    417: document them in perl comments, or, as is the style of the time, not
                    418: at all, as is appropriate.
                    419: 
                    420: Method and function names should be bolded when being
1.9       bowersj2  421: documented. 
                    422: 
                    423: Literal strings such as filenames should be enclosed in
1.8       bowersj2  424: the C command, like this: C</home/httpd/lonTabs/>. 
1.9       bowersj2  425: 
                    426: Indexing can be done with the X command in perldoc, and should be used 
                    427: as appropriate. Do not include X commands in the headings, the output 
                    428: from pod2latex screws up some regexes in texxml2latex.pl.
1.8       bowersj2  429: 
                    430: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>