File:  [LON-CAPA] / doc / help / texxml2latex.pl
Revision 1.8: download - view: text, annotated - select for diffs
Mon Jul 21 20:32:08 2003 UTC (20 years, 9 months ago) by bowersj2
Branches: MAIN
CVS tags: version_1_0_0, version_0_99_5, version_0_99_4, HEAD
Improvements to pod support, documentation added in texxml2latex.pl
instead of seperate README file.

    1: #!/usr/bin/perl
    2: 
    3: # The LearningOnline Network with CAPA
    4: # Converts a texxml file into a single tex file
    5: #
    6: # Copyright Michigan State University Board of Trustees
    7: #
    8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
    9: #
   10: # LON-CAPA is free software; you can redistribute it and/or modify
   11: # it under the terms of the GNU General Public License as published by
   12: # the Free Software Foundation; either version 2 of the License, or
   13: # (at your option) any later version.
   14: #
   15: # LON-CAPA is distributed in the hope that it will be useful,
   16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
   17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   18: # GNU General Public License for more details.
   19: #
   20: # You should have received a copy of the GNU General Public License
   21: # along with LON-CAPA; if not, write to the Free Software
   22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   23: #
   24: # /home/httpd/html/adm/gpl.txt
   25: #
   26: # http://www.lon-capa.org/
   27: #
   28: # 7-16-2002 Jeremy Bowers
   29: 
   30: use strict;
   31: use HTML::TokeParser;
   32: use GDBM_File;
   33: use File::Temp;
   34: 
   35: # accept texxml document on standard in
   36: my $p = HTML::TokeParser->new( $ARGV[0] );
   37: my $dirprefix = "../../loncom/html/adm/help/tex/";
   38: 
   39: # Make myself a temp dir for processing POD
   40: my $tmpdir = File::Temp::tempdir('loncapahelpgenXXXXXXX', TMPDIR => 1);
   41: 
   42: # Print the header
   43: open (LATEX_FILE, $dirprefix . "Latex_Header.tex");
   44: print <LATEX_FILE>;
   45: 
   46: while (my $token = $p->get_token())
   47: {
   48:     my $type = $token->[0];
   49:     if ($type eq 'S') {
   50: 	my $tag = $token->[1];
   51: 	my $attr = $token->[2];
   52: 	if ($tag eq 'section') {
   53: 	    my $title = $attr->{'name'};
   54: 	    print "\\section{$title}\n\n";
   55: 	}
   56: 
   57: 	if ($tag eq 'subsection') {
   58: 	    my $title = $attr->{'name'};
   59: 	    print "\\subsection{$title}\n\n";
   60: 	}
   61: 
   62: 	if ($tag eq 'subsubsection') {
   63: 	    my $title = $attr->{'name'};
   64: 	    print "\\subsubsection{$title}\n\n";
   65: 	}
   66: 
   67: 	if ($tag eq 'file') {
   68: 	    my $file = $attr->{'name'};
   69: 	    open (LATEX_FILE, $dirprefix . $file);
   70: 	    print <LATEX_FILE>;
   71: 	    print "\n\n";
   72: 	}
   73: 
   74: 	if ($tag eq 'tex') {
   75: 	    print "\n\n";
   76: 	    print $attr->{'content'};
   77: 	    print "\n\n";
   78: 	}
   79: 
   80: 	if ($tag eq 'pod') {
   81: 	    my $file = $attr->{'file'};
   82: 	    my $section = $attr->{'section'};	    
   83: 	    if (!defined($section)) { $section = ''; }
   84: 	    else { 
   85: 		$section = "-section '$section'";
   86: 	    }
   87: 	    my $h1level = $attr->{'h1level'};
   88: 	    if (!defined($h1level)) { $h1level = '2'; }
   89: 	    $file = '../../loncom/' . $file;
   90: 	    my $filename = substr($file, rindex($file, '/') + 1);
   91: 	    system ("cp $file $tmpdir\n");
   92: 	    system ("cd $tmpdir; pod2latex -h1level $h1level $section $filename\n");
   93: 	    my $latexFile = substr($filename, 0, rindex($filename, '.')) . '.tex';
   94: 	    open LATEX_FILE, $tmpdir . '/' . $latexFile;
   95: 	    # pod2latex inserts \labels and \indexs for every section,
   96: 	    # which is horrible because the section names tend to get
   97: 	    # reused a lot. This filters those out, so we need to do
   98: 	    # create our own indexes.
   99: 	    for (<LATEX_FILE>) {
  100: 		$_ =~ s/\\([^{]*)(section|paragraph)(\*?)\{([^\\]+)\\label\{[^\\]+\}\\index\{([^\\]+)\}\}/\\\1\2\3\{\4\}/g;
  101: 		print $_;
  102: 	    }
  103: 	    print "\n\n";
  104: 	}
  105:     }
  106: }
  107: 
  108: # Print out the footer.
  109: open (LATEX_FILE, $dirprefix . "Latex_Footer.tex");
  110: print <LATEX_FILE>;
  111: 
  112: # Remove the temp directory
  113: system ("rm -rf $tmpdir");
  114: 
  115: __END__
  116: 
  117: =pod
  118: 
  119: =head1 NAME
  120: 
  121: texxml2latex.pl - core script that drives the help file assembly
  122:   applications
  123: 
  124: =head1 SYNOPSIS
  125: 
  126: LON-CAPA's help system is based on assembling various pieces into
  127: LaTeX files for conversion into printed documents. The various pieces
  128: can also be used as online help.
  129: 
  130: =head1 OVERVIEW
  131: 
  132: X<help system, overview>LON-CAPA's help system is based on the idea of
  133: assembling various pieces as needed to create documents for printing,
  134: and using these various pieces for online help. LaTeX is the primary
  135: language of the help system, because we can easily convert it to HTML,
  136: and it makes the nicest printed documents.
  137: 
  138: The scripts for the help system are stored in /doc/help in the CVS
  139: repository.
  140: 
  141: =head2 Data Sources
  142: 
  143: The help system can draw from the following sources to create help
  144: documents:
  145: 
  146: =over 4
  147: 
  148: =item * B<LaTeX fragments>: LaTeX fragments stored in
  149: C</loncom/html/adm/help/tex> in the CVS repository (which end up in
  150: C</home/httpd/html/adm/help/tex>). A "LaTeX fragment" is a file that
  151: contains LaTeX-style markup, but is not a complete LaTeX file with
  152: header and footer.
  153: 
  154: =item * B<perl POD documentation>: POD documentation may be extracted
  155: from perl modules used in LON-CAPA, using the syntax described in
  156: podselect's man page.
  157: 
  158: =back
  159: 
  160: =head2 Online Help
  161: 
  162: The online aspect of the help system is covered in the documentation
  163: for loncommon.pm; see L<Apache::loncommon>, look for
  164: C<help_open_topic>.
  165: 
  166: Online help can only come from LaTeX fragments.
  167: 
  168: Access to the printed documents is partially provided online by
  169: rendering the help files structure in a way that allows the user to
  170: click through to the underlying help files; see 
  171: L<http://msu.loncapa.org/adm/help/author.manual.access.hlp> for an
  172: example. It's not very good, but it's marginally better than nothing.
  173: 
  174: =head2 Offline Documents 
  175: 
  176: Offline documents are generated from XML documents which tell a
  177: rendering script how to assemble the various LaTeX fragments into a
  178: single LaTeX file, which is then rendered into PostScript and PDF
  179: files, suitable for download and printing. 
  180: 
  181: =head1 texxml And Rendering texxml
  182: 
  183: =head2 texxml 
  184: 
  185: X<texxml>
  186: texxml is a little XML file format used to specify to the texxml2*.pl
  187: scripts how to assemble the input sources into LaTeX documents. texxml
  188: files end in the .texxml extension, and there is one texxml file per
  189: final rendered document.
  190: 
  191: The texxml format is as follows: There is a root <texxml> element,
  192: with no attributes and the following children:
  193: 
  194: =over 4
  195: 
  196: =item * B<title>: The B<name> attribute of this tag is used as the
  197:    title of the document in texxml2index.pl; it is ignored in 
  198:    texxml2latex.pl. If you don't intend to offer online-access
  199:    to the rendered documents this may be skipped.
  200: 
  201: =item * B<section>, B<subsection>, and B<subsubsection>: These create
  202:    the corresponding environments in the output file. The B<name>
  203:    attribute is used to determine the name of the section.
  204: 
  205: =item * B<file>: The C<name> attribute specifies a LaTeX fragment by
  206:    filename. The file is assumed to be located in the
  207:    C<loncom/html/adm/help/tex/> directory in the CVS repository. The
  208:    C<.tex> is required.
  209: 
  210: =item * B<tex>: The contents of the B<content> attribute are directly
  211:    inserted into the rendered LaTeX file, followed by a paragraph
  212:    break. This is generally used for little connective paragraphs in
  213:    the documentation that don't make sense in the online help. See
  214:    C<author.manual.texxml> for several example usages.
  215: 
  216: =item * B<pod>: The B<file> attribute specifies a file to draw the POD
  217:    documentation out of. The B<section> attribute is a section
  218:    specification matching the format specified in the man page of
  219:    podselect. By default, all POD will be included. The file is
  220:    assumed to be relative to the C<loncom> directory in the CVS
  221:    repository; you are allowed to escape from that with .. if
  222:    necessary. The B<h1level> attribute can be used to change 
  223:    the default depth of the headings; by default, this is set to 2,
  224:    which makes =head1 a "subsection". Setting this higher can allow
  225:    you to bundle several related pod files together; see 
  226:    developer.manual.texxml for examples.
  227: 
  228: =back
  229: 
  230: texxml2latex.pl will automatically include C<Latex_Header.tex> at the
  231: beginning and C<Latex_Footer.tex> at the end, to make a complete
  232: LaTeX document.
  233: 
  234: =head2 Rendering texxml X<texxml, rendering>
  235: 
  236: =head3 render.texxml.pl X<render.texxml.pl>
  237: 
  238: The C<render.texxml.pl> script takes a .texxml file, and produces
  239: PostScript and PDF files. The LaTeX files will be given access to .eps
  240: files in the C</loncom/html/adm/help/eps/> directory while
  241: rendering. Call it as follows, from the C<doc/help> directory:
  242: 
  243:  perl render.texxml.pl -- author.manual.texxml
  244: 
  245: substituting the appropriate texxml file.
  246: 
  247: =head3 texxml2latex.pl X<texxml2latex.pl>
  248: 
  249: texxml2latex.pl is a perl script that takes texxml in and assembles
  250: the final LaTeX file, outputting it on stdout. Invoke it as follows:
  251: 
  252:  perl texxml2latex.pl author.manual.texxml
  253: 
  254: Note that there is no error handling; if the script can not find a
  255: .tex file, it is simply ignored. Generally, if a file is not in the
  256: final render, it either could not be found, or you do not have
  257: sufficient permissions with the current user to read it.
  258: 
  259: =head3 texxml2index.pl X<texxml2index.pl>
  260: 
  261: texxml2index.pl is a perl script that takes texxml in and assembles a
  262: file that can be used online to access all the .tex files that are
  263: specified in the .texxml file. For an example of how this looks
  264: online, see
  265: C<http://msu.loncapa.org/adm/help/author.manual.access.hlp>.
  266: 
  267: =head2 texxml support
  268: 
  269: There are a couple of scripts that you may find useful for creating
  270: texxml-based help:
  271: 
  272: =head3 latexSplitter.py X<latexSplitter.py>
  273: 
  274: latexSplitter.py is a Python script that helps you separate a
  275: monolithic .tex file into the small pieces LON-CAPA's help system
  276: expects. Invoke it like this:
  277: 
  278:  python latexSplitter.py monolithic.tex
  279: 
  280: where C<monolithic.tex> is the .tex file you want to split into
  281: pieces. This requires Python 2.1 or greater (2.0 may work); on many
  282: modern RedHat installs this is installed by default under the
  283: executable name C<python2>.
  284: 
  285: Use the program by highlighting the desired section, give it a file
  286: name in the textbox near the bottom, and hit the bottom button. The
  287: program will remove that text from the textbox, and create a file in
  288: the C<loncom/html/adm/help/tex/> directory containing that LaTeX. For
  289: consistency, you should use underscores rather than spaces in the
  290: filename, and note there are a few naming conventions for the .tex
  291: files, which you can see just by listing the
  292: C<loncom/html/adm/help/tex/> directory.
  293: 
  294: The idea behind this program is that if you are writing a big document
  295: from scratch, you can use a "real" program like LyX to create the .tex
  296: file, then easily split it with this program.
  297: 
  298: =head3 simpleEdit.py X<simpleEdit.py>
  299: 
  300: simpleEdit.py is a python script that takes a .texxml file and shows
  301: all the tex files that went into it in sequence, allowing you to "edit"
  302: the entire document as one entity. Note this is intended for simple
  303: typo corrections and such in context, not major modification of the
  304: document. Invoke it with 
  305: 
  306:  python simpleEdit.py author.manual.texxml
  307: 
  308: Make your changes, and hit the "Save" button to save them.
  309: 
  310: =head2 texxml LaTeX Feature Support
  311: 
  312: =head3 Cross-referencing
  313: 
  314: LaTeX has a cross-referencing system built around labeling points in
  315: the document with \label, and referencing those labels with \ref. In a
  316: complete LaTeX document, there's no problem because all \refs and
  317: \labels are present. However, for the online help, \ref'ing something
  318: that is not in the current LaTeX fragment causes a TTH error when it
  319: can't find the crossreference.
  320: 
  321: The solution is to do the cross-references for TTH. When LON-CAPA is
  322: installed, the C<rebuildLabelHash.pl>X<rebuildLabelHash.pl> script
  323: is executed, which extracts all the labels from the LaTeX fragments
  324: and stores them in the C<fragmentLabels.gdbm>X<fragmentLabels.gdbm> hash. 
  325: The C<lonhelp.pm> handler then replaces \refs with appropriate
  326: HTML to provide a link to the referenced help file while online. Thus,
  327: you can freely use references, even in online help.
  328: 
  329: =head3 Indexing
  330: 
  331: LaTeX has a popular index making package called MakeIndex. LON-CAPA's
  332: help system supports this, so you can create indices using the \index
  333: LaTeX command. In perl POD files, use the X command. Note that in both
  334: cases the index text is not included in the render, so you need to
  335: specify the exact index.
  336: 
  337: =head1 Writing POD: Style
  338: 
  339: Adopting a little bit from everybody who has included POD in their
  340: documents to date, the help system is going to expect the following
  341: format for POD documentation.
  342: 
  343: The POD should start with a C<=head1> with the title C<NAME> (in caps
  344: as shown). The following paragraph should extremely briefly describe
  345: what the module does and contains. Example:
  346: 
  347:  =head1 NAME
  348: 
  349:  Apache::lonflunkstudent - provides interface to set all
  350:    student assessments point score to 0
  351: 
  352: Next should be a C<head1> titled C<SYNOPSIS> which contains a
  353: paragraph or two description of the module.
  354: 
  355:  =head1 SYNOPSIS
  356: 
  357:  lonflunkstudent provides a handler to select a student and set all
  358:  assignment values to zero, thereby flunking the student.
  359: 
  360:  Routines for setting all assessments to some value are provided by
  361:  this module, as well as some useful student taunting routines.
  362: 
  363: Optionally, an C<OVERVIEW> section can be included. This can then be
  364: extracted by the help system for the LON-CAPA subsystems overview
  365: chapter. The overview should be a relatively high-level, but still
  366: technical, overview of the module, sufficient to give the reader
  367: enough context to understand what the module does, what it might be
  368: useful for in other contexts, and what is going on in the code when it
  369: is read.
  370: 
  371: The remainder should be formatted as appropriate for the file, such
  372: that discarding the NAME, SYNOPSIS, and OVERVIEW sections provides a
  373: useful API overview of the module.
  374: 
  375: Routines that are private to the module should B<not> be documented;
  376: document them in perl comments, or, as is the style of the time, not
  377: at all, as is appropriate.
  378: 
  379: Method and function names should be bolded when being
  380: documented. Indexing should be done as appropriate, using the X
  381: perldoc command. Literal strings such as filenames should be enclosed in
  382: the C command, like this: C</home/httpd/lonTabs/>. 
  383: 
  384: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>