doc/help/texxml2latex.pl - view

File: [LON-CAPA] / doc / help / texxml2latex.pl
Revision 1.12: download - view: text, annotated - select for diffs
Wed Mar 16 21:31:27 2005 UTC (20 years, 4 months ago) by www
Branches: MAIN
CVS tags: version_2_9_X, version_2_9_99_0, version_2_9_1, version_2_9_0, version_2_8_X, version_2_8_99_1, version_2_8_99_0, version_2_8_2, version_2_8_1, version_2_8_0, version_2_7_X, version_2_7_99_1, version_2_7_99_0, version_2_7_1, version_2_7_0, version_2_6_X, version_2_6_99_1, version_2_6_99_0, version_2_6_3, version_2_6_2, version_2_6_1, version_2_6_0, version_2_5_X, version_2_5_99_1, version_2_5_99_0, version_2_5_2, version_2_5_1, version_2_5_0, version_2_4_X, version_2_4_99_0, version_2_4_2, version_2_4_1, version_2_4_0, version_2_3_X, version_2_3_99_0, version_2_3_2, version_2_3_1, version_2_3_0, version_2_2_X, version_2_2_99_1, version_2_2_99_0, version_2_2_2, version_2_2_1, version_2_2_0, version_2_1_X, version_2_1_99_3, version_2_1_99_2, version_2_1_99_1, version_2_1_99_0, version_2_1_3, version_2_1_2, version_2_1_1, version_2_1_0, version_2_12_X, version_2_11_X, version_2_11_6_msu, version_2_11_6, version_2_11_5_msu, version_2_11_5, version_2_11_4_uiuc, version_2_11_4_msu, version_2_11_4, version_2_11_3_uiuc, version_2_11_3_msu, version_2_11_3, version_2_11_2_uiuc, version_2_11_2_msu, version_2_11_2_educog, version_2_11_2, version_2_11_1, version_2_11_0_RC3, version_2_11_0_RC2, version_2_11_0_RC1, version_2_11_0, version_2_10_X, version_2_10_1, version_2_10_0_RC2, version_2_10_0_RC1, version_2_10_0, version_2_0_X, version_2_0_99_1, version_2_0_2, version_2_0_1, version_2_0_0, version_1_99_3, version_1_99_2, version_1_99_1_tmcc, version_1_99_1, version_1_99_0_tmcc, version_1_99_0, version_1_3_3, loncapaMITrelate_1, language_hyphenation_merge, language_hyphenation, bz6209-base, bz6209, HEAD, GCI_3, GCI_2, GCI_1, BZ4492-merge, BZ4492-feature_horizontal_radioresponse, BZ4492-feature_Support_horizontal_radioresponse, BZ4492-Support_horizontal_radioresponse

Bug #4015: Inserted garbage characters into TeX file for every new subsection.

#!/usr/bin/perl
# The LearningOnline Network with CAPA
# Converts a texxml file into a single tex file
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
# 7-16-2002 Jeremy Bowers

use strict;
use warnings;
use HTML::TokeParser;
use GDBM_File;
use File::Temp;
use Cwd ();

# Copy every remaining line of an open filehandle to standard output,
# then close the handle.
sub print_handle {
    my ($fh) = @_;
    while (defined(my $line = <$fh>)) {
        print $line;
    }
    close($fh);
    return;
}

# Print the file at $path to standard output.
# Returns 1 on success, or 0 (printing nothing, with $! set by open)
# if the file cannot be opened.
sub print_tex_file {
    my ($path) = @_;
    open(my $fh, '<', $path) or return 0;
    print_handle($fh);
    return 1;
}

# Escape a plain-text string (a section title or a file name) so that it
# can be placed in LaTeX source.
#
#   $string - the text to escape; undef is treated as the empty string.
#
# Returns the escaped string.  Braces are passed through unchanged.
#
# Backslashes are rewritten first.  Afterwards every backslash in the
# string belongs to an inserted "\ensuremath{\backslash}", so each
# remaining special character can be escaped unconditionally.  (The
# previous implementation guarded each rule with a consuming
# "([^\\]|^)" prefix, which skipped every second character in runs such
# as "%%" or "&&" and never escaped a leading "_".)
sub escape_latex {
    my ($string) = @_;
    return '' unless defined($string);

    $string =~ s/\\/\\ensuremath{\\backslash}/g;
    $string =~ s/([%#&_\$])/\\$1/g;
    $string =~ s/([~^])/\\$1\\strut /g;
    $string =~ s/([<>])/\\ensuremath\{$1\}/g;
    # Must run after the "$" rule above, since it inserts "$" itself.
    $string =~ s/\|/\$\\mid\$/g;
    return $string;
}

# The texxml document is named by the first command-line argument.
my $texxml_file = $ARGV[0];
defined($texxml_file)
    or ($! = 1, die "Usage: $0 file.texxml [--with-filenames]\n");

# HTML::TokeParser->new returns undef (with $! set) when the file
# cannot be opened.
my $p = HTML::TokeParser->new($texxml_file);
if (!defined($p)) {
    my $reason = "$!";
    $! = 1;    # die() exits with the value of $!; force exit status 1
    die "Can't open texxml file $texxml_file: $reason; terminating build.\n";
}

# Directory holding the LaTeX fragments, relative to the current working
# directory.  Must end in a slash.
my $dirprefix = "../../loncom/html/adm/help/tex/";

my $include_filenames =
    (defined($ARGV[1]) && $ARGV[1] eq '--with-filenames');

# Make myself a temp dir for processing POD.  CLEANUP => 1 removes it
# when the program exits, including when the build terminates via die().
my $tmpdir = File::Temp::tempdir('loncapahelpgenXXXXXXX',
                                 TMPDIR => 1, CLEANUP => 1);

# Print the header.  A missing header is reported but does not stop the
# build.
print_tex_file($dirprefix . "Latex_Header.tex")
    or warn "Can't open LaTeX header ${dirprefix}Latex_Header.tex: $!\n";

while (my $token = $p->get_token()) {
    # Only start tags carry instructions.
    next unless $token->[0] eq 'S';

    my ($tag, $attr) = @{$token}[1, 2];

    if ($tag eq 'section' || $tag eq 'subsection'
        || $tag eq 'subsubsection') {
        # The tag name is also the name of the LaTeX sectioning command.
        print '\\' . $tag . '{' . escape_latex($attr->{'name'}) . "}\n\n";
    }
    elsif ($tag eq 'file') {
        my $file = $attr->{'name'};
        defined($file)
            or ($! = 1, die "<file> element without a name attribute; "
                          . "terminating build.");

        # "$! = 1" makes die() exit with status 1.
        open(my $fragment, '<', $dirprefix . $file)
            or ($! = 1, die "Can't find LaTeX file $dirprefix$file; terminating build.");

        if ($include_filenames) {
            print "\\textrm{File: \\bf " . escape_latex($file) . "}\\\\\n";
        }
        print_handle($fragment);
        print "\n\n";
    }
    elsif ($tag eq 'tex') {
        my $content = $attr->{'content'};
        print "\n\n";
        print $content if defined($content);
        print "\n\n";
    }
    elsif ($tag eq 'pod') {
        my $file = $attr->{'file'};
        defined($file)
            or ($! = 1, die "<pod> element without a file attribute; "
                          . "terminating build.");

        my $section      = $attr->{'section'};
        my @section_args = defined($section) ? ('-section', $section) : ();

        my $h1level = $attr->{'h1level'};
        if (!defined($h1level)) {
            $h1level = '2';
        }

        $file = '../../loncom/' . $file;
        my $filename = substr($file, rindex($file, '/') + 1);

        # List-form system() bypasses the shell, so attribute values are
        # passed to the commands verbatim.
        system('cp', $file, $tmpdir) == 0
            or ($! = 1, die "Can't copy POD source $file to $tmpdir; "
                          . "terminating build.");

        if (index($filename, '.') == -1) {
            # pod2latex *insists* that either the extension of the
            # file be .pl|.pm|.pod or that it be executable. Some
            # extension-less files like "lonsql" are none-of-the-above.
            rename("$tmpdir/$filename", "$tmpdir/$filename.pm")
                or ($! = 1, die "Can't rename $filename in $tmpdir; "
                              . "terminating build.");
            $filename .= ".pm";
            print STDERR $filename . "\n";
        }

        # pod2latex is run from inside the temp dir, where it writes its
        # .tex output; come back afterwards because every other path
        # used by this script is relative.
        my $start_dir = Cwd::getcwd();
        defined($start_dir)
            or ($! = 1, die "Can't determine the current directory; "
                          . "terminating build.");
        chdir($tmpdir)
            or ($! = 1, die "Can't change to $tmpdir; terminating build.");
        system('pod2latex', '-h1level', $h1level, @section_args, $filename);
        chdir($start_dir)
            or ($! = 1, die "Can't return to $start_dir; terminating build.");

        my $latexFile = substr($filename, 0, rindex($filename, '.')) . '.tex';
        open(my $pod_tex, '<', $tmpdir . '/' . $latexFile)
            or ($! = 1, die "Latex file $latexFile not found while trying to use pod2latex, ".
                "terminating build");

        # pod2latex inserts \labels and \indexs for every section,
        # which is horrible because the section names tend to get
        # reused a lot. This filters those out, so we need to
        # create our own indexes.
        while (defined(my $line = <$pod_tex>)) {
            $line =~ s/\\([^{]*)(section|paragraph)(\*?)\{([^\\]+)\\label\{[^\\]+\}\\index\{([^\\]+)\}\}/\\$1$2$3\{$4\}/g;
            print $line;
        }
        close($pod_tex);
        print "\n\n";
    }
}

# Print out the footer.  A missing footer is reported but does not stop
# the build.
print_tex_file($dirprefix . "Latex_Footer.tex")
    or warn "Can't open LaTeX footer ${dirprefix}Latex_Footer.tex: $!\n";

# The temp directory is removed automatically at exit (CLEANUP => 1).

__END__

=pod

=head1 NAME

texxml2latex.pl - core script that drives the help file assembly applications

=head1 SYNOPSIS

LON-CAPA's help system is based on assembling various pieces into LaTeX
files for conversion into printed documents. The various pieces can also
be used as online help.

=head1 OVERVIEW

X<help system, overview>LON-CAPA's help system is based on the idea of
assembling various pieces as needed to create documents for printing,
and using these various pieces for online help. LaTeX is the primary
language of the help system, because we can easily convert it to HTML,
and it makes the nicest printed documents. The scripts for the help
system are stored in /doc/help in the CVS repository.

=head2 Data Sources

The help system can draw from the following sources to create help
documents:

=over 4

=item *

B<LaTeX fragments>: LaTeX fragments stored in
C</loncom/html/adm/help/tex> in the CVS repository (which end up in
C</home/httpd/html/adm/help/tex>).
A "LaTeX fragment" is a file that contains LaTeX-style markup, but is
not a complete LaTeX file with header and footer.

=item *

B<perl POD documentation>: POD documentation may be extracted from perl
modules used in LON-CAPA, using the syntax described in podselect's man
page.

=back

=head2 Online Help

The online aspect of the help system is covered in the documentation
for loncommon.pm; see L<Apache::loncommon>, look for
C<help_open_topic>. Online help can only come from LaTeX fragments.

Access to the printed documents is partially provided online by
rendering the help files structure in a way that allows the user to
click through to the underlying help files; see
L<http://msu.loncapa.org/adm/help/author.manual.access.hlp> for an
example. It's not very good, but it's marginally better than nothing.

=head2 Offline Documents

Offline documents are generated from XML documents which tell a
rendering script how to assemble the various LaTeX fragments into a
single LaTeX file, which is then rendered into PostScript and PDF
files, suitable for download and printing.

=head1 texxml And Rendering texxml

=head2 texxml

X<texxml> texxml is a little XML file format used to specify to the
texxml2*.pl scripts how to assemble the input sources into LaTeX
documents. texxml files end in the .texxml extension, and there is one
texxml file per final rendered document.

The texxml format is as follows: There is a root <texxml> element, with
no attributes and the following children:

=over 4

=item *

B<title>: The B<name> attribute of this tag is used as the title of the
document in texxml2index.pl; it is ignored in texxml2latex.pl. If you
don't intend to offer online-access to the rendered documents this may
be skipped.

=item *

B<section>, B<subsection>, and B<subsubsection>: These create the
corresponding environments in the output file. The B<name> attribute is
used to determine the name of the section.

=item *

B<file>: The C<name> attribute specifies a LaTeX fragment by filename.
The file is assumed to be located in the C<loncom/html/adm/help/tex/>
directory in the CVS repository. The C<.tex> extension is required.

=item *

B<tex>: The contents of the B<content> attribute are directly inserted
into the rendered LaTeX file, followed by a paragraph break. This is
generally used for little connective paragraphs in the documentation
that don't make sense in the online help. See C<author.manual.texxml>
for several example usages.

=item *

B<pod>: The B<file> attribute specifies a file to draw the POD
documentation out of. The B<section> attribute is a section
specification matching the format specified in the man page of
podselect. By default, all POD will be included. The file is assumed to
be relative to the C<loncom> directory in the CVS repository; you are
allowed to escape from that with .. if necessary. The B<h1level>
attribute can be used to change the default depth of the headings; by
default, this is set to 2, which makes C<=head1> a "subsection".
Setting this higher can allow you to bundle several related pod files
together; see developer.manual.texxml for examples.

=back

texxml2latex.pl will automatically include C<Latex_Header.tex> at the
beginning and C<Latex_Footer.tex> at the end, to make a complete LaTeX
document.

=head2 Rendering texxml

=head3 render.texxml.pl

X<texxml, rendering>X<render.texxml.pl>The C<render.texxml.pl> script
takes a .texxml file, and produces PostScript and PDF files. The LaTeX
files will be given access to .eps files in the
C</loncom/html/adm/help/eps/> directory while rendering. Call it as
follows, from the C<doc/help> directory:

 perl render.texxml.pl -- author.manual.texxml

substituting the appropriate texxml file.

=head3 texxml2latex.pl

X<texxml2latex.pl>texxml2latex.pl is a perl script that takes texxml in
and assembles the final LaTeX file, outputting it on stdout.
Invoke it as follows:

 perl texxml2latex.pl author.manual.texxml

Note that error handling is minimal: if the script cannot open a .tex
file named in a B<file> element, it terminates the build with an error
message. Generally, this means the file either could not be found, or
you do not have sufficient permissions with the current user to read
it.

=head3 texxml2index.pl

X<texxml2index.pl>texxml2index.pl is a perl script that takes texxml in
and assembles a file that can be used online to access all the .tex
files that are specified in the .texxml file. For an example of how
this looks online, see
C<http://msu.loncapa.org/adm/help/author.manual.access.hlp>.

=head2 texxml support

There are a couple of scripts that you may find useful for creating
texxml-based help:

=head3 latexSplitter.py

X<latexSplitter.py>latexSplitter.py is a Python script that helps you
separate a monolithic .tex file into the small pieces LON-CAPA's help
system expects. Invoke it like this:

 python latexSplitter.py monolithic.tex

where C<monolithic.tex> is the .tex file you want to split into pieces.
This requires Python 2.1 or greater (2.0 may work); on many modern
RedHat installs this is installed by default under the executable name
C<python2>.

Use the program by highlighting the desired section, give it a file
name in the textbox near the bottom, and hit the bottom button. The
program will remove that text from the textbox, and create a file in
the C<loncom/html/adm/help/tex/> directory containing that LaTeX. For
consistency, you should use underscores rather than spaces in the
filename, and note there are a few naming conventions for the .tex
files, which you can see just by listing the
C<loncom/html/adm/help/tex/> directory.

The idea behind this program is that if you are writing a big document
from scratch, you can use a "real" program like LyX to create the .tex
file, then easily split it with this program.

=head3 simpleEdit.py

X<simpleEdit.py>simpleEdit.py is a python script that takes a .texxml
file and shows all the tex files that went into it in sequence,
allowing you to "edit" the entire document as one entity. Note this is
intended for simple typo corrections and such in context, not major
modification of the document. Invoke it with

 python simpleEdit.py author.manual.texxml

Make your changes, and hit the "Save" button to save them.

=head2 texxml LaTeX Feature Support

=head3 Cross-referencing

LaTeX has a cross-referencing system built around labeling points in
the document with \label, and referencing those labels with \ref. In a
complete LaTeX document, there's no problem because all \refs and
\labels are present. However, for the online help, \ref'ing something
that is not in the current LaTeX fragment causes a TTH error when it
can't find the crossreference.

The solution is to do the cross-references for TTH. When LON-CAPA is
installed, the C<rebuildLabelHash.pl>X<rebuildLabelHash.pl> script is
executed, which extracts all the labels from the LaTeX fragments and
stores them in the C<fragmentLabels.gdbm>X<fragmentLabels.gdbm> hash.
The C<lonhelp.pm> handler then replaces \refs with appropriate HTML to
provide a link to the referenced help file while online. Thus, you can
freely use references, even in online help.

=head3 Indexing

LaTeX has a popular index making package called MakeIndex. LON-CAPA's
help system supports this, so you can create indices using the \index
LaTeX command. In perl POD files, use the X command. Note that in both
cases the index text is not included in the render, so the index must
be included in addition to the indexed text, and need not match the
indexed text precisely.

=head1 Writing POD: Style

Adopting a little bit from everybody who has included POD in their
documents to date, the help system is going to expect the following
format for POD documentation.

The POD should start with a C<=head1> with the title C<NAME> (in caps
as shown).
The following paragraph should extremely briefly describe what the
module does and contains. Example:

 =head1 NAME

 Apache::lonflunkstudent - provides interface to set all
 student assessments point score to 0

Next should be a C<=head1> titled C<SYNOPSIS> which contains a one- or
two-paragraph description of the module.

 =head1 SYNOPSIS

 lonflunkstudent provides a handler to select a student and set all
 assignment values to zero, thereby flunking the student.

 Routines for setting all assessments to some value are provided by
 this module, as well as some useful student taunting routines.

Optionally, an C<OVERVIEW> section can be included. This can then be
extracted by the help system for the LON-CAPA subsystems overview
chapter. The overview should be a relatively high-level, but still
technical, overview of the module, sufficient to give the reader enough
context to understand what the module does, what it might be useful for
in other contexts, and what is going on in the code when it is read.

The remainder should be formatted as appropriate for the file, such
that discarding the NAME, SYNOPSIS, and OVERVIEW sections provides a
useful API overview of the module. This may be anything from an
elaborate discussion of the data structures, algorithms, and design
principles that went into the module, or a simple listing of what
functions exist, how to call them, and what they return, as
appropriate.

Routines that are private to the module should B<not> be documented;
document them in perl comments, or, as is the style of the time, not at
all, as is appropriate.

Method and function names should be bolded when being documented.

Literal strings such as filenames should be enclosed in the C command,
like this: C</home/httpd/lonTabs/>.

Indexing can be done with the X command in perldoc, and should be used
as appropriate. Do not include X commands in the headings; the output
from pod2latex screws up some regexes in texxml2latex.pl.

=cut