doc/help/texxml2latex.pl - diff

Return to texxml2latex.pl CVS log

Up to [LON-CAPA] / doc / help

Diff for /doc/help/texxml2latex.pl between versions 1.6 and 1.12

-version 1.6, 2003/07/18 20:58:33
+version 1.12, 2005/03/16 21:31:27
  Line 36  use File::Temp;
  my $p = HTML::TokeParser->new( $ARGV[0] );
  my $dirprefix = "../../loncom/html/adm/help/tex/";
+ my $include_filenames = ($ARGV[1] eq '--with-filenames');
  # Make myself a temp dir for processing POD
  my $tmpdir = File::Temp::tempdir('loncapahelpgenXXXXXXX', TMPDIR => 1);
- Line 43  my $tmpdir = File::Temp::tempdir('loncap
+ Line 44  my $tmpdir = File::Temp::tempdir('loncap
  open (LATEX_FILE, $dirprefix . "Latex_Header.tex");
  print <LATEX_FILE>;
+ sub escape_latex {
+     my ($string)=@_;
+     $string=~s/\\/\\ensuremath{\\backslash}/g;
+     $string=~s/([^\\]|^)\%/$1\\\%/g;
+     $string=~s/([^\\]|^)\$/$1\\\$/g;
+     $string=~s/([^\\])\_/$1\\_/g;
+     $string=~s/\$\$/\$\\\$/g;
+     $string=~s/\_\_/\_\\\_/g;
+     $string=~s/\#\#/\#\\\#/g;
+     $string=~s/([^\\]|^)(\~|\^)/$1\\$2\\strut /g;
+     $string=~s/(>|<)/\\ensuremath\{$1\}/g; #more or less
+ #    $string=&Apache::lonprintout::character_chart($string);
+     # any & or # leftover should be safe to just escape
+     $string=~s/([^\\]|^)\&/$1\\\&/g;
+     $string=~s/([^\\]|^)\#/$1\\\#/g;
+     $string=~s/\|/\$\\mid\$/g;
+     return $string;
+ }
  while (my $token = $p->get_token())
  {
      my $type = $token->[0];
- Line 51  while (my $token = $p->get_token())
+ Line 71  while (my $token = $p->get_token())
  	my $attr = $token->[2];
  	if ($tag eq 'section') {
  	    my $title = $attr->{'name'};
- 	    print "\\section{$title}\n\n";
+ 	    print "\\section{".&escape_latex($title)."}\n\n";
  	}
  	if ($tag eq 'subsection') {
  	    my $title = $attr->{'name'};
- 	    print "\\subsection{$title}\n\n";
+ 	    print "\\subsection{".&escape_latex($title)."}\n\n";
  	}
  	if ($tag eq 'subsubsection') {
  	    my $title = $attr->{'name'};
- 	    print "\\subsubsection{$title}\n\n";
+ 	    print "\\subsubsection{".&escape_latex($title)."}\n\n";
  	}
  	if ($tag eq 'file') {
  	    my $file = $attr->{'name'};
- 	    open (LATEX_FILE, $dirprefix . $file);
+ 	    open (LATEX_FILE, $dirprefix . $file) or
+ 		($! = 1, die "Can't find LaTeX file $dirprefix/$file; terminating build.");
+ 	    if ($include_filenames) {
+ 		print "\\textrm{File: \\bf ".&escape_latex($file)."}\\\\\n";
+ 	    }
  	    print <LATEX_FILE>;
  	    print "\n\n";
  	}
- Line 79  while (my $token = $p->get_token())
+ Line 103  while (my $token = $p->get_token())
  	if ($tag eq 'pod') {
  	    my $file = $attr->{'file'};
  	    my $section = $attr->{'section'};
  	    if (!defined($section)) { $section = ''; }
  	    else {
- 		$section = "-section $section";
+ 		$section = "-section '$section'";
- 		# Escape the pipes so they are considered ORs in the
- 		# RE for podselect's "section" option, and not
- 		# pipes by the shell:
- 		$section =~ s/\|/\\\|/g;
  	    }
+ 	    my $h1level = $attr->{'h1level'};
+ 	    if (!defined($h1level)) { $h1level = '2'; }
  	    $file = '../../loncom/' . $file;
- 	    my $tempfile = 't' . substr($file, rindex($file, '/') + 1);
+ 	    my $filename = substr($file, rindex($file, '/') + 1);
- 	    system ("cp $file $tmpdir");
+ 	    system ("cp $file $tmpdir\n");
- 	    # The "echo" command is necessary; pod2latex can't
+ 	    my $latexFile;
- 	    # handle a perl file that *starts* with pod.
+ 	    if (index($filename, '.') == -1) {
- 	    system ("echo > $tmpdir/$tempfile; cat $file | podselect $section >> $tmpdir/$tempfile; cd $tmpdir; pod2latex -h1level 2 $tempfile");
+ 		# pod2latex *insists* that either the extension of the
- 	    my $latexFile = substr($tempfile, 0, rindex($tempfile, '.')) . '.tex';
+ 		# file be .pl|.pm|.pod or that it be executable. Some
- 	    open LATEX_FILE, $tmpdir . '/' . $latexFile;
+ 		# extension-less files like "lonsql" are none-of-the-above.
- 	    print <LATEX_FILE>;
+ 		system ("cd $tmpdir; mv $filename $filename.pm");
+ 		$filename .= ".pm";
+ 		print STDERR $filename . "\n";
+ 	    }
+ 	    system ("cd $tmpdir; pod2latex -h1level $h1level $section $filename\n");
+ 	    $latexFile = substr($filename, 0, rindex($filename, '.')) . '.tex';
+ 	    open LATEX_FILE, $tmpdir . '/' . $latexFile or
+ 		($! = 1, die "Latex file $latexFile not found while trying to use pod2latex, ".
+ 		 "terminating build");
+ 	    # pod2latex inserts \labels and \indexs for every section,
+ 	    # which is horrible because the section names tend to get
+ 	    # reused a lot. This filters those out, so we need to
+ 	    # create our own indexes.
+ 	    for (<LATEX_FILE>) {
+ 		$_ =~ s/\\([^{]*)(section|paragraph)(\*?)\{([^\\]+)\\label\{[^\\]+\}\\index\{([^\\]+)\}\}/\\\1\2\3\{\4\}/g;
+ 		print $_;
+ 	    }
  	    print "\n\n";
  	}
      }
- Line 108  print <LATEX_FILE>;
+ Line 146  print <LATEX_FILE>;
  # Remove the temp directory
  system ("rm -rf $tmpdir");
+ __END__
+ =pod
+ =head1 NAME
+ texxml2latex.pl - core script that drives the help file assembly
+   applications
+ =head1 SYNOPSIS
+ LON-CAPA's help system is based on assembling various pieces into
+ LaTeX files for conversion into printed documents. The various pieces
+ can also be used as online help.
+ =head1 OVERVIEW
+ X<help system, overview>LON-CAPA's help system is based on the idea of
+ assembling various pieces as needed to create documents for printing,
+ and using these various pieces for online help. LaTeX is the primary
+ language of the help system, because we can easily convert it to HTML,
+ and it makes the nicest printed documents.
+ The scripts for the help system are stored in /doc/help in the CVS
+ repository.
+ =head2 Data Sources
+ The help system can draw from the following sources to create help
+ documents:
+ =over 4
+ =item * B<LaTeX fragments>: LaTeX fragments stored in
+ C</loncom/html/adm/help/tex> in the CVS repository (which end up in
+ C</home/httpd/html/adm/help/tex>). A "LaTeX fragment" is a file that
+ contains LaTeX-style markup, but is not a complete LaTeX file with
+ header and footer.
+ =item * B<perl POD documentation>: POD documentation may be extracted
+ from perl modules used in LON-CAPA, using the syntax described in
+ podselect's man page.
+ =back
+ =head2 Online Help
+ The online aspect of the help system is covered in the documentation
+ for loncommon.pm; see L<Apache::loncommon>, look for
+ C<help_open_topic>.
+ Online help can only come from LaTeX fragments.
+ Access to the printed documents is partially provided online by
+ rendering the help files structure in a way that allows the user to
+ click through to the underlying help files; see
+ L<http://msu.loncapa.org/adm/help/author.manual.access.hlp> for an
+ example. It's not very good, but it's marginally better than nothing.
+ =head2 Offline Documents
+ Offline documents are generated from XML documents which tell a
+ rendering script how to assemble the various LaTeX fragments into a
+ single LaTeX file, which is then rendered into PostScript and PDF
+ files, suitable for download and printing.
+ =head1 texxml And Rendering texxml
+ =head2 texxml
+ X<texxml>
+ texxml is a little XML file format used to specify to the texxml2*.pl
+ scripts how to assemble the input sources into LaTeX documents. texxml
+ files end in the .texxml extension, and there is one texxml file per
+ final rendered document.
+ The texxml format is as follows: There is a root <texxml> element,
+ with no attributes and the following children:
+ =over 4
+ =item * B<title>: The B<name> attribute of this tag is used as the
+    title of the document in texxml2index.pl; it is ignored in
+    texxml2latex.pl. If you don't intend to offer online-access
+    to the rendered documents this may be skipped.
+ =item * B<section>, B<subsection>, and B<subsubsection>: These create
+    the corresponding environments in the output file. The B<name>
+    attribute is used to determine the name of the section.
+ =item * B<file>: The C<name> attribute specifies a LaTeX fragment by
+    filename. The file is assumed to be located in the
+    C<loncom/html/adm/help/tex/> directory in the CVS repository. The
+    C<.tex> is required.
+ =item * B<tex>: The contents of the B<content> attribute are directly
+    inserted into the rendered LaTeX file, followed by a paragraph
+    break. This is generally used for little connective paragraphs in
+    the documentation that don't make sense in the online help. See
+    C<author.manual.texxml> for several example usages.
+ =item * B<pod>: The B<file> attribute specifies a file to draw the POD
+    documentation out of. The B<section> attribute is a section
+    specification matching the format specified in the man page of
+    podselect. By default, all POD will be included. The file is
+    assumed to be relative to the C<loncom> directory in the CVS
+    repository; you are allowed to escape from that with .. if
+    necessary. The B<h1level> attribute can be used to change
+    the default depth of the headings; by default, this is set to 2,
+    which makes =head1 a "subsection". Setting this higher can allow
+    you to bundle several related pod files together; see
+    developer.manual.texxml for examples.
+ =back
+ texxml2latex.pl will automatically include C<Latex_Header.tex> at the
+ beginning and C<Latex_Footer.tex> at the end, to make a complete
+ LaTeX document.
+ =head2 Rendering texxml
+ =head3 render.texxml.pl
+ X<texxml, rendering>X<render.texxml.pl>The C<render.texxml.pl> script
+ takes a .texxml file, and produces PostScript and PDF files. The LaTeX
+ files will be given access to .eps files in the
+ C</loncom/html/adm/help/eps/> directory while rendering. Call it as
+ follows, from the C<doc/help> directory:
+  perl render.texxml.pl -- author.manual.texxml
+ substituting the appropriate texxml file.
+ =head3 texxml2latex.pl
+ X<texxml2latex.pl>texxml2latex.pl is a perl script that takes texxml in and assembles
+ the final LaTeX file, outputting it on stdout. Invoke it as follows:
+  perl texxml2latex.pl author.manual.texxml
+ Note that error handling is minimal; if the script can not open a
+ .tex file, it terminates the build with an error message. Generally,
+ this means the file either could not be found, or you do not have
+ sufficient permissions with the current user to read it.
+ =head3 texxml2index.pl
+ X<texxml2index.pl>texxml2index.pl is a perl script that takes texxml in and assembles a
+ file that can be used online to access all the .tex files that are
+ specified in the .texxml file. For an example of how this looks
+ online, see
+ C<http://msu.loncapa.org/adm/help/author.manual.access.hlp>.
+ =head2 texxml support
+ There are a couple of scripts that you may find useful for creating
+ texxml-based help:
+ =head3 latexSplitter.py
+ X<latexSplitter.py>latexSplitter.py is a Python script that helps you separate a
+ monolithic .tex file into the small pieces LON-CAPA's help system
+ expects. Invoke it like this:
+  python latexSplitter.py monolithic.tex
+ where C<monolithic.tex> is the .tex file you want to split into
+ pieces. This requires Python 2.1 or greater (2.0 may work); on many
+ modern RedHat installs this is installed by default under the
+ executable name C<python2>.
+ Use the program by highlighting the desired section, give it a file
+ name in the textbox near the bottom, and hit the bottom button. The
+ program will remove that text from the textbox, and create a file in
+ the C<loncom/html/adm/help/tex/> directory containing that LaTeX. For
+ consistency, you should use underscores rather than spaces in the
+ filename, and note there are a few naming conventions for the .tex
+ files, which you can see just by listing the
+ C<loncom/html/adm/help/tex/> directory.
+ The idea behind this program is that if you are writing a big document
+ from scratch, you can use a "real" program like LyX to create the .tex
+ file, then easily split it with this program.
+ =head3 simpleEdit.py
+ X<simpleEdit.py>simpleEdit.py is a python script that takes a .texxml file and shows
+ all the tex files that went into it in sequence, allowing you to "edit"
+ the entire document as one entity. Note this is intended for simple
+ typo corrections and such in context, not major modification of the
+ document. Invoke it with
+  python simpleEdit.py author.manual.texxml
+ Make your changes, and hit the "Save" button to save them.
+ =head2 texxml LaTeX Feature Support
+ =head3 Cross-referencing
+ LaTeX has a cross-referencing system built around labeling points in
+ the document with \label, and referencing those labels with \ref. In a
+ complete LaTeX document, there's no problem because all \refs and
+ \labels are present. However, for the online help, \ref'ing something
+ that is not in the current LaTeX fragment causes a TTH error when it
+ can't find the crossreference.
+ The solution is to do the cross-references for TTH. When LON-CAPA is
+ installed, the C<rebuildLabelHash.pl>X<rebuildLabelHash.pl> script
+ is executed, which extracts all the labels from the LaTeX fragments
+ and stores them in the C<fragmentLabels.gdbm>X<fragmentLabels.gdbm> hash.
+ The C<lonhelp.pm> handler then replaces \refs with appropriate
+ HTML to provide a link to the referenced help file while online. Thus,
+ you can freely use references, even in online help.
+ =head3 Indexing
+ LaTeX has a popular index making package called MakeIndex. LON-CAPA's
+ help system supports this, so you can create indices using the \index
+ LaTeX command. In perl POD files, use the X command. Note that in both
+ cases the index text is not included in the render, so the index must
+ be included in addition to the indexed text, and need not match the
+ indexed text precisely.
+ =head1 Writing POD: Style
+ Adopting a little bit from everybody who has included POD in their
+ documents to date, the help system is going to expect the following
+ format for POD documentation.
+ The POD should start with a C<=head1> with the title C<NAME> (in caps
+ as shown). The following paragraph should extremely briefly describe
+ what the module does and contains. Example:
+  =head1 NAME
+  Apache::lonflunkstudent - provides interface to set all
+    student assessments point score to 0
+ Next should be a C<head1> titled C<SYNOPSIS> which contains a
+ paragraph or two description of the module.
+  =head1 SYNOPSIS
+  lonflunkstudent provides a handler to select a student and set all
+  assignment values to zero, thereby flunking the student.
+  Routines for setting all assessments to some value are provided by
+  this module, as well as some useful student taunting routines.
+ Optionally, an C<OVERVIEW> section can be included. This can then be
+ extracted by the help system for the LON-CAPA subsystems overview
+ chapter. The overview should be a relatively high-level, but still
+ technical, overview of the module, sufficient to give the reader
+ enough context to understand what the module does, what it might be
+ useful for in other contexts, and what is going on in the code when it
+ is read.
+ The remainder should be formatted as appropriate for the file, such
+ that discarding the NAME, SYNOPSIS, and OVERVIEW sections provides a
+ useful API overview of the module. This may be anything from an
+ elaborate discussion of the data structures, algorithms, and design
+ principles that went into the module, to a simple listing of
+ what functions exist, how to call them, and what they return, as
+ appropriate.
+ Routines that are private to the module should B<not> be documented;
+ document them in perl comments, or, as is the style of the time, not
+ at all, as is appropriate.
+ Method and function names should be bolded when being
+ documented.
+ Literal strings such as filenames should be enclosed in
+ the C command, like this: C</home/httpd/lonTabs/>.
+ Indexing can be done with the X command in perldoc, and should be used
+ as appropriate. Do not include X commands in the headings; the output
+ from pod2latex screws up some regexes in texxml2latex.pl.
+ =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.6
changed lines
	Added in v.1.12