Diff for /doc/help/texxml2latex.pl between versions 1.6 and 1.12

version 1.6, 2003/07/18 20:58:33 version 1.12, 2005/03/16 21:31:27
Line 36  use File::Temp; Line 36  use File::Temp;
 my $p = HTML::TokeParser->new( $ARGV[0] );  my $p = HTML::TokeParser->new( $ARGV[0] );
 my $dirprefix = "../../loncom/html/adm/help/tex/";  my $dirprefix = "../../loncom/html/adm/help/tex/";
   
   my $include_filenames = ($ARGV[1] eq '--with-filenames');
 # Make myself a temp dir for processing POD  # Make myself a temp dir for processing POD
 my $tmpdir = File::Temp::tempdir('loncapahelpgenXXXXXXX', TMPDIR => 1);  my $tmpdir = File::Temp::tempdir('loncapahelpgenXXXXXXX', TMPDIR => 1);
   
Line 43  my $tmpdir = File::Temp::tempdir('loncap Line 44  my $tmpdir = File::Temp::tempdir('loncap
 open (LATEX_FILE, $dirprefix . "Latex_Header.tex");  open (LATEX_FILE, $dirprefix . "Latex_Header.tex");
 print <LATEX_FILE>;  print <LATEX_FILE>;
   
   sub escape_latex {
       my ($string)=@_;
       $string=~s/\\/\\ensuremath{\\backslash}/g;
       $string=~s/([^\\]|^)\%/$1\\\%/g;
       $string=~s/([^\\]|^)\$/$1\\\$/g;
       $string=~s/([^\\])\_/$1\\_/g;
       $string=~s/\$\$/\$\\\$/g;
       $string=~s/\_\_/\_\\\_/g;
       $string=~s/\#\#/\#\\\#/g;
       $string=~s/([^\\]|^)(\~|\^)/$1\\$2\\strut /g;
       $string=~s/(>|<)/\\ensuremath\{$1\}/g; #more or less
   #    $string=&Apache::lonprintout::character_chart($string);
       # any & or # leftover should be safe to just escape
       $string=~s/([^\\]|^)\&/$1\\\&/g;
       $string=~s/([^\\]|^)\#/$1\\\#/g;
       $string=~s/\|/\$\\mid\$/g;
       return $string;
   }
   
 while (my $token = $p->get_token())  while (my $token = $p->get_token())
 {  {
     my $type = $token->[0];      my $type = $token->[0];
Line 51  while (my $token = $p->get_token()) Line 71  while (my $token = $p->get_token())
  my $attr = $token->[2];   my $attr = $token->[2];
  if ($tag eq 'section') {   if ($tag eq 'section') {
     my $title = $attr->{'name'};      my $title = $attr->{'name'};
     print "\\section{$title}\n\n";      print "\\section{".&escape_latex($title)."}\n\n";
  }   }
   
  if ($tag eq 'subsection') {   if ($tag eq 'subsection') {
     my $title = $attr->{'name'};      my $title = $attr->{'name'};
     print "\\subsection{$title}\n\n";      print "\\subsection{".&escape_latex($title)."}\n\n";
  }   }
   
  if ($tag eq 'subsubsection') {   if ($tag eq 'subsubsection') {
     my $title = $attr->{'name'};      my $title = $attr->{'name'};
     print "\\subsubsection{$title}\n\n";      print "\\subsubsection{".&escape_latex($title)."}\n\n";
  }   }
   
  if ($tag eq 'file') {   if ($tag eq 'file') {
     my $file = $attr->{'name'};      my $file = $attr->{'name'};
     open (LATEX_FILE, $dirprefix . $file);      open (LATEX_FILE, $dirprefix . $file) or 
    ($! = 1, die "Can't find LaTeX file $dirprefix/$file; terminating build.");
       if ($include_filenames) {
    print "\\textrm{File: \\bf ".&escape_latex($file)."}\\\\\n";
       }
     print <LATEX_FILE>;      print <LATEX_FILE>;
     print "\n\n";      print "\n\n";
  }   }
Line 79  while (my $token = $p->get_token()) Line 103  while (my $token = $p->get_token())
   
  if ($tag eq 'pod') {   if ($tag eq 'pod') {
     my $file = $attr->{'file'};      my $file = $attr->{'file'};
     my $section = $attr->{'section'};      my $section = $attr->{'section'};    
     if (!defined($section)) { $section = ''; }      if (!defined($section)) { $section = ''; }
     else {       else { 
  $section = "-section $section";   $section = "-section '$section'";
  # Escape the pipes so they are considered ORs in the  
  # RE for podselect's "section" option, and not   
  # pipes by the shell:  
  $section =~ s/\|/\\\|/g;  
     }      }
       my $h1level = $attr->{'h1level'};
       if (!defined($h1level)) { $h1level = '2'; }
     $file = '../../loncom/' . $file;      $file = '../../loncom/' . $file;
     my $tempfile = 't' . substr($file, rindex($file, '/') + 1);      my $filename = substr($file, rindex($file, '/') + 1);
     system ("cp $file $tmpdir");      system ("cp $file $tmpdir\n");
     # The "echo" command is necessary; pod2latex can't      my $latexFile;
     # handle a perl file that *starts* with pod.      if (index($filename, '.') == -1) {
     system ("echo > $tmpdir/$tempfile; cat $file | podselect $section >> $tmpdir/$tempfile; cd $tmpdir; pod2latex -h1level 2 $tempfile");   # pod2latex *insists* that either the extension of the
     my $latexFile = substr($tempfile, 0, rindex($tempfile, '.')) . '.tex';   # file be .pl|.pm|.pod or that it be executable. Some
     open LATEX_FILE, $tmpdir . '/' . $latexFile;   # extension-less files like "lonsql' are none-of-the-above.
     print <LATEX_FILE>;   system ("cd $tmpdir; mv $filename $filename.pm");
    $filename .= ".pm";
    print STDERR $filename . "\n";
       }
       system ("cd $tmpdir; pod2latex -h1level $h1level $section $filename\n");
       $latexFile = substr($filename, 0, rindex($filename, '.')) . '.tex';
       open LATEX_FILE, $tmpdir . '/' . $latexFile or
    ($! = 1, die "Latex file $latexFile not found while trying to use pod2latex, ".
    "terminating build");
       # pod2latex inserts \labels and \indexs for every section,
       # which is horrible because the section names tend to get
       # reused a lot. This filters those out, so we need to
       # create our own indexes.
       for (<LATEX_FILE>) {
    $_ =~ s/\\([^{]*)(section|paragraph)(\*?)\{([^\\]+)\\label\{[^\\]+\}\\index\{([^\\]+)\}\}/\\\1\2\3\{\4\}/g;
    print $_;
       }
     print "\n\n";      print "\n\n";
  }   }
     }      }
Line 108  print <LATEX_FILE>; Line 146  print <LATEX_FILE>;
   
 # Remove the temp directory  # Remove the temp directory
 system ("rm -rf $tmpdir");  system ("rm -rf $tmpdir");
   
   __END__
   
   =pod
   
   =head1 NAME
   
   texxml2latex.pl - core script that drives the help file assembly
     applications
   
   =head1 SYNOPSIS
   
   LON-CAPA's help system is based on assembling various pieces into
   LaTeX files for conversion into printed documents. The various pieces
   can also be used as online help.
   
   =head1 OVERVIEW
   
   X<help system, overview>LON-CAPA's help system is based on the idea of
   assembling various pieces as needed to create documents for printing,
   and using these various pieces for online help. LaTeX is the primary
   language of the help system, because we can easily convert it to HTML,
   and it makes the nicest printed documents.
   
   The scripts for the help system are stored in /doc/help in the CVS
   repository.
   
   =head2 Data Sources
   
   The help system can draw from the following sources to create help
   documents:
   
   =over 4
   
   =item * B<LaTeX fragments>: LaTeX fragments stored in
   C</loncom/html/adm/help/tex> in the CVS repository (which end up in
   C</home/httpd/html/adm/help/tex>). A "LaTeX fragment" is a file that
   contains LaTeX-style markup, but is not a complete LaTeX file with
   header and footer.
   
   =item * B<perl POD documentation>: POD documentation may be extracted
   from perl modules used in LON-CAPA, using the syntax described in
   podselect's man page.
   
   =back
   
   =head2 Online Help
   
   The online aspect of the help system is covered in the documentation
   for loncommon.pm; see L<Apache::loncommon>, look for
   C<help_open_topic>.
   
   Online help can only come from LaTeX fragments.
   
   Access to the printed documents is partially provided online by
   rendering the help file structure in a way that allows the user to
   click through to the underlying help files; see
   L<http://msu.loncapa.org/adm/help/author.manual.access.hlp> for an
   example. It's not very good, but it's marginally better than nothing.
   
   =head2 Offline Documents 
   
   Offline documents are generated from XML documents which tell a
   rendering script how to assemble the various LaTeX fragments into a
   single LaTeX file, which is then rendered into PostScript and PDF
   files, suitable for download and printing. 
   
   =head1 texxml And Rendering texxml
   
   =head2 texxml 
   
   X<texxml>
   texxml is a little XML file format used to specify to the texxml2*.pl
   scripts how to assemble the input sources into LaTeX documents. texxml
   files end in the .texxml extension, and there is one texxml file per
   final rendered document.
   
   The texxml format is as follows: There is a root <texxml> element,
   with no attributes and the following children:
   
   =over 4
   
   =item * B<title>: The B<name> attribute of this tag is used as the
      title of the document in texxml2index.pl; it is ignored in 
      texxml2latex.pl. If you don't intend to offer online-access
      to the rendered documents this may be skipped.
   
   =item * B<section>, B<subsection>, and B<subsubsection>: These create
      the corresponding environments in the output file. The B<name>
      attribute is used to determine the name of the section.
   
   =item * B<file>: The C<name> attribute specifies a LaTeX fragment by
      filename. The file is assumed to be located in the
      C<loncom/html/adm/help/tex/> directory in the CVS repository. The
      C<.tex> extension is required.
   
   =item * B<tex>: The contents of the B<content> attribute are directly
      inserted into the rendered LaTeX file, followed by a paragraph
      break. This is generally used for little connective paragraphs in
      the documentation that don't make sense in the online help. See
      C<author.manual.texxml> for several example usages.
   
   =item * B<pod>: The B<file> attribute specifies a file to draw the POD
      documentation out of. The B<section> attribute is a section
      specification matching the format specified in the man page of
      podselect. By default, all POD will be included. The file is
      assumed to be relative to the C<loncom> directory in the CVS
      repository; you are allowed to escape from that with .. if
      necessary. The B<h1level> attribute can be used to change 
      the default depth of the headings; by default, this is set to 2,
      which makes =head1 a "subsection". Setting this higher can allow
      you to bundle several related pod files together; see 
      developer.manual.texxml for examples.
   
   =back
   
   texxml2latex.pl will automatically include C<Latex_Header.tex> at the
   beginning and C<Latex_Footer.tex> at the end, to make a complete
   LaTeX document.
   
   =head2 Rendering texxml 
   
   =head3 render.texxml.pl 
   
   X<texxml, rendering>X<render.texxml.pl>The C<render.texxml.pl> script
   takes a .texxml file, and produces PostScript and PDF files. The LaTeX
   files will be given access to .eps files in the
   C</loncom/html/adm/help/eps/> directory while rendering. Call it as
   follows, from the C<doc/help> directory:
   
    perl render.texxml.pl -- author.manual.texxml
   
   substituting the appropriate texxml file.
   
   =head3 texxml2latex.pl 
   
   X<texxml2latex.pl>texxml2latex.pl is a perl script that takes texxml in and assembles
   the final LaTeX file, outputting it on stdout. Invoke it as follows:
   
    perl texxml2latex.pl author.manual.texxml
   
   Note that error handling is minimal: if the script cannot open a
   .tex file named in a B<file> tag, either because it could not be
   found or because the current user does not have sufficient
   permissions to read it, the build is terminated with an error
   message.
   
   =head3 texxml2index.pl 
   
   X<texxml2index.pl>texxml2index.pl is a perl script that takes texxml in and assembles a
   file that can be used online to access all the .tex files that are
   specified in the .texxml file. For an example of how this looks
   online, see
   C<http://msu.loncapa.org/adm/help/author.manual.access.hlp>.
   
   =head2 texxml support
   
   There are a couple of scripts that you may find useful for creating
   texxml-based help:
   
   =head3 latexSplitter.py 
   
   X<latexSplitter.py>latexSplitter.py is a Python script that helps you separate a
   monolithic .tex file into the small pieces LON-CAPA's help system
   expects. Invoke it like this:
   
    python latexSplitter.py monolithic.tex
   
   where C<monolithic.tex> is the .tex file you want to split into
   pieces. This requires Python 2.1 or greater (2.0 may work); on many
   modern RedHat installs this is installed by default under the
   executable name C<python2>.
   
   Use the program by highlighting the desired section, give it a file
   name in the textbox near the bottom, and hit the bottom button. The
   program will remove that text from the textbox, and create a file in
   the C<loncom/html/adm/help/tex/> directory containing that LaTeX. For
   consistency, you should use underscores rather than spaces in the
   filename, and note there are a few naming conventions for the .tex
   files, which you can see just by listing the
   C<loncom/html/adm/help/tex/> directory.
   
   The idea behind this program is that if you are writing a big document
   from scratch, you can use a "real" program like LyX to create the .tex
   file, then easily split it with this program.
   
   =head3 simpleEdit.py 
   
   X<simpleEdit.py>simpleEdit.py is a python script that takes a .texxml file and shows
   all the tex files that went into it in sequence, allowing you to "edit"
   the entire document as one entity. Note this is intended for simple
   typo corrections and such in context, not major modification of the
   document. Invoke it with 
   
    python simpleEdit.py author.manual.texxml
   
   Make your changes, and hit the "Save" button to save them.
   
   =head2 texxml LaTeX Feature Support
   
   =head3 Cross-referencing
   
   LaTeX has a cross-referencing system built around labeling points in
   the document with \label, and referencing those labels with \ref. In a
   complete LaTeX document, there's no problem because all \refs and
   \labels are present. However, for the online help, \ref'ing something
   that is not in the current LaTeX fragment causes a TTH error when it
   can't find the crossreference.
   
   The solution is to do the cross-references for TTH. When LON-CAPA is
   installed, the C<rebuildLabelHash.pl>X<rebuildLabelHash.pl> script
   is executed, which extracts all the labels from the LaTeX fragments
   and stores them in the C<fragmentLabels.gdbm>X<fragmentLabels.gdbm> hash. 
   The C<lonhelp.pm> handler then replaces \refs with appropriate
   HTML to provide a link to the referenced help file while online. Thus,
   you can freely use references, even in online help.
   
   =head3 Indexing
   
   LaTeX has a popular index making package called MakeIndex. LON-CAPA's
   help system supports this, so you can create indices using the \index
   LaTeX command. In perl POD files, use the X command. Note that in both
   cases the index text is not included in the render, so the index must 
   be included in addition to the indexed text, and need not match the 
   indexed text precisely.
   
   =head1 Writing POD: Style
   
   Adopting a little bit from everybody who has included POD in their
   documents to date, the help system is going to expect the following
   format for POD documentation.
   
   The POD should start with a C<=head1> with the title C<NAME> (in caps
   as shown). The following paragraph should extremely briefly describe
   what the module does and contains. Example:
   
    =head1 NAME
   
    Apache::lonflunkstudent - provides interface to set all
      student assessments point score to 0
   
   Next should be a C<head1> titled C<SYNOPSIS> which contains a
   paragraph or two description of the module.
   
    =head1 SYNOPSIS
   
    lonflunkstudent provides a handler to select a student and set all
    assignment values to zero, thereby flunking the student.
   
    Routines for setting all assessments to some value are provided by
    this module, as well as some useful student taunting routines.
   
   Optionally, an C<OVERVIEW> section can be included. This can then be
   extracted by the help system for the LON-CAPA subsystems overview
   chapter. The overview should be a relatively high-level, but still
   technical, overview of the module, sufficient to give the reader
   enough context to understand what the module does, what it might be
   useful for in other contexts, and what is going on in the code when it
   is read.
   
   The remainder should be formatted as appropriate for the file, such
   that discarding the NAME, SYNOPSIS, and OVERVIEW sections provides a
   useful API overview of the module. This may be anything from an 
   elaborate discussion of the data structures, algorithms, and design 
   principles that went into the module, to a simple listing of
   what functions exist, how to call them, and what they return, as
   appropriate.
   
   Routines that are private to the module should B<not> be documented;
   document them in perl comments, or, as is the style of the time, not
   at all, as is appropriate.
   
   Method and function names should be bolded when being
   documented. 
   
   Literal strings such as filenames should be enclosed in
   the C command, like this: C</home/httpd/lonTabs/>. 
   
   Indexing can be done with the X command in perldoc, and should be used 
   as appropriate. Do not include X commands in the headings; the output
   from pod2latex screws up some regexes in texxml2latex.pl.
   
   =cut

Removed from v.1.6  
changed lines
  Added in v.1.12


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>