# File:  [LON-CAPA] / loncom / interface / entities.pm
# Revision 1.12.2.1
# Thu Dec 11 14:03:14 2008 UTC by raeburn
# Branches: version_2_8_X
# CVS tags: version_2_8_2, version_2_8_1, version_2_8_0, version_2_7_99_1, version_2_7_99_0, GCI_1
# Log message: - Version line.

# The LearningOnline Network
# entity -> tex.
#
# $Id: entities.pm,v 1.12.2.1 2008/12/11 14:03:14 raeburn Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
# http://www.lon-capa.org/
#
#
package Apache::entities;
use strict;
#
#   This file contains a table driven entity-->latex converter.
#
#  Assumptions:
#   The number of entities in a resource is small compared with the
#   number of possible entities that might be translated.
#   Therefore the strategy is to match a general entity pattern
#   &.+; over and over, pull out the match look it up in an entity -> tex hash
#   and do the replacement.
#
#  In order to simplify the hash, the following reductions are done:
#   &#\d+;  has the &# and ; stripped and is converted to an int.
#   &#x.+;  has the &#x and ; stripped and is converted to an int as a hex
#                             value.
#   All others have the & and ; stripped.


#  The hash:  Add new conversions here; leave off the leading & and the trailing ;
#  All numeric entities need only appear as their decimal versions
#  (e.g. 1234 is sufficient; there is no need for 0x4d2 as well).
#
#  This entity table is mercilessly cribbed from the  HTML pocket reference
#  table starting at pg 82.  In most cases the LaTeX equivalent codes come from
#  the original massive regular expression replacements originally by 
#  A. Sakharuk in lonprintout.pm
#
#  I also want to acknowledge
#   ISO Character entities and their LaTeX equivalents by 
#      Vidar Bronken Gundersen, and Rune Mathisen
#    http://www.bitjungle.com/isoent-ref.pdf
#

#  Note numerical entities are essentially unicode character codes.
#
package Apache::entities;

my %entities = (

    #  ---- ASCII code page: ----------------

    # Translation to empty strings:

    7        => "",
    9        => "",
    10       => "",
    13       => "",
    
    # Translations to simple characters:

    32       => " ",
    33       => "!",
    34       => '"',
    'quot'   => '"',
    35       => '\\#',
    36       => '\\$',
    37       => '\%',
    38       => '\&',
    'amp'    => '\&',
    39       => '\'',		# Apostrophe
    40       => '(',
    41       => ')',
    42       => '*',
    43       => '+',
    44       => ',',		#  comma
    45       => '-',
    46       => '.',
    47       => '/',
    48       => '0',
    49       => '1',
    50       => '2',
    51       => '3',
    52       => '4',
    53       => '5',
    54       => '6',
    55       => '7',
    56       => '8',
    57       => '9',
    58       => ':',
    59       => ';',
    60       => '\ensuremath{<}',
    'lt'     => '\ensuremath{<}',
    61       => '\ensuremath{=}',
    62       => '\ensuremath{>}',
    'gt'     => '\ensuremath{>}',
    63       => '?',
    64       => '@',
    65       => 'A',
    66       => 'B',
    67       => 'C',
    68       => 'D',
    69       => 'E',
    70       => 'F',
    71       => 'G',
    72       => 'H',
    73       => 'I',
    74       => 'J',
    75       => 'K',
    76       => 'L',
    77       => 'M',
    78       => 'N',
    79       => 'O',
    80       => 'P',
    81       => 'Q',
    82       => 'R',
    83       => 'S',
    84       => 'T',
    85       => 'U',
    86       => 'V',
    87       => 'W',
    88       => 'X',
    89       => 'Y',
    90       => 'Z',
    91       => '[',
    92       => '\ensuremath{\setminus}', # \setminus is \ with special spacing.
    93       => ']',
    94       => '\ensuremath{\wedge}',
    95       => '\underline{\makebox[2mm]{\strut}}', # Underline 2mm of space for _
    96       => '`',
    97       => 'a',
    98       => 'b',
    99       => 'c',
    100      => 'd',
    101      => 'e',
    102      => 'f',
    103      => 'g',
    104      => 'h', 
    105      => 'i',
    106      => 'j',
    107      => 'k',
    108      => 'l',
    109      => 'm',
    110      => 'n',
    111      => 'o',
    112      => 'p',
    113      => 'q',
    114      => 'r',
    115      => 's',
    116      => 't',
    117      => 'u',
    118      => 'v',
    119      => 'w',
    120      => 'x',
    121      => 'y',
    122      => 'z',
    123      => '\{',
    124      => '|',
    125      => '\}',
    126      => '\~',

    #   Controls and Latin-1 supplement.  Note that some entities that have
    #   visible effect are not printing unicode characters.  Specifically
    #   &#130;-&#160;

    130     => ',',
    131     => '\ensuremath{f}',
    132     => ',,',		# Low double left quotes.
    133     => '\ensuremath{\ldots}',
    134     => '\ensuremath{\dagger}',
    135     => '\ensuremath{\ddagger}',
    136     => '\ensuremath{\wedge}',
    137     => '\textperthousand ',
    138     => '\v{S}',
    139     => '\ensuremath{<}',
    140     => '{\OE}',
    
    #  There's a gap here in my entity table

    145     => '`',
    146     => '\'',
    147     => '``',
    148     => '\'\'',
    149     => '\ensuremath{\bullet}',
    150     => '--',
    151     => '---',
    152     => '\ensuremath{\sim}',
    153     => '\texttrademark',
    154     => '\v{s}',
    155     => '\ensuremath{>}',
    156     => '\oe ',
    
    # Another short gap:

    159     => '\"Y',
    160     => '~',
    'nbsp'  => '~',
    161     => '\textexclamdown ',
    'iexcl' => '\textexclamdown ',
    162     => '\textcent ',
    'cent'  => '\textcent ',
    163     => '\pounds ',
    'pound' => '\pounds ',
    164     => '\textcurrency ',
    'curren' => '\textcurrency ',
    165     => '\textyen ',
    'yen'   => '\textyen ',
    166     => '\textbrokenbar ',
    'brvbar' => '\textbrokenbar ',
    167     => '\textsection ',
    'sect'  => '\textsection ',
    168     => '\"{}',
    'uml'   => '\"{}',
    169     => '\copyright ',
    'copy'  => '\copyright ',
    170     => '\textordfeminine ',
    'ordf'  => '\textordfeminine ',
    171     => '\ensuremath{\ll}', # approximation of left angle quote.
    'laquo' => '\ensuremath{\ll}', #   ""
    172     => '\ensuremath{\neg}',
    'not'   => '\ensuremath{\neg}',
    173     => ' - ',
    'shy'   => ' - ',
    174     => '\textregistered ',
    'reg'   => '\textregistered ',
    175     => '\ensuremath{^{-}}',
    'macr'  => '\ensuremath{^{-}}',
    176     => '\ensuremath{^{\circ}}',
    'deg'   => '\ensuremath{^{\circ}}',
    177     => '\ensuremath{\pm}',
    'plusmn' => '\ensuremath{\pm}',
    178     => '\ensuremath{^2}',
    'sup2'  => '\ensuremath{^2}',
    179     => '\ensuremath{^3}',
    'sup3'  => '\ensuremath{^3}',
    180     => "\\'{}",
    'acute' => "\\'{}",
    181     => '\ensuremath{\mu}',
    'micro' => '\ensuremath{\mu}',
    182     => '\P ',
    para    => '\P ',
    183     => '\ensuremath{\cdot}',
    'middot' => '\ensuremath{\cdot}',
    184     => '\c{\strut}',
    'cedil' => '\c{\strut}',
    185     => '\ensuremath{^1}',
    sup1    => '\ensuremath{^1}',
    186     => '\textordmasculine ',
    'ordm'  => '\textordmasculine ',
    187     => '\ensuremath{\gg}',
    'raquo' => '\ensuremath{\gg}',
    188     => '\textonequarter ',
    'frac14' => '\textonequarter ',
    189     => '\textonehalf' ,
    'frac12' => '\textonehalf' ,
    190     => '\textthreequarters ',
    'frac34' => '\textthreequarters ',
    191     =>  '\textquestiondown ',
    'iquest' => '\textquestiondown ',
    192     => '\\`{A}',
    'Agrave' => '\\`{A}',
    193     => "\\'{A}",
    'Aacute' => "\\'{A}",
    194     => '\^{A}',
    'Acirc' => '\^{A}',
    195     => '\~{A}',
    'Atilde'=> '\~{A}',
    196     => '\\"{A}',
    'Auml'  => '\\"{A}',
    197     => '{\AA}',
    'Aring' => '{\AA}',
    198     => '{\AE}',
    'AElig' => '{\AE}',
    199     => '\c{c}',
    'Ccedil'=> '\c{c}',
     200   =>  '\\`{E}',
    'Egrave'=> '\\`{E}',
    201     => "\\'{E}",
    'Eacute'=> "\\'{E}",
    202     => '\\^{E}',
    'Ecirc' => '\\^{E}',
    203     => '\\"{E}',
    'Euml'  => '\\"{E}',
    204     => '\\`{I}',
    'Igrave'=> '\\`{I}',
    205     => "\\'{I}",
    'Iacute'=> "\\'{I}",
    206     => '\\^{I}',
    'Icirc' => '\\^{I}',
    207     => '\\"{I}',
    'Iuml'  => '\\"{I}',
    208     => '\DH',
    'ETH'   => '\DH',
    209     => '\~{N}',
    'Ntilde'=> '\~{N}',
    210     => '\\`{O}',
    'Ograve'=> '\\`{O}',
    211     => "\\'{O}",
    'Oacute'=> "\\'{O}",
    212     => '\\^{O}',
    'Ocirc' => '\\^{O}',
    213     => '\~{O}',
    'Otilde'=> '\~{O}',
    214     => '\\"{O}',
    'Ouml'  => '\\"{O}',
    215     => '\ensuremath{\times}',
    'times' => '\ensuremath{\times}',
    216     => '\O',
    'Oslash'=> '\O',
    217     => '\\`{U}',
    'Ugrave'=> '\\`{U}',
    218     => "\\'{U}",
    'Uacute'=> "\\'{U}",
    219     => '\\^{U}',
    'Ucirc' => '\\^{U}',
    220     => '\\"{U}',
    'Uuml'  => '\\"{U}',
    221     => "\\'{Y}",
    'Yacute'=> "\\'{Y}",
    223     => '{\ss}',
    'szlig' => '{\ss}',
    224     => '\\`{a}',
    'agrave'=> '\\`{a}',
    225     => "\\'{a}",
    'aacute'=> "\\'{a}",
    226     => '\\^{a}',
    'acirc' => '\\^{a}',
    227     => '\\~{a}',
    'atilde'=> '\\~{a}',
    228     => '\\"{a}',
    'auml'  => '\\"{a}',
    229     => '\aa',
    'aring' => '\aa',
    230     => '\ae',
    'aelig' => '\ae',
    231     => '\c{c}',
    'ccedil'=> '\c{c}',
    232     => '\\`{e}',
    'egrave'=> '\\`{e}',
    233     => "\\'{e}",
    'eacute'=> "\\'{e}",
    234     => '\\^{e}',
    'ecirc' => '\\^{e}',
    235     => '\\"{e}',
    'euml'  => '\\"{e}',
    236     => '\\`{i}',
    'igrave'=> '\\`{i}',
    237     => "\\'{i}",
    'iacute'=> "\\'{i}",
    238     => '\\^{i}',
    'icirc' => '\\^{i}',
    239     => '\\"{i}',
    'iuml'  => '\\"{i}',
    241     => '\\~{n}',
    'ntilde'=> '\\~{n}',
    242     => '\\`{o}',
    'ograve'=> '\\`{o}',
    243     => "\\'{o}",
    'oacute'=> "\\'{o}",
    244     => '\\^{o}',
    'ocirc' => '\\^{o}',
    245     => '\\~{o}',
    'otilde'=> '\\~{o}',
    246     => '\\"{o}',
    'ouml'  => '\\"{o}',
    247     => '\ensuremath{\div}',
    'divide'=> '\ensuremath{\div}',
    248     => '{\o}',
    'oslash'=> '{\o}',
    249     => '\\`{u}',
    'ugrave'=> '\\`{u}',
    250     => "\\'{u}",
    'uacute'=> "\\'{u}",
    251     => '\\^{u}',
    'ucirc' => '\\^{u}',
    252     => '\\"{u}',
    'uuml'  => '\\"{u}',
    253     => "\\'{y}",
    'yacute'=> "\\'{y}",
    255     => '\\"{y}',
    'yuml'  => '\\"{y}',

    # hbar entity number comes from the unicode charater:
    # see e.g. http://www.unicode.org/charts/PDF/U0100.pdf
    # ISO also documents a 'planck' entity.

    295     => '\ensuremath{\hbar}',
    'planck' => '\ensuremath{\hbar}',

    # Latin extended-A HTML 4.01 entities:

    338      => '\OE',
    'OElig'  => '\OE',
    339      => '\oe',
    'oelig'  => '\oe',
    352      => '\v{S}',
    'Scaron' => '\v{S}',
    353      => '\v{s}',
    'scaron' => '\v{s}',
    376      => '\\"{Y}',
    'Yuml'   => '\\"{Y}', 


    # Latin extended B HTML 4.01 entities

    402      => '\ensuremath{f}',
    'fnof'   => '\ensuremath{f}',

    # Spacing modifier letters:
    
    710      => '\^{}',
    'circ'   => '\^{}',
    732      => '\~{}',
    'tilde'  => '\~{}',

    # Greek uppercase:

    913      => '\ensuremath{\mathrm{A}}',
    'Alpha'  => '\ensuremath{\mathrm{A}}',
    914      => '\ensuremath{\mathrm{B}}',
    'Beta'   => '\ensuremath{\mathrm{B}}',
    915      => '\ensuremath{\Gamma}',
    'Gamma'  => '\ensuremath{\Gamma}',
    916      => '\ensuremath{\Delta}',
    'Delta'  => '\ensuremath{\Delta}',
    917      => '\ensuremath{\mathrm{E}}',
    'Epsilon'=> '\ensuremath{\mathrm{E}}',
    918      => '\ensuremath{\mathrm{Z}}',
    'Zeta'   => '\ensuremath{\mathrm{Z}}',
    919      => '\ensuremath{\mathrm{H}}',
    'Eta'    => '\ensuremath{\mathrm{H}}',
    920      => '\ensuremath{\Theta}',
    'Theta'  => '\ensuremath{\Theta}',
    921      => '\ensuremath{\mathrm{I}}',
    'Iota'   => '\ensuremath{\mathrm{I}}',
    922      => '\ensuremath{\mathrm{K}}',
    'Kappa'  => '\ensuremath{\mathrm{K}}',
    923      => '\ensuremath{\Lambda}',
    'Lambda' => '\ensuremath{\Lambda}',
    924      => '\ensuremath{\mathrm{M}}',
    'Mu'     => '\ensuremath{\mathrm{M}}',
    925      => '\ensuremath{\mathrm{N}}',
    'Nu'     => '\ensuremath{\mathrm{N}}',
    926      => '\ensuremath{\mathrm{\Xi}}',
    'Xi'     => '\ensuremath{\mathrm{\Xi}}',
    927      => '\ensuremath{\mathrm{O}}',
    'Omicron'=> '\ensuremath{\mathrm{O}}',
    928      => '\ensuremath{\Pi}',
    'Pi'     => '\ensuremath{\Pi}',
    929      => '\ensuremath{\mathrm{P}}',
    'Rho'    => '\ensuremath{\mathrm{P}}',
   
    # Skips 930

    931      => '\ensuremath{\Sigma}',
    'Sigma'  => '\ensuremath{\Sigma}',
    932      => '\ensuremath{\mathrm{T}}',
    'Tau'    => '\ensuremath{\mathrm{T}}',
    933      => '\ensuremath{\Upsilon}',
    'Upsilon'=> '\ensuremath{\Upsilon}',
    934      => '\ensuremath{\Phi}',
    'Phi'    => '\ensuremath{\Phi}',
    935      => '\ensuremath{\mathrm{X}}',
    'Chi'    => '\ensuremath{\mathrm{X}}',
    936      => '\ensuremath{\Psi}',
    'Psi'    => '\ensuremath{\Psi}',
    937      => '\ensuremath{\Omega}',
    'Omega'  => '\ensuremath{\Omega}',


    # Greek lowercase:

    945      => '\ensuremath{\alpha}',
    'alpha'  => '\ensuremath{\alpha}',
    946      => '\ensuremath{\beta}',
    'beta'   => '\ensuremath{\beta}',
    947      => '\ensuremath{\gamma}',
    'gamma'  => '\ensuremath{\gamma}',
    948      => '\ensuremath{\delta}',
    'delta'  => '\ensuremath{\delta}',
    949      => '\ensuremath{\epsilon}',
    'epsilon'=> '\ensuremath{\epsilon}',
    950      => '\ensuremath{\zeta}',
    'zeta'   => '\ensuremath{\zeta}',
    951      => '\ensuremath{\eta}',
    'eta'    => '\ensuremath{\eta}',
    952      => '\ensuremath{\theta}',
    'theta'  => '\ensuremath{\theta}',
    953      => '\ensuremath{\iota}',
    'iota'   => '\ensuremath{\iota}',
    954      => '\ensuremath{\kappa}',
    'kappa'  => '\ensuremath{\kappa}',
    955      => '\ensuremath{\lambda}',
    'lambda' => '\ensuremath{\lambda}',
    956      => '\ensuremath{\mu}',
    'mu'     => '\ensuremath{\mu}',
    957      => '\ensuremath{\nu}',
    'nu'     => '\ensuremath{\nu}',
    958      => '\ensuremath{\xi}',
    'xi'     => '\ensuremath{\xi}',
    959      => '\ensuremath{o}',
    'omicron'=> '\ensuremath{o}',
    960      => '\ensuremath{\pi}',
    'pi'     => '\ensuremath{\pi}',
    961      => '\ensuremath{\rho}',
    'rho'    => '\ensuremath{\rho}',
    962      => '\ensuremath{\varsigma}',
    'sigmaf' => '\ensuremath{\varsigma}',
    963      => '\ensuremath{\sigma}',
    'sigma'  => '\ensuremath{\sigma}',
    964      => '\ensuremath{\tau}',
    'tau'    => '\ensuremath{\tau}',
    965      => '\ensuremath{\upsilon}',
    'upsilon'=> '\ensuremath{\upsilon}',
    966      => '\ensuremath{\phi}',
    'phi'    => '\ensuremath{\phi}',
    967      => '\ensuremath{\chi}',
    'chi'    => '\ensuremath{\chi}',
    968      => '\ensuremath{\psi}',
    'psi'    => '\ensuremath{\psi}',
    969      => '\ensuremath{\omega}',
    'omega'  => '\ensuremath{\omega}',
    977      => '\ensuremath{\vartheta}',
    'thetasym'=>'\ensuremath{\vartheta}',
    978      => '\ensuremath{\mathit{\Upsilon}}',
    'upsih'  => '\ensuremath{\mathit{\Upsilon}}',
    982      => '\ensuremath{\varpi}',
    'piv'    => '\ensuremath{\varpi}',

    
    # The general punctuation set:

    8194,    => '\hspace{.5em}',
    'enspc'  => '\hspace{.5em}',
    8195     => '\hspace{1.0em}',
    'emspc'  => '\hspace{1.0em}',
    8201     => '\hspace{0.167em}',
    'thinsp' => '\hspace{0.167em}',
    8204     => '{}',
    'zwnj'   => '{}',
    8205     => '',
    'zwj'    => '',
    8206     => '',
    'lrm'    => '',
    8207     => '',
    'rlm'    => '',
    8211     => '--',
    'ndash'  => '--',
    8212     => '---',
    'mdash'  => '---',
    8216     => '`',
    'lsquo'  => '`',
    8217     => "'",
    'rsquo'  => "'",
    8218     => '\quotesinglbase',
    'sbquo'  => '\quotesinglbase',
    8220     => '``',
    'ldquo'  => '``',
    8221     => "''",
    'rdquo'  => "''",
    8222     => '\quotedblbase',
    'bdquo'  => '\quotedblbase',
    8224     => '\ensuremath{\dagger}',
    'dagger' => '\ensuremath{\dagger}',
    '8225'   => '\ensuremath{\ddag}',
    'Dagger' => '\ensuremath{\ddag}',
    8226     => '\textbullet',
    'bull'   => '\textbullet',
    8230     => '\textellipsis',
    'hellep' => '\textellipsis',
    8240     => '\textperthousand',
    permil   => '\textperthousand',
    8242     => '\textquotesingle',
    'prime'  => '\textquotesingle',
    8243     => '\textquotedbl',
    'Prime'  => '\textquotedbl',
    8249     => '\guilsinglleft',
    'lsaquo' => '\guilsinglleft',
    8250     => '\guilsinglright',
    'rsaquo' => '\guilsinglright',
    8254     => '\textasciimacron',
    oline    => '\textasciimacron',
    8260     => '\textfractionsolidus',
    'frasl'  => '\textfractionsolidus',
    8364     => '\texteuro',
    'euro'   => '\texteuro',

    # Letter like symbols

    
    8472     => '\ensuremath{\wp}',
    'weierp' => '\ensuremath{\wp}',
    8465     => '\ensuremath{\Im}',
    'image'  => '\ensuremath{\Im}',
    8476     => '\ensuremath{\Re}',
    'real'   => '\ensuremath{\Re}',
    8482     => '\texttrademark',
    'trade'  => '\texttrademark',
    8501     => '\ensuremath{\aleph}',
    'alefsym'=> '\ensuremath{\aleph}',

    # Arrows and then some (harpoons from Hon Kie).

    8592     => '\textleftarrow',
    'larr'   => '\textleftarrow',
    8593     => '\textuparrow',
    'uarr'   => '\textuparrow',
    8594     => '\textrightarrow',
    'rarr'   => '\textrightarrow',
    8595     => '\textdownarrow',
    'darr'   => '\textdownarrow',
    8596     => '\ensuremath{\leftrightarrow}',
    'harr'   => '\ensuremath{\leftrightarrow}',
    8598     => '\ensuremath{\nwarrow}',
    8599     => '\ensuremath{\nearrow}',
    8600     => '\ensuremath{\searrow}',
    8601     => '\ensuremath{\swarrow}',
    8605     => '\ensuremath{\leadsto}',
    8614     => '\ensuremath{\mapsto}',
    8617     => '\ensuremath{\hookleftarrow}',
    8618     => '\ensuremath{\hookrightarrow}',
    8629     => '\ensuremath{\hookleftarrow}', # not an exact match but best I know.
    'crarr'  => '\ensuremath{\hookleftarrow}', # not an exact match but best I know.
    8636     => '\ensuremath{\leftharpoonup}',
    8637     => '\ensuremath{\leftharpoondown}',
    8640     => '\ensuremath{\rightharpoonup}',
    8641     => '\ensuremath{\rightharpoondown}',
    8652     => '\ensuremath{\rightleftharpoons}',
    8656     => '\ensuremath{\Leftarrow}',
    'lArr'   => '\ensuremath{\Leftarrow}',
    8657     => '\ensuremath{\Uparrow}',
    'uArr'   => '\ensuremath{\Uparrow}',
    8658     => '\ensuremath{\Rightarrow}',
    'rArr'   => '\ensuremath{\Rightarrow}',
    8659     => '\ensuremath{\Downarrow}',
    'dArr'   => '\ensuremath{\Downarrow}',
    8660     => '\ensuremath{\Leftrightarrow}',
    'hArr'   => '\ensuremath{\Leftrightarrow}',
    8661     => '\ensuremath{\Updownarrow}',
    'vArr'   => '\ensuremath{\Updownarrow}',
    8666     => '\ensuremath{\Lleftarrow}',
    'lAarr'   => '\ensuremath{\Lleftarrow}',
    8667     => '\ensuremath{\Rrightarrow}',
    'rAarr'  => '\ensuremath{\Rrightarrow}',
    8669     => '\ensuremath{\rightsquigarrow}',
    'rarrw'  => '\ensuremath{\rightsquigarrow}',
    

    # Mathematical operators.
	
    
    'forall' => '\ensuremath{\forall}',
    8704     => '\ensuremath{\forall}',
    'comp'   => '\ensuremath{\complement}',
    8705     => '\ensuremath{\complement}',
    'part'   => '\ensuremath{\partial}',
    8706     => '\ensuremath{\partial}',
    'exist'  => '\ensuremath{\exists}',
    8707     => '\ensuremath{\exists}',
    'nexist' => '\ensuremath{\nexists}',
    8708     => '\ensuremath{\nexists}',
    'empty'  => '\ensuremath{\emptyset}',
    8709     => '\ensuremath{\emptyset}',
    8710     => '\ensuremath{\Delta}',
    'nabla'  => '\ensuremath{\nabla}',
    8711     => '\ensuremath{\nabla}',
    'isin'   => '\ensuremath{\in}',
    8712     => '\ensuremath{\in}',
    'notin'  => '\ensuremath{\notin}',
    8713     => '\ensuremath{\notin}',
    ni       => '\ensuremath{\ni}',
    8715     => '\ensuremath{\ni}',
    8716     => '\ensuremath{\not\ni}',
    'prod'   => '\ensuremath{\prod}',
    8719     => '\ensuremath{\prod}',
    8720     => '\ensuremath{\coprod}',
    'sum'    => '\ensuremath{\sum}',
    8721     => '\ensuremath{\sum}',
    'minus'  => '\ensuremath{-}',
    8722     => '\ensuremath{-}',
    8723     => '\ensuremath{\mp}',
    8724     => '\ensuremath{\dotplus}',
    8725     => '\ensuremath{\diagup}',
    8726     => '\ensuremath{\smallsetminus}',
    'lowast' => '\ensuremath{*}',
    8727     => '\ensuremath{*}',
    8728     => '\ensuremath{\circ}',
    8729     => '\ensuremath{\bullet}',
    'radic'  => '\ensuremath{\surd}',
    8730     => '\ensuremath{\surd}',
    8731     => '\ensuremath{\sqrt[3]{}}',
    8732     => '\ensuremath{\sqrt[4]{}}',
    'prop'   => '\ensuremath{\propto}',
    8733     => '\ensuremath{\propto}',
    'infin'  => '\ensuremath{\infty}',
    8734     => '\ensuremath{\infty}',
#
#   The items below require the isoent latex package which I can't find at least for FC5.
#   Temporarily commented out.
#
#    'ang90'  => '\ensuremath{\sqangle}',
#    8735     => '\ensuremath{\sqangle}',
    'ang'    => '\ensuremath{\angle}',
    8736     => '\ensuremath{\angle}',
    'angmsd' => '\ensuremath{\measuredangle}',
    8737     => '\ensuremath{\measuredangle}',
    'angsph' => '\ensuremath{\sphericalangle}',
    8738     => '\ensuremath{\sphericalangle}',
    8739     => '\ensuremath{\vert}',
    8740     => '\ensuremath{\Vert}',
    'and'    => '\ensuremath{\land}',
    8743     => '\ensuremath{\land}',
    'or'     => '\ensuremath{\lor}',
    8744     => '\ensuremath{\lor}',
    'cap'    => '\ensuremath{\cap}',
    8745     => '\ensuremath{\cap}',
    'cup'    => '\ensuremath{\cup}',
    8746     => '\ensuremath{\cup}',
    'int'    => '\ensuremath{\int}',
    8747     => '\ensuremath{\int}',
    'conint' => '\ensuremath{\oint}',
    8750     => '\ensuremath{\oint}',
    'there4' => '\ensuremath{\therefore}',
    8756     => '\ensuremath{\therefore}',
    'becaus' => '\ensuremath{\because}',
    8757     => '\ensuremath{\because}',
    8758     => '\ensuremath{:}',
    8759     => '\ensuremath{::}',
    'sim'    => '\ensuremath{\sim}',
    8764     => '\ensuremath{\sim}',
    8765     => '\ensuremath{\backsim}',
    'wreath' => '\ensuremath{\wr}',
    8768     => '\ensuremath{\wr}',
    'nsim'   => '\ensuremath{\not\sim}',
    8769     => '\ensuremath{\not\sim}',
#    'asymp'  => '\ensuremath{\asymp}',  &asymp; is actually a different glyph.
    8771     => '\ensuremath{\asymp}',
    8772     => '\ensuremath{\not\asymp}',
    'cong'   => '\ensuremath{\cong}',
    8773     => '\ensuremath{\cong}',
    8775     => '\ensuremath{\ncong}',
    8778     => '\ensuremath{\approxeq}',
    8784     => '\ensuremath{\doteq}',
    8785     => '\ensuremath{\doteqdot}',
    8786     => '\ensuremath{\fallingdotseq}',
    8787     => '\ensuremath{\risingdotseq}',
    8788     => '\ensuremath{:=}',
    8789     => '\ensuremath{=:}',
    8790     => '\ensuremath{\eqcirc}',
    8791     => '\ensuremath{\circeq}',
    'wedgeq' => '\ensuremath{\stackrel{\wedge}{=}}',
    8792     => '\ensuremath{\stackrel{\wedge}{=}}',
    8794     => '\ensuremath{\stackrel{\vee}{=}}',
    8795     => '\ensuremath{\stackrel{\star}{=}}',
    8796     => '\ensuremath{\triangleq}',
    8797     => '\ensuremath{\stackrel{def}{=}}',
    8798     => '\ensuremath{\stackrel{m}{=}}',
    8799     => '\ensuremath{\stackrel{?}{=}}',
    'ne'     => '\ensuremath{\neq}',
    8800     => '\ensuremath{\neq}',
    'equiv'  => '\ensuremath{\equiv}',
    8801     => '\ensuremath{\equiv}',
    8802     => '\ensuremath{\not\equiv}',
    'le'     => '\ensuremath{\leq}',
    8804     => '\ensuremath{\leq}',
    'ge'     => '\ensuremath{\geq}',
    8805     => '\ensuremath{\geq}',
    8806     => '\ensuremath{\leqq}',
    8807     => '\ensuremath{\geqq}',
    8810     => '\ensuremath{\ll}',
    8811     => '\ensuremath{\gg}',
    'twixt'  => '\ensuremath{\between}',
    8812     => '\ensuremath{\between}',
    8813     => '\ensuremath{\not\asymp}',
    8814     => '\ensuremath{\not<}',
    8815     => '\ensuremath{\not>}',
    8816     => '\ensuremath{\not\leqslant}',
    8817     => '\ensuremath{\not\geqslant}',
    8818     => '\ensuremath{\lesssim}',
    8819     => '\ensuremath{\gtrsim}',
    8820     => '\ensuremath{\stackrel{<}{>}}',
    8821     => '\ensuremath{\stackrel{>}{<}}',
    8826     => '\ensuremath{\prec}',
    8827     => '\ensuremath{\succ}',
    8828     => '\ensuremath{\preceq}',
    8829     => '\ensuremath{\succeq}',
    8830     => '\ensuremath{\not\prec}',
    8831     => '\ensuremath{\not\succ}',
    'sub'    => '\ensuremath{\subset}',
    8834     => '\ensuremath{\subset}',
    'sup'    => '\ensuremath{\supset}',
    8835     => '\ensuremath{\supset}',
    'nsub'   => '\ensuremath{\not\subset}',
    8836     => '\ensuremath{\not\subset}',
    8837     => '\ensuremath{\not\supset}',
    'sube'   => '\ensuremath{\subseteq}',
    8838     => '\ensuremath{\subseteq}',
    'supe'   => '\ensuremath{\supseteq}',
    8839     => '\ensuremath{\supseteq}',
    8840     => '\ensuremath{\nsubseteq}',
    8841     => '\ensuremath{\nsupseteq}',
    8842     => '\ensuremath{\subsetneq}',
    8843     => '\ensuremath{\supsetneq}',
    8847     => '\ensuremath{\sqsubset}',
    8848     => '\ensuremath{\sqsupset}',
    8849     => '\ensuremath{\sqsubseteq}',
    8850     => '\ensuremath{\sqsupseteq}',
    8851     => '\ensuremath{\sqcap}',
    8852     => '\ensuremath{\sqcup}',
    'oplus'  => '\ensuremath{\oplus}',
    8853     => '\ensuremath{\oplus}',
    8854     => '\ensuremath{\ominus}',
    'otimes' => '\ensuremath{\otimes}',
    8855     => '\ensuremath{\otimes}',
    8856     => '\ensuremath{\oslash}',
    8857     => '\ensuremath{\odot}',
    8858     => '\ensuremath{\circledcirc}',
    8859     => '\ensuremath{\circledast}',
    8861     => '\ensuremath{\ominus}', # Close enough for government work.
    8862     => '\ensuremath{\boxplus}',
    8863     => '\ensuremath{\boxminus}',
    8864     => '\ensuremath{\boxtimes}',
    8865     => '\ensuremath{\boxdot}',
    'vdash'  => '\ensuremath{\vdash}',
    8866     => '\ensuremath{\vdash}',
    'dashv'  => '\ensuremath{\dashv}',
    8867     => '\ensuremath{\dashv}',
    'perp'   => '\ensuremath{\perp}',
    8869     => '\ensuremath{\perp}',
    8871     => '\ensuremath{\models}',
    8872     => '\ensuremath{\vDash}',    
    8873     => '\ensuremath{\Vdash}',
    8874     => '\ensuremath{\Vvdash}',
    8876     => '\ensuremath{\nvdash}',
    8877     => '\ensuremath{\nvDash}',
    8878     => '\ensuremath{\nVdash}',
    8880     => '\ensuremath{\prec}',
    8881     => '\ensuremath{\succ}',
    8882     => '\ensuremath{\vartriangleleft}',
    8883     => '\ensuremath{\vartriangleright}',
    8884     => '\ensuremath{\trianglelefteq}',
    8885     => '\ensuremath{\trianglerighteq}',
    8891     => '\ensuremath{\veebar}',
    8896     => '\ensuremath{\land}',
    8897     => '\ensuremath{\lor}',
    8898     => '\ensuremath{\cap}',
    8899     => '\ensuremath{\cup}',
    8900     => '\ensuremath{\diamond}',
    'sdot'   => '\ensuremath{\cdot}',
    8901     => '\ensuremath{\cdot}',
    8902     => '\ensuremath{\star}',
    8903     => '\ensuremath{\divideontimes}',
    8904     => '\ensuremath{\bowtie}',
    8905     => '\ensuremath{\ltimes}',
    8906     => '\ensuremath{\rtimes}',
    8907     => '\ensuremath{\leftthreetimes}',
    8908     => '\ensuremath{\rightthreetimes}',
    8909     => '\ensuremath{\simeq}',
    8910     => '\ensuremath{\curlyvee}',
    8911     => '\ensuremath{\curlywedge}',
    8912     => '\ensuremath{\Subset}',
    8913     => '\ensuremath{\Supset}',
    8914     => '\ensuremath{\Cap}',
    8915     => '\ensuremath{\Cup}',
    8916     => '\ensuremath{\pitchfork}',
    8918     => '\ensuremath{\lessdot}',
    8919     => '\ensuremath{\gtrdot}',
    8920     => '\ensuremath{\lll}',
    8921     => '\ensuremath{\ggg}',
    8922     => '\ensuremath{\gtreqless}',
    8923     => '\ensuremath{\lesseqgtr}',
    8924     => '\ensuremath{\eqslantless}',
    8925     => '\ensuremath{\eqslantgtr}',
    8926     => '\ensuremath{\curlyeqprec}',
    8927     => '\ensuremath{\curlyeqsucc}',
    8928     => '\ensuremath{\not\preccurlyeq}',
    8929     => '\ensuremath{\not\succcurlyeq}',
    8930     => '\ensuremath{\not\sqsupseteq}',
    8931     => '\ensuremath{\not\sqsubseteq}',
    8938     => '\ensuremath{\not\vartriangleleft}',
    8939     => '\ensuremath{\not\vartriangleright}',
    8940     => '\ensuremath{\not\trianglelefteq}',
    8941     => '\ensuremath{\not\trianglerighteq}',
    8942     => '\ensuremath{\vdots}',
    8960     => '\ensuremath{\varnothing}',
    'lceil'  => '\ensuremath{\lceil}',
    8968     => '\ensuremath{\lceil}',
    'rceil'  => '\ensuremath{\rceil}',
    8969     => '\ensuremath{\rceil}',
    'lfloor' => '\ensuremath{\lfloor}',
    8970     => '\ensuremath{\lfloor}',
    'rfloor' => '\ensuremath{\rfloor}',
    8971     => '\ensuremath{\rfloor}',
    'lang'   => '\ensuremath{\langle}',
    9001     => '\ensuremath{\langle}',
    'rang'   => '\ensuremath{\rangle}',
    9002     => '\ensuremath{\rangle}',
    'loz'    => '\ensuremath{\lozenge}',
    9674     => '\ensuremath{\lozenge}',
    'spades' => '\ensuremath{\spadesuit}',
    9824     => '\ensuremath{\spadesuit}',
    9825     => '\ensuremath{\heartsuit}',
    9826     => '\ensuremath{\diamondsuit}',
    'clubs'  => '\ensuremath{\clubsuit}',
    9827     => '\ensuremath{\clubsuit}',
    'diams'  => '\ensuremath{\blacklozenge}',
    9830     => '\ensuremath{\blacklozenge}'
    
);

#  There are some named entities that don't have a good
#  latex equivalent, these are converted to utf-8 via this table
#  of entity name -> unicode number.

my  %utf_table = (
    'THORN'  => 222,
    'thorn'  => 254,
    'eth'    => 240,
    'hearts' => 9829
);

# 
#  Convert a numerical entity (that does not exist in our hash)
#  to its UTF-8 equivalent representation.
#  This allows us to support, to some extent, any entity for which
#  dvipdf can find a gylph (given that LaTeX is now UTF-8 clean).
#
# Parameters:
#   unicode  - The unicode for the character.  This is assumed to
#              be a decimal value
# Returns:
#   The UTF-8 equiavalent of the value.
#
sub entity_to_utf8 {
    my ($unicode) = @_;
    my $result =  pack("U", $unicode);
    return $result;
}


#
#  Convert an entity to the corresponding LateX if possible.
#  If not possible, and the entity is numeric,
#  the entity is treated like a Unicode character and converted
#  to UTF-8 which should display as long as dvipdf can find the
#  appropriate glyph.
#
#  The entity is assumed to have already had the 
#  &# ;  or & ; removed
#
# Parameters:
#   entity    - Name of entity to convert.
# Returns:
#  One of the following:
#   - Latex string that produces the entity.
#   - UTF-8 equivalent of a numeric entity for which we don't have a latex string.
#   - ' ' for text entities for which there's no latex equivalent.
#
sub entity_to_latex {
    my ($entity) = @_;

    # Try to look up the entity (text or numeric) in the hash:



    my $latex = $entities{"$entity"};
    if (defined $latex) {
	return $latex;
    }
    # If the text is purely numeric we can do the UTF-8 conversion:
    # Otherwise there are a few textual entities that don't have good latex
    # which can be converted to unicode:
    #
    if ($entity =~ /^\d+$/) {
	return &entity_to_utf8($entity);
    } else {
	my $result = $utf_table{"$entity"};
	if (defined $result) {
	    return &entity_to_utf8($result);
	}
    }
    #  Can't do the conversion`< ...

    return " ";
}

#
#  Convert all the entities in a string.
#  We locate all the entities, pass them into entity_to_latex and 
#  and replace occurences in the input string.
#  The assumption is that there are few entities in any string/document
#  so this looping is not too bad.  The advantage of looping vs. regexping is
#  that we now can use lookup tables for the translation in entity_to_latex above.
#
# Parameters:
#   input   - Input string/document
# Returns
#   input with entities replaced by latexable stuff (UTF-8 encodings or
#   latex control strings to produce the entity.
#
#
sub replace_entities {
    my ($input)  = @_;
    my $start;
    my $end;
    my $entity;
    my $latex;
    
    # First the &#nnn; entities:

    while ($input =~ /(&\#\d+;)/) {
	($start) = @-;
	($end)   = @+;
	$entity  = substr($input, $start+2, $end-$start-3);
	$latex = &entity_to_latex($entity);
	substr($input, $start, $end-$start) = $latex;
    }

    # Hexadecimal entities:

    while ($input =~ /&\#x(\d|[a-f,A-f])+;/) {
	($start) = @-;
	($end)   = @+;
	$entity  = "0" . substr($input, $start+2, $end-$start-3); # 0xhexnumber
	$latex = &entity_to_latex(hex($entity));
	substr($input, $start, $end-$start) = $latex;
    }


    # Now the &text; entites;
    
    while ($input =~/(&\w+;)/) {
	($start) = @-;
	($end)   = @+;
	$entity   = substr($input, $start+1, $end-$start-2);
	$latex    = &entity_to_latex($entity);
	substr($input, $start, $end-$start) = $latex;
	
   }
    return $input;
}

1; 

__END__

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>