# File:  [LON-CAPA] / loncom / interface / entities.pm
# Revision 1.7: download - view: text, annotated - select for diffs
# Tue Apr 15 10:10:10 2008 UTC (16 years ago) by foxr
# Branches: MAIN
# CVS tags: HEAD
# Correct errors in code logic and table.

# The LearningOnline Network
# entity -> tex.
#
# $Id:
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
# http://www.lon-capa.org/
#
#
package Apache::entities;
use strict;
#
#   This file contains a table driven entity-->latex converter.
#
#  Assumptions:
#   The number of entities in a resource is small compared with the
#   number of possible entities that might be translated.
#   Therefore the strategy is to match a general entity pattern
#   &.+; over and over, pull out the match, look it up in an entity -> tex hash
#   and do the replacement.
#
#  In order to simplify the hash, the following reductions are done:
#   &#\d+;  has the &# and ; stripped and is converted to an int.
#   &#x.+;  has the &#x and ; stripped and is converted to an int as a hex
#                             value.
#   All others have the & and ; stripped.


#  The hash:  Add new conversions here; leave off the leading & and the trailing ;
#  All numeric entities need only appear as their decimal versions
#  (e.g. 1234 is sufficient; there is no need for 0x4d2 as well).
#
#  This entity table is mercilessly cribbed from the  HTML pocket reference
#  table starting at pg 82.  In most cases the LaTeX equivalent codes come from
#  the original massive regular expression replacements originally by 
#  A. Sakharuk in lonprintout.pm
#
#  I also want to acknowledge
#   ISO Character entities and their LaTeX equivalents by 
#      Vidar Bronken Gundersen, and Rune Mathisen
#    http://www.bitjungle.com/isoent-ref.pdf
#

#  Note numerical entities are essentially unicode character codes.
#
package Apache::entities;

#  Lookup table: entity (decimal code point, or name without the & and ;)
#  => LaTeX replacement text.
#
#  NOTE(review): the values are single-quoted, so only \\ and \' are
#  collapsed by Perl; sequences such as \{ and \} are stored with their
#  backslash.  Entries mix \{ and { -- confirm which form the LaTeX
#  consumer expects.
#  NOTE(review): several control-word values (e.g. \textonehalf, \aa, \OE)
#  carry no trailing space, unlike their neighbours; a letter placed directly
#  after such an entity would merge into the control word -- confirm.
#
my %entities = (

    #  ---- ASCII code page: ----------------

    # Translation to empty strings:

    7        => "",
    9        => "",
    10       => "",
    13       => "",
    
    # Translations to simple characters:

    32       => " ",
    33       => "!",
    34       => '"',
    'quot'   => '"',
    35       => '\\\#',
    36       => '\\\$',
    37       => '\\%',
    38       => '\\&',
    'amp'    => '\\&',
    39       => '\'',		# Apostrophe
    40       => '(',
    41       => ')',
    42       => '\*',
    43       => '\+',
    44       => ',',		#  comma
    45       => '-',
    46       => '\.',
    47       => '\/',
    48       => '0',
    49       => '1',
    50       => '2',
    51       => '3',
    52       => '4',
    53       => '5',
    54       => '6',
    55       => '7',
    56       => '8',
    57       => '9',
    58       => ':',
    59       => ';',
    60       => '\\ensuremath\{<\}',
    'lt'     => '\\ensuremath\{<\}',
    61       => '\\ensuremath\{=\}',
    62       => '\\ensuremath\{>\}',
    'gt'     => '\\ensuremath\{>\}',
    63       => '\?',
    64       => '@',
    65       => 'A',
    66       => 'B',
    67       => 'C',
    68       => 'D',
    69       => 'E',
    70       => 'F',
    71       => 'G',
    72       => 'H',
    73       => 'I',
    74       => 'J',
    75       => 'K',
    76       => 'L',
    77       => 'M',
    78       => 'N',
    79       => 'O',
    80       => 'P',
    81       => 'Q',
    82       => 'R',
    83       => 'S',
    84       => 'T',
    85       => 'U',
    86       => 'V',
    87       => 'W',
    88       => 'X',
    89       => 'Y',
    90       => 'Z',
    91       => '[',
    92       => '\\ensuremath\{\\setminus\}', # \setminus is \ with special spacing.
    93       => ']',
    94       => '\\ensuremath\{\\wedge\}',
    95       => '\\underline\{\\makebox[2mm]\\{\\strut\}\}', # Underline 2mm of space for _
    96       => '`',
    97       => 'a',
    98       => 'b',
    99       => 'c',
    100      => 'd',
    101      => 'e',
    102      => 'f',
    103      => 'g',
    104      => 'h', 
    105      => 'i',
    106      => 'j',
    107      => 'k',
    108      => 'l',
    109      => 'm',
    110      => 'n',
    111      => 'o',
    112      => 'p',
    113      => 'q',
    114      => 'r',
    115      => 's',
    116      => 't',
    117      => 'u',
    118      => 'v',
    119      => 'w',
    120      => 'x',
    121      => 'y',
    122      => 'z',
    123      => '\\{',
    124      => '\|',
    125      => '\\}',
    126      => '\~',

    #   Controls and Latin-1 supplement.  Note that some entities that have
    #   visible effect are not printing unicode characters.  Specifically
    #   &#130;-&#160;

    130     => ',',
    131     => '\\textflorin ',
    132     => ',,',		# Low double left quotes.
    133     => '\\ensuremath\{\\ldots\}',
    134     => '\\ensuremath\{\\dagger\}',
    135     => '\\ensuremath\{\\ddagger\}',
    136     => '\\ensuremath\{\\wedge\}',
    137     => '\\textperthousand ',
    138     => '\\v\{S\}',
    139     => '\\ensuremath\{<\}',
    140     => '\{\\OE\}',
    
    #  There's a gap here in my entity table

    145     => '\`',
    146     => '\'',
    147     => '\`\`',
    148     => '\'\'',
    149     => '\\ensuremath\{\\bullet\}',
    150     => '--',
    151     => '---',
    152     => '\\ensuremath\{\\sim\}',
    153     => '\\texttrademark',
    154     => '\\v\{s\}',
    155     => '\\ensuremath\{>\}',
    156     => '\\oe ',
    
    # Another short gap:

    159     => '\\"Y',
    160     => '~',
    'nbsp'  => '~',
    161     => '\\textexclamdown ',
    'iexcl' => '\\textexclamdown ',
    162     => '\\textcent ',
    'cent'  => '\\textcent ',
    163     => '\\pounds ',
    'pound' => '\\pounds ',
    164     => '\\textcurrency ',
    'curren' => '\\textcurrency ',
    165     => '\\textyen ',
    'yen'   => '\\textyen ',
    166     => '\\textbrokenbar ',
    'brvbar' => '\\textbrokenbar ',
    167     => '\\textsection ',
    'sect'  => '\\textsection ',
    168     => '\\texthighdieresis ',
    'uml'   => '\\texthighdieresis ',
    169     => '\\copyright ',
    'copy'  => '\\copyright ',
    170     => '\\textordfeminine ',
    'ordf'  => '\\textordfeminine ',
    171     => '\\ensuremath\{\ll\}', # approximation of left angle quote.
    'laquo' => '\\ensuremath\{\ll\}', #   ""
    172     => '\\ensuremath\{\\neg\}',
    'not'   => '\\ensuremath\{\\neg\}',
    173     => ' - ',
    'shy'   => ' - ',
    174     => '\\textregistered ',
    'reg'   => '\\textregistered ',
    175     => '\\ensuremath\{^\{-\}\}',
    'macr'  => '\\ensuremath\{^\{-\}\}',
    176     => '\\ensuremath\{^\{\\circ\}\}',
    'deg'   => '\\ensuremath\{^\{\\circ\}\}',
    177     => '\\ensuremath\{\\pm\}',
    'plusmn' => '\\ensuremath\{\\pm\}',
    178     => '\\ensuremath\{^2\}',
    'sup2'  => '\\ensuremath\{^2\}',
    179     => '\\ensuremath\{^3\}',
    'sup3'  => '\\ensuremath\{^3\}',
    180     => '\\textacute ',
    'acute' => '\\textacute ',
    181     => '\\ensuremath\{\\mu\}',
    'micro' => '\\ensuremath\{\\mu\}',
    182     => '\\P ',
    para    => '\\P ',
    183     => '\\ensuremath\{\\cdot\}',
    'middot' => '\\ensuremath\{\\cdot\}',
    184     => '\\c\{\\strut\}',
    'cedil' => '\\c\{\\strut\}',
    185     => '\\ensuremath\{^1\}',
    sup1    => '\\ensuremath\{^1\}',
    186     => '\\textordmasculine ',
    'ordm'  => '\\textordmasculine ',
    187     => '\\ensuremath\{\\gg\}',
    'raquo' => '\\ensuremath\{\\gg\}',
    188     => '\\textonequarter ',
    'frac14' => '\\textonequarter ',
    189     => '\\textonehalf' ,
    'frac12' => '\\textonehalf' ,
    190     => '\\textthreequarters ',
    'frac34' => '\\textthreequarters ',
    191     =>  '\\textquestiondown ',
    'iquest' => '\\textquestiondown ',
    192     => '\\\`\{A\}',
    'Agrave' => '\\\`\{A\}',
    193     => '\\\'\{A\}',
    'Aacute' => '\\\'\{A\}',
    194     => '\\^\{A\}',
    'Acirc' => '\\^\{A\}',
    195     => '\\~{A}',
    'Atilde'=> '\\~{A}',
    196     => '\\\"{A}',
    'Auml'  => '\\\"{A}',
    197     => '{\\AA}',
    'Aring' => '{\\AA}',
    198     => '{\\AE}',
    'AElig' => '{\\AE}',
    199     => '\\c{C}',          # upper-case C with cedilla
    'Ccedil'=> '\\c{C}',
    '200'   => '\\\`{E}',
    'Egrave'=> '\\\`{E}',
    201     => '\\\'{E}',
    'Eacute'=> '\\\'{E}',
    202     => '\\\^{E}',
    'Ecirc' => '\\\^{E}',
    203     => '\\\"{E}',
    'Euml'  => '\\\"{E}',
    204     => '\\\`{I}',
    'Igrave'=> '\\\`{I}',
    205     => '\\\'{I}',
    'Iacute'=> '\\\'{I}',
    206     => '\\\^{I}',
    'Icirc' => '\\\^{I}',
    207     => '\\\"{I}',
    'Iuml'  => '\\\"{I}',
    208     => '\\DH',            # capital eth; counterpart of \dh used for 240
    'ETH'   => '\\DH',
    209     => '\\~{N}',
    'Ntilde'=> '\\~{N}',
    210     => '\\\`{O}',
    'Ograve'=> '\\\`{O}',
    211     => '\\\'{O}',
    'Oacute'=> '\\\'{O}',
    212     => '\\\^{O}',
    'Ocirc' => '\\\^{O}',
    213     => '\\~{O}',
    'Otilde'=> '\\~{O}',
    214     => '\\\"{O}',
    'Ouml'  => '\\\"{O}',
    215     => '\\ensuremath\{\\times\}',
    'times' => '\\ensuremath\{\\times\}',
    216     => '\\O',
    'Oslash'=> '\\O',
    217     => '\\\`{U}',
    'Ugrave'=> '\\\`{U}',
    218     => '\\\'{U}',
    'Uacute'=> '\\\'{U}',
    219     => '\\\^{U}',
    'Ucirc' => '\\\^{U}',
    220     => '\\\"{U}',
    'Uuml'  => '\\\"{U}',
    221     => '\\\'{Y}',
    'Yacute'=> '\\\'{Y}',
    222     => '\\TH',
    'THORN' => '\\TH',
    223     => '{\\sz}',
    'szlig' => '{\\sz}',
    224     => '\\\`{a}',
    'agrave'=> '\\\`{a}',
    225     => '\\\'{a}',
    'aacute'=> '\\\'{a}',
    226     => '\\\^{a}',
    'acirc' => '\\\^{a}',
    227     => '\\\~{a}',
    'atilde'=> '\\\~{a}',
    228     => '\\\"{a}',
    'auml'  => '\\\"{a}',
    229     => '\\aa',
    'aring' => '\\aa',
    230     => '\\ae',
    'aelig' => '\\ae',
    231     => '\\c{c}',
    'ccedil'=> '\\c{c}',
    232     => '\\\`{e}',
    'egrave'=> '\\\`{e}',
    233     => '\\\'{e}',
    'eacute'=> '\\\'{e}',
    234     => '\\\^{e}',
    'ecirc' => '\\\^{e}',
    235     => '\\\"{e}',
    'euml'  => '\\\"{e}',
    236     => '\\\`{i}',
    'igrave'=> '\\\`{i}',
    237     => '\\\'{i}',
    'iacute'=> '\\\'{i}',
    238     => '\\\^{i}',
    'icirc' => '\\\^{i}',
    239     => '\\\"{i}',
    'iuml'  => '\\\"{i}',
    240     => '\\dh',
    'eth'   => '\\dh',
    241     => '\\\~{n}',
    'ntilde'=> '\\\~{n}',
    242     => '\\\`{o}',
    'ograve'=> '\\\`{o}',
    243     => '\\\'{o}',
    'oacute'=> '\\\'{o}',
    244     => '\\\^{o}',
    'ocirc' => '\\\^{o}',
    245     => '\\\~{o}',
    'otilde'=> '\\\~{o}',
    246     => '\\\"{o}',
    'ouml'  => '\\\"{o}',
    247     => '\\ensuremath\{\\div\}',
    'divide'=> '\\ensuremath\{\\div\}',
    248     => '{\\o}',
    'oslash'=> '{\\o}',
    249     => '\\\`{u}',
    'ugrave'=> '\\\`{u}',
    250     => '\\\'{u}',
    'uacute'=> '\\\'{u}',
    251     => '\\\^{u}',
    'ucirc' => '\\\^{u}',
    252     => '\\\"{u}',
    'uuml'  => '\\\"{u}',
    253     => '\\\'{y}',
    'yacute'=> '\\\'{y}',
    254     => '\\th',
    'thorn' => '\\th',
    255     => '\\\"{y}',
    'yuml'  => '\\\"{y}',

    # hbar entity number comes from the unicode character:
    # see e.g. http://www.unicode.org/charts/PDF/U0100.pdf
    # ISO also documents a 'planck' entity.

    295     => '\\ensuremath\{\hbar\}',
    'plank' => '\\ensuremath\{\hbar\}',   # historical misspelling, kept for compatibility
    'planck' => '\\ensuremath\{\hbar\}',

    # Latin extended-A HTML 4.01 entities:

    338      => '\\OE',
    'OElig'  => '\\OE',
    339      => '\\oe',
    'oelig'  => '\\oe',
    352      => '\\v{S}',
    'Scaron' => '\\v{S}',
    353      => '\\v{s}',
    'scaron' => '\\v{s}',
    376      => '\\\"{Y}',
    'Yuml'   => '\\\"{Y}', 


    # Latin extended B HTML 4.01 entities

    402      => '\\ensuremath{f}',
    'fnof'   => '\\ensuremath{f}',

    # Spacing modifier letters:
    
    710      => '\^{}',
    'circ'   => '\^{}',
    732      => '\~{}',
    'tilde'  => '\~{}',

    # Greek uppercase:

    913      => '\\ensuremath\{\\mathrm\{A\}\}',
    'Alpha'  => '\\ensuremath\{\\mathrm\{A\}\}',
    914      => '\\ensuremath\{\\mathrm\{B\}\}',
    'Beta'   => '\\ensuremath\{\\mathrm\{B\}\}',
    915      => '\\ensuremath\{\\Gamma\}',
    'Gamma'  => '\\ensuremath\{\\Gamma\}',
    916      => '\\ensuremath\{\\Delta\}',
    'Delta'  => '\\ensuremath\{\\Delta\}',
    917      => '\\ensuremath\{\\mathrm\{E\}\}',
    'Epsilon'=> '\\ensuremath\{\\mathrm\{E\}\}',
    918      => '\\ensuremath\{\\mathrm\{Z\}\}',
    'Zeta'   => '\\ensuremath\{\\mathrm\{Z\}\}',
    919      => '\\ensuremath\{\\mathrm\{H\}\}',
    'Eta'    => '\\ensuremath\{\\mathrm\{H\}\}',
    920      => '\\ensuremath\{\\Theta\}',
    'Theta'  => '\\ensuremath\{\\Theta\}',
    921      => '\\ensuremath\{\\mathrm\{I\}\}',
    'Iota'   => '\\ensuremath\{\\mathrm\{I\}\}',
    922      => '\\ensuremath\{\\mathrm\{K\}\}',
    'Kappa'  => '\\ensuremath\{\\mathrm\{K\}\}',
    923      => '\\ensuremath\{\\Lambda\}',
    'Lambda' => '\\ensuremath\{\\Lambda\}',
    924      => '\\ensuremath\{\\mathrm\{M\}\}',
    'Mu'     => '\\ensuremath\{\\mathrm\{M\}\}',
    925      => '\\ensuremath\{\\mathrm\{N\}\}',
    'Nu'     => '\\ensuremath\{\\mathrm\{N\}\}',
    926      => '\\ensuremath\{\\Xi\}',
    'Xi'     => '\\ensuremath\{\\Xi\}',
    927      => '\\ensuremath\{\\mathrm\{O\}\}',
    'Omicron'=> '\\ensuremath\{\\mathrm\{O\}\}',
    928      => '\\ensuremath\{\\Pi\}',
    'Pi'     => '\\ensuremath\{\\Pi\}',
    929      => '\\ensuremath\{\\mathrm\{P\}\}',
    'Rho'    => '\\ensuremath\{\\mathrm\{P\}\}',
   
    # Skips 930

    931      => '\\ensuremath\{\Sigma\}',
    'Sigma'  => '\\ensuremath\{\Sigma\}',
    932      => '\\ensuremath\{\\mathrm\{T\}\}',
    'Tau'    => '\\ensuremath\{\\mathrm\{T\}\}',
    933      => '\\ensuremath\{\\Upsilon\}',
    'Upsilon'=> '\\ensuremath\{\\Upsilon\}',
    934      => '\\ensuremath\{\\Phi\}',
    'Phi'    => '\\ensuremath\{\\Phi\}',
    935      => '\\ensuremath\{\\mathrm\{X\}\}',
    'Chi'    => '\\ensuremath\{\\mathrm\{X\}\}',
    936      => '\\ensuremath\{\\Psi\}',
    'Psi'    => '\\ensuremath\{\\Psi\}',
    937      => '\\ensuremath\{\\Omega\}',
    'Omega'  => '\\ensuremath\{\\Omega\}',


    # Greek lowercase:

    945      => '\\ensuremath\{\\alpha\}',
    'alpha'  => '\\ensuremath\{\\alpha\}',
    946      => '\\ensuremath\{\\beta\}',
    'beta'   => '\\ensuremath\{\\beta\}',
    947      => '\\ensuremath\{\\gamma\}',
    'gamma'  => '\\ensuremath\{\\gamma\}',
    948      => '\\ensuremath\{\\delta\}',
    'delta'  => '\\ensuremath\{\\delta\}',
    949      => '\\ensuremath\{\\epsilon\}',
    'epsilon'=> '\\ensuremath\{\\epsilon\}',
    950      => '\\ensuremath\{\\zeta\}',
    'zeta'   => '\\ensuremath\{\\zeta\}',
    951      => '\\ensuremath\{\\eta\}',
    'eta'    => '\\ensuremath\{\\eta\}',
    952      => '\\ensuremath\{\\theta\}',
    'theta'  => '\\ensuremath\{\\theta\}',
    953      => '\\ensuremath\{\\iota\}',
    'iota'   => '\\ensuremath\{\\iota\}',
    954      => '\\ensuremath\{\\kappa\}',
    'kappa'  => '\\ensuremath\{\\kappa\}',
    955      => '\\ensuremath\{\\lambda\}',
    'lambda' => '\\ensuremath\{\\lambda\}',
    956      => '\\ensuremath\{\\mu\}',
    'mu'     => '\\ensuremath\{\\mu\}',
    957      => '\\ensuremath\{\\nu\}',
    'nu'     => '\\ensuremath\{\\nu\}',
    958      => '\\ensuremath\{\\xi\}',
    'xi'     => '\\ensuremath\{\\xi\}',
    959      => '\\ensuremath\{o\}',
    'omicron'=> '\\ensuremath\{o\}',
    960      => '\\ensuremath\{\\pi\}',
    'pi'     => '\\ensuremath\{\\pi\}',
    961      => '\\ensuremath\{\\rho\}',
    'rho'    => '\\ensuremath\{\\rho\}',
    962      => '\\ensuremath\{\\varsigma\}',
    'sigmaf' => '\\ensuremath\{\\varsigma\}',
    963      => '\\ensuremath\{\\sigma\}',
    'sigma'  => '\\ensuremath\{\\sigma\}',
    964      => '\\ensuremath\{\\tau\}',
    'tau'    => '\\ensuremath\{\\tau\}',
    965      => '\\ensuremath\{\\upsilon\}',
    'upsilon'=> '\\ensuremath\{\\upsilon\}',
    966      => '\\ensuremath\{\\phi\}',
    'phi'    => '\\ensuremath\{\\phi\}',
    967      => '\\ensuremath\{\\chi\}',
    'chi'    => '\\ensuremath\{\\chi\}',
    968      => '\\ensuremath\{\\psi\}',
    'psi'    => '\\ensuremath\{\\psi\}',
    969      => '\\ensuremath\{\\omega\}',
    'omega'  => '\\ensuremath\{\\omega\}',
    977      => '\\ensuremath\{\\vartheta\}',
    'thetasym'=>'\\ensuremath\{\\vartheta\}',
    978      => '\\ensuremath\{\\varUpsilon\}',
    'upsih'  => '\\ensuremath\{\\varUpsilon\}',
    982      => '\\ensuremath\{\\varpi\}',
    'piv'    => '\\ensuremath\{\\varpi\}',

    
    # The general punctuation set:
    # ('enspc', 'emspc' and 'hellep' are historical misspellings of the
    #  HTML 4.01 names; they are kept so existing callers still match.)

    8194     => '\\hspace{.5em}',
    'ensp'   => '\\hspace{.5em}',
    'enspc'  => '\\hspace{.5em}',
    8195     => '\\hspace{1.0em}',
    'emsp'   => '\\hspace{1.0em}',
    'emspc'  => '\\hspace{1.0em}',
    8201     => '\\hspace{0.167em}',
    'thinsp' => '\\hspace{0.167em}',
    8204     => '\{\}',
    'zwnj'   => '\{\}',
    8205     => '',
    'zwj'    => '',
    8206     => '',
    'lrm'    => '',
    8207     => '',
    'rlm'    => '',
    8211     => '--',
    'ndash'  => '--',
    8212     => '---',
    'mdash'  => '---',
    8216     => '`',
    'lsquo'  => '`',
    8217     => "'",
    'rsquo'  => "'",
    8218     => '\\quotesinglebase',
    'sbquo'  => '\\quotesinglebase',
    8220     => '``',
    'ldquo'  => '``',
    8221     => "''",
    'rdquo'  => "''",
    8222     => '\\quotedblbase',
    'bdquo'  => '\\quotedblbase',
    8224     => '\\dagger',
    'dagger' => '\\dagger',
    '8225'   => '\\ddag',
    'Dagger' => '\\ddag',
    8226     => '\\textbullet',
    'bull'   => '\\textbullet',
    8230     => '\\textellipsis',
    'hellip' => '\\textellipsis',
    'hellep' => '\\textellipsis',
    8240     => '\\textperthousand',
    permil   => '\\textperthousand',
    8242     => '\\textquotesingle',
    'prime'  => '\\textquotesingle',
    8243     => '\\textquotedbl',
    'Prime'  => '\\textquotedbl',
    8249     => '\\guilsingleleft',
    'lsaquo' => '\\guilsingleleft',
    8250     => '\\guilsingleright',
    'rsaquo' => '\\guilsingleright',
    8254     => '\\textasciimacron',
    oline    => '\\textasciimacron',
    8260     => '\\textfractionsolidus',
    'frasl'  => '\\textfractionsolidus',
    8364     => '\\texteuro',
    'euro'   => '\\texteuro',

    # Letter like symbols

    
    8472     => '\\ensuremath\{\\wp\}',
    'weierp' => '\\ensuremath\{\\wp\}',
    8465     => '\\ensuremath\{\\Im\}',
    'image'  => '\\ensuremath\{\\Im\}',
    8476     => '\\ensuremath{\\Re\}',
    'real'   => '\\ensuremath{\\Re\}',
    8482     => '\\texttrademark',
    'trade'  => '\\texttrademark',
    8501     => '\\ensuremath{\\aleph\}',
    'alefsym'=> '\\ensuremath{\\aleph\}',

    # Arrows and then some (harpoons from Hon Kie).

    8592     => '\\textleftarrow',
    'larr'   => '\\textleftarrow',
    8593     => '\\textuparrow',
    'uarr'   => '\\textuparrow',
    8594     => '\\textrightarrow',
    'rarr'   => '\\textrightarrow',
    8595     => '\\textdownarrow',
    'darr'   => '\\textdownarrow',
    8596     => '\\ensuremath\{\\leftrightarrow\}',
    'harr'   => '\\ensuremath\{\\leftrightarrow\}',
    8598     => '\\ensuremath\{\\nwarrow\}',
    8599     => '\\ensuremath\{\\nearrow\}',
    8600     => '\\ensuremath\{\\searrow\}',
    8601     => '\\ensuremath\{\\swarrow\}',
    8605     => '\\ensuremath\{\\leadsto\}',
    8614     => '\\ensuremath\{\\mapsto\}',
    8617     => '\\ensuremath\{\\hookleftarrow\}',
    8618     => '\\ensuremath\{\\hookrightarrow\}',
    8629     => '\\ensuremath\{\\hookleftarrow\}', # not an exact match but best I know.
    'crarr'  => '\\ensuremath\{\\hookleftarrow\}', # not an exact match but best I know.
    8636     => '\\ensuremath\{\\leftharpoonup\}',
    8637     => '\\ensuremath\{\\leftharpoondown\}',
    8640     => '\\ensuremath\{\\rightharpoonup\}',
    8641     => '\\ensuremath\{\\rightharpoondown\}',
    8652     => '\\ensuremath\{\\rightleftharpoons\}',
    8656     => '\\ensuremath\{\\Leftarrow\}',
    'lArr'   => '\\ensuremath\{\\Leftarrow\}',
    8657     => '\\ensuremath\{\\Uparrow\}',
    'uArr'   => '\\ensuremath\{\\Uparrow\}',
    8658     => '\\ensuremath\{\\Rightarrow\}',
    'rArr'   => '\\ensuremath\{\\Rightarrow\}',
    8659     => '\\ensuremath\{\\Downarrow\}',
    'dArr'   => '\\ensuremath\{\\Downarrow\}',
    'hArr'   => '\\ensuremath\{\\Leftrightarrow\}',
    8660     => '\\ensuremath\{\\Leftrightarrow\}',
    'vArr'   => '\\ensuremath\{\\Updownarrow\}',
    8661     => '\\ensuremath\{\\Updownarrow\}',
    'lAarr'   => '\\ensuremath\{\\Lleftarrow\}',
    8666     => '\\ensuremath\{\\Lleftarrow\}',
    'rAarr'  => '\\ensuremath\{\\Rrightarrow\}',
    8667     => '\\ensuremath\{\\Rrightarrow\}',
    'rarrw'  => '\\ensuremath\{\\rightsquigarrow\}',
    8669     => '\\ensuremath\{\\rightsquigarrow\}',
    

    # Mathematical operators.

    
    'forall' => '\\ensuremath\{\\forall\}',
    8704     => '\\ensuremath\{\\forall\}',
    'comp'   => '\\ensuremath\{\\complement\}',
    8705     => '\\ensuremath\{\\complement\}',
    'part'   => '\\ensuremath\{\\partial\}',
    8706     => '\\ensuremath\{\\partial\}',
    'exist'  => '\\ensuremath\{\\exists\}',
    8707     => '\\ensuremath\{\\exists\}',
    'nexist' => '\\ensuremath\{\\nexists\}',
    8708     => '\\ensuremath\{\\nexists\}',
    'empty'  => '\\ensuremath\{\\emptyset\}',
    8709     => '\\ensuremath\{\\emptyset\}',
    8710     => '\\ensuremath\{\\Delta\}',
    'nabla'  => '\\ensuremath\{\\nabla\}',
    8711     => '\\ensuremath\{\\nabla\}',
    'isin'   => '\\ensuremath\{\\in\}',
    8712     => '\\ensuremath\{\\in\}',
    'notin'  => '\\ensuremath\{\\notin\}',
    8713     => '\\ensuremath\{\\notin\}',
    ni       => '\\ensuremath\{\\ni\}',
    8715     => '\\ensuremath\{\\ni\}',
    8716     => '\\ensuremath\{\\not\\ni\}',
    'prod'   => '\\ensuremath\{\\prod\}',
    8719     => '\\ensuremath\{\\prod\}',
    8720     => '\\ensuremath\{\\coprod\}',
    'sum'    => '\\ensuremath\{\\sum\}',
    8721     => '\\ensuremath\{\\sum\}',
    'minus'  => '\\ensuremath\{-\}',
    8722     => '\\ensuremath\{-\}',
    8723     => '\\ensuremath\{\\mp\}',
    8724     => '\\ensuremath\{\\dotplus\}',
    8725     => '\\ensuremath\{\\diagup\}',
    8726     => '\\ensuremath\{\\smallsetminus\}',
    'lowast' => '\\ensuremath\{*\}',
    8727     => '\\ensuremath\{*\}',
    8728     => '\\ensuremath\{\\circ\}',
    8729     => '\\ensuremath\{\\bullet\}',
    'radic'  => '\\ensuremath\{\\surd\}',
    8730     => '\\ensuremath\{\\surd\}',
    8731     => '\\ensuremath\{\\sqrt[3]\{\}\}',
    8732     => '\\ensuremath\{\\sqrt[4]\{\}\}',
    'prop'   => '\\ensuremath\{\\propto\}',
    8733     => '\\ensuremath\{\\propto\}',
    'infin'  => '\\ensuremath\{\\infty\}',
    8734     => '\\ensuremath\{\\infty\}',
    'ang90'  => '\\ensuremath\{\\sqangle\}',
    8735     => '\\ensuremath\{\\sqangle\}',
    'ang'    => '\\ensuremath\{\\angle\}',
    8736     => '\\ensuremath\{\\angle\}',
    'angmsd' => '\\ensuremath\{\\measuredangle\}',
    8737     => '\\ensuremath\{\\measuredangle\}',
    'angsph' => '\\ensuremath\{\\sphericalangle\}',
    8738     => '\\ensuremath\{\\sphericalangle\}',
    8739     => '\\ensuremath\{\\vert\}',
    8740     => '\\ensuremath\{\\Vert\}',
    'and'    => '\\ensuremath\{\\land\}',
    8743     => '\\ensuremath\{\\land\}',
    'or'     => '\\ensuremath\{\\lor\}',
    8744     => '\\ensuremath\{\\lor\}',
    'cap'    => '\\ensuremath\{\\cap\}',
    8745     => '\\ensuremath\{\\cap\}',
    'cup'    => '\\ensuremath\{\\cup\}',
    8746     => '\\ensuremath\{\\cup\}',
    'int'    => '\\ensuremath\{\\int\}',
    8747     => '\\ensuremath\{\\int\}',
    'conint' => '\\ensuremath\{\\oint\}',
    8750     => '\\ensuremath\{\\oint\}',
    'there4' => '\\ensuremath\{\\therefore\}',
    8756     => '\\ensuremath\{\\therefore\}',
    'becaus' => '\\ensuremath\{\\because\}',
    8757     => '\\ensuremath\{\\because\}',
    8758     => '\\ensuremath\{:\}',
    8759     => '\\ensuremath\{::\}',
    'sim'    => '\\ensuremath\{\\sim\}',
    8764     => '\\ensuremath\{\\sim\}',
    8765     => '\\ensuremath\{\\backsim\}',
    'wreath' => '\\ensuremath\{\\wr\}',
    8768     => '\\ensuremath\{\\wr\}',
    'nsim'   => '\\ensuremath\{\\not\sim\}',
    8769     => '\\ensuremath\{\\not\sim\}',
#    'asymp'  => '\\ensuremath\{\\asymp\}',  &asymp; is actually a different glyph.
    8771     => '\\ensuremath\{\\asymp\}',
    8772     => '\\ensuremath\{\\not\\asymp\}',
    'cong'   => '\\ensuremath\{\\cong\}',
    8773     => '\\ensuremath\{\\cong\}',
    8775     => '\\ensuremath\{\\ncong\}',
    8778     => '\\ensuremath\{\\approxeq\}',
    8784     => '\\ensuremath\{\\doteq\}',
    8785     => '\\ensuremath\{\\doteqdot\}',
    8786     => '\\ensuremath\{\\fallingdotseq\}',
    8787     => '\\ensuremath\{\\risingdotseq\}',
    8788     => '\\ensuremath\{:=\}',
    8789     => '\\ensuremath\{=:\}',
    8790     => '\\ensuremath\{\\eqcirc\}',
    8791     => '\\ensuremath\{\\circeq\}',
    'wedgeq' => '\\ensuremath\{\\stackrel\{\\wedge\}\{=\}\}',
    8792     => '\\ensuremath\{\\stackrel\{\\wedge\}\{=\}\}',
    8794     => '\\ensuremath\{\\stackrel\{\\vee\}\{=\}\}',
    8795     => '\\ensuremath\{\\stackrel\{\\star}\{=\}\}',
    8796     => '\\ensuremath\{\\triangleq\}',
    8797     => '\\ensuremath\{\\stackrel\{def\}\{=\}\}',
    8798     => '\\ensuremath\{\\stackrel\{m\}\{=\}\}',
    8799     => '\\ensuremath\{\\stackrel\{?\}\{=\}\}',
    'ne'     => '\\ensuremath\{\\neq\}',
    8800     => '\\ensuremath\{\\neq\}',
    'equiv'  => '\\ensuremath\{\\equiv\}',
    8801     => '\\ensuremath\{\\equiv\}',
    8802     => '\\ensuremath\{\\not\\equiv\}',
    'le'     => '\\ensuremath\{\\leq\}',
    8804     => '\\ensuremath\{\\leq\}',
    'ge'     => '\\ensuremath\{\\geq\}',
    8805     => '\\ensuremath\{\\geq\}',
    8806     => '\\ensuremath\{\\leqq\}',
    8807     => '\\ensuremath\{\\geqq\}',
    8810     => '\\ensuremath\{\\ll\}',
    8811     => '\\ensuremath\{\\gg\}',
    'twixt'  => '\\ensuremath\{\\between\}',
    8812     => '\\ensuremath\{\\between\}',
    8813     => '\\ensuremath\{\\not\\asymp\}',
    8814     => '\\ensuremath\{\\not<\}',
    8815     => '\\ensuremath\{\\not>\}',
    8816     => '\\ensuremath\{\\not\\leqslant\}',
    8817     => '\\ensuremath\{\\not\\geqslant\}',
    8818     => '\\ensuremath\{\\lesssim\}',
    8819     => '\\ensuremath\{\\gtrsim\}',
    8820     => '\\ensuremath\{\\stackrel\{<\}\{>\}\}',
    8821     => '\\ensuremath\{\\stackrel\{>\}\{<\}\}',
    8826     => '\\ensuremath\{\\prec\}',
    8827     => '\\ensuremath\{\\succ\}',
    8828     => '\\ensuremath\{\\preceq\}',
    8829     => '\\ensuremath\{\\succeq\}',
    8830     => '\\ensuremath\{\\not\\prec\}',
    8831     => '\\ensuremath\{\\not\\succ\}',
    'sub'    => '\\ensuremath\{\\subset\}',
    8834     => '\\ensuremath\{\\subset\}',
    'sup'    => '\\ensuremath\{\\supset\}',
    8835     => '\\ensuremath\{\\supset\}',
    'nsub'   => '\\ensuremath\{\\not\\subset\}',
    8836     => '\\ensuremath\{\\not\\subset\}',
    8837     => '\\ensuremath\{\\not\\supset\}',
    'sube'   => '\\ensuremath\{\\subseteq\}',
    8838     => '\\ensuremath\{\\subseteq\}',
    'supe'   => '\\ensuremath\{\\supseteq\}',
    8839     => '\\ensuremath\{\\supseteq\}',
    8840     => '\\ensuremath\{\\nsubseteq\}',
    8841     => '\\ensuremath\{\\nsupseteq\}',
    8842     => '\\ensuremath\{\\subsetneq\}',
    8843     => '\\ensuremath\{\\supsetneq\}',
    8847     => '\\ensuremath\{\\sqsubset\}',
    8848     => '\\ensuremath\{\\sqsupset\}',
    8849     => '\\ensuremath\{\\sqsubseteq\}',
    8850     => '\\ensuremath\{\\sqsupseteq\}',
    8851     => '\\ensuremath\{\\sqcap\}',
    8852     => '\\ensuremath\{\\sqcup\}',
    'oplus'  => '\\ensuremath\{\\oplus\}',
    8853     => '\\ensuremath\{\\oplus\}',
    8854     => '\\ensuremath\{\\ominus\}',
    'otimes' => '\\ensuremath\{\\otimes\}',
    8855     => '\\ensuremath\{\\otimes\}',
    8856     => '\\ensuremath\{\\oslash\}',
    8857     => '\\ensuremath\{\\odot\}',
    8858     => '\\ensuremath\{\\circledcirc\}',
    8859     => '\\ensuremath\{\\circledast\}',
    8861     => '\\ensuremath\{\\ominus\}', # Close enough for government work.
    8862     => '\\ensuremath\{\\boxplus\}',
    8863     => '\\ensuremath\{\\boxminus\}',
    8864     => '\\ensuremath\{\\boxtimes\}',
    8865     => '\\ensuremath\{\\boxdot\}',
    'vdash'  => '\\ensuremath\{\\vdash\}',
    8866     => '\\ensuremath\{\\vdash\}',
    'dashv'  => '\\ensuremath\{\\dashv\}',
    8867     => '\\ensuremath\{\\dashv\}',
    'perp'   => '\\ensuremath\{\\perp\}',
    8869     => '\\ensuremath\{\\perp\}',
    8871     => '\\ensuremath\{\\models\}',
    8872     => '\\ensuremath\{\\vDash\}',    
    8873     => '\\ensuremath\{\\Vdash\}',
    8874     => '\\ensuremath\{\\Vvdash\}',
    8876     => '\\ensuremath\{\\nvdash\}',
    8877     => '\\ensuremath\{\\nvDash\}',
    8878     => '\\ensuremath\{\\nVdash\}',
    8880     => '\\ensuremath\{\\prec\}',
    8881     => '\\ensuremath\{\\succ\}',
    8882     => '\\ensuremath\{\\vartriangleleft\}',
    8883     => '\\ensuremath\{\\vartriangleright\}',
    8884     => '\\ensuremath\{\\trianglelefteq\}',
    8885     => '\\ensuremath\{\\trianglerighteq\}',
    8891     => '\\ensuremath\{\\veebar\}',
    8896     => '\\ensuremath\{\\land\}',
    8897     => '\\ensuremath\{\\lor\}',
    8898     => '\\ensuremath\{\\cap\}',
    8899     => '\\ensuremath\{\\cup\}',
    8900     => '\\ensuremath\{\\diamond\}',
    'sdot'   => '\\ensuremath\{\\cdot\}',
    8901     => '\\ensuremath\{\\cdot\}',
    8902     => '\\ensuremath\{\\star\}',
    8903     => '\\ensuremath\{\\divideontimes\}',
    8904     => '\\ensuremath\{\\bowtie\}',
    8905     => '\\ensuremath\{\\ltimes\}',
    8906     => '\\ensuremath\{\\rtimes\}',
    8907     => '\\ensuremath\{\\leftthreetimes\}',
    8908     => '\\ensuremath\{\\rightthreetimes\}',
    8909     => '\\ensuremath\{\\simeq\}',
    8910     => '\\ensuremath\{\\curlyvee\}',
    8911     => '\\ensuremath\{\\curlywedge\}',
    8912     => '\\ensuremath\{\\Subset\}',
    8913     => '\\ensuremath\{\\Supset\}',
    8914     => '\\ensuremath\{\\Cap\}',
    8915     => '\\ensuremath\{\\Cup\}',
    8916     => '\\ensuremath\{\\pitchfork\}',
    8918     => '\\ensuremath\{\\lessdot\}',
    8919     => '\\ensuremath\{\\gtrdot\}',
    8920     => '\\ensuremath\{\\lll\}',
    8921     => '\\ensuremath\{\\ggg\}',
    8922     => '\\ensuremath\{\\gtreqless\}',
    8923     => '\\ensuremath\{\\lesseqgtr\}',
    8924     => '\\ensuremath\{\\eqslantless\}',
    8925     => '\\ensuremath\{\\eqslantgtr\}',
    8926     => '\\ensuremath\{\\curlyeqprec\}',
    8927     => '\\ensuremath\{\\curlyeqsucc\}',
    8928     => '\\ensuremath\{\\not\\preccurlyeq\}',
    8929     => '\\ensuremath\{\\not\\succcurlyeq\}',
    8930     => '\\ensuremath\{\\not\\sqsupseteq\}',
    8931     => '\\ensuremath\{\\not\\sqsubseteq\}',
    8938     => '\\ensuremath\{\\not\\vartriangleleft\}',
    8939     => '\\ensuremath\{\\not\vartriangleright\}',
    8940     => '\\ensuremath\{\\not\trianglelefteq\}',
    8941     => '\\ensuremath\{\\not\trianglerighteq\}',
    8942     => '\\ensuremath\{\\vdots\}',
    8960     => '\\ensuremath\{\\varnothing\}',
    'lceil'  => '\\ensuremath\{\\lceil\}',
    8968     => '\\ensuremath\{\\lceil\}',
    'rceil'  => '\\ensuremath\{\\rceil\}',
    8969     => '\\ensuremath\{\\rceil\}',
    'lfloor' => '\\ensuremath\{\\lfloor\}',
    8970     => '\\ensuremath\{\\lfloor\}',
    'rfloor' => '\\ensuremath\{\\rfloor}',
    8971     => '\\ensuremath\{\\rfloor}',
    'lang'   => '\\ensuremath\{\\langle\}',
    9001     => '\\ensuremath\{\\langle\}',
    'rang'   => '\\ensuremath\{\\rangle\}',
    9002     => '\\ensuremath\{\\rangle\}',
    'loz'    => '\\ensuremath\{\\lozenge\}',
    9674     => '\\ensuremath\{\\lozenge\}',
    'spades' => '\\ensuremath\{\\spadesuit\}',
    9824     => '\\ensuremath\{\\spadesuit\}',
    9825     => '\\ensuremath\{\\heartsuit\}',
    9826     => '\\ensuremath\{\\diamondsuit\}',
    'clubs'  => '\\ensuremath\{\\clubsuit\}',
    9827     => '\\ensuremath\{\\clubsuit\}',
    'hearts' => '\\ensuremath\{\\heartsuit\}',
    9829     => '\\ensuremath\{\\heartsuit\}',
    'diams'  => '\\ensuremath\{\\blacklozenge\}',
    9830     => '\\ensuremath\{\\blacklozenge\}'
    
);

# 
#  Turn the code point of a numeric entity (one that is absent from our
#  hash) into the character it denotes.
#  This lets us support, to some extent, any entity for which
#  dvipdf can find a glyph (given that LaTeX is now UTF-8 clean).
#
# Parameters:
#   unicode  - The Unicode code point of the character, as a decimal value.
# Returns:
#   The one-character string produced by pack's "U" template for that
#   code point.
#
sub entity_to_utf8 {
    my $code_point = shift;
    my $character  = pack('U', $code_point);

    return $character;
}


#
#  Convert an entity to the corresponding LaTeX if possible.
#  If not possible, and the entity is numeric,
#  the entity is treated like a Unicode character and converted
#  with entity_to_utf8, which should display as long as dvipdf can find
#  the appropriate glyph.
#
#  The entity is assumed to have already had the
#  &# ;  or & ; removed
#
# Parameters:
#   entity    - Name of entity to convert (text name or decimal number).
# Returns:
#  One of the following:
#   - LaTeX string that produces the entity.
#   - The character for a numeric entity for which we don't have a LaTeX
#     string, provided the number is a valid Unicode code point.
#   - ' ' for anything else (unknown text entities, undefined input,
#     numbers outside the Unicode range or in the surrogate block).
#
sub entity_to_latex {
    my ($entity) = @_;

    return " " unless defined $entity;

    # Purely numeric entities are looked up in canonical decimal form, so
    # that e.g. "038" finds the same table entry as "38".
    my $is_numeric = ($entity =~ /\A[0-9]+\z/) ? 1 : 0;
    if ($is_numeric) {
        $entity =~ s/\A0+(?=[0-9])//;
    }

    # Try to look up the entity (text or numeric) in the hash:

    my $latex = $entities{$entity};
    return $latex if defined $latex;

    # If the text is purely numeric we can fall back to the raw character.
    # The length test keeps absurdly long digit strings away from numeric
    # comparison; 0x10FFFF (1114111) has seven digits.

    if ($is_numeric && length($entity) <= 7) {
        my $is_surrogate = ($entity >= 0xD800 && $entity <= 0xDFFF);
        if ($entity <= 0x10FFFF && !$is_surrogate) {
            return entity_to_utf8($entity);
        }
    }

    #  Can't do the conversion ...

    return " ";
}

#
#  Convert all the entities in a string.
#  Every entity is located in a single left-to-right pass, handed to
#  entity_to_latex, and replaced by the result.  Text that has been
#  substituted is never scanned again, so a replacement such as \& cannot
#  combine with the characters after it to form a bogus second entity
#  (e.g. "&amp;lt;" becomes \&lt; rather than being translated twice).
#
#  Recognised forms:
#    &#NNN;   decimal numeric entity
#    &#xHHH;  hexadecimal numeric entity (converted to decimal for lookup)
#    &name;   text entity
#
# Parameters:
#   input   - Input string/document
# Returns
#   input with entities replaced by latexable stuff (whatever
#   entity_to_latex returns for each entity).  Hexadecimal entities with
#   more than 8 digits are replaced by ' '.
#
#
sub replace_entities {
    my ($input)  = @_;

    $input =~ s{
        &
        (?: \# ([0-9]+)
          | \# [xX] ([0-9A-Fa-f]+)
          | (\w+)
        )
        ;
    }{
        my ($decimal, $hex, $name) = ($1, $2, $3);
        if (defined $hex) {
            $decimal = length($hex) <= 8 ? hex($hex) : undef;
        }
        defined $decimal ? entity_to_latex($decimal)
      : defined $name    ? entity_to_latex($name)
      :                    ' ';
    }gex;

    return $input;
}

1; 

__END__

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>