# The LearningOnline Network
# entity -> tex.
#
# $Id:
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# /home/httpd/html/adm/gpl.txt
# http://www.lon-capa.org/
#
#
package Apache::entities;
use strict;
#
# This file contains a table driven entity-->latex converter.
#
# Assumptions:
# The number of entities in a resource is small compared with the
# number of possible entities that might be translated.
# Therefore the strategy is to match a general entity pattern
# &.+; over and over, pull out the match look it up in an entity -> tex hash
# and do the replacement.
#
# In order to simplify the hash, the following reductions are done:
# &#d+; entities have the &# and ; stripped and are converted to an int.
# &#x.+; entities have the &#x and ; stripped and are converted to an int
# from their hexadecimal value.
# All others have the & and ; stripped.
# The hash: Add new conversions here; leave off the leading & and the trailing ;
# All numeric entities need only appear as their decimal versions
# (e.g. 1234 is sufficient; there is no need for 0x4d2 as well).
#
# This entity table is mercilessly cribbed from the HTML pocket reference
# table starting at pg 82. In most cases the LaTeX equivalent codes come from
# the original massive regular expression replacements originally by
# A. Sakharuk in lonprintout.pm
#
# I also want to acknowledge
# ISO Character entities and their LaTeX equivalents by
# Vidar Bronken Gundersen, and Rune Mathisen
# http://www.bitjungle.com/isoent-ref.pdf
#
# Note numerical entities are essentially unicode character codes.
#
package Apache::entities;
# Entity -> LaTeX translation table.
#
# Keys are entity names (without the leading & and the trailing ;) or
# decimal character codes (without the leading &# and the trailing ;).
# Values are the LaTeX text that replaces the entity.
#
# Conventions used for the values:
#  - Math symbols are wrapped in \ensuremath{...}.
#  - Text-mode control words are wrapped in braces, e.g. {\ae}.  A bare
#    control word would absorb letters that follow it (\aether) and a
#    control word terminated by a blank would swallow a following space.
#  - Where the table historically used a misspelled entity name, the
#    misspelling is kept as an alias next to the standard name.
#
my %entities = (

    # ---- ASCII code page: ----------------

    # Translation to empty strings:
    7  => "",
    9  => "",
    10 => "",
    13 => "",

    # Translations to simple characters:
    32     => " ",
    33     => "!",
    34     => '"',
    'quot' => '"',
    35     => '\\#',
    36     => '\\$',
    37     => '\%',
    38     => '\&',
    'amp'  => '\&',
    39     => '\'',    # Apostrophe
    40     => '(',
    41     => ')',
    42     => '*',
    43     => '+',
    44     => ',',     # comma
    45     => '-',
    46     => '.',
    47     => '/',

    # The digits 0-9 translate to themselves:
    ( map { ( $_ => chr($_) ) } 48 .. 57 ),

    58   => ':',
    59   => ';',
    60   => '\ensuremath{<}',
    'lt' => '\ensuremath{<}',
    61   => '\ensuremath{=}',
    62   => '\ensuremath{>}',
    'gt' => '\ensuremath{>}',
    63   => '?',
    64   => '@',

    # The letters A-Z translate to themselves:
    ( map { ( $_ => chr($_) ) } 65 .. 90 ),

    91 => '[',
    92 => '\ensuremath{\setminus}',    # \setminus is \ with special spacing.
    93 => ']',
    94 => '\ensuremath{\wedge}',
    95 => '\underline{\makebox[2mm]{\strut}}',    # Underline 2mm of space for _
    96 => '`',

    # The letters a-z translate to themselves:
    ( map { ( $_ => chr($_) ) } 97 .. 122 ),

    123 => '\{',
    124 => '|',
    125 => '\}',
    126 => '\~{}',    # Empty argument: a bare \~ would accent the next character.

    # Controls and Latin-1 supplement.  Note that some entities that have
    # visible effect are not printing unicode characters.  Specifically
    # the codes 130-159 below, whose translations follow the glyphs that
    # the Windows-1252 code page assigns to those positions.
    130 => ',',
    131 => '\ensuremath{f}',
    132 => ',,',                       # Low double left quotes.
    133 => '\ensuremath{\ldots}',
    134 => '\ensuremath{\dagger}',
    135 => '\ensuremath{\ddagger}',
    136 => '\ensuremath{\wedge}',
    137 => '{\textperthousand}',
    138 => '\v{S}',
    139 => '\ensuremath{<}',
    140 => '{\OE}',

    # There's a gap here in my entity table
    145 => '`',
    146 => '\'',
    147 => '``',
    148 => '\'\'',
    149 => '\ensuremath{\bullet}',
    150 => '--',
    151 => '---',
    152 => '\ensuremath{\sim}',
    153 => '{\texttrademark}',
    154 => '\v{s}',
    155 => '\ensuremath{>}',
    156 => '{\oe}',

    # Another short gap:
    159      => '\"Y',
    160      => '~',
    'nbsp'   => '~',
    161      => '{\textexclamdown}',
    'iexcl'  => '{\textexclamdown}',
    162      => '{\textcent}',
    'cent'   => '{\textcent}',
    163      => '{\pounds}',
    'pound'  => '{\pounds}',
    164      => '{\textcurrency}',
    'curren' => '{\textcurrency}',
    165      => '{\textyen}',
    'yen'    => '{\textyen}',
    166      => '{\textbrokenbar}',
    'brvbar' => '{\textbrokenbar}',
    167      => '{\textsection}',
    'sect'   => '{\textsection}',
    168      => '\"{}',
    'uml'    => '\"{}',
    169      => '{\copyright}',
    'copy'   => '{\copyright}',
    170      => '{\textordfeminine}',
    'ordf'   => '{\textordfeminine}',
    171      => '\ensuremath{\ll}',    # approximation of left angle quote.
    'laquo'  => '\ensuremath{\ll}',    # ""
    172      => '\ensuremath{\neg}',
    'not'    => '\ensuremath{\neg}',
    173      => ' - ',
    'shy'    => ' - ',
    174      => '{\textregistered}',
    'reg'    => '{\textregistered}',
    175      => '\ensuremath{^{-}}',
    'macr'   => '\ensuremath{^{-}}',
    176      => '\ensuremath{^{\circ}}',
    'deg'    => '\ensuremath{^{\circ}}',
    177      => '\ensuremath{\pm}',
    'plusmn' => '\ensuremath{\pm}',
    178      => '\ensuremath{^2}',
    'sup2'   => '\ensuremath{^2}',
    179      => '\ensuremath{^3}',
    'sup3'   => '\ensuremath{^3}',
    180      => "\\'{}",
    'acute'  => "\\'{}",
    181      => '\ensuremath{\mu}',
    'micro'  => '\ensuremath{\mu}',
    182      => '{\P}',
    'para'   => '{\P}',
    183      => '\ensuremath{\cdot}',
    'middot' => '\ensuremath{\cdot}',
    184      => '\c{\strut}',
    'cedil'  => '\c{\strut}',
    185      => '\ensuremath{^1}',
    'sup1'   => '\ensuremath{^1}',
    186      => '{\textordmasculine}',
    'ordm'   => '{\textordmasculine}',
    187      => '\ensuremath{\gg}',
    'raquo'  => '\ensuremath{\gg}',
    188      => '{\textonequarter}',
    'frac14' => '{\textonequarter}',
    189      => '{\textonehalf}',
    'frac12' => '{\textonehalf}',
    190      => '{\textthreequarters}',
    'frac34' => '{\textthreequarters}',
    191      => '{\textquestiondown}',
    'iquest' => '{\textquestiondown}',
    192      => '\\`{A}',
    'Agrave' => '\\`{A}',
    193      => "\\'{A}",
    'Aacute' => "\\'{A}",
    194      => '\^{A}',
    'Acirc'  => '\^{A}',
    195      => '\~{A}',
    'Atilde' => '\~{A}',
    196      => '\\"{A}',
    'Auml'   => '\\"{A}',
    197      => '{\AA}',
    'Aring'  => '{\AA}',
    198      => '{\AE}',
    'AElig'  => '{\AE}',
    199      => '\c{C}',    # Upper case C with cedilla.
    'Ccedil' => '\c{C}',
    200      => '\\`{E}',
    'Egrave' => '\\`{E}',
    201      => "\\'{E}",
    'Eacute' => "\\'{E}",
    202      => '\\^{E}',
    'Ecirc'  => '\\^{E}',
    203      => '\\"{E}',
    'Euml'   => '\\"{E}',
    204      => '\\`{I}',
    'Igrave' => '\\`{I}',
    205      => "\\'{I}",
    'Iacute' => "\\'{I}",
    206      => '\\^{I}',
    'Icirc'  => '\\^{I}',
    207      => '\\"{I}',
    'Iuml'   => '\\"{I}',
    208      => '{\DH}',
    'ETH'    => '{\DH}',
    209      => '\~{N}',
    'Ntilde' => '\~{N}',
    210      => '\\`{O}',
    'Ograve' => '\\`{O}',
    211      => "\\'{O}",
    'Oacute' => "\\'{O}",
    212      => '\\^{O}',
    'Ocirc'  => '\\^{O}',
    213      => '\~{O}',
    'Otilde' => '\~{O}',
    214      => '\\"{O}',
    'Ouml'   => '\\"{O}',
    215      => '\ensuremath{\times}',
    'times'  => '\ensuremath{\times}',
    216      => '{\O}',
    'Oslash' => '{\O}',
    217      => '\\`{U}',
    'Ugrave' => '\\`{U}',
    218      => "\\'{U}",
    'Uacute' => "\\'{U}",
    219      => '\\^{U}',
    'Ucirc'  => '\\^{U}',
    220      => '\\"{U}',
    'Uuml'   => '\\"{U}',
    221      => "\\'{Y}",
    'Yacute' => "\\'{Y}",
    223      => '{\ss}',
    'szlig'  => '{\ss}',
    224      => '\\`{a}',
    'agrave' => '\\`{a}',
    225      => "\\'{a}",
    'aacute' => "\\'{a}",
    226      => '\\^{a}',
    'acirc'  => '\\^{a}',
    227      => '\\~{a}',
    'atilde' => '\\~{a}',
    228      => '\\"{a}',
    'auml'   => '\\"{a}',
    229      => '{\aa}',
    'aring'  => '{\aa}',
    230      => '{\ae}',
    'aelig'  => '{\ae}',
    231      => '\c{c}',
    'ccedil' => '\c{c}',
    232      => '\\`{e}',
    'egrave' => '\\`{e}',
    233      => "\\'{e}",
    'eacute' => "\\'{e}",
    234      => '\\^{e}',
    'ecirc'  => '\\^{e}',
    235      => '\\"{e}',
    'euml'   => '\\"{e}',
    236      => '\\`{i}',
    'igrave' => '\\`{i}',
    237      => "\\'{i}",
    'iacute' => "\\'{i}",
    238      => '\\^{i}',
    'icirc'  => '\\^{i}',
    239      => '\\"{i}',
    'iuml'   => '\\"{i}',
    241      => '\\~{n}',
    'ntilde' => '\\~{n}',
    242      => '\\`{o}',
    'ograve' => '\\`{o}',
    243      => "\\'{o}",
    'oacute' => "\\'{o}",
    244      => '\\^{o}',
    'ocirc'  => '\\^{o}',
    245      => '\\~{o}',
    'otilde' => '\\~{o}',
    246      => '\\"{o}',
    'ouml'   => '\\"{o}',
    247      => '\ensuremath{\div}',
    'divide' => '\ensuremath{\div}',
    248      => '{\o}',
    'oslash' => '{\o}',
    249      => '\\`{u}',
    'ugrave' => '\\`{u}',
    250      => "\\'{u}",
    'uacute' => "\\'{u}",
    251      => '\\^{u}',
    'ucirc'  => '\\^{u}',
    252      => '\\"{u}',
    'uuml'   => '\\"{u}',
    253      => "\\'{y}",
    'yacute' => "\\'{y}",
    255      => '\\"{y}',
    'yuml'   => '\\"{y}',

    # hbar entity number comes from the unicode charater:
    # see e.g. http://www.unicode.org/charts/PDF/U0100.pdf
    # ISO also documents a 'planck' entity.
    295      => '\ensuremath{\hbar}',
    'planck' => '\ensuremath{\hbar}',

    # Latin extended-A HTML 4.01 entities:
    338      => '{\OE}',
    'OElig'  => '{\OE}',
    339      => '{\oe}',
    'oelig'  => '{\oe}',
    352      => '\v{S}',
    'Scaron' => '\v{S}',
    353      => '\v{s}',
    'scaron' => '\v{s}',
    376      => '\\"{Y}',
    'Yuml'   => '\\"{Y}',

    # Latin extended B HTML 4.01 entities
    402    => '\ensuremath{f}',
    'fnof' => '\ensuremath{f}',

    # Spacing modifier letters:
    710     => '\^{}',
    'circ'  => '\^{}',
    732     => '\~{}',
    'tilde' => '\~{}',

    # Greek uppercase:
    913       => '\ensuremath{\mathrm{A}}',
    'Alpha'   => '\ensuremath{\mathrm{A}}',
    914       => '\ensuremath{\mathrm{B}}',
    'Beta'    => '\ensuremath{\mathrm{B}}',
    915       => '\ensuremath{\Gamma}',
    'Gamma'   => '\ensuremath{\Gamma}',
    916       => '\ensuremath{\Delta}',
    'Delta'   => '\ensuremath{\Delta}',
    917       => '\ensuremath{\mathrm{E}}',
    'Epsilon' => '\ensuremath{\mathrm{E}}',
    918       => '\ensuremath{\mathrm{Z}}',
    'Zeta'    => '\ensuremath{\mathrm{Z}}',
    919       => '\ensuremath{\mathrm{H}}',
    'Eta'     => '\ensuremath{\mathrm{H}}',
    920       => '\ensuremath{\Theta}',
    'Theta'   => '\ensuremath{\Theta}',
    921       => '\ensuremath{\mathrm{I}}',
    'Iota'    => '\ensuremath{\mathrm{I}}',
    922       => '\ensuremath{\mathrm{K}}',
    'Kappa'   => '\ensuremath{\mathrm{K}}',
    923       => '\ensuremath{\Lambda}',
    'Lambda'  => '\ensuremath{\Lambda}',
    924       => '\ensuremath{\mathrm{M}}',
    'Mu'      => '\ensuremath{\mathrm{M}}',
    925       => '\ensuremath{\mathrm{N}}',
    'Nu'      => '\ensuremath{\mathrm{N}}',
    926       => '\ensuremath{\mathrm{\Xi}}',
    'Xi'      => '\ensuremath{\mathrm{\Xi}}',
    927       => '\ensuremath{\mathrm{O}}',
    'Omicron' => '\ensuremath{\mathrm{O}}',
    928       => '\ensuremath{\Pi}',
    'Pi'      => '\ensuremath{\Pi}',
    929       => '\ensuremath{\mathrm{P}}',
    'Rho'     => '\ensuremath{\mathrm{P}}',

    # Skips 930
    931       => '\ensuremath{\Sigma}',
    'Sigma'   => '\ensuremath{\Sigma}',
    932       => '\ensuremath{\mathrm{T}}',
    'Tau'     => '\ensuremath{\mathrm{T}}',
    933       => '\ensuremath{\Upsilon}',
    'Upsilon' => '\ensuremath{\Upsilon}',
    934       => '\ensuremath{\Phi}',
    'Phi'     => '\ensuremath{\Phi}',
    935       => '\ensuremath{\mathrm{X}}',
    'Chi'     => '\ensuremath{\mathrm{X}}',
    936       => '\ensuremath{\Psi}',
    'Psi'     => '\ensuremath{\Psi}',
    937       => '\ensuremath{\Omega}',
    'Omega'   => '\ensuremath{\Omega}',

    # Greek lowercase:
    945        => '\ensuremath{\alpha}',
    'alpha'    => '\ensuremath{\alpha}',
    946        => '\ensuremath{\beta}',
    'beta'     => '\ensuremath{\beta}',
    947        => '\ensuremath{\gamma}',
    'gamma'    => '\ensuremath{\gamma}',
    948        => '\ensuremath{\delta}',
    'delta'    => '\ensuremath{\delta}',
    949        => '\ensuremath{\epsilon}',
    'epsilon'  => '\ensuremath{\epsilon}',
    950        => '\ensuremath{\zeta}',
    'zeta'     => '\ensuremath{\zeta}',
    951        => '\ensuremath{\eta}',
    'eta'      => '\ensuremath{\eta}',
    952        => '\ensuremath{\theta}',
    'theta'    => '\ensuremath{\theta}',
    953        => '\ensuremath{\iota}',
    'iota'     => '\ensuremath{\iota}',
    954        => '\ensuremath{\kappa}',
    'kappa'    => '\ensuremath{\kappa}',
    955        => '\ensuremath{\lambda}',
    'lambda'   => '\ensuremath{\lambda}',
    956        => '\ensuremath{\mu}',
    'mu'       => '\ensuremath{\mu}',
    957        => '\ensuremath{\nu}',
    'nu'       => '\ensuremath{\nu}',
    958        => '\ensuremath{\xi}',
    'xi'       => '\ensuremath{\xi}',
    959        => '\ensuremath{o}',
    'omicron'  => '\ensuremath{o}',
    960        => '\ensuremath{\pi}',
    'pi'       => '\ensuremath{\pi}',
    961        => '\ensuremath{\rho}',
    'rho'      => '\ensuremath{\rho}',
    962        => '\ensuremath{\varsigma}',
    'sigmaf'   => '\ensuremath{\varsigma}',
    963        => '\ensuremath{\sigma}',
    'sigma'    => '\ensuremath{\sigma}',
    964        => '\ensuremath{\tau}',
    'tau'      => '\ensuremath{\tau}',
    965        => '\ensuremath{\upsilon}',
    'upsilon'  => '\ensuremath{\upsilon}',
    966        => '\ensuremath{\phi}',
    'phi'      => '\ensuremath{\phi}',
    967        => '\ensuremath{\chi}',
    'chi'      => '\ensuremath{\chi}',
    968        => '\ensuremath{\psi}',
    'psi'      => '\ensuremath{\psi}',
    969        => '\ensuremath{\omega}',
    'omega'    => '\ensuremath{\omega}',
    977        => '\ensuremath{\vartheta}',
    'thetasym' => '\ensuremath{\vartheta}',
    978        => '\ensuremath{\mathit{\Upsilon}}',
    'upsih'    => '\ensuremath{\mathit{\Upsilon}}',
    982        => '\ensuremath{\varpi}',
    'piv'      => '\ensuremath{\varpi}',

    # The general punctuation set:
    8194     => '\hspace{.5em}',
    'ensp'   => '\hspace{.5em}',
    'enspc'  => '\hspace{.5em}',       # legacy misspelling of ensp
    8195     => '\hspace{1.0em}',
    'emsp'   => '\hspace{1.0em}',
    'emspc'  => '\hspace{1.0em}',      # legacy misspelling of emsp
    8201     => '\hspace{0.167em}',
    'thinsp' => '\hspace{0.167em}',
    8204     => '{}',
    'zwnj'   => '{}',
    8205     => '',
    'zwj'    => '',
    8206     => '',
    'lrm'    => '',
    8207     => '',
    'rlm'    => '',
    8211     => '--',
    'ndash'  => '--',
    8212     => '---',
    'mdash'  => '---',
    8216     => '`',
    'lsquo'  => '`',
    8217     => "'",
    'rsquo'  => "'",
    8218     => '{\quotesinglbase}',
    'sbquo'  => '{\quotesinglbase}',
    8220     => '``',
    'ldquo'  => '``',
    8221     => "''",
    'rdquo'  => "''",
    8222     => '{\quotedblbase}',
    'bdquo'  => '{\quotedblbase}',
    8224     => '\ensuremath{\dagger}',
    'dagger' => '\ensuremath{\dagger}',
    8225     => '\ensuremath{\ddag}',
    'Dagger' => '\ensuremath{\ddag}',
    8226     => '{\textbullet}',
    'bull'   => '{\textbullet}',
    8230     => '{\textellipsis}',
    'hellip' => '{\textellipsis}',
    'hellep' => '{\textellipsis}',     # legacy misspelling of hellip
    8240     => '{\textperthousand}',
    'permil' => '{\textperthousand}',
    8242     => '{\textquotesingle}',
    'prime'  => '{\textquotesingle}',
    8243     => '{\textquotedbl}',
    'Prime'  => '{\textquotedbl}',
    8249     => '{\guilsinglleft}',
    'lsaquo' => '{\guilsinglleft}',
    8250     => '{\guilsinglright}',
    'rsaquo' => '{\guilsinglright}',
    8254     => '{\textasciimacron}',
    'oline'  => '{\textasciimacron}',
    8260     => '{\textfractionsolidus}',
    'frasl'  => '{\textfractionsolidus}',
    8364     => '{\texteuro}',
    'euro'   => '{\texteuro}',

    # Letter like symbols
    8472      => '\ensuremath{\wp}',
    'weierp'  => '\ensuremath{\wp}',
    8465      => '\ensuremath{\Im}',
    'image'   => '\ensuremath{\Im}',
    8476      => '\ensuremath{\Re}',
    'real'    => '\ensuremath{\Re}',
    8482      => '{\texttrademark}',
    'trade'   => '{\texttrademark}',
    8501      => '\ensuremath{\aleph}',
    'alefsym' => '\ensuremath{\aleph}',

    # Arrows and then some (harpoons from Hon Kie).
    8592    => '{\textleftarrow}',
    'larr'  => '{\textleftarrow}',
    8593    => '{\textuparrow}',
    'uarr'  => '{\textuparrow}',
    8594    => '{\textrightarrow}',
    'rarr'  => '{\textrightarrow}',
    8595    => '{\textdownarrow}',
    'darr'  => '{\textdownarrow}',
    8596    => '\ensuremath{\leftrightarrow}',
    'harr'  => '\ensuremath{\leftrightarrow}',
    8598    => '\ensuremath{\nwarrow}',
    8599    => '\ensuremath{\nearrow}',
    8600    => '\ensuremath{\searrow}',
    8601    => '\ensuremath{\swarrow}',
    8605    => '\ensuremath{\leadsto}',
    8614    => '\ensuremath{\mapsto}',
    8617    => '\ensuremath{\hookleftarrow}',
    8618    => '\ensuremath{\hookrightarrow}',
    8629    => '\ensuremath{\hookleftarrow}',    # not an exact match but best I know.
    'crarr' => '\ensuremath{\hookleftarrow}',    # not an exact match but best I know.
    8636    => '\ensuremath{\leftharpoonup}',
    8637    => '\ensuremath{\leftharpoondown}',
    8640    => '\ensuremath{\rightharpoonup}',
    8641    => '\ensuremath{\rightharpoondown}',
    8652    => '\ensuremath{\rightleftharpoons}',
    8656    => '\ensuremath{\Leftarrow}',
    'lArr'  => '\ensuremath{\Leftarrow}',
    8657    => '\ensuremath{\Uparrow}',
    'uArr'  => '\ensuremath{\Uparrow}',
    8658    => '\ensuremath{\Rightarrow}',
    'rArr'  => '\ensuremath{\Rightarrow}',
    8659    => '\ensuremath{\Downarrow}',
    'dArr'  => '\ensuremath{\Downarrow}',
    8660    => '\ensuremath{\Leftrightarrow}',
    'hArr'  => '\ensuremath{\Leftrightarrow}',
    8661    => '\ensuremath{\Updownarrow}',
    'vArr'  => '\ensuremath{\Updownarrow}',
    8666    => '\ensuremath{\Lleftarrow}',
    'lAarr' => '\ensuremath{\Lleftarrow}',
    8667    => '\ensuremath{\Rrightarrow}',
    'rAarr' => '\ensuremath{\Rrightarrow}',
    8669    => '\ensuremath{\rightsquigarrow}',
    'rarrw' => '\ensuremath{\rightsquigarrow}',

    # Mathematical operators.
    'forall' => '\ensuremath{\forall}',
    8704     => '\ensuremath{\forall}',
    'comp'   => '\ensuremath{\complement}',
    8705     => '\ensuremath{\complement}',
    'part'   => '\ensuremath{\partial}',
    8706     => '\ensuremath{\partial}',
    'exist'  => '\ensuremath{\exists}',
    8707     => '\ensuremath{\exists}',
    'nexist' => '\ensuremath{\nexists}',
    8708     => '\ensuremath{\nexists}',
    'empty'  => '\ensuremath{\emptyset}',
    8709     => '\ensuremath{\emptyset}',
    8710     => '\ensuremath{\Delta}',
    'nabla'  => '\ensuremath{\nabla}',
    8711     => '\ensuremath{\nabla}',
    'isin'   => '\ensuremath{\in}',
    8712     => '\ensuremath{\in}',
    'notin'  => '\ensuremath{\notin}',
    8713     => '\ensuremath{\notin}',
    'ni'     => '\ensuremath{\ni}',
    8715     => '\ensuremath{\ni}',
    8716     => '\ensuremath{\not\ni}',
    'prod'   => '\ensuremath{\prod}',
    8719     => '\ensuremath{\prod}',
    8720     => '\ensuremath{\coprod}',
    'sum'    => '\ensuremath{\sum}',
    8721     => '\ensuremath{\sum}',
    'minus'  => '\ensuremath{-}',
    8722     => '\ensuremath{-}',
    8723     => '\ensuremath{\mp}',
    8724     => '\ensuremath{\dotplus}',
    8725     => '\ensuremath{\diagup}',
    8726     => '\ensuremath{\smallsetminus}',
    'lowast' => '\ensuremath{*}',
    8727     => '\ensuremath{*}',
    8728     => '\ensuremath{\circ}',
    8729     => '\ensuremath{\bullet}',
    'radic'  => '\ensuremath{\surd}',
    8730     => '\ensuremath{\surd}',
    8731     => '\ensuremath{\sqrt[3]{}}',
    8732     => '\ensuremath{\sqrt[4]{}}',
    'prop'   => '\ensuremath{\propto}',
    8733     => '\ensuremath{\propto}',
    'infin'  => '\ensuremath{\infty}',
    8734     => '\ensuremath{\infty}',

    #
    # The items below require the isoent latex package which I can't find at least for FC5.
    # Temporarily commented out.
    #
    # 'ang90' => '\ensuremath{\sqangle}',
    # 8735 => '\ensuremath{\sqangle}',
    'ang'    => '\ensuremath{\angle}',
    8736     => '\ensuremath{\angle}',
    'angmsd' => '\ensuremath{\measuredangle}',
    8737     => '\ensuremath{\measuredangle}',
    'angsph' => '\ensuremath{\sphericalangle}',
    8738     => '\ensuremath{\sphericalangle}',
    8739     => '\ensuremath{\vert}',
    8740     => '\ensuremath{\nmid}',        # U+2224, does not divide
    8741     => '\ensuremath{\parallel}',    # U+2225, parallel to
    'and'    => '\ensuremath{\land}',
    8743     => '\ensuremath{\land}',
    'or'     => '\ensuremath{\lor}',
    8744     => '\ensuremath{\lor}',
    'cap'    => '\ensuremath{\cap}',
    8745     => '\ensuremath{\cap}',
    'cup'    => '\ensuremath{\cup}',
    8746     => '\ensuremath{\cup}',
    'int'    => '\ensuremath{\int}',
    8747     => '\ensuremath{\int}',
    'conint' => '\ensuremath{\oint}',
    8750     => '\ensuremath{\oint}',
    'there4' => '\ensuremath{\therefore}',
    8756     => '\ensuremath{\therefore}',
    'becaus' => '\ensuremath{\because}',
    8757     => '\ensuremath{\because}',
    8758     => '\ensuremath{:}',
    8759     => '\ensuremath{::}',
    'sim'    => '\ensuremath{\sim}',
    8764     => '\ensuremath{\sim}',
    8765     => '\ensuremath{\backsim}',
    'wreath' => '\ensuremath{\wr}',
    8768     => '\ensuremath{\wr}',
    'nsim'   => '\ensuremath{\not\sim}',
    8769     => '\ensuremath{\not\sim}',
    8771     => '\ensuremath{\asymp}',
    8772     => '\ensuremath{\not\asymp}',
    'cong'   => '\ensuremath{\cong}',
    8773     => '\ensuremath{\cong}',
    8775     => '\ensuremath{\ncong}',

    # The HTML entity asymp is U+2248 (almost equal to); the matching
    # LaTeX glyph is \approx rather than LaTeX's own \asymp.
    'asymp'  => '\ensuremath{\approx}',
    8776     => '\ensuremath{\approx}',
    8778     => '\ensuremath{\approxeq}',
    8784     => '\ensuremath{\doteq}',
    8785     => '\ensuremath{\doteqdot}',
    8786     => '\ensuremath{\fallingdotseq}',
    8787     => '\ensuremath{\risingdotseq}',
    8788     => '\ensuremath{:=}',
    8789     => '\ensuremath{=:}',
    8790     => '\ensuremath{\eqcirc}',
    8791     => '\ensuremath{\circeq}',
    'wedgeq' => '\ensuremath{\stackrel{\wedge}{=}}',
    8792     => '\ensuremath{\stackrel{\wedge}{=}}',
    8794     => '\ensuremath{\stackrel{\vee}{=}}',
    8795     => '\ensuremath{\stackrel{\star}{=}}',
    8796     => '\ensuremath{\triangleq}',
    8797     => '\ensuremath{\stackrel{def}{=}}',
    8798     => '\ensuremath{\stackrel{m}{=}}',
    8799     => '\ensuremath{\stackrel{?}{=}}',
    'ne'     => '\ensuremath{\neq}',
    8800     => '\ensuremath{\neq}',
    'equiv'  => '\ensuremath{\equiv}',
    8801     => '\ensuremath{\equiv}',
    8802     => '\ensuremath{\not\equiv}',
    'le'     => '\ensuremath{\leq}',
    8804     => '\ensuremath{\leq}',
    'ge'     => '\ensuremath{\geq}',
    8805     => '\ensuremath{\geq}',
    8806     => '\ensuremath{\leqq}',
    8807     => '\ensuremath{\geqq}',
    8810     => '\ensuremath{\ll}',
    8811     => '\ensuremath{\gg}',
    'twixt'  => '\ensuremath{\between}',
    8812     => '\ensuremath{\between}',
    8813     => '\ensuremath{\not\asymp}',
    8814     => '\ensuremath{\not<}',
    8815     => '\ensuremath{\not>}',
    8816     => '\ensuremath{\not\leqslant}',
    8817     => '\ensuremath{\not\geqslant}',
    8818     => '\ensuremath{\lesssim}',
    8819     => '\ensuremath{\gtrsim}',
    8820     => '\ensuremath{\stackrel{<}{>}}',
    8821     => '\ensuremath{\stackrel{>}{<}}',
    8826     => '\ensuremath{\prec}',
    8827     => '\ensuremath{\succ}',
    8828     => '\ensuremath{\preceq}',
    8829     => '\ensuremath{\succeq}',
    8830     => '\ensuremath{\not\prec}',
    8831     => '\ensuremath{\not\succ}',
    'sub'    => '\ensuremath{\subset}',
    8834     => '\ensuremath{\subset}',
    'sup'    => '\ensuremath{\supset}',
    8835     => '\ensuremath{\supset}',
    'nsub'   => '\ensuremath{\not\subset}',
    8836     => '\ensuremath{\not\subset}',
    8837     => '\ensuremath{\not\supset}',
    'sube'   => '\ensuremath{\subseteq}',
    8838     => '\ensuremath{\subseteq}',
    'supe'   => '\ensuremath{\supseteq}',
    8839     => '\ensuremath{\supseteq}',
    8840     => '\ensuremath{\nsubseteq}',
    8841     => '\ensuremath{\nsupseteq}',
    8842     => '\ensuremath{\subsetneq}',
    8843     => '\ensuremath{\supsetneq}',
    8847     => '\ensuremath{\sqsubset}',
    8848     => '\ensuremath{\sqsupset}',
    8849     => '\ensuremath{\sqsubseteq}',
    8850     => '\ensuremath{\sqsupseteq}',
    8851     => '\ensuremath{\sqcap}',
    8852     => '\ensuremath{\sqcup}',
    'oplus'  => '\ensuremath{\oplus}',
    8853     => '\ensuremath{\oplus}',
    8854     => '\ensuremath{\ominus}',
    'otimes' => '\ensuremath{\otimes}',
    8855     => '\ensuremath{\otimes}',
    8856     => '\ensuremath{\oslash}',
    8857     => '\ensuremath{\odot}',
    8858     => '\ensuremath{\circledcirc}',
    8859     => '\ensuremath{\circledast}',
    8861     => '\ensuremath{\ominus}',    # Close enough for government work.
    8862     => '\ensuremath{\boxplus}',
    8863     => '\ensuremath{\boxminus}',
    8864     => '\ensuremath{\boxtimes}',
    8865     => '\ensuremath{\boxdot}',
    'vdash'  => '\ensuremath{\vdash}',
    8866     => '\ensuremath{\vdash}',
    'dashv'  => '\ensuremath{\dashv}',
    8867     => '\ensuremath{\dashv}',
    'perp'   => '\ensuremath{\perp}',
    8869     => '\ensuremath{\perp}',
    8871     => '\ensuremath{\models}',
    8872     => '\ensuremath{\vDash}',
    8873     => '\ensuremath{\Vdash}',
    8874     => '\ensuremath{\Vvdash}',
    8876     => '\ensuremath{\nvdash}',
    8877     => '\ensuremath{\nvDash}',
    8878     => '\ensuremath{\nVdash}',
    8880     => '\ensuremath{\prec}',
    8881     => '\ensuremath{\succ}',
    8882     => '\ensuremath{\vartriangleleft}',
    8883     => '\ensuremath{\vartriangleright}',
    8884     => '\ensuremath{\trianglelefteq}',
    8885     => '\ensuremath{\trianglerighteq}',
    8891     => '\ensuremath{\veebar}',
    8896     => '\ensuremath{\land}',
    8897     => '\ensuremath{\lor}',
    8898     => '\ensuremath{\cap}',
    8899     => '\ensuremath{\cup}',
    8900     => '\ensuremath{\diamond}',
    'sdot'   => '\ensuremath{\cdot}',
    8901     => '\ensuremath{\cdot}',
    8902     => '\ensuremath{\star}',
    8903     => '\ensuremath{\divideontimes}',
    8904     => '\ensuremath{\bowtie}',
    8905     => '\ensuremath{\ltimes}',
    8906     => '\ensuremath{\rtimes}',
    8907     => '\ensuremath{\leftthreetimes}',
    8908     => '\ensuremath{\rightthreetimes}',
    8909     => '\ensuremath{\simeq}',
    8910     => '\ensuremath{\curlyvee}',
    8911     => '\ensuremath{\curlywedge}',
    8912     => '\ensuremath{\Subset}',
    8913     => '\ensuremath{\Supset}',
    8914     => '\ensuremath{\Cap}',
    8915     => '\ensuremath{\Cup}',
    8916     => '\ensuremath{\pitchfork}',
    8918     => '\ensuremath{\lessdot}',
    8919     => '\ensuremath{\gtrdot}',
    8920     => '\ensuremath{\lll}',
    8921     => '\ensuremath{\ggg}',
    8922     => '\ensuremath{\lesseqgtr}',          # U+22DA, less-than first
    8923     => '\ensuremath{\gtreqless}',          # U+22DB, greater-than first
    8924     => '\ensuremath{\eqslantless}',
    8925     => '\ensuremath{\eqslantgtr}',
    8926     => '\ensuremath{\curlyeqprec}',
    8927     => '\ensuremath{\curlyeqsucc}',
    8928     => '\ensuremath{\not\preccurlyeq}',
    8929     => '\ensuremath{\not\succcurlyeq}',
    8930     => '\ensuremath{\not\sqsubseteq}',     # U+22E2
    8931     => '\ensuremath{\not\sqsupseteq}',     # U+22E3
    8938     => '\ensuremath{\not\vartriangleleft}',
    8939     => '\ensuremath{\not\vartriangleright}',
    8940     => '\ensuremath{\not\trianglelefteq}',
    8941     => '\ensuremath{\not\trianglerighteq}',
    8942     => '\ensuremath{\vdots}',
    8960     => '\ensuremath{\varnothing}',
    'lceil'  => '\ensuremath{\lceil}',
    8968     => '\ensuremath{\lceil}',
    'rceil'  => '\ensuremath{\rceil}',
    8969     => '\ensuremath{\rceil}',
    'lfloor' => '\ensuremath{\lfloor}',
    8970     => '\ensuremath{\lfloor}',
    'rfloor' => '\ensuremath{\rfloor}',
    8971     => '\ensuremath{\rfloor}',
    'lang'   => '\ensuremath{\langle}',
    9001     => '\ensuremath{\langle}',
    'rang'   => '\ensuremath{\rangle}',
    9002     => '\ensuremath{\rangle}',
    'loz'    => '\ensuremath{\lozenge}',
    9674     => '\ensuremath{\lozenge}',
    'spades' => '\ensuremath{\spadesuit}',
    9824     => '\ensuremath{\spadesuit}',
    9825     => '\ensuremath{\heartsuit}',
    9826     => '\ensuremath{\diamondsuit}',
    'clubs'  => '\ensuremath{\clubsuit}',
    9827     => '\ensuremath{\clubsuit}',
    'diams'  => '\ensuremath{\blacklozenge}',
    9830     => '\ensuremath{\blacklozenge}',
);
# Named entities for which the entity -> LaTeX table has no good
# translation.  They are emitted as the Unicode character itself, so
# this table maps entity name -> Unicode code point (decimal).
my %utf_table = (
    'eth'    => 240,
    'thorn'  => 254,
    'THORN'  => 222,
    'hearts' => 9829,
);
#
# Turn a Unicode code point into the corresponding character.
# This is the fallback for numeric entities that are not in the
# entity -> LaTeX hash; the intent is that any character for which the
# output driver can find a glyph can still be printed.
#
# Parameters:
#   unicode - The Unicode code point of the character, given as a
#             decimal value.
# Returns:
#   A string holding that single character, built with pack("U").
#
sub entity_to_utf8 {
    my $code_point = shift;
    return pack('U', $code_point);
}
#
# Convert an entity to the corresponding LaTeX if possible.
# If that is not possible and the entity is numeric, the entity is
# treated as a Unicode code point and converted to the character itself,
# which should display as long as a glyph for it can be found.
#
# The entity is assumed to have already had the
# &# ; or & ; removed.
#
# Numeric entities are normalized before the table lookup: leading zeros
# are dropped, so that e.g. "038" finds the entry for 38 (an escaped \&)
# instead of falling through to a raw, unescaped & character.
#
# Parameters:
#   entity - Name of the entity to convert, or its decimal character code.
# Returns:
#   One of the following:
#   - LaTeX string that produces the entity.
#   - The character itself for a numeric entity (or one of the few named
#     entities in %utf_table) for which we don't have a LaTeX string.
#   - ' ' for text entities for which there's no LaTeX equivalent.
#
sub entity_to_latex {
    my ($entity) = @_;

    # Nothing to look up: same result as any other unknown entity.
    return " " unless defined $entity;

    # Only plain ASCII digit strings count as numeric character codes.
    my $is_numeric = ($entity =~ /\A[0-9]+\z/) ? 1 : 0;
    if ($is_numeric) {
        # The hash keys are canonical decimal numbers; strip leading
        # zeros (but keep a lone "0") so that "065" and "65" are the same.
        $entity =~ s/\A0+(?=[0-9])//;
    }

    # Try to look up the entity (text or numeric) in the hash:
    my $latex = $entities{$entity};
    return $latex if defined $latex;

    # A purely numeric entity can always be emitted as the character:
    return entity_to_utf8($entity) if $is_numeric;

    # A few textual entities don't have good LaTeX but do have a known
    # Unicode code point:
    my $code_point = $utf_table{$entity};
    return entity_to_utf8($code_point) if defined $code_point;

    # Can't do the conversion.
    return " ";
}
#
# Convert all the entities in a string.
# Every entity is located, handed to entity_to_latex, and replaced in
# the string by whatever entity_to_latex returns.
#
# The string is processed in a single left-to-right pass and replacement
# text is never scanned again.  This matters for input such as
# "&amp;lt;": the &amp; becomes \& and the remaining "lt;" is ordinary
# text, rather than the combination being decoded a second time.
#
# Three entity forms are recognized:
#   &#nnn;   decimal character code (leading zeros are ignored)
#   &#xhhh;  hexadecimal character code (x or X, hex digits in any case)
#   &name;   named entity
# Anything else that merely looks similar (for example "&#xZZ;") is left
# untouched.
#
# Parameters:
#   input - Input string/document.
# Returns:
#   input with entities replaced by latexable stuff (the character itself
#   or LaTeX control strings that produce the entity).
#
sub replace_entities {
    my ($input) = @_;

    $input =~ s{
        &
        (?:
            \# 0* ([0-9]+)            # capture 1: decimal code without leading zeros
          | \# [xX] ([0-9a-fA-F]+)    # capture 2: hexadecimal code
          | (\w+)                     # capture 3: entity name
        )
        ;
    }{
        # Stringify or convert the capture so the callee gets its own copy
        # rather than an alias to a capture variable.
        entity_to_latex( defined $1 ? "$1"
                       : defined $2 ? hex($2)
                       :              "$3" )
    }gex;

    return $input;
}
1;
__END__
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>