# The LearningOnline Network
# entity -> tex.
#
# $Id:
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
# http://www.lon-capa.org/
#
#
package Apache::entities;
use strict;
#
#   This file contains a table driven entity-->latex converter.
#
#  Assumptions:
#   The number of entities in a resource is small compared with the
#   number of possible entities that might be translated.
#   Therefore the strategy is to match a general entity pattern
#   &.+; over and over, pull out the match, look it up in an
#   entity -> tex hash and do the replacement.
#
#  In order to simplify the hash, entities are reduced to hash keys as follows:
#   &#d+;  has the &# and ; stripped and is converted to an int.
#   &#x.+; has the &#x and ; stripped and is converted to an int as a hex
#          value.  NOTE(review): confirm that replace_entities really
#          recognizes this hexadecimal form.
#   All others have the & and ; stripped.
#
#  The hash:  Add new conversions here; leave off the leading & and the
#  trailing ;.  Numeric entities need only appear as their decimal versions
#  (e.g. 1234 is sufficient; there is no need for 0x4d2 as well).
#
#  This entity table is mercilessly cribbed from the HTML pocket reference
#  table starting at pg 82.  In most cases the LaTeX equivalent codes come from
#  the original massive regular expression replacements originally by
#  A. Sakharuk in lonprintout.pm
#
#  I also want to acknowledge
#   ISO Character entities and their LaTeX equivalents by
#      Vidar Bronken Gundersen, and Rune Mathisen
#    http://www.bitjungle.com/isoent-ref.pdf
#
#  Note numerical entities are essentially unicode character codes.
#
#  NOTE(review): the values below are single-quoted Perl strings, in which
#  only \\ and \' are escape sequences.  A sequence such as \{ or \` therefore
#  keeps its backslash in the stored value.  The escaping is left exactly as
#  it was; confirm that this is what the consumer of the LaTeX expects.
#
package Apache::entities;

my %entities = (

    #  ---- ASCII code page: ----------------

    # Translation to empty strings:

    7 => "",
    9 => "",
    10 => "",
    13 => "",

    # Translations to simple characters:

    32 => " ",
    33 => "!",
    34 => '"', 'quot' => '"',
    35 => '\\\#',
    36 => '\\\$',
    37 => '\\%',
    38 => '\\&', 'amp' => '\\&',
    39 => '\'', # Apostrophe
    40 => '(',
    41 => ')',
    42 => '\*',
    43 => '\+',
    44 => ',', # comma
    45 => '-',
    46 => '\.',
    47 => '\/',
    48 => '0', 49 => '1', 50 => '2', 51 => '3', 52 => '4',
    53 => '5', 54 => '6', 55 => '7', 56 => '8', 57 => '9',
    58 => ':',
    59 => ';',
    60 => '\\ensuremath\{<\}', 'lt' => '\\ensuremath\{<\}',
    61 => '\\ensuremath\{=\}',
    62 => '\\ensuremath\{>\}', 'gt' => '\\ensuremath\{>\}',
    63 => '\?',
    64 => '@',
    65 => 'A', 66 => 'B', 67 => 'C', 68 => 'D', 69 => 'E', 70 => 'F',
    71 => 'G', 72 => 'H', 73 => 'I', 74 => 'J', 75 => 'K', 76 => 'L',
    77 => 'M', 78 => 'N', 79 => 'O', 80 => 'P', 81 => 'Q', 82 => 'R',
    83 => 'S', 84 => 'T', 85 => 'U', 86 => 'V', 87 => 'W', 88 => 'X',
    89 => 'Y', 90 => 'Z',
    91 => '[',
    92 => '\\ensuremath\{\\setminus\}', # \setminus is \ with special spacing.
    93 => ']',
    94 => '\\ensuremath\{\\wedge\}',
    95 => '\\underline\{\\makebox[2mm]\\{\\strut\}\}', # Underline 2mm of space for _
    96 => '`',
    97 => 'a', 98 => 'b', 99 => 'c', 100 => 'd', 101 => 'e', 102 => 'f',
    103 => 'g', 104 => 'h', 105 => 'i', 106 => 'j', 107 => 'k', 108 => 'l',
    109 => 'm', 110 => 'n', 111 => 'o', 112 => 'p', 113 => 'q', 114 => 'r',
    115 => 's', 116 => 't', 117 => 'u', 118 => 'v', 119 => 'w', 120 => 'x',
    121 => 'y', 122 => 'z',
    123 => '\\{',
    124 => '\|',
    125 => '\\}',
    126 => '\~',

    #   Controls and Latin-1 supplement.  Note that some entities that have
    #   visible effect are not printing unicode characters.  Specifically
    #   the range &#130; - &#160;

    130 => ',',
    131 => '\\textflorin ',
    132 => ',,', # Low double left quotes.
    133 => '\\ensuremath\{\\ldots\}',
    134 => '\\ensuremath\{\\dagger\}',
    135 => '\\ensuremath\{\\ddagger\}',
    136 => '\\ensuremath\{\\wedge\}',
    137 => '\\textperthousand ',
    138 => '\\v\{S\}',
    139 => '\\ensuremath\{<\}',
    140 => '\{\\OE\}',

    # There's a gap here in my entity table

    145 => '\`',
    146 => '\'',
    147 => '\`\`',
    148 => '\'\'',
    149 => '\\ensuremath\{\\bullet\}',
    150 => '--',
    151 => '---',
    152 => '\\ensuremath\{\\sim\}',
    153 => '\\texttrademark',
    154 => '\\v\{s\}',
    155 => '\\ensuremath\{>\}',
    156 => '\\oe ',

    # Another short gap:

    159 => '\\"Y',
    160 => '~', 'nbsp' => '~',
    161 => '\\textexclamdown ', 'iexcl' => '\\textexclamdown ',
    162 => '\\textcent ', 'cent' => '\\textcent ',
    163 => '\\pounds ', 'pound' => '\\pounds ',
    164 => '\\textcurrency ', 'curren' => '\\textcurrency ',
    165 => '\\textyen ', 'yen' => '\\textyen ',
    166 => '\\textbrokenbar ', 'brvbar' => '\\textbrokenbar ',
    167 => '\\textsection ', 'sect' => '\\textsection ',
    168 => '\\texthighdieresis ', 'uml' => '\\texthighdieresis ',
    169 => '\\copyright ', 'copy' => '\\copyright ',
    170 => '\\textordfeminine ', 'ordf' => '\\textordfeminine ',
    171 => '\\ensuremath\{\ll\}', # approximation of left angle quote.
    'laquo' => '\\ensuremath\{\ll\}', #   ""
    172 => '\\ensuremath\{\\neg\}', 'not' => '\\ensuremath\{\\neg\}',
    173 => ' - ', 'shy' => ' - ',
    174 => '\\textregistered ', 'reg' => '\\textregistered ',
    175 => '\\ensuremath\{^\{-\}\}', 'macr' => '\\ensuremath\{^\{-\}\}',
    176 => '\\ensuremath\{^\{\\circ\}\}', 'deg' => '\\ensuremath\{^\{\\circ\}\}',
    177 => '\\ensuremath\{\\pm\}', 'plusmn' => '\\ensuremath\{\\pm\}',
    178 => '\\ensuremath\{^2\}', 'sup2' => '\\ensuremath\{^2\}',
    179 => '\\ensuremath\{^3\}', 'sup3' => '\\ensuremath\{^3\}',
    180 => '\\textacute ', 'acute' => '\\textacute ',
    181 => '\\ensuremath\{\\mu\}', 'micro' => '\\ensuremath\{\\mu\}',
    182 => '\\P ', para => '\\P ',
    183 => '\\ensuremath\{\\cdot\}', 'middot' => '\\ensuremath\{\\cdot\}',
    184 => '\\c\{\\strut\}', 'cedil' => '\\c\{\\strut\}',
    185 => '\\ensuremath\{^1\}', sup1 => '\\ensuremath\{^1\}',
    186 => '\\textordmasculine ', 'ordm' => '\\textordmasculine ',
    187 => '\\ensuremath\{\\gg\}', 'raquo' => '\\ensuremath\{\\gg\}',
    188 => '\\textonequarter ', 'frac14' => '\\textonequarter ',
    189 => '\\textonehalf', 'frac12' => '\\textonehalf',
    190 => '\\textthreequarters ', 'frac34' => '\\textthreequarters ',
    191 => '\\textquestiondown ', 'iquest' => '\\textquestiondown ',
    192 => '\\\`\{A\}', 'Agrave' => '\\\`\{A\}',
    193 => '\\\'\{A\}', 'Aacute' => '\\\'\{A\}',
    194 => '\\^\{A\}', 'Acirc' => '\\^\{A\}',
    195 => '\\~{A}', 'Atilde' => '\\~{A}',
    196 => '\\\"{A}', 'Auml' => '\\\"{A}',
    197 => '{\\AA}', 'Aring' => '{\\AA}',
    198 => '{\\AE}', 'AElig' => '{\\AE}',
    # Fixed: capital C with cedilla needs an upper case base letter.
    199 => '\\c{C}', 'Ccedil' => '\\c{C}',
    '200' => '\\\`{E}', 'Egrave' => '\\\`{E}',
    201 => '\\\'{E}', 'Eacute' => '\\\'{E}',
    202 => '\\\^{E}', 'Ecirc' => '\\\^{E}',
    203 => '\\\"{E}', 'Euml' => '\\\"{E}',
    204 => '\\\`{I}', 'Igrave' => '\\\`{I}',
    205 => '\\\'{I}', 'Iacute' => '\\\'{I}',
    206 => '\\\^{I}', 'Icirc' => '\\\^{I}',
    207 => '\\\"{I}', 'Iuml' => '\\\"{I}',
    # Fixed: capital eth is \DH (this used to emit \OE, the OE ligature).
    208 => '\\DH', 'ETH' => '\\DH',
    209 => '\\~{N}', 'Ntilde' => '\\~{N}',
    210 => '\\\`{O}', 'Ograve' => '\\\`{O}',
    211 => '\\\'{O}', 'Oacute' => '\\\'{O}',
    212 => '\\\^{O}', 'Ocirc' => '\\\^{O}',
    213 => '\\~{O}', 'Otilde' => '\\~{O}',
    214 => '\\\"{O}', 'Ouml' => '\\\"{O}',
    215 => '\\ensuremath\{\\times\}', 'times' => '\\ensuremath\{\\times\}',
    216 => '\\O', 'Oslash' => '\\O',
    217 => '\\\`{U}', 'Ugrave' => '\\\`{U}',
    218 => '\\\'{U}', 'Uacute' => '\\\'{U}',
    219 => '\\\^{U}', 'Ucirc' => '\\\^{U}',
    220 => '\\\"{U}', 'Uuml' => '\\\"{U}',
    221 => '\\\'{Y}', 'Yacute' => '\\\'{Y}',
    222 => '\\TH', 'THORN' => '\\TH',
    223 => '{\\sz}', 'szlig' => '{\\sz}',
    224 => '\\\`{a}', 'agrave' => '\\\`{a}',
    225 => '\\\'{a}', 'aacute' => '\\\'{a}',
    226 => '\\\^{a}', 'acirc' => '\\\^{a}',
    227 => '\\\~{a}', 'atilde' => '\\\~{a}',
    228 => '\\\"{a}', 'auml' => '\\\"{a}',
    229 => '\\aa', 'aring' => '\\aa',
    230 => '\\ae', 'aelig' => '\\ae',
    231 => '\\c{c}', 'ccedil' => '\\c{c}',
    232 => '\\\`{e}', 'egrave' => '\\\`{e}',
    233 => '\\\'{e}', 'eacute' => '\\\'{e}',
    234 => '\\\^{e}', 'ecirc' => '\\\^{e}',
    235 => '\\\"{e}', 'euml' => '\\\"{e}',
    236 => '\\\`{i}', 'igrave' => '\\\`{i}',
    237 => '\\\'{i}', 'iacute' => '\\\'{i}',
    238 => '\\\^{i}', 'icirc' => '\\\^{i}',
    239 => '\\\"{i}', 'iuml' => '\\\"{i}',
    240 => '\\dh', 'eth' => '\\dh',
    241 => '\\\~{n}', 'ntilde' => '\\\~{n}',
    242 => '\\\`{o}', 'ograve' => '\\\`{o}',
    243 => '\\\'{o}', 'oacute' => '\\\'{o}',
    244 => '\\\^{o}', 'ocirc' => '\\\^{o}',
    245 => '\\\~{o}', 'otilde' => '\\\~{o}',
    246 => '\\\"{o}', 'ouml' => '\\\"{o}',
    247 => '\\ensuremath\{\\div\}', 'divide' => '\\ensuremath\{\\div\}',
    248 => '{\\o}', 'oslash' => '{\\o}',
    249 => '\\\`{u}', 'ugrave' => '\\\`{u}',
    250 => '\\\'{u}', 'uacute' => '\\\'{u}',
    251 => '\\\^{u}', 'ucirc' => '\\\^{u}',
    252 => '\\\"{u}', 'uuml' => '\\\"{u}',
    253 => '\\\'{y}', 'yacute' => '\\\'{y}',
    254 => '\\th', 'thorn' => '\\th',
    255 => '\\\"{y}', 'yuml' => '\\\"{y}',

    # hbar entity number comes from the unicode character:
    # see e.g. http://www.unicode.org/charts/PDF/U0100.pdf
    # ISO also documents a 'planck' entity.  The misspelled 'plank' key is
    # kept so existing content keeps working.

    295 => '\\ensuremath\{\hbar\}',
    'plank' => '\\ensuremath\{\hbar\}',
    'planck' => '\\ensuremath\{\hbar\}',

    # Latin extended-A HTML 4.01 entities:

    338 => '\\OE', 'OElig' => '\\OE',
    339 => '\\oe', 'oelig' => '\\oe',
    352 => '\\v{S}', 'Scaron' => '\\v{S}',
    353 => '\\v{s}', 'scaron' => '\\v{s}',
    376 => '\\\"{Y}', 'Yuml' => '\\\"{Y}',

    # Latin extended B HTML 4.01 entities

    402 => '\\ensuremath{f}', 'fnof' => '\\ensuremath{f}',

    # Spacing modifier letters:

    710 => '\^{}', 'circ' => '\^{}',
    732 => '\~{}', 'tilde' => '\~{}',

    # Greek uppercase:

    913 => '\\ensuremath\{\\mathrm\{A\}\}', 'Alpha' => '\\ensuremath\{\\mathrm\{A\}\}',
    914 => '\\ensuremath\{\\mathrm\{B\}\}', 'Beta' => '\\ensuremath\{\\mathrm\{B\}\}',
    915 => '\\ensuremath\{\\Gamma\}', 'Gamma' => '\\ensuremath\{\\Gamma\}',
    916 => '\\ensuremath\{\\Delta\}', 'Delta' => '\\ensuremath\{\\Delta\}',
    917 => '\\ensuremath\{\\mathrm\{E\}\}', 'Epsilon' => '\\ensuremath\{\\mathrm\{E\}\}',
    918 => '\\ensuremath\{\\mathrm\{Z\}\}', 'Zeta' => '\\ensuremath\{\\mathrm\{Z\}\}',
    919 => '\\ensuremath\{\\mathrm\{H\}\}', 'Eta' => '\\ensuremath\{\\mathrm\{H\}\}',
    920 => '\\ensuremath\{\\Theta\}', 'Theta' => '\\ensuremath\{\\Theta\}',
    921 => '\\ensuremath\{\\mathrm\{I\}\}', 'Iota' => '\\ensuremath\{\\mathrm\{I\}\}',
    922 => '\\ensuremath\{\\mathrm\{K\}\}', 'Kappa' => '\\ensuremath\{\\mathrm\{K\}\}',
    923 => '\\ensuremath\{\\Lambda\}', 'Lambda' => '\\ensuremath\{\\Lambda\}',
    924 => '\\ensuremath\{\\mathrm\{M\}\}', 'Mu' => '\\ensuremath\{\\mathrm\{M\}\}',
    925 => '\\ensuremath\{\\mathrm\{N\}\}', 'Nu' => '\\ensuremath\{\\mathrm\{N\}\}',
    # Fixed: the closing brace of \ensuremath was missing.
    926 => '\\ensuremath\{\\mathrm\{\\Xi\}\}', 'Xi' => '\\ensuremath\{\\mathrm\{\\Xi\}\}',
    927 => '\\ensuremath\{\\mathrm\{O\}\}', 'Omicron' => '\\ensuremath\{\\mathrm\{O\}\}',
    928 => '\\ensuremath\{\\Pi\}', 'Pi' => '\\ensuremath\{\\Pi\}',
    929 => '\\ensuremath\{\\mathrm\{P\}\}', 'Rho' => '\\ensuremath\{\\mathrm\{P\}\}',

    # Skips 930

    931 => '\\ensuremath\{\Sigma\}', 'Sigma' => '\\ensuremath\{\Sigma\}',
    932 => '\\ensuremath\{\\mathrm\{T\}\}', 'Tau' => '\\ensuremath\{\\mathrm\{T\}\}',
    933 => '\\ensuremath\{\\Upsilon\}', 'Upsilon' => '\\ensuremath\{\\Upsilon\}',
    934 => '\\ensuremath\{\\Phi\}', 'Phi' => '\\ensuremath\{\\Phi\}',
    935 => '\\ensuremath\{\\mathrm\{X\}\}', 'Chi' => '\\ensuremath\{\\mathrm\{X\}\}',
    # Fixed: 'Psi' used the misspelled macro \ensuermath.
    936 => '\\ensuremath\{\\Psi\}', 'Psi' => '\\ensuremath\{\\Psi\}',
    937 => '\\ensuremath\{\\Omega\}', 'Omega' => '\\ensuremath\{\\Omega\}',

    # Greek lowercase:

    945 => '\\ensuremath\{\\alpha\}', 'alpha' => '\\ensuremath\{\\alpha\}',
    946 => '\\ensuremath\{\\beta\}', 'beta' => '\\ensuremath\{\\beta\}',
    947 => '\\ensuremath\{\\gamma\}', 'gamma' => '\\ensuremath\{\\gamma\}',
    948 => '\\ensuremath\{\\delta\}', 'delta' => '\\ensuremath\{\\delta\}',
    949 => '\\ensuremath\{\\epsilon\}', 'epsilon' => '\\ensuremath\{\\epsilon\}',
    950 => '\\ensuremath\{\\zeta\}', 'zeta' => '\\ensuremath\{\\zeta\}',
    951 => '\\ensuremath\{\\eta\}', 'eta' => '\\ensuremath\{\\eta\}',
    952 => '\\ensuremath\{\\theta\}', 'theta' => '\\ensuremath\{\\theta\}',
    953 => '\\ensuremath\{\\iota\}', 'iota' => '\\ensuremath\{\\iota\}',
    954 => '\\ensuremath\{\\kappa\}', 'kappa' => '\\ensuremath\{\\kappa\}',
    955 => '\\ensuremath\{\\lambda\}', 'lambda' => '\\ensuremath\{\\lambda\}',
    956 => '\\ensuremath\{\\mu\}', 'mu' => '\\ensuremath\{\\mu\}',
    957 => '\\ensuremath\{\\nu\}', 'nu' => '\\ensuremath\{\\nu\}',
    958 => '\\ensuremath\{\\xi\}', 'xi' => '\\ensuremath\{\\xi\}',
    959 => '\\ensuremath\{o\}', 'omicron' => '\\ensuremath\{o\}',
    960 => '\\ensuremath\{\\pi\}', 'pi' => '\\ensuremath\{\\pi\}',
    961 => '\\ensuremath\{\\rho\}', 'rho' => '\\ensuremath\{\\rho\}',
    962 => '\\ensuremath\{\\varsigma\}', 'sigmaf' => '\\ensuremath\{\\varsigma\}',
    963 => '\\ensuremath\{\\sigma\}', 'sigma' => '\\ensuremath\{\\sigma\}',
    964 => '\\ensuremath\{\\tau\}', 'tau' => '\\ensuremath\{\\tau\}',
    965 => '\\ensuremath\{\\upsilon\}', 'upsilon' => '\\ensuremath\{\\upsilon\}',
    966 => '\\ensuremath\{\\phi\}', 'phi' => '\\ensuremath\{\\phi\}',
    967 => '\\ensuremath\{\\chi\}', 'chi' => '\\ensuremath\{\\chi\}',
    968 => '\\ensuremath\{\\psi\}', 'psi' => '\\ensuremath\{\\psi\}',
    969 => '\\ensuremath\{\\omega\}', 'omega' => '\\ensuremath\{\\omega\}',
    977 => '\\ensuremath\{\\vartheta\}', 'thetasym' => '\\ensuremath\{\\vartheta\}',
    978 => '\\ensuremath\{\\varUpsilon\}', 'upsih' => '\\ensuremath\{\\varUpsilon\}',
    982 => '\\ensuremath\{\\varpi\}', 'piv' => '\\ensuremath\{\\varpi\}',

    # The general punctuation set:
    # The HTML 4 names are ensp, emsp and hellip; the misspelled keys
    # (enspc, emspc, hellep) are kept so existing content keeps working.

    8194 => '\\hspace{.5em}',
    'enspc' => '\\hspace{.5em}', 'ensp' => '\\hspace{.5em}',
    8195 => '\\hspace{1.0em}',
    'emspc' => '\\hspace{1.0em}', 'emsp' => '\\hspace{1.0em}',
    8201 => '\\hspace{0.167em}', 'thinsp' => '\\hspace{0.167em}',
    8204 => '\{\}', 'zwnj' => '\{\}',
    8205 => '', 'zwj' => '',
    8206 => '', 'lrm' => '',
    8207 => '', 'rlm' => '',
    8211 => '--', 'ndash' => '--',
    8212 => '---', 'mdash' => '---',
    8216 => '`', 'lsquo' => '`',
    8217 => "'", 'rsquo' => "'",
    # Fixed: the LaTeX macro is \quotesinglbase (no 'e' after 'singl').
    8218 => '\\quotesinglbase', 'sbquo' => '\\quotesinglbase',
    8220 => '``', 'ldquo' => '``',
    8221 => "''", 'rdquo' => "''",
    8222 => '\\quotedblbase', 'bdquo' => '\\quotedblbase',
    8224 => '\\dagger', 'dagger' => '\\dagger',
    '8225' => '\\ddag', 'Dagger' => '\\ddag',
    8226 => '\\textbullet', 'bull' => '\\textbullet',
    8230 => '\\textellipsis',
    'hellep' => '\\textellipsis', 'hellip' => '\\textellipsis',
    8240 => '\\textperthousand', permil => '\\textperthousand',
    8242 => '\\textquotesingle', 'prime' => '\\textquotesingle',
    8243 => '\\textquotedbl', 'Prime' => '\\textquotedbl',
    # Fixed: the LaTeX macros are \guilsinglleft and \guilsinglright.
    8249 => '\\guilsinglleft', 'lsaquo' => '\\guilsinglleft',
    8250 => '\\guilsinglright', 'rsaquo' => '\\guilsinglright',
    8254 => '\\textasciimacron', oline => '\\textasciimacron',
    8260 => '\\textfractionsolidus', 'frasl' => '\\textfractionsolidus',
    8364 => '\\texteuro', 'euro' => '\\texteuro',

    # Letter like symbols

    8472 => '\\ensuremath\{\\wp\}', 'weierp' => '\\ensuremath\{\\wp\}',
    8465 => '\\ensuremath\{\\Im\}', 'image' => '\\ensuremath\{\\Im\}',
    8476 => '\\ensuremath{\\Re\}', 'real' => '\\ensuremath{\\Re\}',
    8482 => '\\texttrademark', 'trade' => '\\texttrademark',
    8501 => '\\ensuremath{\\aleph\}', 'alefsym' => '\\ensuremath{\\aleph\}',

    # Arrows and then some (harpoons from Hon Kie).

    8592 => '\\textleftarrow', 'larr' => '\\textleftarrow',
    8593 => '\\textuparrow', 'uarr' => '\\textuparrow',
    8594 => '\\textrightarrow', 'rarr' => '\\textrightarrow',
    8595 => '\\textdownarrow', 'darr' => '\\textdownarrow',
    8596 => '\\ensuremath\{\\leftrightarrow\}', 'harr' => '\\ensuremath\{\\leftrightarrow\}',
    8598 => '\\ensuremath\{\\nwarrow\}',
    8599 => '\\ensuremath\{\\nearrow\}',
    8600 => '\\ensuremath\{\\searrow\}',
    8601 => '\\ensuremath\{\\swarrow\}',
    8605 => '\\ensuremath\{\\leadsto\}',
    8614 => '\\ensuremath\{\\mapsto\}',
    8617 => '\\ensuremath\{\\hookleftarrow\}',
    8618 => '\\ensuremath\{\\hookrightarrow\}',
    8629 => '\\ensuremath\{\\hookleftarrow\}', # not an exact match but best I know.
    'crarr' => '\\ensuremath\{\\hookleftarrow\}', # not an exact match but best I know.
    8636 => '\\ensuremath\{\\leftharpoonup\}',
    8637 => '\\ensuremath\{\\leftharpoondown\}',
    8640 => '\\ensuremath\{\\rightharpoonup\}',
    8641 => '\\ensuremath\{\\rightharpoondown\}',
    8652 => '\\ensuremath\{\\rightleftharpoons\}',
    8656 => '\\ensuremath\{\\Leftarrow\}', 'lArr' => '\\ensuremath\{\\Leftarrow\}',
    8657 => '\\ensuremath\{\\Uparrow\}', 'uArr' => '\\ensuremath\{\\Uparrow\}',
    8658 => '\\ensuremath\{\\Rightarrow\}', 'rArr' => '\\ensuremath\{\\Rightarrow\}',
    8659 => '\\ensuremath\{\\Downarrow\}', 'dArr' => '\\ensuremath\{\\Downarrow\}',
    # Added: 'hArr' is the HTML 4 name for 8660 and was missing.
    8660 => '\\ensuremath\{\\Leftrightarrow\}', 'hArr' => '\\ensuremath\{\\Leftrightarrow\}',
    'vArr' => '\\ensuremath\{\\Updownarrow\}',
    8661 => '\\ensuremath\{\\Updownarrow\}',
    'lAarr' => '\\ensuremath\{\\Lleftarrow\}',
    8666 => '\\ensuremath\{\\Lleftarrow\}',
    'rAarr' => '\\ensuremath\{\\Rrightarrow\}',
    8667 => '\\ensuremath\{\\Rrightarrow\}',
    'rarrw' => '\\ensuremath\{\\rightsquigarrow\}',
    8669 => '\\ensuremath\{\\rightsquigarrow\}',

    # Mathematical operators.

    'forall' => '\\ensuremath\{\\forall\}',
    8704 => '\\ensuremath\{\\forall\}',
    'comp' => '\\ensuremath\{\\complement\}',
    8705 => '\\ensuremath\{\\complement\}',
    'part' => '\\ensuremath\{\\partial\}',
    8706 => '\\ensuremath\{\\partial\}',
    'exist' => '\\ensuremath\{\\exists\}',
    8707 => '\\ensuremath\{\\exists\}',
    'nexist' => '\\ensuremath\{\\nexists\}',
    8708 => '\\ensuremath\{\\nexists\}',
    # Fixed: \emptysset -> \emptyset.
    'empty' => '\\ensuremath\{\\emptyset\}',
    8709 => '\\ensuremath\{\\emptyset\}',
    8710 => '\\ensuremath\{\\Delta\}',
    'nabla' => '\\ensuremath\{\\nabla\}',
    8711 => '\\ensuremath\{\\nabla\}',
    'isin' => '\\ensuremath\{\\in\}',
    8712 => '\\ensuremath\{\\in\}',
    'notin' => '\\ensuremath\{\\notin\}',
    8713 => '\\ensuremath\{\\notin\}',
    ni => '\\ensuremath\{\\ni\}',
    8715 => '\\ensuremath\{\\ni\}',
    8716 => '\\ensuremath\{\\not\\ni\}',
    'prod' => '\\ensuremath\{\\prod\}',
    8719 => '\\ensuremath\{\\prod\}',
    8720 => '\\ensuremath\{\\coprod\}',
    'sum' => '\\ensuremath\{\\sum\}',
    8721 => '\\ensuremath\{\\sum\}',
    'minus' => '\\ensuremath\{-\}',
    8722 => '\\ensuremath\{-\}',
    8723 => '\\ensuremath\{\\mp\}',
    8724 => '\\ensuremath\{\\dotplus\}',
    8725 => '\\ensuremath\{\\diagup\}',
    8726 => '\\ensuremath\{\\smallsetminus\}',
    'lowast' => '\\ensuremath\{*\}',
    8727 => '\\ensuremath\{*\}',
    8728 => '\\ensuremath\{\\circ\}',
    8729 => '\\ensuremath\{\\bullet\}',
    'radic' => '\\ensuremath\{\\surd\}',
    8730 => '\\ensuremath\{\\surd\}',
    8731 => '\\ensuremath\{\\sqrt[3]\{\}\}',
    8732 => '\\ensuremath\{\\sqrt[4]\{\}\}',
    'prop' => '\\ensuremath\{\\propto\}',
    8733 => '\\ensuremath\{\\propto\}',
    'infin' => '\\ensuremath\{\\infty\}',
    8734 => '\\ensuremath\{\\infty\}',
    'ang90' => '\\ensuremath\{\\sqangle\}',
    8735 => '\\ensuremath\{\\sqangle\}',
    'ang' => '\\ensuremath\{\\angle\}',
    8736 => '\\ensuremath\{\\angle\}',
    'angmsd' => '\\ensuremath\{\\measuredangle\}',
    8737 => '\\ensuremath\{\\measuredangle\}',
    # Fixed: \sphiericalangle -> \sphericalangle.
    'angsph' => '\\ensuremath\{\\sphericalangle\}',
    8738 => '\\ensuremath\{\\sphericalangle\}',
    8739 => '\\ensuremath\{\\vert\}',
    8740 => '\\ensuremath\{\\Vert\}',
    'and' => '\\ensuremath\{\\land\}',
    8743 => '\\ensuremath\{\\land\}',
    'or' => '\\ensuremath\{\\lor\}',
    8744 => '\\ensuremath\{\\lor\}',
    'cap' => '\\ensuremath\{\\cap\}',
    8745 => '\\ensuremath\{\\cap\}',
    'cup' => '\\ensuremath\{\\cup\}',
    8746 => '\\ensuremath\{\\cup\}',
    'int' => '\\ensuremath\{\\int\}',
    8747 => '\\ensuremath\{\\int\}',
    'conint' => '\\ensuremath\{\\oint\}',
    8750 => '\\ensuremath\{\\oint\}',
    'there4' => '\\ensuremath\{\\therefore\}',
    8756 => '\\ensuremath\{\\therefore\}',
    'becaus' => '\\ensuremath\{\\because\}',
    8757 => '\\ensuremath\{\\because\}',
    8758 => '\\ensuremath\{:\}',
    8759 => '\\ensuremath\{::\}',
    'sim' => '\\ensuremath\{\\sim\}',
    8764 => '\\ensuremath\{\\sim\}',
    8765 => '\\ensuremath\{\\backsim\}',
    'wreath' => '\\ensuremath\{\\wr\}',
    8768 => '\\ensuremath\{\\wr\}',
    'nsim' => '\\ensuremath\{\\not\sim\}',
    8769 => '\\ensuremath\{\\not\sim\}',
    # 'asymp' => '\\ensuremath\{\\asymp\}',  &asymp; is actually a different glyph.
    8771 => '\\ensuremath\{\\asymp\}',
    8772 => '\\ensuremath\{\\not\\asymp\}',
    'cong' => '\\ensuremath\{\\cong\}',
    8773 => '\\ensuremath\{\\cong\}',
    8775 => '\\ensuremath\{\\ncong\}',
    8778 => '\\ensuremath\{\\approxeq\}',
    8784 => '\\ensuremath\{\\doteq\}',
    8785 => '\\ensuremath\{\\doteqdot\}',
    8786 => '\\ensuremath\{\\fallingdotseq\}',
    8787 => '\\ensuremath\{\\risingdotseq\}',
    8788 => '\\ensuremath\{:=\}',
    8789 => '\\ensuremath\{=:\}',
    8790 => '\\ensuremath\{\\eqcirc\}',
    8791 => '\\ensuremath\{\\circeq\}',
    'wedgeq' => '\\ensuremath\{\\stackrel\{\\wedge\}\{=\}\}',
    8792 => '\\ensuremath\{\\stackrel\{\\wedge\}\{=\}\}',
    8794 => '\\ensuremath\{\\stackrel\{\\vee\}\{=\}\}',
    8795 => '\\ensuremath\{\\stackrel\{\\star}\{=\}\}',
    # Fixed: \triangleeq -> \triangleq.
    8796 => '\\ensuremath\{\\triangleq\}',
    8797 => '\\ensuremath\{\\stackrel\{def\}\{=\}\}',
    8798 => '\\ensuremath\{\\stackrel\{m\}\{=\}\}',
    8799 => '\\ensuremath\{\\stackrel\{?\}\{=\}\}',
    'ne' => '\\ensuremath\{\\neq\}',
    8800 => '\\ensuremath\{\\neq\}',
    'equiv' => '\\ensuremath\{\\equiv\}',
    8801 => '\\ensuremath\{\\equiv\}',
    8802 => '\\ensuremath\{\\not\\equiv\}',
    'le' => '\\ensuremath\{\\leq\}',
    8804 => '\\ensuremath\{\\leq\}',
    'ge' => '\\ensuremath\{\\geq\}',
    8805 => '\\ensuremath\{\\geq\}',
    8806 => '\\ensuremath\{\\leqq\}',
    8807 => '\\ensuremath\{\\geqq\}',
    8810 => '\\ensuremath\{\\ll\}',
    8811 => '\\ensuremath\{\\gg\}',
    'twixt' => '\\ensuremath\{\\between\}',
    8812 => '\\ensuremath\{\\between\}',
    8813 => '\\ensuremath\{\\not\\asymp\}',
    8814 => '\\ensuremath\{\\not<\}',
    8815 => '\\ensuremath\{\\not>\}',
    8816 => '\\ensuremath\{\\not\\leqslant\}',
    8817 => '\\ensuremath\{\\not\\geqslant\}',
    # Fixed: \lessim -> \lesssim.
    8818 => '\\ensuremath\{\\lesssim\}',
    8819 => '\\ensuremath\{\\gtrsim\}',
    8820 => '\\ensuremath\{\\stackrel\{<\}\{>\}\}',
    8821 => '\\ensuremath\{\\stackrel\{>\}\{<\}\}',
    8826 => '\\ensuremath\{\\prec\}',
    8827 => '\\ensuremath\{\\succ\}',
    8828 => '\\ensuremath\{\\preceq\}',
    8829 => '\\ensuremath\{\\succeq\}',
    8830 => '\\ensuremath\{\\not\\prec\}',
    8831 => '\\ensuremath\{\\not\\succ\}',
    'sub' => '\\ensuremath\{\\subset\}',
    8834 => '\\ensuremath\{\\subset\}',
    'sup' => '\\ensuremath\{\\supset\}',
    8835 => '\\ensuremath\{\\supset\}',
    'nsub' => '\\ensuremath\{\\not\\subset\}',
    8836 => '\\ensuremath\{\\not\\subset\}',
    8837 => '\\ensuremath\{\\not\\supset\}',
    'sube' => '\\ensuremath\{\\subseteq\}',
    8838 => '\\ensuremath\{\\subseteq\}',
    # Fixed: both entries used the misspelled macro \ensuermath.
    'supe' => '\\ensuremath\{\\supseteq\}',
    8839 => '\\ensuremath\{\\supseteq\}',
    8840 => '\\ensuremath\{\\nsubseteq\}',
    8841 => '\\ensuremath\{\\nsupseteq\}',
    8842 => '\\ensuremath\{\\subsetneq\}',
    8843 => '\\ensuremath\{\\supsetneq\}',
    8847 => '\\ensuremath\{\\sqsubset\}',
    8848 => '\\ensuremath\{\\sqsupset\}',
    8849 => '\\ensuremath\{\\sqsubseteq\}',
    8850 => '\\ensuremath\{\\sqsupseteq\}',
    8851 => '\\ensuremath\{\\sqcap\}',
    8852 => '\\ensuremath\{\\sqcup\}',
    'oplus' => '\\ensuremath\{\\oplus\}',
    8853 => '\\ensuremath\{\\oplus\}',
    8854 => '\\ensuremath\{\\ominus\}',
    'otimes' => '\\ensuremath\{\\otimes\}',
    8855 => '\\ensuremath\{\\otimes\}',
    8856 => '\\ensuremath\{\\oslash\}',
    8857 => '\\ensuremath\{\\odot\}',
    8858 => '\\ensuremath\{\\circledcirc\}',
    8859 => '\\ensuremath\{\\circledast\}',
    8861 => '\\ensuremath\{\\ominus\}', # Close enough for government work.
    8862 => '\\ensuremath\{\\boxplus\}',
    8863 => '\\ensuremath\{\\boxminus\}',
    8864 => '\\ensuremath\{\\boxtimes\}',
    8865 => '\\ensuremath\{\\boxdot\}',
    'vdash' => '\\ensuremath\{\\vdash\}',
    8866 => '\\ensuremath\{\\vdash\}',
    'dashv' => '\\ensuremath\{\\dashv\}',
    8867 => '\\ensuremath\{\\dashv\}',
    'perp' => '\\ensuremath\{\\perp\}',
    8869 => '\\ensuremath\{\\perp\}',
    8871 => '\\ensuremath\{\\models\}',
    8872 => '\\ensuremath\{\\vDash\}',
    8873 => '\\ensuremath\{\\Vdash\}',
    8874 => '\\ensuremath\{\\Vvdash\}',
    8876 => '\\ensuremath\{\\nvdash\}',
    8877 => '\\ensuremath\{\\nvDash\}',
    8878 => '\\ensuremath\{\\nVdash\}',
    8880 => '\\ensuremath\{\\prec\}',
    8881 => '\\ensuremath\{\\succ\}',
    8882 => '\\ensuremath\{\\vartriangleleft\}',
    8883 => '\\ensuremath\{\\vartriangleright\}',
    8884 => '\\ensuremath\{\\trianglelefteq\}',
    8885 => '\\ensuremath\{\\trianglerighteq\}',
    8891 => '\\ensuremath\{\\veebar\}',
    8896 => '\\ensuremath\{\\land\}',
    8897 => '\\ensuremath\{\\lor\}',
    8898 => '\\ensuremath\{\\cap\}',
    8899 => '\\ensuremath\{\\cup\}',
    8900 => '\\ensuremath\{\\diamond\}',
    'sdot' => '\\ensuremath\{\\cdot\}',
    8901 => '\\ensuremath\{\\cdot\}',
    8902 => '\\ensuremath\{\\star\}',
    8903 => '\\ensuremath\{\\divideontimes\}',
    8904 => '\\ensuremath\{\\bowtie\}',
    8905 => '\\ensuremath\{\\ltimes\}',
    8906 => '\\ensuremath\{\\rtimes\}',
    8907 => '\\ensuremath\{\\leftthreetimes\}',
    8908 => '\\ensuremath\{\\rightthreetimes\}',
    8909 => '\\ensuremath\{\\simeq\}',
    8910 => '\\ensuremath\{\\curlyvee\}',
    8911 => '\\ensuremath\{\\curlywedge\}',
    8912 => '\\ensuremath\{\\Subset\}',
    8913 => '\\ensuremath\{\\Supset\}',
    8914 => '\\ensuremath\{\\Cap\}',
    8915 => '\\ensuremath\{\\Cup\}',
    8916 => '\\ensuremath\{\\pitchfork\}',
    8918 => '\\ensuremath\{\\lessdot\}',
    8919 => '\\ensuremath\{\\gtrdot\}',
    8920 => '\\ensuremath\{\\lll\}',
    8921 => '\\ensuremath\{\\ggg\}',
    8922 => '\\ensuremath\{\\gtreqless\}',
    8923 => '\\ensuremath\{\\lesseqgtr\}',
    8924 => '\\ensuremath\{\\eqslantless\}',
    8925 => '\\ensuremath\{\\eqslantgtr\}',
    8926 => '\\ensuremath\{\\curlyeqprec\}',
    8927 => '\\ensuremath\{\\curlyeqsucc\}',
    8928 => '\\ensuremath\{\\not\\preccurlyeq\}',
    # Fixed: \succurlyeq -> \succcurlyeq.
    8929 => '\\ensuremath\{\\not\\succcurlyeq\}',
    8930 => '\\ensuremath\{\\not\\sqsupseteq\}',
    8931 => '\\ensuremath\{\\not\\sqsubseteq\}',
    8938 => '\\ensuremath\{\\not\\vartriangleleft\}',
    8939 => '\\ensuremath\{\\not\vartriangleright\}',
    8940 => '\\ensuremath\{\\not\trianglelefteq\}',
    8941 => '\\ensuremath\{\\not\trianglerighteq\}',
    8942 => '\\ensuremath\{\\vdots\}',
    8960 => '\\ensuremath\{\\varnothing\}',
    'lceil' => '\\ensuremath\{\\lceil\}',
    8968 => '\\ensuremath\{\\lceil\}',
    'rceil' => '\\ensuremath\{\\rceil\}',
    8969 => '\\ensuremath\{\\rceil\}',
    'lfloor' => '\\ensuremath\{\\lfloor\}',
    8970 => '\\ensuremath\{\\lfloor\}',
    'rfloor' => '\\ensuremath\{\\rfloor}',
    8971 => '\\ensuremath\{\\rfloor}',
    'lang' => '\\ensuremath\{\\langle\}',
    9001 => '\\ensuremath\{\\langle\}',
    'rang' => '\\ensuremath\{\\rangle\}',
    9002 => '\\ensuremath\{\\rangle\}',
    'loz' => '\\ensuremath\{\\lozenge\}',
    9674 => '\\ensuremath\{\\lozenge\}',
    'spades' => '\\ensuremath\{\\spadesuit\}',
    9824 => '\\ensuremath\{\\spadesuit\}',
    9825 => '\\ensuremath\{\\heartsuit\}',
    9826 => '\\ensuremath\{\\diamondsuit\}',
    'clubs' => '\\ensuremath\{\\clubsuit\}',
    9827 => '\\ensuremath\{\\clubsuit\}',
    'diams' => '\\ensuremath\{\\blacklozenge\}',
    9830 => '\\ensuremath\{\\blacklozenge\}'

);

#
#  Convert a numerical entity (that does not exist in our hash)
#  to its UTF-8 equivalent representation.
#  This allows us to support, to some extent, any entity for which
#  dvipdf can find a glyph (given that LaTeX is now UTF-8 clean).
#
# Parameters:
#   unicode  - The unicode for the character.  This is assumed to
#              be a decimal value
# Returns:
#   The UTF-8 equivalent of the value.
#
sub entity_to_utf8 {
    my ($unicode) = @_;

    # The "U" template makes pack produce the character whose code point
    # is the given number.
    return pack("U", $unicode);
}

#
#  Convert an entity to the corresponding LaTeX if possible.
#  If that is not possible, and the entity is numeric,
#  the entity is treated like a Unicode character and converted
#  to UTF-8, which should display as long as dvipdf can find the
#  appropriate glyph.
#
#  The entity is assumed to have already had the
#  &# ; or & ; removed.
#
# Parameters:
#   entity    - Name of entity to convert (entity name or decimal digits).
# Returns:
#  One of the following:
#   - LaTeX string that produces the entity.
#   - UTF-8 equivalent of a numeric entity for which we don't have a
#     LaTeX string.
#   - ' ' for text entities for which there's no LaTeX equivalent.
#
sub entity_to_latex {
    my ($entity) = @_;

    # A numeric entity is any run of decimal digits.  (The previous test,
    # /^\d$/, only accepted a single digit, so every multi-digit entity
    # missing from the table was turned into a blank.)
    my $is_numeric = ($entity =~ /\A[0-9]+\z/);

    # The table is keyed by plain decimal numbers, so drop leading zeros:
    # "065" must find the same entry as "65".
    if ($is_numeric) {
        $entity =~ s/\A0+(?=[0-9])//;
    }

    # Try to look up the entity (text or numeric) in the hash:
    my $latex = $entities{$entity};
    return $latex if defined $latex;

    # Numeric entities without a table entry become the raw character:
    return entity_to_utf8($entity) if $is_numeric;

    # Can't do the conversion ...
    return " ";
}

#
#  Convert all the entities in a string.
#  We locate all the entities, pass them into entity_to_latex
#  and replace the occurrences in the input string.
#  The assumption is that there are few entities in any string/document
#  so this is not too bad.  The advantage of locating entities generically,
#  rather than with one regular expression per entity, is that we can use
#  lookup tables for the translation in entity_to_latex above.
#
# Parameters:
#   input   - Input string/document
# Returns
#   input with entities replaced by latexable stuff (UTF-8 encodings or
#   latex control strings to produce the entity).
#
#
sub replace_entities {
    my ($input) = @_;

    # Replace every entity in one left-to-right pass.  Text produced by a
    # replacement is never scanned again, so "&amp;lt;" becomes the LaTeX
    # for "&" followed by the literal "lt;", instead of being decoded a
    # second time as "&lt;".
    #
    # Three forms are recognized:
    #   &#NNN;   decimal numeric entity, looked up by its digits
    #   &#xHH;   hexadecimal numeric entity, converted to decimal first
    #   &name;   named entity, looked up by name
    $input =~ s{ & (?: \# ([0-9]+) | \# [xX] ([0-9A-Fa-f]+) | (\w+) ) ; }{
        my $entity = defined $1 ? $1
                   : defined $2 ? hex($2)
                   :              $3;
        entity_to_latex($entity);
    }gex;

    return $input;
}

1;

__END__