# The LearningOnline Network
# entity -> tex.
#
# $Id:
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
# http://www.lon-capa.org/
#
#
package Apache::entities;
use strict;
#
#  This file contains a table driven entity-->latex converter.
#
#  Assumptions:
#   The number of entities in a resource is small compared with the
#   number of possible entities that might be translated.
#   Therefore the strategy is to match a general entity pattern,
#   pull out the match, look it up in an entity -> tex hash
#   and do the replacement.
#
# In order to simplify the hash, the following reductions are done:
#  &#<decimal digits>; has the &# and ; stripped and is converted to an int.
#  &#x<hex digits>;    has the &#x and ; stripped and is converted to an int
#                      as a hex value.
#  All others have the & and ; stripped.

# The hash:  Add new conversions here; leave off the leading & and the
# trailing ;.  Numeric entities need only appear as their decimal versions
# (e.g. 1234 is sufficient, no need for 0x4d2 as well).
#
# A value must never end in a bare control word such as \ae: TeX would
# absorb any letters (and spaces) that follow the entity in the document.
# Wrap such values in braces, e.g. '{\ae}'.
#
#  This entity table is mercilessly cribbed from the HTML pocket reference
#  table starting at pg 82.  In most cases the LaTeX equivalent codes come from
#  the original massive regular expression replacements originally by
#  A. Sakharuk in lonprintout.pm
#
#  I also want to acknowledge
#   ISO Character entities and their LaTeX equivalents by
#      Vidar Bronken Gundersen, and Rune Mathisen
#    http://www.bitjungle.com/isoent-ref.pdf
#
#  Note numerical entities are essentially unicode character codes.
#
my %entities = (

    #  ---- ASCII code page: ----------------

    # Translation to empty strings:

    7 => "", 9 => "", 10 => "", 13 => "",

    # Translations to simple characters:

    32 => " ", 33 => "!",
    34 => '"', 'quot' => '"',
    35 => '\\#', 36 => '\\$', 37 => '\%',
    38 => '\&', 'amp' => '\&',
    39 => '\'',    # Apostrophe
    40 => '(', 41 => ')', 42 => '*', 43 => '+',
    44 => ',',     # comma
    45 => '-', 46 => '.', 47 => '/',

    # Digits (48-57) and letters (65-90, 97-122) translate to themselves:
    (map { ($_, chr($_)) } (48 .. 57, 65 .. 90, 97 .. 122)),

    58 => ':', 59 => ';',
    60 => '\ensuremath{<}', 'lt' => '\ensuremath{<}',
    61 => '\ensuremath{=}',
    62 => '\ensuremath{>}', 'gt' => '\ensuremath{>}',
    63 => '?', 64 => '@',
    91 => '[',
    92 => '\ensuremath{\setminus}',    # \setminus is \ with special spacing.
    93 => ']',
    94 => '\ensuremath{\wedge}',
    95 => '\underline{\makebox[2mm]{\strut}}',    # Underline 2mm of space for _
    96 => '`',
    123 => '\{', 124 => '|', 125 => '\}',
    126 => '\~{}',    # \~ is an accent; the {} keeps it off the next character.

    # Controls and Latin-1 supplement.  Note that some entities that have
    # visible effect are not printing unicode characters.  Specifically
    # &#130;-&#160;

    130 => ',',
    131 => '\ensuremath{f}',
    132 => ',,',    # Low double left quotes.
    133 => '\ensuremath{\ldots}',
    134 => '\ensuremath{\dagger}',
    135 => '\ensuremath{\ddagger}',
    136 => '\ensuremath{\wedge}',
    137 => '\textperthousand ',
    138 => '\v{S}',
    139 => '\ensuremath{<}',
    140 => '{\OE}',

    # There's a gap here in my entity table

    145 => '`',
    146 => '\'',
    147 => '``',
    148 => '\'\'',
    149 => '\ensuremath{\bullet}',
    150 => '--',
    151 => '---',
    152 => '\ensuremath{\sim}',
    153 => '{\texttrademark}',
    154 => '\v{s}',
    155 => '\ensuremath{>}',
    156 => '\oe ',

    # Another short gap:

    159 => '\"Y',
    160 => '~', 'nbsp' => '~',
    161 => '\textexclamdown ', 'iexcl' => '\textexclamdown ',
    162 => '\textcent ', 'cent' => '\textcent ',
    163 => '\pounds ', 'pound' => '\pounds ',
    164 => '\textcurrency ', 'curren' => '\textcurrency ',
    165 => '\textyen ', 'yen' => '\textyen ',
    166 => '\textbrokenbar ', 'brvbar' => '\textbrokenbar ',
    167 => '\textsection ', 'sect' => '\textsection ',
    168 => '\"{}', 'uml' => '\"{}',
    169 => '\copyright ', 'copy' => '\copyright ',
    170 => '\textordfeminine ', 'ordf' => '\textordfeminine ',
    171 => '\ensuremath{\ll}',    # approximation of left angle quote.
    'laquo' => '\ensuremath{\ll}',    #   ""
    172 => '\ensuremath{\neg}', 'not' => '\ensuremath{\neg}',
    173 => ' - ', 'shy' => ' - ',
    174 => '\textregistered ', 'reg' => '\textregistered ',
    175 => '\ensuremath{^{-}}', 'macr' => '\ensuremath{^{-}}',
    176 => '\ensuremath{^{\circ}}', 'deg' => '\ensuremath{^{\circ}}',
    177 => '\ensuremath{\pm}', 'plusmn' => '\ensuremath{\pm}',
    178 => '\ensuremath{^2}', 'sup2' => '\ensuremath{^2}',
    179 => '\ensuremath{^3}', 'sup3' => '\ensuremath{^3}',
    180 => "\\'{}", 'acute' => "\\'{}",
    181 => '\ensuremath{\mu}', 'micro' => '\ensuremath{\mu}',
    182 => '\P ', 'para' => '\P ',
    183 => '\ensuremath{\cdot}', 'middot' => '\ensuremath{\cdot}',
    184 => '\c{\strut}', 'cedil' => '\c{\strut}',
    185 => '\ensuremath{^1}', 'sup1' => '\ensuremath{^1}',
    186 => '\textordmasculine ', 'ordm' => '\textordmasculine ',
    187 => '\ensuremath{\gg}', 'raquo' => '\ensuremath{\gg}',
    188 => '\textonequarter ', 'frac14' => '\textonequarter ',
    189 => '{\textonehalf}', 'frac12' => '{\textonehalf}',
    190 => '\textthreequarters ', 'frac34' => '\textthreequarters ',
    191 => '\textquestiondown ', 'iquest' => '\textquestiondown ',
    192 => '\\`{A}', 'Agrave' => '\\`{A}',
    193 => "\\'{A}", 'Aacute' => "\\'{A}",
    194 => '\^{A}', 'Acirc' => '\^{A}',
    195 => '\~{A}', 'Atilde' => '\~{A}',
    196 => '\\"{A}', 'Auml' => '\\"{A}',
    197 => '{\AA}', 'Aring' => '{\AA}',
    198 => '{\AE}', 'AElig' => '{\AE}',
    199 => '\c{C}', 'Ccedil' => '\c{C}',    # capital C with cedilla
    200 => '\\`{E}', 'Egrave' => '\\`{E}',
    201 => "\\'{E}", 'Eacute' => "\\'{E}",
    202 => '\\^{E}', 'Ecirc' => '\\^{E}',
    203 => '\\"{E}', 'Euml' => '\\"{E}',
    204 => '\\`{I}', 'Igrave' => '\\`{I}',
    205 => "\\'{I}", 'Iacute' => "\\'{I}",
    206 => '\\^{I}', 'Icirc' => '\\^{I}',
    207 => '\\"{I}', 'Iuml' => '\\"{I}',
    208 => '{\DH}', 'ETH' => '{\DH}',
    209 => '\~{N}', 'Ntilde' => '\~{N}',
    210 => '\\`{O}', 'Ograve' => '\\`{O}',
    211 => "\\'{O}", 'Oacute' => "\\'{O}",
    212 => '\\^{O}', 'Ocirc' => '\\^{O}',
    213 => '\~{O}', 'Otilde' => '\~{O}',
    214 => '\\"{O}', 'Ouml' => '\\"{O}',
    215 => '\ensuremath{\times}', 'times' => '\ensuremath{\times}',
    216 => '{\O}', 'Oslash' => '{\O}',
    217 => '\\`{U}', 'Ugrave' => '\\`{U}',
    218 => "\\'{U}", 'Uacute' => "\\'{U}",
    219 => '\\^{U}', 'Ucirc' => '\\^{U}',
    220 => '\\"{U}', 'Uuml' => '\\"{U}',
    221 => "\\'{Y}", 'Yacute' => "\\'{Y}",
    223 => '{\ss}', 'szlig' => '{\ss}',
    224 => '\\`{a}', 'agrave' => '\\`{a}',
    225 => "\\'{a}", 'aacute' => "\\'{a}",
    226 => '\\^{a}', 'acirc' => '\\^{a}',
    227 => '\\~{a}', 'atilde' => '\\~{a}',
    228 => '\\"{a}', 'auml' => '\\"{a}',
    229 => '{\aa}', 'aring' => '{\aa}',
    230 => '{\ae}', 'aelig' => '{\ae}',
    231 => '\c{c}', 'ccedil' => '\c{c}',
    232 => '\\`{e}', 'egrave' => '\\`{e}',
    233 => "\\'{e}", 'eacute' => "\\'{e}",
    234 => '\\^{e}', 'ecirc' => '\\^{e}',
    235 => '\\"{e}', 'euml' => '\\"{e}',
    236 => '\\`{i}', 'igrave' => '\\`{i}',
    237 => "\\'{i}", 'iacute' => "\\'{i}",
    238 => '\\^{i}', 'icirc' => '\\^{i}',
    239 => '\\"{i}', 'iuml' => '\\"{i}',
    241 => '\\~{n}', 'ntilde' => '\\~{n}',
    242 => '\\`{o}', 'ograve' => '\\`{o}',
    243 => "\\'{o}", 'oacute' => "\\'{o}",
    244 => '\\^{o}', 'ocirc' => '\\^{o}',
    245 => '\\~{o}', 'otilde' => '\\~{o}',
    246 => '\\"{o}', 'ouml' => '\\"{o}',
    247 => '\ensuremath{\div}', 'divide' => '\ensuremath{\div}',
    248 => '{\o}', 'oslash' => '{\o}',
    249 => '\\`{u}', 'ugrave' => '\\`{u}',
    250 => "\\'{u}", 'uacute' => "\\'{u}",
    251 => '\\^{u}', 'ucirc' => '\\^{u}',
    252 => '\\"{u}', 'uuml' => '\\"{u}',
    253 => "\\'{y}", 'yacute' => "\\'{y}",
    255 => '\\"{y}', 'yuml' => '\\"{y}',

    # hbar entity number comes from the unicode character:
    # see e.g. http://www.unicode.org/charts/PDF/U0100.pdf
    # ISO also documents a 'planck' entity.

    295 => '\ensuremath{\hbar}', 'planck' => '\ensuremath{\hbar}',

    # Latin extended-A HTML 4.01 entities:

    338 => '{\OE}', 'OElig' => '{\OE}',
    339 => '{\oe}', 'oelig' => '{\oe}',
    352 => '\v{S}', 'Scaron' => '\v{S}',
    353 => '\v{s}', 'scaron' => '\v{s}',
    376 => '\\"{Y}', 'Yuml' => '\\"{Y}',

    # Latin extended B HTML 4.01 entities

    402 => '\ensuremath{f}', 'fnof' => '\ensuremath{f}',

    # Spacing modifier letters:

    710 => '\^{}', 'circ' => '\^{}',
    732 => '\~{}', 'tilde' => '\~{}',

    # Greek uppercase:

    913 => '\ensuremath{\mathrm{A}}', 'Alpha' => '\ensuremath{\mathrm{A}}',
    914 => '\ensuremath{\mathrm{B}}', 'Beta' => '\ensuremath{\mathrm{B}}',
    915 => '\ensuremath{\Gamma}', 'Gamma' => '\ensuremath{\Gamma}',
    916 => '\ensuremath{\Delta}', 'Delta' => '\ensuremath{\Delta}',
    917 => '\ensuremath{\mathrm{E}}', 'Epsilon' => '\ensuremath{\mathrm{E}}',
    918 => '\ensuremath{\mathrm{Z}}', 'Zeta' => '\ensuremath{\mathrm{Z}}',
    919 => '\ensuremath{\mathrm{H}}', 'Eta' => '\ensuremath{\mathrm{H}}',
    920 => '\ensuremath{\Theta}', 'Theta' => '\ensuremath{\Theta}',
    921 => '\ensuremath{\mathrm{I}}', 'Iota' => '\ensuremath{\mathrm{I}}',
    922 => '\ensuremath{\mathrm{K}}', 'Kappa' => '\ensuremath{\mathrm{K}}',
    923 => '\ensuremath{\Lambda}', 'Lambda' => '\ensuremath{\Lambda}',
    924 => '\ensuremath{\mathrm{M}}', 'Mu' => '\ensuremath{\mathrm{M}}',
    925 => '\ensuremath{\mathrm{N}}', 'Nu' => '\ensuremath{\mathrm{N}}',
    926 => '\ensuremath{\mathrm{\Xi}}', 'Xi' => '\ensuremath{\mathrm{\Xi}}',
    927 => '\ensuremath{\mathrm{O}}', 'Omicron' => '\ensuremath{\mathrm{O}}',
    928 => '\ensuremath{\Pi}', 'Pi' => '\ensuremath{\Pi}',
    929 => '\ensuremath{\mathrm{P}}', 'Rho' => '\ensuremath{\mathrm{P}}',

    # Skips 930

    931 => '\ensuremath{\Sigma}', 'Sigma' => '\ensuremath{\Sigma}',
    932 => '\ensuremath{\mathrm{T}}', 'Tau' => '\ensuremath{\mathrm{T}}',
    933 => '\ensuremath{\Upsilon}', 'Upsilon' => '\ensuremath{\Upsilon}',
    934 => '\ensuremath{\Phi}', 'Phi' => '\ensuremath{\Phi}',
    935 => '\ensuremath{\mathrm{X}}', 'Chi' => '\ensuremath{\mathrm{X}}',
    936 => '\ensuremath{\Psi}', 'Psi' => '\ensuremath{\Psi}',
    937 => '\ensuremath{\Omega}', 'Omega' => '\ensuremath{\Omega}',

    # Greek lowercase:

    945 => '\ensuremath{\alpha}', 'alpha' => '\ensuremath{\alpha}',
    946 => '\ensuremath{\beta}', 'beta' => '\ensuremath{\beta}',
    947 => '\ensuremath{\gamma}', 'gamma' => '\ensuremath{\gamma}',
    948 => '\ensuremath{\delta}', 'delta' => '\ensuremath{\delta}',
    949 => '\ensuremath{\epsilon}', 'epsilon' => '\ensuremath{\epsilon}',
    950 => '\ensuremath{\zeta}', 'zeta' => '\ensuremath{\zeta}',
    951 => '\ensuremath{\eta}', 'eta' => '\ensuremath{\eta}',
    952 => '\ensuremath{\theta}', 'theta' => '\ensuremath{\theta}',
    953 => '\ensuremath{\iota}', 'iota' => '\ensuremath{\iota}',
    954 => '\ensuremath{\kappa}', 'kappa' => '\ensuremath{\kappa}',
    955 => '\ensuremath{\lambda}', 'lambda' => '\ensuremath{\lambda}',
    956 => '\ensuremath{\mu}', 'mu' => '\ensuremath{\mu}',
    957 => '\ensuremath{\nu}', 'nu' => '\ensuremath{\nu}',
    958 => '\ensuremath{\xi}', 'xi' => '\ensuremath{\xi}',
    959 => '\ensuremath{o}', 'omicron' => '\ensuremath{o}',
    960 => '\ensuremath{\pi}', 'pi' => '\ensuremath{\pi}',
    961 => '\ensuremath{\rho}', 'rho' => '\ensuremath{\rho}',
    962 => '\ensuremath{\varsigma}', 'sigmaf' => '\ensuremath{\varsigma}',
    963 => '\ensuremath{\sigma}', 'sigma' => '\ensuremath{\sigma}',
    964 => '\ensuremath{\tau}', 'tau' => '\ensuremath{\tau}',
    965 => '\ensuremath{\upsilon}', 'upsilon' => '\ensuremath{\upsilon}',
    966 => '\ensuremath{\phi}', 'phi' => '\ensuremath{\phi}',
    967 => '\ensuremath{\chi}', 'chi' => '\ensuremath{\chi}',
    968 => '\ensuremath{\psi}', 'psi' => '\ensuremath{\psi}',
    969 => '\ensuremath{\omega}', 'omega' => '\ensuremath{\omega}',
    977 => '\ensuremath{\vartheta}', 'thetasym' => '\ensuremath{\vartheta}',
    978 => '\ensuremath{\mathit{\Upsilon}}', 'upsih' => '\ensuremath{\mathit{\Upsilon}}',
    982 => '\ensuremath{\varpi}', 'piv' => '\ensuremath{\varpi}',

    # The general punctuation set:
    # ('enspc', 'emspc' and 'hellep' are historical misspellings kept as
    #  aliases; 'ensp', 'emsp' and 'hellip' are the HTML 4.01 names.)

    8194 => '\hspace{.5em}', 'ensp' => '\hspace{.5em}', 'enspc' => '\hspace{.5em}',
    8195 => '\hspace{1.0em}', 'emsp' => '\hspace{1.0em}', 'emspc' => '\hspace{1.0em}',
    8201 => '\hspace{0.167em}', 'thinsp' => '\hspace{0.167em}',
    8204 => '{}', 'zwnj' => '{}',
    8205 => '', 'zwj' => '',
    8206 => '', 'lrm' => '',
    8207 => '', 'rlm' => '',
    8211 => '--', 'ndash' => '--',
    8212 => '---', 'mdash' => '---',
    8216 => '`', 'lsquo' => '`',
    8217 => "'", 'rsquo' => "'",
    8218 => '{\quotesinglbase}', 'sbquo' => '{\quotesinglbase}',
    8220 => '``', 'ldquo' => '``',
    8221 => "''", 'rdquo' => "''",
    8222 => '{\quotedblbase}', 'bdquo' => '{\quotedblbase}',
    8224 => '\ensuremath{\dagger}', 'dagger' => '\ensuremath{\dagger}',
    8225 => '\ensuremath{\ddag}', 'Dagger' => '\ensuremath{\ddag}',
    8226 => '{\textbullet}', 'bull' => '{\textbullet}',
    8230 => '{\textellipsis}', 'hellip' => '{\textellipsis}', 'hellep' => '{\textellipsis}',
    8240 => '{\textperthousand}', 'permil' => '{\textperthousand}',
    8242 => '{\textquotesingle}', 'prime' => '{\textquotesingle}',
    8243 => '{\textquotedbl}', 'Prime' => '{\textquotedbl}',
    8249 => '{\guilsinglleft}', 'lsaquo' => '{\guilsinglleft}',
    8250 => '{\guilsinglright}', 'rsaquo' => '{\guilsinglright}',
    8254 => '{\textasciimacron}', 'oline' => '{\textasciimacron}',
    8260 => '{\textfractionsolidus}', 'frasl' => '{\textfractionsolidus}',
    8364 => '{\texteuro}', 'euro' => '{\texteuro}',

    # Letter like symbols

    8472 => '\ensuremath{\wp}', 'weierp' => '\ensuremath{\wp}',
    8465 => '\ensuremath{\Im}', 'image' => '\ensuremath{\Im}',
    8476 => '\ensuremath{\Re}', 'real' => '\ensuremath{\Re}',
    8482 => '{\texttrademark}', 'trade' => '{\texttrademark}',
    8501 => '\ensuremath{\aleph}', 'alefsym' => '\ensuremath{\aleph}',

    # Arrows and then some (harpoons from Hon Kie).

    8592 => '{\textleftarrow}', 'larr' => '{\textleftarrow}',
    8593 => '{\textuparrow}', 'uarr' => '{\textuparrow}',
    8594 => '{\textrightarrow}', 'rarr' => '{\textrightarrow}',
    8595 => '{\textdownarrow}', 'darr' => '{\textdownarrow}',
    8596 => '\ensuremath{\leftrightarrow}', 'harr' => '\ensuremath{\leftrightarrow}',
    8598 => '\ensuremath{\nwarrow}',
    8599 => '\ensuremath{\nearrow}',
    8600 => '\ensuremath{\searrow}',
    8601 => '\ensuremath{\swarrow}',
    8605 => '\ensuremath{\leadsto}',
    8614 => '\ensuremath{\mapsto}',
    8617 => '\ensuremath{\hookleftarrow}',
    8618 => '\ensuremath{\hookrightarrow}',
    8629 => '\ensuremath{\hookleftarrow}',    # not an exact match but best I know.
    'crarr' => '\ensuremath{\hookleftarrow}',    # not an exact match but best I know.
    8636 => '\ensuremath{\leftharpoonup}',
    8637 => '\ensuremath{\leftharpoondown}',
    8640 => '\ensuremath{\rightharpoonup}',
    8641 => '\ensuremath{\rightharpoondown}',
    8652 => '\ensuremath{\rightleftharpoons}',
    8656 => '\ensuremath{\Leftarrow}', 'lArr' => '\ensuremath{\Leftarrow}',
    8657 => '\ensuremath{\Uparrow}', 'uArr' => '\ensuremath{\Uparrow}',
    8658 => '\ensuremath{\Rightarrow}', 'rArr' => '\ensuremath{\Rightarrow}',
    8659 => '\ensuremath{\Downarrow}', 'dArr' => '\ensuremath{\Downarrow}',
    8660 => '\ensuremath{\Leftrightarrow}', 'hArr' => '\ensuremath{\Leftrightarrow}',
    8661 => '\ensuremath{\Updownarrow}', 'vArr' => '\ensuremath{\Updownarrow}',
    8666 => '\ensuremath{\Lleftarrow}', 'lAarr' => '\ensuremath{\Lleftarrow}',
    8667 => '\ensuremath{\Rrightarrow}', 'rAarr' => '\ensuremath{\Rrightarrow}',
    8669 => '\ensuremath{\rightsquigarrow}', 'rarrw' => '\ensuremath{\rightsquigarrow}',

    # Mathematical operators.

    'forall' => '\ensuremath{\forall}', 8704 => '\ensuremath{\forall}',
    'comp' => '\ensuremath{\complement}', 8705 => '\ensuremath{\complement}',
    'part' => '\ensuremath{\partial}', 8706 => '\ensuremath{\partial}',
    'exist' => '\ensuremath{\exists}', 8707 => '\ensuremath{\exists}',
    'nexist' => '\ensuremath{\nexists}', 8708 => '\ensuremath{\nexists}',
    'empty' => '\ensuremath{\emptyset}', 8709 => '\ensuremath{\emptyset}',
    8710 => '\ensuremath{\Delta}',
    'nabla' => '\ensuremath{\nabla}', 8711 => '\ensuremath{\nabla}',
    'isin' => '\ensuremath{\in}', 8712 => '\ensuremath{\in}',
    'notin' => '\ensuremath{\notin}', 8713 => '\ensuremath{\notin}',
    'ni' => '\ensuremath{\ni}', 8715 => '\ensuremath{\ni}',
    8716 => '\ensuremath{\not\ni}',
    'prod' => '\ensuremath{\prod}', 8719 => '\ensuremath{\prod}',
    8720 => '\ensuremath{\coprod}',
    'sum' => '\ensuremath{\sum}', 8721 => '\ensuremath{\sum}',
    'minus' => '\ensuremath{-}', 8722 => '\ensuremath{-}',
    8723 => '\ensuremath{\mp}',
    8724 => '\ensuremath{\dotplus}',
    8725 => '\ensuremath{\diagup}',
    8726 => '\ensuremath{\smallsetminus}',
    'lowast' => '\ensuremath{*}', 8727 => '\ensuremath{*}',
    8728 => '\ensuremath{\circ}',
    8729 => '\ensuremath{\bullet}',
    'radic' => '\ensuremath{\surd}', 8730 => '\ensuremath{\surd}',
    8731 => '\ensuremath{\sqrt[3]{}}',
    8732 => '\ensuremath{\sqrt[4]{}}',
    'prop' => '\ensuremath{\propto}', 8733 => '\ensuremath{\propto}',
    'infin' => '\ensuremath{\infty}', 8734 => '\ensuremath{\infty}',

    #
    #   The items below require the isoent latex package which I can't find
    #   at least for FC5.  Temporarily commented out.
    #
    #    'ang90'  => '\ensuremath{\sqangle}',
    #    8735     => '\ensuremath{\sqangle}',

    'ang' => '\ensuremath{\angle}', 8736 => '\ensuremath{\angle}',
    'angmsd' => '\ensuremath{\measuredangle}', 8737 => '\ensuremath{\measuredangle}',
    'angsph' => '\ensuremath{\sphericalangle}', 8738 => '\ensuremath{\sphericalangle}',
    8739 => '\ensuremath{\vert}',
    8740 => '\ensuremath{\nmid}',    # U+2224 DOES NOT DIVIDE
    8741 => '\ensuremath{\Vert}',    # U+2225 PARALLEL TO
    'and' => '\ensuremath{\land}', 8743 => '\ensuremath{\land}',
    'or' => '\ensuremath{\lor}', 8744 => '\ensuremath{\lor}',
    'cap' => '\ensuremath{\cap}', 8745 => '\ensuremath{\cap}',
    'cup' => '\ensuremath{\cup}', 8746 => '\ensuremath{\cup}',
    'int' => '\ensuremath{\int}', 8747 => '\ensuremath{\int}',
    'conint' => '\ensuremath{\oint}', 8750 => '\ensuremath{\oint}',
    'there4' => '\ensuremath{\therefore}', 8756 => '\ensuremath{\therefore}',
    'becaus' => '\ensuremath{\because}', 8757 => '\ensuremath{\because}',
    8758 => '\ensuremath{:}',
    8759 => '\ensuremath{::}',
    'sim' => '\ensuremath{\sim}', 8764 => '\ensuremath{\sim}',
    8765 => '\ensuremath{\backsim}',
    'wreath' => '\ensuremath{\wr}', 8768 => '\ensuremath{\wr}',
    'nsim' => '\ensuremath{\not\sim}', 8769 => '\ensuremath{\not\sim}',

    # &asymp; (U+2248, ALMOST EQUAL TO) is a different glyph from LaTeX's
    # \asymp, so it is mapped to \approx instead.
    'asymp' => '\ensuremath{\approx}', 8776 => '\ensuremath{\approx}',

    8771 => '\ensuremath{\asymp}',
    8772 => '\ensuremath{\not\asymp}',
    'cong' => '\ensuremath{\cong}', 8773 => '\ensuremath{\cong}',
    8775 => '\ensuremath{\ncong}',
    8778 => '\ensuremath{\approxeq}',
    8784 => '\ensuremath{\doteq}',
    8785 => '\ensuremath{\doteqdot}',
    8786 => '\ensuremath{\fallingdotseq}',
    8787 => '\ensuremath{\risingdotseq}',
    8788 => '\ensuremath{:=}',
    8789 => '\ensuremath{=:}',
    8790 => '\ensuremath{\eqcirc}',
    8791 => '\ensuremath{\circeq}',
    'wedgeq' => '\ensuremath{\stackrel{\wedge}{=}}',
    8792 => '\ensuremath{\stackrel{\wedge}{=}}',    # historical key, kept
    8793 => '\ensuremath{\stackrel{\wedge}{=}}',    # U+2259 ESTIMATES (wedgeq)
    8794 => '\ensuremath{\stackrel{\vee}{=}}',
    8795 => '\ensuremath{\stackrel{\star}{=}}',
    8796 => '\ensuremath{\triangleq}',
    8797 => '\ensuremath{\stackrel{def}{=}}',
    8798 => '\ensuremath{\stackrel{m}{=}}',
    8799 => '\ensuremath{\stackrel{?}{=}}',
    'ne' => '\ensuremath{\neq}', 8800 => '\ensuremath{\neq}',
    'equiv' => '\ensuremath{\equiv}', 8801 => '\ensuremath{\equiv}',
    8802 => '\ensuremath{\not\equiv}',
    'le' => '\ensuremath{\leq}', 8804 => '\ensuremath{\leq}',
    'ge' => '\ensuremath{\geq}', 8805 => '\ensuremath{\geq}',
    8806 => '\ensuremath{\leqq}',
    8807 => '\ensuremath{\geqq}',
    8810 => '\ensuremath{\ll}',
    8811 => '\ensuremath{\gg}',
    'twixt' => '\ensuremath{\between}', 8812 => '\ensuremath{\between}',
    8813 => '\ensuremath{\not\asymp}',
    8814 => '\ensuremath{\not<}',
    8815 => '\ensuremath{\not>}',
    8816 => '\ensuremath{\not\leqslant}',
    8817 => '\ensuremath{\not\geqslant}',
    8818 => '\ensuremath{\lesssim}',
    8819 => '\ensuremath{\gtrsim}',
    8820 => '\ensuremath{\stackrel{<}{>}}',
    8821 => '\ensuremath{\stackrel{>}{<}}',
    8826 => '\ensuremath{\prec}',
    8827 => '\ensuremath{\succ}',
    8828 => '\ensuremath{\preceq}',
    8829 => '\ensuremath{\succeq}',
    8830 => '\ensuremath{\not\prec}',
    8831 => '\ensuremath{\not\succ}',
    'sub' => '\ensuremath{\subset}', 8834 => '\ensuremath{\subset}',
    'sup' => '\ensuremath{\supset}', 8835 => '\ensuremath{\supset}',
    'nsub' => '\ensuremath{\not\subset}', 8836 => '\ensuremath{\not\subset}',
    8837 => '\ensuremath{\not\supset}',
    'sube' => '\ensuremath{\subseteq}', 8838 => '\ensuremath{\subseteq}',
    'supe' => '\ensuremath{\supseteq}', 8839 => '\ensuremath{\supseteq}',
    8840 => '\ensuremath{\nsubseteq}',
    8841 => '\ensuremath{\nsupseteq}',
    8842 => '\ensuremath{\subsetneq}',
    8843 => '\ensuremath{\supsetneq}',
    8847 => '\ensuremath{\sqsubset}',
    8848 => '\ensuremath{\sqsupset}',
    8849 => '\ensuremath{\sqsubseteq}',
    8850 => '\ensuremath{\sqsupseteq}',
    8851 => '\ensuremath{\sqcap}',
    8852 => '\ensuremath{\sqcup}',
    'oplus' => '\ensuremath{\oplus}', 8853 => '\ensuremath{\oplus}',
    8854 => '\ensuremath{\ominus}',
    'otimes' => '\ensuremath{\otimes}', 8855 => '\ensuremath{\otimes}',
    8856 => '\ensuremath{\oslash}',
    8857 => '\ensuremath{\odot}',
    8858 => '\ensuremath{\circledcirc}',
    8859 => '\ensuremath{\circledast}',
    8861 => '\ensuremath{\ominus}',    # Close enough for government work.
    8862 => '\ensuremath{\boxplus}',
    8863 => '\ensuremath{\boxminus}',
    8864 => '\ensuremath{\boxtimes}',
    8865 => '\ensuremath{\boxdot}',
    'vdash' => '\ensuremath{\vdash}', 8866 => '\ensuremath{\vdash}',
    'dashv' => '\ensuremath{\dashv}', 8867 => '\ensuremath{\dashv}',
    'perp' => '\ensuremath{\perp}', 8869 => '\ensuremath{\perp}',
    8871 => '\ensuremath{\models}',
    8872 => '\ensuremath{\vDash}',
    8873 => '\ensuremath{\Vdash}',
    8874 => '\ensuremath{\Vvdash}',
    8876 => '\ensuremath{\nvdash}',
    8877 => '\ensuremath{\nvDash}',
    8878 => '\ensuremath{\nVdash}',
    8880 => '\ensuremath{\prec}',
    8881 => '\ensuremath{\succ}',
    8882 => '\ensuremath{\vartriangleleft}',
    8883 => '\ensuremath{\vartriangleright}',
    8884 => '\ensuremath{\trianglelefteq}',
    8885 => '\ensuremath{\trianglerighteq}',
    8891 => '\ensuremath{\veebar}',
    8896 => '\ensuremath{\land}',
    8897 => '\ensuremath{\lor}',
    8898 => '\ensuremath{\cap}',
    8899 => '\ensuremath{\cup}',
    8900 => '\ensuremath{\diamond}',
    'sdot' => '\ensuremath{\cdot}', 8901 => '\ensuremath{\cdot}',
    8902 => '\ensuremath{\star}',
    8903 => '\ensuremath{\divideontimes}',
    8904 => '\ensuremath{\bowtie}',
    8905 => '\ensuremath{\ltimes}',
    8906 => '\ensuremath{\rtimes}',
    8907 => '\ensuremath{\leftthreetimes}',
    8908 => '\ensuremath{\rightthreetimes}',
    8909 => '\ensuremath{\simeq}',
    8910 => '\ensuremath{\curlyvee}',
    8911 => '\ensuremath{\curlywedge}',
    8912 => '\ensuremath{\Subset}',
    8913 => '\ensuremath{\Supset}',
    8914 => '\ensuremath{\Cap}',
    8915 => '\ensuremath{\Cup}',
    8916 => '\ensuremath{\pitchfork}',
    8918 => '\ensuremath{\lessdot}',
    8919 => '\ensuremath{\gtrdot}',
    8920 => '\ensuremath{\lll}',
    8921 => '\ensuremath{\ggg}',
    8922 => '\ensuremath{\lesseqgtr}',    # U+22DA LESS-THAN EQUAL TO OR GREATER-THAN
    8923 => '\ensuremath{\gtreqless}',    # U+22DB GREATER-THAN EQUAL TO OR LESS-THAN
    8924 => '\ensuremath{\eqslantless}',
    8925 => '\ensuremath{\eqslantgtr}',
    8926 => '\ensuremath{\curlyeqprec}',
    8927 => '\ensuremath{\curlyeqsucc}',
    8928 => '\ensuremath{\not\preccurlyeq}',
    8929 => '\ensuremath{\not\succcurlyeq}',
    8930 => '\ensuremath{\not\sqsubseteq}',    # U+22E2 NOT SQUARE IMAGE OF OR EQUAL TO
    8931 => '\ensuremath{\not\sqsupseteq}',    # U+22E3 NOT SQUARE ORIGINAL OF OR EQUAL TO
    8938 => '\ensuremath{\not\vartriangleleft}',
    8939 => '\ensuremath{\not\vartriangleright}',
    8940 => '\ensuremath{\not\trianglelefteq}',
    8941 => '\ensuremath{\not\trianglerighteq}',
    8942 => '\ensuremath{\vdots}',
    8960 => '\ensuremath{\varnothing}',
    'lceil' => '\ensuremath{\lceil}', 8968 => '\ensuremath{\lceil}',
    'rceil' => '\ensuremath{\rceil}', 8969 => '\ensuremath{\rceil}',
    'lfloor' => '\ensuremath{\lfloor}', 8970 => '\ensuremath{\lfloor}',
    'rfloor' => '\ensuremath{\rfloor}', 8971 => '\ensuremath{\rfloor}',
    'lang' => '\ensuremath{\langle}', 9001 => '\ensuremath{\langle}',
    'rang' => '\ensuremath{\rangle}', 9002 => '\ensuremath{\rangle}',
    'loz' => '\ensuremath{\lozenge}', 9674 => '\ensuremath{\lozenge}',
    'spades' => '\ensuremath{\spadesuit}', 9824 => '\ensuremath{\spadesuit}',
    9825 => '\ensuremath{\heartsuit}',
    9826 => '\ensuremath{\diamondsuit}',
    'clubs' => '\ensuremath{\clubsuit}', 9827 => '\ensuremath{\clubsuit}',
    'diams' => '\ensuremath{\blacklozenge}', 9830 => '\ensuremath{\blacklozenge}'
);

# There are some named entities that don't have a good
# latex equivalent, these are converted to utf-8 via this
# table of entity name -> unicode number.

my %utf_table = (
    'THORN'  => 222,
    'thorn'  => 254,
    'eth'    => 240,
    'hearts' => 9829
);

#
#  Convert a numerical entity (that does not exist in our hash)
#  to its UTF-8 equivalent representation.
#  This allows us to support, to some extent, any entity for which
#  dvipdf can find a glyph (given that LaTeX is now UTF-8 clean).
#
# Parameters:
#   unicode  - The unicode for the character.  This is assumed to
#              be a decimal value
# Returns:
#   The character for that code point, as produced by pack("U", ...).
#
sub entity_to_utf8 {
    my ($unicode) = @_;

    return pack("U", $unicode);
}

#
#  Convert an entity to the corresponding LaTeX if possible.
#  If not possible, and the entity is numeric,
#  the entity is treated like a Unicode character and converted
#  to UTF-8 which should display as long as dvipdf can find the
#  appropriate glyph.
#
#  The entity is assumed to have already had the
#  &# ; or & ; removed
#
# Parameters:
#   entity    - Name of entity to convert, or its decimal code point.
# Returns:
#  One of the following:
#   - LaTeX string that produces the entity.
#   - UTF-8 equivalent of a numeric entity for which we don't have a
#     LaTeX string.
#   - ' ' for text entities for which there's no LaTeX equivalent, and for
#     numbers that are not valid Unicode code points.
#
sub entity_to_latex {
    my ($entity) = @_;

    return ' ' unless defined $entity;

    # The table is keyed by plain decimal numbers, so "038" must be looked
    # up as "38"; otherwise &#038; would skip the table and come out as a
    # raw, unescaped '&'.
    my $is_numeric = ($entity =~ /\A[0-9]+\z/) ? 1 : 0;
    if ($is_numeric) {
        $entity =~ s/\A0+(?=[0-9])//;
    }

    # Try to look up the entity (text or numeric) in the hash:

    my $latex = $entities{$entity};
    if (defined $latex) {
        # A value ending in a bare control word (e.g. \ae) would absorb
        # the letters and spaces that follow it in the document; an empty
        # group terminates the control word without changing the output.
        $latex .= '{}' if $latex =~ /\\[A-Za-z]+\z/;
        return $latex;
    }

    # If the text is purely numeric we can do the UTF-8 conversion.
    # Surrogates and anything past U+10FFFF are not characters at all.

    if ($is_numeric) {
        if (   length($entity) > 7
            || $entity > 0x10FFFF
            || ($entity >= 0xD800 && $entity <= 0xDFFF)) {
            return ' ';
        }
        return entity_to_utf8($entity);
    }

    # Otherwise there are a few textual entities that don't have good LaTeX
    # which can be converted to unicode:

    my $code_point = $utf_table{$entity};
    return entity_to_utf8($code_point) if defined $code_point;

    # Can't do the conversion ...

    return ' ';
}

#
#  Convert all the entities in a string.
#  Every &#nnn; (decimal), &#xhhh; (hexadecimal) and &name; entity is
#  handed to entity_to_latex and replaced by what that returns.
#
#  The string is scanned exactly once, left to right, and replacement text
#  is never rescanned.  That way '&#38;amp;' becomes '\&amp;' rather than
#  having the '&' produced by the first entity re-interpreted as the start
#  of a second one.
#
# Parameters:
#   input   - Input string/document
# Returns
#   input with entities replaced by latexable stuff (UTF-8 encodings or
#   LaTeX control strings to produce the entity).
#
sub replace_entities {
    my ($input) = @_;

    $input =~ s{
        &
        (?: \# ([0-9]+)                 # group 1: decimal code point
          | \# [xX] ([0-9a-fA-F]+)      # group 2: hexadecimal code point
          | (\w+)                       # group 3: entity name
        )
        ;
    }{
        my ($decimal, $hex, $name) = ($1, $2, $3);
        if (defined $hex) {
            # More than six significant hex digits cannot be a Unicode code
            # point; map those to an out-of-range value instead of letting
            # hex() overflow.
            $hex =~ s/\A0+(?=.)//;
            $decimal = (length($hex) <= 6) ? hex($hex) : 0x110000;
        }
        entity_to_latex(defined $decimal ? $decimal : $name);
    }gex;

    return $input;
}

1;

__END__