--- loncom/interface/entities.pm 2008/02/11 11:35:46 1.1 +++ loncom/interface/entities.pm 2008/04/15 10:10:10 1.7 @@ -53,9 +53,17 @@ use strict; # the original massive regular expression replacements originally by # A. Sakharuk in lonprintout.pm # +# I also want to acknowledge +# ISO Character entities and their LaTeX equivalents by +# Vidar Bronken Gundersen, and Rune Mathisen +# http://www.bitjungle.com/isoent-ref.pdf +# + # Note numerical entities are essentially unicode character codes. # -my %entities = { +package Apache::entities; + +my %entities = ( # ---- ASCII code page: ---------------- @@ -175,7 +183,7 @@ my %entities = { 130 => ',', 131 => '\\textflorin ', 132 => ',,', # Low double left quotes. - 133 => '\\ensuremat\{\\ldots\}', + 133 => '\\ensuremath\{\\ldots\}', 134 => '\\ensuremath\{\\dagger\}', 135 => '\\ensuremath\{\\ddagger\}', 136 => '\\ensuremath\{\\wedge\}', @@ -499,6 +507,526 @@ my %entities = { 'zeta' => '\\ensuremath\{\\zeta\}', 951 => '\\ensuremath\{\\eta\}', 'eta' => '\\ensuremath\{\\eta\}', + 952 => '\\ensuremath\{\\theta\}', + 'theta' => '\\ensuremath\{\\theta\}', + 953 => '\\ensuremath\{\\iota\}', + 'iota' => '\\ensuremath\{\\iota\}', + 954 => '\\ensuremath\{\\kappa\}', + 'kappa' => '\\ensuremath\{\\kappa\}', + 955 => '\\ensuremath\{\\lambda\}', + 'lambda' => '\\ensuremath\{\\lambda\}', + 956 => '\\ensuremath\{\\mu\}', + 'mu' => '\\ensuremath\{\\mu\}', + 957 => '\\ensuremath\{\\nu\}', + 'nu' => '\\ensuremath\{\\nu\}', + 958 => '\\ensuremath\{\\xi\}', + 'xi' => '\\ensuremath\{\\xi\}', + 959 => '\\ensuremath\{o\}', + 'omicron'=> '\\ensuremath\{o\}', + 960 => '\\ensuremath\{\\pi\}', + 'pi' => '\\ensuremath\{\\pi\}', + 961 => '\\ensuremath\{\\rho\}', + 'rho' => '\\ensuremath\{\\rho\}', + 962 => '\\ensuremath\{\\varsigma\}', + 'sigmaf' => '\\ensuremath\{\\varsigma\}', + 963 => '\\ensuremath\{\\sigma\}', + 'sigma' => '\\ensuremath\{\\sigma\}', + 964 => '\\ensuremath\{\\tau\}', + 'tau' => '\\ensuremath\{\\tau\}', + 965 => 
'\\ensuremath\{\\upsilon\}', + 'upsilon'=> '\\ensuremath\{\\upsilon\}', + 966 => '\\ensuremath\{\\phi\}', + 'phi' => '\\ensuremath\{\\phi\}', + 967 => '\\ensuremath\{\\chi\}', + 'chi' => '\\ensuremath\{\\chi\}', + 968 => '\\ensuremath\{\\psi\}', + 'psi' => '\\ensuremath\{\\psi\}', + 969 => '\\ensuremath\{\\omega\}', + 'omega' => '\\ensuremath\{\\omega\}', + 977 => '\\ensuremath\{\\vartheta\}', + 'thetasym'=>'\\ensuremath\{\\vartheta\}', + 978 => '\\ensuremath\{\\varUpsilon\}', + 'upsih' => '\\ensuremath\{\\varUpsilon\}', + 982 => '\\ensuremath\{\\varpi\}', + 'piv' => '\\ensuremath\{\\varpi\}', + + + # The general punctuation set: + + 8194, => '\\hspace{.5em}', + 'enspc' => '\\hspace{.5em}', + 8195 => '\\hspace{1.0em}', + 'emspc' => '\\hspace{1.0em}', + 8201 => '\\hspace{0.167em}', + 'thinsp' => '\\hspace{0.167em}', + 8204 => '\{\}', + 'zwnj' => '\{\}', + 8205 => '', + 'zwj' => '', + 8206 => '', + 'lrm' => '', + 8207 => '', + 'rlm' => '', + 8211 => '--', + 'ndash' => '--', + 8212 => '---', + 'mdash' => '---', + 8216 => '`', + 'lsquo' => '`', + 8217 => "'", + 'rsquo' => "'", + 8218 => '\\quotesinglebase', + 'sbquo' => '\\quotesinglebase', + 8220 => '``', + 'ldquo' => '``', + 8221 => "''", + 'rdquo' => "''", + 8222 => '\\quotedblbase', + 'bdquo' => '\\quotedblbase', + 8224 => '\\dagger', + 'dagger' => '\\dagger', + '8225' => '\\ddag', + 'Dagger' => '\\ddag', + 8226 => '\\textbullet', + 'bull' => '\\textbullet', + 8230 => '\\textellipsis', + 'hellep' => '\\textellipsis', + 8240 => '\\textperthousand', + permil => '\\textperthousand', + 8242 => '\\textquotesingle', + 'prime' => '\\textquotesingle', + 8243 => '\\textquotedbl', + 'Prime' => '\\textquotedbl', + 8249 => '\\guilsingleleft', + 'lsaquo' => '\\guilsingleleft', + 8250 => '\\guilsingleright', + 'rsaquo' => '\\guilsingleright', + 8254 => '\\textasciimacron', + oline => '\\textasciimacron', + 8260 => '\\textfractionsolidus', + 'frasl' => '\\textfractionsolidus', + 8364 => '\\texteuro', + 'euro' => '\\texteuro', + + 
# Letter like symbols + + + 8472 => '\\ensuremath\{\\wp\}', + 'weierp' => '\\ensuremath\{\\wp\}', + 8465 => '\\ensuremath\{\\Im\}', + 'image' => '\\ensuremath\{\\Im\}', + 8476 => '\\ensuremath{\\Re\}', + 'real' => '\\ensuremath{\\Re\}', + 8482 => '\\texttrademark', + 'trade' => '\\texttrademark', + 8501 => '\\ensuremath{\\aleph\}', + 'alefsym'=> '\\ensuremath{\\aleph\}', + + # Arrows and then some (harpoons from Hon Kie). + + 8592 => '\\textleftarrow', + 'larr' => '\\textleftarrow', + 8593 => '\\textuparrow', + 'uarr' => '\\textuparrow', + 8594 => '\\textrightarrow', + 'rarr' => '\\textrightarrow', + 8595 => '\\textdownarrow', + 'darr' => '\\textdownarrow', + 8596 => '\\ensuremath\{\\leftrightarrow\}', + 'harr' => '\\ensuremath\{\\leftrightarrow\}', + 8598 => '\\ensuremath\{\\nwarrow\}', + 8599 => '\\ensuremath\{\\nearrow\}', + 8600 => '\\ensuremath\{\\searrow\}', + 8601 => '\\ensuremath\{\\swarrow\}', + 8605 => '\\ensuremath\{\\leadsto\}', + 8614 => '\\ensuremath\{\\mapsto\}', + 8617 => '\\ensuremath\{\\hookleftarrow\}', + 8618 => '\\ensuremath\{\\hookrightarrow\}', + 8629 => '\\ensuremath\{\\hookleftarrow\}', # not an exact match but best I know. + 'crarr' => '\\ensuremath\{\\hookleftarrow\}', # not an exact match but best I know. 
+ 8636 => '\\ensuremath\{\\leftharpoonup\}', + 8637 => '\\ensuremath\{\\leftharpoondown\}', + 8640 => '\\ensuremath\{\\rightharpoonup\}', + 8641 => '\\ensuremath\{\\rightharpoondown\}', + 8652 => '\\ensuremath\{\\rightleftharpoons\}', + 8656 => '\\ensuremath\{\\Leftarrow\}', + 'lArr' => '\\ensuremath\{\\Leftarrow\}', + 8657 => '\\ensuremath\{\\Uparrow\}', + 'uArr' => '\\ensuremath\{\\Uparrow\}', + 8658 => '\\ensuremath\{\\Rightarrow\}', + 'rArr' => '\\ensuremath\{\\Rightarrow\}', + 8659 => '\\ensuremath\{\\Downarrow\}', + 'dArr' => '\\ensuremath\{\\Downarrow\}', + 8660 => '\\ensuremath\{\\Leftrightarrow\}', + 'vArr' => '\\ensuremath\{\\Updownarrow\}', + 8661 => '\\ensuremath\{\\Updownarrow\}', + 'lAarr' => '\\ensuremath\{\\Lleftarrow\}', + 8666 => '\\ensuremath\{\\Lleftarrow\}', + 'rAarr' => '\\ensuremath\{\\Rrightarrow\}', + 8667 => '\\ensuremath\{\\Rrightarrow\}', + 'rarrw' => '\\ensuremath\{\\rightsquigarrow\}', + 8669 => '\\ensuremath\{\\rightsquigarrow\}', + + + # Mathematical operators. 
+ + + 'forall' => '\\ensuremath\{\\forall\}', + 8704 => '\\ensuremath\{\\forall\}', + 'comp' => '\\ensuremath\{\\complement\}', + 8705 => '\\ensuremath\{\\complement\}', + 'part' => '\\ensuremath\{\\partial\}', + 8706 => '\\ensuremath\{\\partial\}', + 'exist' => '\\ensuremath\{\\exists\}', + 8707 => '\\ensuremath\{\\exists\}', + 'nexist' => '\\ensuremath\{\\nexists\}', + 8708 => '\\ensuremath\{\\nexists\}', + 'empty' => '\\ensuremath\{\\emptysset\}', + 8709 => '\\ensuremath\{\\emptysset\}', + 8710 => '\\ensuremath\{\\Delta\}', + 'nabla' => '\\ensuremath\{\\nabla\}', + 8711 => '\\ensuremath\{\\nabla\}', + 'isin' => '\\ensuremath\{\\in\}', + 8712 => '\\ensuremath\{\\in\}', + 'notin' => '\\ensuremath\{\\notin\}', + 8713 => '\\ensuremath\{\\notin\}', + ni => '\\ensuremath\{\\ni\}', + 8715 => '\\ensuremath\{\\ni\}', + 8716 => '\\ensuremath\{\\not\\ni\}', + 'prod' => '\\ensuremath\{\\prod\}', + 8719 => '\\ensuremath\{\\prod\}', + 8720 => '\\ensuremath\{\\coprod\}', + 'sum' => '\\ensuremath\{\\sum\}', + 8721 => '\\ensuremath\{\\sum\}', + 'minus' => '\\ensuremath\{-\}', + 8722 => '\\ensuremath\{-\}', + 8723 => '\\ensuremath\{\\mp\}', + 8724 => '\\ensuremath\{\\dotplus\}', + 8725 => '\\ensuremath\{\\diagup\}', + 8726 => '\\ensuremath\{\\smallsetminus\}', + 'lowast' => '\\ensuremath\{*\}', + 8727 => '\\ensuremath\{*\}', + 8728 => '\\ensuremath\{\\circ\}', + 8729 => '\\ensuremath\{\\bullet\}', + 'radic' => '\\ensuremath\{\\surd\}', + 8730 => '\\ensuremath\{\\surd\}', + 8731 => '\\ensuremath\{\\sqrt[3]\{\}\}', + 8732 => '\\ensuremath\{\\sqrt[4]\{\}\}', + 'prop' => '\\ensuremath\{\\propto\}', + 8733 => '\\ensuremath\{\\propto\}', + 'infin' => '\\ensuremath\{\\infty\}', + 8734 => '\\ensuremath\{\\infty\}', + 'ang90' => '\\ensuremath\{\\sqangle\}', + 8735 => '\\ensuremath\{\\sqangle\}', + 'ang' => '\\ensuremath\{\\angle\}', + 8736 => '\\ensuremath\{\\angle\}', + 'angmsd' => '\\ensuremath\{\\measuredangle\}', + 8737 => '\\ensuremath\{\\measuredangle\}', + 'angsph' => 
'\\ensuremath\{\\sphiericalangle\}', + 8738 => '\\ensuremath\{\\sphiericalangle\}', + 8739 => '\\ensuremath\{\\vert\}', + 8740 => '\\ensuremath\{\\Vert\}', + 'and' => '\\ensuremath\{\\land\}', + 8743 => '\\ensuremath\{\\land\}', + 'or' => '\\ensuremath\{\\lor\}', + 8744 => '\\ensuremath\{\\lor\}', + 'cap' => '\\ensuremath\{\\cap\}', + 8745 => '\\ensuremath\{\\cap\}', + 'cup' => '\\ensuremath\{\\cup\}', + 8746 => '\\ensuremath\{\\cup\}', + 'int' => '\\ensuremath\{\\int\}', + 8747 => '\\ensuremath\{\\int\}', + 'conint' => '\\ensuremath\{\\oint\}', + 8750 => '\\ensuremath\{\\oint\}', + 'there4' => '\\ensuremath\{\\therefore\}', + 8756 => '\\ensuremath\{\\therefore\}', + 'becaus' => '\\ensuremath\{\\because\}', + 8757 => '\\ensuremath\{\\because\}', + 8758 => '\\ensuremath\{:\}', + 8759 => '\\ensuremath\{::\}', + 'sim' => '\\ensuremath\{\\sim\}', + 8764 => '\\ensuremath\{\\sim\}', + 8765 => '\\ensuremath\{\\backsim\}', + 'wreath' => '\\ensuremath\{\\wr\}', + 8768 => '\\ensuremath\{\\wr\}', + 'nsim' => '\\ensuremath\{\\not\sim\}', + 8769 => '\\ensuremath\{\\not\sim\}', +# 'asymp' => '\\ensuremath\{\\asymp\}', ≈ is actually a different glyph. 
+ 8771 => '\\ensuremath\{\\asymp\}', + 8772 => '\\ensuremath\{\\not\\asymp\}', + 'cong' => '\\ensuremath\{\\cong\}', + 8773 => '\\ensuremath\{\\cong\}', + 8775 => '\\ensuremath\{\\ncong\}', + 8778 => '\\ensuremath\{\\approxeq\}', + 8784 => '\\ensuremath\{\\doteq\}', + 8785 => '\\ensuremath\{\\doteqdot\}', + 8786 => '\\ensuremath\{\\fallingdotseq\}', + 8787 => '\\ensuremath\{\\risingdotseq\}', + 8788 => '\\ensuremath\{:=\}', + 8789 => '\\ensuremath\{=:\}', + 8790 => '\\ensuremath\{\\eqcirc\}', + 8791 => '\\ensuremath\{\\circeq\}', + 'wedgeq' => '\\ensuremath\{\\stackrel\{\\wedge\}\{=\}\}', + 8792 => '\\ensuremath\{\\stackrel\{\\wedge\}\{=\}\}', + 8794 => '\\ensuremath\{\\stackrel\{\\vee\}\{=\}\}', + 8795 => '\\ensuremath\{\\stackrel\{\\star}\{=\}\}', + 8796 => '\\ensuremath\{\\triangleeq\}', + 8797 => '\\ensuremath\{\\stackrel\{def\}\{=\}\}', + 8798 => '\\ensuremath\{\\stackrel\{m\}\{=\}\}', + 8799 => '\\ensuremath\{\\stackrel\{?\}\{=\}\}', + 'ne' => '\\ensuremath\{\\neq\}', + 8800 => '\\ensuremath\{\\neq\}', + 'equiv' => '\\ensuremath\{\\equiv\}', + 8801 => '\\ensuremath\{\\equiv\}', + 8802 => '\\ensuremath\{\\not\\equiv\}', + 'le' => '\\ensuremath\{\\leq\}', + 8804 => '\\ensuremath\{\\leq\}', + 'ge' => '\\ensuremath\{\\geq\}', + 8805 => '\\ensuremath\{\\geq\}', + 8806 => '\\ensuremath\{\\leqq\}', + 8807 => '\\ensuremath\{\\geqq\}', + 8810 => '\\ensuremath\{\\ll\}', + 8811 => '\\ensuremath\{\\gg\}', + 'twixt' => '\\ensuremath\{\\between\}', + 8812 => '\\ensuremath\{\\between\}', + 8813 => '\\ensuremath\{\\not\\asymp\}', + 8814 => '\\ensuremath\{\\not<\}', + 8815 => '\\ensuremath\{\\not>\}', + 8816 => '\\ensuremath\{\\not\\leqslant\}', + 8817 => '\\ensuremath\{\\not\\geqslant\}', + 8818 => '\\ensuremath\{\\lessim\}', + 8819 => '\\ensuremath\{\\gtrsim\}', + 8820 => '\\ensuremath\{\\stackrel\{<\}\{>\}\}', + 8821 => '\\ensuremath\{\\stackrel\{>\}\{<\}\}', + 8826 => '\\ensuremath\{\\prec\}', + 8827 => '\\ensuremath\{\\succ\}', + 8828 => '\\ensuremath\{\\preceq\}', + 
8829 => '\\ensuremath\{\\succeq\}', + 8830 => '\\ensuremath\{\\not\\prec\}', + 8831 => '\\ensuremath\{\\not\\succ\}', + 'sub' => '\\ensuremath\{\\subset\}', + 8834 => '\\ensuremath\{\\subset\}', + 'sup' => '\\ensuremath\{\\supset\}', + 8835 => '\\ensuremath\{\\supset\}', + 'nsub' => '\\ensuremath\{\\not\\subset\}', + 8836 => '\\ensuremath\{\\not\\subset\}', + 8837 => '\\ensuremath\{\\not\\supset\}', + 'sube' => '\\ensuremath\{\\subseteq\}', + 8838 => '\\ensuremath\{\\subseteq\}', + 'supe' => '\\ensuermath\{\\supseteq\}', + 8839 => '\\ensuermath\{\\supseteq\}', + 8840 => '\\ensuremath\{\\nsubseteq\}', + 8841 => '\\ensuremath\{\\nsupseteq\}', + 8842 => '\\ensuremath\{\\subsetneq\}', + 8843 => '\\ensuremath\{\\supsetneq\}', + 8847 => '\\ensuremath\{\\sqsubset\}', + 8848 => '\\ensuremath\{\\sqsupset\}', + 8849 => '\\ensuremath\{\\sqsubseteq\}', + 8850 => '\\ensuremath\{\\sqsupseteq\}', + 8851 => '\\ensuremath\{\\sqcap\}', + 8852 => '\\ensuremath\{\\sqcup\}', + 'oplus' => '\\ensuremath\{\\oplus\}', + 8853 => '\\ensuremath\{\\oplus\}', + 8854 => '\\ensuremath\{\\ominus\}', + 'otimes' => '\\ensuremath\{\\otimes\}', + 8855 => '\\ensuremath\{\\otimes\}', + 8856 => '\\ensuremath\{\\oslash\}', + 8857 => '\\ensuremath\{\\odot\}', + 8858 => '\\ensuremath\{\\circledcirc\}', + 8859 => '\\ensuremath\{\\circledast\}', + 8861 => '\\ensuremath\{\\ominus\}', # Close enough for government work. 
+ 8862 => '\\ensuremath\{\\boxplus\}', + 8863 => '\\ensuremath\{\\boxminus\}', + 8864 => '\\ensuremath\{\\boxtimes\}', + 8865 => '\\ensuremath\{\\boxdot\}', + 'vdash' => '\\ensuremath\{\\vdash\}', + 8866 => '\\ensuremath\{\\vdash\}', + 'dashv' => '\\ensuremath\{\\dashv\}', + 8867 => '\\ensuremath\{\\dashv\}', + 'perp' => '\\ensuremath\{\\perp\}', + 8869 => '\\ensuremath\{\\perp\}', + 8871 => '\\ensuremath\{\\models\}', + 8872 => '\\ensuremath\{\\vDash\}', + 8873 => '\\ensuremath\{\\Vdash\}', + 8874 => '\\ensuremath\{\\Vvdash\}', + 8876 => '\\ensuremath\{\\nvdash\}', + 8877 => '\\ensuremath\{\\nvDash\}', + 8878 => '\\ensuremath\{\\nVdash\}', + 8880 => '\\ensuremath\{\\prec\}', + 8881 => '\\ensuremath\{\\succ\}', + 8882 => '\\ensuremath\{\\vartriangleleft\}', + 8883 => '\\ensuremath\{\\vartriangleright\}', + 8884 => '\\ensuremath\{\\trianglelefteq\}', + 8885 => '\\ensuremath\{\\trianglerighteq\}', + 8891 => '\\ensuremath\{\\veebar\}', + 8896 => '\\ensuremath\{\\land\}', + 8897 => '\\ensuremath\{\\lor\}', + 8898 => '\\ensuremath\{\\cap\}', + 8899 => '\\ensuremath\{\\cup\}', + 8900 => '\\ensuremath\{\\diamond\}', + 'sdot' => '\\ensuremath\{\\cdot\}', + 8901 => '\\ensuremath\{\\cdot\}', + 8902 => '\\ensuremath\{\\star\}', + 8903 => '\\ensuremath\{\\divideontimes\}', + 8904 => '\\ensuremath\{\\bowtie\}', + 8905 => '\\ensuremath\{\\ltimes\}', + 8906 => '\\ensuremath\{\\rtimes\}', + 8907 => '\\ensuremath\{\\leftthreetimes\}', + 8908 => '\\ensuremath\{\\rightthreetimes\}', + 8909 => '\\ensuremath\{\\simeq\}', + 8910 => '\\ensuremath\{\\curlyvee\}', + 8911 => '\\ensuremath\{\\curlywedge\}', + 8912 => '\\ensuremath\{\\Subset\}', + 8913 => '\\ensuremath\{\\Supset\}', + 8914 => '\\ensuremath\{\\Cap\}', + 8915 => '\\ensuremath\{\\Cup\}', + 8916 => '\\ensuremath\{\\pitchfork\}', + 8918 => '\\ensuremath\{\\lessdot\}', + 8919 => '\\ensuremath\{\\gtrdot\}', + 8920 => '\\ensuremath\{\\lll\}', + 8921 => '\\ensuremath\{\\ggg\}', + 8922 => '\\ensuremath\{\\gtreqless\}', + 8923 => 
'\\ensuremath\{\\lesseqgtr\}', + 8924 => '\\ensuremath\{\\eqslantless\}', + 8925 => '\\ensuremath\{\\eqslantgtr\}', + 8926 => '\\ensuremath\{\\curlyeqprec\}', + 8927 => '\\ensuremath\{\\curlyeqsucc\}', + 8928 => '\\ensuremath\{\\not\\preccurlyeq\}', + 8929 => '\\ensuremath\{\\not\\succurlyeq\}', + 8930 => '\\ensuremath\{\\not\\sqsupseteq\}', + 8931 => '\\ensuremath\{\\not\\sqsubseteq\}', + 8938 => '\\ensuremath\{\\not\\vartriangleleft\}', + 8939 => '\\ensuremath\{\\not\vartriangleright\}', + 8940 => '\\ensuremath\{\\not\trianglelefteq\}', + 8941 => '\\ensuremath\{\\not\trianglerighteq\}', + 8942 => '\\ensuremath\{\\vdots\}', + 8960 => '\\ensuremath\{\\varnothing\}', + 'lceil' => '\\ensuremath\{\\lceil\}', + 8968 => '\\ensuremath\{\\lceil\}', + 'rceil' => '\\ensuremath\{\\rceil\}', + 8969 => '\\ensuremath\{\\rceil\}', + 'lfloor' => '\\ensuremath\{\\lfloor\}', + 8970 => '\\ensuremath\{\\lfloor\}', + 'rfloor' => '\\ensuremath\{\\rfloor}', + 8971 => '\\ensuremath\{\\rfloor}', + 'lang' => '\\ensuremath\{\\langle\}', + 9001 => '\\ensuremath\{\\langle\}', + 'rang' => '\\ensuremath\{\\rangle\}', + 9002 => '\\ensuremath\{\\rangle\}', + 'loz' => '\\ensuremath\{\\lozenge\}', + 9674 => '\\ensuremath\{\\lozenge\}', + 'spades' => '\\ensuremath\{\\spadesuit\}', + 9824 => '\\ensuremath\{\\spadesuit\}', + 9825 => '\\ensuremath\{\\heartsuit\}', + 9826 => '\\ensuremath\{\\diamondsuit\}', + 'clubs' => '\\ensuremath\{\\clubsuit\}', + 9827 => '\\ensuremath\{\\clubsuit\}', + 'diams' => '\\ensuremath\{\\blacklozenge\}', + 9830 => '\\ensuremath\{\\blacklozenge\}' + +); + +# +# Convert a numerical entity (that does not exist in our hash) +# to its UTF-8 equivalent representation. +# This allows us to support, to some extent, any entity for which +# dvipdf can find a gylph (given that LaTeX is now UTF-8 clean). +# +# Parameters: +# unicode - The unicode for the character. This is assumed to +# be a decimal value +# Returns: +# The UTF-8 equiavalent of the value. 
#
sub entity_to_utf8 {
    my ($unicode) = @_;

    # The "U" pack template maps a code point number to its character.
    return pack("U", $unicode);
}

#
#  Convert an entity to the corresponding LaTeX if possible.
#  If not possible, and the entity is numeric,
#  the entity is treated like a Unicode character and converted
#  to UTF-8 which should display as long as dvipdf can find the
#  appropriate glyph.
#
#  The entity is assumed to have already had the
#  &# ;  or & ; removed
#
#  Numeric entities may be any number of decimal digits; leading zeros
#  are ignored (so '0233' is looked up as '233').
#
# Parameters:
#   entity   - Name of entity to convert.
# Returns:
#  One of the following:
#   - Latex string that produces the entity.
#   - UTF-8 equivalent of a numeric entity for which we don't have a latex string.
#   - ' ' for text entities for which there's no latex equivalent, for an
#     undefined entity, and for numeric entities beyond U+10FFFF.
#
sub entity_to_latex {
    my ($entity) = @_;

    # Nothing to translate.
    return " " unless defined $entity;

    # The whole entity must be decimal digits (not just a single digit)
    # to count as numeric.
    my $is_numeric = ($entity =~ /\A[0-9]+\z/) ? 1 : 0;

    # The numeric keys of the lookup table carry no leading zeros, so
    # drop them (string-wise, keeping at least one digit).
    if ($is_numeric) {
        $entity =~ s/\A0+(?=[0-9])//;
    }

    # Try to look up the entity (text or numeric) in the hash:

    my $latex = $entities{$entity};
    if (defined $latex) {
        return $latex;
    }

    # If the text is purely numeric we can do the UTF-8 conversion.
    # The length test keeps absurdly long digit strings away from numeric
    # comparison; 0x10FFFF is the largest Unicode code point.

    if ($is_numeric && length($entity) <= 7 && $entity <= 0x10FFFF) {
        return entity_to_utf8($entity);
    }

    # Can't do the conversion ...

    return " ";
}

#
#  Convert all the entities in a string.
#  We locate all the entities, pass them into entity_to_latex and
#  replace occurrences in the input string.
#  The assumption is that there are few entities in any string/document
#  so the per-entity work is not too bad.  The advantage of translating
#  each entity through entity_to_latex, rather than using one regular
#  expression substitution per entity, is that we now can use lookup
#  tables for the translation in entity_to_latex above.
#
# Parameters:
#   input   - Input string/document
# Returns
#   input with entities replaced by latexable stuff (UTF-8 encodings or
#   latex control strings to produce the entity).
#
#  All entities are translated in a single left-to-right pass, and the
#  text produced by a translation is never scanned again.  This keeps an
#  input such as "&#38;lt;" from being decoded twice.
#
sub replace_entities {
    my ($input) = @_;

    # Nothing to scan; hand back what we were given.
    return $input unless defined $input;

    # Numeric (&#nnn;) and text (&name;) entities are matched by one
    # pattern.  Whichever capture group matched holds the entity with its
    # "&#" / "&" prefix and ";" suffix already removed, which is the form
    # entity_to_latex expects.
    $input =~ s{
        &                   # entity introducer
        (?: \# (\d+)        # $1: body of a &#nnn; numeric entity
          |    (\w+)        # $2: body of a &text; entity
        )
        ;                   # entity terminator
    }{ entity_to_latex(defined $1 ? $1 : $2) }gex;

    return $input;
}

1;

__END__