--- loncom/interface/entities.pm 2008/03/31 11:01:25 1.3 +++ loncom/interface/entities.pm 2008/04/15 10:10:10 1.7 @@ -61,7 +61,9 @@ use strict; # Note numerical entities are essentially unicode character codes. # -my %entities = { +package Apache::entities; + +my %entities = ( # ---- ASCII code page: ---------------- @@ -181,7 +183,7 @@ my %entities = { 130 => ',', 131 => '\\textflorin ', 132 => ',,', # Low double left quotes. - 133 => '\\ensuremat\{\\ldots\}', + 133 => '\\ensuremath\{\\ldots\}', 134 => '\\ensuremath\{\\dagger\}', 135 => '\\ensuremath\{\\ddagger\}', 136 => '\\ensuremath\{\\wedge\}', @@ -734,6 +736,7 @@ my %entities = { 8746 => '\\ensuremath\{\\cup\}', 'int' => '\\ensuremath\{\\int\}', 8747 => '\\ensuremath\{\\int\}', + 'conint' => '\\ensuremath\{\\oint\}', 8750 => '\\ensuremath\{\\oint\}', 'there4' => '\\ensuremath\{\\therefore\}', 8756 => '\\ensuremath\{\\therefore\}', @@ -743,9 +746,287 @@ my %entities = { 8759 => '\\ensuremath\{::\}', 'sim' => '\\ensuremath\{\\sim\}', 8764 => '\\ensuremath\{\\sim\}', - - - + 8765 => '\\ensuremath\{\\backsim\}', + 'wreath' => '\\ensuremath\{\\wr\}', + 8768 => '\\ensuremath\{\\wr\}', + 'nsim' => '\\ensuremath\{\\not\sim\}', + 8769 => '\\ensuremath\{\\not\sim\}', +# 'asymp' => '\\ensuremath\{\\asymp\}', ≈ is actually a different glyph. 
+ 8771 => '\\ensuremath\{\\asymp\}', + 8772 => '\\ensuremath\{\\not\\asymp\}', + 'cong' => '\\ensuremath\{\\cong\}', + 8773 => '\\ensuremath\{\\cong\}', + 8775 => '\\ensuremath\{\\ncong\}', + 8778 => '\\ensuremath\{\\approxeq\}', + 8784 => '\\ensuremath\{\\doteq\}', + 8785 => '\\ensuremath\{\\doteqdot\}', + 8786 => '\\ensuremath\{\\fallingdotseq\}', + 8787 => '\\ensuremath\{\\risingdotseq\}', + 8788 => '\\ensuremath\{:=\}', + 8789 => '\\ensuremath\{=:\}', + 8790 => '\\ensuremath\{\\eqcirc\}', + 8791 => '\\ensuremath\{\\circeq\}', + 'wedgeq' => '\\ensuremath\{\\stackrel\{\\wedge\}\{=\}\}', + 8792 => '\\ensuremath\{\\stackrel\{\\wedge\}\{=\}\}', + 8794 => '\\ensuremath\{\\stackrel\{\\vee\}\{=\}\}', + 8795 => '\\ensuremath\{\\stackrel\{\\star}\{=\}\}', + 8796 => '\\ensuremath\{\\triangleeq\}', + 8797 => '\\ensuremath\{\\stackrel\{def\}\{=\}\}', + 8798 => '\\ensuremath\{\\stackrel\{m\}\{=\}\}', + 8799 => '\\ensuremath\{\\stackrel\{?\}\{=\}\}', + 'ne' => '\\ensuremath\{\\neq\}', + 8800 => '\\ensuremath\{\\neq\}', + 'equiv' => '\\ensuremath\{\\equiv\}', + 8801 => '\\ensuremath\{\\equiv\}', + 8802 => '\\ensuremath\{\\not\\equiv\}', + 'le' => '\\ensuremath\{\\leq\}', + 8804 => '\\ensuremath\{\\leq\}', + 'ge' => '\\ensuremath\{\\geq\}', + 8805 => '\\ensuremath\{\\geq\}', + 8806 => '\\ensuremath\{\\leqq\}', + 8807 => '\\ensuremath\{\\geqq\}', + 8810 => '\\ensuremath\{\\ll\}', + 8811 => '\\ensuremath\{\\gg\}', + 'twixt' => '\\ensuremath\{\\between\}', + 8812 => '\\ensuremath\{\\between\}', + 8813 => '\\ensuremath\{\\not\\asymp\}', + 8814 => '\\ensuremath\{\\not<\}', + 8815 => '\\ensuremath\{\\not>\}', + 8816 => '\\ensuremath\{\\not\\leqslant\}', + 8817 => '\\ensuremath\{\\not\\geqslant\}', + 8818 => '\\ensuremath\{\\lessim\}', + 8819 => '\\ensuremath\{\\gtrsim\}', + 8820 => '\\ensuremath\{\\stackrel\{<\}\{>\}\}', + 8821 => '\\ensuremath\{\\stackrel\{>\}\{<\}\}', + 8826 => '\\ensuremath\{\\prec\}', + 8827 => '\\ensuremath\{\\succ\}', + 8828 => '\\ensuremath\{\\preceq\}', + 
8829 => '\\ensuremath\{\\succeq\}', + 8830 => '\\ensuremath\{\\not\\prec\}', + 8831 => '\\ensuremath\{\\not\\succ\}', + 'sub' => '\\ensuremath\{\\subset\}', + 8834 => '\\ensuremath\{\\subset\}', + 'sup' => '\\ensuremath\{\\supset\}', + 8835 => '\\ensuremath\{\\supset\}', + 'nsub' => '\\ensuremath\{\\not\\subset\}', + 8836 => '\\ensuremath\{\\not\\subset\}', + 8837 => '\\ensuremath\{\\not\\supset\}', + 'sube' => '\\ensuremath\{\\subseteq\}', + 8838 => '\\ensuremath\{\\subseteq\}', + 'supe' => '\\ensuermath\{\\supseteq\}', + 8839 => '\\ensuermath\{\\supseteq\}', + 8840 => '\\ensuremath\{\\nsubseteq\}', + 8841 => '\\ensuremath\{\\nsupseteq\}', + 8842 => '\\ensuremath\{\\subsetneq\}', + 8843 => '\\ensuremath\{\\supsetneq\}', + 8847 => '\\ensuremath\{\\sqsubset\}', + 8848 => '\\ensuremath\{\\sqsupset\}', + 8849 => '\\ensuremath\{\\sqsubseteq\}', + 8850 => '\\ensuremath\{\\sqsupseteq\}', + 8851 => '\\ensuremath\{\\sqcap\}', + 8852 => '\\ensuremath\{\\sqcup\}', + 'oplus' => '\\ensuremath\{\\oplus\}', + 8853 => '\\ensuremath\{\\oplus\}', + 8854 => '\\ensuremath\{\\ominus\}', + 'otimes' => '\\ensuremath\{\\otimes\}', + 8855 => '\\ensuremath\{\\otimes\}', + 8856 => '\\ensuremath\{\\oslash\}', + 8857 => '\\ensuremath\{\\odot\}', + 8858 => '\\ensuremath\{\\circledcirc\}', + 8859 => '\\ensuremath\{\\circledast\}', + 8861 => '\\ensuremath\{\\ominus\}', # Close enough for government work. 
+ 8862 => '\\ensuremath\{\\boxplus\}', + 8863 => '\\ensuremath\{\\boxminus\}', + 8864 => '\\ensuremath\{\\boxtimes\}', + 8865 => '\\ensuremath\{\\boxdot\}', + 'vdash' => '\\ensuremath\{\\vdash\}', + 8866 => '\\ensuremath\{\\vdash\}', + 'dashv' => '\\ensuremath\{\\dashv\}', + 8867 => '\\ensuremath\{\\dashv\}', + 'perp' => '\\ensuremath\{\\perp\}', + 8869 => '\\ensuremath\{\\perp\}', + 8871 => '\\ensuremath\{\\models\}', + 8872 => '\\ensuremath\{\\vDash\}', + 8873 => '\\ensuremath\{\\Vdash\}', + 8874 => '\\ensuremath\{\\Vvdash\}', + 8876 => '\\ensuremath\{\\nvdash\}', + 8877 => '\\ensuremath\{\\nvDash\}', + 8878 => '\\ensuremath\{\\nVdash\}', + 8880 => '\\ensuremath\{\\prec\}', + 8881 => '\\ensuremath\{\\succ\}', + 8882 => '\\ensuremath\{\\vartriangleleft\}', + 8883 => '\\ensuremath\{\\vartriangleright\}', + 8884 => '\\ensuremath\{\\trianglelefteq\}', + 8885 => '\\ensuremath\{\\trianglerighteq\}', + 8891 => '\\ensuremath\{\\veebar\}', + 8896 => '\\ensuremath\{\\land\}', + 8897 => '\\ensuremath\{\\lor\}', + 8898 => '\\ensuremath\{\\cap\}', + 8899 => '\\ensuremath\{\\cup\}', + 8900 => '\\ensuremath\{\\diamond\}', + 'sdot' => '\\ensuremath\{\\cdot\}', + 8901 => '\\ensuremath\{\\cdot\}', + 8902 => '\\ensuremath\{\\star\}', + 8903 => '\\ensuremath\{\\divideontimes\}', + 8904 => '\\ensuremath\{\\bowtie\}', + 8905 => '\\ensuremath\{\\ltimes\}', + 8906 => '\\ensuremath\{\\rtimes\}', + 8907 => '\\ensuremath\{\\leftthreetimes\}', + 8908 => '\\ensuremath\{\\rightthreetimes\}', + 8909 => '\\ensuremath\{\\simeq\}', + 8910 => '\\ensuremath\{\\curlyvee\}', + 8911 => '\\ensuremath\{\\curlywedge\}', + 8912 => '\\ensuremath\{\\Subset\}', + 8913 => '\\ensuremath\{\\Supset\}', + 8914 => '\\ensuremath\{\\Cap\}', + 8915 => '\\ensuremath\{\\Cup\}', + 8916 => '\\ensuremath\{\\pitchfork\}', + 8918 => '\\ensuremath\{\\lessdot\}', + 8919 => '\\ensuremath\{\\gtrdot\}', + 8920 => '\\ensuremath\{\\lll\}', + 8921 => '\\ensuremath\{\\ggg\}', + 8922 => '\\ensuremath\{\\gtreqless\}', + 8923 => 
'\\ensuremath\{\\lesseqgtr\}', + 8924 => '\\ensuremath\{\\eqslantless\}', + 8925 => '\\ensuremath\{\\eqslantgtr\}', + 8926 => '\\ensuremath\{\\curlyeqprec\}', + 8927 => '\\ensuremath\{\\curlyeqsucc\}', + 8928 => '\\ensuremath\{\\not\\preccurlyeq\}', + 8929 => '\\ensuremath\{\\not\\succurlyeq\}', + 8930 => '\\ensuremath\{\\not\\sqsupseteq\}', + 8931 => '\\ensuremath\{\\not\\sqsubseteq\}', + 8938 => '\\ensuremath\{\\not\\vartriangleleft\}', + 8939 => '\\ensuremath\{\\not\vartriangleright\}', + 8940 => '\\ensuremath\{\\not\trianglelefteq\}', + 8941 => '\\ensuremath\{\\not\trianglerighteq\}', + 8942 => '\\ensuremath\{\\vdots\}', + 8960 => '\\ensuremath\{\\varnothing\}', + 'lceil' => '\\ensuremath\{\\lceil\}', + 8968 => '\\ensuremath\{\\lceil\}', + 'rceil' => '\\ensuremath\{\\rceil\}', + 8969 => '\\ensuremath\{\\rceil\}', + 'lfloor' => '\\ensuremath\{\\lfloor\}', + 8970 => '\\ensuremath\{\\lfloor\}', + 'rfloor' => '\\ensuremath\{\\rfloor}', + 8971 => '\\ensuremath\{\\rfloor}', + 'lang' => '\\ensuremath\{\\langle\}', + 9001 => '\\ensuremath\{\\langle\}', + 'rang' => '\\ensuremath\{\\rangle\}', + 9002 => '\\ensuremath\{\\rangle\}', + 'loz' => '\\ensuremath\{\\lozenge\}', + 9674 => '\\ensuremath\{\\lozenge\}', + 'spades' => '\\ensuremath\{\\spadesuit\}', + 9824 => '\\ensuremath\{\\spadesuit\}', + 9825 => '\\ensuremath\{\\heartsuit\}', + 9826 => '\\ensuremath\{\\diamondsuit\}', + 'clubs' => '\\ensuremath\{\\clubsuit\}', + 9827 => '\\ensuremath\{\\clubsuit\}', + 'diams' => '\\ensuremath\{\\blacklozenge\}', + 9830 => '\\ensuremath\{\\blacklozenge\}' + +); + +# +# Convert a numerical entity (that does not exist in our hash) +# to its UTF-8 equivalent representation. +# This allows us to support, to some extent, any entity for which +# dvipdf can find a gylph (given that LaTeX is now UTF-8 clean). +# +# Parameters: +# unicode - The unicode for the character. This is assumed to +# be a decimal value +# Returns: +# The UTF-8 equiavalent of the value. 
#
sub entity_to_utf8 {
    my ($unicode) = @_;

    # pack's "U" template turns a code point number into that Unicode character.
    return pack("U", $unicode);
}


#
# Convert an entity to the corresponding LaTeX if possible.
# If not possible, and the entity is numeric,
# the entity is treated like a Unicode character and converted
# to UTF-8 which should display as long as dvipdf can find the
# appropriate glyph.
#
# The entity is assumed to have already had the
# &# ; or & ; removed
#
# Parameters:
#   entity  - Name of entity to convert (text name or decimal code point).
# Returns:
#   One of the following:
#   - LaTeX string that produces the entity.
#   - UTF-8 equivalent of a numeric entity for which we don't have a
#     LaTeX string.
#   - ' ' for text entities for which there's no LaTeX equivalent
#     (also returned for an undefined argument).
#
sub entity_to_latex {
    my ($entity) = @_;

    # Nothing sensible can be produced for a missing argument.
    return " " unless defined $entity;

    # Try to look up the entity (text or numeric) in the hash:
    my $latex = $entities{$entity};
    return $latex if defined $latex;

    # If the text is purely numeric we can do the UTF-8 conversion.
    # The *whole* string has to consist of digits: one or more of them
    # (code points are rarely a single digit), nothing else, and no
    # trailing newline, hence \A ... \z rather than ^ ... $.
    if ($entity =~ /\A[0-9]+\z/) {
        return entity_to_utf8($entity);
    }

    # Can't do the conversion: fall back to a blank.
    return " ";
}

#
# Convert all the entities in a string.
# We locate all the entities, pass them into entity_to_latex and
# replace the occurrences in the input string.
# The assumption is that there are few entities in any string/document
# so this looping is not too bad. The advantage of looping vs. regexping is
# that we now can use lookup tables for the translation in entity_to_latex above.
#
# Parameters:
#   input - Input string/document
# Returns
#   input with entities replaced by latexable stuff (UTF-8 encodings or
#   latex control strings to produce the entity).
#
# Implementation notes:
#   - The string is processed in one left-to-right pass.  Text produced by
#     a replacement is never scanned again, so a replacement that ends in
#     '&' cannot combine with the text that follows it to form a bogus
#     entity, and the work done is proportional to the length of the input.
#   - Decimal references (&#nnn;) are normalised to a plain number so that
#     leading zeros do not defeat the table lookup; hexadecimal references
#     (&#xhhh; / &#Xhhh;) are converted to their decimal code point first.
#   - Named references (&text;) are passed to entity_to_latex by name.
#   - An undefined input is returned unchanged.
#
sub replace_entities {
    my ($input) = @_;

    return $input unless defined $input;

    $input =~ s{
        &
        (?:
            \# ([0-9]+)                 # group 1: decimal code point
          | \# [xX] ([0-9A-Fa-f]+)      # group 2: hexadecimal code point
          | (\w+)                       # group 3: entity name
        )
        ;
    }{
        entity_to_latex(
              defined $1 ? $1 + 0
            : defined $2 ? hex($2)
            :              $3
        )
    }gex;

    return $input;
}

1;
__END__