--- loncom/interface/entities.pm	2008/11/17 13:52:39	1.13
+++ loncom/interface/entities.pm	2014/08/25 18:02:48	1.21
@@ -1,7 +1,7 @@
 # The LearningOnline Network
 # entity -> tex.
 #
-# 
+# $Id: entities.pm,v 1.21 2014/08/25 18:02:48 raeburn Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -26,138 +26,99 @@
 #
 #
 
+package Apache::entities;
+use strict;
 
-=head1 NAME
+=pod
 
-Apache::entities.pm
+=head1 TABLES ASCII code page
 
-=head1 SYNOPSIS
+=over
 
-This file contains a table driven entity-->latex converter.
+=item (7-13)
 
-This is part of the LearningOnline Network with CAPA project
-described at http://www.lon-capa.org.
+    Translation to empty strings
 
-=head1 OVERVIEW
+=item (32-126)
 
+    Translations to simple characters
 
-Assumptions:
- The number of entities in a resource is small compared with the
- number of possible entities that might be translated.
- Therefore the strategy is to match a general entity pattern
- &.+; over and over, pull out the match look it up in an entity -> tex hash
- and do the replacement.
+=item (130-140)
 
-In order to simplify the hash, the following reductions are done:
- &#d+; have the &# and ; stripped and is converted to an int.
- &#.+; have the &#x and ; stripped and is converted to an int as a hex
-                           value.
- All others have the & and ; stripped.
+    Controls and Latin-1 supplement.  Note that some entities that have
+    visible effect are not printing unicode characters.  Specifically
+    &#130;-&#160;
 
+=item (145-156)
 
-The hash:  Add new conversions here; leave off the leading & and the trailing ;
-all numeric entities need only appear as their decimal versions
-(e.g. no need for 1234 is sufficient, no need for 0x4d2 as well.
+    There's a gap here in my entity table
 
-This entity table is mercilessly cribbed from the  HTML pocket reference
-table starting at pg 82.  In most cases the LaTeX equivalent codes come from
-the original massive regular expression replacements originally by 
-A. Sakharuk in lonprintout.pm
+=item (159-255)
 
-I also want to acknowledge
- ISO Character entities and their LaTeX equivalents by 
-    Vidar Bronken Gundersen, and Rune Mathisen
-  http://www.bitjungle.com/isoent-ref.pdf
+     Another short gap
 
+=item (295)
 
-Note numerical entities are essentially unicode character codes.
+     hbar entity number comes from the unicode character:
+     see e.g. http://www.unicode.org/charts/PDF/U0100.pdf
+     ISO also documents a 'planck' entity.
 
+=item (338-376)
 
-=head1 SUBROUTINES
+    Latin extended-A HTML 4.01 entities
 
-=item entity_to_utf8()
+=item (402)
 
+    Latin extended B HTML 4.01 entities
 
-Convert a numerical entity (that does not exist in our hash)
- to its UTF-8 equivalent representation.
- This allows us to support, to some extent, any entity for which
- dvipdf can find a gylph (given that LaTeX is now UTF-8 clean).
+=item (710 & 732)
 
-Parameters:
-  unicode  - The unicode for the character.  This is assumed to
-             be a decimal value
-Returns:
-  The UTF-8 equiavalent of the value.
+    Spacing modifier letters
 
-=item entity_to_latex()
+=item (913-937)
 
- Convert an entity to the corresponding LateX if possible.
- If not possible, and the entity is numeric,
- the entity is treated like a Unicode character and converted
- to UTF-8 which should display as long as dvipdf can find the
- appropriate glyph.
+    Greek uppercase (skips 930)
 
- The entity is assumed to have already had the 
- &;  or & ; removed
+=item (945-982)
 
-Parameters:
-  entity    - Name of entity to convert.
-Returns:
- One of the following:
-  - Latex string that produces the entity.
-  - UTF-8 equivalent of a numeric entity for which we don't have a latex string.
-  - ' ' for text entities for which there's no latex equivalent.
+    Greek lowercase
 
+=item (8194-8364)
 
-=item replace_entities()
+    The general punctuation set
 
- Convert all the entities in a string.
- We locate all the entities, pass them into entity_to_latex and 
- and replace occurences in the input string.
- The assumption is that there are few entities in any string/document
- so this looping is not too bad.  The advantage of looping vs. regexping is
- that we now can use lookup tables for the translation in entity_to_latex above.
+=item (8472-8501)
 
-Parameters:
-  input   - Input string/document
-Returns
-  input with entities replaced by latexable stuff (UTF-8 encodings or
-  latex control strings to produce the entity.
+    Letter like symbols
 
-=head1 TABLES ASCII code page
+=item (8592-8669)
 
-=cut
+    Arrows and then some (harpoons from Hon Kie).
 
+=item (8704-8734)
 
-package Apache::entities;
-use strict;
+    Mathematical operators.
 
-package Apache::entities;
+=item (8735-9830)
 
-my %entities = (
+    The items below require the isoent latex package which I can't find at least for FC5.
+    Temporarily commented out.
 
-=pod
+=back
 
-=out
+=cut
 
-=item (7-13)
+my %entities = (
 
     # Translation to empty strings:
-=cut
 
     7        => "",
     9        => "",
     10       => "",
     13       => "",
     
-=pod
-
-=item (32-126)
-
     # Translations to simple characters:
 
-=cut
-
     32       => " ",
     33       => "!",
     34       => '"',
@@ -258,15 +219,7 @@ my %entities = (
     125      => '\}',
     126      => '\~',
 
-=pod
-
-=item (130-140)
-
-    Controls and Latin-1 supplement.  Note that some entities that have
-    visible effect are not printing unicode characters.  Specifically
-    &#130;-&#160;
-
-=cut
+    # Controls and Latin-1 supplement.
 
     130     => ',',
     131     => '\ensuremath{f}',
@@ -280,13 +233,7 @@ my %entities = (
     139     => '\ensuremath{<}',
     140     => '{\OE}',
     
-=pod
-
-=item (145-156)
-
-    There's a gap here in my entity table
-
-=cut
+    # There's a gap here in my entity table
 
     145     => '`',
     146     => '\'',
@@ -301,13 +248,7 @@ my %entities = (
     155     => '\ensuremath{>}',
     156     => '\oe ',
 
-=pod
-
-=item (159-255)
-
-     Another short gap:
-
-=cut
+     # Another short gap:
 
     159     => '\"Y',
     160     => '~',
@@ -498,26 +439,12 @@ my %entities = (
     'yuml'  => '\\"{y}',
 
 
-=pod
-
-=item (295)
-
-     hbar entity number comes from the unicode charater:
-     see e.g. http://www.unicode.org/charts/PDF/U0100.pdf
-     ISO also documents a 'planck' entity.
-
-=cut
+     # hbar entity number comes from the unicode character:
 
     295     => '\ensuremath{\hbar}',
     'planck' => '\ensuremath{\hbar}',
 
-=pod
-
-=item (338-376)
-
-    Latin extended-A HTML 4.01 entities:
-
-=cut
+    # Latin extended-A HTML 4.01 entities:
 
     338      => '\OE',
     'OElig'  => '\OE',
@@ -530,37 +457,19 @@ my %entities = (
     376      => '\\"{Y}',
     'Yuml'   => '\\"{Y}', 
 
-=pod
-
-=item (402)
-
-    Latin extended B HTML 4.01 entities
-
-=cut
+    # Latin extended B HTML 4.01 entities
 
     402      => '\ensuremath{f}',
     'fnof'   => '\ensuremath{f}',
 
-=pod
-
-=item (710 & 732)
-
-    Spacing modifier letters:
-
-=cut
+    # Spacing modifier letters:
     
     710      => '\^{}',
     'circ'   => '\^{}',
     732      => '\~{}',
     'tilde'  => '\~{}',
 
-=pod
-
-=item (913-929)
-
-    Greek uppercase:
-
-=cut
+    # Greek uppercase:
 
     913      => '\ensuremath{\mathrm{A}}',
     'Alpha'  => '\ensuremath{\mathrm{A}}',
@@ -596,16 +505,6 @@ my %entities = (
     'Pi'     => '\ensuremath{\Pi}',
     929      => '\ensuremath{\mathrm{P}}',
     'Rho'    => '\ensuremath{\mathrm{P}}',
-   
-
-=pod
-
-=item (931-937)
-
-    Skips 930
-
-=cut
-
     931      => '\ensuremath{\Sigma}',
     'Sigma'  => '\ensuremath{\Sigma}',
     932      => '\ensuremath{\mathrm{T}}',
@@ -621,13 +520,7 @@ my %entities = (
     937      => '\ensuremath{\Omega}',
     'Omega'  => '\ensuremath{\Omega}',
 
-=pod
-
-=item (945-982)
-
-    Greek lowercase:
-
-=cut
+    # Greek lowercase:
 
     945      => '\ensuremath{\alpha}',
     'alpha'  => '\ensuremath{\alpha}',
@@ -686,13 +579,7 @@ my %entities = (
     982      => '\ensuremath{\varpi}',
     'piv'    => '\ensuremath{\varpi}',
 
-=pod
-
-=item (8194-8364)
-    
-    The general punctuation set:
-
-=cut
+    # The general punctuation set:
 
     8194,    => '\hspace{.5em}',
     'enspc'  => '\hspace{.5em}',
@@ -749,14 +636,7 @@ my %entities = (
     8364     => '\texteuro',
     'euro'   => '\texteuro',
 
-=pod
-
-=item (8472-8501)
-
-    Letter like symbols
-
-=cut
-
+    # Letter like symbols
     
     8472     => '\ensuremath{\wp}',
     'weierp' => '\ensuremath{\wp}',
@@ -769,22 +649,17 @@ my %entities = (
     8501     => '\ensuremath{\aleph}',
     'alefsym'=> '\ensuremath{\aleph}',
 
-=pod
-
-=item (8592-8669)
-    
-    Arrows and then some (harpoons from Hon Kie).
+    # Arrows and then some (harpoons from Hon Kie).
 
-=cut
-
-    8592     => '\textleftarrow',
-    'larr'   => '\textleftarrow',
-    8593     => '\textuparrow',
-    'uarr'   => '\textuparrow',
-    8594     => '\textrightarrow',
-    'rarr'   => '\textrightarrow',
-    8595     => '\textdownarrow',
-    'darr'   => '\textdownarrow',
+    8592     => '\ensuremath{\leftarrow}',
+    'larr'   => '\ensuremath{\leftarrow}',
+    8593     => '\ensuremath{\uparrow}',
+    'uarr'   => '\ensuremath{\uparrow}',
+    8594     => '\ensuremath{\rightarrow}',
+    'rarr'   => '\ensuremath{\rightarrow}',
+    'rightarrow' => '\ensuremath{\rightarrow}',
+    8595     => '\ensuremath{\downarrow}',
+    'darr'   => '\ensuremath{\downarrow}',
     8596     => '\ensuremath{\leftrightarrow}',
     'harr'   => '\ensuremath{\leftrightarrow}',
     8598     => '\ensuremath{\nwarrow}',
@@ -821,14 +696,7 @@ my %entities = (
     8669     => '\ensuremath{\rightsquigarrow}',
     'rarrw'  => '\ensuremath{\rightsquigarrow}',
     
-=pod
-
-=item (8704-8734)
-
-    Mathematical operators.
-
-=cut
-	
+    # Mathematical operators.
     
     'forall' => '\ensuremath{\forall}',
     8704     => '\ensuremath{\forall}',
@@ -876,20 +744,12 @@ my %entities = (
     'infin'  => '\ensuremath{\infty}',
     8734     => '\ensuremath{\infty}',
 
-
-=pod
-
-=item (8735-9830)
-
-
-    The items below require the isoent latex package which I can't find at least for FC5.
-    Temporarily commented out.
+    # The items below require the isoent latex package which I can't find at least for FC5.
+    # NOTE(review): ang90/8735 were formerly hidden inside a POD block; they are now live -- confirm.
     
     'ang90'  => '\ensuremath{\sqangle}',
     8735     => '\ensuremath{\sqangle}',
 
-=cut
-
     'ang'    => '\ensuremath{\angle}',
     8736     => '\ensuremath{\angle}',
     'angmsd' => '\ensuremath{\measuredangle}',
@@ -929,7 +789,10 @@ my %entities = (
     'cong'   => '\ensuremath{\cong}',
     8773     => '\ensuremath{\cong}',
     8775     => '\ensuremath{\ncong}',
+    8776     => '\ensuremath{\approx}',
+    'approx' => '\ensuremath{\approx}', 
     8778     => '\ensuremath{\approxeq}',
+    'approxeq' => '\ensuremath{\approxeq}',
     8784     => '\ensuremath{\doteq}',
     8785     => '\ensuremath{\doteqdot}',
     8786     => '\ensuremath{\fallingdotseq}',
@@ -1100,12 +963,16 @@ my %entities = (
 
 =pod
 
-=item *
+=head1 UNICODE TABLE
+
+=over
 
     There are some named entities that don't have a good
     latex equivalent, these are converted to utf-8 via this table
     of entity name -> unicode number.
 
+=back
+
 =cut
 
 my  %utf_table = (
@@ -1199,6 +1066,105 @@ __END__
 
 =pod
 
+=head1 NAME
+
+Apache::entities.pm
+
+=head1 SYNOPSIS
+
+This file contains a table driven entity-->latex converter.
+
+This is part of the LearningOnline Network with CAPA project
+described at http://www.lon-capa.org.
+
+=head1 OVERVIEW
+
+
+Assumptions:
+ The number of entities in a resource is small compared with the
+ number of possible entities that might be translated.
+ Therefore the strategy is to match a general entity pattern
+ &.+; over and over, pull out the match, look it up in an entity -> tex hash
+ and do the replacement.
+
+In order to simplify the hash, the following reductions are done:
+ &#\d+; has the &# and ; stripped and is converted to an int.
+ &#x.+; has the &#x and ; stripped and is converted to an int as a hex
+                           value.
+ All others have the & and ; stripped.
+
+
+The hash:  Add new conversions here; leave off the leading & and the trailing ;
+All numeric entities need only appear as their decimal versions
+(e.g. 1234 is sufficient, no need for 0x4d2 as well).
+
+This entity table is mercilessly cribbed from the  HTML pocket reference
+table starting at pg 82.  In most cases the LaTeX equivalent codes come from
+the original massive regular expression replacements originally by 
+A. Sakharuk in lonprintout.pm
+
+I also want to acknowledge
+ ISO Character entities and their LaTeX equivalents by 
+    Vidar Bronken Gundersen, and Rune Mathisen
+  http://www.bitjungle.com/isoent-ref.pdf
+
+
+Note numerical entities are essentially unicode character codes.
+
+
+=head1 SUBROUTINES
+
+=over
+
+=item entity_to_utf8()
+
+
+Convert a numerical entity (that does not exist in our hash)
+ to its UTF-8 equivalent representation.
+ This allows us to support, to some extent, any entity for which
+ dvipdf can find a glyph (given that LaTeX is now UTF-8 clean).
+
+Parameters:
+  unicode  - The unicode for the character.  This is assumed to
+             be a decimal value
+Returns:
+  The UTF-8 equivalent of the value.
+
+=item entity_to_latex()
+
+ Convert an entity to the corresponding LaTeX if possible.
+ If not possible, and the entity is numeric,
+ the entity is treated like a Unicode character and converted
+ to UTF-8 which should display as long as dvipdf can find the
+ appropriate glyph.
+
+ The entity is assumed to have already had the 
+ &;  or & ; removed
+
+Parameters:
+  entity    - Name of entity to convert.
+Returns:
+ One of the following:
+  - Latex string that produces the entity.
+  - UTF-8 equivalent of a numeric entity for which we don't have a latex string.
+  - ' ' for text entities for which there's no latex equivalent.
+
+
+=item replace_entities()
+
+ Convert all the entities in a string.
+ We locate all the entities, pass them into entity_to_latex
+ and replace occurrences in the input string.
+ The assumption is that there are few entities in any string/document
+ so this looping is not too bad.  The advantage of looping vs. regexping is
+ that we now can use lookup tables for the translation in entity_to_latex above.
+
+Parameters:
+  input   - Input string/document
+Returns:
+  input with entities replaced by latexable stuff (UTF-8 encodings or
+  latex control strings to produce the entity).
+
 =back
 
 =cut