--- loncom/interface/entities.pm 2008/11/20 13:07:32 1.17 +++ loncom/interface/entities.pm 2008/11/21 20:17:11 1.18 @@ -1,7 +1,7 @@ # The LearningOnline Network # entity -> tex. # -# $Id: entities.pm,v 1.17 2008/11/20 13:07:32 raeburn Exp $ +# $Id: entities.pm,v 1.18 2008/11/21 20:17:11 jms Exp $ # # Copyright Michigan State University Board of Trustees # @@ -26,111 +26,6 @@ # # -=pod - -=head1 NAME - -Apache::entities.pm - -=head1 SYNOPSIS - -This file contains a table driven entity-->latex converter. - -This is part of the LearningOnline Network with CAPA project -described at http://www.lon-capa.org. - -=head1 OVERVIEW - - -Assumptions: - The number of entities in a resource is small compared with the - number of possible entities that might be translated. - Therefore the strategy is to match a general entity pattern - &.+; over and over, pull out the match look it up in an entity -> tex hash - and do the replacement. - -In order to simplify the hash, the following reductions are done: - &#d+; have the &# and ; stripped and is converted to an int. - &#.+; have the &#x and ; stripped and is converted to an int as a hex - value. - All others have the & and ; stripped. - - -The hash: Add new conversions here; leave off the leading & and the trailing ; -all numeric entities need only appear as their decimal versions -(e.g. no need for 1234 is sufficient, no need for 0x4d2 as well. - -This entity table is mercilessly cribbed from the HTML pocket reference -table starting at pg 82. In most cases the LaTeX equivalent codes come from -the original massive regular expression replacements originally by -A. Sakharuk in lonprintout.pm - -I also want to acknowledge - ISO Character entities and their LaTeX equivalents by - Vidar Bronken Gundersen, and Rune Mathisen - http://www.bitjungle.com/isoent-ref.pdf - - -Note numerical entities are essentially unicode character codes. 
- - -=head1 SUBROUTINES - -=over - -=item entity_to_utf8() - - -Convert a numerical entity (that does not exist in our hash) - to its UTF-8 equivalent representation. - This allows us to support, to some extent, any entity for which - dvipdf can find a gylph (given that LaTeX is now UTF-8 clean). - -Parameters: - unicode - The unicode for the character. This is assumed to - be a decimal value -Returns: - The UTF-8 equiavalent of the value. - -=item entity_to_latex() - - Convert an entity to the corresponding LateX if possible. - If not possible, and the entity is numeric, - the entity is treated like a Unicode character and converted - to UTF-8 which should display as long as dvipdf can find the - appropriate glyph. - - The entity is assumed to have already had the - &; or & ; removed - -Parameters: - entity - Name of entity to convert. -Returns: - One of the following: - - Latex string that produces the entity. - - UTF-8 equivalent of a numeric entity for which we don't have a latex string. - - ' ' for text entities for which there's no latex equivalent. - - -=item replace_entities() - - Convert all the entities in a string. - We locate all the entities, pass them into entity_to_latex and - and replace occurences in the input string. - The assumption is that there are few entities in any string/document - so this looping is not too bad. The advantage of looping vs. regexping is - that we now can use lookup tables for the translation in entity_to_latex above. - -Parameters: - input - Input string/document -Returns - input with entities replaced by latexable stuff (UTF-8 encodings or - latex control strings to produce the entity. - -=back - -=cut - package Apache::entities; use strict; @@ -1164,3 +1059,108 @@ sub replace_entities { 1; __END__ + +=pod + +=head1 NAME + +Apache::entities.pm + +=head1 SYNOPSIS + +This file contains a table driven entity-->latex converter. 
+ +This is part of the LearningOnline Network with CAPA project +described at http://www.lon-capa.org. + +=head1 OVERVIEW + + +Assumptions: + The number of entities in a resource is small compared with the + number of possible entities that might be translated. + Therefore the strategy is to match a general entity pattern + &.+; over and over, pull out the match, look it up in an entity -> tex hash + and do the replacement. + +In order to simplify the hash, the following reductions are done: + &#\d+; have the &# and ; stripped and are converted to an int. + &#x.+; have the &#x and ; stripped and are converted to an int as a hex + value. + All others have the & and ; stripped. + + +The hash: Add new conversions here; leave off the leading & and the trailing ; +all numeric entities need only appear as their decimal versions +(e.g. 1234 is sufficient; there is no need for 0x4d2 as well). + +This entity table is mercilessly cribbed from the HTML pocket reference +table starting at pg 82. In most cases the LaTeX equivalent codes come from +the original massive regular expression replacements originally by +A. Sakharuk in lonprintout.pm. + +I also want to acknowledge + ISO Character entities and their LaTeX equivalents by + Vidar Bronken Gundersen, and Rune Mathisen + http://www.bitjungle.com/isoent-ref.pdf + + +Note: numerical entities are essentially Unicode character codes. + + +=head1 SUBROUTINES + +=over + +=item entity_to_utf8() + + +Convert a numerical entity (that does not exist in our hash) + to its UTF-8 equivalent representation. + This allows us to support, to some extent, any entity for which + dvipdf can find a glyph (given that LaTeX is now UTF-8 clean). + +Parameters: + unicode - The unicode for the character. This is assumed to + be a decimal value. +Returns: + The UTF-8 equivalent of the value. + +=item entity_to_latex() + + Convert an entity to the corresponding LaTeX if possible. 
+ If not possible, and the entity is numeric, + the entity is treated like a Unicode character and converted + to UTF-8 which should display as long as dvipdf can find the + appropriate glyph. + + The entity is assumed to have already had the + leading & (or &#) and trailing ; removed. + +Parameters: + entity - Name of entity to convert. +Returns: + One of the following: + - LaTeX string that produces the entity. + - UTF-8 equivalent of a numeric entity for which we don't have a LaTeX string. + - ' ' for text entities for which there's no LaTeX equivalent. + + +=item replace_entities() + + Convert all the entities in a string. + We locate all the entities, pass them into entity_to_latex, and + replace occurrences in the input string. + The assumption is that there are few entities in any string/document + so this looping is not too bad. The advantage of looping vs. regexping is + that we now can use lookup tables for the translation in entity_to_latex above. + +Parameters: + input - Input string/document +Returns: + input with entities replaced by latexable stuff (UTF-8 encodings or + LaTeX control strings to produce the entity). + +=back + +=cut