File:  [LON-CAPA] / loncom / interface / entities.pm
Revision 1.1: download - view: text, annotated - select for diffs
Mon Feb 11 11:35:46 2008 UTC (16 years, 3 months ago) by foxr
Branches: MAIN
CVS tags: HEAD
Building up hash of entity -> latex translations to sanitize
lonprintout.pm's character_table sub and to
1. Make it easy to add new entities.
2. Add some entities that are not in the table.
3. I think this version will also run faster.

    1: # The LearningOnline Network
    2: # entity -> tex.
    3: #
    4: # $Id:
    5: #
    6: # Copyright Michigan State University Board of Trustees
    7: #
    8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
    9: #
   10: # LON-CAPA is free software; you can redistribute it and/or modify
   11: # it under the terms of the GNU General Public License as published by
   12: # the Free Software Foundation; either version 2 of the License, or
   13: # (at your option) any later version.
   14: #
   15: # LON-CAPA is distributed in the hope that it will be useful,
   16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
   17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   18: # GNU General Public License for more details.
   19: #
   20: # You should have received a copy of the GNU General Public License
   21: # along with LON-CAPA; if not, write to the Free Software
   22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   23: #
   24: # /home/httpd/html/adm/gpl.txt
   25: # http://www.lon-capa.org/
   26: #
   27: #
   28: package Apache::entities;
   29: use strict;
   30: #
   31: #   This file contains a table driven entity-->latex converter.
   32: #
   33: #  Assumptions:
   34: #   The number of entities in a resource is small compared with the
   35: #   number of possible entities that might be translated.
   36: #   Therefore the strategy is to match a general entity pattern
   37: #   &.+; over and over, pull out the match look it up in an entity -> tex hash
   38: #   and do the replacement.
   39: #
   40: #  In order to simplify the hash, the following reductions are done:
   41: #   &#d+;  (decimal digits) has the &# and ; stripped and is converted to an int.
   42: #   &#x.+; (hex digits) has the &#x and ; stripped and is converted to an int
   43: #                             from its hexadecimal value.
   44: #   All others have the & and ; stripped.
   45: 
   46: 
   47: #  The hash:  Add new conversions here; leave off the leading & and the trailing ;
   48: #  All numeric entities need only appear as their decimal versions
   49: #  (e.g. 1234 is sufficient; there is no need for 0x4d2 as well).
   50: #
   51: #  This entity table is mercilessly cribbed from the  HTML pocket reference
   52: #  table starting at pg 82.  In most cases the LaTeX equivalent codes come from
   53: #  the original massive regular expression replacements originally by 
   54: #  A. Sakharuk in lonprintout.pm
   55: #
   56: #  Note numerical entities are essentially unicode character codes.
   57: #
   58: my %entities = {
   59: 
   60:     #  ---- ASCII code page: ----------------
   61: 
   62:     # Translation to empty strings:
   63: 
   64:     7        => "",
   65:     9        => "",
   66:     10       => "",
   67:     13       => "",
   68:     
   69:     # Translations to simple characters:
   70: 
   71:     32       => " ",
   72:     33       => "!",
   73:     34       => '"',
   74:     'quot'   => '"',
   75:     35       => '\\\#',
   76:     36       => '\\\$',
   77:     37       => '\\%',
   78:     38       => '\\&',
   79:     'amp'    => '\\&',
   80:     39       => '\'',		# Apostrophe
   81:     40       => '(',
   82:     41       => ')',
   83:     42       => '\*',
   84:     43       => '\+',
   85:     44       => ',',		#  comma
   86:     45       => '-',
   87:     46       => '\.',
   88:     47       => '\/',
   89:     48       => '0',
   90:     49       => '1',
   91:     50       => '2',
   92:     51       => '3',
   93:     52       => '4',
   94:     53       => '5',
   95:     54       => '6',
   96:     55       => '7',
   97:     56       => '8',
   98:     57       => '9',
   99:     58       => ':',
  100:     59       => ';',
  101:     60       => '\\ensuremath\{<\}',
  102:     'lt'     => '\\ensuremath\{<\}',
  103:     61       => '\\ensuremath\{=\}',
  104:     62       => '\\ensuremath\{>\}',
  105:     'gt'     => '\\ensuremath\{>\}',
  106:     63       => '\?',
  107:     64       => '@',
  108:     65       => 'A',
  109:     66       => 'B',
  110:     67       => 'C',
  111:     68       => 'D',
  112:     69       => 'E',
  113:     70       => 'F',
  114:     71       => 'G',
  115:     72       => 'H',
  116:     73       => 'I',
  117:     74       => 'J',
  118:     75       => 'K',
  119:     76       => 'L',
  120:     77       => 'M',
  121:     78       => 'N',
  122:     79       => 'O',
  123:     80       => 'P',
  124:     81       => 'Q',
  125:     82       => 'R',
  126:     83       => 'S',
  127:     84       => 'T',
  128:     85       => 'U',
  129:     86       => 'V',
  130:     87       => 'W',
  131:     88       => 'X',
  132:     89       => 'Y',
  133:     90       => 'Z',
  134:     91       => '[',
  135:     92       => '\\ensuremath\{\\setminus\}', # \setminus is \ with special spacing.
  136:     93       => ']',
  137:     94       => '\\ensuremath\{\\wedge\}',
  138:     95       => '\\underline\{\\makebox[2mm]\\{\\strut\}\}', # Underline 2mm of space for _
  139:     96       => '`',
  140:     97       => 'a',
  141:     98       => 'b',
  142:     99       => 'c',
  143:     100      => 'd',
  144:     101      => 'e',
  145:     102      => 'f',
  146:     103      => 'g',
  147:     104      => 'h', 
  148:     105      => 'i',
  149:     106      => 'j',
  150:     107      => 'k',
  151:     108      => 'l',
  152:     109      => 'm',
  153:     110      => 'n',
  154:     111      => 'o',
  155:     112      => 'p',
  156:     113      => 'q',
  157:     114      => 'r',
  158:     115      => 's',
  159:     116      => 't',
  160:     117      => 'u',
  161:     118      => 'v',
  162:     119      => 'w',
  163:     120      => 'x',
  164:     121      => 'y',
  165:     122      => 'z',
  166:     123      => '\\{',
  167:     124      => '\|',
  168:     125      => '\\}',
  169:     126      => '\~',
  170: 
  171:     #   Controls and Latin-1 supplement.  Note that some entities that have
  172:     #   visible effect are not printing unicode characters.  Specifically
  173:     #   &#130;-&#160;
  174: 
  175:     130     => ',',
  176:     131     => '\\textflorin ',
  177:     132     => ',,',		# Low double left quotes.
  178:     133     => '\\ensuremat\{\\ldots\}',
  179:     134     => '\\ensuremath\{\\dagger\}',
  180:     135     => '\\ensuremath\{\\ddagger\}',
  181:     136     => '\\ensuremath\{\\wedge\}',
  182:     137     => '\\textperthousand ',
  183:     138     => '\\v\{S\}',
  184:     139     => '\\ensuremath\{<\}',
  185:     140     => '\{\\OE\}',
  186:     
  187:     #  There's a gap here in my entity table
  188: 
  189:     145     => '\`',
  190:     146     => '\'',
  191:     147     => '\`\`',
  192:     148     => '\'\'',
  193:     149     => '\\ensuremath\{\\bullet\}',
  194:     150     => '--',
  195:     151     => '---',
  196:     152     => '\\ensuremath\{\\sim\}',
  197:     153     => '\\texttrademark',
  198:     154     => '\\v\{s\}',
  199:     155     => '\\ensuremath\{>\}',
  200:     156     => '\\oe ',
  201:     
  202:     # Another short gap:
  203: 
  204:     159     => '\\"Y',
  205:     160     => '~',
  206:     'nbsp'  => '~',
  207:     161     => '\\textexclamdown ',
  208:     'iexcl' => '\\textexclamdown ',
  209:     162     => '\\textcent ',
  210:     'cent'  => '\\textcent ',
  211:     163     => '\\pounds ',
  212:     'pound' => '\\pounds ',
  213:     164     => '\\textcurrency ',
  214:     'curren' => '\\textcurrency ',
  215:     165     => '\\textyen ',
  216:     'yen'   => '\\textyen ',
  217:     166     => '\\textbrokenbar ',
  218:     'brvbar' => '\\textbrokenbar ',
  219:     167     => '\\textsection ',
  220:     'sect'  => '\\textsection ',
  221:     168     => '\\texthighdieresis ',
  222:     'uml'   => '\\texthighdieresis ',
  223:     169     => '\\copyright ',
  224:     'copy'  => '\\copyright ',
  225:     170     => '\\textordfeminine ',
  226:     'ordf'  => '\\textordfeminine ',
  227:     171     => '\\ensuremath\{\ll\}', # approximation of left angle quote.
  228:     'laquo' => '\\ensuremath\{\ll\}', #   ""
  229:     172     => '\\ensuremath\{\\neg\}',
  230:     'not'   => '\\ensuremath\{\\neg\}',
  231:     173     => ' - ',
  232:     'shy'   => ' - ',
  233:     174     => '\\textregistered ',
  234:     'reg'   => '\\textregistered ',
  235:     175     => '\\ensuremath\{^\{-\}\}',
  236:     'macr'  => '\\ensuremath\{^\{-\}\}',
  237:     176     => '\\ensuremath\{^\{\\circ\}\}',
  238:     'deg'   => '\\ensuremath\{^\{\\circ\}\}',
  239:     177     => '\\ensuremath\{\\pm\}',
  240:     'plusmn' => '\\ensuremath\{\\pm\}',
  241:     178     => '\\ensuremath\{^2\}',
  242:     'sup2'  => '\\ensuremath\{^2\}',
  243:     179     => '\\ensuremath\{^3\}',
  244:     'sup3'  => '\\ensuremath\{^3\}',
  245:     180     => '\\textacute ',
  246:     'acute' => '\\textacute ',
  247:     181     => '\\ensuremath\{\\mu\}',
  248:     'micro' => '\\ensuremath\{\\mu\}',
  249:     182     => '\\P ',
  250:     para    => '\\P ',
  251:     183     => '\\ensuremath\{\\cdot\}',
  252:     'middot' => '\\ensuremath\{\\cdot\}',
  253:     184     => '\\c\{\\strut\}',
  254:     'cedil' => '\\c\{\\strut\}',
  255:     185     => '\\ensuremath\{^1\}',
  256:     sup1    => '\\ensuremath\{^1\}',
  257:     186     => '\\textordmasculine ',
  258:     'ordm'  => '\\textordmasculine ',
  259:     187     => '\\ensuremath\{\\gg\}',
  260:     'raquo' => '\\ensuremath\{\\gg\}',
  261:     188     => '\\textonequarter ',
  262:     'frac14' => '\\textonequarter ',
  263:     189     => '\\textonehalf' ,
  264:     'frac12' => '\\textonehalf' ,
  265:     190     => '\\textthreequarters ',
  266:     'frac34' => '\\textthreequarters ',
  267:     191     =>  '\\textquestiondown ',
  268:     'iquest' => '\\textquestiondown ',
  269:     192     => '\\\`\{A\}',
  270:     'Agrave' => '\\\`\{A\}',
  271:     193     => '\\\'\{A\}',
  272:     'Aacute' => '\\\'\{A\}',
  273:     194     => '\\^\{A\}',
  274:     'Acirc' => '\\^\{A\}',
  275:     195     => '\\~{A}',
  276:     'Atilde'=> '\\~{A}',
  277:     196     => '\\\"{A}',
  278:     'Auml'  => '\\\"{A}',
  279:     197     => '{\\AA}',
  280:     'Aring' => '{\\AA}',
  281:     198     => '{\\AE}',
  282:     'AElig' => '{\\AE}',
  283:     199     => '\\c{c}',
  284:     'Ccedil'=> '\\c{c}',
  285:     '200'   => '\\\`{E}',
  286:     'Egrave'=> '\\\`{E}',
  287:     201     => '\\\'{E}',
  288:     'Eacute'=> '\\\'{E}',
  289:     202     => '\\\^{E}',
  290:     'Ecirc' => '\\\^{E}',
  291:     203     => '\\\"{E}',
  292:     'Euml'  => '\\\"{E}',
  293:     204     => '\\\`{I}',
  294:     'Igrave'=> '\\\`{I}',
  295:     205     => '\\\'{I}',
  296:     'Iacute'=> '\\\'{I}',
  297:     206     => '\\\^{I}',
  298:     'Icirc' => '\\\^{I}',
  299:     207     => '\\\"{I}',
  300:     'Iuml'  => '\\\"{I}',
  301:     208     => '\\OE',
  302:     'ETH'   => '\\OE',
  303:     209     => '\\~{N}',
  304:     'Ntilde'=> '\\~{N}',
  305:     210     => '\\\`{O}',
  306:     'Ograve'=> '\\\`{O}',
  307:     211     => '\\\'{O}',
  308:     'Oacute'=> '\\\'{O}',
  309:     212     => '\\\^{O}',
  310:     'Ocirc' => '\\\^{O}',
  311:     213     => '\\~{O}',
  312:     'Otilde'=> '\\~{O}',
  313:     214     => '\\\"{O}',
  314:     'Ouml'  => '\\\"{O}',
  315:     215     => '\\ensuremath\{\\times\}',
  316:     'times' => '\\ensuremath\{\\times\}',
  317:     216     => '\\O',
  318:     'Oslash'=> '\\O',
  319:     217     => '\\\`{U}',
  320:     'Ugrave'=> '\\\`{U}',
  321:     218     => '\\\'{U}',
  322:     'Uacute'=> '\\\'{U}',
  323:     219     => '\\\^{U}',
  324:     'Ucirc' => '\\\^{U}',
  325:     220     => '\\\"{U}',
  326:     'Uuml'  => '\\\"{U}',
  327:     221     => '\\\'{Y}',
  328:     'Yacute'=> '\\\'{Y}',
  329:     222     => '\\TH',
  330:     'THORN' => '\\TH',
  331:     223     => '{\\sz}',
  332:     'szlig' => '{\\sz}',
  333:     224     => '\\\`{a}',
  334:     'agrave'=> '\\\`{a}',
  335:     225     => '\\\'{a}',
  336:     'aacute'=> '\\\'{a}',
  337:     226     => '\\\^{a}',
  338:     'acirc' => '\\\^{a}',
  339:     227     => '\\\~{a}',
  340:     'atilde'=> '\\\~{a}',
  341:     228     => '\\\"{a}',
  342:     'auml'  => '\\\"{a}',
  343:     229     => '\\aa',
  344:     'aring' => '\\aa',
  345:     230     => '\\ae',
  346:     'aelig' => '\\ae',
  347:     231     => '\\c{c}',
  348:     'ccedil'=> '\\c{c}',
  349:     232     => '\\\`{e}',
  350:     'egrave'=> '\\\`{e}',
  351:     233     => '\\\'{e}',
  352:     'eacute'=> '\\\'{e}',
  353:     234     => '\\\^{e}',
  354:     'ecirc' => '\\\^{e}',
  355:     235     => '\\\"{e}',
  356:     'euml'  => '\\\"{e}',
  357:     236     => '\\\`{i}',
  358:     'igrave'=> '\\\`{i}',
  359:     237     => '\\\'{i}',
  360:     'iacute'=> '\\\'{i}',
  361:     238     => '\\\^{i}',
  362:     'icirc' => '\\\^{i}',
  363:     239     => '\\\"{i}',
  364:     'iuml'  => '\\\"{i}',
  365:     240     => '\\dh',
  366:     'eth'   => '\\dh',
  367:     241     => '\\\~{n}',
  368:     'ntilde'=> '\\\~{n}',
  369:     242     => '\\\`{o}',
  370:     'ograve'=> '\\\`{o}',
  371:     243     => '\\\'{o}',
  372:     'oacute'=> '\\\'{o}',
  373:     244     => '\\\^{o}',
  374:     'ocirc' => '\\\^{o}',
  375:     245     => '\\\~{o}',
  376:     'otilde'=> '\\\~{o}',
  377:     246     => '\\\"{o}',
  378:     'ouml'  => '\\\"{o}',
  379:     247     => '\\ensuremath\{\\div\}',
  380:     'divide'=> '\\ensuremath\{\\div\}',
  381:     248     => '{\\o}',
  382:     'oslash'=> '{\\o}',
  383:     249     => '\\\`{u}',
  384:     'ugrave'=> '\\\`{u}',
  385:     250     => '\\\'{u}',
  386:     'uacute'=> '\\\'{u}',
  387:     251     => '\\\^{u}',
  388:     'ucirc' => '\\\^{u}',
  389:     252     => '\\\"{u}',
  390:     'uuml'  => '\\\"{u}',
  391:     253     => '\\\'{y}',
  392:     'yacute'=> '\\\'{y}',
  393:     254     => '\\th',
  394:     'thorn' => '\\th',
  395:     255     => '\\\"{y}',
  396:     'yuml'  => '\\\"{y}',
  397: 
  398:     # hbar entity number comes from the unicode charater:
  399:     # see e.g. http://www.unicode.org/charts/PDF/U0100.pdf
  400:     # ISO also documents a 'planck' entity.
  401: 
  402:     295     => '\\ensuremath\{\hbar\}',
  403:     'plank' => '\\ensuremath\{\hbar\}',
  404: 
  405:     # Latin extended-A HTML 4.01 entities:
  406: 
  407:     338      => '\\OE',
  408:     'OElig'  => '\\OE',
  409:     339      => '\\oe',
  410:     'oelig'  => '\\oe',
  411:     352      => '\\v{S}',
  412:     'Scaron' => '\\v{S}',
  413:     353      => '\\v{s}',
  414:     'scaron' => '\\v{s}',
  415:     376      => '\\\"{Y}',
  416:     'Yuml'   => '\\\"{Y}', 
  417: 
  418: 
  419:     # Latin extended B HTML 4.01 entities
  420: 
  421:     402      => '\\ensuremath{f}',
  422:     'fnof'   => '\\ensuremath{f}',
  423: 
  424:     # Spacing modifier letters:
  425:     
  426:     710      => '\^{}',
  427:     'circ'   => '\^{}',
  428:     732      => '\~{}',
  429:     'tilde'  => '\~{}',
  430: 
  431:     # Greek uppercase:
  432: 
  433:     913      => '\\ensuremath\{\\mathrm\{A\}\}',
  434:     'Alpha'  => '\\ensuremath\{\\mathrm\{A\}\}',
  435:     914      => '\\ensuremath\{\\mathrm\{B\}\}',
  436:     'Beta'   => '\\ensuremath\{\\mathrm\{B\}\}',
  437:     915      => '\\ensuremath\{\\Gamma\}',
  438:     'Gamma'  => '\\ensuremath\{\\Gamma\}',
  439:     916      => '\\ensuremath\{\\Delta\}',
  440:     'Delta'  => '\\ensuremath\{\\Delta\}',
  441:     917      => '\\ensuremath\{\\mathrm\{E\}\}',
  442:     'Epsilon'=> '\\ensuremath\{\\mathrm\{E\}\}',
  443:     918      => '\\ensuremath\{\\mathrm\{Z\}\}',
  444:     'Zeta'   => '\\ensuremath\{\\mathrm\{Z\}\}',
  445:     919      => '\\ensuremath\{\\mathrm\{H\}\}',
  446:     'Eta'    => '\\ensuremath\{\\mathrm\{H\}\}',
  447:     920      => '\\ensuremath\{\\Theta\}',
  448:     'Theta'  => '\\ensuremath\{\\Theta\}',
  449:     921      => '\\ensuremath\{\\mathrm\{I\}\}',
  450:     'Iota'   => '\\ensuremath\{\\mathrm\{I\}\}',
  451:     922      => '\\ensuremath\{\\mathrm\{K\}\}',
  452:     'Kappa'  => '\\ensuremath\{\\mathrm\{K\}\}',
  453:     923      => '\\ensuremath\{\\Lambda\}',
  454:     'Lambda' => '\\ensuremath\{\\Lambda\}',
  455:     924      => '\\ensuremath\{\\mathrm\{M\}\}',
  456:     'Mu'     => '\\ensuremath\{\\mathrm\{M\}\}',
  457:     925      => '\\ensuremath\{\\mathrm\{N\}\}',
  458:     'Nu'     => '\\ensuremath\{\\mathrm\{N\}\}',
  459:     926      => '\\ensuremath\{\\mathrm\{\\Xi\}',
  460:     'Xi'     => '\\ensuremath\{\\mathrm\{\\Xi\}',
  461:     927      => '\\ensuremath\{\\mathrm\{O\}\}',
  462:     'Omicron'=> '\\ensuremath\{\\mathrm\{O\}\}',
  463:     928      => '\\ensuremath\{\\Pi\}',
  464:     'Pi'     => '\\ensuremath\{\\Pi\}',
  465:     929      => '\\ensuremath\{\\mathrm\{P\}\}',
  466:     'Rho'    => '\\ensuremath\{\\mathrm\{P\}\}',
  467:    
  468:     # Skips 930
  469: 
  470:     931      => '\\ensuremath\{\Sigma\}',
  471:     'Sigma'  => '\\ensuremath\{\Sigma\}',
  472:     932      => '\\ensuremath\{\\mathrm\{T\}\}',
  473:     'Tau'    => '\\ensuremath\{\\mathrm\{T\}\}',
  474:     933      => '\\ensuremath\{\\Upsilon\}',
  475:     'Upsilon'=> '\\ensuremath\{\\Upsilon\}',
  476:     934      => '\\ensuremath\{\\Phi\}',
  477:     'Phi'    => '\\ensuremath\{\\Phi\}',
  478:     935      => '\\ensuremath\{\\mathrm\{X\}\}',
  479:     'Chi'    => '\\ensuremath\{\\mathrm\{X\}\}',
  480:     936      => '\\ensuremath\{\\Psi\}',
  481:     'Psi'    => '\\ensuermath\{\\Psi\}',
  482:     937      => '\\ensuremath\{\\Omega\}',
  483:     'Omega'  => '\\ensuremath\{\\Omega\}',
  484: 
  485: 
  486:     # Greek lowercase:
  487: 
  488:     945      => '\\ensuremath\{\\alpha\}',
  489:     'alpha'  => '\\ensuremath\{\\alpha\}',
  490:     946      => '\\ensuremath\{\\beta\}',
  491:     'beta'   => '\\ensuremath\{\\beta\}',
  492:     947      => '\\ensuremath\{\\gamma\}',
  493:     'gamma'  => '\\ensuremath\{\\gamma\}',
  494:     948      => '\\ensuremath\{\\delta\}',
  495:     'delta'  => '\\ensuremath\{\\delta\}',
  496:     949      => '\\ensuremath\{\\epsilon\}',
  497:     'epsilon'=> '\\ensuremath\{\\epsilon\}',
  498:     950      => '\\ensuremath\{\\zeta\}',
  499:     'zeta'   => '\\ensuremath\{\\zeta\}',
  500:     951      => '\\ensuremath\{\\eta\}',
  501:     'eta'    => '\\ensuremath\{\\eta\}',
  502: 
  503:     
  504: };

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>
500 Internal Server Error

Internal Server Error

The server encountered an internal error or misconfiguration and was unable to complete your request.

Please contact the server administrator at root@localhost to inform them of the time this error occurred, and the actions you performed just before this error.

More information about this error may be available in the server error log.