Annotation of loncom/localize/transliterate.pm, revision 1.1

1.1     ! raeburn     1: # The LearningOnline Network with CAPA
        !             2: # Transliteration to ascii
        !             3: #
        !             4: # $Id: transliterate.pm,v 1.1 2019/02/25 23:00:55 raeburn Exp $
        !             5: #
        !             6: # Copyright Michigan State University Board of Trustees
        !             7: #
        !             8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
        !             9: #
        !            10: # LON-CAPA is free software; you can redistribute it and/or modify
        !            11: # it under the terms of the GNU General Public License as published by
        !            12: # the Free Software Foundation; either version 2 of the License, or
        !            13: # (at your option) any later version.
        !            14: #
        !            15: # LON-CAPA is distributed in the hope that it will be useful,
        !            16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        !            18: # GNU General Public License for more details.
        !            19: #
        !            20: # You should have received a copy of the GNU General Public License
        !            21: # along with LON-CAPA; if not, write to the Free Software
        !            22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
        !            23: #
        !            24: # /home/httpd/html/adm/gpl.txt
        !            25: #
        !            26: # http://www.lon-capa.org/
        !            27: #
        !            28: ######################################################################
        !            29: ######################################################################
        !            30: 
        !            31: =pod
        !            32: 
        !            33: =head1 NAME
        !            34: 
        !            35: LONCAPA::transliterate - transliterate non-ascii characters
        !            36: in filenames.
        !            37: 
        !            38: =head1 SYNOPSIS
        !            39: 
        !            40: When fed a filename it will replace instances of non-ascii
        !            41: characters with transliterations.
        !            42: 
        !            43: =head1 OVERVIEW
        !            44: 
        !            45: Used to replace non-ascii character(s) with a transliteration
        !            46: of the character(s) to ascii character(s).
        !            47: 
        !            48: If there are preferred replacements for a particular language
        !            49: then those should be included in a separate subroutine which
        !            50: is called before the transliteration of last resort (which is
        !            51: done with Text::Unidecode).
        !            52: 
        !            53: =head1 SUBROUTINES
        !            54: 
        !            55: =cut
        !            56: 
        !            57: package LONCAPA::transliterate;
        !            58: 
        !            59: use strict;
        !            60: use utf8;
        !            61: use Text::Unidecode qw(unidecode);
        !            62: use Encode qw(decode_utf8 encode_utf8);
        !            63: 
        !            64: =pod
        !            65: 
        !            66: =over
        !            67: 
        !            68: =item * fname_to_ascii()
        !            69: 
        !            70: Inputs: $fname (required), $language (optional)
        !            71: 
        !            72: Output: $fname
        !            73: 
        !            74: Replaces each non-ascii character with a transliteration
        !            75: to one or more ascii characters (using Text::Unidecode).
        !            76: 
        !            77: If the language code is de, transliteration via 
        !            78: german_to_ascii() is used first to handle umlauts and eszett,
        !            79: before using Text::Unidecode.
        !            80: 
        !            81: If other routines are added to support preferred transliteration
        !            82: of non-ascii characters for specific languages, they should be
        !            83: added as new subroutines to this file, and then called if the
        !            84: language code has an appropriate value.
        !            85: 
        !            86: =back
        !            87: 
        !            88: =cut
        !            89: 
        # Transliterate any non-ascii characters in a filename to ascii.
        #
        # Inputs:  $fname    - filename (required). It is passed through
        #                      decode_utf8(), so it is assumed to arrive as
        #                      UTF-8 encoded octets -- TODO confirm against
        #                      callers.
        #          $language - language code (optional). When it is 'de',
        #                      german_to_ascii() is applied before the generic
        #                      Text::Unidecode transliteration.
        # Output:  the transliterated filename. An undefined name, or a name
        #          containing only characters in \x00-\x7f, is returned as is.
        sub fname_to_ascii {
            my ($fname,$language) = @_;

            # Nothing to transliterate in an undefined or pure-ascii name.
            return $fname unless defined($fname);
            return $fname unless $fname =~ /[^\x{00}-\x{7f}]/;

            $fname = decode_utf8($fname);

            # $language is optional: check definedness first so that omitting
            # it cannot raise an "uninitialized value" warning in the eq test.
            if (defined($language) && $language eq 'de') {
                $fname = german_to_ascii($fname);
            }

            # Transliteration of last resort for whatever is still non-ascii.
            $fname = unidecode($fname);
            return encode_utf8($fname);
        }
        !           102: 
        !           103: =pod 
        !           104: 
        !           105: =over
        !           106: 
        !           107: =item * german_to_ascii()
        !           108: 
        !           109: Input: $fname (required)
        !           110: 
        !           111: Output: $fname
        !           112: 
        !           113: Replaces each letter bearing an umlaut with the same letter
        !           114: without the umlaut, followed by the letter e. Case is preserved.
        !           115: 
        !           116: Replaces eszett with double s.
        !           117: 
        !           118: =back
        !           119: 
        !           120: =cut
        !           121: 
        # Apply the preferred German transliterations to a filename: each
        # vowel with an umlaut becomes the plain vowel followed by e (upper
        # case stays upper case), and eszett becomes ss.
        #
        # Input:  filename (required)
        # Output: filename with those characters replaced; every other
        #         character is left untouched.
        sub german_to_ascii {
            my $name = shift;
            my %ascii_for = (
                'ß' => 'ss',
                'ä' => 'ae', 'ö' => 'oe', 'ü' => 'ue',
                'Ä' => 'AE', 'Ö' => 'OE', 'Ü' => 'UE',
            );
            $name =~ s/([ÄäÖöÜüß])/$ascii_for{$1}/g;
            return $name;
        }
        !           136: 
        !           137: 1;

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>