Annotation of modules/damieng/clean_xml/clean_xml.pl, revision 1.1

1.1     ! damieng     1: #!/usr/bin/perl
        !             2: 
        !             3: use strict;
        !             4: use utf8;
        !             5: use warnings;
        !             6: 
        !             7: use File::Basename;
        !             8: use Try::Tiny;
        !             9: 
        !            10: use lib dirname(__FILE__);
        !            11: 
        !            12: use pre_xml;
        !            13: use html_to_xml;
        !            14: use post_xml;
        !            15: 
        !            16: 
        !            17: binmode(STDOUT, ':encoding(UTF-8)');
        !            18: 
        !            19: if (scalar(@ARGV) != 1) {
        !            20:   print STDERR "Usage: perl clean_xml.pl file|directory\n";
        !            21:   exit(1);
        !            22: }
        !            23: 
        !            24: # find the command-line argument encoding
        !            25: use I18N::Langinfo qw(langinfo CODESET);
        !            26: my $codeset = langinfo(CODESET);
        !            27: use Encode qw(decode);
        !            28: @ARGV = map { decode $codeset, $_ } @ARGV;
        !            29: 
        !            30: my $pathname = "$ARGV[0]";
        !            31: if (-d "$pathname") {
        !            32:   $pathname =~ s/\/$//;
        !            33:   my $start = time();
        !            34:   my ($converted, $failures) = convert_dir($pathname);
        !            35:   my $end = time();
        !            36:   my $elapsed = $end - $start;
        !            37:   my $minutes = int($elapsed / 60);
        !            38:   my $seconds = $elapsed - ($minutes*60);
        !            39:   print "\n".scalar(@$converted)." files were converted in $minutes minutes $seconds seconds\n";
        !            40:   if (scalar(@$failures) > 0) {
        !            41:     print "\n".scalar(@$failures)." files need a manual fix:\n";
        !            42:     foreach my $failure (@$failures) {
        !            43:       print "  $failure\n";
        !            44:     }
        !            45:   }
        !            46: } elsif (-f $pathname) {
        !            47:   convert_file($pathname);
        !            48: }
        !            49: 
        !            50: # Converts a directory recursively, selecting only non-version .problem/exam/survey/html/library files.
        !            51: # Returns a list of files that were converted, and a list of files that could not be converted.
        !            52: sub convert_dir {
        !            53:   my ($dirpath) = @_;
        !            54:   
        !            55:   my @converted = ();
        !            56:   my @failures = ();
        !            57:   opendir (my $dh, $dirpath) or die $!;
        !            58:   while (my $entry = readdir($dh)) {
        !            59:     next if ($entry =~ m/^\./); # ignore entries starting with a period
        !            60:     my $pathname = $dirpath.'/'.$entry;
        !            61:     if (-d $pathname) {
        !            62:       my ($new_converted, $new_failures) = convert_dir($pathname);
        !            63:       push(@converted, @$new_converted);
        !            64:       push(@failures, @$new_failures);
        !            65:     } elsif (-f $pathname) {
        !            66:       # check that the file ends in .problem, .exam, .survey, .html or .htm but not .number.*
        !            67:       if (($pathname =~ /\.problem$/i || $pathname =~ /\.exam$/i || $pathname =~ /\.survey$/i ||
        !            68:           $pathname =~ /\.html?$/i || $pathname =~ /\.library$/i) &&
        !            69:           $pathname !~ /\.[0-9]+\.[a-z]+$/) {
        !            70:         try {
        !            71:           convert_file($pathname);
        !            72:           push(@converted, $pathname);
        !            73:         } catch {
        !            74:           print "$_\n"; # continue processing even if a file cannot be converted
        !            75:           push(@failures, $pathname);
        !            76:         };
        !            77:       }
        !            78:     }
        !            79:   }
        !            80:   closedir($dh);
        !            81:   return((\@converted, \@failures));
        !            82: }
        !            83: 
        !            84: # Converts a file, creating a .xml file in the same directory.
        !            85: sub convert_file {
        !            86:   my ($pathname) = @_;
        !            87: 
        !            88:   # create a name for the new file
        !            89:   my $newpath = $pathname.'.xml';
        !            90: 
        !            91:   print "converting $pathname...\n";
        !            92: 
        !            93:   my $textref;
        !            94:   try {
        !            95:     $textref = pre_xml::pre_xml($pathname);
        !            96:   } catch {
        !            97:     die "pre_xml error for $pathname: $_";
        !            98:   };
        !            99: 
        !           100:   try {
        !           101:     $textref = html_to_xml::html_to_xml($textref);
        !           102:   } catch {
        !           103:     die "html_to_xml error for $pathname: $_";
        !           104:   };
        !           105: 
        !           106:   try {
        !           107:     post_xml::post_xml($textref, $newpath);
        !           108:   } catch {
        !           109:     die "post_xml error for $pathname: $_";
        !           110:   };
        !           111: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>