/[suikacvs]/markup/html/html5/spec-ja/make.pl
Suika

Diff of /markup/html/html5/spec-ja/make.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.7 by wakaba, Fri Jul 11 10:42:26 2008 UTC revision 1.13 by wakaba, Sun Aug 10 04:16:33 2008 UTC
# Line 1  Line 1 
1  #!/usr/bin/perl  #!/usr/bin/perl
2  use strict;  use strict;
3    
4    BEGIN { require 'common.pl' }
5    
6  my $source_file_name = shift;  my $source_file_name = shift;
 my $data_dir_name = q[data/];  
 my $data_suffix = q[.dat];  
7  my $result_file_name = shift;  my $result_file_name = shift;
8    my $status_file_name = shift;
9  sub normalize ($) {  my $tbt_file_name = shift;
   my $s = shift;  
   $s =~ s/\s+/ /g;  
   $s =~ s/^ //;  
   $s =~ s/ $//g;  
   return $s;  
 } # normalize  
   
 sub create_pattern1 ($) {  
   my $s = quotemeta shift;  
   $s =~ s/\\\*/(.+)/g;  
   return $s;  
 } # create_pattern1  
   
 sub replace_pattern2 ($@) {  
   my $s = shift;  
   my @arg = @_;  
   $s =~ s/\$(\d+)/$arg[$1 - 1]/g;  
   return $s;  
 } # replace_pattern2  
10    
11  my %data;  my %data;
12  my %pattern;  my %pattern;
13  {  for_each_data_file (sub ($) {
14    opendir my $data_dir, $data_dir_name or die "$0: $data_dir_name: $!";    my $data_file_name = shift;
15    local $/ = undef;    warn "$data_file_name...\n";
16    for (readdir $data_dir) {    load_data_file ($data_file_name, \%data, \%pattern);
17      next if /^\./; # hidden files  });
     my $data_file_name = qq[$data_dir_name$_];  
     next unless $data_file_name =~ /\Q$data_suffix\E$/;  
     warn "$data_file_name...\n";  
     open my $data_file, '<:utf8', $data_file_name  
         or die "$0: $data_file_name: $!";  
     for (split /\n\n+(?=#)/, scalar <$data_file>) {  
       my ($en, $ja) = split /\n#ja\n/, $_;  
       if ($en =~ s/^#en\n//) {  
         $data{normalize ($en)} = $ja;  
       } elsif ($en =~ s/^#pattern\n//) {  
         $pattern{create_pattern1 (normalize ($en))} = $ja;  
       }  
     }  
   }  
 }  
18  my @pattern = sort {length $b <=> length $a} keys %pattern;  my @pattern = sort {length $b <=> length $a} keys %pattern;
19    
20  my $source_text;  my $source_text;
# Line 60  my $source_text; Line 26  my $source_text;
26    $source_text = <$source_file>;    $source_text = <$source_file>;
27  }  }
28    
29    open my $tbt_file, '>:utf8', $tbt_file_name or die "$0: $tbt_file_name: $!";
30    
31    my $status = {};
32    
33  warn "Generating...\n";  warn "Generating...\n";
34  $source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6])(?>\s[^>]*)?>)((?>(?!</?(?>p(?>re)?|li|d(?>[tdl]|iv)|t[dh]|h[1-6]|ul|ol)(?>\s[^>]*)?>).)+)}  $source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6])(?>\s[^>]*)?>)((?>(?!</?(?>p(?>re)?|li|d(?>[tdl]|iv)|t(?>[dr]|h(?>ead)?|able|body|foot)|h[1-6r]|ul|ol)(?>\s[^>]*)?>).)+)}
35  {  {
36    my ($tag, $text) = ($1, $2);    my ($tag, $text) = ($1, $2);
37    my $n_text = normalize ($text);    my $n_text = normalize ($text);
38    my $ja_text = $data{$n_text};    
39    if (defined $ja_text) {    if (length $n_text) {
40      $tag . q[<span class=ja-translation lang=ja>] . $ja_text . q[</span>];      my $ja_text = $data{$n_text};
41    } else {      
42      my $v = $tag . $text;      $status->{all}++;
43      for my $pattern (@pattern) {      
44        if ($n_text =~ /^$pattern$/) {      if (defined $ja_text) {
45          $v = $tag . q[<span class=ja-translation lang=ja>] .        $status->{ja}++;
46              replace_pattern2 ($pattern{$pattern}, $1, $2, $3, $4, $5) .        $tag . q[<span class=ja-translation lang=ja>] . $ja_text . q[</span>];
47              q[</span>];      } else {
48          last;        my $v = $tag . $text;
49          my $has_ja;
50          for my $pattern (@pattern) {
51            if ($n_text =~ /^$pattern$/) {
52              $status->{ja}++;
53              $v = $tag . q[<span class=ja-translation lang=ja>] .
54                  replace_pattern2 ($pattern{$pattern}, $1, $2, $3, $4, $5) .
55                  q[</span>];
56              $has_ja = 1;
57              last;
58            }
59          }
60    
61          unless ($has_ja) {
62            $text =~ s/^\s+//;
63            $text =~ s/\s+\z//;
64            $text =~ s/\x0D?\x0A(?:\x0D?\x0A)+/\n/g;
65            print $tbt_file $text;
66            print $tbt_file "\n\n";
67        }        }
68    
69          $v;
70      }      }
71      $v;    } else {
72        $1 . $2;
73    }    }
74  }ges;  }ges;
75  $source_text =~ s{(<(?>link|img|script)\s[^>]+>)}{  $source_text =~ s{(<(?>link|img|script)\s[^>]+>)}{
# Line 103  $source_text =~ s{\[\[([A-Z ]+):([^]]+)\ Line 94  $source_text =~ s{\[\[([A-Z ]+):([^]]+)\
94        or die "$0: $result_file_name: $!";        or die "$0: $result_file_name: $!";
95    print $result_file $source_text;    print $result_file $source_text;
96  }  }
97    
98    {
99      my $time = time;
100      open my $status_file, '>>', $status_file_name
101          or die "$0: $status_file_name: $!";
102      print $status_file "$time\t$status->{ja}\t$status->{all}\n";
103    }

Legend:
Removed from v.1.7  
changed lines
  Added in v.1.13

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24