/[suikacvs]/markup/html/html5/spec-ja/make.pl
Suika

Diff of /markup/html/html5/spec-ja/make.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.5 by wakaba, Thu Jul 3 06:13:03 2008 UTC revision 1.16 by wakaba, Wed Aug 13 10:00:09 2008 UTC
# Line 1  Line 1 
1  #!/usr/bin/perl  #!/usr/bin/perl
2  use strict;  use strict;
3    
4  my $source_file_name = q[.spec.en.html];  BEGIN { require 'common.pl' }
5  my $data_dir_name = q[data/];  
6  my $data_suffix = q[.dat];  my $source_file_name = shift;
7  my $result_file_name = q[non-normative.ja.html.u8];  my $result_file_name = shift;
8    my $status_file_name = shift;
9  sub normalize ($) {  my $tbt_file_name_stem = shift;
   my $s = shift;  
   $s =~ s/\s+/ /g;  
   $s =~ s/^ //;  
   $s =~ s/ $//g;  
   return $s;  
 } # normalize  
   
 sub create_pattern1 ($) {  
   my $s = quotemeta shift;  
   $s =~ s/\\\*/(.+)/g;  
   return $s;  
 } # create_pattern1  
   
 sub replace_pattern2 ($@) {  
   my $s = shift;  
   my @arg = @_;  
   $s =~ s/\$(\d+)/$arg[$1 - 1]/g;  
   return $s;  
 } # replace_pattern2  
10    
11  my %data;  my %data;
12  my %pattern;  my %pattern;
13  {  my %id_variant_pattern;
14    opendir my $data_dir, $data_dir_name or die "$0: $data_dir_name: $!";  for_each_data_file (sub ($) {
15    local $/ = undef;    my $data_file_name = shift;
16    for (readdir $data_dir) {    warn "$data_file_name...\n";
17      next if /^\./; # hidden files    load_data_file ($data_file_name, \%data, \%pattern, \%id_variant_pattern);
18      my $data_file_name = qq[$data_dir_name$_];  });
     next unless $data_file_name =~ /\Q$data_suffix\E$/;  
     warn "$data_file_name...\n";  
     open my $data_file, '<:utf8', $data_file_name  
         or die "$0: $data_file_name: $!";  
     for (split /\n\n+(?=#)/, scalar <$data_file>) {  
       my ($en, $ja) = split /\n#ja\n/, $_;  
       if ($en =~ s/^#en\n//) {  
         $data{normalize ($en)} = $ja;  
       } elsif ($en =~ s/^#regexp\n//) {  
         $pattern{create_pattern1 (normalize ($en))} = $ja;  
       }  
     }  
   }  
 }  
19  my @pattern = sort {length $b <=> length $a} keys %pattern;  my @pattern = sort {length $b <=> length $a} keys %pattern;
20    
21  my $source_text;  my $source_text;
# Line 60  my $source_text; Line 27  my $source_text;
27    $source_text = <$source_file>;    $source_text = <$source_file>;
28  }  }
29    
30    my $part = 'cover';
31    my $status = {};
32    my $all_status = {};
33    my $tbt_added = {};
34    
35    open my $tbt_file, '>:utf8', $tbt_file_name_stem . '.dat' or
36        die "$0: $tbt_file_name_stem.dat: $!";
37    open my $part_tbt_file, '>:utf8', $tbt_file_name_stem . '-' . $part . '.dat' or
38        die "$0: $tbt_file_name_stem-$part.dat: $!";
39    
40  warn "Generating...\n";  warn "Generating...\n";
41  $source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6])(?>\s[^>]*)?>)((?>(?!</?(?>p(?>re)?|li|d(?>[tdl]|iv)|t[dh]|h[1-6]|ul|ol)(?>\s[^>]*)?>).)+)}  $source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6])(?>\s[^>]*)?>)((?>(?!</?(?>p(?>re)?|li|d(?>[tdl]|iv)|t(?>[dr]|h(?>ead)?|able|body|foot)|h[1-6r]|ul|ol)(?>\s[^>]*)?>).)+)}
42  {  {
43    my ($tag, $text) = ($1, $2);    my ($tag, $text) = ($1, $2);
44    my $n_text = normalize ($text);    my $n_text = normalize ($text);
45    my $ja_text = $data{$n_text};  
46    if (defined $ja_text) {    if ($tag =~ /^<h2 id=(\w+)/) {
47      $tag . q[<span class=ja-translation lang=ja>] . $ja_text . q[</span>];      $part = $1;
48    } else {      open $part_tbt_file, '>:utf8', $tbt_file_name_stem . '-' . $part . '.dat' or
49      my $v = $tag . $text;          die "$0: $tbt_file_name_stem-$part.dat: $!";
50      for my $pattern (@pattern) {    }
51        if ($n_text =~ /^$pattern$/) {    
52          $v = $tag . q[<span class=ja-translation lang=ja>] .    if (length $n_text) {
53              replace_pattern2 ($pattern{$pattern}, $1, $2, $3, $4, $5) .      my $ja_text = $data{$n_text};
54              q[</span>];      
55          last;      $status->{all}++;
56        $all_status->{$part}->{all}++;
57        
58        if (defined $ja_text) {
59          $status->{ja}++;
60          $all_status->{$part}->{ja}++;
61          $tag . q[<span class=ja-translation lang=ja>] . $ja_text . q[</span>];
62        } else {
63          my $v = $tag . $text;
64          my $has_ja;
65          for my $pattern (@pattern) {
66            if ($n_text =~ /^$pattern$/) {
67              $status->{ja}++;
68              $v = $tag . q[<span class=ja-translation lang=ja>] .
69                  replace_pattern2 ($pattern{$pattern}, $1, $2, $3, $4, $5) .
70                  q[</span>];
71              $has_ja = 1;
72              last;
73            }
74          }
75    
76          if ($n_text =~ /\bhref="#/) {
77            for my $pattern (keys %id_variant_pattern) {
78              if ($n_text =~ /^$pattern$/) {
79                $status->{ja}++;
80                my $id_map = {};
81                my $old_ids = $id_variant_pattern{$pattern}->[1];
82                for (0..$#$old_ids) {
83                  $id_map->{$old_ids->[$_]}
84                      = substr ($n_text, $-[$_ + 1], $+[$_ + 1] - $-[$_ + 1]);
85                }
86                my $w = $id_variant_pattern{$pattern}->[0];
87                for (keys %$id_map) {
88                  $w =~ s/\bhref="#([^"]+)"/href="#$id_map->{$1}"/g;
89                }
90    
91                $v = $tag . q[<span class=ja-translation lang=ja>] .
92                    $w .
93                    q[</span>];
94                $has_ja = 1;
95    
96                my $o = $id_variant_pattern{$pattern}->[2];
97                for (keys %$id_map) {
98                  $o =~ s/\bhref="#([^"]+)"/href="#$id_map->{$1}"/g;
99                }
100                $data{$o} ||= $w;
101                delete $id_variant_pattern{$pattern};
102              
103    warn $o;
104    
105                last;
106              }
107            }
108        }        }
109    
110          unless ($has_ja) {
111            $text =~ s/^\s+//;
112            $text =~ s/\s+\z//;
113            $text =~ s/\x0D?\x0A(?:\x0D?\x0A)+/\n/g;
114            unless ($tbt_added->{$text}) {
115              print $tbt_file ($text);
116              print $tbt_file ("\n\n");
117              print $part_tbt_file ($text);
118              print $part_tbt_file ("\n\n");
119              $tbt_added->{$text} = 1;
120            }
121          }
122    
123          $v;
124      }      }
125      $v;    } else {
126        $1 . $2;
127    }    }
128  }ges;  }ges;
129  $source_text =~ s{(<(?>link|img|script)\s[^>]+>)}{  $source_text =~ s{(<(?>link|img|script)\s[^>]+>)}{
# Line 103  $source_text =~ s{\[\[([A-Z ]+):([^]]+)\ Line 148  $source_text =~ s{\[\[([A-Z ]+):([^]]+)\
148        or die "$0: $result_file_name: $!";        or die "$0: $result_file_name: $!";
149    print $result_file $source_text;    print $result_file $source_text;
150  }  }
151    
152    {
153      my $time = time;
154      my @item = ($time, $status->{ja}, $status->{all});
155      for my $part (qw(cover introduction infrastructure dom semantics browsers
156                       editing comms syntax rendering no)) {
157        push @item, $all_status->{$part}->{ja};
158        push @item, $all_status->{$part}->{all};
159      }
160    
161      open my $status_file, '>>', $status_file_name
162          or die "$0: $status_file_name: $!";
163      print $status_file join "\t", @item;
164      print $status_file "\n";
165      close $status_file;
166    }

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.16

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24