/[suikacvs]/markup/html/html5/spec-ja/make.pl
Suika

Diff of /markup/html/html5/spec-ja/make.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.5 by wakaba, Thu Jul 3 06:13:03 2008 UTC revision 1.18 by wakaba, Wed Aug 13 15:12:03 2008 UTC
# Line 1  Line 1 
1  #!/usr/bin/perl  #!/usr/bin/perl
2  use strict;  use strict;
3    
4  my $source_file_name = q[.spec.en.html];  BEGIN { require 'common.pl' }
5  my $data_dir_name = q[data/];  
6  my $data_suffix = q[.dat];  my $source_file_name = shift;
7  my $result_file_name = q[non-normative.ja.html.u8];  my $result_file_name = shift;
8    my $status_file_name = shift;
9  sub normalize ($) {  my $tbt_file_name_stem = shift;
   my $s = shift;  
   $s =~ s/\s+/ /g;  
   $s =~ s/^ //;  
   $s =~ s/ $//g;  
   return $s;  
 } # normalize  
   
 sub create_pattern1 ($) {  
   my $s = quotemeta shift;  
   $s =~ s/\\\*/(.+)/g;  
   return $s;  
 } # create_pattern1  
   
 sub replace_pattern2 ($@) {  
   my $s = shift;  
   my @arg = @_;  
   $s =~ s/\$(\d+)/$arg[$1 - 1]/g;  
   return $s;  
 } # replace_pattern2  
10    
11  my %data;  my %data;
12  my %pattern;  my %pattern;
13  {  for_each_data_file (sub ($) {
14    opendir my $data_dir, $data_dir_name or die "$0: $data_dir_name: $!";    my $data_file_name = shift;
15    local $/ = undef;    warn "$data_file_name...\n";
16    for (readdir $data_dir) {    load_data_file ($data_file_name, \%data, \%pattern);
17      next if /^\./; # hidden files  });
     my $data_file_name = qq[$data_dir_name$_];  
     next unless $data_file_name =~ /\Q$data_suffix\E$/;  
     warn "$data_file_name...\n";  
     open my $data_file, '<:utf8', $data_file_name  
         or die "$0: $data_file_name: $!";  
     for (split /\n\n+(?=#)/, scalar <$data_file>) {  
       my ($en, $ja) = split /\n#ja\n/, $_;  
       if ($en =~ s/^#en\n//) {  
         $data{normalize ($en)} = $ja;  
       } elsif ($en =~ s/^#regexp\n//) {  
         $pattern{create_pattern1 (normalize ($en))} = $ja;  
       }  
     }  
   }  
 }  
18  my @pattern = sort {length $b <=> length $a} keys %pattern;  my @pattern = sort {length $b <=> length $a} keys %pattern;
19    
20  my $source_text;  my $source_text;
# Line 60  my $source_text; Line 26  my $source_text;
26    $source_text = <$source_file>;    $source_text = <$source_file>;
27  }  }
28    
29    my $part = 'cover';
30    my $status = {};
31    my $all_status = {};
32    my $tbt_added = {};
33    
34    open my $tbt_file, '>:utf8', $tbt_file_name_stem . '.dat' or
35        die "$0: $tbt_file_name_stem.dat: $!";
36    open my $part_tbt_file, '>:utf8', $tbt_file_name_stem . '-' . $part . '.dat' or
37        die "$0: $tbt_file_name_stem-$part.dat: $!";
38    
39  warn "Generating...\n";  warn "Generating...\n";
40  $source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6])(?>\s[^>]*)?>)((?>(?!</?(?>p(?>re)?|li|d(?>[tdl]|iv)|t[dh]|h[1-6]|ul|ol)(?>\s[^>]*)?>).)+)}  $source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6])(?>\s[^>]*)?>)((?>(?!</?(?>p(?>re)?|li|d(?>[tdl]|iv)|t(?>[dr]|h(?>ead)?|able|body|foot)|h[1-6r]|ul|ol)(?>\s[^>]*)?>).)+)}
41  {  {
42    my ($tag, $text) = ($1, $2);    my ($tag, $text) = ($1, $2);
43    my $n_text = normalize ($text);    my $n_text = normalize ($text);
44    my $ja_text = $data{$n_text};  
45    if (defined $ja_text) {    if ($tag =~ /^<h2 id=(\w+)/) {
46      $tag . q[<span class=ja-translation lang=ja>] . $ja_text . q[</span>];      $part = $1;
47    } else {      open $part_tbt_file, '>:utf8', $tbt_file_name_stem . '-' . $part . '.dat' or
48      my $v = $tag . $text;          die "$0: $tbt_file_name_stem-$part.dat: $!";
49      for my $pattern (@pattern) {    }
50        if ($n_text =~ /^$pattern$/) {    
51          $v = $tag . q[<span class=ja-translation lang=ja>] .    if (length $n_text) {
52              replace_pattern2 ($pattern{$pattern}, $1, $2, $3, $4, $5) .      my $ja_text = $data{$n_text};
53              q[</span>];      
54          last;      $status->{all}++;
55        $all_status->{$part}->{all}++;
56        
57        if (defined $ja_text) {
58          $status->{ja}++;
59          $all_status->{$part}->{ja}++;
60          $tag . q[<span class=ja-translation lang=ja>] . $ja_text . q[</span>];
61        } else {
62          my $v = $tag . $text;
63          my $has_ja;
64          for my $pattern (@pattern) {
65            if ($n_text =~ /^$pattern$/) {
66              $status->{ja}++;
67              $all_status->{$part}->{ja}++;
68              $v = $tag . q[<span class=ja-translation lang=ja>] .
69                  replace_pattern2 ($pattern{$pattern}, $1, $2, $3, $4, $5) .
70                  q[</span>];
71              $has_ja = 1;
72              last;
73            }
74          }
75    
76          unless ($has_ja) {
77            $text =~ s/^\s+//;
78            $text =~ s/\s+\z//;
79            $text =~ s/\x0D?\x0A(?:\x0D?\x0A)+/\n/g;
80            unless ($tbt_added->{$text}) {
81              print $tbt_file ($text);
82              print $tbt_file ("\n\n");
83              print $part_tbt_file ($text);
84              print $part_tbt_file ("\n\n");
85              $tbt_added->{$text} = 1;
86            }
87        }        }
88    
89          $v;
90      }      }
91      $v;    } else {
92        $1 . $2;
93    }    }
94  }ges;  }ges;
95  $source_text =~ s{(<(?>link|img|script)\s[^>]+>)}{  $source_text =~ s{(<(?>link|img|script)\s[^>]+>)}{
# Line 103  $source_text =~ s{\[\[([A-Z ]+):([^]]+)\ Line 114  $source_text =~ s{\[\[([A-Z ]+):([^]]+)\
114        or die "$0: $result_file_name: $!";        or die "$0: $result_file_name: $!";
115    print $result_file $source_text;    print $result_file $source_text;
116  }  }
117    
118    {
119      my $time = time;
120      my @item = ($time, $status->{ja}, $status->{all});
121      for my $part (qw(cover introduction infrastructure dom semantics browsers
122                       editing comms syntax rendering no)) {
123        push @item, $all_status->{$part}->{ja};
124        push @item, $all_status->{$part}->{all};
125      }
126    
127      open my $status_file, '>>', $status_file_name
128          or die "$0: $status_file_name: $!";
129      print $status_file join "\t", @item;
130      print $status_file "\n";
131      close $status_file;
132    }

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.18

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24