/[suikacvs]/markup/html/html5/spec-ja/make.pl
Suika

Contents of /markup/html/html5/spec-ja/make.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.9 - (hide annotations) (download)
Sat Jul 12 04:12:00 2008 UTC (17 years, 9 months ago) by wakaba
Branch: MAIN
Changes since 1.8: +15 -0 lines
File MIME type: text/plain
*** empty log message ***

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3    
4 wakaba 1.6 my $source_file_name = shift;
5 wakaba 1.1 my $data_dir_name = q[data/];
6     my $data_suffix = q[.dat];
7 wakaba 1.6 my $result_file_name = shift;
8 wakaba 1.9 my $status_file_name = shift;
9 wakaba 1.1
10     sub normalize ($) {
11     my $s = shift;
12     $s =~ s/\s+/ /g;
13     $s =~ s/^ //;
14     $s =~ s/ $//g;
15     return $s;
16     } # normalize
17    
18     sub create_pattern1 ($) {
19     my $s = quotemeta shift;
20 wakaba 1.3 $s =~ s/\\\*/(.+)/g;
21 wakaba 1.1 return $s;
22     } # create_pattern1
23    
24     sub replace_pattern2 ($@) {
25     my $s = shift;
26     my @arg = @_;
27     $s =~ s/\$(\d+)/$arg[$1 - 1]/g;
28     return $s;
29     } # replace_pattern2
30    
31     my %data;
32     my %pattern;
33     {
34     opendir my $data_dir, $data_dir_name or die "$0: $data_dir_name: $!";
35     local $/ = undef;
36     for (readdir $data_dir) {
37     next if /^\./; # hidden files
38     my $data_file_name = qq[$data_dir_name$_];
39     next unless $data_file_name =~ /\Q$data_suffix\E$/;
40     warn "$data_file_name...\n";
41     open my $data_file, '<:utf8', $data_file_name
42     or die "$0: $data_file_name: $!";
43 wakaba 1.8 my $data = <$data_file>;
44     $data =~ s/\x0D?\x0A/\n/g;
45     for (split /\n\n+(?=#)/, $data) {
46 wakaba 1.1 my ($en, $ja) = split /\n#ja\n/, $_;
47     if ($en =~ s/^#en\n//) {
48     $data{normalize ($en)} = $ja;
49 wakaba 1.7 } elsif ($en =~ s/^#pattern\n//) {
50 wakaba 1.1 $pattern{create_pattern1 (normalize ($en))} = $ja;
51     }
52     }
53     }
54     }
55 wakaba 1.2 my @pattern = sort {length $b <=> length $a} keys %pattern;
56 wakaba 1.1
57     my $source_text;
58     {
59     warn "$source_file_name...\n";
60     open my $source_file, '<:utf8', $source_file_name
61     or die "$0: $source_file_name: $!";
62     local $/ = undef;
63     $source_text = <$source_file>;
64     }
65    
66 wakaba 1.9 my $status = {};
67    
68 wakaba 1.1 warn "Generating...\n";
69     $source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6])(?>\s[^>]*)?>)((?>(?!</?(?>p(?>re)?|li|d(?>[tdl]|iv)|t[dh]|h[1-6]|ul|ol)(?>\s[^>]*)?>).)+)}
70     {
71     my ($tag, $text) = ($1, $2);
72     my $n_text = normalize ($text);
73     my $ja_text = $data{$n_text};
74 wakaba 1.9
75     $status->{all}++;
76    
77 wakaba 1.1 if (defined $ja_text) {
78 wakaba 1.9 $status->{ja}++;
79 wakaba 1.1 $tag . q[<span class=ja-translation lang=ja>] . $ja_text . q[</span>];
80     } else {
81     my $v = $tag . $text;
82 wakaba 1.2 for my $pattern (@pattern) {
83 wakaba 1.1 if ($n_text =~ /^$pattern$/) {
84 wakaba 1.9 $status->{ja}++;
85 wakaba 1.1 $v = $tag . q[<span class=ja-translation lang=ja>] .
86     replace_pattern2 ($pattern{$pattern}, $1, $2, $3, $4, $5) .
87     q[</span>];
88 wakaba 1.5 last;
89 wakaba 1.1 }
90     }
91     $v;
92     }
93     }ges;
94     $source_text =~ s{(<(?>link|img|script)\s[^>]+>)}{
95     my $tag = $1;
96     my $n_text = normalize ($tag);
97     my $ja_text = $data{$n_text};
98     if (defined $ja_text) {
99     $ja_text;
100     } else {
101     $tag;
102     }
103     }ges;
104    
105 wakaba 1.4 $source_text =~ s{\[\[([A-Z ]+):([^]]+)\]\]}
106 wakaba 1.1 {<em class=rfc2119 title="$1">$2</em>}gs;
107    
108     #$source_text =~ s[<title>][<base href="http://www.whatwg.org/specs/web-apps/current-work/"><title>];
109    
110     {
111     warn "$result_file_name...\n";
112     open my $result_file, '>:utf8', $result_file_name
113     or die "$0: $result_file_name: $!";
114     print $result_file $source_text;
115 wakaba 1.9 }
116    
117     {
118     my $time = time;
119     open my $status_file, '>>', $status_file_name
120     or die "$0: $status_file_name: $!";
121     print $status_file "$time\t$status->{ja}\t$status->{all}\n";
122 wakaba 1.1 }

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24