/[suikacvs]/markup/html/html5/spec-ja/make2.pl
Suika

Contents of /markup/html/html5/spec-ja/make2.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.9 - (hide annotations) (download)
Sun Nov 2 06:31:30 2008 UTC (17 years, 2 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.8: +26 -10 lines
File MIME type: text/plain
make

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3    
4     BEGIN { require 'common.pl' }
5    
6     my $source_file_name = shift;
7     my $result_file_name = shift;
8     my $status_file_name = shift;
9    
10     my %data;
11     my %pattern;
12    
13     my $all = get_all_entries ();
14     for my $key (keys %$all) {
15     for (keys %{$all->{$key}->{exact} or {}}) {
16     my $entry = $all->{$key}->{exact}->{$_};
17     $data{normalize ($entry->{en})} = [$entry->{ja}, $_];
18     }
19    
20     for (keys %{$all->{$key}->{pattern} or {}}) {
21     my $entry = $all->{$key}->{pattern}->{$_};
22     $pattern{create_pattern1 (normalize ($entry->{en}))}
23     = [$entry->{ja}, $_];
24     }
25     }
26    
27     my @pattern = sort {length $b <=> length $a} keys %pattern;
28    
29     my $source_text;
30     {
31     warn "$source_file_name...\n";
32     open my $source_file, '<:utf8', $source_file_name
33     or die "$0: $source_file_name: $!";
34     local $/ = undef;
35     $source_text = <$source_file>;
36     }
37    
38     my $part = 'cover';
39    
40     my $status = {};
41     my $all_status = {};
42     my $tbt_added = {};
43    
44     warn "Generating...\n";
45 wakaba 1.8 $source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6]|caption)(?>\s[^>]*)?>)((?>(?!</?(?>p(?>re)?|li|d(?>[tdl]|iv)|t(?>[dr]|h(?>ead)?|able|b(?>ody|lockquote)|foot)|h[1-6r]|ul|ol|caption|section)(?>\s[^>]*)?>).)+)}
46 wakaba 1.1 {
47     my ($tag, $text) = ($1, $2);
48 wakaba 1.9
49 wakaba 1.1 my $prefix = '';
50     if ($text =~ s#^(<span class=secno>[^<>]+</span>)##) {
51     $prefix = $1;
52     }
53 wakaba 1.9
54     my $suffix = '';
55     my $suffix2 = '';
56     if ($text =~ s#(<!--(?:(?!-->).)+)$##s) {
57     $suffix = $1 . '-->';
58     $suffix2 = '<!--';
59     }
60     while ($text =~ s#(<!--(?:(?!-->).)+-->\s*)$##s) {
61     $suffix = $1 . $suffix;
62     }
63    
64 wakaba 1.1 my $n_text = normalize ($text);
65    
66     if ($tag =~ /^<h2 id=(\w+)/) {
67     $part = $1;
68     }
69    
70     if (length $n_text) {
71     my $ja_text = $data{$n_text};
72    
73     $status->{all}++;
74     $all_status->{$part}->{all}++;
75    
76     if (defined $ja_text) {
77     $status->{ja}++;
78     $all_status->{$part}->{ja}++;
79 wakaba 1.7
80     my $jt = $ja_text->[0];
81     $jt =~ s{\[\[([A-Z ]+):([^]]+)\]\]}
82     {<em class=rfc2119 title="$1">$2</em>}gs;
83    
84 wakaba 1.1 add_class ($tag, 'has-ja-translation', $ja_text->[1]) .
85     q[<span class=en-original lang=en>] .
86 wakaba 1.9 $prefix . escape_id ($text, 'en-') . $suffix .
87     q[</span>] .
88 wakaba 1.1 q[<span class=ja-translation lang=ja>] .
89 wakaba 1.9 $prefix . $jt . $suffix .
90     q[</span>] .
91     $suffix2;
92 wakaba 1.1 } else {
93     my $v;
94     for my $pattern (@pattern) {
95     if ($n_text =~ /^$pattern$/) {
96     $status->{ja}++;
97     $all_status->{$part}->{ja}++;
98 wakaba 1.7
99     my $jt = replace_pattern2 ($pattern{$pattern}->[0],
100     $1, $2, $3, $4, $5, $6, $7, $8, $9);
101     $jt =~ s{\[\[([A-Z ]+):([^]]+)\]\]}
102     {<em class=rfc2119 title="$1">$2</em>}gs;
103    
104 wakaba 1.2 my $real_hash = get_hash ($n_text);
105 wakaba 1.1 $v = add_class ($tag, 'has-ja-translation',
106 wakaba 1.2 $pattern{$pattern}->[1], $real_hash) .
107 wakaba 1.1 q[<span class=en-original lang=en>] .
108 wakaba 1.9 $prefix . escape_id ($text, 'en-') . $suffix .
109     q[</span>] .
110 wakaba 1.1 q[<span class=ja-translation lang=ja>] .
111 wakaba 1.9 $prefix . $jt . $suffix .
112     q[</span>] .
113     $suffix2;
114 wakaba 1.2
115     unless ($tbt_added->{$n_text}) {
116     set_fallback_entry ($real_hash => {en => $text});
117     $tbt_added->{$n_text} = 1;
118     }
119 wakaba 1.1 last;
120     }
121     }
122    
123     unless (defined $v) {
124     my $hash = get_hash ($n_text);
125 wakaba 1.2 $v = add_class ($tag, 'no-ja-translation', $hash) .
126     '<span class=en-original lang=en>' .
127 wakaba 1.9 $prefix . $text . $suffix .
128     '</span>' .
129     $suffix2;
130 wakaba 1.1
131 wakaba 1.2 unless ($tbt_added->{$n_text}) {
132 wakaba 1.1 set_fallback_entry ($hash => {en => $text});
133 wakaba 1.2 $tbt_added->{$n_text} = 1;
134 wakaba 1.1 }
135     }
136    
137     $v;
138     }
139     } else {
140     $1 . $2;
141     }
142     }ges;
143 wakaba 1.5 $source_text =~ s{(<((?>link|img|script))\s[^>]+>)}{
144 wakaba 1.1 my $tag = $1;
145 wakaba 1.5 my $tag_name = $2;
146 wakaba 1.1 my $n_text = normalize ($tag);
147     my $ja_text = $data{$n_text};
148     if (defined $ja_text) {
149     $ja_text->[0];
150     } else {
151 wakaba 1.5 unless ($tbt_added->{$n_text}) {
152     set_fallback_entry (scalar (get_hash ($n_text)),
153     {en => $tag, tags => [$tag_name . '-tag']});
154     $tbt_added->{$n_text} = 1;
155     }
156 wakaba 1.1 $tag;
157     }
158     }ges;
159    
160     #$source_text =~ s[<title>][<base href="http://www.whatwg.org/specs/web-apps/current-work/"><title>];
161    
162     {
163     warn "$result_file_name...\n";
164     open my $result_file, '>:utf8', $result_file_name
165     or die "$0: $result_file_name: $!";
166     print $result_file $source_text;
167     }
168    
169     {
170     my $time = time;
171     my @item = ($time, $status->{ja}, $status->{all});
172     for my $part (qw(cover introduction infrastructure dom semantics browsers
173     editing comms syntax rendering no)) {
174     push @item, $all_status->{$part}->{ja};
175     push @item, $all_status->{$part}->{all};
176     }
177    
178     open my $status_file, '>>', $status_file_name
179     or die "$0: $status_file_name: $!";
180     print $status_file join "\t", @item;
181     print $status_file "\n";
182     close $status_file;
183     }
184    
185     save_fallback_entries ();
186    
187 wakaba 1.2 sub add_class ($$$;$) {
188 wakaba 1.1 my $tag = shift;
189     my $new_class = shift; # should not contain bare & and bare "
190     my $hash = shift;
191 wakaba 1.2 my $real_hash = shift;
192     $real_hash = qq[ data-ja-real-hash="$real_hash"] if defined $real_hash;
193 wakaba 1.1
194     if ($tag =~ /^<li\b/) {
195     ## NOTE: This |p| wrapper is necessary, otherwise, if |li| element
196     ## is set to |display: table|, then no list marker is shown.
197 wakaba 1.2 $tag .= qq[<p class="$new_class ja-translation-inserted" data-ja-hash="$hash"$real_hash>];
198 wakaba 1.1 } elsif ($tag =~ /\bclass="/) {
199 wakaba 1.2 $tag =~ s/\bclass="([^"]*)"/class="$1 $new_class" data-ja-hash="$hash"$real_hash/;
200 wakaba 1.1 } elsif ($tag =~ /\bclass=/) {
201 wakaba 1.2 $tag =~ s/\bclass=([^\s>]+)/class="$1 $new_class" data-ja-hash="$hash"$real_hash/g;
202 wakaba 1.1 } else {
203 wakaba 1.2 $tag =~ s/>/ class="$new_class" data-ja-hash="$hash"$real_hash>/;
204 wakaba 1.1 }
205    
206     return $tag;
207     } # add_class
208    
209     sub escape_id ($$) {
210     my $content = shift;
211     my $id_prefix = shift; # should not contain bare & and bare "
212    
213     $content =~ s{<([a-zA-Z0-9-][^<>]+)>}{
214     my $tag_content = $1;
215    
216     if ($tag_content =~ /\bid="/) {
217     $tag_content =~ s/\bid="([^"]*)"/id="$id_prefix$1"/;
218     } elsif ($tag_content =~ /\bid=/) {
219     $tag_content =~ s/\bid=(\S+)/id="$id_prefix$1"/;
220     }
221    
222     # if ($tag_content =~ /\bhref=#/) {
223     # $tag_content =~ s/\bhref=#(\S+)/href=#$id_prefix$1/;
224     # }
225    
226     '<' . $tag_content . '>';
227     }ge;
228    
229     return $content;
230     } # escape_id
231 wakaba 1.6
232     ## Author: Wakaba <w@suika.fam.cx>.
233     ## License: Copyright 2008 Wakaba. You are granted a license to use,
234     ## reproduce and create derivative works of this script.
235 wakaba 1.9 ## $Date: 2008/10/31 06:32:25 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24