/[suikacvs]/markup/html/html5/spec-ja/make2.pl
Suika

Contents of /markup/html/html5/spec-ja/make2.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.9 - (show annotations) (download)
Sun Nov 2 06:31:30 2008 UTC (17 years, 2 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.8: +26 -10 lines
File MIME type: text/plain
make

1 #!/usr/bin/perl
2 use strict;
3
4 BEGIN { require 'common.pl' }
5
6 my $source_file_name = shift;
7 my $result_file_name = shift;
8 my $status_file_name = shift;
9
10 my %data;
11 my %pattern;
12
13 my $all = get_all_entries ();
14 for my $key (keys %$all) {
15 for (keys %{$all->{$key}->{exact} or {}}) {
16 my $entry = $all->{$key}->{exact}->{$_};
17 $data{normalize ($entry->{en})} = [$entry->{ja}, $_];
18 }
19
20 for (keys %{$all->{$key}->{pattern} or {}}) {
21 my $entry = $all->{$key}->{pattern}->{$_};
22 $pattern{create_pattern1 (normalize ($entry->{en}))}
23 = [$entry->{ja}, $_];
24 }
25 }
26
27 my @pattern = sort {length $b <=> length $a} keys %pattern;
28
29 my $source_text;
30 {
31 warn "$source_file_name...\n";
32 open my $source_file, '<:utf8', $source_file_name
33 or die "$0: $source_file_name: $!";
34 local $/ = undef;
35 $source_text = <$source_file>;
36 }
37
38 my $part = 'cover';
39
40 my $status = {};
41 my $all_status = {};
42 my $tbt_added = {};
43
44 warn "Generating...\n";
45 $source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6]|caption)(?>\s[^>]*)?>)((?>(?!</?(?>p(?>re)?|li|d(?>[tdl]|iv)|t(?>[dr]|h(?>ead)?|able|b(?>ody|lockquote)|foot)|h[1-6r]|ul|ol|caption|section)(?>\s[^>]*)?>).)+)}
46 {
47 my ($tag, $text) = ($1, $2);
48
49 my $prefix = '';
50 if ($text =~ s#^(<span class=secno>[^<>]+</span>)##) {
51 $prefix = $1;
52 }
53
54 my $suffix = '';
55 my $suffix2 = '';
56 if ($text =~ s#(<!--(?:(?!-->).)+)$##s) {
57 $suffix = $1 . '-->';
58 $suffix2 = '<!--';
59 }
60 while ($text =~ s#(<!--(?:(?!-->).)+-->\s*)$##s) {
61 $suffix = $1 . $suffix;
62 }
63
64 my $n_text = normalize ($text);
65
66 if ($tag =~ /^<h2 id=(\w+)/) {
67 $part = $1;
68 }
69
70 if (length $n_text) {
71 my $ja_text = $data{$n_text};
72
73 $status->{all}++;
74 $all_status->{$part}->{all}++;
75
76 if (defined $ja_text) {
77 $status->{ja}++;
78 $all_status->{$part}->{ja}++;
79
80 my $jt = $ja_text->[0];
81 $jt =~ s{\[\[([A-Z ]+):([^]]+)\]\]}
82 {<em class=rfc2119 title="$1">$2</em>}gs;
83
84 add_class ($tag, 'has-ja-translation', $ja_text->[1]) .
85 q[<span class=en-original lang=en>] .
86 $prefix . escape_id ($text, 'en-') . $suffix .
87 q[</span>] .
88 q[<span class=ja-translation lang=ja>] .
89 $prefix . $jt . $suffix .
90 q[</span>] .
91 $suffix2;
92 } else {
93 my $v;
94 for my $pattern (@pattern) {
95 if ($n_text =~ /^$pattern$/) {
96 $status->{ja}++;
97 $all_status->{$part}->{ja}++;
98
99 my $jt = replace_pattern2 ($pattern{$pattern}->[0],
100 $1, $2, $3, $4, $5, $6, $7, $8, $9);
101 $jt =~ s{\[\[([A-Z ]+):([^]]+)\]\]}
102 {<em class=rfc2119 title="$1">$2</em>}gs;
103
104 my $real_hash = get_hash ($n_text);
105 $v = add_class ($tag, 'has-ja-translation',
106 $pattern{$pattern}->[1], $real_hash) .
107 q[<span class=en-original lang=en>] .
108 $prefix . escape_id ($text, 'en-') . $suffix .
109 q[</span>] .
110 q[<span class=ja-translation lang=ja>] .
111 $prefix . $jt . $suffix .
112 q[</span>] .
113 $suffix2;
114
115 unless ($tbt_added->{$n_text}) {
116 set_fallback_entry ($real_hash => {en => $text});
117 $tbt_added->{$n_text} = 1;
118 }
119 last;
120 }
121 }
122
123 unless (defined $v) {
124 my $hash = get_hash ($n_text);
125 $v = add_class ($tag, 'no-ja-translation', $hash) .
126 '<span class=en-original lang=en>' .
127 $prefix . $text . $suffix .
128 '</span>' .
129 $suffix2;
130
131 unless ($tbt_added->{$n_text}) {
132 set_fallback_entry ($hash => {en => $text});
133 $tbt_added->{$n_text} = 1;
134 }
135 }
136
137 $v;
138 }
139 } else {
140 $1 . $2;
141 }
142 }ges;
143 $source_text =~ s{(<((?>link|img|script))\s[^>]+>)}{
144 my $tag = $1;
145 my $tag_name = $2;
146 my $n_text = normalize ($tag);
147 my $ja_text = $data{$n_text};
148 if (defined $ja_text) {
149 $ja_text->[0];
150 } else {
151 unless ($tbt_added->{$n_text}) {
152 set_fallback_entry (scalar (get_hash ($n_text)),
153 {en => $tag, tags => [$tag_name . '-tag']});
154 $tbt_added->{$n_text} = 1;
155 }
156 $tag;
157 }
158 }ges;
159
160 #$source_text =~ s[<title>][<base href="http://www.whatwg.org/specs/web-apps/current-work/"><title>];
161
162 {
163 warn "$result_file_name...\n";
164 open my $result_file, '>:utf8', $result_file_name
165 or die "$0: $result_file_name: $!";
166 print $result_file $source_text;
167 }
168
169 {
170 my $time = time;
171 my @item = ($time, $status->{ja}, $status->{all});
172 for my $part (qw(cover introduction infrastructure dom semantics browsers
173 editing comms syntax rendering no)) {
174 push @item, $all_status->{$part}->{ja};
175 push @item, $all_status->{$part}->{all};
176 }
177
178 open my $status_file, '>>', $status_file_name
179 or die "$0: $status_file_name: $!";
180 print $status_file join "\t", @item;
181 print $status_file "\n";
182 close $status_file;
183 }
184
185 save_fallback_entries ();
186
187 sub add_class ($$$;$) {
188 my $tag = shift;
189 my $new_class = shift; # should not contain bare & and bare "
190 my $hash = shift;
191 my $real_hash = shift;
192 $real_hash = qq[ data-ja-real-hash="$real_hash"] if defined $real_hash;
193
194 if ($tag =~ /^<li\b/) {
195 ## NOTE: This |p| wrapper is necessary, otherwise, if |li| element
196 ## is set to |display: table|, then no list marker is shown.
197 $tag .= qq[<p class="$new_class ja-translation-inserted" data-ja-hash="$hash"$real_hash>];
198 } elsif ($tag =~ /\bclass="/) {
199 $tag =~ s/\bclass="([^"]*)"/class="$1 $new_class" data-ja-hash="$hash"$real_hash/;
200 } elsif ($tag =~ /\bclass=/) {
201 $tag =~ s/\bclass=([^\s>]+)/class="$1 $new_class" data-ja-hash="$hash"$real_hash/g;
202 } else {
203 $tag =~ s/>/ class="$new_class" data-ja-hash="$hash"$real_hash>/;
204 }
205
206 return $tag;
207 } # add_class
208
209 sub escape_id ($$) {
210 my $content = shift;
211 my $id_prefix = shift; # should not contain bare & and bare "
212
213 $content =~ s{<([a-zA-Z0-9-][^<>]+)>}{
214 my $tag_content = $1;
215
216 if ($tag_content =~ /\bid="/) {
217 $tag_content =~ s/\bid="([^"]*)"/id="$id_prefix$1"/;
218 } elsif ($tag_content =~ /\bid=/) {
219 $tag_content =~ s/\bid=(\S+)/id="$id_prefix$1"/;
220 }
221
222 # if ($tag_content =~ /\bhref=#/) {
223 # $tag_content =~ s/\bhref=#(\S+)/href=#$id_prefix$1/;
224 # }
225
226 '<' . $tag_content . '>';
227 }ge;
228
229 return $content;
230 } # escape_id
231
232 ## Author: Wakaba <w@suika.fam.cx>.
233 ## License: Copyright 2008 Wakaba. You are granted a license to use,
234 ## reproduce and create derivative works of this script.
235 ## $Date: 2008/10/31 06:32:25 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24