/[suikacvs]/markup/html/html5/spec-ja/make2.pl
Suika

Contents of /markup/html/html5/spec-ja/make2.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (hide annotations) (download)
Fri Oct 31 06:32:25 2008 UTC (17 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.7: +2 -2 lines
File MIME type: text/plain
Minor fixes

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3    
4     BEGIN { require 'common.pl' }
5    
6     my $source_file_name = shift;
7     my $result_file_name = shift;
8     my $status_file_name = shift;
9    
10     my %data;
11     my %pattern;
12    
13     my $all = get_all_entries ();
14     for my $key (keys %$all) {
15     for (keys %{$all->{$key}->{exact} or {}}) {
16     my $entry = $all->{$key}->{exact}->{$_};
17     $data{normalize ($entry->{en})} = [$entry->{ja}, $_];
18     }
19    
20     for (keys %{$all->{$key}->{pattern} or {}}) {
21     my $entry = $all->{$key}->{pattern}->{$_};
22     $pattern{create_pattern1 (normalize ($entry->{en}))}
23     = [$entry->{ja}, $_];
24     }
25     }
26    
27     my @pattern = sort {length $b <=> length $a} keys %pattern;
28    
29     my $source_text;
30     {
31     warn "$source_file_name...\n";
32     open my $source_file, '<:utf8', $source_file_name
33     or die "$0: $source_file_name: $!";
34     local $/ = undef;
35     $source_text = <$source_file>;
36     }
37    
38     my $part = 'cover';
39    
40     my $status = {};
41     my $all_status = {};
42     my $tbt_added = {};
43    
44     warn "Generating...\n";
45 wakaba 1.8 $source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6]|caption)(?>\s[^>]*)?>)((?>(?!</?(?>p(?>re)?|li|d(?>[tdl]|iv)|t(?>[dr]|h(?>ead)?|able|b(?>ody|lockquote)|foot)|h[1-6r]|ul|ol|caption|section)(?>\s[^>]*)?>).)+)}
46 wakaba 1.1 {
47     my ($tag, $text) = ($1, $2);
48     my $prefix = '';
49     if ($text =~ s#^(<span class=secno>[^<>]+</span>)##) {
50     $prefix = $1;
51     }
52     my $n_text = normalize ($text);
53    
54     if ($tag =~ /^<h2 id=(\w+)/) {
55     $part = $1;
56     }
57    
58     if (length $n_text) {
59     my $ja_text = $data{$n_text};
60    
61     $status->{all}++;
62     $all_status->{$part}->{all}++;
63    
64     if (defined $ja_text) {
65     $status->{ja}++;
66     $all_status->{$part}->{ja}++;
67 wakaba 1.7
68     my $jt = $ja_text->[0];
69     $jt =~ s{\[\[([A-Z ]+):([^]]+)\]\]}
70     {<em class=rfc2119 title="$1">$2</em>}gs;
71    
72 wakaba 1.1 add_class ($tag, 'has-ja-translation', $ja_text->[1]) .
73     q[<span class=en-original lang=en>] .
74     $prefix .
75     escape_id ($text, 'en-') . q[</span>] .
76     q[<span class=ja-translation lang=ja>] .
77 wakaba 1.7 $prefix . $jt . q[</span>];
78 wakaba 1.1 } else {
79     my $v;
80     for my $pattern (@pattern) {
81     if ($n_text =~ /^$pattern$/) {
82     $status->{ja}++;
83     $all_status->{$part}->{ja}++;
84 wakaba 1.7
85     my $jt = replace_pattern2 ($pattern{$pattern}->[0],
86     $1, $2, $3, $4, $5, $6, $7, $8, $9);
87     $jt =~ s{\[\[([A-Z ]+):([^]]+)\]\]}
88     {<em class=rfc2119 title="$1">$2</em>}gs;
89    
90 wakaba 1.2 my $real_hash = get_hash ($n_text);
91 wakaba 1.1 $v = add_class ($tag, 'has-ja-translation',
92 wakaba 1.2 $pattern{$pattern}->[1], $real_hash) .
93 wakaba 1.1 q[<span class=en-original lang=en>] .
94     $prefix .
95     escape_id ($text, 'en-') . q[</span>] .
96     q[<span class=ja-translation lang=ja>] .
97 wakaba 1.7 $prefix . $jt .
98 wakaba 1.1 q[</span>];
99 wakaba 1.2
100     unless ($tbt_added->{$n_text}) {
101     set_fallback_entry ($real_hash => {en => $text});
102     $tbt_added->{$n_text} = 1;
103     }
104 wakaba 1.1 last;
105     }
106     }
107    
108     unless (defined $v) {
109     my $hash = get_hash ($n_text);
110 wakaba 1.2 $v = add_class ($tag, 'no-ja-translation', $hash) .
111     '<span class=en-original lang=en>' .
112     $prefix . $text .
113     '</span>';
114 wakaba 1.1
115 wakaba 1.2 unless ($tbt_added->{$n_text}) {
116 wakaba 1.1 set_fallback_entry ($hash => {en => $text});
117 wakaba 1.2 $tbt_added->{$n_text} = 1;
118 wakaba 1.1 }
119     }
120    
121     $v;
122     }
123     } else {
124     $1 . $2;
125     }
126     }ges;
127 wakaba 1.5 $source_text =~ s{(<((?>link|img|script))\s[^>]+>)}{
128 wakaba 1.1 my $tag = $1;
129 wakaba 1.5 my $tag_name = $2;
130 wakaba 1.1 my $n_text = normalize ($tag);
131     my $ja_text = $data{$n_text};
132     if (defined $ja_text) {
133     $ja_text->[0];
134     } else {
135 wakaba 1.5 unless ($tbt_added->{$n_text}) {
136     set_fallback_entry (scalar (get_hash ($n_text)),
137     {en => $tag, tags => [$tag_name . '-tag']});
138     $tbt_added->{$n_text} = 1;
139     }
140 wakaba 1.1 $tag;
141     }
142     }ges;
143    
144     #$source_text =~ s[<title>][<base href="http://www.whatwg.org/specs/web-apps/current-work/"><title>];
145    
146     {
147     warn "$result_file_name...\n";
148     open my $result_file, '>:utf8', $result_file_name
149     or die "$0: $result_file_name: $!";
150     print $result_file $source_text;
151     }
152    
153     {
154     my $time = time;
155     my @item = ($time, $status->{ja}, $status->{all});
156     for my $part (qw(cover introduction infrastructure dom semantics browsers
157     editing comms syntax rendering no)) {
158     push @item, $all_status->{$part}->{ja};
159     push @item, $all_status->{$part}->{all};
160     }
161    
162     open my $status_file, '>>', $status_file_name
163     or die "$0: $status_file_name: $!";
164     print $status_file join "\t", @item;
165     print $status_file "\n";
166     close $status_file;
167     }
168    
169     save_fallback_entries ();
170    
171 wakaba 1.2 sub add_class ($$$;$) {
172 wakaba 1.1 my $tag = shift;
173     my $new_class = shift; # should not contain bare & and bare "
174     my $hash = shift;
175 wakaba 1.2 my $real_hash = shift;
176     $real_hash = qq[ data-ja-real-hash="$real_hash"] if defined $real_hash;
177 wakaba 1.1
178     if ($tag =~ /^<li\b/) {
179     ## NOTE: This |p| wrapper is necessary, otherwise, if |li| element
180     ## is set to |display: table|, then no list marker is shown.
181 wakaba 1.2 $tag .= qq[<p class="$new_class ja-translation-inserted" data-ja-hash="$hash"$real_hash>];
182 wakaba 1.1 } elsif ($tag =~ /\bclass="/) {
183 wakaba 1.2 $tag =~ s/\bclass="([^"]*)"/class="$1 $new_class" data-ja-hash="$hash"$real_hash/;
184 wakaba 1.1 } elsif ($tag =~ /\bclass=/) {
185 wakaba 1.2 $tag =~ s/\bclass=([^\s>]+)/class="$1 $new_class" data-ja-hash="$hash"$real_hash/g;
186 wakaba 1.1 } else {
187 wakaba 1.2 $tag =~ s/>/ class="$new_class" data-ja-hash="$hash"$real_hash>/;
188 wakaba 1.1 }
189    
190     return $tag;
191     } # add_class
192    
193     sub escape_id ($$) {
194     my $content = shift;
195     my $id_prefix = shift; # should not contain bare & and bare "
196    
197     $content =~ s{<([a-zA-Z0-9-][^<>]+)>}{
198     my $tag_content = $1;
199    
200     if ($tag_content =~ /\bid="/) {
201     $tag_content =~ s/\bid="([^"]*)"/id="$id_prefix$1"/;
202     } elsif ($tag_content =~ /\bid=/) {
203     $tag_content =~ s/\bid=(\S+)/id="$id_prefix$1"/;
204     }
205    
206     # if ($tag_content =~ /\bhref=#/) {
207     # $tag_content =~ s/\bhref=#(\S+)/href=#$id_prefix$1/;
208     # }
209    
210     '<' . $tag_content . '>';
211     }ge;
212    
213     return $content;
214     } # escape_id
215 wakaba 1.6
216     ## Author: Wakaba <w@suika.fam.cx>.
217     ## License: Copyright 2008 Wakaba. You are granted a license to use,
218     ## reproduce and create derivative works of this script.
219 wakaba 1.8 ## $Date: 2008/10/31 06:22:59 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24