/[suikacvs]/markup/html/html5/spec-ja/make.pl
Suika

Contents of /markup/html/html5/spec-ja/make.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.19 - (show annotations) (download)
Sat Oct 25 06:25:57 2008 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.18: +50 -6 lines
File MIME type: text/plain
make

1 #!/usr/bin/perl
2 use strict;
3
4 BEGIN { require 'common.pl' }
5
6 my $source_file_name = shift;
7 my $result_file_name = shift;
8 my $status_file_name = shift;
9 my $tbt_file_name_stem = shift;
10
11 my %data;
12 my %pattern;
13 for_each_data_file (sub ($) {
14 my $data_file_name = shift;
15 warn "$data_file_name...\n";
16 load_data_file ($data_file_name, \%data, \%pattern);
17 });
18 my @pattern = sort {length $b <=> length $a} keys %pattern;
19
20 my $source_text;
21 {
22 warn "$source_file_name...\n";
23 open my $source_file, '<:utf8', $source_file_name
24 or die "$0: $source_file_name: $!";
25 local $/ = undef;
26 $source_text = <$source_file>;
27 }
28
29 my $part = 'cover';
30 my $status = {};
31 my $all_status = {};
32 my $tbt_added = {};
33
34 open my $tbt_file, '>:utf8', $tbt_file_name_stem . '.dat' or
35 die "$0: $tbt_file_name_stem.dat: $!";
36 open my $part_tbt_file, '>:utf8', $tbt_file_name_stem . '-' . $part . '.dat' or
37 die "$0: $tbt_file_name_stem-$part.dat: $!";
38
39 warn "Generating...\n";
40 $source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6])(?>\s[^>]*)?>)((?>(?!</?(?>p(?>re)?|li|d(?>[tdl]|iv)|t(?>[dr]|h(?>ead)?|able|body|foot)|h[1-6r]|ul|ol)(?>\s[^>]*)?>).)+)}
41 {
42 my ($tag, $text) = ($1, $2);
43 my $n_text = normalize ($text);
44
45 if ($tag =~ /^<h2 id=(\w+)/) {
46 $part = $1;
47 open $part_tbt_file, '>:utf8', $tbt_file_name_stem . '-' . $part . '.dat' or
48 die "$0: $tbt_file_name_stem-$part.dat: $!";
49 }
50
51 if (length $n_text) {
52 my $ja_text = $data{$n_text};
53
54 $status->{all}++;
55 $all_status->{$part}->{all}++;
56
57 if (defined $ja_text) {
58 $status->{ja}++;
59 $all_status->{$part}->{ja}++;
60 add_class ($tag, 'has-ja-translation') .
61 q[<span class=en-original lang=en>] . escape_id ($text, 'en-') .
62 q[</span>] .
63 q[<span class=ja-translation lang=ja>] . $ja_text . q[</span>];
64 } else {
65 my $v;
66 for my $pattern (@pattern) {
67 if ($n_text =~ /^$pattern$/) {
68 $status->{ja}++;
69 $all_status->{$part}->{ja}++;
70 $v = add_class ($tag, 'has-ja-translation') .
71 q[<span class=en-original lang=en>] .
72 escape_id ($text, 'en-') . q[</span>] .
73 q[<span class=ja-translation lang=ja>] .
74 replace_pattern2 ($pattern{$pattern}, $1, $2, $3, $4, $5) .
75 q[</span>];
76 last;
77 }
78 }
79
80 unless (defined $v) {
81 $v = add_class ($tag, 'no-ja-translation') . $text;
82
83 $text =~ s/^\s+//;
84 $text =~ s/\s+\z//;
85 $text =~ s/\x0D?\x0A(?:\x0D?\x0A)+/\n/g;
86 unless ($tbt_added->{$text}) {
87 print $tbt_file ($text);
88 print $tbt_file ("\n\n");
89 print $part_tbt_file ($text);
90 print $part_tbt_file ("\n\n");
91 $tbt_added->{$text} = 1;
92 }
93 }
94
95 $v;
96 }
97 } else {
98 $1 . $2;
99 }
100 }ges;
101 $source_text =~ s{(<(?>link|img|script)\s[^>]+>)}{
102 my $tag = $1;
103 my $n_text = normalize ($tag);
104 my $ja_text = $data{$n_text};
105 if (defined $ja_text) {
106 $ja_text;
107 } else {
108 $tag;
109 }
110 }ges;
111
112 $source_text =~ s{\[\[([A-Z ]+):([^]]+)\]\]}
113 {<em class=rfc2119 title="$1">$2</em>}gs;
114
115 #$source_text =~ s[<title>][<base href="http://www.whatwg.org/specs/web-apps/current-work/"><title>];
116
117 {
118 warn "$result_file_name...\n";
119 open my $result_file, '>:utf8', $result_file_name
120 or die "$0: $result_file_name: $!";
121 print $result_file $source_text;
122 }
123
124 {
125 my $time = time;
126 my @item = ($time, $status->{ja}, $status->{all});
127 for my $part (qw(cover introduction infrastructure dom semantics browsers
128 editing comms syntax rendering no)) {
129 push @item, $all_status->{$part}->{ja};
130 push @item, $all_status->{$part}->{all};
131 }
132
133 open my $status_file, '>>', $status_file_name
134 or die "$0: $status_file_name: $!";
135 print $status_file join "\t", @item;
136 print $status_file "\n";
137 close $status_file;
138 }
139
140 sub add_class ($$) {
141 my $tag = shift;
142 my $new_class = shift; # should not contain bare & and bare "
143
144 if ($tag =~ /\bclass="/) {
145 $tag =~ s/\bclass="([^"]*)"/class="$1 $new_class"/;
146 } elsif ($tag =~ /\bclass=/) {
147 $tag =~ s/\bclass=([^\s>]+)/class="$1 $new_class"/g;
148 } else {
149 $tag =~ s/>/ class="$new_class">/;
150 }
151
152 return $tag;
153 } # add_class
154
155 sub escape_id ($$) {
156 my $content = shift;
157 my $id_prefix = shift; # should not contain bare & and bare "
158
159 $content =~ s{<([a-zA-Z0-9-][^<>]+)>}{
160 my $tag_content = $1;
161
162 if ($tag_content =~ /\bid="/) {
163 $tag_content =~ s/\bid="([^"]*)"/id="$id_prefix$1"/;
164 } elsif ($tag_content =~ /\bid=/) {
165 $tag_content =~ s/\bid=(\S+)/id="$id_prefix$1"/;
166 }
167
168 # if ($tag_content =~ /\bhref=#/) {
169 # $tag_content =~ s/\bhref=#(\S+)/href=#$id_prefix$1/;
170 # }
171
172 '<' . $tag_content . '>';
173 }ge;
174
175 return $content;
176 } # escape_id

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24