/[suikacvs]/markup/html/html5/spec-ja/make.pl
Suika

Contents of /markup/html/html5/spec-ja/make.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.16 - (show annotations) (download)
Wed Aug 13 10:00:09 2008 UTC (17 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.15: +36 -1 lines
File MIME type: text/plain
Try to autotrack fragment id change (it's too heavy process, for regexp matches; much worse, it does not work orz)

1 #!/usr/bin/perl
2 use strict;
3
4 BEGIN { require 'common.pl' }
5
6 my $source_file_name = shift;
7 my $result_file_name = shift;
8 my $status_file_name = shift;
9 my $tbt_file_name_stem = shift;
10
11 my %data;
12 my %pattern;
13 my %id_variant_pattern;
14 for_each_data_file (sub ($) {
15 my $data_file_name = shift;
16 warn "$data_file_name...\n";
17 load_data_file ($data_file_name, \%data, \%pattern, \%id_variant_pattern);
18 });
19 my @pattern = sort {length $b <=> length $a} keys %pattern;
20
21 my $source_text;
22 {
23 warn "$source_file_name...\n";
24 open my $source_file, '<:utf8', $source_file_name
25 or die "$0: $source_file_name: $!";
26 local $/ = undef;
27 $source_text = <$source_file>;
28 }
29
30 my $part = 'cover';
31 my $status = {};
32 my $all_status = {};
33 my $tbt_added = {};
34
35 open my $tbt_file, '>:utf8', $tbt_file_name_stem . '.dat' or
36 die "$0: $tbt_file_name_stem.dat: $!";
37 open my $part_tbt_file, '>:utf8', $tbt_file_name_stem . '-' . $part . '.dat' or
38 die "$0: $tbt_file_name_stem-$part.dat: $!";
39
40 warn "Generating...\n";
41 $source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6])(?>\s[^>]*)?>)((?>(?!</?(?>p(?>re)?|li|d(?>[tdl]|iv)|t(?>[dr]|h(?>ead)?|able|body|foot)|h[1-6r]|ul|ol)(?>\s[^>]*)?>).)+)}
42 {
43 my ($tag, $text) = ($1, $2);
44 my $n_text = normalize ($text);
45
46 if ($tag =~ /^<h2 id=(\w+)/) {
47 $part = $1;
48 open $part_tbt_file, '>:utf8', $tbt_file_name_stem . '-' . $part . '.dat' or
49 die "$0: $tbt_file_name_stem-$part.dat: $!";
50 }
51
52 if (length $n_text) {
53 my $ja_text = $data{$n_text};
54
55 $status->{all}++;
56 $all_status->{$part}->{all}++;
57
58 if (defined $ja_text) {
59 $status->{ja}++;
60 $all_status->{$part}->{ja}++;
61 $tag . q[<span class=ja-translation lang=ja>] . $ja_text . q[</span>];
62 } else {
63 my $v = $tag . $text;
64 my $has_ja;
65 for my $pattern (@pattern) {
66 if ($n_text =~ /^$pattern$/) {
67 $status->{ja}++;
68 $v = $tag . q[<span class=ja-translation lang=ja>] .
69 replace_pattern2 ($pattern{$pattern}, $1, $2, $3, $4, $5) .
70 q[</span>];
71 $has_ja = 1;
72 last;
73 }
74 }
75
76 if ($n_text =~ /\bhref="#/) {
77 for my $pattern (keys %id_variant_pattern) {
78 if ($n_text =~ /^$pattern$/) {
79 $status->{ja}++;
80 my $id_map = {};
81 my $old_ids = $id_variant_pattern{$pattern}->[1];
82 for (0..$#$old_ids) {
83 $id_map->{$old_ids->[$_]}
84 = substr ($n_text, $-[$_ + 1], $+[$_ + 1] - $-[$_ + 1]);
85 }
86 my $w = $id_variant_pattern{$pattern}->[0];
87 for (keys %$id_map) {
88 $w =~ s/\bhref="#([^"]+)"/href="#$id_map->{$1}"/g;
89 }
90
91 $v = $tag . q[<span class=ja-translation lang=ja>] .
92 $w .
93 q[</span>];
94 $has_ja = 1;
95
96 my $o = $id_variant_pattern{$pattern}->[2];
97 for (keys %$id_map) {
98 $o =~ s/\bhref="#([^"]+)"/href="#$id_map->{$1}"/g;
99 }
100 $data{$o} ||= $w;
101 delete $id_variant_pattern{$pattern};
102
103 warn $o;
104
105 last;
106 }
107 }
108 }
109
110 unless ($has_ja) {
111 $text =~ s/^\s+//;
112 $text =~ s/\s+\z//;
113 $text =~ s/\x0D?\x0A(?:\x0D?\x0A)+/\n/g;
114 unless ($tbt_added->{$text}) {
115 print $tbt_file ($text);
116 print $tbt_file ("\n\n");
117 print $part_tbt_file ($text);
118 print $part_tbt_file ("\n\n");
119 $tbt_added->{$text} = 1;
120 }
121 }
122
123 $v;
124 }
125 } else {
126 $1 . $2;
127 }
128 }ges;
129 $source_text =~ s{(<(?>link|img|script)\s[^>]+>)}{
130 my $tag = $1;
131 my $n_text = normalize ($tag);
132 my $ja_text = $data{$n_text};
133 if (defined $ja_text) {
134 $ja_text;
135 } else {
136 $tag;
137 }
138 }ges;
139
140 $source_text =~ s{\[\[([A-Z ]+):([^]]+)\]\]}
141 {<em class=rfc2119 title="$1">$2</em>}gs;
142
143 #$source_text =~ s[<title>][<base href="http://www.whatwg.org/specs/web-apps/current-work/"><title>];
144
145 {
146 warn "$result_file_name...\n";
147 open my $result_file, '>:utf8', $result_file_name
148 or die "$0: $result_file_name: $!";
149 print $result_file $source_text;
150 }
151
152 {
153 my $time = time;
154 my @item = ($time, $status->{ja}, $status->{all});
155 for my $part (qw(cover introduction infrastructure dom semantics browsers
156 editing comms syntax rendering no)) {
157 push @item, $all_status->{$part}->{ja};
158 push @item, $all_status->{$part}->{all};
159 }
160
161 open my $status_file, '>>', $status_file_name
162 or die "$0: $status_file_name: $!";
163 print $status_file join "\t", @item;
164 print $status_file "\n";
165 close $status_file;
166 }

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24