/[suikacvs]/markup/html/html5/spec-ja/make.pl
Suika

Contents of /markup/html/html5/spec-ja/make.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.16 - (hide annotations) (download)
Wed Aug 13 10:00:09 2008 UTC (17 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.15: +36 -1 lines
File MIME type: text/plain
Try to autotrack fragment id change (it's too heavy process, for regexp matches; much worse, it does not work orz)

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3    
4 wakaba 1.11 BEGIN { require 'common.pl' }
5    
6 wakaba 1.6 my $source_file_name = shift;
7     my $result_file_name = shift;
8 wakaba 1.9 my $status_file_name = shift;
9 wakaba 1.15 my $tbt_file_name_stem = shift;
10 wakaba 1.1
11     my %data;
12     my %pattern;
13 wakaba 1.16 my %id_variant_pattern;
14 wakaba 1.11 for_each_data_file (sub ($) {
15     my $data_file_name = shift;
16     warn "$data_file_name...\n";
17 wakaba 1.16 load_data_file ($data_file_name, \%data, \%pattern, \%id_variant_pattern);
18 wakaba 1.11 });
19 wakaba 1.2 my @pattern = sort {length $b <=> length $a} keys %pattern;
20 wakaba 1.1
21     my $source_text;
22     {
23     warn "$source_file_name...\n";
24     open my $source_file, '<:utf8', $source_file_name
25     or die "$0: $source_file_name: $!";
26     local $/ = undef;
27     $source_text = <$source_file>;
28     }
29    
30 wakaba 1.15 my $part = 'cover';
31 wakaba 1.9 my $status = {};
32 wakaba 1.15 my $all_status = {};
33 wakaba 1.14 my $tbt_added = {};
34 wakaba 1.9
35 wakaba 1.15 open my $tbt_file, '>:utf8', $tbt_file_name_stem . '.dat' or
36     die "$0: $tbt_file_name_stem.dat: $!";
37     open my $part_tbt_file, '>:utf8', $tbt_file_name_stem . '-' . $part . '.dat' or
38     die "$0: $tbt_file_name_stem-$part.dat: $!";
39    
40 wakaba 1.1 warn "Generating...\n";
41 wakaba 1.12 $source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6])(?>\s[^>]*)?>)((?>(?!</?(?>p(?>re)?|li|d(?>[tdl]|iv)|t(?>[dr]|h(?>ead)?|able|body|foot)|h[1-6r]|ul|ol)(?>\s[^>]*)?>).)+)}
42 wakaba 1.1 {
43     my ($tag, $text) = ($1, $2);
44     my $n_text = normalize ($text);
45 wakaba 1.15
46     if ($tag =~ /^<h2 id=(\w+)/) {
47     $part = $1;
48     open $part_tbt_file, '>:utf8', $tbt_file_name_stem . '-' . $part . '.dat' or
49     die "$0: $tbt_file_name_stem-$part.dat: $!";
50     }
51 wakaba 1.10
52     if (length $n_text) {
53     my $ja_text = $data{$n_text};
54    
55     $status->{all}++;
56 wakaba 1.15 $all_status->{$part}->{all}++;
57 wakaba 1.10
58     if (defined $ja_text) {
59     $status->{ja}++;
60 wakaba 1.15 $all_status->{$part}->{ja}++;
61 wakaba 1.10 $tag . q[<span class=ja-translation lang=ja>] . $ja_text . q[</span>];
62     } else {
63     my $v = $tag . $text;
64 wakaba 1.13 my $has_ja;
65 wakaba 1.10 for my $pattern (@pattern) {
66     if ($n_text =~ /^$pattern$/) {
67     $status->{ja}++;
68     $v = $tag . q[<span class=ja-translation lang=ja>] .
69     replace_pattern2 ($pattern{$pattern}, $1, $2, $3, $4, $5) .
70     q[</span>];
71 wakaba 1.13 $has_ja = 1;
72 wakaba 1.10 last;
73 wakaba 1.16 }
74     }
75    
76     if ($n_text =~ /\bhref="#/) {
77     for my $pattern (keys %id_variant_pattern) {
78     if ($n_text =~ /^$pattern$/) {
79     $status->{ja}++;
80     my $id_map = {};
81     my $old_ids = $id_variant_pattern{$pattern}->[1];
82     for (0..$#$old_ids) {
83     $id_map->{$old_ids->[$_]}
84     = substr ($n_text, $-[$_ + 1], $+[$_ + 1] - $-[$_ + 1]);
85     }
86     my $w = $id_variant_pattern{$pattern}->[0];
87     for (keys %$id_map) {
88     $w =~ s/\bhref="#([^"]+)"/href="#$id_map->{$1}"/g;
89     }
90    
91     $v = $tag . q[<span class=ja-translation lang=ja>] .
92     $w .
93     q[</span>];
94     $has_ja = 1;
95    
96     my $o = $id_variant_pattern{$pattern}->[2];
97     for (keys %$id_map) {
98     $o =~ s/\bhref="#([^"]+)"/href="#$id_map->{$1}"/g;
99     }
100     $data{$o} ||= $w;
101     delete $id_variant_pattern{$pattern};
102    
103     warn $o;
104    
105     last;
106     }
107 wakaba 1.10 }
108 wakaba 1.1 }
109 wakaba 1.13
110     unless ($has_ja) {
111     $text =~ s/^\s+//;
112     $text =~ s/\s+\z//;
113     $text =~ s/\x0D?\x0A(?:\x0D?\x0A)+/\n/g;
114 wakaba 1.14 unless ($tbt_added->{$text}) {
115 wakaba 1.15 print $tbt_file ($text);
116     print $tbt_file ("\n\n");
117     print $part_tbt_file ($text);
118     print $part_tbt_file ("\n\n");
119 wakaba 1.14 $tbt_added->{$text} = 1;
120     }
121 wakaba 1.13 }
122    
123 wakaba 1.10 $v;
124 wakaba 1.1 }
125 wakaba 1.10 } else {
126     $1 . $2;
127 wakaba 1.1 }
128     }ges;
129     $source_text =~ s{(<(?>link|img|script)\s[^>]+>)}{
130     my $tag = $1;
131     my $n_text = normalize ($tag);
132     my $ja_text = $data{$n_text};
133     if (defined $ja_text) {
134     $ja_text;
135     } else {
136     $tag;
137     }
138     }ges;
139    
140 wakaba 1.4 $source_text =~ s{\[\[([A-Z ]+):([^]]+)\]\]}
141 wakaba 1.1 {<em class=rfc2119 title="$1">$2</em>}gs;
142    
143     #$source_text =~ s[<title>][<base href="http://www.whatwg.org/specs/web-apps/current-work/"><title>];
144    
145     {
146     warn "$result_file_name...\n";
147     open my $result_file, '>:utf8', $result_file_name
148     or die "$0: $result_file_name: $!";
149     print $result_file $source_text;
150 wakaba 1.9 }
151    
152     {
153     my $time = time;
154 wakaba 1.15 my @item = ($time, $status->{ja}, $status->{all});
155     for my $part (qw(cover introduction infrastructure dom semantics browsers
156     editing comms syntax rendering no)) {
157     push @item, $all_status->{$part}->{ja};
158     push @item, $all_status->{$part}->{all};
159     }
160    
161 wakaba 1.9 open my $status_file, '>>', $status_file_name
162     or die "$0: $status_file_name: $!";
163 wakaba 1.15 print $status_file join "\t", @item;
164     print $status_file "\n";
165     close $status_file;
166 wakaba 1.1 }

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24