/[suikacvs]/markup/html/html5/spec-ja/make.pl
Suika

Contents of /markup/html/html5/spec-ja/make.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.20 - (hide annotations) (download)
Sat Oct 25 11:28:27 2008 UTC (17 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.19: +5 -1 lines
File MIME type: text/plain
make

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3    
4 wakaba 1.11 BEGIN { require 'common.pl' }
5    
6 wakaba 1.6 my $source_file_name = shift;
7     my $result_file_name = shift;
8 wakaba 1.9 my $status_file_name = shift;
9 wakaba 1.15 my $tbt_file_name_stem = shift;
10 wakaba 1.1
11     my %data;
12     my %pattern;
13 wakaba 1.11 for_each_data_file (sub ($) {
14     my $data_file_name = shift;
15     warn "$data_file_name...\n";
16 wakaba 1.17 load_data_file ($data_file_name, \%data, \%pattern);
17 wakaba 1.11 });
18 wakaba 1.2 my @pattern = sort {length $b <=> length $a} keys %pattern;
19 wakaba 1.1
20     my $source_text;
21     {
22     warn "$source_file_name...\n";
23     open my $source_file, '<:utf8', $source_file_name
24     or die "$0: $source_file_name: $!";
25     local $/ = undef;
26     $source_text = <$source_file>;
27     }
28    
29 wakaba 1.15 my $part = 'cover';
30 wakaba 1.9 my $status = {};
31 wakaba 1.15 my $all_status = {};
32 wakaba 1.14 my $tbt_added = {};
33 wakaba 1.9
34 wakaba 1.15 open my $tbt_file, '>:utf8', $tbt_file_name_stem . '.dat' or
35     die "$0: $tbt_file_name_stem.dat: $!";
36     open my $part_tbt_file, '>:utf8', $tbt_file_name_stem . '-' . $part . '.dat' or
37     die "$0: $tbt_file_name_stem-$part.dat: $!";
38    
39 wakaba 1.1 warn "Generating...\n";
40 wakaba 1.12 $source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6])(?>\s[^>]*)?>)((?>(?!</?(?>p(?>re)?|li|d(?>[tdl]|iv)|t(?>[dr]|h(?>ead)?|able|body|foot)|h[1-6r]|ul|ol)(?>\s[^>]*)?>).)+)}
41 wakaba 1.1 {
42     my ($tag, $text) = ($1, $2);
43     my $n_text = normalize ($text);
44 wakaba 1.15
45     if ($tag =~ /^<h2 id=(\w+)/) {
46     $part = $1;
47     open $part_tbt_file, '>:utf8', $tbt_file_name_stem . '-' . $part . '.dat' or
48     die "$0: $tbt_file_name_stem-$part.dat: $!";
49     }
50 wakaba 1.10
51     if (length $n_text) {
52     my $ja_text = $data{$n_text};
53    
54     $status->{all}++;
55 wakaba 1.15 $all_status->{$part}->{all}++;
56 wakaba 1.10
57     if (defined $ja_text) {
58     $status->{ja}++;
59 wakaba 1.15 $all_status->{$part}->{ja}++;
60 wakaba 1.19 add_class ($tag, 'has-ja-translation') .
61     q[<span class=en-original lang=en>] . escape_id ($text, 'en-') .
62     q[</span>] .
63     q[<span class=ja-translation lang=ja>] . $ja_text . q[</span>];
64 wakaba 1.10 } else {
65 wakaba 1.19 my $v;
66 wakaba 1.10 for my $pattern (@pattern) {
67     if ($n_text =~ /^$pattern$/) {
68     $status->{ja}++;
69 wakaba 1.18 $all_status->{$part}->{ja}++;
70 wakaba 1.19 $v = add_class ($tag, 'has-ja-translation') .
71     q[<span class=en-original lang=en>] .
72     escape_id ($text, 'en-') . q[</span>] .
73     q[<span class=ja-translation lang=ja>] .
74 wakaba 1.10 replace_pattern2 ($pattern{$pattern}, $1, $2, $3, $4, $5) .
75     q[</span>];
76     last;
77     }
78 wakaba 1.1 }
79 wakaba 1.13
80 wakaba 1.19 unless (defined $v) {
81     $v = add_class ($tag, 'no-ja-translation') . $text;
82    
83 wakaba 1.13 $text =~ s/^\s+//;
84     $text =~ s/\s+\z//;
85     $text =~ s/\x0D?\x0A(?:\x0D?\x0A)+/\n/g;
86 wakaba 1.14 unless ($tbt_added->{$text}) {
87 wakaba 1.15 print $tbt_file ($text);
88     print $tbt_file ("\n\n");
89     print $part_tbt_file ($text);
90     print $part_tbt_file ("\n\n");
91 wakaba 1.14 $tbt_added->{$text} = 1;
92     }
93 wakaba 1.13 }
94    
95 wakaba 1.10 $v;
96 wakaba 1.1 }
97 wakaba 1.10 } else {
98     $1 . $2;
99 wakaba 1.1 }
100     }ges;
101     $source_text =~ s{(<(?>link|img|script)\s[^>]+>)}{
102     my $tag = $1;
103     my $n_text = normalize ($tag);
104     my $ja_text = $data{$n_text};
105     if (defined $ja_text) {
106     $ja_text;
107     } else {
108     $tag;
109     }
110     }ges;
111    
112 wakaba 1.4 $source_text =~ s{\[\[([A-Z ]+):([^]]+)\]\]}
113 wakaba 1.1 {<em class=rfc2119 title="$1">$2</em>}gs;
114    
115     #$source_text =~ s[<title>][<base href="http://www.whatwg.org/specs/web-apps/current-work/"><title>];
116    
117     {
118     warn "$result_file_name...\n";
119     open my $result_file, '>:utf8', $result_file_name
120     or die "$0: $result_file_name: $!";
121     print $result_file $source_text;
122 wakaba 1.9 }
123    
124     {
125     my $time = time;
126 wakaba 1.15 my @item = ($time, $status->{ja}, $status->{all});
127     for my $part (qw(cover introduction infrastructure dom semantics browsers
128     editing comms syntax rendering no)) {
129     push @item, $all_status->{$part}->{ja};
130     push @item, $all_status->{$part}->{all};
131     }
132    
133 wakaba 1.9 open my $status_file, '>>', $status_file_name
134     or die "$0: $status_file_name: $!";
135 wakaba 1.15 print $status_file join "\t", @item;
136     print $status_file "\n";
137     close $status_file;
138 wakaba 1.1 }
139 wakaba 1.19
140     sub add_class ($$) {
141     my $tag = shift;
142     my $new_class = shift; # should not contain bare & and bare "
143    
144 wakaba 1.20 if ($tag =~ /^<li\b/) {
145     ## NOTE: This |p| wrapper is necessary, otherwise, if |li| element
146     ## is set to |display: table|, then no list marker is shown.
147     $tag .= qq[<p class="$new_class ja-translation-inserted">];
148     } elsif ($tag =~ /\bclass="/) {
149 wakaba 1.19 $tag =~ s/\bclass="([^"]*)"/class="$1 $new_class"/;
150     } elsif ($tag =~ /\bclass=/) {
151     $tag =~ s/\bclass=([^\s>]+)/class="$1 $new_class"/g;
152     } else {
153     $tag =~ s/>/ class="$new_class">/;
154     }
155    
156     return $tag;
157     } # add_class
158    
159     sub escape_id ($$) {
160     my $content = shift;
161     my $id_prefix = shift; # should not contain bare & and bare "
162    
163     $content =~ s{<([a-zA-Z0-9-][^<>]+)>}{
164     my $tag_content = $1;
165    
166     if ($tag_content =~ /\bid="/) {
167     $tag_content =~ s/\bid="([^"]*)"/id="$id_prefix$1"/;
168     } elsif ($tag_content =~ /\bid=/) {
169     $tag_content =~ s/\bid=(\S+)/id="$id_prefix$1"/;
170     }
171    
172     # if ($tag_content =~ /\bhref=#/) {
173     # $tag_content =~ s/\bhref=#(\S+)/href=#$id_prefix$1/;
174     # }
175    
176     '<' . $tag_content . '>';
177     }ge;
178    
179     return $content;
180     } # escape_id

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24