/[suikacvs]/markup/html/html5/spec-ja/make.pl
Suika

Contents of /markup/html/html5/spec-ja/make.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.10 - (hide annotations) (download)
Fri Jul 18 19:12:04 2008 UTC (17 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.9: +23 -18 lines
File MIME type: text/plain
*** empty log message ***

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3    
4 wakaba 1.6 my $source_file_name = shift;
5 wakaba 1.1 my $data_dir_name = q[data/];
6     my $data_suffix = q[.dat];
7 wakaba 1.6 my $result_file_name = shift;
8 wakaba 1.9 my $status_file_name = shift;
9 wakaba 1.1
10     sub normalize ($) {
11     my $s = shift;
12     $s =~ s/\s+/ /g;
13     $s =~ s/^ //;
14     $s =~ s/ $//g;
15     return $s;
16     } # normalize
17    
18     sub create_pattern1 ($) {
19     my $s = quotemeta shift;
20 wakaba 1.3 $s =~ s/\\\*/(.+)/g;
21 wakaba 1.1 return $s;
22     } # create_pattern1
23    
24     sub replace_pattern2 ($@) {
25     my $s = shift;
26     my @arg = @_;
27     $s =~ s/\$(\d+)/$arg[$1 - 1]/g;
28     return $s;
29     } # replace_pattern2
30    
31     my %data;
32     my %pattern;
33     {
34     opendir my $data_dir, $data_dir_name or die "$0: $data_dir_name: $!";
35     local $/ = undef;
36     for (readdir $data_dir) {
37     next if /^\./; # hidden files
38     my $data_file_name = qq[$data_dir_name$_];
39     next unless $data_file_name =~ /\Q$data_suffix\E$/;
40     warn "$data_file_name...\n";
41     open my $data_file, '<:utf8', $data_file_name
42     or die "$0: $data_file_name: $!";
43 wakaba 1.8 my $data = <$data_file>;
44     $data =~ s/\x0D?\x0A/\n/g;
45     for (split /\n\n+(?=#)/, $data) {
46 wakaba 1.1 my ($en, $ja) = split /\n#ja\n/, $_;
47     if ($en =~ s/^#en\n//) {
48     $data{normalize ($en)} = $ja;
49 wakaba 1.7 } elsif ($en =~ s/^#pattern\n//) {
50 wakaba 1.1 $pattern{create_pattern1 (normalize ($en))} = $ja;
51     }
52     }
53     }
54     }
55 wakaba 1.2 my @pattern = sort {length $b <=> length $a} keys %pattern;
56 wakaba 1.1
57     my $source_text;
58     {
59     warn "$source_file_name...\n";
60     open my $source_file, '<:utf8', $source_file_name
61     or die "$0: $source_file_name: $!";
62     local $/ = undef;
63     $source_text = <$source_file>;
64     }
65    
66 wakaba 1.9 my $status = {};
67    
68 wakaba 1.1 warn "Generating...\n";
69 wakaba 1.10 $source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6])(?>\s[^>]*)?>)((?>(?!</?(?>p(?>re)?|li|d(?>[tdl]|iv)|t[dh]|h[1-6r]|ul|ol)(?>\s[^>]*)?>).)+)}
70 wakaba 1.1 {
71     my ($tag, $text) = ($1, $2);
72     my $n_text = normalize ($text);
73 wakaba 1.10
74     if (length $n_text) {
75     my $ja_text = $data{$n_text};
76    
77     $status->{all}++;
78    
79     if (defined $ja_text) {
80     $status->{ja}++;
81     $tag . q[<span class=ja-translation lang=ja>] . $ja_text . q[</span>];
82     } else {
83     my $v = $tag . $text;
84     for my $pattern (@pattern) {
85     if ($n_text =~ /^$pattern$/) {
86     $status->{ja}++;
87     $v = $tag . q[<span class=ja-translation lang=ja>] .
88     replace_pattern2 ($pattern{$pattern}, $1, $2, $3, $4, $5) .
89     q[</span>];
90     last;
91     }
92 wakaba 1.1 }
93 wakaba 1.10 $v;
94 wakaba 1.1 }
95 wakaba 1.10 } else {
96     $1 . $2;
97 wakaba 1.1 }
98     }ges;
99     $source_text =~ s{(<(?>link|img|script)\s[^>]+>)}{
100     my $tag = $1;
101     my $n_text = normalize ($tag);
102     my $ja_text = $data{$n_text};
103     if (defined $ja_text) {
104     $ja_text;
105     } else {
106     $tag;
107     }
108     }ges;
109    
110 wakaba 1.4 $source_text =~ s{\[\[([A-Z ]+):([^]]+)\]\]}
111 wakaba 1.1 {<em class=rfc2119 title="$1">$2</em>}gs;
112    
113     #$source_text =~ s[<title>][<base href="http://www.whatwg.org/specs/web-apps/current-work/"><title>];
114    
115     {
116     warn "$result_file_name...\n";
117     open my $result_file, '>:utf8', $result_file_name
118     or die "$0: $result_file_name: $!";
119     print $result_file $source_text;
120 wakaba 1.9 }
121    
122     {
123     my $time = time;
124     open my $status_file, '>>', $status_file_name
125     or die "$0: $status_file_name: $!";
126     print $status_file "$time\t$status->{ja}\t$status->{all}\n";
127 wakaba 1.1 }

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24