html5/spec-ja/make.pl

#!/usr/bin/perl
use strict;

BEGIN { require 'common.pl' }

# Positional command-line arguments, consumed from the front of @ARGV:
#   1. source file  - HTML text that is read and rewritten
#   2. result file  - where the rewritten text is written
#   3. status file  - a "time, translated, total" line is appended here
#   4. tbt file     - receives each text block that has no translation yet
# Any missing argument is left undefined.
my ($source_file_name, $result_file_name,
    $status_file_name, $tbt_file_name) = splice @ARGV, 0, 4;

# Translation tables, both filled in by load_data_file (from common.pl):
#   %data    - looked up later by normalized source text
#   %pattern - keys are used later as regular expressions, values are
#              handed to replace_pattern2
my (%data, %pattern);
for_each_data_file (sub ($) {
  my ($data_file_name) = @_;
  warn "$data_file_name...\n";
  load_data_file ($data_file_name, \%data, \%pattern);
});

# Patterns are tried longest-first (by length of the pattern source).
my @pattern = sort { length ($b) <=> length ($a) } keys %pattern;

# Read the whole source file into memory as one string.
warn "$source_file_name...\n";
my $source_text = do {
  open my $source_file, '<:utf8', $source_file_name
      or die "$0: $source_file_name: $!";
  local $/;  # no record separator: a single read returns the entire file
  <$source_file>;
};

# Output file that collects the text blocks still lacking a translation.
# It is truncated on every run.
open (my $tbt_file, '>:utf8', $tbt_file_name)
    or die "$0: $tbt_file_name: $!";

# $status    - counters: {all} blocks seen, {ja} blocks translated
# $tbt_added - untranslated texts already written to the tbt file
my ($status, $tbt_added) = ({}, {});

warn "Generating...\n";

# Compile every translation pattern exactly once, keeping the longest-first
# order of @pattern.  Matching with an interpolated /^$pattern$/ inside the
# loop below would recompile each pattern for every untranslated block.
my @compiled_pattern;
for my $pattern_source (@pattern) {
  push @compiled_pattern,
      [qr/^$pattern_source$/, $pattern{$pattern_source}];
}

# Returns the replacement for one block of the source text.
#
#   $tag  - the start tag that opens the block
#   $text - everything between that tag and the next block-level tag
#
# A block whose normalized text is empty is returned unchanged and is not
# counted.  Otherwise $status->{all} is incremented and the block is
# translated by, in order of preference:
#   1. an exact entry in %data for the normalized text;
#   2. the first pattern (longest first) matching the whole normalized
#      text, expanded by replace_pattern2 with the first five captures.
# When neither applies the block is returned unchanged and its trimmed
# text is written, once per distinct text, to the tbt file.
my $translate_block = sub {
  my ($tag, $text) = @_;
  my $n_text = normalize ($text);

  return $tag . $text unless length $n_text;

  $status->{all}++;

  my $ja_text = $data{$n_text};
  if (defined $ja_text) {
    $status->{ja}++;
    return $tag . q[<span class=ja-translation lang=ja>] . $ja_text .
        q[</span>];
  }

  for my $entry (@compiled_pattern) {
    my ($regexp, $template) = @$entry;
    next unless $n_text =~ $regexp;

    # $1..$5 now refer to the captures of the pattern that just matched.
    $status->{ja}++;
    return $tag . q[<span class=ja-translation lang=ja>] .
        replace_pattern2 ($template, $1, $2, $3, $4, $5) .
        q[</span>];
  }

  # No translation: the output keeps the text exactly as it was, so build
  # the return value before the text is trimmed for the tbt file.
  my $unchanged = $tag . $text;

  $text =~ s/^\s+//;
  $text =~ s/\s+\z//;
  # Entries in the tbt file are separated by a blank line, so blank lines
  # inside an entry are collapsed to a single newline.
  $text =~ s/\x0D?\x0A(?:\x0D?\x0A)+/\n/g;
  unless ($tbt_added->{$text}) {
    print $tbt_file $text;
    print $tbt_file "\n\n";
    $tbt_added->{$text} = 1;
  }

  return $unchanged;
};

# A block is a p/pre/li/dt/dd/td/th/h1-h6 start tag followed by at least one
# character, running up to (not including) the next block-level start or
# end tag.
$source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6])(?>\s[^>]*)?>)((?>(?!</?(?>p(?>re)?|li|d(?>[tdl]|iv)|t(?>[dr]|h(?>ead)?|able|body|foot)|h[1-6r]|ul|ol)(?>\s[^>]*)?>).)+)}
{
  $translate_block->($1, $2);
}ges;
# Swap a whole <link>, <img> or <script> start tag for its entry in %data
# when the normalized tag has one; otherwise keep the tag as it is.
$source_text =~ s{(<(?>link|img|script)\s[^>]+>)}{
  my $element = $1;
  my $localized = $data{normalize ($element)};
  defined $localized ? $localized : $element;
}ges;

# Turn every [[KEYWORD:text]] marker into an <em class=rfc2119> element
# whose title is KEYWORD (capital letters and spaces) and whose content is
# the text.
$source_text =~ s{\[\[([A-Z ]+):([^]]+)\]\]}
{<em class=rfc2119 title="$1">$2</em>}gs;

#$source_text =~ s[<title>][<base href="http://www.whatwg.org/specs/web-apps/current-work/"><title>];

# Write the rewritten text to the result file, replacing any old content.
{
  warn "$result_file_name...\n";
  open my $result_file, '>:utf8', $result_file_name
      or die "$0: $result_file_name: $!";
  print $result_file $source_text
      or die "$0: $result_file_name: $!";
  # Output is buffered, so a write error (for example a full disk) may only
  # be reported by close; without this check a truncated result file would
  # go unnoticed.
  close $result_file
      or die "$0: $result_file_name: $!";
}

# Append one tab-separated line to the status file:
#   current epoch time, number of translated blocks, number of blocks.
{
  my $time = time;
  # The counters are only created by the first increment, so they are
  # undefined when nothing was counted; report 0 instead of an empty field.
  my $ja_count = $status->{ja} || 0;
  my $all_count = $status->{all} || 0;
  open my $status_file, '>>', $status_file_name
      or die "$0: $status_file_name: $!";
  print $status_file "$time\t$ja_count\t$all_count\n"
      or die "$0: $status_file_name: $!";
  # Buffered write errors may only be reported by close.
  close $status_file
      or die "$0: $status_file_name: $!";
}
1	#!/usr/bin/perl
2	use strict;
3
4	BEGIN { require 'common.pl' }
5
6	my $source_file_name = shift;
7	my $result_file_name = shift;
8	my $status_file_name = shift;
9	my $tbt_file_name = shift;
10
11	my %data;
12	my %pattern;
13	for_each_data_file (sub ($) {
14	my $data_file_name = shift;
15	warn "$data_file_name...\n";
16	load_data_file ($data_file_name, \%data, \%pattern);
17	});
18	my @pattern = sort {length $b <=> length $a} keys %pattern;
19
20	my $source_text;
21	{
22	warn "$source_file_name...\n";
23	open my $source_file, '<:utf8', $source_file_name
24	or die "$0: $source_file_name: $!";
25	local $/ = undef;
26	$source_text = <$source_file>;
27	}
28
29	open my $tbt_file, '>:utf8', $tbt_file_name or die "$0: $tbt_file_name: $!";
30
31	my $status = {};
32	my $tbt_added = {};
33
34	warn "Generating...\n";
35	$source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6])(?>\s[^>]*)?>)((?>(?!</?(?>p(?>re)?|li|d(?>[tdl]|iv)|t(?>[dr]|h(?>ead)?|able|body|foot)|h[1-6r]|ul|ol)(?>\s[^>]*)?>).)+)}
36	{
37	my ($tag, $text) = ($1, $2);
38	my $n_text = normalize ($text);
39
40	if (length $n_text) {
41	my $ja_text = $data{$n_text};
42
43	$status->{all}++;
44
45	if (defined $ja_text) {
46	$status->{ja}++;
47	$tag . q[<span class=ja-translation lang=ja>] . $ja_text . q[</span>];
48	} else {
49	my $v = $tag . $text;
50	my $has_ja;
51	for my $pattern (@pattern) {
52	if ($n_text =~ /^$pattern$/) {
53	$status->{ja}++;
54	$v = $tag . q[<span class=ja-translation lang=ja>] .
55	replace_pattern2 ($pattern{$pattern}, $1, $2, $3, $4, $5) .
56	q[</span>];
57	$has_ja = 1;
58	last;
59	}
60	}
61
62	unless ($has_ja) {
63	$text =~ s/^\s+//;
64	$text =~ s/\s+\z//;
65	$text =~ s/\x0D?\x0A(?:\x0D?\x0A)+/\n/g;
66	unless ($tbt_added->{$text}) {
67	print $tbt_file $text;
68	print $tbt_file "\n\n";
69	$tbt_added->{$text} = 1;
70	}
71	}
72
73	$v;
74	}
75	} else {
76	$1 . $2;
77	}
78	}ges;
79	$source_text =~ s{(<(?>link|img|script)\s[^>]+>)}{
80	my $tag = $1;
81	my $n_text = normalize ($tag);
82	my $ja_text = $data{$n_text};
83	if (defined $ja_text) {
84	$ja_text;
85	} else {
86	$tag;
87	}
88	}ges;
89
90	$source_text =~ s{\[\[([A-Z ]+):([^]]+)\]\]}
91	{<em class=rfc2119 title="$1">$2</em>}gs;
92
93	#$source_text =~ s[<title>][<base href="http://www.whatwg.org/specs/web-apps/current-work/"><title>];
94
95	{
96	warn "$result_file_name...\n";
97	open my $result_file, '>:utf8', $result_file_name
98	or die "$0: $result_file_name: $!";
99	print $result_file $source_text;
100	}
101
102	{
103	my $time = time;
104	open my $status_file, '>>', $status_file_name
105	or die "$0: $status_file_name: $!";
106	print $status_file "$time\t$status->{ja}\t$status->{all}\n";
107	}