#!/usr/bin/perl
use strict;
BEGIN { require 'common.pl' }
my $source_file_name = shift;
my $result_file_name = shift;
my $status_file_name = shift;
my $tbt_file_name_stem = shift;
my %data;
my %pattern;
for_each_data_file (sub ($) {
my $data_file_name = shift;
warn "$data_file_name...\n";
load_data_file ($data_file_name, \%data, \%pattern);
});
my @pattern = sort {length $b <=> length $a} keys %pattern;
my $source_text;
{
warn "$source_file_name...\n";
open my $source_file, '<:utf8', $source_file_name
or die "$0: $source_file_name: $!";
local $/ = undef;
$source_text = <$source_file>;
}
my $part = 'cover';
my $status = {};
my $all_status = {};
my $tbt_added = {};
open my $tbt_file, '>:utf8', $tbt_file_name_stem . '.dat' or
die "$0: $tbt_file_name_stem.dat: $!";
open my $part_tbt_file, '>:utf8', $tbt_file_name_stem . '-' . $part . '.dat' or
die "$0: $tbt_file_name_stem-$part.dat: $!";
warn "Generating...\n";
$source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6])(?>\s[^>]*)?>)((?>(?!?(?>p(?>re)?|li|d(?>[tdl]|iv)|t(?>[dr]|h(?>ead)?|able|body|foot)|h[1-6r]|ul|ol)(?>\s[^>]*)?>).)+)}
{
my ($tag, $text) = ($1, $2);
my $n_text = normalize ($text);
if ($tag =~ /^
:utf8', $tbt_file_name_stem . '-' . $part . '.dat' or
die "$0: $tbt_file_name_stem-$part.dat: $!";
}
if (length $n_text) {
my $ja_text = $data{$n_text};
$status->{all}++;
$all_status->{$part}->{all}++;
if (defined $ja_text) {
$status->{ja}++;
$all_status->{$part}->{ja}++;
add_class ($tag, 'has-ja-translation') .
q[] . escape_id ($text, 'en-') .
q[] .
q[] . $ja_text . q[];
} else {
my $v;
for my $pattern (@pattern) {
if ($n_text =~ /^$pattern$/) {
$status->{ja}++;
$all_status->{$part}->{ja}++;
$v = add_class ($tag, 'has-ja-translation') .
q[] .
escape_id ($text, 'en-') . q[] .
q[] .
replace_pattern2 ($pattern{$pattern}, $1, $2, $3, $4, $5) .
q[];
last;
}
}
unless (defined $v) {
$v = add_class ($tag, 'no-ja-translation') . $text;
$text =~ s/^\s+//;
$text =~ s/\s+\z//;
$text =~ s/\x0D?\x0A(?:\x0D?\x0A)+/\n/g;
unless ($tbt_added->{$text}) {
print $tbt_file ($text);
print $tbt_file ("\n\n");
print $part_tbt_file ($text);
print $part_tbt_file ("\n\n");
$tbt_added->{$text} = 1;
}
}
$v;
}
} else {
$1 . $2;
}
}ges;
$source_text =~ s{(<(?>link|img|script)\s[^>]+>)}{
my $tag = $1;
my $n_text = normalize ($tag);
my $ja_text = $data{$n_text};
if (defined $ja_text) {
$ja_text;
} else {
$tag;
}
}ges;
$source_text =~ s{\[\[([A-Z ]+):([^]]+)\]\]}
{$2}gs;
#$source_text =~ s[][];
{
warn "$result_file_name...\n";
open my $result_file, '>:utf8', $result_file_name
or die "$0: $result_file_name: $!";
print $result_file $source_text;
}
{
my $time = time;
my @item = ($time, $status->{ja}, $status->{all});
for my $part (qw(cover introduction infrastructure dom semantics browsers
editing comms syntax rendering no)) {
push @item, $all_status->{$part}->{ja};
push @item, $all_status->{$part}->{all};
}
open my $status_file, '>>', $status_file_name
or die "$0: $status_file_name: $!";
print $status_file join "\t", @item;
print $status_file "\n";
close $status_file;
}
sub add_class ($$) {
my $tag = shift;
my $new_class = shift; # should not contain bare & and bare "
if ($tag =~ /^];
} elsif ($tag =~ /\bclass="/) {
$tag =~ s/\bclass="([^"]*)"/class="$1 $new_class"/;
} elsif ($tag =~ /\bclass=/) {
$tag =~ s/\bclass=([^\s>]+)/class="$1 $new_class"/g;
} else {
$tag =~ s/>/ class="$new_class">/;
}
return $tag;
} # add_class
sub escape_id ($$) {
my $content = shift;
my $id_prefix = shift; # should not contain bare & and bare "
$content =~ s{<([a-zA-Z0-9-][^<>]+)>}{
my $tag_content = $1;
if ($tag_content =~ /\bid="/) {
$tag_content =~ s/\bid="([^"]*)"/id="$id_prefix$1"/;
} elsif ($tag_content =~ /\bid=/) {
$tag_content =~ s/\bid=(\S+)/id="$id_prefix$1"/;
}
# if ($tag_content =~ /\bhref=#/) {
# $tag_content =~ s/\bhref=#(\S+)/href=#$id_prefix$1/;
# }
'<' . $tag_content . '>';
}ge;
return $content;
} # escape_id