#!/usr/bin/perl
use strict;
BEGIN { require 'common.pl' }
my $source_file_name = shift;
my $result_file_name = shift;
my $status_file_name = shift;
my %data;
my %pattern;
my $all = get_all_entries ();
for my $key (keys %$all) {
for (keys %{$all->{$key}->{exact} or {}}) {
my $entry = $all->{$key}->{exact}->{$_};
$data{normalize ($entry->{en})} = [$entry->{ja}, $_];
}
for (keys %{$all->{$key}->{pattern} or {}}) {
my $entry = $all->{$key}->{pattern}->{$_};
$pattern{create_pattern1 (normalize ($entry->{en}))}
= [$entry->{ja}, $_];
}
}
my @pattern = sort {length $b <=> length $a} keys %pattern;
my $source_text;
{
warn "$source_file_name...\n";
open my $source_file, '<:utf8', $source_file_name
or die "$0: $source_file_name: $!";
local $/ = undef;
$source_text = <$source_file>;
}
my $part = 'cover';
my $status = {};
my $all_status = {};
my $tbt_added = {};
warn "Generating...\n";
$source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6]|caption)(?>\s[^>]*)?>)((?>(?!?(?>p(?>re)?|li|d(?>[tdl]|iv)|t(?>[dr]|h(?>ead)?|able|b(?>ody|lockquote)|foot)|h[1-6r]|ul|ol|caption|section)(?>\s[^>]*)?>).)+)}
{
my ($tag, $text) = ($1, $2);
my $prefix = '';
if ($text =~ s#^([^<>]+)##) {
$prefix = $1;
}
my $suffix = '';
my $suffix2 = '';
if ($text =~ s#().)+)$##s) {
$suffix = $1 . '-->';
$suffix2 = ').)+-->\s*)$##s) {
$suffix = $1 . $suffix;
}
my $n_text = normalize ($text);
if ($tag =~ /^
{all}++;
$all_status->{$part}->{all}++;
if (defined $ja_text) {
$status->{ja}++;
$all_status->{$part}->{ja}++;
my $jt = $ja_text->[0];
$jt =~ s{\[\[([A-Z ]+):([^]]+)\]\]}
{$2}gs;
add_class ($tag, 'has-ja-translation', $ja_text->[1]) .
q[] .
$prefix . escape_id ($text, 'en-') . $suffix .
q[] .
q[] .
$prefix . $jt . $suffix .
q[] .
$suffix2;
} else {
my $v;
for my $pattern (@pattern) {
if ($n_text =~ /^$pattern$/) {
$status->{ja}++;
$all_status->{$part}->{ja}++;
my $jt = replace_pattern2 ($pattern{$pattern}->[0],
$1, $2, $3, $4, $5, $6, $7, $8, $9);
$jt =~ s{\[\[([A-Z ]+):([^]]+)\]\]}
{$2}gs;
my $real_hash = get_hash ($n_text);
$v = add_class ($tag, 'has-ja-translation',
$pattern{$pattern}->[1], $real_hash) .
q[] .
$prefix . escape_id ($text, 'en-') . $suffix .
q[] .
q[] .
$prefix . $jt . $suffix .
q[] .
$suffix2;
unless ($tbt_added->{$n_text}) {
set_fallback_entry ($real_hash => {en => $text});
$tbt_added->{$n_text} = 1;
}
last;
}
}
unless (defined $v) {
my $hash = get_hash ($n_text);
$v = add_class ($tag, 'no-ja-translation', $hash) .
'' .
$prefix . $text . $suffix .
'' .
$suffix2;
unless ($tbt_added->{$n_text}) {
set_fallback_entry ($hash => {en => $text});
$tbt_added->{$n_text} = 1;
}
}
$v;
}
} else {
$1 . $2;
}
}ges;
$source_text =~ s{(<((?>link|img|script))\s[^>]+>)}{
my $tag = $1;
my $tag_name = $2;
my $n_text = normalize ($tag);
my $ja_text = $data{$n_text};
if (defined $ja_text) {
$ja_text->[0];
} else {
unless ($tbt_added->{$n_text}) {
set_fallback_entry (scalar (get_hash ($n_text)),
{en => $tag, tags => [$tag_name . '-tag']});
$tbt_added->{$n_text} = 1;
}
$tag;
}
}ges;
#$source_text =~ s[][];
{
warn "$result_file_name...\n";
open my $result_file, '>:utf8', $result_file_name
or die "$0: $result_file_name: $!";
print $result_file $source_text;
}
{
my $time = time;
my @item = ($time, $status->{ja}, $status->{all});
for my $part (qw(cover introduction infrastructure dom semantics browsers
editing comms syntax rendering no)) {
push @item, $all_status->{$part}->{ja};
push @item, $all_status->{$part}->{all};
}
open my $status_file, '>>', $status_file_name
or die "$0: $status_file_name: $!";
print $status_file join "\t", @item;
print $status_file "\n";
close $status_file;
}
save_fallback_entries ();
sub add_class ($$$;$) {
my $tag = shift;
my $new_class = shift; # should not contain bare & and bare "
my $hash = shift;
my $real_hash = shift;
$real_hash = qq[ data-ja-real-hash="$real_hash"] if defined $real_hash;
if ($tag =~ /^];
} elsif ($tag =~ /\bclass="/) {
$tag =~ s/\bclass="([^"]*)"/class="$1 $new_class" data-ja-hash="$hash"$real_hash/;
} elsif ($tag =~ /\bclass=/) {
$tag =~ s/\bclass=([^\s>]+)/class="$1 $new_class" data-ja-hash="$hash"$real_hash/g;
} else {
$tag =~ s/>/ class="$new_class" data-ja-hash="$hash"$real_hash>/;
}
return $tag;
} # add_class
sub escape_id ($$) {
my $content = shift;
my $id_prefix = shift; # should not contain bare & and bare "
$content =~ s{<([a-zA-Z0-9-][^<>]+)>}{
my $tag_content = $1;
if ($tag_content =~ /\bid="/) {
$tag_content =~ s/\bid="([^"]*)"/id="$id_prefix$1"/;
} elsif ($tag_content =~ /\bid=/) {
$tag_content =~ s/\bid=(\S+)/id="$id_prefix$1"/;
}
# if ($tag_content =~ /\bhref=#/) {
# $tag_content =~ s/\bhref=#(\S+)/href=#$id_prefix$1/;
# }
'<' . $tag_content . '>';
}ge;
return $content;
} # escape_id
## Author: Wakaba .
## License: Copyright 2008 Wakaba. You are granted a license to use,
## reproduce and create derivative works of this script.
## $Date: 2008/11/02 06:31:30 $