#!/usr/bin/perl use strict; BEGIN { require 'common.pl' } my $source_file_name = shift; my $result_file_name = shift; my $status_file_name = shift; my $tbt_file_name_stem = shift; my %data; my %pattern; for_each_data_file (sub ($) { my $data_file_name = shift; warn "$data_file_name...\n"; load_data_file ($data_file_name, \%data, \%pattern); }); my @pattern = sort {length $b <=> length $a} keys %pattern; my $source_text; { warn "$source_file_name...\n"; open my $source_file, '<:utf8', $source_file_name or die "$0: $source_file_name: $!"; local $/ = undef; $source_text = <$source_file>; } my $part = 'cover'; my $status = {}; my $all_status = {}; my $tbt_added = {}; open my $tbt_file, '>:utf8', $tbt_file_name_stem . '.dat' or die "$0: $tbt_file_name_stem.dat: $!"; open my $part_tbt_file, '>:utf8', $tbt_file_name_stem . '-' . $part . '.dat' or die "$0: $tbt_file_name_stem-$part.dat: $!"; warn "Generating...\n"; $source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6])(?>\s[^>]*)?>)((?>(?!p(?>re)?|li|d(?>[tdl]|iv)|t(?>[dr]|h(?>ead)?|able|body|foot)|h[1-6r]|ul|ol)(?>\s[^>]*)?>).)+)} { my ($tag, $text) = ($1, $2); my $n_text = normalize ($text); if ($tag =~ /^

:utf8', $tbt_file_name_stem . '-' . $part . '.dat' or die "$0: $tbt_file_name_stem-$part.dat: $!"; } if (length $n_text) { my $ja_text = $data{$n_text}; $status->{all}++; $all_status->{$part}->{all}++; if (defined $ja_text) { $status->{ja}++; $all_status->{$part}->{ja}++; add_class ($tag, 'has-ja-translation') . q[] . escape_id ($text, 'en-') . q[] . q[] . $ja_text . q[]; } else { my $v; for my $pattern (@pattern) { if ($n_text =~ /^$pattern$/) { $status->{ja}++; $all_status->{$part}->{ja}++; $v = add_class ($tag, 'has-ja-translation') . q[] . escape_id ($text, 'en-') . q[] . q[] . replace_pattern2 ($pattern{$pattern}, $1, $2, $3, $4, $5) . q[]; last; } } unless (defined $v) { $v = add_class ($tag, 'no-ja-translation') . $text; $text =~ s/^\s+//; $text =~ s/\s+\z//; $text =~ s/\x0D?\x0A(?:\x0D?\x0A)+/\n/g; unless ($tbt_added->{$text}) { print $tbt_file ($text); print $tbt_file ("\n\n"); print $part_tbt_file ($text); print $part_tbt_file ("\n\n"); $tbt_added->{$text} = 1; } } $v; } } else { $1 . $2; } }ges; $source_text =~ s{(<(?>link|img|script)\s[^>]+>)}{ my $tag = $1; my $n_text = normalize ($tag); my $ja_text = $data{$n_text}; if (defined $ja_text) { $ja_text; } else { $tag; } }ges; $source_text =~ s{\[\[([A-Z ]+):([^]]+)\]\]} {$2}gs; #$source_text =~ s[][<base href="http://www.whatwg.org/specs/web-apps/current-work/"><title>]; { warn "$result_file_name...\n"; open my $result_file, '>:utf8', $result_file_name or die "$0: $result_file_name: $!"; print $result_file $source_text; } { my $time = time; my @item = ($time, $status->{ja}, $status->{all}); for my $part (qw(cover introduction infrastructure dom semantics browsers editing comms syntax rendering no)) { push @item, $all_status->{$part}->{ja}; push @item, $all_status->{$part}->{all}; } open my $status_file, '>>', $status_file_name or die "$0: $status_file_name: $!"; print $status_file join "\t", @item; print $status_file "\n"; close $status_file; } sub add_class ($$) { my $tag = shift; my $new_class = shift; # should not contain bare & and bare " if ($tag =~ /^<li\b/) { ## NOTE: This |p| wrapper is necessary, otherwise, if |li| element ## is set to |display: table|, then no list marker is shown. $tag .= qq[<p class="$new_class ja-translation-inserted">]; } elsif ($tag =~ /\bclass="/) { $tag =~ s/\bclass="([^"]*)"/class="$1 $new_class"/; } elsif ($tag =~ /\bclass=/) { $tag =~ s/\bclass=([^\s>]+)/class="$1 $new_class"/g; } else { $tag =~ s/>/ class="$new_class">/; } return $tag; } # add_class sub escape_id ($$) { my $content = shift; my $id_prefix = shift; # should not contain bare & and bare " $content =~ s{<([a-zA-Z0-9-][^<>]+)>}{ my $tag_content = $1; if ($tag_content =~ /\bid="/) { $tag_content =~ s/\bid="([^"]*)"/id="$id_prefix$1"/; } elsif ($tag_content =~ /\bid=/) { $tag_content =~ s/\bid=(\S+)/id="$id_prefix$1"/; } # if ($tag_content =~ /\bhref=#/) { # $tag_content =~ s/\bhref=#(\S+)/href=#$id_prefix$1/; # } '<' . $tag_content . '>'; }ge; return $content; } # escape_id