#!/usr/bin/perl use strict; BEGIN { require 'common.pl' } my $source_file_name = shift; my $result_file_name = shift; my $status_file_name = shift; my %data; my %pattern; my $all = get_all_entries (); for my $key (keys %$all) { for (keys %{$all->{$key}->{exact} or {}}) { my $entry = $all->{$key}->{exact}->{$_}; $data{normalize ($entry->{en})} = [$entry->{ja}, $_]; } for (keys %{$all->{$key}->{pattern} or {}}) { my $entry = $all->{$key}->{pattern}->{$_}; $pattern{create_pattern1 (normalize ($entry->{en}))} = [$entry->{ja}, $_]; } } my @pattern = sort {length $b <=> length $a} keys %pattern; my $source_text; { warn "$source_file_name...\n"; open my $source_file, '<:utf8', $source_file_name or die "$0: $source_file_name: $!"; local $/ = undef; $source_text = <$source_file>; } my $part = 'cover'; my $status = {}; my $all_status = {}; my $tbt_added = {}; warn "Generating...\n"; $source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6]|caption)(?>\s[^>]*)?>)((?>(?!p(?>re)?|li|d(?>[tdl]|iv)|t(?>[dr]|h(?>ead)?|able|b(?>ody|lockquote)|foot)|h[1-6r]|ul|ol|caption|section)(?>\s[^>]*)?>).)+)} { my ($tag, $text) = ($1, $2); my $prefix = ''; if ($text =~ s#^([^<>]+)##) { $prefix = $1; } my $suffix = ''; my $suffix2 = ''; if ($text =~ s#().)+)$##s) { $suffix = $1 . '-->'; $suffix2 = ').)+-->\s*)$##s) { $suffix = $1 . $suffix; } my $n_text = normalize ($text); if ($tag =~ /^

{all}++; $all_status->{$part}->{all}++; if (defined $ja_text) { $status->{ja}++; $all_status->{$part}->{ja}++; my $jt = $ja_text->[0]; $jt =~ s{\[\[([A-Z ]+):([^]]+)\]\]} {$2}gs; add_class ($tag, 'has-ja-translation', $ja_text->[1]) . q[] . $prefix . escape_id ($text, 'en-') . $suffix . q[] . q[] . $prefix . $jt . $suffix . q[] . $suffix2; } else { my $v; for my $pattern (@pattern) { if ($n_text =~ /^$pattern$/) { $status->{ja}++; $all_status->{$part}->{ja}++; my $jt = replace_pattern2 ($pattern{$pattern}->[0], $1, $2, $3, $4, $5, $6, $7, $8, $9); $jt =~ s{\[\[([A-Z ]+):([^]]+)\]\]} {$2}gs; my $real_hash = get_hash ($n_text); $v = add_class ($tag, 'has-ja-translation', $pattern{$pattern}->[1], $real_hash) . q[] . $prefix . escape_id ($text, 'en-') . $suffix . q[] . q[] . $prefix . $jt . $suffix . q[] . $suffix2; unless ($tbt_added->{$n_text}) { set_fallback_entry ($real_hash => {en => $text}); $tbt_added->{$n_text} = 1; } last; } } unless (defined $v) { my $hash = get_hash ($n_text); $v = add_class ($tag, 'no-ja-translation', $hash) . '' . $prefix . $text . $suffix . '' . $suffix2; unless ($tbt_added->{$n_text}) { set_fallback_entry ($hash => {en => $text}); $tbt_added->{$n_text} = 1; } } $v; } } else { $1 . $2; } }ges; $source_text =~ s{(<((?>link|img|script))\s[^>]+>)}{ my $tag = $1; my $tag_name = $2; my $n_text = normalize ($tag); my $ja_text = $data{$n_text}; if (defined $ja_text) { $ja_text->[0]; } else { unless ($tbt_added->{$n_text}) { set_fallback_entry (scalar (get_hash ($n_text)), {en => $tag, tags => [$tag_name . '-tag']}); $tbt_added->{$n_text} = 1; } $tag; } }ges; #$source_text =~ s[][<base href="http://www.whatwg.org/specs/web-apps/current-work/"><title>]; { warn "$result_file_name...\n"; open my $result_file, '>:utf8', $result_file_name or die "$0: $result_file_name: $!"; print $result_file $source_text; } { my $time = time; my @item = ($time, $status->{ja}, $status->{all}); for my $part (qw(cover introduction infrastructure dom semantics browsers editing comms syntax rendering no)) { push @item, $all_status->{$part}->{ja}; push @item, $all_status->{$part}->{all}; } open my $status_file, '>>', $status_file_name or die "$0: $status_file_name: $!"; print $status_file join "\t", @item; print $status_file "\n"; close $status_file; } save_fallback_entries (); sub add_class ($$$;$) { my $tag = shift; my $new_class = shift; # should not contain bare & and bare " my $hash = shift; my $real_hash = shift; $real_hash = qq[ data-ja-real-hash="$real_hash"] if defined $real_hash; if ($tag =~ /^<li\b/) { ## NOTE: This |p| wrapper is necessary, otherwise, if |li| element ## is set to |display: table|, then no list marker is shown. $tag .= qq[<p class="$new_class ja-translation-inserted" data-ja-hash="$hash"$real_hash>]; } elsif ($tag =~ /\bclass="/) { $tag =~ s/\bclass="([^"]*)"/class="$1 $new_class" data-ja-hash="$hash"$real_hash/; } elsif ($tag =~ /\bclass=/) { $tag =~ s/\bclass=([^\s>]+)/class="$1 $new_class" data-ja-hash="$hash"$real_hash/g; } else { $tag =~ s/>/ class="$new_class" data-ja-hash="$hash"$real_hash>/; } return $tag; } # add_class sub escape_id ($$) { my $content = shift; my $id_prefix = shift; # should not contain bare & and bare " $content =~ s{<([a-zA-Z0-9-][^<>]+)>}{ my $tag_content = $1; if ($tag_content =~ /\bid="/) { $tag_content =~ s/\bid="([^"]*)"/id="$id_prefix$1"/; } elsif ($tag_content =~ /\bid=/) { $tag_content =~ s/\bid=(\S+)/id="$id_prefix$1"/; } # if ($tag_content =~ /\bhref=#/) { # $tag_content =~ s/\bhref=#(\S+)/href=#$id_prefix$1/; # } '<' . $tag_content . '>'; }ge; return $content; } # escape_id ## Author: Wakaba <w@suika.fam.cx>. ## License: Copyright 2008 Wakaba. You are granted a license to use, ## reproduce and create derivative works of this script. ## $Date: 2008/11/02 06:31:30 $