#!/usr/bin/perl use strict; BEGIN { require 'common.pl' } my $source_file_name = shift; my $result_file_name = shift; my $status_file_name = shift; my $tbt_file_name_stem = shift; my %data; my %pattern; for_each_data_file (sub ($) { my $data_file_name = shift; warn "$data_file_name...\n"; load_data_file ($data_file_name, \%data, \%pattern); }); my @pattern = sort {length $b <=> length $a} keys %pattern; my $source_text; { warn "$source_file_name...\n"; open my $source_file, '<:utf8', $source_file_name or die "$0: $source_file_name: $!"; local $/ = undef; $source_text = <$source_file>; } my $part = 'cover'; my $status = {}; my $all_status = {}; my $tbt_added = {}; open my $tbt_file, '>:utf8', $tbt_file_name_stem . '.dat' or die "$0: $tbt_file_name_stem.dat: $!"; open my $part_tbt_file, '>:utf8', $tbt_file_name_stem . '-' . $part . '.dat' or die "$0: $tbt_file_name_stem-$part.dat: $!"; warn "Generating...\n"; $source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6])(?>\s[^>]*)?>)((?>(?!p(?>re)?|li|d(?>[tdl]|iv)|t(?>[dr]|h(?>ead)?|able|body|foot)|h[1-6r]|ul|ol)(?>\s[^>]*)?>).)+)} { my ($tag, $text) = ($1, $2); my $n_text = normalize ($text); if ($tag =~ /^

:utf8', $tbt_file_name_stem . '-' . $part . '.dat' or die "$0: $tbt_file_name_stem-$part.dat: $!"; } if (length $n_text) { my $ja_text = $data{$n_text}; $status->{all}++; $all_status->{$part}->{all}++; if (defined $ja_text) { $status->{ja}++; $all_status->{$part}->{ja}++; $tag . q[] . $ja_text . q[]; } else { my $v = $tag . $text; my $has_ja; for my $pattern (@pattern) { if ($n_text =~ /^$pattern$/) { $status->{ja}++; $v = $tag . q[] . replace_pattern2 ($pattern{$pattern}, $1, $2, $3, $4, $5) . q[]; $has_ja = 1; last; } } unless ($has_ja) { $text =~ s/^\s+//; $text =~ s/\s+\z//; $text =~ s/\x0D?\x0A(?:\x0D?\x0A)+/\n/g; unless ($tbt_added->{$text}) { print $tbt_file ($text); print $tbt_file ("\n\n"); print $part_tbt_file ($text); print $part_tbt_file ("\n\n"); $tbt_added->{$text} = 1; } } $v; } } else { $1 . $2; } }ges; $source_text =~ s{(<(?>link|img|script)\s[^>]+>)}{ my $tag = $1; my $n_text = normalize ($tag); my $ja_text = $data{$n_text}; if (defined $ja_text) { $ja_text; } else { $tag; } }ges; $source_text =~ s{\[\[([A-Z ]+):([^]]+)\]\]} {$2}gs; #$source_text =~ s[][<base href="http://www.whatwg.org/specs/web-apps/current-work/"><title>]; { warn "$result_file_name...\n"; open my $result_file, '>:utf8', $result_file_name or die "$0: $result_file_name: $!"; print $result_file $source_text; } { my $time = time; my @item = ($time, $status->{ja}, $status->{all}); for my $part (qw(cover introduction infrastructure dom semantics browsers editing comms syntax rendering no)) { push @item, $all_status->{$part}->{ja}; push @item, $all_status->{$part}->{all}; } open my $status_file, '>>', $status_file_name or die "$0: $status_file_name: $!"; print $status_file join "\t", @item; print $status_file "\n"; close $status_file; }