#!/usr/bin/perl use strict; my $source_file_name = shift; my $data_dir_name = q[data/]; my $data_suffix = q[.dat]; my $result_file_name = shift; sub normalize ($) { my $s = shift; $s =~ s/\s+/ /g; $s =~ s/^ //; $s =~ s/ $//g; return $s; } # normalize sub create_pattern1 ($) { my $s = quotemeta shift; $s =~ s/\\\*/(.+)/g; return $s; } # create_pattern1 sub replace_pattern2 ($@) { my $s = shift; my @arg = @_; $s =~ s/\$(\d+)/$arg[$1 - 1]/g; return $s; } # replace_pattern2 my %data; my %pattern; { opendir my $data_dir, $data_dir_name or die "$0: $data_dir_name: $!"; local $/ = undef; for (readdir $data_dir) { next if /^\./; # hidden files my $data_file_name = qq[$data_dir_name$_]; next unless $data_file_name =~ /\Q$data_suffix\E$/; warn "$data_file_name...\n"; open my $data_file, '<:utf8', $data_file_name or die "$0: $data_file_name: $!"; for (split /\n\n+(?=#)/, scalar <$data_file>) { my ($en, $ja) = split /\n#ja\n/, $_; if ($en =~ s/^#en\n//) { $data{normalize ($en)} = $ja; } elsif ($en =~ s/^#pattern\n//) { $pattern{create_pattern1 (normalize ($en))} = $ja; } } } } my @pattern = sort {length $b <=> length $a} keys %pattern; my $source_text; { warn "$source_file_name...\n"; open my $source_file, '<:utf8', $source_file_name or die "$0: $source_file_name: $!"; local $/ = undef; $source_text = <$source_file>; } warn "Generating...\n"; $source_text =~ s{(<(?>p(?>re)?|li|d[td]|t[dh]|h[1-6])(?>\s[^>]*)?>)((?>(?!?(?>p(?>re)?|li|d(?>[tdl]|iv)|t[dh]|h[1-6]|ul|ol)(?>\s[^>]*)?>).)+)} { my ($tag, $text) = ($1, $2); my $n_text = normalize ($text); my $ja_text = $data{$n_text}; if (defined $ja_text) { $tag . q[] . $ja_text . q[]; } else { my $v = $tag . $text; for my $pattern (@pattern) { if ($n_text =~ /^$pattern$/) { $v = $tag . q[] . replace_pattern2 ($pattern{$pattern}, $1, $2, $3, $4, $5) . q[]; last; } } $v; } }ges; $source_text =~ s{(<(?>link|img|script)\s[^>]+>)}{ my $tag = $1; my $n_text = normalize ($tag); my $ja_text = $data{$n_text}; if (defined $ja_text) { $ja_text; } else { $tag; } }ges; $source_text =~ s{\[\[([A-Z ]+):([^]]+)\]\]} {$2}gs; #$source_text =~ s[