#!/usr/bin/perl use strict; BEGIN { require 'common.pl' } require Encode; my $max_result = 30; sub decode_url ($) { my $s = shift; $s =~ tr/+/ /; $s =~ s/%([0-9A-Fa-f]{2})/pack 'C', hex $1/ge; return Encode::decode ('utf-8', $s); } # decode_url sub htescape ($) { my $s = shift; $s =~ s/&/&/g; $s =~ s//>/g; $s =~ s/"/"/g; return $s; } # htescape my $param = {}; for (split /[&;]/, $ENV{QUERY_STRING} || '') { my ($name, $value) = split /=/, $_, 2; $param->{decode_url ($name)} = decode_url ($value); } my $pattern = quotemeta normalize $param->{word}; my $eword = htescape $param->{word}; print qq[Content-Type: text/html ; charset=utf-8 Search result for "$eword"

Search result for "$eword"

]; print_input_form (); print q[
]; my $has_match; for_each_data_file (sub ($) { my $data_file_name = shift; load_data_file ($data_file_name, my $exact_data = {}, my $pattern_data = {}); $pattern_data = unescape_patterns ($pattern_data); $has_match |= print_matches ($data_file_name, $exact_data, $pattern_data); }); unless ($has_match) { print q[

No match found.

]; } else { print q[]; print_input_form (); } sub unescape_patterns ($) { my $pattern_data = shift; my $new_data = {}; for (keys %$pattern_data) { my $w = $_; my $v = $_; $v =~ s/\(\.\+\)/*/g; $v =~ s/\\([\s\S])/$1/g; $new_data->{$v} = $pattern_data->{$w}; } return $new_data; } # unescape_patterns sub print_matches ($$$) { my ($file_name, $exact_data, $pattern_data) = @_; my $file_id = $file_name; if ($file_name =~ /([0-9A-Za-z-]+)\.dat$/) { $file_id = $1; } my $has_match; my $r = qq[

File "@{[htescape $file_id]}"

]; my $result = 0; my $added = {}; my %en2ja = (%$exact_data, %$pattern_data); for (sort {$a cmp $b} keys %en2ja) { if (/$pattern/) { $has_match = 1; $added->{$_} = 1; $r .= get_match_text ($_, $en2ja{$_}, $pattern); $r .= q[
...] and last if ++$result == $max_result; } } $result = 0; my %ja2en = reverse %en2ja; for (sort {$a cmp $b} keys %ja2en) { if (/$pattern/) { next if $added->{$ja2en{$_}}; $has_match = 1; $r .= get_match_text ($ja2en{$_}, $_, $pattern); $r .= q[
...] and last if ++$result == $max_result; } } $r .= q[
]; print $r if $has_match; return $has_match; } # print_matches sub get_match_text ($$) { my ($en, $ja) = @_; ## NOTE: Marking will not work well if it contains &, <, >, or ", or ## the pattern matches with charrefs, e.g. "t" (part of < and "). my $r = q[]; my $v = htescape ($en); $v =~ s[($pattern)][$1]g; $v =~ s[(<[\s\S]+?>)][$1]g; $v =~ s[(&[#0-9A-Za-z]+;)][$1]g; $v =~ s[\*][*]g; $r .= $v; $r .= q[]; my $v = htescape ($ja); $v =~ s[($pattern)][$1]g; $v =~ s[(<[\s\S]+?>)][$1]g; $v =~ s[(&[#0-9A-Za-z]+;)][$1]g; $v =~ s{(\[\[[^\[\]]+\]\])}[$1]g; $v =~ s[(\$[0-9]+)][$1]g; $r .= $v; return $r; } # get_match_text sub print_input_form () { print qq[
]; print qq[

]; print qq[

]; print qq[

]; } # print_input_form