/[suikacvs]/markup/html/html5/spec-ja/find.cgi
Suika

Contents of /markup/html/html5/spec-ja/find.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (show annotations) (download)
Wed Aug 13 10:04:07 2008 UTC (16 years, 11 months ago) by wakaba
Branch: MAIN
CVS Tags: before-new-ids, after-new-ids
Changes since 1.5: +1 -1 lines
revert previous changes

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4 use CGI::Carp qw/fatalsToBrowser/;
5
6 BEGIN { require 'common.pl' }
7
8 require Encode;
9
10 my $max_result = 100;
11
12 sub decode_url ($) {
13 my $s = shift;
14 $s =~ tr/+/ /;
15 $s =~ s/%([0-9A-Fa-f]{2})/pack 'C', hex $1/ge;
16 return Encode::decode ('utf-8', $s);
17 } # decode_url
18
19 sub htescape ($) {
20 my $s = shift;
21 $s =~ s/&/&/g;
22 $s =~ s/</&lt;/g;
23 $s =~ s/>/&gt;/g;
24 $s =~ s/"/&quot;/g;
25 return $s;
26 } # htescape
27
28 my $param = {};
29 for (split /[&;]/, $ENV{QUERY_STRING} || '') {
30 my ($name, $value) = split /=/, $_, 2;
31 $param->{decode_url ($name)} = decode_url ($value);
32 }
33
34 my $eword = htescape $param->{word};
35
36 my $pattern = quotemeta normalize $param->{word};
37 $pattern =~ s/\\-/[- ]/g;
38 unless ($param->{cs}) {
39 $pattern =~ s/([A-Za-z])/'[' . uc ($1) . lc ($1) . ']'/ge;
40 }
41 my $suffix_pattern = {
42 ku => qr/(?>[かこきいっくけ])/,
43 su => qr/(?>[さそしすせ])/,
44 tsu => qr/(?>[たとちっつて])/,
45 nu => qr/(?>[なのにんぬね])/,
46 mu => qr/(?>[まもみんむめ])/,
47 ru => qr/(?>[らろりっるれ])/,
48 u => qr/(?>[わおいっうえ])/,
49 gu => qr/(?>[がごぎいぐげ])/,
50 bu => qr/(?>[ばぼびんぶべ])/,
51 ichidan => qr/(?>[るれろよ])?/,
52 kuru => qr/(?>[るれい])?/,
53 suru => qr/(?>す[るれ]|しろ?|せよ?|さ)?/,
54 i => qr/(?>か[ろっ]|く|い|けれ|う)?/, ## BUG: ありがたい -> ありがとう
55 da => qr/(?>だ[ろっ]?|で|に|なら?)?/,
56 dasuru => qr/(?>だ[ろっ]?|で|に|なら?|す[るれ]|しろ?|せよ?|さ)?/,
57 }->{$param->{suffix}} || qr//;
58 $pattern =~ s/$suffix_pattern$//;
59 $pattern .= $suffix_pattern;
60 $pattern = '\b' . $pattern . '\b' if $param->{aw};
61
62 print qq[Content-Type: text/html ; charset=utf-8
63
64 <!DOCTYPE HTML>
65 <html lang=en>
66 <head>
67 <title>Search result for "$eword"</title>
68 <link rel=stylesheet href="/www/style/html/xhtml">
69 <style>
70 td {
71 vertical-align: top;
72 }
73 mark {
74 background-color: yellow;
75 }
76 .tag {
77 color: maroon;
78 }
79 .ref {
80 color: orange;
81 }
82 .rfc2119 {
83 font-weight: bolder;
84 }
85 .pattern-star {
86 font-style: normal;
87 color: gray;
88 text-decoration: none;
89 border-style: none;
90 }
91 .pattern-var {
92 font-style: italic;
93 color: gray;
94 text-decoration: none;
95 border-style: none;
96 }
97 input[type=text] {
98 width: 60%;
99 }
100 </style>
101 </head>
102 <body>
103 <h1>Search result for "$eword"</h1>];
104
105 print_input_form ();
106
107 unless ('' =~ /$pattern/) {
108 print q[<article>];
109 my $has_match;
110 for_each_data_file (sub ($) {
111 my $data_file_name = shift;
112 load_data_file ($data_file_name, my $exact_data = {}, my $pattern_data = {});
113 $pattern_data = unescape_patterns ($pattern_data);
114 $has_match |= print_matches ($data_file_name, $exact_data, $pattern_data);
115 });
116
117 unless ($has_match) {
118 print q[<p>No match found.</article>];
119 } else {
120 print q[</article>];
121 print_input_form ();
122 }
123 }
124
125 sub unescape_patterns ($) {
126 my $pattern_data = shift;
127 my $new_data = {};
128 for (keys %$pattern_data) {
129 my $w = $_;
130 my $v = $_;
131 $v =~ s/\(\.\+\)/*/g;
132 $v =~ s/\\([\s\S])/$1/g;
133 $new_data->{$v} = $pattern_data->{$w};
134 }
135 return $new_data;
136 } # unescape_patterns
137
138 sub print_matches ($$$) {
139 my ($file_name, $exact_data, $pattern_data) = @_;
140
141 my $file_id = $file_name;
142 if ($file_name =~ /([0-9A-Za-z-]+)\.dat$/) {
143 $file_id = $1;
144 }
145
146 my $has_match;
147 my $r = qq[<h2>File "<a href="edit/@{[htescape $file_id]}"><code class=file>@{[htescape $file_id]}</code></a>"</h2><table>];
148 my $result = 0;
149 my $added = {};
150 my %en2ja = (%$exact_data, %$pattern_data);
151 for (sort {$a cmp $b} keys %en2ja) {
152 if (/$pattern/) {
153 $has_match = 1;
154 $added->{$_} = 1;
155 $r .= get_match_text ($_, $en2ja{$_}, $pattern);
156 $r .= q[<tr><td colspan=2>...] and last if ++$result == $max_result;
157 }
158 }
159 $result = 0;
160 my %ja2en = reverse %en2ja;
161 for (sort {$a cmp $b} keys %ja2en) {
162 if (/$pattern/) {
163 next if $added->{$ja2en{$_}};
164 $has_match = 1;
165 $r .= get_match_text ($ja2en{$_}, $_, $pattern);
166 $r .= q[<tr><td colspan=2>...] and last if ++$result == $max_result;
167 }
168 }
169 $r .= q[</table>];
170 print $r if $has_match;
171 return $has_match;
172 } # print_matches
173
174 sub get_match_text ($$) {
175 my ($en, $ja) = @_;
176
177 ## NOTE: Marking will not work well if it contains &, <, >, or ", or
178 ## the pattern matches with charrefs, e.g. "t" (part of &lt; and &quot;).
179
180 my $r = q[<tr><td lang=en>];
181 my $v = htescape ($en);
182 $v =~ s[($pattern)][<mark>$1</mark>]g;
183 $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;
184 $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;
185 $v =~ s[\*][<var class=pattern-star>*</var>]g;
186 $r .= $v;
187 $r .= q[<td lang=ja>];
188 my $v = htescape ($ja);
189 $v =~ s[($pattern)][<mark>$1</mark>]g;
190 $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;
191 $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;
192 $v =~ s{(\[\[[^\[\]]+\]\])}[<span class=rfc2119>$1</span>]g;
193 $v =~ s[(\$[0-9]+)][<var class=pattern-var>$1</var>]g;
194 $r .= $v;
195
196 return $r;
197 } # get_match_text
198
199 sub print_input_form () {
200 print qq[<section><form action=find accept-charset=utf-8 method=get>];
201 print qq[<p><input type=text name=word value="@{[htescape $param->{word}]}">];
202 print qq[<select name=suffix>];
203 for (qw/none ku su tsu nu mu ru u gu bu ichidan suru kuru i da dasuru/) {
204 print qq[<option value="@{[htescape $_]}"];
205 print qq[ selected] if $param->{suffix} eq $_;
206 print qq[>];
207 print htescape ({
208 none => 'No suffix',
209 ku => 'Ka-gyou Godan (-ku)',
210 su => 'Sa-gyou Godan (-su)',
211 tsu => 'Ta-gyou Godan (-tsu)',
212 nu => 'Na-gyou Godan (-nu)',
213 mu => 'Ma-gyou Godan (-mu)',
214 ru => 'Ra-gyou Godan (-ru)',
215 u => 'Wa/a-gyou Godan (-u)',
216 gu => 'Ga-gyou Godan (-gu)',
217 bu => 'Ba-gyou Godan (-bu)',
218 ichidan => 'Ichidan (-iru, -eru)',
219 suru => 'Sahen (-suru)',
220 kuru => 'Kahen (kuru)',
221 i => 'Keiyoushi (-i)',
222 da => 'Keiyou-doushi (-da)',
223 dasuru => 'Kei-dou-sahen (-da, -suru)',
224 }->{$_} || $_);
225 }
226 print qq[</select>];
227 print qq[<input type=submit value=Submit>];
228 print qq[<p><label><input type=checkbox name=cs @{[$param->{cs} ? 'checked' : '']}> Case-sensitive</label>
229 <label><input type=checkbox name=aw @{[$param->{aw} ? 'checked' : '']}> As word(s)</label>];
230 print qq[</form></section>];
231 } # print_input_form

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24