/[suikacvs]/markup/html/html5/spec-ja/find.cgi
Suika

Contents of /markup/html/html5/spec-ja/find.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (show annotations) (download)
Mon Oct 27 04:52:39 2008 UTC (16 years ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.6: +102 -67 lines
Find script revised for new data format

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4 use CGI::Carp qw/fatalsToBrowser/;
5
6 BEGIN { require 'common.pl' }
7
8 require Encode;
9
10 my $max_result = 100;
11
12 sub decode_url ($) {
13 my $s = shift;
14 $s =~ tr/+/ /;
15 $s =~ s/%([0-9A-Fa-f]{2})/pack 'C', hex $1/ge;
16 return Encode::decode ('utf-8', $s);
17 } # decode_url
18
19 sub encode_url ($) {
20 my $s = Encode::encode ('utf-8', shift);
21 $s =~ s/([^0-9A-Za-z_~.-])/sprintf '%%%02X', ord $1/g;
22 return $s;
23 } # encode_url
24
25 sub htescape ($) {
26 my $s = shift;
27 $s =~ s/&/&/g;
28 $s =~ s/</&lt;/g;
29 $s =~ s/>/&gt;/g;
30 $s =~ s/"/&quot;/g;
31 return $s;
32 } # htescape
33
34 my $param = {};
35 for (split /[&;]/, $ENV{QUERY_STRING} || '') {
36 my ($name, $value) = split /=/, $_, 2;
37 $param->{decode_url ($name)} = decode_url ($value);
38 }
39
40 my $suffix_patterns = {
41 ku => qr/(?>[かこきいっくけ])/,
42 su => qr/(?>[さそしすせ])/,
43 tsu => qr/(?>[たとちっつて])/,
44 nu => qr/(?>[なのにんぬね])/,
45 mu => qr/(?>[まもみんむめ])/,
46 ru => qr/(?>[らろりっるれ])/,
47 u => qr/(?>[わおいっうえ])/,
48 gu => qr/(?>[がごぎいぐげ])/,
49 bu => qr/(?>[ばぼびんぶべ])/,
50 ichidan => qr/(?>[るれろよ])?/,
51 kuru => qr/(?>[るれい])?/,
52 suru => qr/(?>す[るれ]|しろ?|せよ?|さ)?/,
53 i => qr/(?>か[ろっ]|く|い|けれ|う)?/, ## BUG: ありがたい -> ありがとう
54 da => qr/(?>だ[ろっ]?|で|に|なら?)?/,
55 dasuru => qr/(?>だ[ろっ]?|で|に|なら?|す[るれ]|しろ?|せよ?|さ)?/,
56 };
57
58 my $eword = htescape $param->{word};
59
60 my @pattern;
61 my %tag;
62 for (split /\s+/, $param->{word}) {
63 if (s/^tag://) {
64 $tag{$_} = 1;
65 } else {
66 my $pattern = quotemeta normalize $_;
67 $pattern =~ s/\\-/[- ]/g;
68 unless ($param->{cs}) {
69 $pattern =~ s/([A-Za-z])/'[' . uc ($1) . lc ($1) . ']'/ge;
70 }
71 my $suffix_pattern = $suffix_patterns->{$param->{suffix}} || qr//;
72 $pattern =~ s/$suffix_pattern$//;
73 $pattern .= $suffix_pattern;
74 $pattern = '\b' . $pattern . '\b' if $param->{aw};
75 push @pattern, $pattern;
76 }
77 }
78
79 $|=1;
80 print qq[Content-Type: text/html ; charset=utf-8
81
82 <!DOCTYPE HTML>
83 <html lang=en>
84 <head>
85 <title>Search result for "$eword"</title>
86 <link rel=stylesheet href="/www/style/html/xhtml">
87 <link rel=stylesheet href="ja-style">
88 <style>
89 td {
90 vertical-align: top;
91 }
92 mark {
93 background-color: yellow;
94 }
95 .tag {
96 color: maroon;
97 }
98 .ref {
99 color: orange;
100 }
101 .rfc2119 {
102 font-weight: bolder;
103 }
104 .pattern-star {
105 font-style: normal;
106 color: gray;
107 text-decoration: none;
108 border-style: none;
109 }
110 .pattern-var {
111 font-style: italic;
112 color: gray;
113 text-decoration: none;
114 border-style: none;
115 }
116 input[type=text] {
117 width: 60%;
118 }
119 </style>
120 <script src=ja-script async defer></script>
121 </head>
122 <body>
123 <h1>Search result for "$eword"</h1>];
124
125 print_input_form ();
126
127 unless (check_match ({en => '', ja => '', tags => []})) {
128 print q[<article>];
129 my $has_match;
130 for_each_entry_set (sub ($) {
131 my (undef, $entries) = @_;
132 $has_match |= print_matches ($entries);
133 }, 1);
134
135 if ($param->{fb}) {
136 $has_match |= print_matches ({exact => get_fallback_entries ()});
137 }
138
139 unless ($has_match) {
140 print q[<p>No match found.</article>];
141 } else {
142 print q[</table></article>];
143 print_input_form ();
144 }
145 }
146
147 exit;
148
149 sub check_match ($) {
150 my ($entry) = @_;
151
152 for my $tag (keys %tag) {
153 F: {
154 for (@{$entry->{tags} or []}) {
155 last F if $_ eq $tag;
156 }
157 return 0;
158 } # F
159 }
160
161 for my $pattern (@pattern) {
162 if ($entry->{en} =~ /$pattern/ or
163 $entry->{ja} =~ /$pattern/) {
164 #
165 } else {
166 return 0;
167 }
168 }
169
170 return 1;
171 } # check_match
172
173 sub print_matches ($) {
174 my ($entries) = @_;
175
176 use feature 'state';
177 state $result //= 0;
178 my $has_match;
179
180 my %en2ja = (%{$entries->{exact} or {}}, %{$entries->{pattern} or {}});
181 for (keys %en2ja) {
182 if (check_match ($en2ja{$_})) {
183 $has_match = 1;
184 print '<table>' unless $result;
185 print ''. get_match_text ($_, $en2ja{$_});
186 ++$result;
187 # print q[<tr><td colspan=2>...] and last if ++$result == $max_result;
188 }
189 }
190 return $has_match;
191 } # print_matches
192
193 sub get_match_text ($$) {
194 my $hash = shift;
195 my $entry = shift;
196
197 ## NOTE: Marking will not work well if it contains &, <, >, or ", or
198 ## the pattern matches with charrefs, e.g. "t" (part of &lt; and &quot;).
199
200 my $r = q[<tr data-ja-hash="] . htescape ($hash) . q["><td lang=en>];
201 my $v = htescape ($entry->{en});
202 for my $pattern (@pattern) {
203 $v =~ s[($pattern)][<mark>$1</mark>]g;
204 }
205 $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;
206 $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;
207 $v =~ s[\*][<var class=pattern-star>*</var>]g;
208 $r .= $v;
209
210 $r .= q[<td lang=ja>];
211 my $v = htescape ($entry->{ja});
212 for my $pattern (@pattern) {
213 $v =~ s[($pattern)][<mark>$1</mark>]g;
214 }
215 $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;
216 $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;
217 $v =~ s{(\[\[[^\[\]]+\]\])}[<span class=rfc2119>$1</span>]g;
218 $v =~ s[(\$[0-9]+)][<var class=pattern-var>$1</var>]g;
219 $r .= $v;
220
221 $r .= q[<td lang>];
222 $r .= join ' ', map {
223 ($tag{$_} ? '<mark>' : '') .
224 '<a href="find?word=tag:' . encode_url ($_) . '">' .
225 htescape ($_) .
226 '</a>' .
227 ($tag{$_} ? '</mark>' : '')
228 } @{$entry->{tags} or []};
229
230 return $r;
231 } # get_match_text
232
233 sub print_input_form () {
234 print qq[<section><form action=find accept-charset=utf-8 method=get>];
235 print qq[<p><input type=text name=word value="@{[htescape $param->{word}]}">];
236 print qq[<select name=suffix>];
237 for (qw/none ku su tsu nu mu ru u gu bu ichidan suru kuru i da dasuru/) {
238 print qq[<option value="@{[htescape $_]}"];
239 print qq[ selected] if $param->{suffix} eq $_;
240 print qq[>];
241 print htescape ({
242 none => 'No suffix',
243 ku => 'Ka-gyou Godan (-ku)',
244 su => 'Sa-gyou Godan (-su)',
245 tsu => 'Ta-gyou Godan (-tsu)',
246 nu => 'Na-gyou Godan (-nu)',
247 mu => 'Ma-gyou Godan (-mu)',
248 ru => 'Ra-gyou Godan (-ru)',
249 u => 'Wa/a-gyou Godan (-u)',
250 gu => 'Ga-gyou Godan (-gu)',
251 bu => 'Ba-gyou Godan (-bu)',
252 ichidan => 'Ichidan (-iru, -eru)',
253 suru => 'Sahen (-suru)',
254 kuru => 'Kahen (kuru)',
255 i => 'Keiyoushi (-i)',
256 da => 'Keiyou-doushi (-da)',
257 dasuru => 'Kei-dou-sahen (-da, -suru)',
258 }->{$_} || $_);
259 }
260 print qq[</select>];
261 print qq[<input type=submit value=Submit>];
262 print qq[<p><label><input type=checkbox name=cs @{[$param->{cs} ? 'checked' : '']}> Case-sensitive</label>
263 <label><input type=checkbox name=aw @{[$param->{aw} ? 'checked' : '']}> As word(s)</label>
264 <label><input type=checkbox name=fb @{[$param->{fb} ? 'checked' : '']}> Show non-translated entries</label>];
265 print qq[</form></section>];
266 } # print_input_form

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24