/[suikacvs]/markup/html/html5/spec-ja/find.cgi
Suika

Contents of /markup/html/html5/spec-ja/find.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (show annotations) (download)
Sun Jul 20 11:17:24 2008 UTC (17 years ago) by wakaba
Branch: MAIN
Changes since 1.2: +76 -16 lines
*** empty log message ***

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4 use CGI::Carp qw/fatalsToBrowser/;
5
6 BEGIN { require 'common.pl' }
7
8 require Encode;
9
10 my $max_result = 100;
11
12 sub decode_url ($) {
13 my $s = shift;
14 $s =~ tr/+/ /;
15 $s =~ s/%([0-9A-Fa-f]{2})/pack 'C', hex $1/ge;
16 return Encode::decode ('utf-8', $s);
17 } # decode_url
18
19 sub htescape ($) {
20 my $s = shift;
21 $s =~ s/&/&/g;
22 $s =~ s/</&lt;/g;
23 $s =~ s/>/&gt;/g;
24 $s =~ s/"/&quot;/g;
25 return $s;
26 } # htescape
27
28 my $param = {};
29 for (split /[&;]/, $ENV{QUERY_STRING} || '') {
30 my ($name, $value) = split /=/, $_, 2;
31 $param->{decode_url ($name)} = decode_url ($value);
32 }
33
34 my $eword = htescape $param->{word};
35
36 my $pattern = quotemeta normalize $param->{word};
37 unless ($param->{cs}) {
38 $pattern =~ s/([A-Za-z])/'[' . uc ($1) . lc ($1) . ']'/ge;
39 }
40 my $suffix_pattern = {
41 ku => qr/(?>[かこきいっくけ])/,
42 su => qr/(?>[さそしすせ])/,
43 tsu => qr/(?>[たとちっつて])/,
44 nu => qr/(?>[なのにんぬね])/,
45 mu => qr/(?>[まもみんむめ])/,
46 ru => qr/(?>[らろりっるれ])/,
47 u => qr/(?>[わおいっうえ])/,
48 gu => qr/(?>[がごぎいぐげ])/,
49 bu => qr/(?>[ばぼびんぶべ])/,
50 ichidan => qr/(?>[るれろよ])?/,
51 kuru => qr/(?>[るれい])?/,
52 suru => qr/(?>す[るれ]|しろ?|せよ?|さ)?/,
53 i => qr/(?>か[ろっ]|く|い|けれ|う)?/, ## BUG: ありがたい -> ありがとう
54 da => qr/(?>だ[ろっ]?|で|に|なら?)?/,
55 dasuru => qr/(?>だ[ろっ]?|で|に|なら?|す[るれ]|しろ?|せよ?|さ)?/,
56 }->{$param->{suffix}} || qr//;
57 $pattern =~ s/$suffix_pattern$//;
58 $pattern .= $suffix_pattern;
59 $pattern = '\b' . $pattern . '\b' if $param->{aw};
60
61 print qq[Content-Type: text/html ; charset=utf-8
62
63 <!DOCTYPE HTML>
64 <html lang=en>
65 <head>
66 <title>Search result for "$eword"</title>
67 <link rel=stylesheet href="/www/style/html/xhtml">
68 <style>
69 td {
70 vertical-align: top;
71 }
72 mark {
73 background-color: yellow;
74 }
75 .tag {
76 color: maroon;
77 }
78 .ref {
79 color: orange;
80 }
81 .rfc2119 {
82 font-weight: bolder;
83 }
84 .pattern-star {
85 font-style: normal;
86 color: gray;
87 text-decoration: none;
88 border-style: none;
89 }
90 .pattern-var {
91 font-style: italic;
92 color: gray;
93 text-decoration: none;
94 border-style: none;
95 }
96 input[type=text] {
97 width: 60%;
98 }
99 </style>
100 </head>
101 <body>
102 <h1>Search result for "$eword"</h1>];
103
104 print_input_form ();
105
106 unless ('' =~ /$pattern/) {
107 print q[<article>];
108 my $has_match;
109 for_each_data_file (sub ($) {
110 my $data_file_name = shift;
111 load_data_file ($data_file_name, my $exact_data = {}, my $pattern_data = {});
112 $pattern_data = unescape_patterns ($pattern_data);
113 $has_match |= print_matches ($data_file_name, $exact_data, $pattern_data);
114 });
115
116 unless ($has_match) {
117 print q[<p>No match found.</article>];
118 } else {
119 print q[</article>];
120 print_input_form ();
121 }
122 }
123
124 sub unescape_patterns ($) {
125 my $pattern_data = shift;
126 my $new_data = {};
127 for (keys %$pattern_data) {
128 my $w = $_;
129 my $v = $_;
130 $v =~ s/\(\.\+\)/*/g;
131 $v =~ s/\\([\s\S])/$1/g;
132 $new_data->{$v} = $pattern_data->{$w};
133 }
134 return $new_data;
135 } # unescape_patterns
136
137 sub print_matches ($$$) {
138 my ($file_name, $exact_data, $pattern_data) = @_;
139
140 my $file_id = $file_name;
141 if ($file_name =~ /([0-9A-Za-z-]+)\.dat$/) {
142 $file_id = $1;
143 }
144
145 my $has_match;
146 my $r = qq[<h2>File "<a href="edit/@{[htescape $file_id]}"><code class=file>@{[htescape $file_id]}</code></a>"</h2><table>];
147 my $result = 0;
148 my $added = {};
149 my %en2ja = (%$exact_data, %$pattern_data);
150 for (sort {$a cmp $b} keys %en2ja) {
151 if (/$pattern/) {
152 $has_match = 1;
153 $added->{$_} = 1;
154 $r .= get_match_text ($_, $en2ja{$_}, $pattern);
155 $r .= q[<tr><td colspan=2>...] and last if ++$result == $max_result;
156 }
157 }
158 $result = 0;
159 my %ja2en = reverse %en2ja;
160 for (sort {$a cmp $b} keys %ja2en) {
161 if (/$pattern/) {
162 next if $added->{$ja2en{$_}};
163 $has_match = 1;
164 $r .= get_match_text ($ja2en{$_}, $_, $pattern);
165 $r .= q[<tr><td colspan=2>...] and last if ++$result == $max_result;
166 }
167 }
168 $r .= q[</table>];
169 print $r if $has_match;
170 return $has_match;
171 } # print_matches
172
173 sub get_match_text ($$) {
174 my ($en, $ja) = @_;
175
176 ## NOTE: Marking will not work well if it contains &, <, >, or ", or
177 ## the pattern matches with charrefs, e.g. "t" (part of &lt; and &quot;).
178
179 my $r = q[<tr><td lang=en>];
180 my $v = htescape ($en);
181 $v =~ s[($pattern)][<mark>$1</mark>]g;
182 $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;
183 $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;
184 $v =~ s[\*][<var class=pattern-star>*</var>]g;
185 $r .= $v;
186 $r .= q[<td lang=ja>];
187 my $v = htescape ($ja);
188 $v =~ s[($pattern)][<mark>$1</mark>]g;
189 $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;
190 $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;
191 $v =~ s{(\[\[[^\[\]]+\]\])}[<span class=rfc2119>$1</span>]g;
192 $v =~ s[(\$[0-9]+)][<var class=pattern-var>$1</var>]g;
193 $r .= $v;
194
195 return $r;
196 } # get_match_text
197
198 sub print_input_form () {
199 print qq[<section><form action=find accept-charset=utf-8 method=get>];
200 print qq[<p><input type=text name=word value="@{[htescape $param->{word}]}">];
201 print qq[<select name=suffix>];
202 for (qw/none ku su tsu nu mu ru u gu bu ichidan suru kuru i da dasuru/) {
203 print qq[<option value="@{[htescape $_]}"];
204 print qq[ selected] if $param->{suffix} eq $_;
205 print qq[>];
206 print htescape ({
207 none => 'No suffix',
208 ku => 'Ka-gyou Godan (-ku)',
209 su => 'Sa-gyou Godan (-su)',
210 tsu => 'Ta-gyou Godan (-tsu)',
211 nu => 'Na-gyou Godan (-nu)',
212 mu => 'Ma-gyou Godan (-mu)',
213 ru => 'Ra-gyou Godan (-ru)',
214 u => 'Wa/a-gyou Godan (-u)',
215 gu => 'Ga-gyou Godan (-gu)',
216 bu => 'Ba-gyou Godan (-bu)',
217 ichidan => 'Ichidan (-iru, -eru)',
218 suru => 'Sahen (-suru)',
219 kuru => 'Kahen (kuru)',
220 i => 'Keiyoushi (-i)',
221 da => 'Keiyou-doushi (-da)',
222 dasuru => 'Kei-dou-sahen (-da, -suru)',
223 }->{$_} || $_);
224 }
225 print qq[</select>];
226 print qq[<input type=submit value=Submit>];
227 print qq[<p><label><input type=checkbox name=cs @{[$param->{cs} ? 'checked' : '']}> Case-sensitive</label>
228 <label><input type=checkbox name=aw @{[$param->{aw} ? 'checked' : '']}> As word(s)</label>];
229 print qq[</form></section>];
230 } # print_input_form

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24