/[suikacvs]/markup/html/html5/spec-ja/find.cgi
Suika

Contents of /markup/html/html5/spec-ja/find.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (hide annotations) (download)
Mon Oct 27 04:52:39 2008 UTC (16 years ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.6: +102 -67 lines
Find script revised for new data format

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3 wakaba 1.3 use utf8;
4     use CGI::Carp qw/fatalsToBrowser/;
5 wakaba 1.1
6     BEGIN { require 'common.pl' }
7    
8     require Encode;
9    
10 wakaba 1.3 my $max_result = 100;
11 wakaba 1.1
12     sub decode_url ($) {
13     my $s = shift;
14     $s =~ tr/+/ /;
15     $s =~ s/%([0-9A-Fa-f]{2})/pack 'C', hex $1/ge;
16     return Encode::decode ('utf-8', $s);
17     } # decode_url
18    
19 wakaba 1.7 sub encode_url ($) {
20     my $s = Encode::encode ('utf-8', shift);
21     $s =~ s/([^0-9A-Za-z_~.-])/sprintf '%%%02X', ord $1/g;
22     return $s;
23     } # encode_url
24    
25 wakaba 1.1 sub htescape ($) {
26     my $s = shift;
27     $s =~ s/&/&/g;
28     $s =~ s/</&lt;/g;
29     $s =~ s/>/&gt;/g;
30     $s =~ s/"/&quot;/g;
31     return $s;
32     } # htescape
33    
34     my $param = {};
35     for (split /[&;]/, $ENV{QUERY_STRING} || '') {
36     my ($name, $value) = split /=/, $_, 2;
37     $param->{decode_url ($name)} = decode_url ($value);
38     }
39    
40 wakaba 1.7 my $suffix_patterns = {
41 wakaba 1.3 ku => qr/(?>[かこきいっくけ])/,
42     su => qr/(?>[さそしすせ])/,
43     tsu => qr/(?>[たとちっつて])/,
44     nu => qr/(?>[なのにんぬね])/,
45     mu => qr/(?>[まもみんむめ])/,
46     ru => qr/(?>[らろりっるれ])/,
47     u => qr/(?>[わおいっうえ])/,
48     gu => qr/(?>[がごぎいぐげ])/,
49     bu => qr/(?>[ばぼびんぶべ])/,
50     ichidan => qr/(?>[るれろよ])?/,
51     kuru => qr/(?>[るれい])?/,
52     suru => qr/(?>す[るれ]|しろ?|せよ?|さ)?/,
53     i => qr/(?>か[ろっ]|く|い|けれ|う)?/, ## BUG: ありがたい -> ありがとう
54     da => qr/(?>だ[ろっ]?|で|に|なら?)?/,
55     dasuru => qr/(?>だ[ろっ]?|で|に|なら?|す[るれ]|しろ?|せよ?|さ)?/,
56 wakaba 1.7 };
57    
58     my $eword = htescape $param->{word};
59 wakaba 1.1
60 wakaba 1.7 my @pattern;
61     my %tag;
62     for (split /\s+/, $param->{word}) {
63     if (s/^tag://) {
64     $tag{$_} = 1;
65     } else {
66     my $pattern = quotemeta normalize $_;
67     $pattern =~ s/\\-/[- ]/g;
68     unless ($param->{cs}) {
69     $pattern =~ s/([A-Za-z])/'[' . uc ($1) . lc ($1) . ']'/ge;
70     }
71     my $suffix_pattern = $suffix_patterns->{$param->{suffix}} || qr//;
72     $pattern =~ s/$suffix_pattern$//;
73     $pattern .= $suffix_pattern;
74     $pattern = '\b' . $pattern . '\b' if $param->{aw};
75     push @pattern, $pattern;
76     }
77     }
78    
79     $|=1;
80 wakaba 1.1 print qq[Content-Type: text/html ; charset=utf-8
81    
82     <!DOCTYPE HTML>
83     <html lang=en>
84     <head>
85     <title>Search result for "$eword"</title>
86     <link rel=stylesheet href="/www/style/html/xhtml">
87 wakaba 1.7 <link rel=stylesheet href="ja-style">
88 wakaba 1.1 <style>
89 wakaba 1.3 td {
90     vertical-align: top;
91     }
92 wakaba 1.1 mark {
93     background-color: yellow;
94     }
95     .tag {
96     color: maroon;
97     }
98     .ref {
99     color: orange;
100     }
101     .rfc2119 {
102     font-weight: bolder;
103     }
104     .pattern-star {
105     font-style: normal;
106     color: gray;
107     text-decoration: none;
108     border-style: none;
109     }
110     .pattern-var {
111     font-style: italic;
112     color: gray;
113     text-decoration: none;
114     border-style: none;
115     }
116 wakaba 1.3 input[type=text] {
117     width: 60%;
118     }
119 wakaba 1.1 </style>
120 wakaba 1.7 <script src=ja-script async defer></script>
121 wakaba 1.1 </head>
122     <body>
123     <h1>Search result for "$eword"</h1>];
124    
125 wakaba 1.2 print_input_form ();
126    
127 wakaba 1.7 unless (check_match ({en => '', ja => '', tags => []})) {
128 wakaba 1.3 print q[<article>];
129     my $has_match;
130 wakaba 1.7 for_each_entry_set (sub ($) {
131     my (undef, $entries) = @_;
132     $has_match |= print_matches ($entries);
133     }, 1);
134    
135     if ($param->{fb}) {
136     $has_match |= print_matches ({exact => get_fallback_entries ()});
137     }
138 wakaba 1.3
139     unless ($has_match) {
140     print q[<p>No match found.</article>];
141     } else {
142 wakaba 1.7 print q[</table></article>];
143 wakaba 1.3 print_input_form ();
144     }
145 wakaba 1.1 }
146    
147 wakaba 1.7 exit;
148    
149     sub check_match ($) {
150     my ($entry) = @_;
151    
152     for my $tag (keys %tag) {
153     F: {
154     for (@{$entry->{tags} or []}) {
155     last F if $_ eq $tag;
156     }
157     return 0;
158     } # F
159     }
160    
161     for my $pattern (@pattern) {
162     if ($entry->{en} =~ /$pattern/ or
163     $entry->{ja} =~ /$pattern/) {
164     #
165     } else {
166     return 0;
167     }
168 wakaba 1.1 }
169    
170 wakaba 1.7 return 1;
171     } # check_match
172    
173     sub print_matches ($) {
174     my ($entries) = @_;
175    
176     use feature 'state';
177     state $result //= 0;
178 wakaba 1.1 my $has_match;
179 wakaba 1.7
180     my %en2ja = (%{$entries->{exact} or {}}, %{$entries->{pattern} or {}});
181     for (keys %en2ja) {
182     if (check_match ($en2ja{$_})) {
183 wakaba 1.1 $has_match = 1;
184 wakaba 1.7 print '<table>' unless $result;
185     print ''. get_match_text ($_, $en2ja{$_});
186     ++$result;
187     # print q[<tr><td colspan=2>...] and last if ++$result == $max_result;
188 wakaba 1.1 }
189     }
190     return $has_match;
191     } # print_matches
192    
193     sub get_match_text ($$) {
194 wakaba 1.7 my $hash = shift;
195     my $entry = shift;
196 wakaba 1.1
197     ## NOTE: Marking will not work well if it contains &, <, >, or ", or
198     ## the pattern matches with charrefs, e.g. "t" (part of &lt; and &quot;).
199    
200 wakaba 1.7 my $r = q[<tr data-ja-hash="] . htescape ($hash) . q["><td lang=en>];
201     my $v = htescape ($entry->{en});
202     for my $pattern (@pattern) {
203     $v =~ s[($pattern)][<mark>$1</mark>]g;
204     }
205 wakaba 1.1 $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;
206     $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;
207     $v =~ s[\*][<var class=pattern-star>*</var>]g;
208     $r .= $v;
209 wakaba 1.7
210 wakaba 1.1 $r .= q[<td lang=ja>];
211 wakaba 1.7 my $v = htescape ($entry->{ja});
212     for my $pattern (@pattern) {
213     $v =~ s[($pattern)][<mark>$1</mark>]g;
214     }
215 wakaba 1.1 $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;
216     $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;
217     $v =~ s{(\[\[[^\[\]]+\]\])}[<span class=rfc2119>$1</span>]g;
218     $v =~ s[(\$[0-9]+)][<var class=pattern-var>$1</var>]g;
219     $r .= $v;
220    
221 wakaba 1.7 $r .= q[<td lang>];
222     $r .= join ' ', map {
223     ($tag{$_} ? '<mark>' : '') .
224     '<a href="find?word=tag:' . encode_url ($_) . '">' .
225     htescape ($_) .
226     '</a>' .
227     ($tag{$_} ? '</mark>' : '')
228     } @{$entry->{tags} or []};
229    
230 wakaba 1.1 return $r;
231     } # get_match_text
232 wakaba 1.2
233     sub print_input_form () {
234     print qq[<section><form action=find accept-charset=utf-8 method=get>];
235     print qq[<p><input type=text name=word value="@{[htescape $param->{word}]}">];
236 wakaba 1.3 print qq[<select name=suffix>];
237     for (qw/none ku su tsu nu mu ru u gu bu ichidan suru kuru i da dasuru/) {
238     print qq[<option value="@{[htescape $_]}"];
239     print qq[ selected] if $param->{suffix} eq $_;
240     print qq[>];
241     print htescape ({
242     none => 'No suffix',
243     ku => 'Ka-gyou Godan (-ku)',
244     su => 'Sa-gyou Godan (-su)',
245     tsu => 'Ta-gyou Godan (-tsu)',
246     nu => 'Na-gyou Godan (-nu)',
247     mu => 'Ma-gyou Godan (-mu)',
248     ru => 'Ra-gyou Godan (-ru)',
249     u => 'Wa/a-gyou Godan (-u)',
250     gu => 'Ga-gyou Godan (-gu)',
251     bu => 'Ba-gyou Godan (-bu)',
252     ichidan => 'Ichidan (-iru, -eru)',
253     suru => 'Sahen (-suru)',
254     kuru => 'Kahen (kuru)',
255     i => 'Keiyoushi (-i)',
256     da => 'Keiyou-doushi (-da)',
257     dasuru => 'Kei-dou-sahen (-da, -suru)',
258     }->{$_} || $_);
259     }
260     print qq[</select>];
261     print qq[<input type=submit value=Submit>];
262 wakaba 1.2 print qq[<p><label><input type=checkbox name=cs @{[$param->{cs} ? 'checked' : '']}> Case-sensitive</label>
263 wakaba 1.7 <label><input type=checkbox name=aw @{[$param->{aw} ? 'checked' : '']}> As word(s)</label>
264     <label><input type=checkbox name=fb @{[$param->{fb} ? 'checked' : '']}> Show non-translated entries</label>];
265 wakaba 1.2 print qq[</form></section>];
266     } # print_input_form

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24