/[suikacvs]/markup/html/html5/spec-ja/find.cgi
Suika

Contents of /markup/html/html5/spec-ja/find.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (hide annotations) (download)
Wed Aug 13 10:04:07 2008 UTC (16 years, 2 months ago) by wakaba
Branch: MAIN
CVS Tags: before-new-ids, after-new-ids
Changes since 1.5: +1 -1 lines
revert previous changes

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3 wakaba 1.3 use utf8;
4     use CGI::Carp qw/fatalsToBrowser/;
5 wakaba 1.1
6     BEGIN { require 'common.pl' }
7    
8     require Encode;
9    
10 wakaba 1.3 my $max_result = 100;
11 wakaba 1.1
12     sub decode_url ($) {
13     my $s = shift;
14     $s =~ tr/+/ /;
15     $s =~ s/%([0-9A-Fa-f]{2})/pack 'C', hex $1/ge;
16     return Encode::decode ('utf-8', $s);
17     } # decode_url
18    
19     sub htescape ($) {
20     my $s = shift;
21     $s =~ s/&/&/g;
22     $s =~ s/</&lt;/g;
23     $s =~ s/>/&gt;/g;
24     $s =~ s/"/&quot;/g;
25     return $s;
26     } # htescape
27    
28     my $param = {};
29     for (split /[&;]/, $ENV{QUERY_STRING} || '') {
30     my ($name, $value) = split /=/, $_, 2;
31     $param->{decode_url ($name)} = decode_url ($value);
32     }
33    
34 wakaba 1.3 my $eword = htescape $param->{word};
35    
36 wakaba 1.1 my $pattern = quotemeta normalize $param->{word};
37 wakaba 1.4 $pattern =~ s/\\-/[- ]/g;
38 wakaba 1.3 unless ($param->{cs}) {
39     $pattern =~ s/([A-Za-z])/'[' . uc ($1) . lc ($1) . ']'/ge;
40     }
41     my $suffix_pattern = {
42     ku => qr/(?>[かこきいっくけ])/,
43     su => qr/(?>[さそしすせ])/,
44     tsu => qr/(?>[たとちっつて])/,
45     nu => qr/(?>[なのにんぬね])/,
46     mu => qr/(?>[まもみんむめ])/,
47     ru => qr/(?>[らろりっるれ])/,
48     u => qr/(?>[わおいっうえ])/,
49     gu => qr/(?>[がごぎいぐげ])/,
50     bu => qr/(?>[ばぼびんぶべ])/,
51     ichidan => qr/(?>[るれろよ])?/,
52     kuru => qr/(?>[るれい])?/,
53     suru => qr/(?>す[るれ]|しろ?|せよ?|さ)?/,
54     i => qr/(?>か[ろっ]|く|い|けれ|う)?/, ## BUG: ありがたい -> ありがとう
55     da => qr/(?>だ[ろっ]?|で|に|なら?)?/,
56     dasuru => qr/(?>だ[ろっ]?|で|に|なら?|す[るれ]|しろ?|せよ?|さ)?/,
57     }->{$param->{suffix}} || qr//;
58     $pattern =~ s/$suffix_pattern$//;
59     $pattern .= $suffix_pattern;
60     $pattern = '\b' . $pattern . '\b' if $param->{aw};
61 wakaba 1.1
62     print qq[Content-Type: text/html ; charset=utf-8
63    
64     <!DOCTYPE HTML>
65     <html lang=en>
66     <head>
67     <title>Search result for "$eword"</title>
68     <link rel=stylesheet href="/www/style/html/xhtml">
69     <style>
70 wakaba 1.3 td {
71     vertical-align: top;
72     }
73 wakaba 1.1 mark {
74     background-color: yellow;
75     }
76     .tag {
77     color: maroon;
78     }
79     .ref {
80     color: orange;
81     }
82     .rfc2119 {
83     font-weight: bolder;
84     }
85     .pattern-star {
86     font-style: normal;
87     color: gray;
88     text-decoration: none;
89     border-style: none;
90     }
91     .pattern-var {
92     font-style: italic;
93     color: gray;
94     text-decoration: none;
95     border-style: none;
96     }
97 wakaba 1.3 input[type=text] {
98     width: 60%;
99     }
100 wakaba 1.1 </style>
101     </head>
102     <body>
103     <h1>Search result for "$eword"</h1>];
104    
105 wakaba 1.2 print_input_form ();
106    
107 wakaba 1.3 unless ('' =~ /$pattern/) {
108     print q[<article>];
109     my $has_match;
110     for_each_data_file (sub ($) {
111     my $data_file_name = shift;
112 wakaba 1.6 load_data_file ($data_file_name, my $exact_data = {}, my $pattern_data = {});
113 wakaba 1.3 $pattern_data = unescape_patterns ($pattern_data);
114     $has_match |= print_matches ($data_file_name, $exact_data, $pattern_data);
115     });
116    
117     unless ($has_match) {
118     print q[<p>No match found.</article>];
119     } else {
120     print q[</article>];
121     print_input_form ();
122     }
123 wakaba 1.1 }
124    
125     sub unescape_patterns ($) {
126     my $pattern_data = shift;
127     my $new_data = {};
128     for (keys %$pattern_data) {
129     my $w = $_;
130     my $v = $_;
131     $v =~ s/\(\.\+\)/*/g;
132     $v =~ s/\\([\s\S])/$1/g;
133     $new_data->{$v} = $pattern_data->{$w};
134     }
135     return $new_data;
136     } # unescape_patterns
137    
138     sub print_matches ($$$) {
139     my ($file_name, $exact_data, $pattern_data) = @_;
140    
141     my $file_id = $file_name;
142     if ($file_name =~ /([0-9A-Za-z-]+)\.dat$/) {
143     $file_id = $1;
144     }
145    
146     my $has_match;
147     my $r = qq[<h2>File "<a href="edit/@{[htescape $file_id]}"><code class=file>@{[htescape $file_id]}</code></a>"</h2><table>];
148     my $result = 0;
149     my $added = {};
150     my %en2ja = (%$exact_data, %$pattern_data);
151     for (sort {$a cmp $b} keys %en2ja) {
152     if (/$pattern/) {
153     $has_match = 1;
154     $added->{$_} = 1;
155     $r .= get_match_text ($_, $en2ja{$_}, $pattern);
156     $r .= q[<tr><td colspan=2>...] and last if ++$result == $max_result;
157     }
158     }
159     $result = 0;
160     my %ja2en = reverse %en2ja;
161     for (sort {$a cmp $b} keys %ja2en) {
162     if (/$pattern/) {
163     next if $added->{$ja2en{$_}};
164     $has_match = 1;
165     $r .= get_match_text ($ja2en{$_}, $_, $pattern);
166     $r .= q[<tr><td colspan=2>...] and last if ++$result == $max_result;
167     }
168     }
169     $r .= q[</table>];
170     print $r if $has_match;
171     return $has_match;
172     } # print_matches
173    
174     sub get_match_text ($$) {
175     my ($en, $ja) = @_;
176    
177     ## NOTE: Marking will not work well if it contains &, <, >, or ", or
178     ## the pattern matches with charrefs, e.g. "t" (part of &lt; and &quot;).
179    
180     my $r = q[<tr><td lang=en>];
181     my $v = htescape ($en);
182     $v =~ s[($pattern)][<mark>$1</mark>]g;
183     $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;
184     $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;
185     $v =~ s[\*][<var class=pattern-star>*</var>]g;
186     $r .= $v;
187     $r .= q[<td lang=ja>];
188     my $v = htescape ($ja);
189     $v =~ s[($pattern)][<mark>$1</mark>]g;
190     $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;
191     $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;
192     $v =~ s{(\[\[[^\[\]]+\]\])}[<span class=rfc2119>$1</span>]g;
193     $v =~ s[(\$[0-9]+)][<var class=pattern-var>$1</var>]g;
194     $r .= $v;
195    
196     return $r;
197     } # get_match_text
198 wakaba 1.2
199     sub print_input_form () {
200     print qq[<section><form action=find accept-charset=utf-8 method=get>];
201     print qq[<p><input type=text name=word value="@{[htescape $param->{word}]}">];
202 wakaba 1.3 print qq[<select name=suffix>];
203     for (qw/none ku su tsu nu mu ru u gu bu ichidan suru kuru i da dasuru/) {
204     print qq[<option value="@{[htescape $_]}"];
205     print qq[ selected] if $param->{suffix} eq $_;
206     print qq[>];
207     print htescape ({
208     none => 'No suffix',
209     ku => 'Ka-gyou Godan (-ku)',
210     su => 'Sa-gyou Godan (-su)',
211     tsu => 'Ta-gyou Godan (-tsu)',
212     nu => 'Na-gyou Godan (-nu)',
213     mu => 'Ma-gyou Godan (-mu)',
214     ru => 'Ra-gyou Godan (-ru)',
215     u => 'Wa/a-gyou Godan (-u)',
216     gu => 'Ga-gyou Godan (-gu)',
217     bu => 'Ba-gyou Godan (-bu)',
218     ichidan => 'Ichidan (-iru, -eru)',
219     suru => 'Sahen (-suru)',
220     kuru => 'Kahen (kuru)',
221     i => 'Keiyoushi (-i)',
222     da => 'Keiyou-doushi (-da)',
223     dasuru => 'Kei-dou-sahen (-da, -suru)',
224     }->{$_} || $_);
225     }
226     print qq[</select>];
227     print qq[<input type=submit value=Submit>];
228 wakaba 1.2 print qq[<p><label><input type=checkbox name=cs @{[$param->{cs} ? 'checked' : '']}> Case-sensitive</label>
229     <label><input type=checkbox name=aw @{[$param->{aw} ? 'checked' : '']}> As word(s)</label>];
230     print qq[</form></section>];
231     } # print_input_form

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24