/[suikacvs]/markup/html/html5/spec-ja/find.cgi
Suika

Contents of /markup/html/html5/spec-ja/find.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (hide annotations) (download)
Sun Jul 20 11:17:24 2008 UTC (16 years, 3 months ago) by wakaba
Branch: MAIN
Changes since 1.2: +76 -16 lines
*** empty log message ***

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3 wakaba 1.3 use utf8;
4     use CGI::Carp qw/fatalsToBrowser/;
5 wakaba 1.1
6     BEGIN { require 'common.pl' }
7    
8     require Encode;
9    
10 wakaba 1.3 my $max_result = 100;
11 wakaba 1.1
12     sub decode_url ($) {
13     my $s = shift;
14     $s =~ tr/+/ /;
15     $s =~ s/%([0-9A-Fa-f]{2})/pack 'C', hex $1/ge;
16     return Encode::decode ('utf-8', $s);
17     } # decode_url
18    
19     sub htescape ($) {
20     my $s = shift;
21     $s =~ s/&/&/g;
22     $s =~ s/</&lt;/g;
23     $s =~ s/>/&gt;/g;
24     $s =~ s/"/&quot;/g;
25     return $s;
26     } # htescape
27    
28     my $param = {};
29     for (split /[&;]/, $ENV{QUERY_STRING} || '') {
30     my ($name, $value) = split /=/, $_, 2;
31     $param->{decode_url ($name)} = decode_url ($value);
32     }
33    
34 wakaba 1.3 my $eword = htescape $param->{word};
35    
36 wakaba 1.1 my $pattern = quotemeta normalize $param->{word};
37 wakaba 1.3 unless ($param->{cs}) {
38     $pattern =~ s/([A-Za-z])/'[' . uc ($1) . lc ($1) . ']'/ge;
39     }
40     my $suffix_pattern = {
41     ku => qr/(?>[かこきいっくけ])/,
42     su => qr/(?>[さそしすせ])/,
43     tsu => qr/(?>[たとちっつて])/,
44     nu => qr/(?>[なのにんぬね])/,
45     mu => qr/(?>[まもみんむめ])/,
46     ru => qr/(?>[らろりっるれ])/,
47     u => qr/(?>[わおいっうえ])/,
48     gu => qr/(?>[がごぎいぐげ])/,
49     bu => qr/(?>[ばぼびんぶべ])/,
50     ichidan => qr/(?>[るれろよ])?/,
51     kuru => qr/(?>[るれい])?/,
52     suru => qr/(?>す[るれ]|しろ?|せよ?|さ)?/,
53     i => qr/(?>か[ろっ]|く|い|けれ|う)?/, ## BUG: ありがたい -> ありがとう
54     da => qr/(?>だ[ろっ]?|で|に|なら?)?/,
55     dasuru => qr/(?>だ[ろっ]?|で|に|なら?|す[るれ]|しろ?|せよ?|さ)?/,
56     }->{$param->{suffix}} || qr//;
57     $pattern =~ s/$suffix_pattern$//;
58     $pattern .= $suffix_pattern;
59     $pattern = '\b' . $pattern . '\b' if $param->{aw};
60 wakaba 1.1
61     print qq[Content-Type: text/html ; charset=utf-8
62    
63     <!DOCTYPE HTML>
64     <html lang=en>
65     <head>
66     <title>Search result for "$eword"</title>
67     <link rel=stylesheet href="/www/style/html/xhtml">
68     <style>
69 wakaba 1.3 td {
70     vertical-align: top;
71     }
72 wakaba 1.1 mark {
73     background-color: yellow;
74     }
75     .tag {
76     color: maroon;
77     }
78     .ref {
79     color: orange;
80     }
81     .rfc2119 {
82     font-weight: bolder;
83     }
84     .pattern-star {
85     font-style: normal;
86     color: gray;
87     text-decoration: none;
88     border-style: none;
89     }
90     .pattern-var {
91     font-style: italic;
92     color: gray;
93     text-decoration: none;
94     border-style: none;
95     }
96 wakaba 1.3 input[type=text] {
97     width: 60%;
98     }
99 wakaba 1.1 </style>
100     </head>
101     <body>
102     <h1>Search result for "$eword"</h1>];
103    
104 wakaba 1.2 print_input_form ();
105    
106 wakaba 1.3 unless ('' =~ /$pattern/) {
107     print q[<article>];
108     my $has_match;
109     for_each_data_file (sub ($) {
110     my $data_file_name = shift;
111     load_data_file ($data_file_name, my $exact_data = {}, my $pattern_data = {});
112     $pattern_data = unescape_patterns ($pattern_data);
113     $has_match |= print_matches ($data_file_name, $exact_data, $pattern_data);
114     });
115    
116     unless ($has_match) {
117     print q[<p>No match found.</article>];
118     } else {
119     print q[</article>];
120     print_input_form ();
121     }
122 wakaba 1.1 }
123    
124     sub unescape_patterns ($) {
125     my $pattern_data = shift;
126     my $new_data = {};
127     for (keys %$pattern_data) {
128     my $w = $_;
129     my $v = $_;
130     $v =~ s/\(\.\+\)/*/g;
131     $v =~ s/\\([\s\S])/$1/g;
132     $new_data->{$v} = $pattern_data->{$w};
133     }
134     return $new_data;
135     } # unescape_patterns
136    
137     sub print_matches ($$$) {
138     my ($file_name, $exact_data, $pattern_data) = @_;
139    
140     my $file_id = $file_name;
141     if ($file_name =~ /([0-9A-Za-z-]+)\.dat$/) {
142     $file_id = $1;
143     }
144    
145     my $has_match;
146     my $r = qq[<h2>File "<a href="edit/@{[htescape $file_id]}"><code class=file>@{[htescape $file_id]}</code></a>"</h2><table>];
147     my $result = 0;
148     my $added = {};
149     my %en2ja = (%$exact_data, %$pattern_data);
150     for (sort {$a cmp $b} keys %en2ja) {
151     if (/$pattern/) {
152     $has_match = 1;
153     $added->{$_} = 1;
154     $r .= get_match_text ($_, $en2ja{$_}, $pattern);
155     $r .= q[<tr><td colspan=2>...] and last if ++$result == $max_result;
156     }
157     }
158     $result = 0;
159     my %ja2en = reverse %en2ja;
160     for (sort {$a cmp $b} keys %ja2en) {
161     if (/$pattern/) {
162     next if $added->{$ja2en{$_}};
163     $has_match = 1;
164     $r .= get_match_text ($ja2en{$_}, $_, $pattern);
165     $r .= q[<tr><td colspan=2>...] and last if ++$result == $max_result;
166     }
167     }
168     $r .= q[</table>];
169     print $r if $has_match;
170     return $has_match;
171     } # print_matches
172    
173     sub get_match_text ($$) {
174     my ($en, $ja) = @_;
175    
176     ## NOTE: Marking will not work well if it contains &, <, >, or ", or
177     ## the pattern matches with charrefs, e.g. "t" (part of &lt; and &quot;).
178    
179     my $r = q[<tr><td lang=en>];
180     my $v = htescape ($en);
181     $v =~ s[($pattern)][<mark>$1</mark>]g;
182     $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;
183     $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;
184     $v =~ s[\*][<var class=pattern-star>*</var>]g;
185     $r .= $v;
186     $r .= q[<td lang=ja>];
187     my $v = htescape ($ja);
188     $v =~ s[($pattern)][<mark>$1</mark>]g;
189     $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;
190     $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;
191     $v =~ s{(\[\[[^\[\]]+\]\])}[<span class=rfc2119>$1</span>]g;
192     $v =~ s[(\$[0-9]+)][<var class=pattern-var>$1</var>]g;
193     $r .= $v;
194    
195     return $r;
196     } # get_match_text
197 wakaba 1.2
198     sub print_input_form () {
199     print qq[<section><form action=find accept-charset=utf-8 method=get>];
200     print qq[<p><input type=text name=word value="@{[htescape $param->{word}]}">];
201 wakaba 1.3 print qq[<select name=suffix>];
202     for (qw/none ku su tsu nu mu ru u gu bu ichidan suru kuru i da dasuru/) {
203     print qq[<option value="@{[htescape $_]}"];
204     print qq[ selected] if $param->{suffix} eq $_;
205     print qq[>];
206     print htescape ({
207     none => 'No suffix',
208     ku => 'Ka-gyou Godan (-ku)',
209     su => 'Sa-gyou Godan (-su)',
210     tsu => 'Ta-gyou Godan (-tsu)',
211     nu => 'Na-gyou Godan (-nu)',
212     mu => 'Ma-gyou Godan (-mu)',
213     ru => 'Ra-gyou Godan (-ru)',
214     u => 'Wa/a-gyou Godan (-u)',
215     gu => 'Ga-gyou Godan (-gu)',
216     bu => 'Ba-gyou Godan (-bu)',
217     ichidan => 'Ichidan (-iru, -eru)',
218     suru => 'Sahen (-suru)',
219     kuru => 'Kahen (kuru)',
220     i => 'Keiyoushi (-i)',
221     da => 'Keiyou-doushi (-da)',
222     dasuru => 'Kei-dou-sahen (-da, -suru)',
223     }->{$_} || $_);
224     }
225     print qq[</select>];
226     print qq[<input type=submit value=Submit>];
227 wakaba 1.2 print qq[<p><label><input type=checkbox name=cs @{[$param->{cs} ? 'checked' : '']}> Case-sensitive</label>
228     <label><input type=checkbox name=aw @{[$param->{aw} ? 'checked' : '']}> As word(s)</label>];
229     print qq[</form></section>];
230     } # print_input_form

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24