/[suikacvs]/markup/html/html5/spec-ja/find.cgi
Suika

Diff of /markup/html/html5/spec-ja/find.cgi

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1 by wakaba, Sun Jul 20 07:45:21 2008 UTC revision 1.7 by wakaba, Mon Oct 27 04:52:39 2008 UTC
# Line 1  Line 1 
1  #!/usr/bin/perl  #!/usr/bin/perl
2  use strict;  use strict;
3    use utf8;
4    use CGI::Carp qw/fatalsToBrowser/;
5    
6  BEGIN { require 'common.pl' }  BEGIN { require 'common.pl' }
7    
8  require Encode;  require Encode;
9    
10  my $max_result = 30;  my $max_result = 100;
11    
12  sub decode_url ($) {  sub decode_url ($) {
13    my $s = shift;    my $s = shift;
# Line 14  sub decode_url ($) { Line 16  sub decode_url ($) {
16    return Encode::decode ('utf-8', $s);    return Encode::decode ('utf-8', $s);
17  } # decode_url  } # decode_url
18    
19    sub encode_url ($) {
20      my $s = Encode::encode ('utf-8', shift);
21      $s =~ s/([^0-9A-Za-z_~.-])/sprintf '%%%02X', ord $1/g;
22      return $s;
23    } # encode_url
24    
25  sub htescape ($) {  sub htescape ($) {
26    my $s = shift;    my $s = shift;
27    $s =~ s/&/&/g;    $s =~ s/&/&/g;
# Line 29  for (split /[&;]/, $ENV{QUERY_STRING} || Line 37  for (split /[&;]/, $ENV{QUERY_STRING} ||
37    $param->{decode_url ($name)} = decode_url ($value);    $param->{decode_url ($name)} = decode_url ($value);
38  }  }
39    
40  my $pattern = quotemeta normalize $param->{word};  my $suffix_patterns = {
41      ku => qr/(?>[かこきいっくけ])/,
42      su => qr/(?>[さそしすせ])/,
43      tsu => qr/(?>[たとちっつて])/,
44      nu => qr/(?>[なのにんぬね])/,
45      mu => qr/(?>[まもみんむめ])/,
46      ru => qr/(?>[らろりっるれ])/,
47      u => qr/(?>[わおいっうえ])/,
48      gu => qr/(?>[がごぎいぐげ])/,
49      bu => qr/(?>[ばぼびんぶべ])/,
50      ichidan => qr/(?>[るれろよ])?/,
51      kuru => qr/(?>[るれい])?/,
52      suru => qr/(?>す[るれ]|しろ?|せよ?|さ)?/,
53      i => qr/(?>か[ろっ]|く|い|けれ|う)?/, ## BUG: ありがたい -> ありがとう
54      da => qr/(?>だ[ろっ]?|で|に|なら?)?/,
55      dasuru => qr/(?>だ[ろっ]?|で|に|なら?|す[るれ]|しろ?|せよ?|さ)?/,
56    };
57    
58  my $eword = htescape $param->{word};  my $eword = htescape $param->{word};
59    
60    my @pattern;
61    my %tag;
62    for (split /\s+/, $param->{word}) {
63      if (s/^tag://) {
64        $tag{$_} = 1;
65      } else {
66        my $pattern = quotemeta normalize $_;
67        $pattern =~ s/\\-/[- ]/g;
68        unless ($param->{cs}) {
69          $pattern =~ s/([A-Za-z])/'[' . uc ($1) . lc ($1) . ']'/ge;
70        }
71        my $suffix_pattern = $suffix_patterns->{$param->{suffix}} || qr//;
72        $pattern =~ s/$suffix_pattern$//;
73        $pattern .= $suffix_pattern;
74        $pattern = '\b' . $pattern . '\b' if $param->{aw};
75        push @pattern, $pattern;
76      }
77    }
78    
79    $|=1;
80  print qq[Content-Type: text/html ; charset=utf-8  print qq[Content-Type: text/html ; charset=utf-8
81    
82  <!DOCTYPE HTML>  <!DOCTYPE HTML>
# Line 39  print qq[Content-Type: text/html ; chars Line 84  print qq[Content-Type: text/html ; chars
84  <head>  <head>
85  <title>Search result for "$eword"</title>  <title>Search result for "$eword"</title>
86  <link rel=stylesheet href="/www/style/html/xhtml">  <link rel=stylesheet href="/www/style/html/xhtml">
87    <link rel=stylesheet href="ja-style">
88  <style>  <style>
89    td {
90      vertical-align: top;
91    }
92  mark {  mark {
93    background-color: yellow;    background-color: yellow;
94  }  }
# Line 64  mark { Line 113  mark {
113    text-decoration: none;    text-decoration: none;
114    border-style: none;    border-style: none;
115  }  }
116    input[type=text] {
117      width: 60%;
118    }
119  </style>  </style>
120    <script src=ja-script async defer></script>
121  </head>  </head>
122  <body>  <body>
123  <h1>Search result for "$eword"</h1>];  <h1>Search result for "$eword"</h1>];
124    
125  my $has_match;  print_input_form ();
126  for_each_data_file (sub ($) {  
127    my $data_file_name = shift;  unless (check_match ({en => '', ja => '', tags => []})) {
128    load_data_file ($data_file_name, my $exact_data = {}, my $pattern_data = {});    print q[<article>];
129    $pattern_data = unescape_patterns ($pattern_data);    my $has_match;
130    $has_match |= print_matches ($data_file_name, $exact_data, $pattern_data);    for_each_entry_set (sub ($) {
131  });      my (undef, $entries) = @_;
132        $has_match |= print_matches ($entries);
133  unless ($has_match) {    }, 1);
134    print q[<p>No match found.];  
135  }    if ($param->{fb}) {
136        $has_match |= print_matches ({exact => get_fallback_entries ()});
 sub unescape_patterns ($) {  
   my $pattern_data = shift;  
   my $new_data = {};  
   for (keys %$pattern_data) {  
     my $w = $_;  
     my $v = $_;  
     $v =~ s/\(\.\+\)/*/g;  
     $v =~ s/\\([\s\S])/$1/g;  
     $new_data->{$v} = $pattern_data->{$w};  
   }  
   return $new_data;  
 } # unescape_patterns  
   
 sub print_matches ($$$) {  
   my ($file_name, $exact_data, $pattern_data) = @_;  
   
   my $file_id = $file_name;  
   if ($file_name =~ /([0-9A-Za-z-]+)\.dat$/) {  
     $file_id = $1;  
137    }    }
138        
139    my $has_match;    unless ($has_match) {
140    my $r = qq[<h2>File "<a href="edit/@{[htescape $file_id]}"><code class=file>@{[htescape $file_id]}</code></a>"</h2><table>];      print q[<p>No match found.</article>];
141    my $result = 0;    } else {
142    my $added = {};      print q[</table></article>];
143    my %en2ja = (%$exact_data, %$pattern_data);      print_input_form ();
144    for (sort {$a cmp $b} keys %en2ja) {    }
145      if (/$pattern/) {  }
146        $has_match = 1;  
147        $added->{$_} = 1;  exit;
148        $r .= get_match_text ($_, $en2ja{$_}, $pattern);  
149        $r .= q[<tr><td colspan=2>...] and last if ++$result == $max_result;  sub check_match ($) {
150      my ($entry) = @_;
151    
152      for my $tag (keys %tag) {
153        F: {
154          for (@{$entry->{tags} or []}) {
155            last F if $_ eq $tag;
156          }
157          return 0;
158        } # F
159      }
160    
161      for my $pattern (@pattern) {
162        if ($entry->{en} =~ /$pattern/ or
163            $entry->{ja} =~ /$pattern/) {
164          #
165        } else {
166          return 0;
167      }      }
168    }    }
169    $result = 0;    
170    my %ja2en = reverse %en2ja;    return 1;
171    for (sort {$a cmp $b} keys %ja2en) {  } # check_match
172      if (/$pattern/) {  
173        next if $added->{$ja2en{$_}};  sub print_matches ($) {
174      my ($entries) = @_;
175    
176      use feature 'state';
177      state $result //= 0;
178      my $has_match;
179    
180      my %en2ja = (%{$entries->{exact} or {}}, %{$entries->{pattern} or {}});
181      for (keys %en2ja) {
182        if (check_match ($en2ja{$_})) {
183        $has_match = 1;        $has_match = 1;
184        $r .= get_match_text ($ja2en{$_}, $_, $pattern);        print '<table>' unless $result;
185        $r .= q[<tr><td colspan=2>...] and last if ++$result == $max_result;        print ''. get_match_text ($_, $en2ja{$_});
186          ++$result;
187    #      print q[<tr><td colspan=2>...] and last if ++$result == $max_result;
188      }      }
189    }    }
   $r .= q[</table>];  
   print $r if $has_match;  
190    return $has_match;    return $has_match;
191  } # print_matches  } # print_matches
192    
193  sub get_match_text ($$) {  sub get_match_text ($$) {
194    my ($en, $ja) = @_;    my $hash = shift;
195      my $entry = shift;
196    
197    ## NOTE: Marking will not work well if it contains &, <, >, or ", or    ## NOTE: Marking will not work well if it contains &, <, >, or ", or
198    ## the pattern matches with charrefs, e.g. "t" (part of &lt; and &quot;).    ## the pattern matches with charrefs, e.g. "t" (part of &lt; and &quot;).
199    
200    my $r = q[<tr><td lang=en>];    my $r = q[<tr data-ja-hash="] . htescape ($hash) . q["><td lang=en>];
201    my $v = htescape ($en);    my $v = htescape ($entry->{en});
202    $v =~ s[($pattern)][<mark>$1</mark>]g;    for my $pattern (@pattern) {
203        $v =~ s[($pattern)][<mark>$1</mark>]g;
204      }
205    $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;    $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;
206    $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;    $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;
207    $v =~ s[\*][<var class=pattern-star>*</var>]g;    $v =~ s[\*][<var class=pattern-star>*</var>]g;
208    $r .= $v;    $r .= $v;
209      
210    $r .= q[<td lang=ja>];    $r .= q[<td lang=ja>];
211    my $v = htescape ($ja);    my $v = htescape ($entry->{ja});
212    $v =~ s[($pattern)][<mark>$1</mark>]g;    for my $pattern (@pattern) {
213        $v =~ s[($pattern)][<mark>$1</mark>]g;
214      }
215    $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;    $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;
216    $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;    $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;
217    $v =~ s{(\[\[[^\[\]]+\]\])}[<span class=rfc2119>$1</span>]g;    $v =~ s{(\[\[[^\[\]]+\]\])}[<span class=rfc2119>$1</span>]g;
218    $v =~ s[(\$[0-9]+)][<var class=pattern-var>$1</var>]g;    $v =~ s[(\$[0-9]+)][<var class=pattern-var>$1</var>]g;
219    $r .= $v;    $r .= $v;
220    
221      $r .= q[<td lang>];
222      $r .= join ' ', map {
223        ($tag{$_} ? '<mark>' : '') .
224        '<a href="find?word=tag:' . encode_url ($_) . '">' .
225        htescape ($_) .
226        '</a>' .
227        ($tag{$_} ? '</mark>' : '')
228      } @{$entry->{tags} or []};
229    
230    return $r;    return $r;
231  } # get_match_text  } # get_match_text
232    
233    sub print_input_form () {
234      print qq[<section><form action=find accept-charset=utf-8 method=get>];
235      print qq[<p><input type=text name=word value="@{[htescape $param->{word}]}">];
236      print qq[<select name=suffix>];
237      for (qw/none ku su tsu nu mu ru u gu bu ichidan suru kuru i da dasuru/) {
238        print qq[<option value="@{[htescape $_]}"];
239        print qq[ selected] if $param->{suffix} eq $_;
240        print qq[>];
241        print htescape ({
242          none => 'No suffix',
243          ku => 'Ka-gyou Godan (-ku)',
244          su => 'Sa-gyou Godan (-su)',
245          tsu => 'Ta-gyou Godan (-tsu)',
246          nu => 'Na-gyou Godan (-nu)',
247          mu => 'Ma-gyou Godan (-mu)',
248          ru => 'Ra-gyou Godan (-ru)',
249          u => 'Wa/a-gyou Godan (-u)',
250          gu => 'Ga-gyou Godan (-gu)',
251          bu => 'Ba-gyou Godan (-bu)',
252          ichidan => 'Ichidan (-iru, -eru)',
253          suru => 'Sahen (-suru)',
254          kuru => 'Kahen (kuru)',
255          i => 'Keiyoushi (-i)',
256          da => 'Keiyou-doushi (-da)',
257          dasuru => 'Kei-dou-sahen (-da, -suru)',
258        }->{$_} || $_);
259      }
260      print qq[</select>];
261      print qq[<input type=submit value=Submit>];
262      print qq[<p><label><input type=checkbox name=cs @{[$param->{cs} ? 'checked' : '']}> Case-sensitive</label>
263          <label><input type=checkbox name=aw @{[$param->{aw} ? 'checked' : '']}> As word(s)</label>
264          <label><input type=checkbox name=fb @{[$param->{fb} ? 'checked' : '']}> Show non-translated entries</label>];
265      print qq[</form></section>];
266    } # print_input_form

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.7

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24