/[suikacvs]/markup/html/html5/spec-ja/find.cgi

Diff of /markup/html/html5/spec-ja/find.cgi

Parent Directory | Revision Log | View Patch

-revision 1.1 by wakaba, Sun Jul 20 07:45:21 2008 UTC
+revision 1.7 by wakaba, Mon Oct 27 04:52:39 2008 UTC
 Line 1
  #!/usr/bin/perl
  use strict;
+ use utf8;
+ use CGI::Carp qw/fatalsToBrowser/;
  BEGIN { require 'common.pl' }
  require Encode;
- my $max_result = 30;
+ my $max_result = 100;
  sub decode_url ($) {
    my $s = shift;
-Line 14 
 sub decode_url ($) {
+Line 16 
 sub decode_url ($) {
    return Encode::decode ('utf-8', $s);
  } # decode_url
+ sub encode_url ($) { # Percent-encode one string for use in a URL; the only argument is the text to encode.
+   my $s = Encode::encode ('utf-8', shift); # Encode the argument as UTF-8 before escaping.
+   $s =~ s/([^0-9A-Za-z_~.-])/sprintf '%%%02X', ord $1/g; # Anything other than 0-9, A-Z, a-z, "_", "~", "." and "-" becomes "%" plus uppercase hex of its code.
+   return $s; # The escaped string.
+ } # encode_url
  sub htescape ($) {
    my $s = shift;
    $s =~ s/&/&amp;/g;
-Line 29 
 for (split /[&;]/, $ENV{QUERY_STRING} ||
+Line 37 
 for (split /[&;]/, $ENV{QUERY_STRING} ||
    $param->{decode_url ($name)} = decode_url ($value);
  }
- my $pattern = quotemeta normalize $param->{word};
+ my $suffix_patterns = { # Regexes for word endings, looked up by the value of the "suffix" request parameter.
+   ku => qr/(?>[かこきいっくけ])/, # ku .. bu: exactly one kana from the listed set is required.
+   su => qr/(?>[さそしすせ])/,
+   tsu => qr/(?>[たとちっつて])/,
+   nu => qr/(?>[なのにんぬね])/,
+   mu => qr/(?>[まもみんむめ])/,
+   ru => qr/(?>[らろりっるれ])/,
+   u => qr/(?>[わおいっうえ])/,
+   gu => qr/(?>[がごぎいぐげ])/,
+   bu => qr/(?>[ばぼびんぶべ])/,
+   ichidan => qr/(?>[るれろよ])?/, # From here on the trailing "?" makes the ending optional.
+   kuru => qr/(?>[るれい])?/,
+   suru => qr/(?>す[るれ]|しろ?|せよ?|さ)?/,
+   i => qr/(?>か[ろっ]|く|い|けれ|う)?/, ## BUG: ありがたい -> ありがとう (presumably: the stem changes as well, which an ending-only pattern cannot express)
+   da => qr/(?>だ[ろっ]?|で|に|なら?)?/,
+   dasuru => qr/(?>だ[ろっ]?|で|に|なら?|す[るれ]|しろ?|せよ?|さ)?/, # Alternatives of "da" and "suru" combined.
+ };
  my $eword = htescape $param->{word};
+ my @pattern; # Regex source strings, one per search word; check_match requires all of them to match.
+ my %tag; # Tag names requested with "tag:NAME" words.
+ for (split /\s+/, $param->{word}) { # The query is split on whitespace; each word is handled separately.
+   if (s/^tag://) { # A leading "tag:" marks a tag filter; the prefix is removed in place.
+     $tag{$_} = 1;
+   } else {
+     my $pattern = quotemeta normalize $_; # Literal match of the normalized word (normalize is defined outside the visible text).
+     $pattern =~ s/\\-/[- ]/g; # An escaped hyphen matches either "-" or a space.
+     unless ($param->{cs}) { # Without the "cs" parameter the match ignores ASCII letter case.
+       $pattern =~ s/([A-Za-z])/'[' . uc ($1) . lc ($1) . ']'/ge; # e.g. "a" becomes "[Aa]".
+     }
+     my $suffix_pattern = $suffix_patterns->{$param->{suffix}} || qr//; # Names without a table entry (such as "none") fall back to an empty pattern.
+     $pattern =~ s/$suffix_pattern$//; # Drop an ending that is already present at the end of the word ...
+     $pattern .= $suffix_pattern; # ... and append the suffix pattern in its place.
+     $pattern = '\b' . $pattern . '\b' if $param->{aw}; # With "aw" set, require word boundaries on both sides.
+     push @pattern, $pattern;
+   }
+ }
+ $|=1;
  print qq[Content-Type: text/html ; charset=utf-8
  <!DOCTYPE HTML>
-Line 39 
 print qq[Content-Type: text/html ; chars
+Line 84 
 print qq[Content-Type: text/html ; chars
  <head>
  <title>Search result for "$eword"</title>
  <link rel=stylesheet href="/www/style/html/xhtml">
+ <link rel=stylesheet href="ja-style">
  <style>
+ td {
+   vertical-align: top;
+ }
  mark {
    background-color: yellow;
  }
-Line 64 
 mark {
+Line 113 
 mark {
    text-decoration: none;
    border-style: none;
  }
+ input[type=text] {
+   width: 60%;
+ }
  </style>
+ <script src=ja-script async defer></script>
  </head>
  <body>
  <h1>Search result for "$eword"</h1>];
- my $has_match;
+ print_input_form (); # Search form above the results.
- for_each_data_file (sub ($) {
-   my $data_file_name = shift;
+ unless (check_match ({en => '', ja => '', tags => []})) { # Skip the search when even an entry with empty text and no tags would match (e.g. an empty query).
-   load_data_file ($data_file_name, my $exact_data = {}, my $pattern_data = {});
+   print q[<article>];
-   $pattern_data = unescape_patterns ($pattern_data);
+   my $has_match;
-   $has_match |= print_matches ($data_file_name, $exact_data, $pattern_data);
+   for_each_entry_set (sub ($) {
- });
+     my (undef, $entries) = @_; # Only the second callback argument is used.
+     $has_match |= print_matches ($entries); # print_matches prints rows itself and reports whether it found any.
- unless ($has_match) {
+   }, 1);
-   print q[<p>No match found.];
- }
+   if ($param->{fb}) { # "fb" is the "Show non-translated entries" checkbox of the form.
+     $has_match |= print_matches ({exact => get_fallback_entries ()});
- sub unescape_patterns ($) {
-   my $pattern_data = shift;
-   my $new_data = {};
-   for (keys %$pattern_data) {
-     my $w = $_;
-     my $v = $_;
-     $v =~ s/\(\.\+\)/*/g;
-     $v =~ s/\\([\s\S])/$1/g;
-     $new_data->{$v} = $pattern_data->{$w};
-   }
-   return $new_data;
- } # unescape_patterns
- sub print_matches ($$$) {
-   my ($file_name, $exact_data, $pattern_data) = @_;
-   my $file_id = $file_name;
-   if ($file_name =~ /([0-9A-Za-z-]+)\.dat$/) {
-     $file_id = $1;
    }
-   my $has_match;
+   unless ($has_match) {
-   my $r = qq[<h2>File "<a href="edit/@{[htescape $file_id]}"><code class=file>@{[htescape $file_id]}</code></a>"</h2><table>];
+     print q[<p>No match found.</article>];
-   my $result = 0;
+   } else {
-   my $added = {};
+     print q[</table></article>]; # Closes the <table> that print_matches opened before its first row.
-   my %en2ja = (%$exact_data, %$pattern_data);
+     print_input_form (); # The form is repeated below the results.
-   for (sort {$a cmp $b} keys %en2ja) {
+   }
-     if (/$pattern/) {
+ }
-       $has_match = 1;
-       $added->{$_} = 1;
+ exit; # Nothing after this point runs at top level; only sub definitions follow.
-       $r .= get_match_text ($_, $en2ja{$_}, $pattern);
-       $r .= q[<tr><td colspan=2>...] and last if ++$result == $max_result;
+ sub check_match ($) { # Returns 1 if the entry has every requested tag and matches every search pattern, otherwise 0.
+   my ($entry) = @_; # Hash ref; the keys read here are en, ja and tags.
+   for my $tag (keys %tag) {
+     F: { # Labelled bare block: "last F" leaves it as soon as the tag is found.
+       for (@{$entry->{tags} or []}) { # A missing tags list is treated as empty.
+         last F if $_ eq $tag;
+       }
+       return 0; # Reached only when the entry does not carry this tag.
+     } # F
+   }
+   for my $pattern (@pattern) {
+     if ($entry->{en} =~ /$pattern/ or
+         $entry->{ja} =~ /$pattern/) {
+       # Matched in the en or the ja text; go on to the next pattern.
+     } else {
+       return 0;
      }
    }
-   $result = 0;
-   my %ja2en = reverse %en2ja;
+   return 1;
-   for (sort {$a cmp $b} keys %ja2en) {
+ } # check_match
-     if (/$pattern/) {
-       next if $added->{$ja2en{$_}};
+ sub print_matches ($) { # Prints one table row per matching entry; returns a true value if at least one entry matched.
+   my ($entries) = @_; # Hash ref with optional "exact" and "pattern" hash refs.
+   use feature 'state';
+   state $result //= 0; # Number of rows printed so far, kept across calls.
+   my $has_match;
+   my %en2ja = (%{$entries->{exact} or {}}, %{$entries->{pattern} or {}}); # Both groups merged; on a duplicate key the "pattern" value wins. Values are the entries passed to check_match.
+   for (keys %en2ja) { # Hash key order is unspecified, so rows are not sorted.
+     if (check_match ($en2ja{$_})) {
        $has_match = 1;
-       $r .= get_match_text ($ja2en{$_}, $_, $pattern);
+       print '<table>' unless $result; # Open the table only before the very first row of the whole response.
-       $r .= q[<tr><td colspan=2>...] and last if ++$result == $max_result;
+       print ''. get_match_text ($_, $en2ja{$_});
+       ++$result; # No upper limit is applied: the $max_result check below is commented out.
+ #      print q[<tr><td colspan=2>...] and last if ++$result == $max_result;
      }
    }
-   $r .= q[</table>];
-   print $r if $has_match;
    return $has_match;
  } # print_matches
  sub get_match_text ($$) { # Builds and returns the HTML for one result row (en cell, ja cell, tags cell).
-   my ($en, $ja) = @_;
+   my $hash = shift; # Key of the entry; emitted as the data-ja-hash attribute.
+   my $entry = shift; # Hash ref; the keys read here are en, ja and tags.
    ## NOTE: Marking will not work well if it contains &, <, >, or ", or
    ## the pattern matches with charrefs, e.g. "t" (part of &lt; and &quot;).
-   my $r = q[<tr><td lang=en>];
+   my $r = q[<tr data-ja-hash="] . htescape ($hash) . q["><td lang=en>];
-   my $v = htescape ($en);
+   my $v = htescape ($entry->{en});
-   $v =~ s[($pattern)][<mark>$1</mark>]g;
+   for my $pattern (@pattern) { # Patterns are applied in turn, so a later one can also match inside <mark> tags inserted by an earlier one.
+     $v =~ s[($pattern)][<mark>$1</mark>]g;
+   }
    $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;
    $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;
    $v =~ s[\*][<var class=pattern-star>*</var>]g;
    $r .= $v;
    $r .= q[<td lang=ja>];
-   my $v = htescape ($ja);
+   my $v = htescape ($entry->{ja}); # Second "my $v" in the same scope; it masks the one declared for the en cell.
-   $v =~ s[($pattern)][<mark>$1</mark>]g;
+   for my $pattern (@pattern) {
+     $v =~ s[($pattern)][<mark>$1</mark>]g;
+   }
    $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;
    $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;
    $v =~ s{(\[\[[^\[\]]+\]\])}[<span class=rfc2119>$1</span>]g;
    $v =~ s[(\$[0-9]+)][<var class=pattern-var>$1</var>]g;
    $r .= $v;
+   $r .= q[<td lang>]; # Third cell: the tags of the entry.
+   $r .= join ' ', map {
+     ($tag{$_} ? '<mark>' : '') . # Tags that are part of the current query are highlighted.
+     '<a href="find?word=tag:' . encode_url ($_) . '">' . # Each tag links to a search for that tag.
+     htescape ($_) .
+     '</a>' .
+     ($tag{$_} ? '</mark>' : '')
+   } @{$entry->{tags} or []};
    return $r;
  } # get_match_text
+ sub print_input_form () { # Prints the search form, pre-filled from the current request parameters; takes no arguments.
+   print qq[<section><form action=find accept-charset=utf-8 method=get>];
+   print qq[<p><input type=text name=word value="@{[htescape $param->{word}]}">]; # Text box holding the current query.
+   print qq[<select name=suffix>];
+   for (qw/none ku su tsu nu mu ru u gu bu ichidan suru kuru i da dasuru/) { # One <option> per suffix name, in this order.
+     print qq[<option value="@{[htescape $_]}"];
+     print qq[ selected] if $param->{suffix} eq $_; # Pre-select the option named by the "suffix" parameter.
+     print qq[>];
+     print htescape ({ # Human-readable label for the option.
+       none => 'No suffix',
+       ku => 'Ka-gyou Godan (-ku)',
+       su => 'Sa-gyou Godan (-su)',
+       tsu => 'Ta-gyou Godan (-tsu)',
+       nu => 'Na-gyou Godan (-nu)',
+       mu => 'Ma-gyou Godan (-mu)',
+       ru => 'Ra-gyou Godan (-ru)',
+       u => 'Wa/a-gyou Godan (-u)',
+       gu => 'Ga-gyou Godan (-gu)',
+       bu => 'Ba-gyou Godan (-bu)',
+       ichidan => 'Ichidan (-iru, -eru)',
+       suru => 'Sahen (-suru)',
+       kuru => 'Kahen (kuru)',
+       i => 'Keiyoushi (-i)',
+       da => 'Keiyou-doushi (-da)',
+       dasuru => 'Kei-dou-sahen (-da, -suru)',
+     }->{$_} || $_); # Falls back to the name itself when no label is listed.
+   }
+   print qq[</select>];
+   print qq[<input type=submit value=Submit>];
+   print qq[<p><label><input type=checkbox name=cs @{[$param->{cs} ? 'checked' : '']}> Case-sensitive</label>
+       <label><input type=checkbox name=aw @{[$param->{aw} ? 'checked' : '']}> As word(s)</label>
+       <label><input type=checkbox name=fb @{[$param->{fb} ? 'checked' : '']}> Show non-translated entries</label>]; # Each checkbox is checked when its parameter (cs, aw, fb) is true.
+   print qq[</form></section>];
+ } # print_input_form

 Legend:
-Removed from v.1.1
 changed lines
+Added in v.1.7

admin@suikawiki.org	ViewVC Help
Powered by ViewVC 1.1.24