/[suikacvs]/markup/html/html5/spec-ja/find.cgi
Suika

Diff of /markup/html/html5/spec-ja/find.cgi

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.4 by wakaba, Thu Jul 24 13:17:11 2008 UTC revision 1.7 by wakaba, Mon Oct 27 04:52:39 2008 UTC
# Line 16  sub decode_url ($) { Line 16  sub decode_url ($) {
16    return Encode::decode ('utf-8', $s);    return Encode::decode ('utf-8', $s);
17  } # decode_url  } # decode_url
18    
19    sub encode_url ($) {
20      my $s = Encode::encode ('utf-8', shift);
21      $s =~ s/([^0-9A-Za-z_~.-])/sprintf '%%%02X', ord $1/g;
22      return $s;
23    } # encode_url
24    
25  sub htescape ($) {  sub htescape ($) {
26    my $s = shift;    my $s = shift;
27    $s =~ s/&/&/g;    $s =~ s/&/&/g;
# Line 31  for (split /[&;]/, $ENV{QUERY_STRING} || Line 37  for (split /[&;]/, $ENV{QUERY_STRING} ||
37    $param->{decode_url ($name)} = decode_url ($value);    $param->{decode_url ($name)} = decode_url ($value);
38  }  }
39    
40  my $eword = htescape $param->{word};  my $suffix_patterns = {
   
 my $pattern = quotemeta normalize $param->{word};  
 $pattern =~ s/\\-/[- ]/g;  
 unless ($param->{cs}) {  
   $pattern =~ s/([A-Za-z])/'[' . uc ($1) . lc ($1) . ']'/ge;  
 }  
 my $suffix_pattern = {  
41    ku => qr/(?>[かこきいっくけ])/,    ku => qr/(?>[かこきいっくけ])/,
42    su => qr/(?>[さそしすせ])/,    su => qr/(?>[さそしすせ])/,
43    tsu => qr/(?>[たとちっつて])/,    tsu => qr/(?>[たとちっつて])/,
# Line 54  my $suffix_pattern = { Line 53  my $suffix_pattern = {
53    i => qr/(?>か[ろっ]|く|い|けれ|う)?/, ## BUG: ありがたい -> ありがとう    i => qr/(?>か[ろっ]|く|い|けれ|う)?/, ## BUG: ありがたい -> ありがとう
54    da => qr/(?>だ[ろっ]?|で|に|なら?)?/,    da => qr/(?>だ[ろっ]?|で|に|なら?)?/,
55    dasuru => qr/(?>だ[ろっ]?|で|に|なら?|す[るれ]|しろ?|せよ?|さ)?/,    dasuru => qr/(?>だ[ろっ]?|で|に|なら?|す[るれ]|しろ?|せよ?|さ)?/,
56  }->{$param->{suffix}} || qr//;  };
57  $pattern =~ s/$suffix_pattern$//;  
58  $pattern .= $suffix_pattern;  my $eword = htescape $param->{word};
 $pattern = '\b' . $pattern . '\b' if $param->{aw};  
59    
60    my @pattern;
61    my %tag;
62    for (split /\s+/, $param->{word}) {
63      if (s/^tag://) {
64        $tag{$_} = 1;
65      } else {
66        my $pattern = quotemeta normalize $_;
67        $pattern =~ s/\\-/[- ]/g;
68        unless ($param->{cs}) {
69          $pattern =~ s/([A-Za-z])/'[' . uc ($1) . lc ($1) . ']'/ge;
70        }
71        my $suffix_pattern = $suffix_patterns->{$param->{suffix}} || qr//;
72        $pattern =~ s/$suffix_pattern$//;
73        $pattern .= $suffix_pattern;
74        $pattern = '\b' . $pattern . '\b' if $param->{aw};
75        push @pattern, $pattern;
76      }
77    }
78    
79    $|=1;
80  print qq[Content-Type: text/html ; charset=utf-8  print qq[Content-Type: text/html ; charset=utf-8
81    
82  <!DOCTYPE HTML>  <!DOCTYPE HTML>
# Line 66  print qq[Content-Type: text/html ; chars Line 84  print qq[Content-Type: text/html ; chars
84  <head>  <head>
85  <title>Search result for "$eword"</title>  <title>Search result for "$eword"</title>
86  <link rel=stylesheet href="/www/style/html/xhtml">  <link rel=stylesheet href="/www/style/html/xhtml">
87    <link rel=stylesheet href="ja-style">
88  <style>  <style>
89  td {  td {
90    vertical-align: top;    vertical-align: top;
# Line 98  input[type=text] { Line 117  input[type=text] {
117    width: 60%;    width: 60%;
118  }  }
119  </style>  </style>
120    <script src=ja-script async defer></script>
121  </head>  </head>
122  <body>  <body>
123  <h1>Search result for "$eword"</h1>];  <h1>Search result for "$eword"</h1>];
124    
125  print_input_form ();  print_input_form ();
126    
127  unless ('' =~ /$pattern/) {  unless (check_match ({en => '', ja => '', tags => []})) {
128    print q[<article>];    print q[<article>];
129    my $has_match;    my $has_match;
130    for_each_data_file (sub ($) {    for_each_entry_set (sub ($) {
131      my $data_file_name = shift;      my (undef, $entries) = @_;
132      load_data_file ($data_file_name, my $exact_data = {}, my $pattern_data = {});      $has_match |= print_matches ($entries);
133      $pattern_data = unescape_patterns ($pattern_data);    }, 1);
134      $has_match |= print_matches ($data_file_name, $exact_data, $pattern_data);  
135    });    if ($param->{fb}) {
136        $has_match |= print_matches ({exact => get_fallback_entries ()});
137      }
138        
139    unless ($has_match) {    unless ($has_match) {
140      print q[<p>No match found.</article>];      print q[<p>No match found.</article>];
141    } else {    } else {
142      print q[</article>];      print q[</table></article>];
143      print_input_form ();      print_input_form ();
144    }    }
145  }  }
146    
147  sub unescape_patterns ($) {  exit;
148    my $pattern_data = shift;  
149    my $new_data = {};  sub check_match ($) {
150    for (keys %$pattern_data) {    my ($entry) = @_;
151      my $w = $_;  
152      my $v = $_;    for my $tag (keys %tag) {
153      $v =~ s/\(\.\+\)/*/g;      F: {
154      $v =~ s/\\([\s\S])/$1/g;        for (@{$entry->{tags} or []}) {
155      $new_data->{$v} = $pattern_data->{$w};          last F if $_ eq $tag;
156    }        }
157    return $new_data;        return 0;
158  } # unescape_patterns      } # F
159      }
160  sub print_matches ($$$) {  
161    my ($file_name, $exact_data, $pattern_data) = @_;    for my $pattern (@pattern) {
162        if ($entry->{en} =~ /$pattern/ or
163    my $file_id = $file_name;          $entry->{ja} =~ /$pattern/) {
164    if ($file_name =~ /([0-9A-Za-z-]+)\.dat$/) {        #
165      $file_id = $1;      } else {
166          return 0;
167        }
168    }    }
169        
170      return 1;
171    } # check_match
172    
173    sub print_matches ($) {
174      my ($entries) = @_;
175    
176      use feature 'state';
177      state $result //= 0;
178    my $has_match;    my $has_match;
179    my $r = qq[<h2>File "<a href="edit/@{[htescape $file_id]}"><code class=file>@{[htescape $file_id]}</code></a>"</h2><table>];  
180    my $result = 0;    my %en2ja = (%{$entries->{exact} or {}}, %{$entries->{pattern} or {}});
181    my $added = {};    for (keys %en2ja) {
182    my %en2ja = (%$exact_data, %$pattern_data);      if (check_match ($en2ja{$_})) {
   for (sort {$a cmp $b} keys %en2ja) {  
     if (/$pattern/) {  
       $has_match = 1;  
       $added->{$_} = 1;  
       $r .= get_match_text ($_, $en2ja{$_}, $pattern);  
       $r .= q[<tr><td colspan=2>...] and last if ++$result == $max_result;  
     }  
   }  
   $result = 0;  
   my %ja2en = reverse %en2ja;  
   for (sort {$a cmp $b} keys %ja2en) {  
     if (/$pattern/) {  
       next if $added->{$ja2en{$_}};  
183        $has_match = 1;        $has_match = 1;
184        $r .= get_match_text ($ja2en{$_}, $_, $pattern);        print '<table>' unless $result;
185        $r .= q[<tr><td colspan=2>...] and last if ++$result == $max_result;        print ''. get_match_text ($_, $en2ja{$_});
186          ++$result;
187    #      print q[<tr><td colspan=2>...] and last if ++$result == $max_result;
188      }      }
189    }    }
   $r .= q[</table>];  
   print $r if $has_match;  
190    return $has_match;    return $has_match;
191  } # print_matches  } # print_matches
192    
193  sub get_match_text ($$) {  sub get_match_text ($$) {
194    my ($en, $ja) = @_;    my $hash = shift;
195      my $entry = shift;
196    
197    ## NOTE: Marking will not work well if it contains &, <, >, or ", or    ## NOTE: Marking will not work well if it contains &, <, >, or ", or
198    ## the pattern matches with charrefs, e.g. "t" (part of &lt; and &quot;).    ## the pattern matches with charrefs, e.g. "t" (part of &lt; and &quot;).
199    
200    my $r = q[<tr><td lang=en>];    my $r = q[<tr data-ja-hash="] . htescape ($hash) . q["><td lang=en>];
201    my $v = htescape ($en);    my $v = htescape ($entry->{en});
202    $v =~ s[($pattern)][<mark>$1</mark>]g;    for my $pattern (@pattern) {
203        $v =~ s[($pattern)][<mark>$1</mark>]g;
204      }
205    $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;    $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;
206    $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;    $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;
207    $v =~ s[\*][<var class=pattern-star>*</var>]g;    $v =~ s[\*][<var class=pattern-star>*</var>]g;
208    $r .= $v;    $r .= $v;
209      
210    $r .= q[<td lang=ja>];    $r .= q[<td lang=ja>];
211    my $v = htescape ($ja);    my $v = htescape ($entry->{ja});
212    $v =~ s[($pattern)][<mark>$1</mark>]g;    for my $pattern (@pattern) {
213        $v =~ s[($pattern)][<mark>$1</mark>]g;
214      }
215    $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;    $v =~ s[(&lt;[\s\S]+?&gt;)][<span class=tag>$1</span>]g;
216    $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;    $v =~ s[(&amp;[#0-9A-Za-z]+;)][<span class=ref>$1</span>]g;
217    $v =~ s{(\[\[[^\[\]]+\]\])}[<span class=rfc2119>$1</span>]g;    $v =~ s{(\[\[[^\[\]]+\]\])}[<span class=rfc2119>$1</span>]g;
218    $v =~ s[(\$[0-9]+)][<var class=pattern-var>$1</var>]g;    $v =~ s[(\$[0-9]+)][<var class=pattern-var>$1</var>]g;
219    $r .= $v;    $r .= $v;
220    
221      $r .= q[<td lang>];
222      $r .= join ' ', map {
223        ($tag{$_} ? '<mark>' : '') .
224        '<a href="find?word=tag:' . encode_url ($_) . '">' .
225        htescape ($_) .
226        '</a>' .
227        ($tag{$_} ? '</mark>' : '')
228      } @{$entry->{tags} or []};
229    
230    return $r;    return $r;
231  } # get_match_text  } # get_match_text
232    
# Line 226  sub print_input_form () { Line 260  sub print_input_form () {
260    print qq[</select>];    print qq[</select>];
261    print qq[<input type=submit value=Submit>];    print qq[<input type=submit value=Submit>];
262    print qq[<p><label><input type=checkbox name=cs @{[$param->{cs} ? 'checked' : '']}> Case-sensitive</label>    print qq[<p><label><input type=checkbox name=cs @{[$param->{cs} ? 'checked' : '']}> Case-sensitive</label>
263        <label><input type=checkbox name=aw @{[$param->{aw} ? 'checked' : '']}> As word(s)</label>];        <label><input type=checkbox name=aw @{[$param->{aw} ? 'checked' : '']}> As word(s)</label>
264          <label><input type=checkbox name=fb @{[$param->{fb} ? 'checked' : '']}> Show non-translated entries</label>];
265    print qq[</form></section>];    print qq[</form></section>];
266  } # print_input_form  } # print_input_form

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.7

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24