/[suikacvs]/webroot/www/ja1200/stat/htmlentities.pl
Suika

Contents of /webroot/www/ja1200/stat/htmlentities.pl

Parent Directory | Revision Log


Revision 1.1 - (show annotations) (download)
Sat Jun 9 07:56:19 2007 UTC (16 years, 11 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
File MIME type: text/plain
New scripts for keitai sites; Report for attribute values

1 #!/usr/bin/perl
2 use strict;
3
4
# Lookup table of known named character references: entity name (without
# the leading "&" and trailing ";") => the character it denotes.
#
# Each word below is "name=HEX", where HEX is the character's code point in
# hexadecimal.  Names are case-sensitive, so e.g. "Dagger" and "dagger" are
# distinct keys, and the upper-case aliases AMP, COPY, GT, LT, QUOT and REG
# are listed next to their lower-case forms.
my $entity_char = {
    map {
        my ($name, $hex) = split /=/, $_;
        ($name => chr(hex($hex)));
    } qw(
        AElig=00C6    Aacute=00C1   Acirc=00C2    Agrave=00C0
        Alpha=0391    Aring=00C5    Atilde=00C3   Auml=00C4
        Beta=0392     Ccedil=00C7   Chi=03A7      Dagger=2021
        Delta=0394    ETH=00D0      Eacute=00C9   Ecirc=00CA
        Egrave=00C8   Epsilon=0395  Eta=0397      Euml=00CB
        Gamma=0393    Iacute=00CD   Icirc=00CE    Igrave=00CC
        Iota=0399     Iuml=00CF     Kappa=039A    Lambda=039B
        Mu=039C       Ntilde=00D1   Nu=039D       OElig=0152
        Oacute=00D3   Ocirc=00D4    Ograve=00D2   Omega=03A9
        Omicron=039F  Oslash=00D8   Otilde=00D5   Ouml=00D6
        Phi=03A6      Pi=03A0       Prime=2033    Psi=03A8
        Rho=03A1      Scaron=0160   Sigma=03A3    THORN=00DE
        Tau=03A4      Theta=0398    Uacute=00DA   Ucirc=00DB
        Ugrave=00D9   Upsilon=03A5  Uuml=00DC     Xi=039E
        Yacute=00DD   Yuml=0178     Zeta=0396

        aacute=00E1   acirc=00E2    acute=00B4    aelig=00E6
        agrave=00E0   alefsym=2135  alpha=03B1    amp=0026
        AMP=0026      and=2227      ang=2220      apos=0027
        aring=00E5    asymp=2248    atilde=00E3   auml=00E4
        bdquo=201E    beta=03B2     brvbar=00A6   bull=2022
        cap=2229      ccedil=00E7   cedil=00B8    cent=00A2
        chi=03C7      circ=02C6     clubs=2663    cong=2245
        copy=00A9     COPY=00A9     crarr=21B5    cup=222A
        curren=00A4   dArr=21D3     dagger=2020   darr=2193
        deg=00B0      delta=03B4    diams=2666    divide=00F7
        eacute=00E9   ecirc=00EA    egrave=00E8   empty=2205
        emsp=2003     ensp=2002     epsilon=03B5  equiv=2261
        eta=03B7      eth=00F0      euml=00EB     euro=20AC
        exist=2203    fnof=0192     forall=2200   frac12=00BD
        frac14=00BC   frac34=00BE   frasl=2044    gamma=03B3
        ge=2265       gt=003E       GT=003E       hArr=21D4
        harr=2194     hearts=2665   hellip=2026   iacute=00ED
        icirc=00EE    iexcl=00A1    igrave=00EC   image=2111
        infin=221E    int=222B      iota=03B9     iquest=00BF
        isin=2208     iuml=00EF     kappa=03BA    lArr=21D0
        lambda=03BB   lang=2329     laquo=00AB    larr=2190
        lceil=2308    ldquo=201C    le=2264       lfloor=230A
        lowast=2217   loz=25CA      lrm=200E      lsaquo=2039
        lsquo=2018    lt=003C       LT=003C       macr=00AF
        mdash=2014    micro=00B5    middot=00B7   minus=2212
        mu=03BC       nabla=2207    nbsp=00A0     ndash=2013
        ne=2260       ni=220B       not=00AC      notin=2209
        nsub=2284     ntilde=00F1   nu=03BD       oacute=00F3
        ocirc=00F4    oelig=0153    ograve=00F2   oline=203E
        omega=03C9    omicron=03BF  oplus=2295    or=2228
        ordf=00AA     ordm=00BA     oslash=00F8   otilde=00F5
        otimes=2297   ouml=00F6     para=00B6     part=2202
        permil=2030   perp=22A5     phi=03C6      pi=03C0
        piv=03D6      plusmn=00B1   pound=00A3    prime=2032
        prod=220F     prop=221D     psi=03C8      quot=0022
        QUOT=0022     rArr=21D2     radic=221A    rang=232A
        raquo=00BB    rarr=2192     rceil=2309    rdquo=201D
        real=211C     reg=00AE      REG=00AE      rfloor=230B
        rho=03C1      rlm=200F      rsaquo=203A   rsquo=2019
        sbquo=201A    scaron=0161   sdot=22C5     sect=00A7
        shy=00AD      sigma=03C3    sigmaf=03C2   sim=223C
        spades=2660   sub=2282      sube=2286     sum=2211
        sup=2283      sup1=00B9     sup2=00B2     sup3=00B3
        supe=2287     szlig=00DF    tau=03C4      there4=2234
        theta=03B8    thetasym=03D1 thinsp=2009   thorn=00FE
        tilde=02DC    times=00D7    trade=2122    uArr=21D1
        uacute=00FA   uarr=2191     ucirc=00FB    ugrave=00F9
        uml=00A8      upsih=03D2    upsilon=03C5  uuml=00FC
        weierp=2118   xi=03BE       yacute=00FD   yen=00A5
        yuml=00FF     zeta=03B6     zwj=200D      zwnj=200C
    )
}; # $entity_char
266
# Run-wide tally: key => number of documents in which that key occurred at
# least once.  A key is either a character reference exactly as written in
# a document (e.g. "&amp;", "&#160") or one of the summary classes below.
my $Entity = {};

# First command-line argument.  It is not read anywhere in this file.
# NOTE(review): presumably consumed by foreach.pl -- confirm.
our $target = shift;

# Callback that scans one document for character references and updates
# $Entity.
# NOTE(review): presumably invoked by foreach.pl once per file -- confirm.
#
# Arguments:
#   $entity    - hash reference; only its {body} string is examined.
#   $file_name - accepted as part of the callback signature, unused here.
#
# Every reference found is recorded verbatim and also under a summary class;
# a trailing ";" on the class name means the reference was properly
# terminated with a semicolon:
#   #defined / #defined;  - named reference listed in $entity_char
#   #undef   / #undef;    - any other named reference
#   #hex     / #hex;      - "&#x..." or "&#X..." numeric reference
#   #num     / #num;      - any other "&#..." reference
#
# Each key is counted at most once per document, however often it repeats.
our $code = sub {
    my ($entity, $file_name) = @_;

    my $htentity = {};    # keys seen in this document
    pos($entity->{body}) = 0;
    while ($entity->{body} =~ /(&#?[A-Za-z0-9_.:-]+;?)/gc) {
        my $ent = $1;
        my $terminator = $ent =~ /;\z/ ? ';' : '';

        my $class;
        if ($ent =~ /\A&#/) {
            # HTML allows both "&#x" and "&#X" to introduce a hexadecimal
            # reference, so the radix marker is matched case-insensitively.
            $class = $ent =~ /\A&#[xX]/ ? '#hex' : '#num';
        }
        elsif ($ent =~ /\A&([A-Za-z0-9]+);?\z/ and exists $entity_char->{$1}) {
            $class = '#defined';
        }
        else {
            $class = '#undef';
        }

        $htentity->{$class . $terminator} = 1;
        $htentity->{$ent} = 1;
    }

    # Fold this document's distinct keys into the run-wide tally.
    $Entity->{$_}++ for keys %$htentity;
};
299
# Load the companion driver script; $Entity is expected to be filled in by
# the time it returns.
# NOTE(review): presumably foreach.pl calls $code for each input -- confirm.
require 'foreach.pl';

# Emit the tally as "key<TAB>count" lines, ordered by string comparison of
# the keys.
foreach my $key (sort keys %$Entity) {
    print $key, "\t", $Entity->{$key}, "\n";
}
305
306 =head1 AUTHOR
307
308 Wakaba <w@suika.fam.cx>.
309
310 =head1 LICENSE
311
312 Copyright 2007 Wakaba <w@suika.fam.cx>
313
314 This library is free software; you can redistribute it
315 and/or modify it under the same terms as Perl itself.
316
317 =cut
318
319 1;
320 ## $Date: 2007/06/02 12:12:28 $
321

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24