1 |
#!/usr/bin/perl |
2 |
use strict; |
3 |
|
4 |
use Encode::Guess; |
5 |
|
6 |
my $Prolog = {}; |
7 |
|
8 |
our $target = shift; |
9 |
our $code = sub { |
10 |
my ($entity, $file_name) = @_; |
11 |
|
12 |
if ($entity->{body} =~ /^((?:(?!<html).)*?<!DOCTYPE[^>]*>)/is) { |
13 |
my $p = $1; |
14 |
$p =~ s/[\x09-\x0D\x20]+/ /g; |
15 |
$p =~ s/<!--[^>]*-->/<!---->/g; |
16 |
$p =~ s/<!----> ?(?><!----> ?)+/<!---->/g; |
17 |
$p =~ s/^ //; |
18 |
$Prolog->{$p}++; |
19 |
} else { |
20 |
$Prolog->{'(none)'}++; |
21 |
} |
22 |
}; |
23 |
|
24 |
require 'foreach.pl'; |
25 |
|
26 |
print "Prolog:\n"; |
27 |
for (sort {$a cmp $b} keys %$Prolog) { |
28 |
my $n = $_; |
29 |
$n =~ s/([\x00-\x1F\x25\x7F-\xFF])/sprintf '%%%02X', ord $1/ge; |
30 |
print $n, "\t", $Prolog->{$_}, "\n"; |
31 |
} |
32 |
|
33 |
=head1 AUTHOR |
34 |
|
35 |
Wakaba <w@suika.fam.cx>. |
36 |
|
37 |
=head1 LICENSE |
38 |
|
39 |
Copyright 2007 Wakaba <w@suika.fam.cx> |
40 |
|
41 |
This library is free software; you can redistribute it |
42 |
and/or modify it under the same terms as Perl itself. |
43 |
|
44 |
=cut |
45 |
|
46 |
1; |
47 |
## $Date:$ |
48 |
|