1 |
#!/usr/bin/perl |
2 |
use strict; |
3 |
|
4 |
our $target; |
5 |
our $code; |
6 |
|
7 |
my $data_dir_name = $target eq 'ruby' ? q<../.rubydata/> : $target ? q<../.> . $target . q</> : q<../.data/>; |
8 |
|
9 |
my $failed = 0; |
10 |
my $dir_id = 0; |
11 |
while (1) { |
12 |
last unless -d "$data_dir_name$dir_id"; |
13 |
print STDERR "*"; |
14 |
for my $t_l (qw/t l/, 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100) { |
15 |
for my $local_id (0..9) { |
16 |
my $file_name = "$data_dir_name$dir_id/$t_l-$local_id.html"; |
17 |
next unless -f $file_name; |
18 |
open my $file, '<', $file_name or (++$failed and next); |
19 |
my $entity = {}; |
20 |
|
21 |
## Very simplified version of HTTP header processing |
22 |
my $line = <$file>; |
23 |
if (defined $line and $line =~ m!^(\S+)\s+(\S+)\s+(.*)!) { |
24 |
$entity->{protocol_version} = $1; |
25 |
$entity->{status_code} = $2; |
26 |
$entity->{status_phrase} = $3; |
27 |
$line = <$file>; |
28 |
} else { |
29 |
$failed++; |
30 |
next; |
31 |
} |
32 |
|
33 |
while (defined $line and $line !~ /^$/) { |
34 |
$line =~ tr/\x0D\x0A//d; |
35 |
if ($line =~ s/^([^:]+)://) { |
36 |
push @{$entity->{field}->{lc $1} ||= []}, $line; |
37 |
} |
38 |
$line = <$file>; |
39 |
} |
40 |
|
41 |
$line = <$file> if defined $line and $line =~ /^$/; |
42 |
|
43 |
$entity->{body} = ''; |
44 |
while (defined $line) { |
45 |
$entity->{body} .= $line; |
46 |
$line = <$file>; |
47 |
} |
48 |
|
49 |
$code->($entity, $file_name, $dir_id); |
50 |
} |
51 |
} |
52 |
$dir_id++; |
53 |
} |
54 |
|
55 |
print STDERR "\n"; |
56 |
print STDERR "$failed errors\n" if $failed; |
57 |
|
58 |
1; |
59 |
|
60 |
=head1 AUTHOR |
61 |
|
62 |
Wakaba <w@suika.fam.cx>. |
63 |
|
64 |
=head1 LICENSE |
65 |
|
66 |
Copyright 2007 Wakaba <w@suika.fam.cx> |
67 |
|
68 |
This library is free software; you can redistribute it |
69 |
and/or modify it under the same terms as Perl itself. |
70 |
|
71 |
=cut |
72 |
|
73 |
1; |
74 |
## $Date: 2007/06/09 07:56:19 $ |