#!/usr/bin/perl
# Lists every "*.txt" file found below $root_dir_name together with a keyword
# derived from its file name.  For each file one line is written to STDOUT
# (encoded as UTF-8):
#
#     <path relative to the root, without ".txt"> <keyword>
#
# NOTE(review): the file names are presumably upper-case hex dumps of
# EUC-JP bytes, with ".ns/" marking a namespace separator -- confirm against
# the code that writes these files.
use strict;
use warnings;

use Encode ();

# Presumably registers the 'euc-jp-sw' encoding passed to Encode::decode
# below -- confirm against the module.
require Encode::EUCJPSW;

my $root_dir_name = q[/home/wakaba/public_html/-temp/wiki/wikidata/page/];

# Abort if the data directory is unreachable; otherwise find would silently
# scan whatever directory the script happened to be started from.
chdir $root_dir_name
    or die "$0: $root_dir_name: $!";

binmode STDOUT, ':encoding(utf8)';

my $has_keyword = {};    # keyword => 1 for every keyword already printed
my $index       = 0;     # running counter used to disambiguate duplicates

# List-form pipe open: no shell is involved, so the glob needs no quoting.
# The explicit "." start path keeps the "./" prefix on every output line.
open my $file, '-|', 'find', '.', '-name', '*.txt'
    or die "$0: $!";

while (my $line = <$file>) {
    # Path relative to the root, without "./", line terminator or ".txt".
    my $file_name = $line;
    $file_name =~ s!^\./!!;
    $file_name =~ tr/\x0D\x0A//d;
    $file_name =~ s/\.txt$//;

    my $keyword = $file_name;

    # ".ns/" becomes the hex digits for "//" so it is decoded with the rest.
    $keyword =~ s!\.ns/!2F2F!g;

    # Turn each pair of upper-case hex digits into the byte it denotes.
    $keyword =~ s/([0-9A-F]{2})/pack 'C', hex $1/ge;
    $keyword = Encode::decode('euc-jp-sw', $keyword);

    # Collapse whitespace runs and trim a leading/trailing space.
    $keyword =~ s/\s+/ /g;
    $keyword =~ s/^ //;
    $keyword =~ s/ $//;

    # An empty keyword falls back to the fixed name 'SandBox'.
    $keyword = 'SandBox' unless length $keyword;

    # A keyword that was already emitted gets a " #<n>" suffix.
    if ($has_keyword->{$keyword}) {
        $keyword .= ' #' . ++$index;
    }
    $has_keyword->{$keyword} = 1;

    print "$file_name $keyword\n";
}

# Closing a pipe waits for the child; report a failure but keep the output
# that was already printed (warn instead of die).
unless (close $file) {
    if ($!) {
        warn "$0: closing pipe from find: $!\n";
    }
    else {
        warn "$0: find exited with status " . ($? >> 8) . "\n";
    }
}