1 |
wakaba |
1.1 |
#!/usr/bin/perl |
2 |
|
|
use strict; |
3 |
|
|
require Getopt::Long; |
4 |
|
|
require Message::Markup::XML::EntityManager; |
5 |
|
|
require Message::Markup::XML::Parser; |
6 |
|
|
my %src = ( |
7 |
|
|
catalog => 'entities.xcat', |
8 |
|
|
catalog_dtd => 'dtd/xcatalog.dtd', |
9 |
|
|
output_parsed_document => 1, |
10 |
|
|
remove_reference => 1, |
11 |
|
|
); |
12 |
|
|
$src{output_charset} = $1 if $main::ENV{LANG} =~ /\.(\w+)/; |
13 |
|
|
Getopt::Long::GetOptions ( |
14 |
|
|
q(base=s) => \$src{base}, |
15 |
|
|
q(catalog=s) => \$src{catalog}, |
16 |
|
|
q(catalog-dtd=s) => \$src{catalog_dtd}, |
17 |
|
|
q(check-error-page!) => \$src{check_error_page}, |
18 |
|
|
## TODO: help |
19 |
|
|
q(output-charset=s) => \$src{output_charset}, |
20 |
|
|
q(output-parsed-document!) => \$src{output_parsed_document}, |
21 |
|
|
q(remove-reference!) => \$src{remove_reference}, |
22 |
|
|
q(stop-with-fatal!) => \$src{stop_with_fatal}, |
23 |
|
|
); |
24 |
|
|
$src{uri} = shift or die "$0: No URI specified"; |
25 |
|
|
binmode STDOUT; |
26 |
|
|
binmode STDERR; |
27 |
|
|
binmode STDOUT, ':encoding('.$src{output_charset}.')' if $src{output_charset}; |
28 |
|
|
|
29 |
|
|
require Cwd; |
30 |
|
|
require URI::file; |
31 |
|
|
my $cwd = URI::file->new (Cwd::getcwd ().'/'); |
32 |
|
|
$src{uri} = URI->new ($src{uri})->abs ($cwd); |
33 |
|
|
$src{catalog} = URI->new ($src{catalog})->abs ($cwd) if $src{catalog}; |
34 |
|
|
$src{catalog_dtd} = URI->new ($src{catalog_dtd})->abs ($cwd) if $src{catalog_dtd}; |
35 |
|
|
|
36 |
|
|
my $catalog; |
37 |
|
|
my $parser = Message::Markup::XML::Parser->new (option => { |
38 |
|
|
uri_resolver => sub { |
39 |
|
|
my ($self, $parser, $decl, $p) = @_; |
40 |
|
|
unless (defined $catalog) { |
41 |
|
|
require Message::Markup::XML::Catalog; |
42 |
|
|
$catalog = Message::Markup::XML::Catalog->new; |
43 |
|
|
$catalog->option (uri_resolver => sub { |
44 |
|
|
my ($self, $parser, $decl, $p) = @_; |
45 |
|
|
print STDERR "Retriving catalog entity <$p->{uri}>...\n"; |
46 |
|
|
return 1; |
47 |
|
|
}); |
48 |
|
|
$catalog->option (dtd_of_xml_catalog_1_0 => $src{catalog_dtd}); |
49 |
|
|
} |
50 |
|
|
$p->{uri} = $catalog->resolve_external_id ({public => $p->{PUBLIC}, |
51 |
|
|
system => $p->{uri}}, |
52 |
|
|
catalogs => [$src{catalog}], |
53 |
|
|
return_default => 1); |
54 |
|
|
print STDERR "Retriving external entity <$p->{uri}>...\n"; |
55 |
|
|
return 1; |
56 |
|
|
}, |
57 |
|
|
error_handler => sub { |
58 |
|
|
my ($caller, $o, $error_type, $error_msg) = @_; |
59 |
|
|
require Carp; |
60 |
|
|
if ($src{stop_with_fatal} |
61 |
|
|
&& {qw/fatal 1 wfc 1/}->{$error_type->{level}}) { |
62 |
|
|
$Carp::CarpLevel = 1; |
63 |
|
|
Carp::croak ('{'.$error_type->{level}.'} '.$error_msg); |
64 |
|
|
} else { |
65 |
|
|
$Carp::CarpLevel = 1; |
66 |
|
|
Carp::carp ('{'.$error_type->{level}.'} '.$error_msg); |
67 |
|
|
} |
68 |
|
|
return 0; |
69 |
|
|
}, |
70 |
|
|
}); |
71 |
|
|
|
72 |
|
|
my $p = {uri => $src{uri}, base_uri => $src{base_uri}}; |
73 |
|
|
my $o = {uri => $src{uri}, entity_type => 'document_entity'}; |
74 |
|
|
my $em = Message::Markup::XML::EntityManager->new; |
75 |
|
|
$em->option (uri_resolver => $parser->option ('uri_resolver')); |
76 |
|
|
$em->option (error_handler => $parser->option ('error_handler')); |
77 |
|
|
$em->default_uri_resolver ($parser, 'Message::Markup::XML', $p, $o, |
78 |
|
|
accept_error_page => $src{check_error_page}, |
79 |
|
|
dont_parse_text_declaration => 1); |
80 |
|
|
|
81 |
|
|
if ($p->{error}->{no_data}) { |
82 |
|
|
Message::Markup::XML::Error::raise ($parser, $o, type => 'ERR_EXT_ENTITY_NOT_FOUND', |
83 |
|
|
t => ['#document', $p->{uri}, $p->{error}->{reason_text}]); |
84 |
|
|
} else { |
85 |
|
|
$parser->option (document_entity_base_uri => $p->{base_uri}); |
86 |
|
|
my $doc = $parser->parse_text ($p->{text}, $o, entMan => $em); |
87 |
|
|
if ($src{output_parsed_document}) { |
88 |
|
|
if ($src{remove_reference}) { |
89 |
|
|
$doc->remove_references; |
90 |
|
|
$doc->merge_external_subset; |
91 |
|
|
} |
92 |
|
|
print $doc; |
93 |
|
|
} |
94 |
|
|
} |