1 |
#!/usr/bin/perl |
2 |
use strict; |
3 |
require Getopt::Long; |
4 |
require Message::Markup::XML::EntityManager; |
5 |
require Message::Markup::XML::Parser; |
6 |
my %src = ( |
7 |
catalog => 'entities.xcat', |
8 |
catalog_dtd => 'dtd/xcatalog.dtd', |
9 |
output_parsed_document => 1, |
10 |
remove_reference => 1, |
11 |
); |
12 |
$src{output_charset} = $1 if $main::ENV{LANG} =~ /\.(\w+)/; |
13 |
Getopt::Long::GetOptions ( |
14 |
q(base=s) => \$src{base}, |
15 |
q(catalog=s) => \$src{catalog}, |
16 |
q(catalog-dtd=s) => \$src{catalog_dtd}, |
17 |
q(check-error-page!) => \$src{check_error_page}, |
18 |
## TODO: help |
19 |
q(output-charset=s) => \$src{output_charset}, |
20 |
q(output-parsed-document!) => \$src{output_parsed_document}, |
21 |
q(remove-reference!) => \$src{remove_reference}, |
22 |
q(stop-with-fatal!) => \$src{stop_with_fatal}, |
23 |
); |
24 |
$src{uri} = shift or die "$0: No URI specified"; |
25 |
binmode STDOUT; |
26 |
binmode STDERR; |
27 |
binmode STDOUT, ':encoding('.$src{output_charset}.')' if $src{output_charset}; |
28 |
|
29 |
require Cwd; |
30 |
require URI::file; |
31 |
my $cwd = URI::file->new (Cwd::getcwd ().'/'); |
32 |
$src{uri} = URI->new ($src{uri})->abs ($cwd); |
33 |
$src{catalog} = URI->new ($src{catalog})->abs ($cwd) if $src{catalog}; |
34 |
$src{catalog_dtd} = URI->new ($src{catalog_dtd})->abs ($cwd) if $src{catalog_dtd}; |
35 |
|
36 |
my $catalog; |
37 |
my $parser = Message::Markup::XML::Parser->new (option => { |
38 |
uri_resolver => sub { |
39 |
my ($self, $parser, $decl, $p) = @_; |
40 |
unless (defined $catalog) { |
41 |
require Message::Markup::XML::Catalog; |
42 |
$catalog = Message::Markup::XML::Catalog->new; |
43 |
$catalog->option (uri_resolver => sub { |
44 |
my ($self, $parser, $decl, $p) = @_; |
45 |
print STDERR "Retriving catalog entity <$p->{uri}>...\n"; |
46 |
return 1; |
47 |
}); |
48 |
$catalog->option (dtd_of_xml_catalog_1_0 => $src{catalog_dtd}); |
49 |
} |
50 |
$p->{uri} = $catalog->resolve_external_id ({public => $p->{PUBLIC}, |
51 |
system => $p->{uri}}, |
52 |
catalogs => [$src{catalog}], |
53 |
return_default => 1); |
54 |
print STDERR "Retriving external entity <$p->{uri}>...\n"; |
55 |
return 1; |
56 |
}, |
57 |
error_handler => sub { |
58 |
my ($caller, $o, $error_type, $error_msg) = @_; |
59 |
require Carp; |
60 |
if ($src{stop_with_fatal} |
61 |
&& {qw/fatal 1 wfc 1/}->{$error_type->{level}}) { |
62 |
$Carp::CarpLevel = 1; |
63 |
Carp::croak ('{'.$error_type->{level}.'} '.$error_msg); |
64 |
} else { |
65 |
$Carp::CarpLevel = 1; |
66 |
Carp::carp ('{'.$error_type->{level}.'} '.$error_msg); |
67 |
} |
68 |
return 0; |
69 |
}, |
70 |
}); |
71 |
|
72 |
my $p = {uri => $src{uri}, base_uri => $src{base_uri}}; |
73 |
my $o = {uri => $src{uri}, entity_type => 'document_entity'}; |
74 |
my $em = Message::Markup::XML::EntityManager->new; |
75 |
$em->option (uri_resolver => $parser->option ('uri_resolver')); |
76 |
$em->option (error_handler => $parser->option ('error_handler')); |
77 |
$em->default_uri_resolver ($parser, 'Message::Markup::XML', $p, $o, |
78 |
accept_error_page => $src{check_error_page}, |
79 |
dont_parse_text_declaration => 1); |
80 |
|
81 |
if ($p->{error}->{no_data}) { |
82 |
Message::Markup::XML::Error::raise ($parser, $o, type => 'ERR_EXT_ENTITY_NOT_FOUND', |
83 |
t => ['#document', $p->{uri}, $p->{error}->{reason_text}]); |
84 |
} else { |
85 |
$parser->option (document_entity_base_uri => $p->{base_uri}); |
86 |
my $doc = $parser->parse_text ($p->{text}, $o, entMan => $em); |
87 |
if ($src{output_parsed_document}) { |
88 |
if ($src{remove_reference}) { |
89 |
$doc->remove_references; |
90 |
$doc->merge_external_subset; |
91 |
} |
92 |
print $doc; |
93 |
} |
94 |
} |