1 |
wakaba |
1.1 |
#!/usr/bin/perl |
2 |
|
|
use strict; |
3 |
|
|
require Getopt::Long; |
4 |
|
|
require Message::Markup::XML::EntityManager; |
5 |
|
|
require Message::Markup::XML::Parser; |
6 |
|
|
my %src = ( |
7 |
|
|
catalog => 'entities.xcat', |
8 |
|
|
catalog_dtd => 'dtd/xcatalog.dtd', |
9 |
|
|
output_parsed_document => 1, |
10 |
|
|
remove_reference => 1, |
11 |
wakaba |
1.2 |
validate => 1, |
12 |
wakaba |
1.1 |
); |
13 |
|
|
$src{output_charset} = $1 if $main::ENV{LANG} =~ /\.(\w+)/; |
14 |
|
|
Getopt::Long::GetOptions ( |
15 |
|
|
q(base=s) => \$src{base}, |
16 |
|
|
q(catalog=s) => \$src{catalog}, |
17 |
|
|
q(catalog-dtd=s) => \$src{catalog_dtd}, |
18 |
|
|
q(check-error-page!) => \$src{check_error_page}, |
19 |
wakaba |
1.2 |
q(dtd-external-subset=s) => \$src{dtd_extsubset}, |
20 |
wakaba |
1.1 |
## TODO: help |
21 |
|
|
q(output-charset=s) => \$src{output_charset}, |
22 |
|
|
q(output-parsed-document!) => \$src{output_parsed_document}, |
23 |
|
|
q(remove-reference!) => \$src{remove_reference}, |
24 |
|
|
q(stop-with-fatal!) => \$src{stop_with_fatal}, |
25 |
wakaba |
1.2 |
q(stop-with-vc!) => \$src{stop_with_vc}, |
26 |
|
|
q(validate!) => \$src{validate}, |
27 |
wakaba |
1.1 |
); |
28 |
|
|
$src{uri} = shift or die "$0: No URI specified"; |
29 |
|
|
binmode STDOUT; |
30 |
|
|
binmode STDERR; |
31 |
|
|
binmode STDOUT, ':encoding('.$src{output_charset}.')' if $src{output_charset}; |
32 |
|
|
|
33 |
|
|
require Cwd; |
34 |
|
|
require URI::file; |
35 |
|
|
my $cwd = URI::file->new (Cwd::getcwd ().'/'); |
36 |
|
|
$src{uri} = URI->new ($src{uri})->abs ($cwd); |
37 |
|
|
$src{catalog} = URI->new ($src{catalog})->abs ($cwd) if $src{catalog}; |
38 |
|
|
$src{catalog_dtd} = URI->new ($src{catalog_dtd})->abs ($cwd) if $src{catalog_dtd}; |
39 |
wakaba |
1.2 |
$src{dtd_extsubset} = URI->new ($src{dtd_extsubset})->abs ($cwd) if $src{dtd_extsubset}; |
40 |
wakaba |
1.1 |
|
41 |
wakaba |
1.2 |
my ($nswf, $nsvalid, $wf, $valid) = (1, 1, 1, 1); |
42 |
wakaba |
1.1 |
my $catalog; |
43 |
wakaba |
1.2 |
my $eh = sub { |
44 |
|
|
my ($caller, $o, $error_type, $error_msg, $err) = @_; |
45 |
|
|
require Carp; |
46 |
|
|
if ($err->{raiser_type} eq 'Message::Markup::XML::Validator') { |
47 |
|
|
$error_msg = $err->{node_path} . ': ' . $error_msg if $err->{node_path}; |
48 |
|
|
$error_msg = 'Document <'.$err->{uri}.'>: ' . $error_msg if $err->{uri}; |
49 |
|
|
} |
50 |
|
|
if (($src{stop_with_fatal} |
51 |
|
|
&& {qw/fatal 1 wfc 1 nswfc 1/}->{$error_type->{level}}) |
52 |
|
|
|| ($src{stop_with_vc} |
53 |
|
|
&& {qw/vc 1 nsvc 1/}->{$error_type->{level}})) { |
54 |
|
|
local $Carp::CarpLevel = 1; |
55 |
|
|
Carp::croak ('{'.$error_type->{level}.'} '.$error_msg); |
56 |
|
|
} else { |
57 |
|
|
local $Carp::CarpLevel = 1; |
58 |
|
|
Carp::carp ('{'.$error_type->{level}.'} '.$error_msg); |
59 |
|
|
} |
60 |
|
|
|
61 |
|
|
if ($error_type->{level} eq 'wfc') { $wf = 0 ; $valid = 0 } |
62 |
|
|
elsif ($error_type->{level} eq 'vc') { $valid = 0 } |
63 |
|
|
elsif ($error_type->{level} eq 'nswfc') { $nswf = 0 ; $nswf = 0 } |
64 |
|
|
elsif ($error_type->{level} eq 'nsvc') { $nsvalid = 0 } |
65 |
|
|
|
66 |
|
|
return 0; |
67 |
|
|
}; |
68 |
wakaba |
1.1 |
my $parser = Message::Markup::XML::Parser->new (option => { |
69 |
|
|
uri_resolver => sub { |
70 |
|
|
my ($self, $parser, $decl, $p) = @_; |
71 |
|
|
unless (defined $catalog) { |
72 |
|
|
require Message::Markup::XML::Catalog; |
73 |
|
|
$catalog = Message::Markup::XML::Catalog->new; |
74 |
|
|
$catalog->option (uri_resolver => sub { |
75 |
|
|
my ($self, $parser, $decl, $p) = @_; |
76 |
|
|
print STDERR "Retriving catalog entity <$p->{uri}>...\n"; |
77 |
|
|
return 1; |
78 |
|
|
}); |
79 |
|
|
$catalog->option (dtd_of_xml_catalog_1_0 => $src{catalog_dtd}); |
80 |
|
|
} |
81 |
|
|
$p->{uri} = $catalog->resolve_external_id ({public => $p->{PUBLIC}, |
82 |
|
|
system => $p->{uri}}, |
83 |
|
|
catalogs => [$src{catalog}], |
84 |
|
|
return_default => 1); |
85 |
|
|
print STDERR "Retriving external entity <$p->{uri}>...\n"; |
86 |
|
|
return 1; |
87 |
|
|
}, |
88 |
wakaba |
1.2 |
error_handler => $eh, |
89 |
wakaba |
1.1 |
}); |
90 |
|
|
|
91 |
|
|
my $p = {uri => $src{uri}, base_uri => $src{base_uri}}; |
92 |
|
|
my $o = {uri => $src{uri}, entity_type => 'document_entity'}; |
93 |
|
|
my $em = Message::Markup::XML::EntityManager->new; |
94 |
|
|
$em->option (uri_resolver => $parser->option ('uri_resolver')); |
95 |
|
|
$em->option (error_handler => $parser->option ('error_handler')); |
96 |
|
|
$em->default_uri_resolver ($parser, 'Message::Markup::XML', $p, $o, |
97 |
|
|
accept_error_page => $src{check_error_page}, |
98 |
|
|
dont_parse_text_declaration => 1); |
99 |
|
|
|
100 |
|
|
if ($p->{error}->{no_data}) { |
101 |
|
|
Message::Markup::XML::Error::raise ($parser, $o, type => 'ERR_EXT_ENTITY_NOT_FOUND', |
102 |
|
|
t => ['#document', $p->{uri}, $p->{error}->{reason_text}]); |
103 |
|
|
} else { |
104 |
|
|
$parser->option (document_entity_base_uri => $p->{base_uri}); |
105 |
wakaba |
1.2 |
my $doc = $parser->parse_text ($p->{text}, $o, |
106 |
|
|
entMan => $em, |
107 |
|
|
alt_dtd_external_subset => $src{dtd_extsubset}); |
108 |
|
|
|
109 |
|
|
if ($src{validate}) { |
110 |
|
|
require Message::Markup::XML::Validate; |
111 |
|
|
my $validator = Message::Markup::XML::Validate->new (option => { |
112 |
|
|
error_handler => $eh, |
113 |
|
|
}); |
114 |
|
|
$valid &= $validator->validate ($doc, entMan => $em); |
115 |
|
|
} else { |
116 |
|
|
$valid = 0; |
117 |
|
|
} |
118 |
|
|
|
119 |
wakaba |
1.1 |
if ($src{output_parsed_document}) { |
120 |
|
|
if ($src{remove_reference}) { |
121 |
|
|
$doc->remove_references; |
122 |
|
|
$doc->merge_external_subset; |
123 |
|
|
} |
124 |
|
|
print $doc; |
125 |
|
|
} |
126 |
wakaba |
1.2 |
|
127 |
|
|
print STDERR qq(<$p->{uri}> is @{[ |
128 |
|
|
$valid ? ($nsvalid ? 'a namespace valid' |
129 |
|
|
: ($nswf ? 'a valid and namespace well-formed' : 'a valid')) : |
130 |
|
|
$wf ? ($nswf ? 'a namespace well-formed' : 'a well-formed') : |
131 |
|
|
'not a well-formed' |
132 |
|
|
]} XML document\n); |
133 |
wakaba |
1.1 |
} |