1 |
|
2 |
=head1 NAME |
3 |
|
4 |
Message::Markup::XML --- manakai: Simple well-formed document fragment generator |
5 |
|
6 |
=head1 DESCRIPTION |
7 |
|
8 |
This module can be used to generate the document fragment of XML, SGML or |
9 |
other well-formed (in XML meaning) data formats with the object oriented manner. |
10 |
|
11 |
This module cannot be used to parse XML (or other marked-up) document (or its fragment) |
12 |
by itself, nor is compatible with other huge packages such as XML::Parser. The only purpose |
13 |
of this module is to make it easy for tiny perl scripts to GENERATE well-formed |
14 |
markup constructures. (SuikaWiki is not "tiny"? Oh, yes, I see:-)) |
15 |
|
16 |
=cut |
17 |
|
18 |
package Message::Markup::XML; |
19 |
use strict; |
20 |
our $VERSION = do{my @r=(q$Revision: 1.27 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
21 |
use overload '""' => \&outer_xml, |
22 |
fallback => 1; |
23 |
use Char::Class::XML qw!InXML_NameStartChar InXMLNameChar InXML_NCNameStartChar InXMLNCNameChar!; |
24 |
use Message::Markup::XML::QName qw:NULL_URI UNDEF_URI DEFAULT_PFX:; |
25 |
require Carp; |
26 |
Carp::carp "Obsoleted module Message::Markup::XML loaded"; |
27 |
|
28 |
our %Namespace_URI_to_prefix = ( |
29 |
'DAV:' => [qw/dav webdav/], |
30 |
'http://greenbytes.de/2002/rfcedit' => [qw/ed/], |
31 |
'http://icl.com/saxon' => [qw/saxon/], |
32 |
'http://members.jcom.home.ne.jp/jintrick/2003/02/site-concept.xml#' => ['', qw/sitemap/], |
33 |
'http://purl.org/dc/elements/1.1/' => [qw/dc dc11/], |
34 |
'http://purl.org/rss/1.0/' => ['', qw/rss rss10/], |
35 |
'http://suika.fam.cx/~wakaba/lang/rfc/translation/' => [qw/ja/], |
36 |
'http://www.mozilla.org/xbl' => ['', qw/xbl/], |
37 |
'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => [qw/rdf/], |
38 |
'http://www.w3.org/1999/xhtml' => ['', qw/h h1 xhtml xhtml1/], |
39 |
'http://www.w3.org/1999/xlink' => [qw/l xlink/], |
40 |
'http://www.w3.org/1999/XSL/Format' => [qw/fo xslfo xsl-fo xsl/], |
41 |
'http://www.w3.org/1999/XSL/Transform' => [qw/t s xslt xsl/], |
42 |
'http://www.w3.org/1999/XSL/TransformAlias' => [qw/axslt axsl xslt xsl/], |
43 |
'http://www.w3.org/2000/01/rdf-schema#' => [qw/rdfs/], |
44 |
'http://www.w3.org/2000/svg' => ['', qw/s svg/], |
45 |
'http://www.w3.org/2002/06/hlink' => [qw/h hlink/], |
46 |
'http://www.w3.org/2002/06/xhtml2' => ['', qw/h h2 xhtml xhtml2/], |
47 |
'http://www.w3.org/2002/07/owl' => [qw/owl/], |
48 |
'http://www.w3.org/2002/xforms/cr' => [qw/f xforms/], |
49 |
'http://www.w3.org/TR/REC-smil' => ['', qw/smil smil1/], |
50 |
'http://www.wapforum.org/2001/wml' => [qw/wap/], |
51 |
'http://xml.apache.org/xalan' => [qw/xalan/], |
52 |
'mailto:julian.reschke@greenbytes.de?subject=rcf2629.xslt' => [qw/myns/], |
53 |
'urn:schemas-microsoft-com:vml' => [qw/v vml/], |
54 |
'urn:schemas-microsoft-com:xslt' => [qw/ms msxsl msxslt/], |
55 |
'urn:x-suika-fam-cx:markup:ietf:html:3:draft:00' => ['', qw/H HTML HTML3/], |
56 |
'urn:x-suika-fam-cx:markup:ietf:rfc:2629' => ['', qw/rfc rfc2629/], |
57 |
); |
58 |
my %Cache; |
59 |
our %NS = ( |
60 |
SGML => 'urn:x-suika-fam-cx:markup:sgml:', |
61 |
XML => 'urn:x-suika-fam-cx:markup:xml:', |
62 |
default_base_uri => q<about:unknown>, |
63 |
internal_attr_duplicate => 'http://suika.fam.cx/~wakaba/-temp/2003/05/17/invalid-attr#', |
64 |
internal_invalid_sysid => 'http://system.identifier.invalid/', |
65 |
internal_ns_invalid => 'http://suika.fam.cx/~wakaba/-temp/2003/05/17/unknown-namespace#', |
66 |
xml => 'http://www.w3.org/XML/1998/namespace', |
67 |
xmlns => 'http://www.w3.org/2000/xmlns/', |
68 |
); |
69 |
|
70 |
=head1 METHODS |
71 |
|
72 |
=over 4 |
73 |
|
74 |
=item $x = Message::Markup::XML->new (%options) |
75 |
|
76 |
Returns new instance of the module. It is itself a node. |
77 |
|
78 |
Available options: C<data_type>, C<default_decl>, C<type> (default: C<#element>), C<local_name>, C<namespace_uri> and C<value>. |
79 |
|
80 |
=cut |
81 |
|
82 |
sub new ($;%) { |
83 |
my $class = shift; |
84 |
my $self = bless {@_}, $class; |
85 |
$self->{type} ||= '#element'; |
86 |
## Use of "qname" parameter is deprecated |
87 |
if ($self->{qname}) { |
88 |
my $q = Message::Markup::XML::QName::split_qname |
89 |
($self->{qname}, |
90 |
check_qname => 1, |
91 |
check_local_name => 1, |
92 |
use_prefix_default => 1); |
93 |
if ($q->{success}) { |
94 |
$self->{namespace_prefix} = $q->{prefix}; |
95 |
$self->{local_name} = $q->{local_name}; |
96 |
} |
97 |
} |
98 |
if (defined $self->{namespace_prefix}) { |
99 |
my $result = Message::Markup::XML::QName::register_prefix_to_name |
100 |
($self->_get_ns_decls_node, |
101 |
$self->{namespace_prefix} => $self->{namespace_uri}, |
102 |
use_prefix_default => 1, use_name_null => 1, |
103 |
check_xml => 1, check_xmlns => 1, |
104 |
check_registered_as_is => 1, ask_parent_node => 1); |
105 |
Carp::carp $result->{reason} if $result->{reason}; |
106 |
} |
107 |
for (qw/local_name value/) { |
108 |
$self->__set_parent_node ($self->{$_}); |
109 |
} |
110 |
$self->{node} = []; |
111 |
$self; |
112 |
} |
113 |
|
114 |
sub __set_parent_node ($$) { |
115 |
my ($parent, $child) = @_; |
116 |
if (!ref $child) { |
117 |
## |
118 |
} elsif (substr (ref ($child), 0, 20) eq 'Message::Markup::XML') { |
119 |
$child->{parent} = $parent; |
120 |
} elsif (ref ($child) && $parent->_is_same_class ($child)) { |
121 |
$child->{parent} = $parent; |
122 |
} |
123 |
} |
124 |
|
125 |
=item $x->append_node ($node) |
126 |
|
127 |
Appending given node to the object (as the last child). |
128 |
If the type of given node is C<#fragment>, its all children, not the node |
129 |
itself, are appended. |
130 |
|
131 |
This method returns the appended node unless the type of given node is C<#fragment>. |
132 |
In such cases, this node (C<$x>) is returned. |
133 |
|
134 |
Available options: C<node_or_text>. |
135 |
|
136 |
=cut |
137 |
|
138 |
sub append_node ($$;%) { |
139 |
my $self = shift; |
140 |
my ($new_node, %o) = @_; |
141 |
unless (ref $new_node) { |
142 |
if ($o{node_or_text}) { |
143 |
return $self->append_text ($new_node); |
144 |
} else { |
145 |
die "append_node: Invalid node"; |
146 |
} |
147 |
} |
148 |
if ($new_node->{type} eq '#fragment') { |
149 |
for (@{$new_node->{node}}) { |
150 |
push @{$self->{node}}, $_; |
151 |
$_->{parent} = $self; |
152 |
} |
153 |
$self; |
154 |
} else { |
155 |
push @{$self->{node}}, $new_node; |
156 |
$new_node->{parent} = $self; |
157 |
$new_node; |
158 |
} |
159 |
} |
160 |
|
161 |
=item $new_node = $x->append_new_node (%options) |
162 |
|
163 |
Appending a new node. The new node is returned. |
164 |
|
165 |
Available options: C<type>, C<namespace_uri>, C<local_name>, C<value>. |
166 |
|
167 |
=cut |
168 |
|
169 |
sub append_new_node ($;%) { |
170 |
my $self = shift; |
171 |
my $new_node = ref ($self)->new (@_, parent => $self); |
172 |
push @{$self->{node}}, $new_node; |
173 |
$new_node; |
174 |
} |
175 |
|
176 |
=item $new_node = $x->append_text ($text) |
177 |
|
178 |
Appending given text as a new text node. The new text node is returned. |
179 |
|
180 |
=cut |
181 |
|
182 |
sub append_text ($$;%) { |
183 |
$_[0]->append_new_node (type => '#text', value => $_[1]); |
184 |
} |
185 |
|
186 |
## Non public interface |
187 |
sub append_baretext ($$;%) { |
188 |
$_[0]->append_new_node (type => '#xml', value => $_[1]); |
189 |
} |
190 |
|
191 |
sub remove_child_node ($$) { |
192 |
my ($self, $node) = @_; |
193 |
return unless ref $node; |
194 |
$node = overload::StrVal ($node); |
195 |
$self->{node} = [grep { overload::StrVal ($_) ne $node } @{$self->{node}}]; |
196 |
} |
197 |
|
198 |
=item $attr_node = $x->get_attribute ($local_name, %options) |
199 |
|
200 |
Returns the attribute node whose local-name is C<$local_name>. |
201 |
|
202 |
Available options: C<namespace_uri>, C<make_new_node>. |
203 |
|
204 |
=cut |
205 |
|
206 |
sub get_attribute ($$;%) { |
207 |
my ($self, $name, %o) = @_; |
208 |
for (@{$self->{node}}) { |
209 |
if ($_->{type} eq '#attribute' |
210 |
and $_->{local_name} eq $name) { |
211 |
if (defined $o{namespace_uri}) { |
212 |
if (defined $_->{namespace_uri}) { |
213 |
return $_ if $_->{namespace_uri} eq $o{namespace_uri}; |
214 |
} elsif ($o{namespace_uri} eq NULL_URI) { |
215 |
return $_; |
216 |
} |
217 |
} else { |
218 |
return $_ if ((not defined $_->{namespace_uri}) |
219 |
or ($_->{namespace_uri} eq NULL_URI)) |
220 |
} |
221 |
} |
222 |
} |
223 |
## Node is not exist |
224 |
if ($o{make_new_node}) { |
225 |
return $self->append_new_node (type => '#attribute', local_name => $name, |
226 |
namespace_uri => $o{namespace_uri}); |
227 |
} else { |
228 |
return undef; |
229 |
} |
230 |
} |
231 |
|
232 |
sub get_attribute_value ($$;%) { |
233 |
my ($self, $name, %opt) = @_; |
234 |
my $node = $self->get_attribute ($name, %opt); |
235 |
if (ref $node) { |
236 |
return $node->inner_text; |
237 |
} else { |
238 |
return $opt{default}; |
239 |
} |
240 |
} |
241 |
|
242 |
=item $attr_node = $x->set_attribute ($local_name => $value, %options) |
243 |
|
244 |
Set the value of the attribute. The attribute node is returned. |
245 |
|
246 |
Available options: C<namespace_uri>. |
247 |
|
248 |
=cut |
249 |
|
250 |
sub set_attribute ($$$;%) { |
251 |
my ($self, $name, $val, %o) = @_; |
252 |
if ({qw/ARRAY 1 HASH 1 CODE 1/}->{ref ($val)}) { |
253 |
die "set_attribute: new attribute value must be string or blessed object"; |
254 |
#return undef; |
255 |
} |
256 |
for (@{$self->{node}}) { |
257 |
if ($_->{type} eq '#attribute' |
258 |
&& $_->{local_name} eq $name |
259 |
&& $o{namespace_uri} eq $_->{namespace_uri}) { |
260 |
$_->{value} = $val; |
261 |
$self->__set_parent_node ($val); |
262 |
$_->{node} = []; |
263 |
return $_; |
264 |
} |
265 |
} |
266 |
return $self->append_new_node (type => '#attribute', local_name => $name, |
267 |
value => $val, namespace_uri => $o{namespace_uri}); |
268 |
} |
269 |
|
270 |
=item $attr_node = $x->remove_attribute ($local_name, %options) |
271 |
|
272 |
Remove the attribute. |
273 |
|
274 |
Available options: C<namespace_uri>. |
275 |
|
276 |
=cut |
277 |
|
278 |
sub remove_attribute ($$;%) { |
279 |
my ($self, $name, %o) = @_; |
280 |
$self->{node} = [grep { |
281 |
if ($_->{type} eq '#attribute' |
282 |
&& $_->{local_name} eq $name |
283 |
&& $o{namespace_uri} eq $_->{namespace_uri}) { |
284 |
0; |
285 |
} else { |
286 |
1; |
287 |
} |
288 |
} @{$self->{node}}]; |
289 |
1; |
290 |
} |
291 |
|
292 |
=item \@children = $x->child_nodes |
293 |
|
294 |
Returns an array reference to child nodes. |
295 |
|
296 |
=item $local_name = $x->local_name ([$new_name]) |
297 |
|
298 |
Returns or set the local-name. |
299 |
|
300 |
=item $uri = $x->namespace_uri ([$new_uri]) |
301 |
|
302 |
Returns or set namespace name (URI) of the element or the attribute |
303 |
|
304 |
=item $uri = $x->namespace_prefix ([$new_prefix]) |
305 |
|
306 |
Returns or set namespace prefix of the element or the attribute. |
307 |
You may give C<$new_prefix> in form either 'foo' or 'foo:'. |
308 |
To indicate "default" prefix, use '' (length == 0 string). |
309 |
|
310 |
=item $uri or ($uri, $name) = $x->expanded_name |
311 |
|
312 |
Returns expanded name of the node (element or attribute). |
313 |
In array context, array of namespace name (URI) and local part |
314 |
is returned; otherwise, a URI which identify name of the node |
315 |
(in RDF or WebDAV) is returned. |
316 |
|
317 |
=item $type = $x->node_type |
318 |
|
319 |
Returns the node type. |
320 |
|
321 |
=item $node = $x->parent_node |
322 |
|
323 |
Returns the parent node. If there is no parent node, undef is returned. |
324 |
|
325 |
=cut |
326 |
|
327 |
sub child_nodes ($) { $_[0]->{node} } |
328 |
sub local_name ($;$) { |
329 |
my ($self, $newname) = @_; |
330 |
$self->{local_name} = $newname if $newname; |
331 |
if (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') { |
332 |
$self->{local_name}->{local_name}; |
333 |
} else { |
334 |
$self->{local_name} |
335 |
} |
336 |
} |
337 |
sub node_type ($) { $_[0]->{type} } |
338 |
sub parent_node ($) { $_[0]->{parent} } |
339 |
|
340 |
sub namespace_uri ($;$) { |
341 |
my ($self, $new_uri) = @_; |
342 |
$self->{namespace_uri} = $new_uri if defined $new_uri; |
343 |
$self->{namespace_uri}; |
344 |
} |
345 |
sub namespace_prefix ($;$%) { |
346 |
my ($self, $new_pfx, %opt) = @_; |
347 |
my $decls = $self->_get_ns_decls_node; |
348 |
if (defined ($new_pfx)) { |
349 |
my $result = Message::Markup::XML::QName::register_prefix_to_name |
350 |
($decls, $new_pfx => $self->{namespace_uri}, ask_parent_node => 1, |
351 |
check_prefix => 1, check_xml => 1, check_xmlns => 1, |
352 |
check_prefix_xml_ => 1, check_registered_as_is => 1, |
353 |
use_prefix_default => 1, use_name_null => 1, %opt); |
354 |
Carp::carp $result->{reason} if $result->{reason}; |
355 |
} |
356 |
my $result = Message::Markup::XML::QName::name_to_prefix |
357 |
($decls, $self->{namespace_uri}, make_new_prefix => 1, |
358 |
use_prefix_default => 1, use_name_null => 1, |
359 |
check_registered_as_is => 1, %opt); |
360 |
if ($result->{success}) { |
361 |
return $result->{prefix}; |
362 |
} else { |
363 |
return undef; |
364 |
} |
365 |
} |
366 |
|
367 |
sub expanded_name ($) { |
368 |
my $self = shift; |
369 |
wantarray ? ($self->{namespace_uri}, $self->{local_name}) |
370 |
: $self->{namespace_uri} . $self->{local_name}; |
371 |
} |
372 |
|
373 |
=item $i = $x->count |
374 |
|
375 |
Returns the number of child nodes. |
376 |
|
377 |
=cut |
378 |
|
379 |
# TODO: support counting by type |
380 |
sub count ($;@) { |
381 |
(defined $_[0]->{value} ? 1 : 0) + scalar @{$_[0]->{node}}; |
382 |
} |
383 |
|
384 |
# $prefix = $x->_get_namespace_prefix ($namespace_uri) |
385 |
sub _get_namespace_prefix ($$;%) { |
386 |
} |
387 |
|
388 |
sub _set_prefix_to_uri ($$$;%) { |
389 |
} |
390 |
|
391 |
## TODO: removing ns declare (1.1) support |
392 |
# $uri or undef = $x->_prefix_to_uri ($prefix) |
393 |
sub _prefix_to_uri ($$;$%) { |
394 |
} |
395 |
|
396 |
# $prefix or undef = $x->_uri_to_prefix ($uri => [$new_prefix], %options) |
397 |
# use_no_prefix (default: 1): Allow default namespace (no prefix). |
398 |
sub _uri_to_prefix ($$;$%) { |
399 |
} |
400 |
|
401 |
=item $x->define_new_namespace ($prefix => $uri) |
402 |
|
403 |
Defines a new XML Namespace. This method is useful for root or section-level |
404 |
element node. |
405 |
|
406 |
Returned value is unspecified in this version of this module. |
407 |
|
408 |
=cut |
409 |
|
410 |
sub define_new_namespace ($$$;%) { |
411 |
my ($self, $prefix, $uri, %opt) = @_; |
412 |
my $result = Message::Markup::XML::QName::register_prefix_to_name |
413 |
($self->_get_ns_decls_node, $prefix => $uri, |
414 |
check_name => 1, check_prefix => 1, check_xml => 1, |
415 |
check_xmlns => 1, check_prefix_xml_ => 1, |
416 |
use_prefix_default => 1, use_name_null => 1, |
417 |
%opt); |
418 |
Carp::carp $result->{reason} if $result->{reason}; |
419 |
$result->{success}; |
420 |
} |
421 |
|
422 |
=item $uri = $x->defined_namespace_prefix ($prefix) |
423 |
|
424 |
Query whether the namespace prefix is defined or not. |
425 |
If defined, return namespace name (URI). |
426 |
|
427 |
=cut |
428 |
|
429 |
sub defined_namespace_prefix ($$;%) { |
430 |
my ($self, $prefix, %opt) = @_; |
431 |
my $result = Message::Markup::XML::QName::prefix_to_name |
432 |
($self->_get_ns_decls_node, $prefix, |
433 |
use_prefix_default => 1, use_name_null => 1, |
434 |
use_xml => 1, use_xmlns => 1, |
435 |
ask_parent_node => 1, %opt); |
436 |
$result->{name}; |
437 |
} |
438 |
|
439 |
=item $qname = $x->qname |
440 |
|
441 |
Returns QName ((namespace-)qualified name) of the element type. |
442 |
Undef is retuened when the type does not have its QName |
443 |
(ie. when type is neither C<#element> or C<#attribute>). |
444 |
|
445 |
=cut |
446 |
|
447 |
sub qname ($;%) { |
448 |
my ($self, %opt) = @_; |
449 |
if ($self->{type} eq '#element') { |
450 |
my $result = Message::Markup::XML::QName::expanded_name_to_qname |
451 |
($self, $self->{namespace_uri} || NULL_URI, |
452 |
$self->{local_name}, |
453 |
make_new_prefix => 1, check_local_name => 1, |
454 |
use_prefix_default => 1, use_name_null => 1, |
455 |
use_xml => 1, use_xmlns => 1, |
456 |
ask_parent_node => 1, %opt); |
457 |
Carp::carp $result->{reason} if $result->{reason}; |
458 |
return $result->{qname}; |
459 |
} elsif ($self->{type} eq '#attribute') { |
460 |
my $result = Message::Markup::XML::QName::expanded_name_to_qname |
461 |
(((defined $self->{namespace_uri} |
462 |
and $self->{namespace_uri} ne NULL_URI) ? |
463 |
$self->_get_ns_decls_node : undef), |
464 |
$self->{namespace_uri} || NULL_URI, $self->{local_name}, |
465 |
make_new_prefix => 1, check_local_name => 1, |
466 |
use_xml => 1, use_xmlns => 1, |
467 |
ask_parent_node => 1, %opt); |
468 |
Carp::carp $result->{reason} if $result->{reason}; |
469 |
return $result->{qname}; |
470 |
} else { |
471 |
return $self->{qname}; |
472 |
} |
473 |
} |
474 |
|
475 |
sub merge_external_subset ($) { |
476 |
my $self = shift; |
477 |
unless ($self->{type} eq '#declaration' |
478 |
&& $self->{namespace_uri} eq $NS{SGML}.'doctype') { |
479 |
return unless $self->{type} eq '#document' || $self->{type} eq '#fragment'; |
480 |
for (@{$self->{node}}) { |
481 |
$_->merge_external_subset; |
482 |
} |
483 |
return; |
484 |
} |
485 |
my $xsub = $self->get_attribute ('external-subset'); |
486 |
return unless ref $xsub; |
487 |
for (@{$xsub->{node}}) { |
488 |
$_->{parent} = $self; |
489 |
} |
490 |
push @{$self->{node}}, @{$xsub->{node}}; |
491 |
$self->remove_child_node ($xsub); |
492 |
$self->remove_child_node ($self->get_attribute ('PUBLIC')); |
493 |
$self->remove_child_node ($self->get_attribute ('SYSTEM')); |
494 |
$self->remove_marked_section; |
495 |
} |
496 |
|
497 |
sub remove_marked_section ($) { |
498 |
my $self = shift; |
499 |
my @node; |
500 |
for (@{$self->{node}}) { |
501 |
if ({'#declaration' => 1, '#element' => 1, '#section' => 1, |
502 |
'#reference' => 1, '#attribute' => 1, |
503 |
'#document' => 1, '#fragment' => 1}->{$_->{type}}) { |
504 |
$_->remove_marked_section; |
505 |
} |
506 |
} |
507 |
for (@{$self->{node}}) { |
508 |
if ($_->{type} ne '#section') { |
509 |
push @node, $_; |
510 |
} else { |
511 |
my $status = $_->get_attribute ('status', make_new_node => 1)->inner_text; |
512 |
if ($status eq 'CDATA') { |
513 |
$_->{type} = '#text'; |
514 |
$_->remove_attribute ('status'); |
515 |
push @node, $_; |
516 |
} elsif ($status ne 'IGNORE') { # INCLUDE |
517 |
for my $e (@{$_->{node}}) { |
518 |
if ($e->{type} ne '#attribute') { |
519 |
$e->{parent} = $self; |
520 |
push @node, $e; |
521 |
} |
522 |
} |
523 |
} |
524 |
} |
525 |
} |
526 |
$self->{node} = \@node; |
527 |
} |
528 |
|
529 |
## TODO: references in EntityValue |
530 |
sub remove_references ($) { |
531 |
my $self = shift; |
532 |
my @node; |
533 |
for (@{$self->{node}}) { |
534 |
if ({'#declaration' => 1, '#element' => 1, '#section' => 1, |
535 |
'#reference' => 1, '#attribute' => 1, |
536 |
'#document' => 1, '#fragment' => 1}->{$_->{type}}) { |
537 |
$_->remove_references; |
538 |
} |
539 |
} |
540 |
for (@{$self->{node}}) { |
541 |
if ($_->{type} ne '#reference' |
542 |
|| ($self->{type} eq '#declaration' |
543 |
&& $_->{namespace_uri} eq $NS{SGML}.'entity')) { |
544 |
push @node, $_; |
545 |
} else { |
546 |
if (index ($_->{namespace_uri}, 'char') > -1) { |
547 |
my $e = ref ($_)->new (type => '#text', value => chr $_->{value}); |
548 |
$e->{parent} = $self; |
549 |
push @node, $e; |
550 |
} elsif ($_->{flag}->{smxp__ref_expanded}) { |
551 |
for my $e (@{$_->{node}}) { |
552 |
if ($e->{type} ne '#attribute') { |
553 |
$e->{parent} = $self; |
554 |
push @node, $e; |
555 |
} |
556 |
} |
557 |
} else { ## reference is not expanded |
558 |
push @node, $_; |
559 |
} |
560 |
} |
561 |
$_->{flag}->{smxp__defined_with_param_ref} = 0 |
562 |
if $_->{flag}->{smxp__defined_with_param_ref} |
563 |
&& !$_->{flag}->{smxp__non_processed_declaration}; |
564 |
} |
565 |
$self->{node} = \@node; |
566 |
} |
567 |
|
568 |
sub resolve_relative_uri ($;$%) { |
569 |
my ($self, $rel, %o) = @_; |
570 |
require URI; |
571 |
if ($rel =~ /^[0-9A-Za-z.%+-]+:/) { |
572 |
return URI->new ($rel); |
573 |
} else { |
574 |
my $base = $self->get_attribute_value ('base', namespace_uri => $NS{xml}, |
575 |
default => ''); |
576 |
if ($base !~ /^[0-9A-Za-z.%+-]+:/) { # $base is relative |
577 |
$base = $self->_resolve_relative_uri_by_parent ($base, \%o); |
578 |
} |
579 |
eval { |
580 |
URI->new ($rel)->abs ($base || '.'); |
581 |
} or return $rel; |
582 |
} |
583 |
} |
584 |
sub _resolve_relative_uri_by_parent ($$$) { |
585 |
my ($self, $rel, $o) = @_; |
586 |
if (ref $self->{parent}) { |
587 |
if (not $o->{use_references_base_uri} |
588 |
and $self->{parent}->{type} eq '#reference') { |
589 |
## This case is necessary to work with |
590 |
## <element> <!-- element can have base URI --> |
591 |
## text <!-- text cannot have base URI --> |
592 |
## &ent; <!-- ref's base URI is referred entity's one (in this module) --> |
593 |
## <!-- expantion of ent --> |
594 |
## entity's text <!-- text cannot have base URI, so use <element>'s one --> |
595 |
## <entitys-element/> <!-- element can have base URI, otherwise ENTITY's one --> |
596 |
## </element> |
597 |
return $self->{parent}->_resolve_relative_uri_by_parent ($rel, $o); |
598 |
} else { |
599 |
return $self->{parent}->resolve_relative_uri ($rel, %$o); |
600 |
} |
601 |
} else { |
602 |
return length $rel ? $rel : $NS{default_base_uri}; |
603 |
} |
604 |
} |
605 |
sub base_uri ($;$) { |
606 |
my ($self, $new_uri) = @_; |
607 |
my $base; |
608 |
if (defined $new_uri) { |
609 |
$base = $self->set_attribute (base => $new_uri, namespace_uri => $NS{xml}); |
610 |
} |
611 |
$base ||= $self->get_attribute ('base', namespace_uri => $NS{xml}); |
612 |
ref ($base) ? $base->inner_text : undef; |
613 |
} |
614 |
|
615 |
=item $tag = $x->start_tag |
616 |
|
617 |
Returns the start tag (or something that marks the start of something, such as '<!--' |
618 |
for C<#comment> nodes). |
619 |
|
620 |
=cut |
621 |
|
622 |
sub start_tag ($) { |
623 |
my $self = shift; |
624 |
if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) { |
625 |
my $r = ''; |
626 |
$self->qname; # dummy |
627 |
for (@{$self->{node}}) { |
628 |
$r .= ' ' . $_->outer_xml if $_->node_type eq '#attribute'; |
629 |
} |
630 |
for my $prefix (sort grep !/^-/, keys %{$self->{ns}||{}}) { |
631 |
if ($prefix ne DEFAULT_PFX) { |
632 |
$r .= ' xmlns:'.$prefix; |
633 |
} else { |
634 |
$r .= ' xmlns'; |
635 |
} |
636 |
$r .= '="'; |
637 |
$r .= $self->_escape ($self->{ns}->{$prefix}) |
638 |
if $self->{ns}->{$prefix} ne NULL_URI |
639 |
&& $self->{ns}->{$prefix} ne NULL_URI; |
640 |
$r .= '"'; |
641 |
} |
642 |
$r .= '>'; |
643 |
'<' . $self->qname . $r; |
644 |
} elsif ($self->{type} eq '#comment') { |
645 |
'<!--'; |
646 |
} elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) { |
647 |
'<?' . ($self->{local_name}); |
648 |
} elsif ($self->{type} eq '#reference') { |
649 |
if ($self->{namespace_uri} eq $NS{SGML}.'char:ref:hex') { |
650 |
'&#x'; |
651 |
} elsif ($self->{namespace_uri} eq $NS{SGML}.'char:ref') { |
652 |
'&#'; |
653 |
} elsif ($self->_check_ncname ($self->{local_name})) { |
654 |
if ($self->{namespace_uri} eq $NS{SGML}.'entity:parameter') { |
655 |
'%'; |
656 |
} else { |
657 |
'&'; |
658 |
} |
659 |
} else { # error |
660 |
''; |
661 |
} |
662 |
} elsif ($self->{type} eq '#declaration' && $self->{namespace_uri}) { |
663 |
'<!' . { |
664 |
$NS{SGML}.'attlist' => 'ATTLIST', |
665 |
$NS{SGML}.'doctype' => 'DOCTYPE', |
666 |
$NS{SGML}.'element' => 'ELEMENT', |
667 |
$NS{SGML}.'entity' => 'ENTITY', |
668 |
$NS{SGML}.'entity:parameter' => 'ENTITY', |
669 |
$NS{SGML}.'notation' => 'NOTATION', |
670 |
}->{$self->{namespace_uri}} . ' ' . |
671 |
($self->{namespace_uri} eq $NS{SGML}.'entity:parameter' ? |
672 |
($self->{flag}->{smxp__defined_with_param_ref}?'':'% '):''); |
673 |
} elsif ($self->{type} eq '#section') { |
674 |
'<!['; |
675 |
} else { |
676 |
''; |
677 |
} |
678 |
} |
679 |
|
680 |
=item $tag = $x->end_tag |
681 |
|
682 |
Returns the end tag (or something that marks the end of something, such as '-->' |
683 |
for C<#comment> nodes). |
684 |
|
685 |
=cut |
686 |
|
687 |
sub end_tag ($) { |
688 |
my $self = shift; |
689 |
if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) { |
690 |
'</' . $self->qname . '>'; |
691 |
} elsif ($self->{type} eq '#comment') { |
692 |
'-->'; |
693 |
} elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) { |
694 |
'?>'; |
695 |
} elsif ($self->{type} eq '#reference') { |
696 |
';'; |
697 |
} elsif ($self->{type} eq '#declaration' && $self->{namespace_uri}) { |
698 |
'>'; |
699 |
} elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) { |
700 |
'>'; |
701 |
} elsif ($self->{type} eq '#section') { |
702 |
']]>'; |
703 |
} else { |
704 |
''; |
705 |
} |
706 |
} |
707 |
|
708 |
=item $tag = $x->attribute_name |
709 |
|
710 |
Returns the attribute name. |
711 |
|
712 |
=cut |
713 |
|
714 |
sub attribute_name ($) { |
715 |
my $self = shift; |
716 |
$self->qname; |
717 |
} |
718 |
|
719 |
=item $tag = $x->attribute_value |
720 |
|
721 |
Returns the attribute value. |
722 |
|
723 |
=cut |
724 |
|
725 |
sub attribute_value ($;%) { |
726 |
my ($self, %o) = @_; |
727 |
if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) { |
728 |
my $r = '"'; |
729 |
my $isc = $self->_is_same_class ($self->{value}); |
730 |
$r .= $self->_escape ($self->{value}, keep_wsp => 1) |
731 |
if !$isc and defined $self->{value}; |
732 |
for (($isc?$self->{value}:()), @{$self->{node}}) { |
733 |
my $nt = $_->{type}; |
734 |
if ($nt eq '#reference' || $nt eq '#xml') { |
735 |
$r .= $_->outer_xml; |
736 |
} elsif ($nt ne '#attribute') { |
737 |
$r .= $self->_escape ($_->inner_text, keep_wsp => 1); |
738 |
} |
739 |
} |
740 |
return $r . '"'; |
741 |
} else { |
742 |
''; |
743 |
} |
744 |
} |
745 |
|
746 |
sub entity_value ($;%) { |
747 |
my ($self, %o) = @_; |
748 |
my $_escape = sub { |
749 |
my $s = shift; |
750 |
return '' unless defined $s; |
751 |
$s =~ s/&/&/g; |
752 |
$s =~ s/&(\p{InXML_NameStartChar}\p{InXMLNameChar}*);/&$1;/g; |
753 |
$s =~ s/([\x0D%"])/sprintf '&#x%02X;', ord $1/ge; |
754 |
$s =~ s/([\x00-\x08\x0B\x0C\x0E-\x1F\x7F])/sprintf '&#x%02X;', ord $1/ge; |
755 |
$s; |
756 |
}; |
757 |
if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) { |
758 |
my $r = '"' . &$_escape ($self->{value}); |
759 |
for (@{$self->{node}}) { |
760 |
my $nt = $_->{type}; |
761 |
if ($nt eq '#reference' || $nt eq '#xml') { |
762 |
$r .= $_->outer_xml; |
763 |
} elsif ($nt ne '#attribute') { |
764 |
$r .= &$_escape ($_->inner_text); |
765 |
} |
766 |
} |
767 |
return $r . '"'; |
768 |
} else { |
769 |
''; |
770 |
} |
771 |
} |
772 |
|
773 |
## This method should be called only from Message::Markup::XML::* family modules, |
774 |
## since this is NOT a FORMAL interface. |
775 |
sub _entity_parameter_literal_value ($;%) { |
776 |
my $self = shift; |
777 |
my $r = ''; |
778 |
my $isc = $self->_is_same_class ($self->{value}); |
779 |
$r = $self->{value} unless $isc; |
780 |
for (($isc?$self->{value}:()), @{$self->{node}}) { |
781 |
my $nt = $_->{type}; |
782 |
## Bare node and general entity reference node |
783 |
if ($nt eq '#xml' || ($nt eq '#reference' && $_->{namespace_uri} eq $NS{SGML}.'entity')) { |
784 |
$r .= $_->outer_xml; |
785 |
## Text node and parameter entity reference node |
786 |
} elsif ($nt ne '#attribute') { |
787 |
$r .= $_->inner_text; |
788 |
} |
789 |
} |
790 |
$r; |
791 |
} |
792 |
|
793 |
=item $tag = $x->attribute |
794 |
|
795 |
Returns the attribute (name and value pair). |
796 |
|
797 |
=cut |
798 |
|
799 |
sub attribute ($) { |
800 |
my $self = shift; |
801 |
if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) { |
802 |
$self->attribute_name . '=' . $self->attribute_value; |
803 |
} else { |
804 |
''; |
805 |
} |
806 |
} |
807 |
|
808 |
sub external_id ($;%) { |
809 |
my $self = shift; |
810 |
my %o = @_; |
811 |
my ($pubid, $sysid, $ndata); |
812 |
for (@{$self->{node}}) { |
813 |
if ($_->{type} eq '#attribute' && !$_->{namespace_uri}) { |
814 |
if ($_->{local_name} eq 'PUBLIC') { |
815 |
$pubid = $_->inner_text; |
816 |
} elsif ($_->{local_name} eq 'SYSTEM') { |
817 |
$sysid = $_->inner_text; |
818 |
} elsif ($_->{local_name} eq 'NDATA') { |
819 |
$ndata = $_->inner_text; |
820 |
undef $ndata unless $self->_check_ncname ($ndata); |
821 |
} |
822 |
} |
823 |
} |
824 |
my $r = ''; |
825 |
if (defined $pubid) { |
826 |
$pubid =~ s|([^\x0A\x0D\x20A-Za-z0-9'()+,./:=?;!*#\@\$_%-])|sprintf '%%%02X', ord $1|ges; |
827 |
$pubid = '"' . $pubid . '"'; |
828 |
} |
829 |
if (defined $sysid) { |
830 |
if (index ($sysid, '"') > -1) { |
831 |
if (index ($sysid, "'") > -1) { |
832 |
$sysid =~ s/"/%22/; $sysid = '"' . $sysid . '"'; |
833 |
} else { |
834 |
$sysid = "'" . $sysid . "'"; |
835 |
} |
836 |
} else { |
837 |
$sysid = '"' . $sysid . '"'; |
838 |
} |
839 |
} |
840 |
if ($pubid && $sysid) { |
841 |
$r = 'PUBLIC ' . $pubid . ' ' . $sysid; |
842 |
} elsif ($sysid) { |
843 |
$r = 'SYSTEM ' . $sysid; |
844 |
} elsif ($pubid && $o{allow_pubid_only}) { |
845 |
$r = 'PUBLIC ' . $pubid; |
846 |
} |
847 |
if ($r && $ndata && $o{use_ndata}) { |
848 |
$r .= ' NDATA ' . $ndata; |
849 |
} |
850 |
$r; |
851 |
} |
852 |
|
853 |
=item $tag = $x->inner_xml |
854 |
|
855 |
Returns the content of the node in XML syntax. (In case of the C<#element> nodes, |
856 |
element content without start- and end-tags is returned.) |
857 |
|
858 |
Note that for not all node types the behavior of this method is defined. |
859 |
For example, returned value of C<#attribute> might be unexpected one |
860 |
in this version of this module. |
861 |
|
862 |
=cut |
863 |
|
864 |
sub inner_xml ($;%) { |
865 |
my ($self, %o) = @_; |
866 |
my $r = ''; |
867 |
if ($self->{type} eq '#comment') { |
868 |
$r = $self->inner_text; |
869 |
$r =~ s/--/--/g; |
870 |
$r =~ s/-$/-/; |
871 |
} elsif ($self->{type} eq '#pi') { |
872 |
my $isc = $self->_is_same_class ($self->{value}); |
873 |
if (!$isc and defined $self->{value} and length ($self->{value})) { |
874 |
$r = ' ' . $self->{value}; |
875 |
#$r =~ s/\?>/? >/g; ## Same replacement as of the recommendation of XSLT:p.i. |
876 |
$r =~ s/\?>/?>/g; ## Some PI (such as xml-stylesheet) support predefined entity reference |
877 |
} |
878 |
for (($isc?$self->{value}:()), @{$self->{node}}) { |
879 |
if ($_->node_type eq '#attribute') { |
880 |
$r .= ' ' . $_->attribute; |
881 |
} else { |
882 |
my $s = $_->inner_text; |
883 |
if (length $s) { |
884 |
$s =~ s/\?>/? >/g; |
885 |
$r .= ' ' . $s; |
886 |
} |
887 |
} |
888 |
} |
889 |
} elsif ($self->{type} eq '#reference') { |
890 |
if ($self->{namespace_uri} eq $NS{SGML}.'char:ref:hex') { |
891 |
$r = sprintf '%02X', $self->{value}; |
892 |
} elsif ($self->{namespace_uri} eq $NS{SGML}.'char:ref') { |
893 |
$r = sprintf '%02d', $self->{value}; |
894 |
} elsif (ref ($self->{local_name}) && $self->{local_name}->{type} eq '#declaration') { |
895 |
$r = $self->{local_name}->{local_name}; |
896 |
} elsif ($self->_check_ncname ($self->{local_name})) { |
897 |
$r = ($self->{local_name}); |
898 |
} else { # error |
899 |
$r = ''; |
900 |
} |
901 |
} elsif ($self->{type} eq '#declaration') { |
902 |
if ($self->{namespace_uri} eq $NS{SGML}.'doctype') { |
903 |
my $root = $self->get_attribute ('qname'); |
904 |
$root = (ref $root ? $root->inner_text : undef) || (ref $self->{parent} ? (do { |
905 |
for (@{$self->{parent}->{node}}) { |
906 |
if ($_->{type} eq '#element') { |
907 |
$root = $_->qname; |
908 |
last if $root; |
909 |
} |
910 |
} |
911 |
$root || '#IMPLIED'; |
912 |
}) : '#IMPLIED'); ## error! |
913 |
my ($isub, $xid) = ('', $self->external_id); |
914 |
for (@{$self->{node}}) { |
915 |
$isub .= $_->outer_xml if $_->{type} ne '#attribute'; |
916 |
} |
917 |
if ($xid) { |
918 |
$r = $xid; |
919 |
if ($isub) { |
920 |
$r .= " [" . $isub . "]"; |
921 |
} |
922 |
} else { |
923 |
if ($isub) { |
924 |
$r = "[" . $isub . "]"; |
925 |
} else { |
926 |
$r = "[]"; |
927 |
} |
928 |
} |
929 |
$r = $root . ' ' . $r; |
930 |
} elsif ($self->{namespace_uri} eq $NS{SGML}.'entity' |
931 |
|| $self->{namespace_uri} eq $NS{SGML}.'entity:parameter' |
932 |
|| $self->{namespace_uri} eq $NS{SGML}.'notation') { |
933 |
my %xid_opt; |
934 |
$r = $self->{local_name} . ' ' if !$self->{flag}->{smxp__defined_with_param_ref} |
935 |
&& $self->_check_ncname ($self->{local_name}); |
936 |
if ($self->{namespace_uri} eq $NS{SGML}.'entity:parameter') { |
937 |
#$r = '% ' . $r; |
938 |
} elsif ($self->{namespace_uri} eq $NS{SGML}.'entity') { |
939 |
$xid_opt{use_ndata} = 1; |
940 |
} elsif ($self->{namespace_uri} eq $NS{SGML}.'notation') { |
941 |
$xid_opt{allow_pubid_only} = 1; |
942 |
} |
943 |
|
944 |
my $xid; |
945 |
$xid = $self->external_id (%xid_opt) |
946 |
unless $self->{flag}->{smxp__defined_with_param_ref}; |
947 |
if ($xid) { ## External ID |
948 |
$r .= $xid; |
949 |
} else { ## EntityValue |
950 |
my $entity_value; |
951 |
$entity_value = $self->get_attribute ('value') |
952 |
unless $self->{flag}->{smxp__defined_with_param_ref}; |
953 |
if (ref $entity_value) { # <!ENTITY foo "bar"> |
954 |
$r .= $entity_value->entity_value; |
955 |
} else { ## Consist of parameters |
956 |
my $params = ''; |
957 |
Carp::carp qq({value} property ("$self->{value}") is not allowed for this type of node) |
958 |
if defined $self->{value}; |
959 |
for (@{$self->{node}}) { |
960 |
$params .= $_->outer_xml unless $_->{type} eq '#attribute'; |
961 |
} |
962 |
$r .= length $params ? $params : '""'; |
963 |
} |
964 |
} |
965 |
} elsif ($self->{namespace_uri} eq $NS{SGML}.'element') { |
966 |
$r = $self->get_attribute_value ('qname') |
967 |
unless $self->{flag}->{smxp__defined_with_param_ref}; |
968 |
if ($r) { |
969 |
unless ($self->_check_name ($r)) { |
970 |
Carp::carp qq'"$r": QName expected'; |
971 |
$r = ''; |
972 |
} else { |
973 |
$r .= ' '; |
974 |
} |
975 |
|
976 |
my $cmodel = $self->get_attribute_value ('content', default => 'EMPTY'); |
977 |
if ($cmodel ne 'mixed' and $cmodel ne 'element') { |
978 |
$r .= $cmodel; |
979 |
} else { # element content or mixed content |
980 |
my $make_cmodel; |
981 |
$make_cmodel = sub { |
982 |
my $c = shift; |
983 |
my @tt; |
984 |
for (@{$c->child_nodes}) { |
985 |
if ($_->node_type eq '#element' |
986 |
and $_->namespace_uri eq $NS{SGML}.'element') { |
987 |
if ($_->local_name eq 'group') { |
988 |
my $tt = &$make_cmodel ($_); |
989 |
push @tt, '(' . $tt . ')' |
990 |
. $_->get_attribute_value ('occurence', default => '') |
991 |
if $tt; |
992 |
} elsif ($_->local_name eq 'element') { |
993 |
push @tt, $_->get_attribute_value ('qname') |
994 |
. $_->get_attribute_value ('occurence', default => ''); |
995 |
} |
996 |
} |
997 |
} |
998 |
return join scalar ($c->get_attribute_value ('connector') |
999 |
|| '|'), |
1000 |
grep {$_} @tt; |
1001 |
}; |
1002 |
my $tt; |
1003 |
my $grp_node; |
1004 |
for (@{$self->{node}}) { |
1005 |
if ($_->node_type eq '#element' |
1006 |
and $_->namespace_uri eq $NS{SGML}.'element' |
1007 |
and $_->local_name eq 'group') { |
1008 |
$grp_node = $_; |
1009 |
$tt = &$make_cmodel ($grp_node); |
1010 |
last; |
1011 |
} |
1012 |
} |
1013 |
if ($cmodel eq 'mixed') { ## mixed content |
1014 |
if ($tt) { |
1015 |
$r .= '(#PCDATA|' . $tt . ')*'; |
1016 |
} else { |
1017 |
$r .= '(#PCDATA)' |
1018 |
. ($grp_node->get_attribute_value ('occurence') eq '*' |
1019 |
? '*' : ''); |
1020 |
} |
1021 |
} else { ## element content |
1022 |
if ($tt) { |
1023 |
$r .= '(' . $tt . ')' |
1024 |
. $grp_node->get_attribute_value ('occurence', default => ''); |
1025 |
} else { ## Error |
1026 |
$r .= 'EMPTY'; |
1027 |
} |
1028 |
} # mixed or element content |
1029 |
} # content model group |
1030 |
} else { ## Save source doc's description as far as possible |
1031 |
my $isc = $self->_is_same_class ($self->{value}); |
1032 |
$r .= $self->{value} if defined $self->{value} and !$isc; |
1033 |
for (($isc?$self->{value}:()), @{$self->{node}}) { |
1034 |
unless ($_->{type} eq '#attribute' || $_->{type} eq '#element') { |
1035 |
$r .= $_->outer_xml; |
1036 |
} elsif ($_->{type} eq '#element' |
1037 |
and $_->{namespace_uri} eq $NS{SGML}.'group') { |
1038 |
$r .= $_->outer_xml; |
1039 |
} |
1040 |
} |
1041 |
} |
1042 |
} elsif ($self->{namespace_uri} eq $NS{SGML}.'attlist') { |
1043 |
$r = $self->get_attribute_value ('qname') |
1044 |
unless $self->{flag}->{smxp__defined_with_param_ref}; |
1045 |
if ($r) { |
1046 |
unless ($self->_check_name ($r)) { |
1047 |
Carp::carp qq'inner_xml: "$r": QName expected'; |
1048 |
$r = ''; |
1049 |
} |
1050 |
for (@{$self->{node}}) { |
1051 |
if ($_->{type} eq '#element' |
1052 |
and $_->{namespace_uri} eq $NS{XML}.'attlist' |
1053 |
and $_->{local_name} eq 'AttDef') { |
1054 |
$r .= "\n\t" . $_->get_attribute_value ('qname'); |
1055 |
my $attr_type = $_->get_attribute_value ('type', default => 'CDATA'); |
1056 |
if ($attr_type ne 'enum') { |
1057 |
$r .= "\t" . $attr_type; |
1058 |
} |
1059 |
if ($attr_type eq 'enum' or $attr_type eq 'NOTATION') { |
1060 |
my @l; |
1061 |
for my $item (@{$_->{node}}) { |
1062 |
if ($item->{type} eq '#element' |
1063 |
and $item->{namespace_uri} eq $NS{XML}.'attlist' |
1064 |
and $item->{local_name} eq 'enum') { |
1065 |
push @l, $item->inner_text; |
1066 |
} |
1067 |
} |
1068 |
$r .= "\t(" . join ('|', @l) . ')'; |
1069 |
} |
1070 |
## DefaultDecl -- Keyword |
1071 |
my $deftype = $_->get_attribute_value ('default_type'); |
1072 |
if ($deftype) { |
1073 |
$r .= "\t#" . $deftype; |
1074 |
} |
1075 |
## DefaultDecl -- Attribute value specification |
1076 |
if (not $deftype or $deftype eq 'FIXED') { |
1077 |
$r .= "\t" |
1078 |
. $_->get_attribute ('default_value', make_new_node => 1) |
1079 |
->attribute_value; |
1080 |
} |
1081 |
} # AttDef |
1082 |
} |
1083 |
} else { ## Save source doc's description as far as possible |
1084 |
Carp::carp '{value} should not be used here' if defined $self->{value}; |
1085 |
for (@{$self->{node}}) { |
1086 |
unless ($_->{type} eq '#attribute' || $_->{type} eq '#element') { |
1087 |
$r .= $_->outer_xml; |
1088 |
} elsif ($_->{type} eq '#element' |
1089 |
and $_->{namespace_uri} eq $NS{SGML}.'group') { |
1090 |
$r .= $_->outer_xml; |
1091 |
} |
1092 |
} |
1093 |
} |
1094 |
} else { # unknown declaration |
1095 |
Carp::carp qq'Unsupported type (<$self->{namespace_uri}>) of markup declaration'; |
1096 |
for (@{$self->{node}}) { |
1097 |
$r .= $_->outer_xml; |
1098 |
} |
1099 |
} |
1100 |
} elsif ($self->{type} eq '#section') { |
1101 |
my $status = $self->get_attribute_value ('status', default => ''); |
1102 |
if ($status eq 'CDATA') { |
1103 |
$r = $self->inner_text; |
1104 |
$r =~ s/]]>/]]>]]<![CDATA[>/g; |
1105 |
$r = 'CDATA['.$r; |
1106 |
} else { |
1107 |
my $sl = $self->get_attribute ('status_list', make_new_node => 1); |
1108 |
if ($sl->{flag}->{smxp__defined_with_param_ref}) { |
1109 |
my $isc = $self->_is_same_class ($self->{value}); |
1110 |
$status = (defined $sl->{value} and !$isc) ? $sl->{value} : ''; |
1111 |
for (($isc?$sl->{value}:()), @{$sl->{node}}) { |
1112 |
$status .= $_->outer_xml unless $_->{type} eq '#attribute'; |
1113 |
} |
1114 |
$r = $status.'['.$r; |
1115 |
} elsif ($status) { |
1116 |
$r = $status.'['.$r; |
1117 |
} else { |
1118 |
## Must be an ignore*d* section |
1119 |
$r = '['; |
1120 |
} |
1121 |
my $isc = $self->_is_same_class ($self->{value}); |
1122 |
if (not $isc and defined $self->{value}) { |
1123 |
my $s = $self->{value}; |
1124 |
$s =~ s/\]\]>/]]>/g; |
1125 |
$r .= $s; |
1126 |
} |
1127 |
for (($isc?$self->{value}:()), @{$self->{node}}) { |
1128 |
if ($_->{type} eq '#text') { |
1129 |
my $s = $_->inner_text; |
1130 |
$s =~ s/\]\]>/]]>/g; |
1131 |
$r .= $s; ## But this will be non well-formed. |
1132 |
} elsif ($_->{type} ne '#attribute') { |
1133 |
$r .= $_->outer_xml; |
1134 |
} |
1135 |
} |
1136 |
} |
1137 |
} else { |
1138 |
my $isc = $self->_is_same_class ($self->{value}); |
1139 |
unless ($isc) { |
1140 |
if ($self->{type} ne '#xml') { |
1141 |
$r = defined $self->{value} ? $self->_escape ($self->{value}) : ''; |
1142 |
} else { |
1143 |
$r = $self->{value}; |
1144 |
} |
1145 |
} |
1146 |
for (($isc?$self->{value}:()), @{$self->{node}}) { |
1147 |
my $nt = $_->{type}; |
1148 |
if (($self->{option}->{indent}) |
1149 |
&& ($nt eq '#element' || $nt eq '#comment' || $nt eq '#pi' || $nt eq '#declaration')) { |
1150 |
$r .= "\n"; |
1151 |
} |
1152 |
$r .= $_->outer_xml unless $_->node_type eq '#attribute'; |
1153 |
} |
1154 |
} |
1155 |
$r; |
1156 |
} |
1157 |
|
1158 |
|
1159 |
=item $tag = $x->outer_xml |
1160 |
|
1161 |
Returns the node in XML syntax. |
1162 |
|
1163 |
=cut |
1164 |
|
1165 |
sub outer_xml ($) { |
1166 |
my $self = shift; |
1167 |
if ($self->{type} eq '#attribute') { |
1168 |
return $self->attribute; |
1169 |
} else { |
1170 |
$self->qname; ## Register undeclared namespace |
1171 |
my $c = $self->inner_xml; |
1172 |
my $r = $self->start_tag; |
1173 |
if ($self->{type} eq '#element' |
1174 |
and $self->{option}->{use_EmptyElemTag} |
1175 |
and not length $c) { |
1176 |
substr ($r, -1) = ' />'; |
1177 |
} else { |
1178 |
$r .= $c . $self->end_tag; |
1179 |
} |
1180 |
return $r; |
1181 |
#return '{'.$self->{type}.': '.$r.'}'; ## DEBUG: show structure |
1182 |
} |
1183 |
} |
1184 |
|
1185 |
=item $tag = $x->inner_text |
1186 |
|
1187 |
Returns the text content of the node. (In many case the returned value is same |
1188 |
as WinIE DOM C<inner_text ()> function's or XPath C<text()> function's. |
1189 |
But some classes that inherits this module might implement to return other |
1190 |
value (eg. to return the value of the alt attribute of html:img element). |
1191 |
|
1192 |
Available options: C<output_ref_as_is>. |
1193 |
|
1194 |
=cut |
1195 |
|
1196 |
sub inner_text ($;%) { |
1197 |
my $self = shift; |
1198 |
my %o = @_; |
1199 |
my $r = ''; |
1200 |
if ($self->{type} eq '#reference' |
1201 |
&& ($self->{namespace_uri} eq $NS{SGML}.'char:ref' |
1202 |
|| $self->{namespace_uri} eq $NS{SGML}.'char:ref:hex')) { |
1203 |
$r = chr $self->{value}; |
1204 |
} elsif ($self->{type} eq '#declaration' |
1205 |
&& ($self->{namespace_uri} eq $NS{SGML}.'entity' |
1206 |
|| $self->{namespace_uri} eq $NS{SGML}.'entity:parameter')) { |
1207 |
## TODO: |
1208 |
$r = $self->set_attribute ('value')->inner_text; |
1209 |
} else { # not #reference nor #declaration(ENTITY) |
1210 |
my $isc = $self->_is_same_class ($self->{value}); |
1211 |
$r = $self->{value} if !$isc && defined $self->{value}; |
1212 |
if ($o{output_ref_as_is}) { ## output as if RCDATA |
1213 |
$r =~ s/&/&/g; |
1214 |
for my $node (($isc?$self->{value}:()), @{$self->{node}}) { |
1215 |
my $nt = $node->node_type; |
1216 |
if ($nt eq '#reference' || $nt eq '#xml') { |
1217 |
$r .= $node->outer_xml; |
1218 |
} elsif ($nt ne '#attribute') { |
1219 |
$r .= map {s/&/&/g; $_} $node->inner_text; |
1220 |
} |
1221 |
} |
1222 |
} else { |
1223 |
for (($isc?$self->{value}:()), @{$self->{node}}) { |
1224 |
$r .= $_->inner_text unless $_->{type} eq '#attribute'; |
1225 |
} |
1226 |
} |
1227 |
} |
1228 |
$r; |
1229 |
} |
1230 |
|
1231 |
sub stringify ($;%) { shift->outer_xml (@_) } |
1232 |
|
1233 |
sub _is_same_class ($$) { |
1234 |
my ($self, $something) = @_; |
1235 |
return 0 if {qw/ARRAY 1 HASH 1 CODE 1 :nonref: 1/}->{ref ($something) || ':nonref:'}; |
1236 |
eval q{$self->_CLASS_NAME eq $something->_CLASS_NAME} ? 1 : 0; |
1237 |
} |
1238 |
|
1239 |
sub root_node ($) { |
1240 |
my $self = shift; |
1241 |
if ($self->{type} eq '#document') { |
1242 |
return $self; |
1243 |
} elsif (ref $self->{parent}) { |
1244 |
return $self->{parent}->root_node; |
1245 |
} else { |
1246 |
return $self; |
1247 |
} |
1248 |
} |
1249 |
|
1250 |
sub _get_ns_decls_node ($;%) { |
1251 |
my ($self, %opt) = @_; |
1252 |
if ($self->{type} eq '#element') { |
1253 |
return $self; |
1254 |
} elsif (ref $self->{parent}) { |
1255 |
return $self->{parent}->_get_ns_decls_node; |
1256 |
} elsif (exists $opt{default}) { |
1257 |
return $opt{default}; |
1258 |
} else { |
1259 |
Carp::carp qq(There is no namespace declarations node (type $self->{type})); |
1260 |
return {}; |
1261 |
} |
1262 |
} |
1263 |
|
1264 |
sub _get_entity_manager ($) { |
1265 |
my $self = shift; |
1266 |
if ($self->{type} eq '#document') { |
1267 |
unless ($self->{flag}->{smx__entity_manager}) { |
1268 |
require Message::Markup::XML::EntityManager; |
1269 |
$self->{flag}->{smx__entity_manager} = Message::Markup::XML::EntityManager->new ($self); |
1270 |
} |
1271 |
return $self->{flag}->{smx__entity_manager}; |
1272 |
} elsif (ref $self->{parent}) { |
1273 |
return $self->{parent}->_get_entity_manager; |
1274 |
} else { |
1275 |
unless ($self->{flag}->{smx__entity_manager}) { |
1276 |
require Message::Markup::XML::EntityManager; |
1277 |
$self->{flag}->{smx__entity_manager} = Message::Markup::XML::EntityManager->new ($self); |
1278 |
} |
1279 |
return $self->{flag}->{smx__entity_manager}; |
1280 |
} |
1281 |
} |
1282 |
|
1283 |
sub _CLASS_NAME { 'SuikaWiki::Markup::XML' } |
1284 |
|
1285 |
# $s = $x->_escape ($s) |
1286 |
sub _escape ($$;%) { |
1287 |
my ($self, $s, %o) = (shift, shift, @_); |
1288 |
$s =~ s/&/&/g; |
1289 |
$s =~ s/</</g; |
1290 |
$s =~ s/>/>/g; |
1291 |
$s =~ s/"/"/g; |
1292 |
## XML 1.0 |
1293 |
$s =~ s/([\x00-\x08\x0B\x0C\x0E-\x1F])/sprintf '&#%d;', ord $1/ge; |
1294 |
## XML 1.1 |
1295 |
#$s =~ s/(\x00)/sprintf '&#%d;', ord $1/ge; |
1296 |
#$s =~ s/([\x00-\x08\x0B\x0C\x0E-\x1F])/sprintf '&#x%02X;', ord $1/ge; |
1297 |
$s =~ s/([\x09\x0A\x0D])/sprintf '&#%d;', ord $1/ge if $o{keep_wsp}; |
1298 |
$s; |
1299 |
} |
1300 |
|
1301 |
# 1/0 = $x->_check_name ($s) |
1302 |
sub _check_name ($$) { |
1303 |
my $self = shift; |
1304 |
my $s = shift; |
1305 |
return 0 unless defined $s; |
1306 |
return $Cache{name}->{$s} if defined $Cache{name}->{$s}; |
1307 |
if ($s =~ /^\p{InXML_NameStartChar}/ && $s !~ /\P{InXMLNameChar}/) { |
1308 |
# \p{...}('*'/'+'/'{n,}') does not work... |
1309 |
$Cache{name}->{$s} = 1; |
1310 |
1; |
1311 |
} else { |
1312 |
$Cache{name}->{$s} = 0; |
1313 |
0; |
1314 |
} |
1315 |
} |
1316 |
# 1/0 = $x->_check_ncname ($s) |
1317 |
sub _check_ncname ($$) { |
1318 |
my $self = shift; |
1319 |
my $s = shift; |
1320 |
return 0 unless defined $s; |
1321 |
return $Cache{ncname}->{$s} if defined $Cache{ncname}->{$s}; |
1322 |
if ($s =~ /^\p{InXML_NCNameStartChar}/ && $s !~ /\P{InXMLNCNameChar}/) { |
1323 |
# \p{...}('*'/'+'/'{n,}') does not work... |
1324 |
$Cache{ncname}->{$s} = 1; |
1325 |
1; |
1326 |
} else { |
1327 |
$Cache{ncname}->{$s} = 0; |
1328 |
0; |
1329 |
} |
1330 |
} |
1331 |
|
1332 |
# 1/0 = $x->_check_namespace_prefix ($s) |
1333 |
sub _check_namespace_prefix ($$) { |
1334 |
my $self = shift; |
1335 |
my $s = shift; |
1336 |
return 0 unless defined $s; |
1337 |
return 1 if $s eq ''; |
1338 |
substr ($s, -1, 1) = '' if substr ($s, -1, 1) eq ':'; |
1339 |
$self->_check_ncname ($s); |
1340 |
} |
1341 |
|
1342 |
## TODO: cleaning $self->{node} before outputing, to ensure nodes not to have |
1343 |
## multiple parents. |
1344 |
## TODO: normalize namespace URI (removing non URI chars) |
1345 |
|
1346 |
sub flag ($$;$) { |
1347 |
my ($self, $name, $value) = @_; |
1348 |
if (defined $value) { |
1349 |
$self->{flag}->{$name} = $value; |
1350 |
} |
1351 |
$self->{flag}->{$name}; |
1352 |
} |
1353 |
|
1354 |
sub option ($$;$) { |
1355 |
my ($self, $name, $value) = @_; |
1356 |
if (defined $value) { |
1357 |
$self->{option}->{$name} = $value; |
1358 |
} |
1359 |
$self->{option}->{$name}; |
1360 |
} |
1361 |
|
1362 |
=back |
1363 |
|
1364 |
=head1 NODE TYPES |
1365 |
|
1366 |
=over 4 |
1367 |
|
1368 |
=item #attribute |
1369 |
|
1370 |
Attribute. Its XML representation takes the form of NAME="VALUE". |
1371 |
|
1372 |
=item #comment |
1373 |
|
1374 |
Comment declarement. <!-- --> |
1375 |
|
1376 |
=item #declarement |
1377 |
|
1378 |
SGML's declarements, such as SGML, DOCTYPE, ENTITY, etc. |
1379 |
<!SGML ...>, <!DOCTYPE root []>, <!ENTITY % name "value"> |
1380 |
|
1381 |
=item #element |
1382 |
|
1383 |
Element. Its XML representation consists of start tag, content and end tag, |
1384 |
like <TYPE>content</TYPE>. |
1385 |
|
1386 |
=item #fragment |
1387 |
|
1388 |
Fragment of nodes. It's similar to DOM's fragment node. |
1389 |
|
1390 |
=item #pi |
1391 |
|
1392 |
Prosessing instruction. <?NAME VALUE?> |
1393 |
|
1394 |
=item #reference |
1395 |
|
1396 |
Character reference or general or parameter entity reference. |
1397 |
&#nnnn;, &#xhhhh;, &name;, %name;. |
1398 |
|
1399 |
=item #section |
1400 |
|
1401 |
Markup section. CDATA, INCLUDE and IGNORE are supported by XML. |
1402 |
<![%type;[...]]> |
1403 |
|
1404 |
=item #text |
1405 |
|
1406 |
Text. |
1407 |
|
1408 |
=item #xml |
1409 |
|
1410 |
Preformatted XML text. |
1411 |
|
1412 |
=cut |
1413 |
|
1414 |
=head1 RESTRICTIONS |
1415 |
|
1416 |
=over 4 |
1417 |
|
1418 |
=item XML without XML Namespace is not supported. |
1419 |
|
1420 |
=item Before default namespace without bounded URI (xmlns="") is outputed, it must be declared. |
1421 |
|
1422 |
For example, next code generates invalid (non-well-formed) XML Namespace document. |
1423 |
|
1424 |
my $x = Message::Markup::XML->new (local_name => 'elementType'); |
1425 |
print $x # <ns1:elementType xmlns:ns1=""></ns1:elementType> |
1426 |
|
1427 |
So you must write like: |
1428 |
|
1429 |
my $x = Message::Markup::XML->new (local_name => 'elementType'); |
1430 |
$x->define_new_namespace ('' => ''); |
1431 |
print $x; # <elementType xmlns=""></elementType> |
1432 |
|
1433 |
=back |
1434 |
|
1435 |
=head1 LICENSE |
1436 |
|
1437 |
Copyright 2003 Wakaba <w@suika.fam.cx> |
1438 |
|
1439 |
This program is free software; you can redistribute it and/or |
1440 |
modify it under the same terms as Perl itself. |
1441 |
|
1442 |
=cut |
1443 |
|
1444 |
1; # $Date: 2004/02/14 11:25:16 $ |