1 |
wakaba |
1.1 |
|
2 |
|
|
=head1 NAME |
3 |
|
|
|
4 |
|
|
SuikaWiki::Markup::XML --- SuikaWiki: Simple well-formed document fragment generator |
5 |
|
|
|
6 |
|
|
=head1 DESCRIPTION |
7 |
|
|
|
8 |
|
|
This module can be used to generate the document fragment of XML, SGML or |
9 |
|
|
other well-formed (in XML meaning) data formats with the object oriented manner. |
10 |
|
|
|
11 |
|
|
This module cannot be used to parse XML (or other marked-up) document (or its fragment) |
12 |
|
|
by itself, nor is compatible with other huge packages such as XML::Parser. The only purpose |
13 |
|
|
of this module is to make it easy for tiny perl scripts to GENERATE well-formed |
14 |
|
|
markup constructures. (SuikaWiki is not "tiny"? Oh, yes, I see:-)) |
15 |
|
|
|
16 |
|
|
=cut |
17 |
|
|
|
18 |
|
|
package SuikaWiki::Markup::XML; |
19 |
|
|
use strict; |
20 |
|
|
use overload '""' => \&stringify; |
21 |
|
|
use Char::Class::XML qw!InXML_NCNameStartChar InXMLNCNameChar!; |
22 |
|
|
our %Namespace_URI_to_prefix = ( |
23 |
|
|
'DAV:' => [qw/dav webdav/], |
24 |
|
|
'http://greenbytes.de/2002/rfcedit' => [qw/ed/], |
25 |
|
|
'http://icl.com/saxon' => [qw/saxon/], |
26 |
|
|
'http://members.jcom.home.ne.jp/jintrick/2003/02/site-concept.xml#' => ['', qw/sitemap/], |
27 |
|
|
'http://purl.org/dc/elements/1.1/' => [qw/dc dc11/], |
28 |
|
|
'http://purl.org/rss/1.0/' => ['', qw/rss rss10/], |
29 |
|
|
'http://suika.fam.cx/~wakaba/lang/rfc/translation/' => [qw/ja/], |
30 |
|
|
'http://www.mozilla.org/xbl' => ['', qw/xbl/], |
31 |
|
|
'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => [qw/rdf RDF/], |
32 |
|
|
'http://www.w3.org/1999/xhtml' => ['', qw/h h1 xhtml xhtml1/], |
33 |
|
|
'http://www.w3.org/1999/xlink' => [qw/l xlink/], |
34 |
|
|
'http://www.w3.org/1999/XSL/Format' => [qw/fo xslfo xsl-fo xsl/], |
35 |
|
|
'http://www.w3.org/1999/XSL/Transform' => [qw/t s xslt xsl/], |
36 |
|
|
'http://www.w3.org/1999/XSL/TransformAlias' => [qw/axslt axsl xslt xsl/], |
37 |
|
|
'http://www.w3.org/2000/01/rdf-schema#' => [qw/rdfs/], |
38 |
|
|
'http://www.w3.org/2000/svg' => ['', qw/s svg/], |
39 |
|
|
'http://www.w3.org/2002/06/hlink' => [qw/h hlink/], |
40 |
|
|
'http://www.w3.org/2002/06/xhtml2' => ['', qw/h h2 xhtml xhtml2/], |
41 |
|
|
'http://www.w3.org/Graphics/SVG/svg-19990412.dtd' => ['', qw/svg/], |
42 |
|
|
'http://www.w3.org/TR/REC-smil' => ['', qw/smil smil1/], |
43 |
|
|
'http://xml.apache.org/xalan' => [qw/xalan/], |
44 |
|
|
'mailto:julian.reschke@greenbytes.de?subject=rcf2629.xslt' => [qw/myns/], |
45 |
|
|
'urn:schemas-microsoft-com:office:excel' => [qw/x excel/], |
46 |
|
|
'urn:schemas-microsoft-com:office:office' => [qw/o office/], |
47 |
|
|
'urn:schemas-microsoft-com:office:word' => [qw/w word/], |
48 |
|
|
'urn:schemas-microsoft-com:vml' => [qw/v vml/], |
49 |
|
|
'urn:schemas-microsoft-com:xslt' => [qw/ms msxsl msxslt/], |
50 |
|
|
'urn:x-suika-fam-cx:markup:ietf:html:3:draft:00' => ['', qw/H HTML HTML3/], |
51 |
|
|
); |
52 |
|
|
my %Cache; |
53 |
|
|
|
54 |
|
|
=head1 METHODS |
55 |
|
|
|
56 |
|
|
=over 4 |
57 |
|
|
|
58 |
|
|
=item $x = SuikaWiki::Markup::XML->new (%options) |
59 |
|
|
|
60 |
|
|
Returns new instance of the module. It is itself a node. |
61 |
|
|
|
62 |
|
|
Available options: C<type> (default: C<#element>), C<local_name>, C<namespace_uri>, |
63 |
|
|
C<value>. |
64 |
|
|
|
65 |
|
|
=cut |
66 |
|
|
|
67 |
|
|
sub new ($;%) { |
68 |
|
|
my $class = shift; |
69 |
|
|
my $self = bless {@_}, $class; |
70 |
|
|
$self->{type} ||= '#element'; |
71 |
|
|
$self->{node} ||= []; |
72 |
|
|
$self; |
73 |
|
|
} |
74 |
|
|
|
75 |
|
|
|
76 |
|
|
=item $x->append_node ($node) |
77 |
|
|
|
78 |
|
|
Appending given node to the object (as the last child). |
79 |
|
|
If the type of given node is C<#fragment>, its all children, not the node |
80 |
|
|
itself, are appended. |
81 |
|
|
|
82 |
|
|
This method returns the appended node unless the type of given node is C<#fragment>. |
83 |
|
|
In such cases, this node (C<$x>) is returned. |
84 |
|
|
|
85 |
|
|
=cut |
86 |
|
|
|
87 |
|
|
sub append_node ($$) { |
88 |
|
|
my $self = shift; |
89 |
|
|
my $new_node = shift; |
90 |
|
|
die "append_node: Invalid node" unless ref $new_node; |
91 |
|
|
if ($new_node->{type} eq '#fragment') { |
92 |
|
|
for (@{$new_node->{node}}) { |
93 |
|
|
push @{$self->{node}}, $_; |
94 |
|
|
$_->{parent} = $self; |
95 |
|
|
} |
96 |
|
|
$self; |
97 |
|
|
} else { |
98 |
|
|
push @{$self->{node}}, $new_node; |
99 |
|
|
$new_node->{parent} = $self; |
100 |
|
|
$new_node; |
101 |
|
|
} |
102 |
|
|
} |
103 |
|
|
|
104 |
|
|
=item $new_node = $x->append_new_node (%options) |
105 |
|
|
|
106 |
|
|
Appending a new node. The new node is returned. |
107 |
|
|
|
108 |
|
|
Available options: C<type>, C<namespace_uri>, C<local_name>, C<value>. |
109 |
|
|
|
110 |
|
|
=cut |
111 |
|
|
|
112 |
|
|
sub append_new_node ($;%) { |
113 |
|
|
my $self = shift; |
114 |
|
|
my %o = @_; |
115 |
|
|
my $new_node = __PACKAGE__->new (%o); |
116 |
|
|
push @{$self->{node}}, $new_node; |
117 |
|
|
$new_node->{parent} = $self; |
118 |
|
|
$new_node; |
119 |
|
|
} |
120 |
|
|
|
121 |
|
|
=item $new_node = $x->append_text ($text) |
122 |
|
|
|
123 |
|
|
Appending given text as a new text node. The new text node is returned. |
124 |
|
|
|
125 |
|
|
=cut |
126 |
|
|
|
127 |
|
|
sub append_text ($$;%) { |
128 |
|
|
my $self = shift; |
129 |
|
|
my $s = shift; |
130 |
|
|
#if (@{$self->{node}}[-1]->{type} eq '#text') { |
131 |
|
|
# $self->{node}}[-1]->append_new_node (type => '#text', value => $s); |
132 |
|
|
#} else { |
133 |
|
|
$self->append_new_node (type => '#text', value => $s); |
134 |
|
|
#} |
135 |
|
|
} |
136 |
|
|
|
137 |
|
|
=item $attr_node = $x->get_attribute ($local_name, %options) |
138 |
|
|
|
139 |
|
|
Returns the attribute node whose local-name is C<$local_name>. |
140 |
|
|
|
141 |
|
|
Available options: C<namespace_uri>, C<make_new_node>. |
142 |
|
|
|
143 |
|
|
=cut |
144 |
|
|
|
145 |
|
|
sub get_attribute ($$;%) { |
146 |
|
|
my $self = shift; |
147 |
|
|
my ($name, %o) = @_; |
148 |
|
|
for (@{$self->{node}}) { |
149 |
|
|
if ($_->{type} eq '#attribute' && $_->{local_name} eq $name && $o{namespace_uri} eq $_->{namespace_uri}) { |
150 |
|
|
return $_; |
151 |
|
|
} |
152 |
|
|
} |
153 |
|
|
## Node is not exist |
154 |
|
|
if ($o{make_new_node}) { |
155 |
|
|
return $self->append_new_node (type => '#attribute', local_name => $name, namespace_uri => $o{namespace_uri}); |
156 |
|
|
} else { |
157 |
|
|
return undef; |
158 |
|
|
} |
159 |
|
|
} |
160 |
|
|
|
161 |
|
|
=item $attr_node = $x->set_attribute ($local_name => $value, %options) |
162 |
|
|
|
163 |
|
|
Set the value of the attribute. The attribute node is returned. |
164 |
|
|
|
165 |
|
|
Available options: C<namespace_uri>. |
166 |
|
|
|
167 |
|
|
=cut |
168 |
|
|
|
169 |
|
|
sub set_attribute ($$$;%) { |
170 |
|
|
my $self = shift; |
171 |
|
|
my ($name, $val, %o) = @_; |
172 |
|
|
for (@{$self->{node}}) { |
173 |
|
|
if ($_->{type} eq '#attribute' && $_->{local_name} eq $name && $o{namespace_uri} eq $_->{namespace_uri}) { |
174 |
|
|
$_->{value} = $val; |
175 |
|
|
$_->{node} = []; |
176 |
|
|
return $_; |
177 |
|
|
} |
178 |
|
|
} |
179 |
|
|
return $self->append_new_node (type => '#attribute', local_name => $name, value => $val, namespace_uri => $o{namespace_uri}); |
180 |
|
|
} |
181 |
|
|
|
182 |
|
|
=item \@children = $x->child_nodes |
183 |
|
|
|
184 |
|
|
Returns an array reference to child nodes. |
185 |
|
|
|
186 |
|
|
=item $local_name = $x->local_name |
187 |
|
|
|
188 |
|
|
Returns the local-name. |
189 |
|
|
|
190 |
|
|
=item $type = $x->node_type |
191 |
|
|
|
192 |
|
|
Returns the node type. |
193 |
|
|
|
194 |
|
|
=item $node = $x->parent_node |
195 |
|
|
|
196 |
|
|
Returns the parent node. If there is no parent node, undef is returned. |
197 |
|
|
|
198 |
|
|
=cut |
199 |
|
|
|
200 |
|
|
sub child_nodes ($) { shift->{node} } |
201 |
|
|
sub local_name ($) { shift->{local_name} } |
202 |
|
|
sub node_type ($) { shift->{type} } |
203 |
|
|
sub parent_node ($) { shift->{parent} } |
204 |
|
|
|
205 |
|
|
=item $i = $x->count |
206 |
|
|
|
207 |
|
|
Returns the number of child nodes. |
208 |
|
|
|
209 |
|
|
=cut |
210 |
|
|
|
211 |
|
|
# TODO: support counting by type |
212 |
|
|
sub count ($;@) { |
213 |
|
|
scalar @{shift->{node}}; |
214 |
|
|
} |
215 |
|
|
|
216 |
|
|
# $prefix = $x->_get_namespace_prefix ($namespace_uri) |
217 |
|
|
sub _get_namespace_prefix ($$;%) { |
218 |
|
|
my ($self, $uri) = (shift, shift); |
219 |
|
|
my %o = @_; |
220 |
|
|
if (defined (my $p = $self->_uri_to_prefix ($uri, undef, %o))) { |
221 |
|
|
return $p if $self->_prefix_to_uri ($p) eq $uri; |
222 |
|
|
} if ($Namespace_URI_to_prefix{$uri}) { |
223 |
|
|
for (@{$Namespace_URI_to_prefix{$uri}}) { |
224 |
|
|
my $pfx = $_; $pfx .= ':' if $pfx; |
225 |
|
|
if ($self->_check_namespace_prefix ($pfx) && !$self->_prefix_to_uri ($pfx)) { |
226 |
|
|
return $self->_uri_to_prefix ($uri => $pfx, %o); |
227 |
|
|
} |
228 |
|
|
} |
229 |
|
|
} else { |
230 |
|
|
my ($u_r_i, $pfx) = ($uri); |
231 |
|
|
while ($u_r_i =~ s/([A-Za-z][0-9A-Za-z-]+)[^0-9A-Za-z-]*$//) { |
232 |
|
|
my $p_f_x = $1 . ':'; |
233 |
|
|
next if uc (substr ($p_f_x, 0, 3)) eq 'XML'; |
234 |
|
|
unless ($self->_prefix_to_uri ($p_f_x)) { |
235 |
|
|
$pfx = $p_f_x; |
236 |
|
|
last; |
237 |
|
|
} |
238 |
|
|
} |
239 |
|
|
if ($pfx) { |
240 |
|
|
return $self->_uri_to_prefix ($uri => $pfx, %o); |
241 |
|
|
} else { |
242 |
|
|
while (1) { |
243 |
|
|
my $pfx = 'ns'.(++$self->{ns}->{-anonymous}).':'; |
244 |
|
|
unless ($self->_prefix_to_uri ($pfx)) { |
245 |
|
|
return $self->_uri_to_prefix ($uri => $pfx, %o); |
246 |
|
|
} |
247 |
|
|
} |
248 |
|
|
} |
249 |
|
|
} |
250 |
|
|
} |
251 |
|
|
|
252 |
|
|
# $uri or undef = $x->_prefix_to_uri ($prefix => [$new_uri]) |
253 |
|
|
sub _prefix_to_uri ($$;$%) { |
254 |
|
|
my ($self, $prefix, $new_uri, %o) = @_; |
255 |
|
|
return undef unless $self->_check_namespace_prefix ($prefix); |
256 |
|
|
if ($new_uri) { |
257 |
|
|
$self->{ns}->{$prefix} = $new_uri; |
258 |
|
|
} |
259 |
|
|
if (uc (substr $prefix, 0, 3) eq 'XML') { |
260 |
|
|
return 'http://www.w3.org/XML/1998/namespace' if $prefix eq 'xml:'; |
261 |
|
|
return 'http://www.w3.org/2000/xmlns/' if $prefix eq 'xmlns:'; |
262 |
|
|
} if (defined $self->{ns}->{$prefix}) { |
263 |
|
|
$self->{ns}->{$prefix}; |
264 |
|
|
} elsif (ref $self->{parent}) { |
265 |
|
|
shift; # $self |
266 |
|
|
$self->{parent}->_prefix_to_uri (@_); |
267 |
|
|
} else { |
268 |
|
|
undef; |
269 |
|
|
} |
270 |
|
|
} |
271 |
|
|
|
272 |
|
|
# $prefix or undef = $x->_uri_to_prefix ($uri => [$new_prefix], %options) |
273 |
|
|
# use_no_prefix (default: 1): Allow default namespace (no prefix). |
274 |
|
|
sub _uri_to_prefix ($$;$%) { |
275 |
|
|
my ($self, $uri, $new_prefix, %o) = @_; |
276 |
|
|
if (defined $new_prefix && $self->_check_namespace_prefix ($new_prefix)) { |
277 |
|
|
$self->{ns}->{$new_prefix} = $uri; |
278 |
|
|
$new_prefix; |
279 |
|
|
} else { |
280 |
|
|
return 'xml:' if $uri eq 'http://www.w3.org/XML/1998/namespace'; |
281 |
|
|
return 'xmlns:' if $uri eq 'http://www.w3.org/2000/xmlns/'; |
282 |
|
|
for (keys %{$self->{ns}||{}}) { |
283 |
|
|
next if ($_ eq '') && !(!defined $o{use_no_prefix} || $o{use_no_prefix}); |
284 |
|
|
return $_ if $self->{ns}->{$_} eq $uri; |
285 |
|
|
} |
286 |
|
|
if (ref $self->{parent}) { |
287 |
|
|
shift; # $self |
288 |
|
|
$self->{parent}->_uri_to_prefix (@_); |
289 |
|
|
} else { |
290 |
|
|
undef; |
291 |
|
|
} |
292 |
|
|
} |
293 |
|
|
} |
294 |
|
|
|
295 |
|
|
=item $x->define_new_namespace ($prefix => $uri) |
296 |
|
|
|
297 |
|
|
Defines a new XML Namespace. This method is useful for root or section-level |
298 |
|
|
element node. |
299 |
|
|
|
300 |
|
|
Returned value is unspecified in this version of this module. |
301 |
|
|
|
302 |
|
|
=cut |
303 |
|
|
|
304 |
|
|
sub define_new_namespace ($$$) { |
305 |
|
|
my ($self, $prefix, $uri) = @_; |
306 |
|
|
if ($prefix eq '' || $self->_check_ncname ($prefix)) { |
307 |
|
|
$prefix .= ':' if $prefix && substr ($prefix, -1) ne ':'; |
308 |
|
|
$self->_prefix_to_uri ($prefix => $uri); |
309 |
|
|
} else { |
310 |
|
|
undef; |
311 |
|
|
} |
312 |
|
|
} |
313 |
|
|
|
314 |
|
|
=item $qname = $x->qname |
315 |
|
|
|
316 |
|
|
Returns QName ((namespace-)qualified name) of the element type. |
317 |
|
|
Undef is retuened when the type does not have its QName |
318 |
|
|
(ie. when type is neither C<#element> or C<#attribute>). |
319 |
|
|
|
320 |
|
|
=cut |
321 |
|
|
|
322 |
|
|
sub qname ($) { |
323 |
|
|
my $self = shift; |
324 |
|
|
if ($self->_check_ncname ($self->{local_name})) { |
325 |
|
|
if ($self->{type} eq '#element') { |
326 |
|
|
$self->{_qname} = $self->_get_namespace_prefix ($self->{namespace_uri}) . $self->{local_name} |
327 |
|
|
unless $self->{_qname}; |
328 |
|
|
return $self->{_qname}; |
329 |
|
|
} elsif ($self->{type} eq '#attribute') { |
330 |
|
|
return $self->attribute_name; |
331 |
|
|
} |
332 |
|
|
} |
333 |
|
|
undef; |
334 |
|
|
} |
335 |
|
|
|
336 |
|
|
=item $tag = $x->start_tag |
337 |
|
|
|
338 |
|
|
Returns the start tag (or something that marks the start of something, such as '<!-- ' |
339 |
|
|
for C<#comment> nodes). |
340 |
|
|
|
341 |
|
|
=cut |
342 |
|
|
|
343 |
|
|
sub start_tag ($) { |
344 |
|
|
my $self = shift; |
345 |
|
|
if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) { |
346 |
|
|
my $r = '<'; |
347 |
|
|
$r .= $self->qname; |
348 |
|
|
for (@{$self->{node}}) { |
349 |
|
|
$r .= ' ' . $_->outer_xml if $_->node_type eq '#attribute'; |
350 |
|
|
} |
351 |
|
|
for my $prefix (grep !/^-/, keys %{$self->{ns}||{}}) { |
352 |
|
|
if ($prefix) { |
353 |
|
|
$r .= ' xmlns:'.substr ($prefix, 0, length ($prefix)-1); |
354 |
|
|
} else { |
355 |
|
|
$r .= ' xmlns'; |
356 |
|
|
} |
357 |
|
|
$r .= '="'.$self->_entitize ($self->{ns}->{$prefix}).'"'; |
358 |
|
|
} |
359 |
|
|
$r .= '>'; |
360 |
|
|
$r; |
361 |
|
|
} elsif ($self->{type} eq '#comment') { |
362 |
|
|
'<!-- '; |
363 |
|
|
} elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) { |
364 |
|
|
'<?' . $self->{local_name}; |
365 |
|
|
} elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) { |
366 |
|
|
my $r = '<!' . $self->{local_name} . ' '; |
367 |
|
|
if ($self->{local_name} eq 'DOCTYPE' && ref $self->{parent}) { |
368 |
|
|
my $qname; |
369 |
|
|
for (@{$self->{parent}->{node}}) { |
370 |
|
|
if ($_->{type} eq '#element') { |
371 |
|
|
$qname = $_->qname; |
372 |
|
|
last if $qname; |
373 |
|
|
} |
374 |
|
|
} |
375 |
|
|
$r .= ($qname ? $qname : '#IMPLIED') . ' '; |
376 |
|
|
$r; |
377 |
|
|
} |
378 |
|
|
} else { |
379 |
|
|
''; |
380 |
|
|
} |
381 |
|
|
} |
382 |
|
|
|
383 |
|
|
=item $tag = $x->end_tag |
384 |
|
|
|
385 |
|
|
Returns the end tag (or something that marks the end of something, such as ' -->' |
386 |
|
|
for C<#comment> nodes). |
387 |
|
|
|
388 |
|
|
=cut |
389 |
|
|
|
390 |
|
|
sub end_tag ($) { |
391 |
|
|
my $self = shift; |
392 |
|
|
if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) { |
393 |
|
|
'</' . $self->qname . '>'; |
394 |
|
|
} elsif ($self->{type} eq '#comment') { |
395 |
|
|
' -->'; |
396 |
|
|
} elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) { |
397 |
|
|
'?>'; |
398 |
|
|
} elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) { |
399 |
|
|
my $r = ''; |
400 |
|
|
$r .= '>'; |
401 |
|
|
} else { |
402 |
|
|
''; |
403 |
|
|
} |
404 |
|
|
} |
405 |
|
|
|
406 |
|
|
=item $tag = $x->attribute_name |
407 |
|
|
|
408 |
|
|
Returns the attribute name. |
409 |
|
|
|
410 |
|
|
=cut |
411 |
|
|
|
412 |
|
|
sub attribute_name ($) { |
413 |
|
|
my $self = shift; |
414 |
|
|
if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) { |
415 |
|
|
($self->{namespace_uri} ? |
416 |
|
|
(ref $self->{parent} ? $self->{parent} : $self) |
417 |
|
|
->_get_namespace_prefix ($self->{namespace_uri}, use_no_prefix => 0) : '') |
418 |
|
|
.$self->{local_name}; |
419 |
|
|
} elsif ($self->{type} eq '#pair' && $self->_check_ncname ($self->{local_name})) { |
420 |
|
|
$self->{local_name}; |
421 |
|
|
} else { |
422 |
|
|
''; |
423 |
|
|
} |
424 |
|
|
} |
425 |
|
|
|
426 |
|
|
=item $tag = $x->attribute_value |
427 |
|
|
|
428 |
|
|
Returns the attribute value. |
429 |
|
|
|
430 |
|
|
=cut |
431 |
|
|
|
432 |
|
|
sub attribute_value ($) { |
433 |
|
|
my $self = shift; |
434 |
|
|
if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) { |
435 |
|
|
'"' . $self->_entitize ($self->inner_text) . '"'; |
436 |
|
|
} elsif ($self->{type} eq '#pair' && $self->_check_ncname ($self->{local_name})) { |
437 |
|
|
my $t = $self->inner_text; |
438 |
|
|
if ($t =~ /"/) { |
439 |
|
|
if ($t =~ /'/) { |
440 |
|
|
$t =~ s/"/"/g; |
441 |
|
|
return '"' . $t . '"'; |
442 |
|
|
} else { |
443 |
|
|
return "'" . $t . "'"; |
444 |
|
|
} |
445 |
|
|
} else { |
446 |
|
|
return '"' . $t . '"'; |
447 |
|
|
} |
448 |
|
|
} else { |
449 |
|
|
''; |
450 |
|
|
} |
451 |
|
|
} |
452 |
|
|
|
453 |
|
|
=item $tag = $x->attribute |
454 |
|
|
|
455 |
|
|
Returns the attribute (name and value pair). |
456 |
|
|
|
457 |
|
|
=cut |
458 |
|
|
|
459 |
|
|
sub attribute ($) { |
460 |
|
|
my $self = shift; |
461 |
|
|
if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) { |
462 |
|
|
$self->attribute_name . '=' . $self->attribute_value; |
463 |
|
|
} elsif ($self->{type} eq '#pair' && $self->_check_ncname ($self->{local_name})) { |
464 |
|
|
$self->attribute_name . ' ' . $self->attribute_value; |
465 |
|
|
} else { |
466 |
|
|
''; |
467 |
|
|
} |
468 |
|
|
} |
469 |
|
|
|
470 |
|
|
=item $tag = $x->inner_xml |
471 |
|
|
|
472 |
|
|
Returns the content of the node in XML syntax. (In case of the C<#element> nodes, |
473 |
|
|
element content without start- and end-tags is returned.) |
474 |
|
|
|
475 |
|
|
Note that for not all node types the behavior of this method is defined. |
476 |
|
|
For example, returned value of C<#attribute> might be unexpected one |
477 |
|
|
in this version of this module. |
478 |
|
|
|
479 |
|
|
=cut |
480 |
|
|
|
481 |
|
|
sub inner_xml ($) { |
482 |
|
|
my $self = shift; |
483 |
|
|
my $r = ''; |
484 |
|
|
if ($self->{type} eq '#comment') { |
485 |
|
|
$r = $self->inner_text; |
486 |
|
|
$r =~ s/--/-E/g; |
487 |
|
|
} elsif ($self->{type} eq '#pi') { |
488 |
|
|
if (length $self->{value}) { |
489 |
|
|
$r = ' ' . $self->{value}; |
490 |
|
|
$r =~ s/\?>/?>/g; |
491 |
|
|
} |
492 |
|
|
for (@{$self->{node}}) { |
493 |
|
|
if ($_->node_type eq '#attribute') { |
494 |
|
|
$r .= ' ' . $_->attribute; |
495 |
|
|
} else { |
496 |
|
|
my $s = $_->inner_text; |
497 |
|
|
$s =~ s/\?>/?>/g; |
498 |
|
|
$r .= ' ' . $s if length $s; |
499 |
|
|
} |
500 |
|
|
} |
501 |
|
|
} elsif ($self->{type} eq '#declaration') { |
502 |
|
|
my ($isub, $pubid, $sysid) = (''); |
503 |
|
|
for (@{$self->{node}}) { |
504 |
|
|
if ($_->{type} eq '#pair') { |
505 |
|
|
if ($_->{local_name} eq 'PUBLIC') { |
506 |
|
|
$pubid = $_; |
507 |
|
|
} elsif ($_->{local_name} eq 'SYSTEM') { |
508 |
|
|
$sysid = $_; |
509 |
|
|
} |
510 |
|
|
} else { |
511 |
|
|
$isub .= $_->outer_xml; |
512 |
|
|
} |
513 |
|
|
} |
514 |
|
|
if ($sysid) { |
515 |
|
|
if ($pubid) { |
516 |
|
|
$r = $pubid->attribute . ' ' . $sysid->attribute_value; |
517 |
|
|
} else { |
518 |
|
|
$r = $sysid->attribute; |
519 |
|
|
} |
520 |
|
|
if ($isub) { |
521 |
|
|
$r .= " [\n" . $isub . "\n]"; |
522 |
|
|
} |
523 |
|
|
} else { |
524 |
|
|
if ($isub) { |
525 |
|
|
$r = "[\n" . $isub . "\n]"; |
526 |
|
|
} else { |
527 |
|
|
$r = "[]"; |
528 |
|
|
} |
529 |
|
|
} |
530 |
|
|
} else { |
531 |
|
|
if ($self->{type} ne '#xml') { |
532 |
|
|
$r = $self->_entitize ($self->{value}); |
533 |
|
|
} else { |
534 |
|
|
$r = $self->{value}; |
535 |
|
|
} |
536 |
|
|
for (@{$self->{node}}) { |
537 |
|
|
my $nt = $_->node_type; |
538 |
|
|
if ((0||$self->{option}->{indent}) && ($nt eq '#element' || $nt eq '#comment' || $nt eq '#pi' || $nt eq '#declaration')) { |
539 |
|
|
$r .= "\n"; |
540 |
|
|
} |
541 |
|
|
$r .= $_->outer_xml unless $_->node_type eq '#attribute'; |
542 |
|
|
} |
543 |
|
|
} |
544 |
|
|
$r; |
545 |
|
|
} |
546 |
|
|
|
547 |
|
|
|
548 |
|
|
=item $tag = $x->outer_xml |
549 |
|
|
|
550 |
|
|
Returns the node in XML syntax. |
551 |
|
|
|
552 |
|
|
=cut |
553 |
|
|
|
554 |
|
|
sub outer_xml ($) { |
555 |
|
|
my $self = shift; |
556 |
|
|
if ($self->{type} eq '#attribute') { |
557 |
|
|
$self->attribute; |
558 |
|
|
} else { |
559 |
|
|
if ((0||$self->{option}->{indent}) && $self->{type} eq '#element') { |
560 |
|
|
my $r = $self->start_tag; |
561 |
|
|
my $c = $self->inner_xml; |
562 |
|
|
if ($c) { |
563 |
|
|
$c =~ s/\n/\n /g; |
564 |
|
|
$r .= "\n " . $c . "\n"; |
565 |
|
|
} |
566 |
|
|
$r .= $self->end_tag; |
567 |
|
|
$r; |
568 |
|
|
} else { |
569 |
|
|
$self->start_tag . $self->inner_xml . $self->end_tag; |
570 |
|
|
} |
571 |
|
|
} |
572 |
|
|
} |
573 |
|
|
|
574 |
|
|
=item $tag = $x->inner_text |
575 |
|
|
|
576 |
|
|
Returns the text content of the node. (In many case the returned value is same |
577 |
|
|
as WinIE DOM C<inner_text ()> function's or XPath C<text()> function's. |
578 |
|
|
But some classes that inherits this module might implement to return other |
579 |
|
|
value (eg. to return the value of the alt attribute of html:img element). |
580 |
|
|
|
581 |
|
|
=cut |
582 |
|
|
|
583 |
|
|
sub inner_text ($) { |
584 |
|
|
my $self = shift; |
585 |
|
|
my $r = $self->{value}; |
586 |
|
|
for (@{$self->{node}}) { |
587 |
|
|
$r .= $_->inner_text unless $_->node_type eq '#attribute'; |
588 |
|
|
} |
589 |
|
|
$r; |
590 |
|
|
} |
591 |
|
|
|
592 |
|
|
{no warnings; # prototype mismatch |
593 |
|
|
*stringify = \&outer_xml; |
594 |
|
|
} |
595 |
|
|
|
596 |
|
|
# $s = $x->_entitize ($s) |
597 |
|
|
sub _entitize ($$) { |
598 |
|
|
my ($self, $s) = (shift, shift); |
599 |
|
|
$s =~ s/&/&/g; |
600 |
|
|
$s =~ s/</</g; |
601 |
|
|
$s =~ s/>/>/g; |
602 |
|
|
$s =~ s/"/"/g; |
603 |
|
|
$s; |
604 |
|
|
} |
605 |
|
|
|
606 |
|
|
# 1/0 = $x->_check_ncname ($s) |
607 |
|
|
sub _check_ncname ($$) { |
608 |
|
|
my $self = shift; |
609 |
|
|
my $s = shift; |
610 |
|
|
return $Cache{ncname}->{$s} if defined $Cache{ncname}->{$s}; |
611 |
|
|
if ($s =~ /^\p{InXML_NCNameStartChar}/ && $s !~ /\P{InXMLNCNameChar}/) { |
612 |
|
|
# \p{...}('*'/'+'/'{n,}') does not work... |
613 |
|
|
$Cache{ncname}->{$s} = 1; |
614 |
|
|
1; |
615 |
|
|
} else { |
616 |
|
|
$Cache{ncname}->{$s} = 0; |
617 |
|
|
0; |
618 |
|
|
} |
619 |
|
|
} |
620 |
|
|
|
621 |
|
|
# 1/0 = $x->_check_namespace_prefix ($s) |
622 |
|
|
sub _check_namespace_prefix ($$) { |
623 |
|
|
my $self = shift; |
624 |
|
|
my $s = shift; |
625 |
|
|
return 0 unless defined $s; |
626 |
|
|
return 1 if $s eq ''; |
627 |
|
|
substr ($s, -1, 1) = '' if substr ($s, -1, 1) eq ':'; |
628 |
|
|
$self->_check_ncname ($s); |
629 |
|
|
} |
630 |
|
|
|
631 |
|
|
=back |
632 |
|
|
|
633 |
|
|
=head1 LICENSE |
634 |
|
|
|
635 |
|
|
Copyright 2003 Wakaba <w@suika.fam.cx> |
636 |
|
|
|
637 |
|
|
This program is free software; you can redistribute it and/or |
638 |
|
|
modify it under the same terms as Perl itself. |
639 |
|
|
|
640 |
|
|
=cut |
641 |
|
|
|
642 |
|
|
1; # $Date: 2003/04/03 01:08:17 $ |