1 |
wakaba |
1.1 |
|
2 |
|
|
=head1 NAME |
3 |
|
|
|
4 |
|
|
SuikaWiki::Markup::XML --- SuikaWiki: Simple well-formed document fragment generator |
5 |
|
|
|
6 |
|
|
=head1 DESCRIPTION |
7 |
|
|
|
8 |
|
|
This module can be used to generate the document fragment of XML, SGML or |
9 |
|
|
other well-formed (in XML meaning) data formats with the object oriented manner. |
10 |
|
|
|
11 |
|
|
This module cannot be used to parse XML (or other marked-up) document (or its fragment) |
12 |
|
|
by itself, nor is compatible with other huge packages such as XML::Parser. The only purpose |
13 |
|
|
of this module is to make it easy for tiny perl scripts to GENERATE well-formed |
14 |
|
|
markup constructures. (SuikaWiki is not "tiny"? Oh, yes, I see:-)) |
15 |
|
|
|
16 |
|
|
=cut |
17 |
|
|
|
18 |
|
|
package SuikaWiki::Markup::XML; |
19 |
|
|
use strict; |
20 |
wakaba |
1.9 |
our $VERSION = do{my @r=(q$Revision: 1.8 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
21 |
|
|
use overload '""' => \&outer_xml, |
22 |
wakaba |
1.2 |
fallback => 1; |
23 |
|
|
use Char::Class::XML qw!InXML_NameStartChar InXMLNameChar InXML_NCNameStartChar InXMLNCNameChar!; |
24 |
wakaba |
1.1 |
our %Namespace_URI_to_prefix = ( |
25 |
|
|
'DAV:' => [qw/dav webdav/], |
26 |
|
|
'http://greenbytes.de/2002/rfcedit' => [qw/ed/], |
27 |
|
|
'http://icl.com/saxon' => [qw/saxon/], |
28 |
|
|
'http://members.jcom.home.ne.jp/jintrick/2003/02/site-concept.xml#' => ['', qw/sitemap/], |
29 |
|
|
'http://purl.org/dc/elements/1.1/' => [qw/dc dc11/], |
30 |
|
|
'http://purl.org/rss/1.0/' => ['', qw/rss rss10/], |
31 |
|
|
'http://suika.fam.cx/~wakaba/lang/rfc/translation/' => [qw/ja/], |
32 |
|
|
'http://www.mozilla.org/xbl' => ['', qw/xbl/], |
33 |
wakaba |
1.3 |
'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => [qw/rdf/], |
34 |
wakaba |
1.1 |
'http://www.w3.org/1999/xhtml' => ['', qw/h h1 xhtml xhtml1/], |
35 |
|
|
'http://www.w3.org/1999/xlink' => [qw/l xlink/], |
36 |
|
|
'http://www.w3.org/1999/XSL/Format' => [qw/fo xslfo xsl-fo xsl/], |
37 |
|
|
'http://www.w3.org/1999/XSL/Transform' => [qw/t s xslt xsl/], |
38 |
|
|
'http://www.w3.org/1999/XSL/TransformAlias' => [qw/axslt axsl xslt xsl/], |
39 |
|
|
'http://www.w3.org/2000/01/rdf-schema#' => [qw/rdfs/], |
40 |
|
|
'http://www.w3.org/2000/svg' => ['', qw/s svg/], |
41 |
|
|
'http://www.w3.org/2002/06/hlink' => [qw/h hlink/], |
42 |
|
|
'http://www.w3.org/2002/06/xhtml2' => ['', qw/h h2 xhtml xhtml2/], |
43 |
wakaba |
1.3 |
'http://www.w3.org/2002/07/owl' => [qw/owl/], |
44 |
wakaba |
1.6 |
'http://www.w3.org/2002/xforms/cr' => [qw/f xforms/], |
45 |
wakaba |
1.1 |
'http://www.w3.org/TR/REC-smil' => ['', qw/smil smil1/], |
46 |
wakaba |
1.3 |
'http://www.wapforum.org/2001/wml' => [qw/wap/], |
47 |
wakaba |
1.1 |
'http://xml.apache.org/xalan' => [qw/xalan/], |
48 |
|
|
'mailto:julian.reschke@greenbytes.de?subject=rcf2629.xslt' => [qw/myns/], |
49 |
|
|
'urn:schemas-microsoft-com:vml' => [qw/v vml/], |
50 |
|
|
'urn:schemas-microsoft-com:xslt' => [qw/ms msxsl msxslt/], |
51 |
|
|
'urn:x-suika-fam-cx:markup:ietf:html:3:draft:00' => ['', qw/H HTML HTML3/], |
52 |
wakaba |
1.7 |
'urn:x-suika-fam-cx:markup:ietf:rfc:2629' => ['', qw/rfc rfc2629/], |
53 |
wakaba |
1.1 |
); |
54 |
|
|
my %Cache; |
55 |
wakaba |
1.7 |
my %NS = ( |
56 |
|
|
SGML => 'urn:x-suika-fam-cx:markup:sgml:', |
57 |
wakaba |
1.8 |
xml => 'http://www.w3.org/XML/1998/namespace', |
58 |
|
|
xmlns => 'http://www.w3.org/2000/xmlns/', |
59 |
wakaba |
1.7 |
); |
60 |
wakaba |
1.1 |
|
61 |
|
|
=head1 METHODS |
62 |
|
|
|
63 |
|
|
=over 4 |
64 |
|
|
|
65 |
|
|
=item $x = SuikaWiki::Markup::XML->new (%options) |
66 |
|
|
|
67 |
|
|
Returns new instance of the module. It is itself a node. |
68 |
|
|
|
69 |
wakaba |
1.4 |
Available options: C<data_type>, C<default_decl>, C<type> (default: C<#element>), C<local_name>, C<namespace_uri> and C<value>. |
70 |
wakaba |
1.1 |
|
71 |
|
|
=cut |
72 |
|
|
|
73 |
|
|
sub new ($;%) { |
74 |
|
|
my $class = shift; |
75 |
|
|
my $self = bless {@_}, $class; |
76 |
|
|
$self->{type} ||= '#element'; |
77 |
wakaba |
1.5 |
if ($self->{qname}) { |
78 |
|
|
($self->{namespace_prefix}, $self->{local_name}) = $self->_ns_parse_qname ($self->{qname}); |
79 |
|
|
$self->{_qname} = $self->{qname}; |
80 |
|
|
} |
81 |
|
|
if (defined $self->{namespace_prefix}) { |
82 |
|
|
$self->{namespace_prefix} .= ':' if $self->{namespace_prefix} |
83 |
|
|
&& substr ($self->{namespace_prefix}, -1) ne ':'; |
84 |
|
|
$self->{ns}->{$self->{namespace_prefix}||''} = $self->{namespace_uri} if defined $self->{namespace_uri}; |
85 |
|
|
} |
86 |
wakaba |
1.4 |
for (qw/local_name value/) { |
87 |
wakaba |
1.8 |
if (ref ($self->{$_}) && eval q($self->{$_}->_CLASS_NAME eq $self->_CLASS_NAME)) { |
88 |
wakaba |
1.2 |
$self->{$_}->{parent} = $self; |
89 |
|
|
} |
90 |
|
|
} |
91 |
wakaba |
1.1 |
$self->{node} ||= []; |
92 |
|
|
$self; |
93 |
|
|
} |
94 |
|
|
|
95 |
wakaba |
1.5 |
sub _ns_parse_qname ($$) { |
96 |
|
|
shift; |
97 |
|
|
my $qname = shift; |
98 |
|
|
if ($qname =~ /:/) { |
99 |
|
|
return split /:/, $qname, 2; |
100 |
|
|
} else { |
101 |
|
|
return (undef, $qname); |
102 |
|
|
} |
103 |
|
|
} |
104 |
wakaba |
1.1 |
|
105 |
|
|
=item $x->append_node ($node) |
106 |
|
|
|
107 |
|
|
Appending given node to the object (as the last child). |
108 |
|
|
If the type of given node is C<#fragment>, its all children, not the node |
109 |
|
|
itself, are appended. |
110 |
|
|
|
111 |
|
|
This method returns the appended node unless the type of given node is C<#fragment>. |
112 |
|
|
In such cases, this node (C<$x>) is returned. |
113 |
|
|
|
114 |
wakaba |
1.2 |
Available options: C<node_or_text>. |
115 |
|
|
|
116 |
wakaba |
1.1 |
=cut |
117 |
|
|
|
118 |
wakaba |
1.2 |
sub append_node ($$;%) { |
119 |
wakaba |
1.1 |
my $self = shift; |
120 |
wakaba |
1.2 |
my ($new_node, %o) = @_; |
121 |
|
|
unless (ref $new_node) { |
122 |
|
|
if ($o{node_or_text}) { |
123 |
|
|
return $self->append_text ($new_node); |
124 |
|
|
} else { |
125 |
|
|
die "append_node: Invalid node" unless ref $new_node; |
126 |
|
|
} |
127 |
|
|
} |
128 |
wakaba |
1.1 |
if ($new_node->{type} eq '#fragment') { |
129 |
|
|
for (@{$new_node->{node}}) { |
130 |
|
|
push @{$self->{node}}, $_; |
131 |
|
|
$_->{parent} = $self; |
132 |
|
|
} |
133 |
|
|
$self; |
134 |
|
|
} else { |
135 |
|
|
push @{$self->{node}}, $new_node; |
136 |
|
|
$new_node->{parent} = $self; |
137 |
|
|
$new_node; |
138 |
|
|
} |
139 |
|
|
} |
140 |
|
|
|
141 |
|
|
=item $new_node = $x->append_new_node (%options) |
142 |
|
|
|
143 |
|
|
Appending a new node. The new node is returned. |
144 |
|
|
|
145 |
|
|
Available options: C<type>, C<namespace_uri>, C<local_name>, C<value>. |
146 |
|
|
|
147 |
|
|
=cut |
148 |
|
|
|
149 |
|
|
sub append_new_node ($;%) { |
150 |
|
|
my $self = shift; |
151 |
|
|
my %o = @_; |
152 |
|
|
my $new_node = __PACKAGE__->new (%o); |
153 |
|
|
push @{$self->{node}}, $new_node; |
154 |
|
|
$new_node->{parent} = $self; |
155 |
|
|
$new_node; |
156 |
|
|
} |
157 |
|
|
|
158 |
|
|
=item $new_node = $x->append_text ($text) |
159 |
|
|
|
160 |
|
|
Appending given text as a new text node. The new text node is returned. |
161 |
|
|
|
162 |
|
|
=cut |
163 |
|
|
|
164 |
|
|
sub append_text ($$;%) { |
165 |
|
|
my $self = shift; |
166 |
|
|
my $s = shift; |
167 |
|
|
#if (@{$self->{node}}[-1]->{type} eq '#text') { |
168 |
|
|
# $self->{node}}[-1]->append_new_node (type => '#text', value => $s); |
169 |
|
|
#} else { |
170 |
|
|
$self->append_new_node (type => '#text', value => $s); |
171 |
|
|
#} |
172 |
|
|
} |
173 |
|
|
|
174 |
wakaba |
1.2 |
sub append_baretext ($$;%) { |
175 |
|
|
my $self = shift; |
176 |
|
|
my $s = shift; |
177 |
|
|
$self->append_new_node (type => '#xml', value => $s); |
178 |
|
|
} |
179 |
|
|
|
180 |
wakaba |
1.9 |
sub remove_child_node ($$) { |
181 |
|
|
my ($self, $node) = @_; |
182 |
|
|
return unless ref $node; |
183 |
|
|
$node = overload::StrVal ($node); |
184 |
|
|
$self->{node} = [grep { overload::StrVal ($_) ne $node } @{$self->{node}}]; |
185 |
|
|
} |
186 |
|
|
|
187 |
wakaba |
1.1 |
=item $attr_node = $x->get_attribute ($local_name, %options) |
188 |
|
|
|
189 |
|
|
Returns the attribute node whose local-name is C<$local_name>. |
190 |
|
|
|
191 |
|
|
Available options: C<namespace_uri>, C<make_new_node>. |
192 |
|
|
|
193 |
|
|
=cut |
194 |
|
|
|
195 |
|
|
sub get_attribute ($$;%) { |
196 |
|
|
my $self = shift; |
197 |
|
|
my ($name, %o) = @_; |
198 |
|
|
for (@{$self->{node}}) { |
199 |
|
|
if ($_->{type} eq '#attribute' && $_->{local_name} eq $name && $o{namespace_uri} eq $_->{namespace_uri}) { |
200 |
|
|
return $_; |
201 |
|
|
} |
202 |
|
|
} |
203 |
|
|
## Node is not exist |
204 |
|
|
if ($o{make_new_node}) { |
205 |
|
|
return $self->append_new_node (type => '#attribute', local_name => $name, namespace_uri => $o{namespace_uri}); |
206 |
|
|
} else { |
207 |
|
|
return undef; |
208 |
|
|
} |
209 |
|
|
} |
210 |
|
|
|
211 |
|
|
=item $attr_node = $x->set_attribute ($local_name => $value, %options) |
212 |
|
|
|
213 |
|
|
Set the value of the attribute. The attribute node is returned. |
214 |
|
|
|
215 |
|
|
Available options: C<namespace_uri>. |
216 |
|
|
|
217 |
|
|
=cut |
218 |
|
|
|
219 |
|
|
sub set_attribute ($$$;%) { |
220 |
|
|
my $self = shift; |
221 |
|
|
my ($name, $val, %o) = @_; |
222 |
wakaba |
1.7 |
if (ref ($val) eq 'ARRAY' || ref ($val) eq 'HASH' || ref ($val) eq 'CODE') { |
223 |
|
|
## TODO: common error handling |
224 |
|
|
require Carp; |
225 |
|
|
Carp::croak "set_attribute: new attribute value must be string or blessed object"; |
226 |
|
|
} |
227 |
wakaba |
1.1 |
for (@{$self->{node}}) { |
228 |
|
|
if ($_->{type} eq '#attribute' && $_->{local_name} eq $name && $o{namespace_uri} eq $_->{namespace_uri}) { |
229 |
|
|
$_->{value} = $val; |
230 |
|
|
$_->{node} = []; |
231 |
|
|
return $_; |
232 |
|
|
} |
233 |
|
|
} |
234 |
|
|
return $self->append_new_node (type => '#attribute', local_name => $name, value => $val, namespace_uri => $o{namespace_uri}); |
235 |
|
|
} |
236 |
|
|
|
237 |
|
|
=item \@children = $x->child_nodes |
238 |
|
|
|
239 |
|
|
Returns an array reference to child nodes. |
240 |
|
|
|
241 |
wakaba |
1.4 |
=item $local_name = $x->local_name ([$new_name]) |
242 |
wakaba |
1.1 |
|
243 |
wakaba |
1.4 |
Returns or set the local-name. |
244 |
|
|
|
245 |
|
|
=item $uri = $x->namespace_uri ([$new_uri]) |
246 |
|
|
|
247 |
|
|
Returns or set namespace name (URI) of the element or the attribute |
248 |
wakaba |
1.1 |
|
249 |
wakaba |
1.5 |
=item $uri = $x->namespace_prefix ([$new_prefix]) |
250 |
|
|
|
251 |
|
|
Returns or set namespace prefix of the element or the attribute. |
252 |
|
|
You may give C<$new_prefix> in form either 'foo' or 'foo:'. |
253 |
|
|
To indicate "default" prefix, use '' (length == 0 string). |
254 |
|
|
|
255 |
wakaba |
1.1 |
=item $type = $x->node_type |
256 |
|
|
|
257 |
|
|
Returns the node type. |
258 |
|
|
|
259 |
|
|
=item $node = $x->parent_node |
260 |
|
|
|
261 |
|
|
Returns the parent node. If there is no parent node, undef is returned. |
262 |
|
|
|
263 |
|
|
=cut |
264 |
|
|
|
265 |
|
|
sub child_nodes ($) { shift->{node} } |
266 |
wakaba |
1.4 |
sub local_name ($;$) { |
267 |
|
|
my ($self, $newname) = @_; |
268 |
|
|
if ($newname) { |
269 |
|
|
$self->{local_name} = $newname; |
270 |
|
|
} |
271 |
|
|
if (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') { |
272 |
|
|
$self->{local_name}->{local_name}; |
273 |
|
|
} else { |
274 |
|
|
$self->{local_name} |
275 |
|
|
} |
276 |
|
|
} |
277 |
wakaba |
1.1 |
sub node_type ($) { shift->{type} } |
278 |
|
|
sub parent_node ($) { shift->{parent} } |
279 |
|
|
|
280 |
wakaba |
1.4 |
## TODO: obsolete |
281 |
|
|
sub target_name ($;$) { |
282 |
|
|
my ($self, $new) = @_; |
283 |
|
|
if (defined $new) { |
284 |
|
|
$self->{target_name} = $new; |
285 |
|
|
} |
286 |
|
|
$self->{target_name}; |
287 |
|
|
} |
288 |
|
|
|
289 |
|
|
sub namespace_uri ($;$) { |
290 |
|
|
my ($self, $new_uri) = @_; |
291 |
|
|
if (defined $new_uri) { |
292 |
|
|
$self->{namespace_uri} = $new_uri; |
293 |
|
|
} |
294 |
|
|
$self->{namespace_uri}; |
295 |
|
|
} |
296 |
wakaba |
1.5 |
sub namespace_prefix ($;$) { |
297 |
|
|
my ($self, $new_pfx) = @_; |
298 |
|
|
if (defined $new_pfx && $self->{namespace_uri}) { |
299 |
|
|
$new_pfx .= ':' if $new_pfx; |
300 |
|
|
$self->{namespace_prefix} = $new_pfx; |
301 |
|
|
$self->{ns}->{$new_pfx} = $self->{namespace_uri}; |
302 |
|
|
} |
303 |
|
|
$self->_get_namespace_prefix ($self->{namespace_uri}); |
304 |
|
|
} |
305 |
wakaba |
1.4 |
|
306 |
wakaba |
1.1 |
=item $i = $x->count |
307 |
|
|
|
308 |
|
|
Returns the number of child nodes. |
309 |
|
|
|
310 |
|
|
=cut |
311 |
|
|
|
312 |
|
|
# TODO: support counting by type |
313 |
|
|
sub count ($;@) { |
314 |
wakaba |
1.2 |
my $self = shift; |
315 |
|
|
(defined $self->{value} ? 1 : 0) + scalar @{$self->{node}}; |
316 |
wakaba |
1.1 |
} |
317 |
|
|
|
318 |
|
|
# $prefix = $x->_get_namespace_prefix ($namespace_uri) |
319 |
|
|
sub _get_namespace_prefix ($$;%) { |
320 |
|
|
my ($self, $uri) = (shift, shift); |
321 |
|
|
my %o = @_; |
322 |
|
|
if (defined (my $p = $self->_uri_to_prefix ($uri, undef, %o))) { |
323 |
|
|
return $p if $self->_prefix_to_uri ($p) eq $uri; |
324 |
|
|
} if ($Namespace_URI_to_prefix{$uri}) { |
325 |
|
|
for (@{$Namespace_URI_to_prefix{$uri}}) { |
326 |
|
|
my $pfx = $_; $pfx .= ':' if $pfx; |
327 |
|
|
if ($self->_check_namespace_prefix ($pfx) && !$self->_prefix_to_uri ($pfx)) { |
328 |
|
|
return $self->_uri_to_prefix ($uri => $pfx, %o); |
329 |
|
|
} |
330 |
|
|
} |
331 |
|
|
} else { |
332 |
|
|
my ($u_r_i, $pfx) = ($uri); |
333 |
wakaba |
1.4 |
$u_r_i =~ s/[^0-9A-Za-z._-]+/ /g; |
334 |
|
|
my @u_r_i = split / /, $u_r_i; |
335 |
|
|
for (reverse @u_r_i) { |
336 |
|
|
if (s/([A-Za-z][0-9A-Za-z._-]+)//) { |
337 |
|
|
my $p_f_x = $1 . ':'; |
338 |
|
|
next if lc (substr ($p_f_x, 0, 3)) eq 'xml'; |
339 |
|
|
unless ($self->_prefix_to_uri ($p_f_x)) { |
340 |
|
|
$pfx = $p_f_x; |
341 |
|
|
last; |
342 |
|
|
} |
343 |
wakaba |
1.1 |
} |
344 |
|
|
} |
345 |
|
|
if ($pfx) { |
346 |
|
|
return $self->_uri_to_prefix ($uri => $pfx, %o); |
347 |
|
|
} else { |
348 |
|
|
while (1) { |
349 |
|
|
my $pfx = 'ns'.(++$self->{ns}->{-anonymous}).':'; |
350 |
|
|
unless ($self->_prefix_to_uri ($pfx)) { |
351 |
|
|
return $self->_uri_to_prefix ($uri => $pfx, %o); |
352 |
|
|
} |
353 |
|
|
} |
354 |
|
|
} |
355 |
|
|
} |
356 |
|
|
} |
357 |
|
|
|
358 |
wakaba |
1.4 |
sub _set_prefix_to_uri ($$$;%) { |
359 |
|
|
my ($self, $prefix => $uri, %o) = @_; |
360 |
|
|
return undef unless $self->_check_namespace_prefix ($prefix); |
361 |
|
|
$self->{ns}->{$prefix} = $uri; |
362 |
|
|
$self->_prefix_to_uri ($prefix); |
363 |
|
|
} |
364 |
|
|
|
365 |
|
|
## TODO: removing ns declare (1.1) support |
366 |
|
|
# $uri or undef = $x->_prefix_to_uri ($prefix) |
367 |
wakaba |
1.1 |
sub _prefix_to_uri ($$;$%) { |
368 |
wakaba |
1.4 |
my ($self, $prefix, %o) = @_; |
369 |
wakaba |
1.1 |
return undef unless $self->_check_namespace_prefix ($prefix); |
370 |
|
|
if (uc (substr $prefix, 0, 3) eq 'XML') { |
371 |
wakaba |
1.8 |
return $NS{xml} if $prefix eq 'xml:'; |
372 |
|
|
return $NS{xmlns} if $prefix eq 'xmlns:'; |
373 |
wakaba |
1.2 |
} |
374 |
|
|
if (defined $self->{ns}->{$prefix}) { |
375 |
wakaba |
1.1 |
$self->{ns}->{$prefix}; |
376 |
|
|
} elsif (ref $self->{parent}) { |
377 |
|
|
shift; # $self |
378 |
|
|
$self->{parent}->_prefix_to_uri (@_); |
379 |
|
|
} else { |
380 |
|
|
undef; |
381 |
|
|
} |
382 |
|
|
} |
383 |
|
|
|
384 |
|
|
# $prefix or undef = $x->_uri_to_prefix ($uri => [$new_prefix], %options) |
385 |
|
|
# use_no_prefix (default: 1): Allow default namespace (no prefix). |
386 |
|
|
sub _uri_to_prefix ($$;$%) { |
387 |
|
|
my ($self, $uri, $new_prefix, %o) = @_; |
388 |
|
|
if (defined $new_prefix && $self->_check_namespace_prefix ($new_prefix)) { |
389 |
|
|
$self->{ns}->{$new_prefix} = $uri; |
390 |
|
|
$new_prefix; |
391 |
|
|
} else { |
392 |
wakaba |
1.8 |
return 'xml:' if $uri eq $NS{xml}; |
393 |
|
|
return 'xmlns:' if $uri eq $NS{xmlns}; |
394 |
wakaba |
1.1 |
for (keys %{$self->{ns}||{}}) { |
395 |
|
|
next if ($_ eq '') && !(!defined $o{use_no_prefix} || $o{use_no_prefix}); |
396 |
|
|
return $_ if $self->{ns}->{$_} eq $uri; |
397 |
|
|
} |
398 |
wakaba |
1.2 |
if (ref ($self->{parent}) && $self->{parent}->{type} ne '#declaration') { |
399 |
wakaba |
1.1 |
shift; # $self |
400 |
|
|
$self->{parent}->_uri_to_prefix (@_); |
401 |
|
|
} else { |
402 |
|
|
undef; |
403 |
|
|
} |
404 |
|
|
} |
405 |
|
|
} |
406 |
|
|
|
407 |
|
|
=item $x->define_new_namespace ($prefix => $uri) |
408 |
|
|
|
409 |
|
|
Defines a new XML Namespace. This method is useful for root or section-level |
410 |
|
|
element node. |
411 |
|
|
|
412 |
|
|
Returned value is unspecified in this version of this module. |
413 |
|
|
|
414 |
|
|
=cut |
415 |
|
|
|
416 |
wakaba |
1.4 |
## TODO: structured URI (such as http://&server;/) support |
417 |
wakaba |
1.1 |
sub define_new_namespace ($$$) { |
418 |
|
|
my ($self, $prefix, $uri) = @_; |
419 |
|
|
if ($prefix eq '' || $self->_check_ncname ($prefix)) { |
420 |
|
|
$prefix .= ':' if $prefix && substr ($prefix, -1) ne ':'; |
421 |
wakaba |
1.4 |
$self->_set_prefix_to_uri ($prefix => $uri); |
422 |
wakaba |
1.1 |
} else { |
423 |
|
|
undef; |
424 |
|
|
} |
425 |
|
|
} |
426 |
|
|
|
427 |
wakaba |
1.4 |
=item $uri = $x->defined_namespace_prefix ($prefix) |
428 |
|
|
|
429 |
|
|
Query whether the namespace prefix is defined or not. |
430 |
|
|
If defined, return namespace name (URI). |
431 |
|
|
|
432 |
|
|
=cut |
433 |
|
|
|
434 |
|
|
sub defined_namespace_prefix ($$) { |
435 |
|
|
my ($self, $prefix) = @_; |
436 |
|
|
$prefix .= ':' if $prefix;#Carp::carp join ",", ';',@_; |
437 |
|
|
$self->_prefix_to_uri ($prefix); |
438 |
|
|
} |
439 |
|
|
|
440 |
wakaba |
1.1 |
=item $qname = $x->qname |
441 |
|
|
|
442 |
|
|
Returns QName ((namespace-)qualified name) of the element type. |
443 |
|
|
Undef is retuened when the type does not have its QName |
444 |
|
|
(ie. when type is neither C<#element> or C<#attribute>). |
445 |
|
|
|
446 |
|
|
=cut |
447 |
|
|
|
448 |
|
|
sub qname ($) { |
449 |
|
|
my $self = shift; |
450 |
|
|
if ($self->_check_ncname ($self->{local_name})) { |
451 |
|
|
if ($self->{type} eq '#element') { |
452 |
|
|
$self->{_qname} = $self->_get_namespace_prefix ($self->{namespace_uri}) . $self->{local_name} |
453 |
|
|
unless $self->{_qname}; |
454 |
|
|
return $self->{_qname}; |
455 |
|
|
} elsif ($self->{type} eq '#attribute') { |
456 |
|
|
return $self->attribute_name; |
457 |
|
|
} |
458 |
|
|
} |
459 |
|
|
undef; |
460 |
|
|
} |
461 |
|
|
|
462 |
wakaba |
1.9 |
sub merge_external_subset ($) { |
463 |
|
|
my $self = shift; |
464 |
|
|
unless ($self->{type} eq '#declaration' && $self->{namespace_uri} eq $NS{SGML}.'doctype') { |
465 |
|
|
return unless $self->{type} eq '#document' || $self->{type} eq '#fragment'; |
466 |
|
|
for (@{$self->{node}}) { |
467 |
|
|
$_->merge_external_subset; |
468 |
|
|
} |
469 |
|
|
return; |
470 |
|
|
} |
471 |
|
|
my $xsub = $self->get_attribute ('external-subset'); |
472 |
|
|
return unless ref $xsub; |
473 |
|
|
for (@{$xsub->{node}}) { |
474 |
|
|
$_->{parent} = $self; |
475 |
|
|
} |
476 |
|
|
push @{$self->{node}}, @{$xsub->{node}}; |
477 |
|
|
$self->remove_child_node ($xsub); |
478 |
|
|
$self->remove_child_node ($self->get_attribute ('PUBLIC')); |
479 |
|
|
$self->remove_child_node ($self->get_attribute ('SYSTEM')); |
480 |
|
|
$self->remove_marked_section; |
481 |
|
|
} |
482 |
|
|
|
483 |
|
|
sub remove_marked_section ($) { |
484 |
|
|
my $self = shift; |
485 |
|
|
my @node; |
486 |
|
|
for (@{$self->{node}}) { |
487 |
|
|
$_->remove_marked_section; |
488 |
|
|
} |
489 |
|
|
for (@{$self->{node}}) { |
490 |
|
|
if ($_->{type} ne '#section') { |
491 |
|
|
push @node, $_; |
492 |
|
|
} else { |
493 |
|
|
my $status = $_->get_attribute ('status', make_new_node => 1)->inner_text; |
494 |
|
|
if ($status eq 'CDATA') { |
495 |
|
|
$_->{type} = '#text'; |
496 |
|
|
$_->remove_child_node ($_->get_attribute ('status')); |
497 |
|
|
push @node, $_; |
498 |
|
|
} elsif ($status ne 'IGNORE') { # INCLUDE |
499 |
|
|
for my $e (@{$_->{node}}) { |
500 |
|
|
if ($e->{type} ne '#attribute') { |
501 |
|
|
$e->{parent} = $self; |
502 |
|
|
push @node, $e; |
503 |
|
|
} |
504 |
|
|
} |
505 |
|
|
} |
506 |
|
|
} |
507 |
|
|
} |
508 |
|
|
$self->{node} = \@node; |
509 |
|
|
} |
510 |
|
|
|
511 |
wakaba |
1.7 |
sub remove_references ($) { |
512 |
|
|
my $self = shift; |
513 |
|
|
my @node; |
514 |
wakaba |
1.8 |
for (@{$self->{node}}) { |
515 |
|
|
$_->remove_references; |
516 |
|
|
} |
517 |
wakaba |
1.7 |
for (@{$self->{node}}) { |
518 |
|
|
if ($_->{type} ne '#reference' |
519 |
|
|
|| ($self->{type} eq '#declaration' && $_->{namespace_uri} eq $NS{SGML}.'entity')) { |
520 |
|
|
push @node, $_; |
521 |
|
|
} else { |
522 |
|
|
if ($_->{namespace_uri} =~ /char/) { |
523 |
|
|
my $e = ref ($_)->new (type => '#text', value => chr $_->{value}); |
524 |
|
|
$e->{parent} = $self; |
525 |
|
|
push @node, $e; |
526 |
|
|
} elsif ($_->{flag}->{smxp__ref_expanded}) { |
527 |
|
|
for my $e (@{$_->{node}}) { |
528 |
wakaba |
1.8 |
if ($e->{type} ne '#attribute') { |
529 |
|
|
$e->{parent} = $self; |
530 |
|
|
push @node, $e; |
531 |
|
|
} |
532 |
wakaba |
1.7 |
} |
533 |
|
|
} else { ## reference is not expanded |
534 |
|
|
push @node, $_; |
535 |
|
|
} |
536 |
|
|
} |
537 |
wakaba |
1.8 |
$_->{flag}->{smxp__defined_with_param_ref} = 0 |
538 |
|
|
if $_->{flag}->{smxp__defined_with_param_ref}; |
539 |
wakaba |
1.7 |
} |
540 |
|
|
$self->{node} = \@node; |
541 |
|
|
} |
542 |
|
|
|
543 |
wakaba |
1.8 |
sub resolve_relative_uri ($;$%) { |
544 |
|
|
require URI; |
545 |
|
|
my ($self, $rel, %o) = @_; |
546 |
|
|
my $base = $self->get_attribute ('base', namespace_uri => $NS{xml}); |
547 |
|
|
$base = ref ($base) ? $base->inner_text : undef; |
548 |
|
|
if ($base !~ /^(?:[0-9A-Za-z.+-]|%[0-9A-Fa-f]{2})+:/) { # $base is relative |
549 |
|
|
$base = $self->_resolve_relative_uri_by_parent ($base, \%o); |
550 |
|
|
} |
551 |
|
|
eval q{ ## Catch error such as $base is 'data:,foo' (non hierarchic scheme,...) |
552 |
|
|
return URI->new ($rel)->abs ($base || '.'); ## BUG (or spec) of URI: $base == false |
553 |
|
|
} or return $rel; |
554 |
|
|
} |
555 |
|
|
sub _resolve_relative_uri_by_parent ($$$) { |
556 |
|
|
my ($self, $rel, $o) = @_; |
557 |
|
|
if (ref $self->{parent}) { |
558 |
|
|
if (!$o->{use_references_base_uri} && $self->{parent}->{type} eq '#reference') { |
559 |
|
|
## This case is necessary to work with |
560 |
|
|
## <element> <!-- element can have base URI --> |
561 |
|
|
## text <!-- text cannot have base URI --> |
562 |
|
|
## &ent; <!-- ref's base URI is referred entity's one (in this module) --> |
563 |
|
|
## <!-- expantion of ent --> |
564 |
|
|
## entity's text <!-- text cannot have base URI, so use <element>'s one --> |
565 |
|
|
## <entitys-element/> <!-- element can have base URI, otherwise ENTITY's one --> |
566 |
|
|
## </element> |
567 |
|
|
return $self->{parent}->_resolve_relative_uri_by_parent ($rel, $o); |
568 |
|
|
} else { |
569 |
|
|
return $self->{parent}->resolve_relative_uri ($rel, %$o); |
570 |
|
|
} |
571 |
|
|
} else { |
572 |
|
|
return $rel; |
573 |
|
|
} |
574 |
|
|
} |
575 |
|
|
sub base_uri ($;$) { |
576 |
|
|
my ($self, $new_uri) = @_; |
577 |
|
|
my $base; |
578 |
|
|
if (defined $new_uri) { |
579 |
|
|
$base = $self->set_attribute (base => $new_uri, namespace_uri => $NS{xml}); |
580 |
|
|
} |
581 |
|
|
$base ||= $self->get_attribute ('base', namespace_uri => $NS{xml}); |
582 |
|
|
ref ($base) ? $base->inner_text : undef; |
583 |
|
|
} |
584 |
|
|
|
585 |
wakaba |
1.1 |
=item $tag = $x->start_tag |
586 |
|
|
|
587 |
wakaba |
1.4 |
Returns the start tag (or something that marks the start of something, such as '<!--' |
588 |
wakaba |
1.1 |
for C<#comment> nodes). |
589 |
|
|
|
590 |
|
|
=cut |
591 |
|
|
|
592 |
|
|
sub start_tag ($) { |
593 |
|
|
my $self = shift; |
594 |
|
|
if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) { |
595 |
|
|
my $r = '<'; |
596 |
|
|
$r .= $self->qname; |
597 |
|
|
for (@{$self->{node}}) { |
598 |
|
|
$r .= ' ' . $_->outer_xml if $_->node_type eq '#attribute'; |
599 |
|
|
} |
600 |
|
|
for my $prefix (grep !/^-/, keys %{$self->{ns}||{}}) { |
601 |
|
|
if ($prefix) { |
602 |
|
|
$r .= ' xmlns:'.substr ($prefix, 0, length ($prefix)-1); |
603 |
|
|
} else { |
604 |
|
|
$r .= ' xmlns'; |
605 |
|
|
} |
606 |
|
|
$r .= '="'.$self->_entitize ($self->{ns}->{$prefix}).'"'; |
607 |
|
|
} |
608 |
|
|
$r .= '>'; |
609 |
|
|
$r; |
610 |
|
|
} elsif ($self->{type} eq '#comment') { |
611 |
wakaba |
1.4 |
'<!--'; |
612 |
|
|
} elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) { |
613 |
|
|
'<?' . ($self->{local_name}); |
614 |
wakaba |
1.2 |
} elsif ($self->{type} eq '#reference') { |
615 |
|
|
if ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref:hex') { |
616 |
|
|
'&#x'; |
617 |
|
|
} elsif ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref') { |
618 |
|
|
'&#'; |
619 |
wakaba |
1.4 |
} elsif (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') { |
620 |
|
|
if ($self->{local_name}->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:entity:parameter') { |
621 |
wakaba |
1.2 |
'%'; |
622 |
|
|
} else { |
623 |
|
|
'&'; |
624 |
|
|
} |
625 |
wakaba |
1.4 |
} elsif ($self->_check_ncname ($self->{local_name})) { |
626 |
wakaba |
1.2 |
if ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:entity:parameter') { |
627 |
|
|
'%'; |
628 |
|
|
} else { |
629 |
|
|
'&'; |
630 |
|
|
} |
631 |
|
|
} else { # error |
632 |
|
|
''; |
633 |
|
|
} |
634 |
wakaba |
1.4 |
} elsif ($self->{type} eq '#declaration' && $self->{namespace_uri}) { |
635 |
|
|
'<!' . { |
636 |
wakaba |
1.8 |
$NS{SGML}.'attlist' => 'ATTLIST', |
637 |
|
|
$NS{SGML}.'doctype' => 'DOCTYPE', |
638 |
|
|
$NS{SGML}.'element' => 'ELEMENT', |
639 |
|
|
$NS{SGML}.'entity' => 'ENTITY', |
640 |
|
|
$NS{SGML}.'entity:parameter' => 'ENTITY', |
641 |
|
|
$NS{SGML}.'notation' => 'NOTATION', |
642 |
wakaba |
1.7 |
}->{$self->{namespace_uri}} . ' ' . ($self->{namespace_uri} eq $NS{SGML}.'entity:parameter' ? |
643 |
|
|
($self->{flag}->{smxp__defined_with_param_ref}?'':'% '):''); |
644 |
wakaba |
1.1 |
} elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) { |
645 |
|
|
my $r = '<!' . $self->{local_name} . ' '; |
646 |
|
|
if ($self->{local_name} eq 'DOCTYPE' && ref $self->{parent}) { |
647 |
|
|
my $qname; |
648 |
|
|
for (@{$self->{parent}->{node}}) { |
649 |
|
|
if ($_->{type} eq '#element') { |
650 |
|
|
$qname = $_->qname; |
651 |
|
|
last if $qname; |
652 |
|
|
} |
653 |
|
|
} |
654 |
|
|
$r .= ($qname ? $qname : '#IMPLIED') . ' '; |
655 |
wakaba |
1.2 |
} |
656 |
|
|
$r; |
657 |
|
|
} elsif ($self->{type} eq '#section') { |
658 |
wakaba |
1.9 |
'<!['; |
659 |
wakaba |
1.1 |
} else { |
660 |
|
|
''; |
661 |
|
|
} |
662 |
|
|
} |
663 |
|
|
|
664 |
|
|
=item $tag = $x->end_tag |
665 |
|
|
|
666 |
wakaba |
1.4 |
Returns the end tag (or something that marks the end of something, such as '-->' |
667 |
wakaba |
1.1 |
for C<#comment> nodes). |
668 |
|
|
|
669 |
|
|
=cut |
670 |
|
|
|
671 |
|
|
sub end_tag ($) { |
672 |
|
|
my $self = shift; |
673 |
|
|
if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) { |
674 |
|
|
'</' . $self->qname . '>'; |
675 |
|
|
} elsif ($self->{type} eq '#comment') { |
676 |
wakaba |
1.4 |
'-->'; |
677 |
wakaba |
1.1 |
} elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) { |
678 |
|
|
'?>'; |
679 |
wakaba |
1.2 |
} elsif ($self->{type} eq '#reference') { |
680 |
|
|
';'; |
681 |
wakaba |
1.4 |
} elsif ($self->{type} eq '#declaration' && $self->{namespace_uri}) { |
682 |
|
|
'>'; |
683 |
wakaba |
1.1 |
} elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) { |
684 |
wakaba |
1.4 |
'>'; |
685 |
wakaba |
1.2 |
} elsif ($self->{type} eq '#section') { |
686 |
wakaba |
1.9 |
']]>'; |
687 |
wakaba |
1.1 |
} else { |
688 |
|
|
''; |
689 |
|
|
} |
690 |
|
|
} |
691 |
|
|
|
692 |
|
|
=item $tag = $x->attribute_name |
693 |
|
|
|
694 |
|
|
Returns the attribute name. |
695 |
|
|
|
696 |
|
|
=cut |
697 |
|
|
|
698 |
|
|
sub attribute_name ($) { |
699 |
|
|
my $self = shift; |
700 |
|
|
if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) { |
701 |
|
|
($self->{namespace_uri} ? |
702 |
|
|
(ref $self->{parent} ? $self->{parent} : $self) |
703 |
|
|
->_get_namespace_prefix ($self->{namespace_uri}, use_no_prefix => 0) : '') |
704 |
|
|
.$self->{local_name}; |
705 |
|
|
} else { |
706 |
|
|
''; |
707 |
|
|
} |
708 |
|
|
} |
709 |
|
|
|
710 |
|
|
=item $tag = $x->attribute_value |
711 |
|
|
|
712 |
|
|
Returns the attribute value. |
713 |
|
|
|
714 |
|
|
=cut |
715 |
|
|
|
716 |
wakaba |
1.4 |
sub attribute_value ($;%) { |
717 |
wakaba |
1.1 |
my $self = shift; |
718 |
wakaba |
1.4 |
my %o = @_; |
719 |
wakaba |
1.1 |
if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) { |
720 |
wakaba |
1.5 |
my $r = '"'; |
721 |
wakaba |
1.8 |
if (ref ($self->{value}) && eval q($self->{value}->_CLASS_NAME eq $self->_CLASS_NAME)) { |
722 |
wakaba |
1.5 |
unshift @{$self->{node}}, $self->{value}; |
723 |
|
|
undef $self->{value}; |
724 |
|
|
} else { |
725 |
|
|
$r .= $self->_entitize ($self->{value}); |
726 |
|
|
} |
727 |
wakaba |
1.4 |
for (@{$self->{node}}) { |
728 |
|
|
my $nt = $_->node_type; |
729 |
|
|
if ($nt eq '#reference' || $nt eq '#xml') { |
730 |
|
|
$r .= $_->outer_xml; |
731 |
|
|
} elsif ($nt ne '#attribute') { |
732 |
|
|
$r .= $self->_entitize ($_->inner_text, percent => $o{escape_percent}); |
733 |
|
|
} |
734 |
|
|
} |
735 |
|
|
return $r . '"'; |
736 |
|
|
} else { |
737 |
|
|
''; |
738 |
|
|
} |
739 |
|
|
} |
740 |
|
|
|
741 |
|
|
sub entity_value ($;%) { |
742 |
|
|
my $self = shift; |
743 |
|
|
my %o = @_; |
744 |
|
|
my $_entitize = sub { |
745 |
|
|
my $s = shift; |
746 |
wakaba |
1.7 |
$s =~ s/&/&/g; |
747 |
|
|
$s =~ s/&(\p{InXML_NameStartChar}\p{InXMLNameChar}*);/&$1;/g; |
748 |
wakaba |
1.4 |
$s =~ s/%/%/g; |
749 |
wakaba |
1.7 |
$s =~ s/"/"/g; |
750 |
wakaba |
1.4 |
$s; |
751 |
|
|
}; |
752 |
|
|
if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) { |
753 |
|
|
my $r = '"' . &$_entitize ($self->{value}); |
754 |
|
|
for (@{$self->{node}}) { |
755 |
|
|
my $nt = $_->node_type; |
756 |
|
|
if ($nt eq '#reference' || $nt eq '#xml') { |
757 |
|
|
$r .= $_->outer_xml; |
758 |
|
|
} elsif ($nt ne '#attribute') { |
759 |
|
|
$r .= &$_entitize ($_->inner_text); |
760 |
wakaba |
1.1 |
} |
761 |
|
|
} |
762 |
wakaba |
1.4 |
return $r . '"'; |
763 |
wakaba |
1.1 |
} else { |
764 |
|
|
''; |
765 |
|
|
} |
766 |
|
|
} |
767 |
|
|
|
768 |
wakaba |
1.7 |
## This method should be called only from SuikaWiki::Markup::XML::* family modules, |
769 |
|
|
## since this is NOT a FORMAL interface. |
770 |
|
|
sub _entity_parameter_literal_value ($;%) { |
771 |
|
|
my $self = shift; |
772 |
|
|
my $r = $self->{value}; |
773 |
|
|
for (@{$self->{node}}) { |
774 |
|
|
my $nt = $_->node_type; |
775 |
wakaba |
1.8 |
#if ($nt eq '#reference' || $nt eq '#xml') { |
776 |
|
|
if ($nt eq '#xml') { |
777 |
wakaba |
1.7 |
$r .= $_->outer_xml; |
778 |
|
|
} elsif ($nt ne '#attribute') { |
779 |
|
|
$r .= $_->inner_text; |
780 |
|
|
} |
781 |
|
|
} |
782 |
|
|
return $r; |
783 |
|
|
} |
784 |
|
|
|
785 |
wakaba |
1.1 |
=item $tag = $x->attribute |
786 |
|
|
|
787 |
|
|
Returns the attribute (name and value pair). |
788 |
|
|
|
789 |
|
|
=cut |
790 |
|
|
|
791 |
|
|
sub attribute ($) { |
792 |
|
|
my $self = shift; |
793 |
|
|
if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) { |
794 |
|
|
$self->attribute_name . '=' . $self->attribute_value; |
795 |
|
|
} else { |
796 |
|
|
''; |
797 |
|
|
} |
798 |
|
|
} |
799 |
|
|
|
800 |
wakaba |
1.2 |
sub external_id ($;%) { |
801 |
|
|
my $self = shift; |
802 |
|
|
my %o = @_; |
803 |
|
|
my ($pubid, $sysid, $ndata); |
804 |
|
|
for (@{$self->{node}}) { |
805 |
wakaba |
1.4 |
if ($_->{type} eq '#attribute' && !$_->{namespace_uri}) { |
806 |
wakaba |
1.2 |
if ($_->{local_name} eq 'PUBLIC') { |
807 |
wakaba |
1.4 |
$pubid = $_->inner_text; |
808 |
wakaba |
1.2 |
} elsif ($_->{local_name} eq 'SYSTEM') { |
809 |
wakaba |
1.4 |
$sysid = $_->inner_text; |
810 |
wakaba |
1.2 |
} elsif ($_->{local_name} eq 'NDATA') { |
811 |
wakaba |
1.4 |
$ndata = $_->inner_text; |
812 |
|
|
undef $ndata unless $self->_check_ncname ($ndata); |
813 |
wakaba |
1.2 |
} |
814 |
|
|
} |
815 |
|
|
} |
816 |
|
|
my $r = ''; |
817 |
wakaba |
1.9 |
if (defined $pubid) { |
818 |
wakaba |
1.4 |
$pubid =~ s|([^\x0A\x0D\x20A-Za-z0-9'()+,./:=?;!*#\@\$_%-])|sprintf '%%%02X', ord $1|ges; |
819 |
wakaba |
1.9 |
$pubid = '"' . $pubid . '"'; |
820 |
wakaba |
1.4 |
} |
821 |
wakaba |
1.9 |
if (defined $sysid) { |
822 |
wakaba |
1.4 |
if ($sysid =~ /"/) { |
823 |
|
|
if ($sysid =~ /'/) { |
824 |
|
|
$sysid =~ s/"/"/; $sysid = '"' . $sysid . '"'; |
825 |
|
|
} else { |
826 |
|
|
$sysid = "'" . $sysid . "'"; |
827 |
|
|
} |
828 |
|
|
} else { |
829 |
|
|
$sysid = '"' . $sysid . '"'; |
830 |
|
|
} |
831 |
|
|
} |
832 |
wakaba |
1.2 |
if ($pubid && $sysid) { |
833 |
wakaba |
1.9 |
$r = 'PUBLIC ' . $pubid . ' ' . $sysid; |
834 |
wakaba |
1.2 |
} elsif ($sysid) { |
835 |
wakaba |
1.4 |
$r = 'SYSTEM ' . $sysid; |
836 |
wakaba |
1.2 |
} elsif ($pubid && $o{allow_pubid_only}) { |
837 |
wakaba |
1.9 |
$r = 'PUBLIC ' . $pubid; |
838 |
wakaba |
1.2 |
} |
839 |
|
|
if ($r && $ndata && $o{use_ndata}) { |
840 |
wakaba |
1.4 |
$r .= ' NDATA ' . $ndata; |
841 |
wakaba |
1.2 |
} |
842 |
|
|
$r; |
843 |
|
|
} |
844 |
|
|
|
845 |
|
|
=item $s = $x->content_spec |
846 |
|
|
|
847 |
|
|
Generates contentspec of element type declaration (ex. C<(E1 | E2 | E3)>) |
848 |
|
|
or AttDef of attribute declaration (ex. C<name CDATA #REQUIRED>). |
849 |
|
|
|
850 |
|
|
=cut |
851 |
|
|
|
852 |
|
|
sub content_spec ($) { |
853 |
|
|
my $self = shift; |
854 |
|
|
if ($self->{type} eq '#element') { |
855 |
|
|
my $text = 0; |
856 |
|
|
my $contentspec = join ' | ', map {$_->qname} grep {$text = 1 if $_->{type} eq '#text'; $_->{type} eq '#element'} @{$self->{node}}; |
857 |
|
|
$contentspec = '#PCDATA' . ($contentspec ? ' | ' . $contentspec : '') if $text; |
858 |
|
|
|
859 |
|
|
return $contentspec ? '(' . $contentspec . ')' : 'EMPTY'; |
860 |
|
|
} elsif ($self->{type} eq '#attribute') { |
861 |
|
|
my $attdef = $self->qname . "\t" . ($self->{data_type} || 'CDATA') . "\t"; |
862 |
|
|
my $default = $self->{default_decl}; |
863 |
|
|
$default .= ' ' . $self->attribute_value if $default eq '#FIXED'; |
864 |
|
|
unless ($default) { |
865 |
|
|
$default = defined $self->{value} ? $self->attribute_value : '#IMPLIED'; |
866 |
|
|
} |
867 |
|
|
return $attdef . $default; |
868 |
|
|
} |
869 |
|
|
} |
870 |
|
|
|
871 |
wakaba |
1.1 |
=item $tag = $x->inner_xml |
872 |
|
|
|
873 |
|
|
Returns the content of the node in XML syntax. (In case of the C<#element> nodes, |
874 |
|
|
element content without start- and end-tags is returned.) |
875 |
|
|
|
876 |
|
|
Note that for not all node types the behavior of this method is defined. |
877 |
|
|
For example, returned value of C<#attribute> might be unexpected one |
878 |
|
|
in this version of this module. |
879 |
|
|
|
880 |
|
|
=cut |
881 |
|
|
|
882 |
wakaba |
1.3 |
sub inner_xml ($;%) { |
883 |
wakaba |
1.1 |
my $self = shift; |
884 |
wakaba |
1.3 |
my %o = @_; |
885 |
wakaba |
1.1 |
my $r = ''; |
886 |
|
|
if ($self->{type} eq '#comment') { |
887 |
|
|
$r = $self->inner_text; |
888 |
|
|
$r =~ s/--/-E/g; |
889 |
|
|
} elsif ($self->{type} eq '#pi') { |
890 |
|
|
if (length $self->{value}) { |
891 |
|
|
$r = ' ' . $self->{value}; |
892 |
wakaba |
1.2 |
$r =~ s/\?>/? >/g; # Same replacement as of the recommendation of XSLT:p.i. |
893 |
wakaba |
1.1 |
} |
894 |
|
|
for (@{$self->{node}}) { |
895 |
|
|
if ($_->node_type eq '#attribute') { |
896 |
|
|
$r .= ' ' . $_->attribute; |
897 |
|
|
} else { |
898 |
|
|
my $s = $_->inner_text; |
899 |
|
|
$s =~ s/\?>/?>/g; |
900 |
|
|
$r .= ' ' . $s if length $s; |
901 |
|
|
} |
902 |
|
|
} |
903 |
wakaba |
1.2 |
} elsif ($self->{type} eq '#reference') { |
904 |
|
|
if ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref:hex') { |
905 |
|
|
$r = sprintf '%02X', $self->{value}; |
906 |
|
|
} elsif ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref') { |
907 |
|
|
$r = sprintf '%02d', $self->{value}; |
908 |
wakaba |
1.4 |
} elsif (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') { |
909 |
|
|
$r = $self->{local_name}->{local_name}; |
910 |
|
|
} elsif ($self->_check_ncname ($self->{local_name})) { |
911 |
|
|
$r = ($self->{local_name}); |
912 |
wakaba |
1.2 |
} else { # error |
913 |
|
|
$r = ''; |
914 |
|
|
} |
915 |
wakaba |
1.1 |
} elsif ($self->{type} eq '#declaration') { |
916 |
wakaba |
1.8 |
if ($self->{namespace_uri} eq $NS{SGML}.'doctype') { |
917 |
wakaba |
1.4 |
my $root = $self->get_attribute ('qname'); |
918 |
|
|
ref $root ? ($root = $root->inner_text) : (ref $self->{parent} ? (do { |
919 |
|
|
for (@{$self->{parent}->{node}}) { |
920 |
|
|
if ($_->{type} eq '#element') { |
921 |
|
|
$root = $_->qname; |
922 |
|
|
last if $root; |
923 |
|
|
} |
924 |
|
|
} |
925 |
|
|
$root = '#IMPLIED'; |
926 |
|
|
}) : ($root = '#IMPLIED')); ## error! |
927 |
|
|
my ($isub, $xid) = ('', $self->external_id); |
928 |
|
|
for (@{$self->{node}}) { |
929 |
|
|
$isub .= $_->outer_xml if $_->{type} ne '#attribute'; |
930 |
wakaba |
1.2 |
} |
931 |
|
|
if ($xid) { |
932 |
wakaba |
1.4 |
$r = $xid; |
933 |
|
|
if ($isub) { |
934 |
wakaba |
1.7 |
$r .= " [" . $isub . "]"; |
935 |
wakaba |
1.4 |
} |
936 |
|
|
} else { |
937 |
|
|
if ($isub) { |
938 |
wakaba |
1.7 |
$r = "[" . $isub . "]"; |
939 |
wakaba |
1.4 |
} else { |
940 |
|
|
$r = "[]"; |
941 |
|
|
} |
942 |
|
|
} |
943 |
|
|
$r = $root . ' ' . $r; |
944 |
wakaba |
1.9 |
} elsif ($self->{namespace_uri} eq $NS{SGML}.'entity' |
945 |
|
|
|| $self->{namespace_uri} eq $NS{SGML}.'entity:parameter' |
946 |
|
|
|| $self->{namespace_uri} eq $NS{SGML}.'notation') { |
947 |
wakaba |
1.4 |
my %xid_opt; |
948 |
wakaba |
1.7 |
$r = $self->{local_name} . ' ' if !$self->{flag}->{smxp__defined_with_param_ref} |
949 |
|
|
&& $self->_check_ncname ($self->{local_name}); |
950 |
wakaba |
1.8 |
if ($self->{namespace_uri} eq $NS{SGML}.'entity:parameter') { |
951 |
wakaba |
1.4 |
#$r = '% ' . $r; |
952 |
wakaba |
1.8 |
} elsif ($self->{namespace_uri} eq $NS{SGML}.'entity') { |
953 |
wakaba |
1.4 |
$xid_opt{use_ndata} = 1; |
954 |
wakaba |
1.8 |
} elsif ($self->{namespace_uri} eq $NS{SGML}.'notation') { |
955 |
wakaba |
1.4 |
$xid_opt{allow_pubid_only} = 1; |
956 |
|
|
} |
957 |
|
|
|
958 |
|
|
my ($v, $xid) = ($self->{value}, $self->external_id (%xid_opt)); |
959 |
wakaba |
1.7 |
undef $xid if $self->{flag}->{smxp__defined_with_param_ref}; |
960 |
wakaba |
1.4 |
if ($xid) { ## External ID |
961 |
wakaba |
1.2 |
$r .= $xid; |
962 |
wakaba |
1.4 |
} else { ## EntityValue |
963 |
|
|
my $entity_value = $self->get_attribute ('value'); |
964 |
wakaba |
1.7 |
undef $entity_value if $self->{flag}->{smxp__defined_with_param_ref}; |
965 |
wakaba |
1.4 |
if ($entity_value) { # <!ENTITY foo "bar"> |
966 |
|
|
$r .= $entity_value->entity_value; |
967 |
|
|
} else { ## Parameter entity reference |
968 |
wakaba |
1.9 |
my $isc = $self->_is_same_class ($self->{value}); |
969 |
|
|
$r .= $self->{value} unless $isc; |
970 |
|
|
for (($isc?$self->{value}:()), @{$self->{node}}) { |
971 |
wakaba |
1.7 |
$r .= $_->outer_xml unless $_->{type} eq '#attribute'; |
972 |
wakaba |
1.4 |
} |
973 |
wakaba |
1.2 |
} |
974 |
wakaba |
1.1 |
} |
975 |
wakaba |
1.8 |
} elsif ($self->{namespace_uri} eq $NS{SGML}.'element') { |
976 |
wakaba |
1.4 |
my $qname = $self->get_attribute ('qname'); |
977 |
|
|
ref $qname ? $qname = $qname->inner_text : undef; |
978 |
|
|
if ($qname && $self->_check_name ($qname)) { |
979 |
|
|
## Element type name is defined |
980 |
|
|
$r = $qname . ' '; |
981 |
wakaba |
1.3 |
if ($o{output_tag_omit_declaration}) { |
982 |
wakaba |
1.4 |
$r .= ($self->{flag}->{element_tag_start_omitable} ? 'o ' : '- ') |
983 |
|
|
. |
984 |
|
|
($self->{flag}->{element_tag_end_omitable} ? 'o ' : '- '); |
985 |
wakaba |
1.3 |
} |
986 |
wakaba |
1.4 |
my $rs = $self->inner_text (output_ref_as_is => 1); |
987 |
|
|
$r .= $rs || 'ANY'; |
988 |
wakaba |
1.2 |
} else { |
989 |
wakaba |
1.4 |
#if (ref $self->{node}->[0] && $self->{node}->[0]->{type} eq '#element') { |
990 |
|
|
### Element prototype is given |
991 |
|
|
# $r = $self->{node}->[0]->qname . ' ' . &$to . $self->{node}->[0]->content_spec; |
992 |
|
|
#} elsif () { |
993 |
|
|
### Element type name and contentspec is given |
994 |
|
|
# $r = $self->{target_name} . ' ' . &$to . ($self->inner_text || 'ANY'); |
995 |
|
|
#} else { |
996 |
wakaba |
1.2 |
## (Element type name and contetnspac) is given |
997 |
wakaba |
1.4 |
$r .= $self->inner_text (output_ref_as_is => 1); |
998 |
|
|
# || 'Name ANY'; # error |
999 |
|
|
#} |
1000 |
wakaba |
1.2 |
} |
1001 |
wakaba |
1.7 |
## TODO: reform |
1002 |
wakaba |
1.2 |
} elsif ($self->{local_name} eq 'ATTLIST') { |
1003 |
|
|
if ($self->_check_name ($self->{target_name})) { |
1004 |
|
|
$r = $self->{target_name}; |
1005 |
|
|
} |
1006 |
|
|
my $t = $self->inner_text (output_ref_as_is => 1); |
1007 |
|
|
$r .= "\n\t" . $t if $t; |
1008 |
|
|
$r ||= 'Name'; # error! |
1009 |
|
|
for (@{$self->{node}}) { |
1010 |
|
|
if ($_->{type} eq '#attribute') { |
1011 |
|
|
$r .= "\n\t" . $_->content_spec; |
1012 |
|
|
} |
1013 |
wakaba |
1.1 |
} |
1014 |
wakaba |
1.2 |
} else { # unknown |
1015 |
wakaba |
1.3 |
for (@{$self->{node}}) { |
1016 |
|
|
$r .= $_->outer_xml; |
1017 |
|
|
} |
1018 |
wakaba |
1.1 |
} |
1019 |
wakaba |
1.9 |
} elsif ($self->{type} eq '#section') { |
1020 |
|
|
my $status = $self->get_attribute ('status', make_new_node => 1)->inner_text; |
1021 |
|
|
if ($status eq 'CDATA') { |
1022 |
|
|
$r = $self->inner_text; |
1023 |
|
|
$r =~ s/]]>/]]>]]<![CDATA[>/g; |
1024 |
|
|
$r = 'CDATA['.$r; |
1025 |
|
|
} else { |
1026 |
|
|
my $sl = $self->get_attribute ('status_list', make_new_node => 1); |
1027 |
|
|
if ($sl->{flag}->{smxp__defined_with_param_ref}) { |
1028 |
|
|
my $isc = $self->_is_same_class ($self->{value}); |
1029 |
|
|
$status = ''; |
1030 |
|
|
$status = $sl->{value} unless $isc; |
1031 |
|
|
for (($isc?$sl->{value}:()), @{$sl->{node}}) { |
1032 |
|
|
$status .= $_->outer_xml unless $_->{type} eq '#attribute'; |
1033 |
|
|
} |
1034 |
|
|
$r = $status.'['.$r; |
1035 |
|
|
} elsif ($status) { |
1036 |
|
|
$r = $status.'['.$r; |
1037 |
|
|
} else { |
1038 |
|
|
## Must be an ignored section |
1039 |
|
|
} |
1040 |
|
|
my $isc = $self->_is_same_class ($self->{value}); |
1041 |
|
|
$r .= join '', map {s/\]\]>/]]>/g; $_} $self->{value} unless $isc; |
1042 |
|
|
for (($isc?$self->{value}:()), @{$self->{node}}) { |
1043 |
|
|
if ($_->{type} eq '#text') { |
1044 |
|
|
$r .= join '', map {s/\]\]>/]]>/g; $_} $_->inner_text; ## Anyway, this is error |
1045 |
|
|
} elsif ($_->{type} ne '#attribute') { |
1046 |
|
|
$r .= $_->outer_xml; |
1047 |
|
|
} |
1048 |
|
|
} |
1049 |
|
|
} |
1050 |
wakaba |
1.1 |
} else { |
1051 |
|
|
if ($self->{type} ne '#xml') { |
1052 |
|
|
$r = $self->_entitize ($self->{value}); |
1053 |
|
|
} else { |
1054 |
|
|
$r = $self->{value}; |
1055 |
|
|
} |
1056 |
|
|
for (@{$self->{node}}) { |
1057 |
|
|
my $nt = $_->node_type; |
1058 |
wakaba |
1.4 |
if (($self->{option}->{indent}) && ($nt eq '#element' || $nt eq '#comment' || $nt eq '#pi' || $nt eq '#declaration')) { |
1059 |
wakaba |
1.1 |
$r .= "\n"; |
1060 |
|
|
} |
1061 |
|
|
$r .= $_->outer_xml unless $_->node_type eq '#attribute'; |
1062 |
|
|
} |
1063 |
|
|
} |
1064 |
|
|
$r; |
1065 |
|
|
} |
1066 |
|
|
|
1067 |
|
|
|
1068 |
|
|
=item $tag = $x->outer_xml |
1069 |
|
|
|
1070 |
|
|
Returns the node in XML syntax. |
1071 |
|
|
|
1072 |
|
|
=cut |
1073 |
|
|
|
1074 |
|
|
sub outer_xml ($) { |
1075 |
|
|
my $self = shift; |
1076 |
|
|
if ($self->{type} eq '#attribute') { |
1077 |
|
|
$self->attribute; |
1078 |
|
|
} else { |
1079 |
wakaba |
1.4 |
if ($self->{option}->{indent} && $self->{type} eq '#element') { |
1080 |
wakaba |
1.1 |
my $r = $self->start_tag; |
1081 |
|
|
my $c = $self->inner_xml; |
1082 |
wakaba |
1.4 |
if (!length $c && $self->{option}->{use_EmptyElemTag}) { |
1083 |
|
|
substr ($r, -1) = ' />'; |
1084 |
|
|
} else { |
1085 |
|
|
if ($c) { |
1086 |
|
|
$c =~ s/\n/\n /g; |
1087 |
|
|
$r .= "\n " . $c . "\n"; |
1088 |
|
|
} |
1089 |
|
|
$r .= $self->end_tag; |
1090 |
wakaba |
1.1 |
} |
1091 |
|
|
$r; |
1092 |
|
|
} else { |
1093 |
wakaba |
1.4 |
my $r = $self->start_tag; |
1094 |
|
|
my $c = $self->inner_xml; |
1095 |
wakaba |
1.8 |
if ($self->{type} eq '#element' && !length ($c) && $self->{option}->{use_EmptyElemTag}) { |
1096 |
wakaba |
1.4 |
substr ($r, -1) = ' />'; |
1097 |
|
|
} else { |
1098 |
|
|
$r .= $c . $self->end_tag; |
1099 |
|
|
#$r .= "\n" if $self->{type} eq '#declaration'; |
1100 |
|
|
} |
1101 |
wakaba |
1.2 |
$r; |
1102 |
|
|
#'{'.$self->{type}.': '.$r.'}'; # for debug |
1103 |
wakaba |
1.1 |
} |
1104 |
|
|
} |
1105 |
|
|
} |
1106 |
|
|
|
1107 |
|
|
=item $tag = $x->inner_text |
1108 |
|
|
|
1109 |
|
|
Returns the text content of the node. (In many case the returned value is same |
1110 |
|
|
as WinIE DOM C<inner_text ()> function's or XPath C<text()> function's. |
1111 |
|
|
But some classes that inherits this module might implement to return other |
1112 |
|
|
value (eg. to return the value of the alt attribute of html:img element). |
1113 |
|
|
|
1114 |
wakaba |
1.2 |
Available options: C<output_ref_as_is>. |
1115 |
|
|
|
1116 |
wakaba |
1.1 |
=cut |
1117 |
|
|
|
1118 |
wakaba |
1.2 |
sub inner_text ($;%) { |
1119 |
wakaba |
1.1 |
my $self = shift; |
1120 |
wakaba |
1.2 |
my %o = @_; |
1121 |
wakaba |
1.5 |
my $r = ''; |
1122 |
wakaba |
1.7 |
if ($self->{type} eq '#reference' && #) { |
1123 |
|
|
#if ( |
1124 |
wakaba |
1.8 |
($self->{namespace_uri} eq $NS{SGML}.'char:ref' || |
1125 |
|
|
$self->{namespace_uri} eq $NS{SGML}.'char:ref:hex')) { |
1126 |
wakaba |
1.7 |
$r = chr $self->{value}; |
1127 |
|
|
#} else { # general entity ref. or parameter entity ref. |
1128 |
|
|
# ## TODO: how implement? is this ok? |
1129 |
|
|
# my $em = $self->_get_entity_manager; |
1130 |
|
|
# $r = $em->get_entity_value ($self->{local_name}, namespace_uri => $self->{namespace_uri}); |
1131 |
|
|
#} |
1132 |
|
|
} elsif ($self->{type} eq '#declaration' |
1133 |
|
|
&& ($self->{namespace_uri} eq $NS{SGML}.'entity' |
1134 |
|
|
|| $self->{namespace_uri} eq $NS{SGML}.'entity:parameter')) { |
1135 |
|
|
## TODO: |
1136 |
|
|
$r = $self->get_attribute ('value')->inner_text; |
1137 |
|
|
} else { # not #reference nor #declaration(ENTITY) |
1138 |
wakaba |
1.8 |
if (ref ($self->{value}) && eval q($self->{value}->_CLASS_NAME eq $self->_CLASS_NAME)) { |
1139 |
wakaba |
1.7 |
unshift @{$self->{node}}, $self->{value}; |
1140 |
|
|
undef $self->{value}; |
1141 |
|
|
} else { |
1142 |
|
|
$r = $self->{value}; |
1143 |
|
|
} |
1144 |
|
|
if ($o{output_ref_as_is}) { ## output as if RCDATA |
1145 |
|
|
$r =~ s/&/&/g; |
1146 |
|
|
for my $node (@{$self->{node}}) { |
1147 |
|
|
my $nt = $node->node_type; |
1148 |
|
|
if ($nt eq '#reference' || $nt eq '#xml') { |
1149 |
|
|
$r .= $node->outer_xml; |
1150 |
|
|
} elsif ($nt ne '#attribute') { |
1151 |
|
|
$r .= map {s/&/&/g; $_} $node->inner_text; |
1152 |
|
|
} |
1153 |
|
|
} |
1154 |
|
|
} else { |
1155 |
|
|
for (@{$self->{node}}) { |
1156 |
|
|
$r .= $_->inner_text unless $_->node_type eq '#attribute'; |
1157 |
wakaba |
1.2 |
} |
1158 |
|
|
} |
1159 |
wakaba |
1.1 |
} |
1160 |
|
|
$r; |
1161 |
|
|
} |
1162 |
|
|
|
1163 |
wakaba |
1.9 |
sub _is_same_class ($$) { |
1164 |
|
|
my ($self, $something) = @_; |
1165 |
|
|
return 0 if !ref $something |
1166 |
|
|
|| ref ($something) eq 'ARRAY' |
1167 |
|
|
|| ref ($something) eq 'HASH' |
1168 |
|
|
|| ref ($something) eq 'CODE'; |
1169 |
|
|
if (eval q{$self->_CLASS_NAME eq $something->_CLASS_NAME}) { |
1170 |
|
|
return 1; |
1171 |
|
|
} else { |
1172 |
|
|
return 0; |
1173 |
|
|
} |
1174 |
|
|
} |
1175 |
|
|
|
1176 |
|
|
sub root_node ($) { |
1177 |
|
|
my $self = shift; |
1178 |
|
|
if ($self->{type} eq '#document') { |
1179 |
|
|
return $self; |
1180 |
|
|
} elsif (ref $self->{parent}) { |
1181 |
|
|
return $self->{parent}->root_node; |
1182 |
|
|
} else { |
1183 |
|
|
return $self; |
1184 |
|
|
} |
1185 |
wakaba |
1.1 |
} |
1186 |
|
|
|
1187 |
wakaba |
1.7 |
sub _get_entity_manager ($) { |
1188 |
|
|
my $self = shift; |
1189 |
|
|
if ($self->{type} eq '#document') { |
1190 |
|
|
unless ($self->{flag}->{smx__entity_manager}) { |
1191 |
|
|
require SuikaWiki::Markup::XML::EntityManager; |
1192 |
|
|
$self->{flag}->{smx__entity_manager} = SuikaWiki::Markup::XML::EntityManager->new ($self); |
1193 |
|
|
} |
1194 |
|
|
return $self->{flag}->{smx__entity_manager}; |
1195 |
|
|
} elsif (ref $self->{parent}) { |
1196 |
|
|
return $self->{parent}->_get_entity_manager; |
1197 |
|
|
} else { |
1198 |
|
|
unless ($self->{flag}->{smx__entity_manager}) { |
1199 |
|
|
require SuikaWiki::Markup::XML::EntityManager; |
1200 |
|
|
$self->{flag}->{smx__entity_manager} = SuikaWiki::Markup::XML::EntityManager->new ($self); |
1201 |
|
|
} |
1202 |
|
|
return $self->{flag}->{smx__entity_manager}; |
1203 |
|
|
} |
1204 |
|
|
} |
1205 |
|
|
|
1206 |
wakaba |
1.5 |
sub _CLASS_NAME ($) { |
1207 |
|
|
'SuikaWiki::Markup::XML'; |
1208 |
|
|
} |
1209 |
|
|
|
1210 |
wakaba |
1.1 |
# $s = $x->_entitize ($s) |
1211 |
wakaba |
1.4 |
sub _entitize ($$;%) { |
1212 |
|
|
my ($self, $s, %o) = (shift, shift, @_); |
1213 |
wakaba |
1.1 |
$s =~ s/&/&/g; |
1214 |
|
|
$s =~ s/</</g; |
1215 |
|
|
$s =~ s/>/>/g; |
1216 |
|
|
$s =~ s/"/"/g; |
1217 |
wakaba |
1.4 |
$s =~ s/%/%/g if $o{percent}; |
1218 |
|
|
$s =~ s/'/'/g if $o{apos}; |
1219 |
wakaba |
1.7 |
$s =~ s/([\x00-\x08\x0B\x0C\x0E-\x1F])/sprintf '&#%d;', ord $1/ge; |
1220 |
wakaba |
1.1 |
$s; |
1221 |
|
|
} |
1222 |
|
|
|
1223 |
wakaba |
1.2 |
# 1/0 = $x->_check_name ($s) |
1224 |
|
|
sub _check_name ($$) { |
1225 |
|
|
my $self = shift; |
1226 |
|
|
my $s = shift; |
1227 |
|
|
return $Cache{name}->{$s} if defined $Cache{name}->{$s}; |
1228 |
|
|
if ($s =~ /^\p{InXML_NameStartChar}/ && $s !~ /\P{InXMLNameChar}/) { |
1229 |
|
|
# \p{...}('*'/'+'/'{n,}') does not work... |
1230 |
|
|
$Cache{name}->{$s} = 1; |
1231 |
|
|
1; |
1232 |
|
|
} else { |
1233 |
|
|
$Cache{name}->{$s} = 0; |
1234 |
|
|
0; |
1235 |
|
|
} |
1236 |
|
|
} |
1237 |
wakaba |
1.1 |
# 1/0 = $x->_check_ncname ($s) |
1238 |
|
|
sub _check_ncname ($$) { |
1239 |
|
|
my $self = shift; |
1240 |
|
|
my $s = shift; |
1241 |
|
|
return $Cache{ncname}->{$s} if defined $Cache{ncname}->{$s}; |
1242 |
|
|
if ($s =~ /^\p{InXML_NCNameStartChar}/ && $s !~ /\P{InXMLNCNameChar}/) { |
1243 |
|
|
# \p{...}('*'/'+'/'{n,}') does not work... |
1244 |
|
|
$Cache{ncname}->{$s} = 1; |
1245 |
|
|
1; |
1246 |
|
|
} else { |
1247 |
|
|
$Cache{ncname}->{$s} = 0; |
1248 |
|
|
0; |
1249 |
|
|
} |
1250 |
|
|
} |
1251 |
|
|
|
1252 |
|
|
# 1/0 = $x->_check_namespace_prefix ($s) |
1253 |
|
|
sub _check_namespace_prefix ($$) { |
1254 |
|
|
my $self = shift; |
1255 |
|
|
my $s = shift; |
1256 |
|
|
return 0 unless defined $s; |
1257 |
|
|
return 1 if $s eq ''; |
1258 |
|
|
substr ($s, -1, 1) = '' if substr ($s, -1, 1) eq ':'; |
1259 |
|
|
$self->_check_ncname ($s); |
1260 |
|
|
} |
1261 |
|
|
|
1262 |
wakaba |
1.2 |
## TODO: cleaning $self->{node} before outputing, to ensure nodes not to have |
1263 |
|
|
## multiple parents. |
1264 |
|
|
## TODO: normalize namespace URI (removing non URI chars) |
1265 |
|
|
|
1266 |
|
|
sub flag ($$;$) { |
1267 |
|
|
my ($self, $name, $value) = @_; |
1268 |
|
|
if (defined $value) { |
1269 |
|
|
$self->{flag}->{$name} = $value; |
1270 |
|
|
} |
1271 |
|
|
$self->{flag}->{$name}; |
1272 |
|
|
} |
1273 |
|
|
|
1274 |
wakaba |
1.3 |
sub option ($$;$) { |
1275 |
|
|
my ($self, $name, $value) = @_; |
1276 |
|
|
if (defined $value) { |
1277 |
|
|
$self->{option}->{$name} = $value; |
1278 |
|
|
} |
1279 |
|
|
$self->{option}->{$name}; |
1280 |
|
|
} |
1281 |
|
|
|
1282 |
wakaba |
1.1 |
=back |
1283 |
|
|
|
1284 |
wakaba |
1.2 |
=head1 NODE TYPES |
1285 |
|
|
|
1286 |
|
|
=over 4 |
1287 |
|
|
|
1288 |
|
|
=item #attribute |
1289 |
|
|
|
1290 |
|
|
Attribute. Its XML representation takes the form of NAME="VALUE". |
1291 |
|
|
|
1292 |
|
|
=item #comment |
1293 |
|
|
|
1294 |
|
|
Comment declarement. <!-- --> |
1295 |
|
|
|
1296 |
|
|
=item #declarement |
1297 |
|
|
|
1298 |
|
|
SGML's declarements, such as SGML, DOCTYPE, ENTITY, etc. |
1299 |
|
|
<!SGML ...>, <!DOCTYPE root []>, <!ENTITY % name "value"> |
1300 |
|
|
|
1301 |
|
|
=item #element |
1302 |
|
|
|
1303 |
|
|
Element. Its XML representation consists of start tag, content and end tag, |
1304 |
|
|
like <TYPE>content</TYPE>. |
1305 |
|
|
|
1306 |
|
|
=item #fragment |
1307 |
|
|
|
1308 |
|
|
Fragment of nodes. It's similar to DOM's fragment node. |
1309 |
|
|
|
1310 |
|
|
=item #pi |
1311 |
|
|
|
1312 |
|
|
Prosessing instruction. <?NAME VALUE?> |
1313 |
|
|
|
1314 |
|
|
=item #reference |
1315 |
|
|
|
1316 |
|
|
Character reference or general or parameter entity reference. |
1317 |
|
|
&#nnnn;, &#xhhhh;, &name;, %name;. |
1318 |
|
|
|
1319 |
|
|
=item #section |
1320 |
|
|
|
1321 |
|
|
Markup section. CDATA, INCLUDE and IGNORE are supported by XML. |
1322 |
|
|
<![%type;[...]]> |
1323 |
|
|
|
1324 |
|
|
=item #text |
1325 |
|
|
|
1326 |
|
|
Text. |
1327 |
|
|
|
1328 |
|
|
=item #xml |
1329 |
|
|
|
1330 |
|
|
Preformatted XML text. |
1331 |
|
|
|
1332 |
|
|
=cut |
1333 |
|
|
|
1334 |
wakaba |
1.4 |
=head1 RESTRICTIONS |
1335 |
|
|
|
1336 |
|
|
=over 4 |
1337 |
|
|
|
1338 |
|
|
=item XML without XML Namespace is not supported. |
1339 |
|
|
|
1340 |
|
|
=item Before default namespace without bounded URI (xmlns="") is outputed, it must be declared. |
1341 |
|
|
|
1342 |
|
|
For example, next code generates invalid (non-well-formed) XML Namespace document. |
1343 |
|
|
|
1344 |
|
|
my $x = SuikaWiki::Markup::XML->new (local_name => 'elementType'); |
1345 |
|
|
print $x # <ns1:elementType xmlns:ns1=""></ns1:elementType> |
1346 |
|
|
|
1347 |
|
|
So you must write like: |
1348 |
|
|
|
1349 |
|
|
my $x = SuikaWiki::Markup::XML->new (local_name => 'elementType'); |
1350 |
|
|
$x->define_new_namespace ('' => ''); |
1351 |
|
|
print $x; # <elementType xmlns=""></elementType> |
1352 |
|
|
|
1353 |
|
|
=back |
1354 |
|
|
|
1355 |
wakaba |
1.1 |
=head1 LICENSE |
1356 |
|
|
|
1357 |
|
|
Copyright 2003 Wakaba <w@suika.fam.cx> |
1358 |
|
|
|
1359 |
|
|
This program is free software; you can redistribute it and/or |
1360 |
|
|
modify it under the same terms as Perl itself. |
1361 |
|
|
|
1362 |
|
|
=cut |
1363 |
|
|
|
1364 |
wakaba |
1.9 |
1; # $Date: 2003/06/27 13:07:44 $ |