/[pub]/suikawiki/script/lib/SuikaWiki/Markup/XML.pm
Suika

Contents of /suikawiki/script/lib/SuikaWiki/Markup/XML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.11 - (hide annotations) (download)
Sat Jul 5 07:26:05 2003 UTC (22 years ago) by wakaba
Branch: MAIN
Changes since 1.10: +6 -6 lines
XML Catalog support

1 wakaba 1.1
2     =head1 NAME
3    
4     SuikaWiki::Markup::XML --- SuikaWiki: Simple well-formed document fragment generator
5    
6     =head1 DESCRIPTION
7    
8     This module can be used to generate the document fragment of XML, SGML or
9     other well-formed (in XML meaning) data formats with the object oriented manner.
10    
11     This module cannot be used to parse XML (or other marked-up) document (or its fragment)
12     by itself, nor is compatible with other huge packages such as XML::Parser. The only purpose
13     of this module is to make it easy for tiny perl scripts to GENERATE well-formed
14     markup constructures. (SuikaWiki is not "tiny"? Oh, yes, I see:-))
15    
16     =cut
17    
18     package SuikaWiki::Markup::XML;
19     use strict;
20 wakaba 1.11 our $VERSION = do{my @r=(q$Revision: 1.10 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
21 wakaba 1.9 use overload '""' => \&outer_xml,
22 wakaba 1.2 fallback => 1;
23     use Char::Class::XML qw!InXML_NameStartChar InXMLNameChar InXML_NCNameStartChar InXMLNCNameChar!;
24 wakaba 1.1 our %Namespace_URI_to_prefix = (
25     'DAV:' => [qw/dav webdav/],
26     'http://greenbytes.de/2002/rfcedit' => [qw/ed/],
27     'http://icl.com/saxon' => [qw/saxon/],
28     'http://members.jcom.home.ne.jp/jintrick/2003/02/site-concept.xml#' => ['', qw/sitemap/],
29     'http://purl.org/dc/elements/1.1/' => [qw/dc dc11/],
30     'http://purl.org/rss/1.0/' => ['', qw/rss rss10/],
31     'http://suika.fam.cx/~wakaba/lang/rfc/translation/' => [qw/ja/],
32     'http://www.mozilla.org/xbl' => ['', qw/xbl/],
33 wakaba 1.3 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => [qw/rdf/],
34 wakaba 1.1 'http://www.w3.org/1999/xhtml' => ['', qw/h h1 xhtml xhtml1/],
35     'http://www.w3.org/1999/xlink' => [qw/l xlink/],
36     'http://www.w3.org/1999/XSL/Format' => [qw/fo xslfo xsl-fo xsl/],
37     'http://www.w3.org/1999/XSL/Transform' => [qw/t s xslt xsl/],
38     'http://www.w3.org/1999/XSL/TransformAlias' => [qw/axslt axsl xslt xsl/],
39     'http://www.w3.org/2000/01/rdf-schema#' => [qw/rdfs/],
40     'http://www.w3.org/2000/svg' => ['', qw/s svg/],
41     'http://www.w3.org/2002/06/hlink' => [qw/h hlink/],
42     'http://www.w3.org/2002/06/xhtml2' => ['', qw/h h2 xhtml xhtml2/],
43 wakaba 1.3 'http://www.w3.org/2002/07/owl' => [qw/owl/],
44 wakaba 1.6 'http://www.w3.org/2002/xforms/cr' => [qw/f xforms/],
45 wakaba 1.1 'http://www.w3.org/TR/REC-smil' => ['', qw/smil smil1/],
46 wakaba 1.3 'http://www.wapforum.org/2001/wml' => [qw/wap/],
47 wakaba 1.1 'http://xml.apache.org/xalan' => [qw/xalan/],
48     'mailto:julian.reschke@greenbytes.de?subject=rcf2629.xslt' => [qw/myns/],
49     'urn:schemas-microsoft-com:vml' => [qw/v vml/],
50     'urn:schemas-microsoft-com:xslt' => [qw/ms msxsl msxslt/],
51     'urn:x-suika-fam-cx:markup:ietf:html:3:draft:00' => ['', qw/H HTML HTML3/],
52 wakaba 1.7 'urn:x-suika-fam-cx:markup:ietf:rfc:2629' => ['', qw/rfc rfc2629/],
53 wakaba 1.1 );
54     my %Cache;
55 wakaba 1.10 our %NS = (
56 wakaba 1.7 SGML => 'urn:x-suika-fam-cx:markup:sgml:',
57 wakaba 1.10 internal_attr_duplicate => 'http://suika.fam.cx/~wakaba/-temp/2003/05/17/invalid-attr#',
58     internal_invalid_sysid => 'http://system.identifier.invalid/',
59     internal_ns_invalid => 'http://suika.fam.cx/~wakaba/-temp/2003/05/17/unknown-namespace#',
60 wakaba 1.8 xml => 'http://www.w3.org/XML/1998/namespace',
61     xmlns => 'http://www.w3.org/2000/xmlns/',
62 wakaba 1.7 );
63 wakaba 1.1
64     =head1 METHODS
65    
66     =over 4
67    
68     =item $x = SuikaWiki::Markup::XML->new (%options)
69    
70     Returns new instance of the module. It is itself a node.
71    
72 wakaba 1.4 Available options: C<data_type>, C<default_decl>, C<type> (default: C<#element>), C<local_name>, C<namespace_uri> and C<value>.
73 wakaba 1.1
74     =cut
75    
76     sub new ($;%) {
77     my $class = shift;
78     my $self = bless {@_}, $class;
79     $self->{type} ||= '#element';
80 wakaba 1.5 if ($self->{qname}) {
81     ($self->{namespace_prefix}, $self->{local_name}) = $self->_ns_parse_qname ($self->{qname});
82     $self->{_qname} = $self->{qname};
83     }
84     if (defined $self->{namespace_prefix}) {
85     $self->{namespace_prefix} .= ':' if $self->{namespace_prefix}
86     && substr ($self->{namespace_prefix}, -1) ne ':';
87 wakaba 1.10 $self->{ns}->{$self->{namespace_prefix}||''} = $self->{namespace_uri}
88     if defined $self->{namespace_uri};
89 wakaba 1.5 }
90 wakaba 1.4 for (qw/local_name value/) {
91 wakaba 1.10 if ($self->_is_same_class ($self->{$_})) {
92 wakaba 1.2 $self->{$_}->{parent} = $self;
93     }
94     }
95 wakaba 1.1 $self->{node} ||= [];
96     $self;
97     }
98    
99 wakaba 1.5 sub _ns_parse_qname ($$) {
100 wakaba 1.10 my $qname = $_[1];
101 wakaba 1.5 if ($qname =~ /:/) {
102     return split /:/, $qname, 2;
103     } else {
104     return (undef, $qname);
105     }
106     }
107 wakaba 1.1
108     =item $x->append_node ($node)
109    
110     Appending given node to the object (as the last child).
111     If the type of given node is C<#fragment>, its all children, not the node
112     itself, are appended.
113    
114     This method returns the appended node unless the type of given node is C<#fragment>.
115     In such cases, this node (C<$x>) is returned.
116    
117 wakaba 1.2 Available options: C<node_or_text>.
118    
119 wakaba 1.1 =cut
120    
121 wakaba 1.2 sub append_node ($$;%) {
122 wakaba 1.1 my $self = shift;
123 wakaba 1.2 my ($new_node, %o) = @_;
124     unless (ref $new_node) {
125     if ($o{node_or_text}) {
126     return $self->append_text ($new_node);
127     } else {
128 wakaba 1.10 die "append_node: Invalid node";
129 wakaba 1.2 }
130     }
131 wakaba 1.1 if ($new_node->{type} eq '#fragment') {
132     for (@{$new_node->{node}}) {
133     push @{$self->{node}}, $_;
134     $_->{parent} = $self;
135     }
136     $self;
137     } else {
138     push @{$self->{node}}, $new_node;
139     $new_node->{parent} = $self;
140     $new_node;
141     }
142     }
143    
144     =item $new_node = $x->append_new_node (%options)
145    
146     Appending a new node. The new node is returned.
147    
148     Available options: C<type>, C<namespace_uri>, C<local_name>, C<value>.
149    
150     =cut
151    
152     sub append_new_node ($;%) {
153     my $self = shift;
154 wakaba 1.10 my $new_node = __PACKAGE__->new (@_);
155 wakaba 1.1 push @{$self->{node}}, $new_node;
156     $new_node->{parent} = $self;
157     $new_node;
158     }
159    
160     =item $new_node = $x->append_text ($text)
161    
162     Appending given text as a new text node. The new text node is returned.
163    
164     =cut
165    
166     sub append_text ($$;%) {
167     my $self = shift;
168     my $s = shift;
169 wakaba 1.10 $self->append_new_node (type => '#text', value => $s);
170 wakaba 1.1 }
171    
172 wakaba 1.2 sub append_baretext ($$;%) {
173     my $self = shift;
174     my $s = shift;
175     $self->append_new_node (type => '#xml', value => $s);
176     }
177    
178 wakaba 1.9 sub remove_child_node ($$) {
179     my ($self, $node) = @_;
180     return unless ref $node;
181     $node = overload::StrVal ($node);
182     $self->{node} = [grep { overload::StrVal ($_) ne $node } @{$self->{node}}];
183     }
184    
185 wakaba 1.1 =item $attr_node = $x->get_attribute ($local_name, %options)
186    
187     Returns the attribute node whose local-name is C<$local_name>.
188    
189     Available options: C<namespace_uri>, C<make_new_node>.
190    
191     =cut
192    
193     sub get_attribute ($$;%) {
194 wakaba 1.10 my ($self, $name, %o) = @_;
195 wakaba 1.1 for (@{$self->{node}}) {
196 wakaba 1.10 if ($_->{type} eq '#attribute'
197     && $_->{local_name} eq $name
198     && $o{namespace_uri} eq $_->{namespace_uri}) {
199 wakaba 1.1 return $_;
200     }
201     }
202     ## Node is not exist
203     if ($o{make_new_node}) {
204 wakaba 1.10 return $self->append_new_node (type => '#attribute', local_name => $name,
205     namespace_uri => $o{namespace_uri});
206 wakaba 1.1 } else {
207     return undef;
208     }
209     }
210    
211     =item $attr_node = $x->set_attribute ($local_name => $value, %options)
212    
213     Set the value of the attribute. The attribute node is returned.
214    
215     Available options: C<namespace_uri>.
216    
217     =cut
218    
219     sub set_attribute ($$$;%) {
220 wakaba 1.10 my ($self, $name, $val, %o) = @_;
221     if ({qw/ARRAY 1 HASH 1 CODE 1/}->{ref ($val)}) {
222 wakaba 1.7 ## TODO: common error handling
223     require Carp;
224     Carp::croak "set_attribute: new attribute value must be string or blessed object";
225     }
226 wakaba 1.1 for (@{$self->{node}}) {
227 wakaba 1.10 if ($_->{type} eq '#attribute'
228     && $_->{local_name} eq $name
229     && $o{namespace_uri} eq $_->{namespace_uri}) {
230 wakaba 1.1 $_->{value} = $val;
231     $_->{node} = [];
232     return $_;
233     }
234     }
235 wakaba 1.10 return $self->append_new_node (type => '#attribute', local_name => $name,
236     value => $val, namespace_uri => $o{namespace_uri});
237 wakaba 1.1 }
238    
239     =item \@children = $x->child_nodes
240    
241     Returns an array reference to child nodes.
242    
243 wakaba 1.4 =item $local_name = $x->local_name ([$new_name])
244 wakaba 1.1
245 wakaba 1.4 Returns or set the local-name.
246    
247     =item $uri = $x->namespace_uri ([$new_uri])
248    
249     Returns or set namespace name (URI) of the element or the attribute
250 wakaba 1.1
251 wakaba 1.5 =item $uri = $x->namespace_prefix ([$new_prefix])
252    
253     Returns or set namespace prefix of the element or the attribute.
254     You may give C<$new_prefix> in form either 'foo' or 'foo:'.
255     To indicate "default" prefix, use '' (length == 0 string).
256    
257 wakaba 1.1 =item $type = $x->node_type
258    
259     Returns the node type.
260    
261     =item $node = $x->parent_node
262    
263     Returns the parent node. If there is no parent node, undef is returned.
264    
265     =cut
266    
267 wakaba 1.10 sub child_nodes ($) { $_[0]->{node} }
268 wakaba 1.4 sub local_name ($;$) {
269     my ($self, $newname) = @_;
270 wakaba 1.10 $self->{local_name} = $newname if $newname;
271 wakaba 1.4 if (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') {
272     $self->{local_name}->{local_name};
273     } else {
274     $self->{local_name}
275     }
276     }
277 wakaba 1.10 sub node_type ($) { $_[0]->{type} }
278     sub parent_node ($) { $_[0]->{parent} }
279 wakaba 1.1
280 wakaba 1.4 ## TODO: obsolete
281     sub target_name ($;$) {
282     my ($self, $new) = @_;
283 wakaba 1.10 $self->{target_name} = $new if defined $new;
284 wakaba 1.4 $self->{target_name};
285     }
286    
287     sub namespace_uri ($;$) {
288     my ($self, $new_uri) = @_;
289 wakaba 1.10 $self->{namespace_uri} = $new_uri if defined $new_uri;
290 wakaba 1.4 $self->{namespace_uri};
291     }
292 wakaba 1.5 sub namespace_prefix ($;$) {
293     my ($self, $new_pfx) = @_;
294     if (defined $new_pfx && $self->{namespace_uri}) {
295     $new_pfx .= ':' if $new_pfx;
296     $self->{namespace_prefix} = $new_pfx;
297     $self->{ns}->{$new_pfx} = $self->{namespace_uri};
298     }
299     $self->_get_namespace_prefix ($self->{namespace_uri});
300     }
301 wakaba 1.4
302 wakaba 1.1 =item $i = $x->count
303    
304     Returns the number of child nodes.
305    
306     =cut
307    
308     # TODO: support counting by type
309     sub count ($;@) {
310 wakaba 1.10 (defined $_[0]->{value} ? 1 : 0) + scalar @{$_[0]->{node}};
311 wakaba 1.1 }
312    
313     # $prefix = $x->_get_namespace_prefix ($namespace_uri)
314     sub _get_namespace_prefix ($$;%) {
315 wakaba 1.10 my ($self, $uri, %o) = @_;
316 wakaba 1.1 if (defined (my $p = $self->_uri_to_prefix ($uri, undef, %o))) {
317     return $p if $self->_prefix_to_uri ($p) eq $uri;
318     } if ($Namespace_URI_to_prefix{$uri}) {
319     for (@{$Namespace_URI_to_prefix{$uri}}) {
320     my $pfx = $_; $pfx .= ':' if $pfx;
321     if ($self->_check_namespace_prefix ($pfx) && !$self->_prefix_to_uri ($pfx)) {
322     return $self->_uri_to_prefix ($uri => $pfx, %o);
323     }
324     }
325     } else {
326     my ($u_r_i, $pfx) = ($uri);
327 wakaba 1.4 $u_r_i =~ s/[^0-9A-Za-z._-]+/ /g;
328     my @u_r_i = split / /, $u_r_i;
329     for (reverse @u_r_i) {
330     if (s/([A-Za-z][0-9A-Za-z._-]+)//) {
331     my $p_f_x = $1 . ':';
332     next if lc (substr ($p_f_x, 0, 3)) eq 'xml';
333     unless ($self->_prefix_to_uri ($p_f_x)) {
334     $pfx = $p_f_x;
335     last;
336     }
337 wakaba 1.1 }
338     }
339     if ($pfx) {
340     return $self->_uri_to_prefix ($uri => $pfx, %o);
341     } else {
342     while (1) {
343     my $pfx = 'ns'.(++$self->{ns}->{-anonymous}).':';
344     unless ($self->_prefix_to_uri ($pfx)) {
345     return $self->_uri_to_prefix ($uri => $pfx, %o);
346     }
347     }
348     }
349     }
350     }
351    
352 wakaba 1.4 sub _set_prefix_to_uri ($$$;%) {
353     my ($self, $prefix => $uri, %o) = @_;
354     return undef unless $self->_check_namespace_prefix ($prefix);
355     $self->{ns}->{$prefix} = $uri;
356     $self->_prefix_to_uri ($prefix);
357     }
358    
359     ## TODO: removing ns declare (1.1) support
360     # $uri or undef = $x->_prefix_to_uri ($prefix)
361 wakaba 1.1 sub _prefix_to_uri ($$;$%) {
362 wakaba 1.4 my ($self, $prefix, %o) = @_;
363 wakaba 1.1 return undef unless $self->_check_namespace_prefix ($prefix);
364     if (uc (substr $prefix, 0, 3) eq 'XML') {
365 wakaba 1.8 return $NS{xml} if $prefix eq 'xml:';
366     return $NS{xmlns} if $prefix eq 'xmlns:';
367 wakaba 1.2 }
368     if (defined $self->{ns}->{$prefix}) {
369 wakaba 1.1 $self->{ns}->{$prefix};
370     } elsif (ref $self->{parent}) {
371     shift; # $self
372     $self->{parent}->_prefix_to_uri (@_);
373     } else {
374     undef;
375     }
376     }
377    
378     # $prefix or undef = $x->_uri_to_prefix ($uri => [$new_prefix], %options)
379     # use_no_prefix (default: 1): Allow default namespace (no prefix).
380     sub _uri_to_prefix ($$;$%) {
381     my ($self, $uri, $new_prefix, %o) = @_;
382     if (defined $new_prefix && $self->_check_namespace_prefix ($new_prefix)) {
383     $self->{ns}->{$new_prefix} = $uri;
384     $new_prefix;
385     } else {
386 wakaba 1.8 return 'xml:' if $uri eq $NS{xml};
387     return 'xmlns:' if $uri eq $NS{xmlns};
388 wakaba 1.1 for (keys %{$self->{ns}||{}}) {
389     next if ($_ eq '') && !(!defined $o{use_no_prefix} || $o{use_no_prefix});
390     return $_ if $self->{ns}->{$_} eq $uri;
391     }
392 wakaba 1.2 if (ref ($self->{parent}) && $self->{parent}->{type} ne '#declaration') {
393 wakaba 1.1 shift; # $self
394     $self->{parent}->_uri_to_prefix (@_);
395     } else {
396     undef;
397     }
398     }
399     }
400    
401     =item $x->define_new_namespace ($prefix => $uri)
402    
403     Defines a new XML Namespace. This method is useful for root or section-level
404     element node.
405    
406     Returned value is unspecified in this version of this module.
407    
408     =cut
409    
410 wakaba 1.4 ## TODO: structured URI (such as http://&server;/) support
411 wakaba 1.1 sub define_new_namespace ($$$) {
412     my ($self, $prefix, $uri) = @_;
413     if ($prefix eq '' || $self->_check_ncname ($prefix)) {
414     $prefix .= ':' if $prefix && substr ($prefix, -1) ne ':';
415 wakaba 1.4 $self->_set_prefix_to_uri ($prefix => $uri);
416 wakaba 1.1 } else {
417     undef;
418     }
419     }
420    
421 wakaba 1.4 =item $uri = $x->defined_namespace_prefix ($prefix)
422    
423     Query whether the namespace prefix is defined or not.
424     If defined, return namespace name (URI).
425    
426     =cut
427    
428     sub defined_namespace_prefix ($$) {
429     my ($self, $prefix) = @_;
430 wakaba 1.10 $prefix .= ':' if $prefix;
431 wakaba 1.4 $self->_prefix_to_uri ($prefix);
432     }
433    
434 wakaba 1.1 =item $qname = $x->qname
435    
436     Returns QName ((namespace-)qualified name) of the element type.
437     Undef is retuened when the type does not have its QName
438     (ie. when type is neither C<#element> or C<#attribute>).
439    
440     =cut
441    
442     sub qname ($) {
443     my $self = shift;
444     if ($self->_check_ncname ($self->{local_name})) {
445     if ($self->{type} eq '#element') {
446     $self->{_qname} = $self->_get_namespace_prefix ($self->{namespace_uri}) . $self->{local_name}
447     unless $self->{_qname};
448     return $self->{_qname};
449     } elsif ($self->{type} eq '#attribute') {
450     return $self->attribute_name;
451     }
452     }
453     undef;
454     }
455    
456 wakaba 1.9 sub merge_external_subset ($) {
457     my $self = shift;
458     unless ($self->{type} eq '#declaration' && $self->{namespace_uri} eq $NS{SGML}.'doctype') {
459     return unless $self->{type} eq '#document' || $self->{type} eq '#fragment';
460     for (@{$self->{node}}) {
461     $_->merge_external_subset;
462     }
463     return;
464     }
465     my $xsub = $self->get_attribute ('external-subset');
466     return unless ref $xsub;
467     for (@{$xsub->{node}}) {
468     $_->{parent} = $self;
469     }
470     push @{$self->{node}}, @{$xsub->{node}};
471     $self->remove_child_node ($xsub);
472     $self->remove_child_node ($self->get_attribute ('PUBLIC'));
473     $self->remove_child_node ($self->get_attribute ('SYSTEM'));
474     $self->remove_marked_section;
475     }
476    
477     sub remove_marked_section ($) {
478     my $self = shift;
479     my @node;
480     for (@{$self->{node}}) {
481     $_->remove_marked_section;
482     }
483     for (@{$self->{node}}) {
484     if ($_->{type} ne '#section') {
485     push @node, $_;
486     } else {
487     my $status = $_->get_attribute ('status', make_new_node => 1)->inner_text;
488     if ($status eq 'CDATA') {
489     $_->{type} = '#text';
490     $_->remove_child_node ($_->get_attribute ('status'));
491     push @node, $_;
492     } elsif ($status ne 'IGNORE') { # INCLUDE
493     for my $e (@{$_->{node}}) {
494     if ($e->{type} ne '#attribute') {
495     $e->{parent} = $self;
496     push @node, $e;
497     }
498     }
499     }
500     }
501     }
502     $self->{node} = \@node;
503     }
504    
505 wakaba 1.7 sub remove_references ($) {
506     my $self = shift;
507     my @node;
508 wakaba 1.8 for (@{$self->{node}}) {
509     $_->remove_references;
510     }
511 wakaba 1.7 for (@{$self->{node}}) {
512     if ($_->{type} ne '#reference'
513     || ($self->{type} eq '#declaration' && $_->{namespace_uri} eq $NS{SGML}.'entity')) {
514     push @node, $_;
515     } else {
516     if ($_->{namespace_uri} =~ /char/) {
517     my $e = ref ($_)->new (type => '#text', value => chr $_->{value});
518     $e->{parent} = $self;
519     push @node, $e;
520     } elsif ($_->{flag}->{smxp__ref_expanded}) {
521     for my $e (@{$_->{node}}) {
522 wakaba 1.8 if ($e->{type} ne '#attribute') {
523     $e->{parent} = $self;
524     push @node, $e;
525     }
526 wakaba 1.7 }
527     } else { ## reference is not expanded
528     push @node, $_;
529     }
530     }
531 wakaba 1.8 $_->{flag}->{smxp__defined_with_param_ref} = 0
532     if $_->{flag}->{smxp__defined_with_param_ref};
533 wakaba 1.7 }
534     $self->{node} = \@node;
535     }
536    
537 wakaba 1.8 sub resolve_relative_uri ($;$%) {
538     require URI;
539     my ($self, $rel, %o) = @_;
540     my $base = $self->get_attribute ('base', namespace_uri => $NS{xml});
541     $base = ref ($base) ? $base->inner_text : undef;
542     if ($base !~ /^(?:[0-9A-Za-z.+-]|%[0-9A-Fa-f]{2})+:/) { # $base is relative
543     $base = $self->_resolve_relative_uri_by_parent ($base, \%o);
544     }
545     eval q{ ## Catch error such as $base is 'data:,foo' (non hierarchic scheme,...)
546     return URI->new ($rel)->abs ($base || '.'); ## BUG (or spec) of URI: $base == false
547     } or return $rel;
548     }
549     sub _resolve_relative_uri_by_parent ($$$) {
550     my ($self, $rel, $o) = @_;
551     if (ref $self->{parent}) {
552     if (!$o->{use_references_base_uri} && $self->{parent}->{type} eq '#reference') {
553     ## This case is necessary to work with
554     ## <element> <!-- element can have base URI -->
555     ## text <!-- text cannot have base URI -->
556     ## &ent; <!-- ref's base URI is referred entity's one (in this module) -->
557     ## <!-- expantion of ent -->
558     ## entity's text <!-- text cannot have base URI, so use <element>'s one -->
559     ## <entitys-element/> <!-- element can have base URI, otherwise ENTITY's one -->
560     ## </element>
561     return $self->{parent}->_resolve_relative_uri_by_parent ($rel, $o);
562     } else {
563     return $self->{parent}->resolve_relative_uri ($rel, %$o);
564     }
565     } else {
566     return $rel;
567     }
568     }
569     sub base_uri ($;$) {
570     my ($self, $new_uri) = @_;
571     my $base;
572     if (defined $new_uri) {
573     $base = $self->set_attribute (base => $new_uri, namespace_uri => $NS{xml});
574     }
575     $base ||= $self->get_attribute ('base', namespace_uri => $NS{xml});
576     ref ($base) ? $base->inner_text : undef;
577     }
578    
579 wakaba 1.1 =item $tag = $x->start_tag
580    
581 wakaba 1.4 Returns the start tag (or something that marks the start of something, such as '<!--'
582 wakaba 1.1 for C<#comment> nodes).
583    
584     =cut
585    
586     sub start_tag ($) {
587     my $self = shift;
588     if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) {
589     my $r = '<';
590     $r .= $self->qname;
591     for (@{$self->{node}}) {
592     $r .= ' ' . $_->outer_xml if $_->node_type eq '#attribute';
593     }
594     for my $prefix (grep !/^-/, keys %{$self->{ns}||{}}) {
595     if ($prefix) {
596     $r .= ' xmlns:'.substr ($prefix, 0, length ($prefix)-1);
597     } else {
598     $r .= ' xmlns';
599     }
600     $r .= '="'.$self->_entitize ($self->{ns}->{$prefix}).'"';
601     }
602     $r .= '>';
603     $r;
604     } elsif ($self->{type} eq '#comment') {
605 wakaba 1.4 '<!--';
606     } elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) {
607     '<?' . ($self->{local_name});
608 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
609 wakaba 1.10 if ($self->{namespace_uri} eq $NS{SGML}.'char:ref:hex') {
610 wakaba 1.2 '&#x';
611 wakaba 1.10 } elsif ($self->{namespace_uri} eq $NS{SGML}.'char:ref') {
612 wakaba 1.2 '&#';
613 wakaba 1.4 } elsif ($self->_check_ncname ($self->{local_name})) {
614 wakaba 1.10 if ($self->{namespace_uri} eq $NS{SGML}.'entity:parameter') {
615 wakaba 1.2 '%';
616     } else {
617     '&';
618     }
619     } else { # error
620     '';
621     }
622 wakaba 1.4 } elsif ($self->{type} eq '#declaration' && $self->{namespace_uri}) {
623     '<!' . {
624 wakaba 1.8 $NS{SGML}.'attlist' => 'ATTLIST',
625     $NS{SGML}.'doctype' => 'DOCTYPE',
626     $NS{SGML}.'element' => 'ELEMENT',
627     $NS{SGML}.'entity' => 'ENTITY',
628     $NS{SGML}.'entity:parameter' => 'ENTITY',
629     $NS{SGML}.'notation' => 'NOTATION',
630 wakaba 1.10 }->{$self->{namespace_uri}} . ' ' .
631     ($self->{namespace_uri} eq $NS{SGML}.'entity:parameter' ?
632 wakaba 1.7 ($self->{flag}->{smxp__defined_with_param_ref}?'':'% '):'');
633 wakaba 1.2 } elsif ($self->{type} eq '#section') {
634 wakaba 1.9 '<![';
635 wakaba 1.1 } else {
636     '';
637     }
638     }
639    
640     =item $tag = $x->end_tag
641    
642 wakaba 1.4 Returns the end tag (or something that marks the end of something, such as '-->'
643 wakaba 1.1 for C<#comment> nodes).
644    
645     =cut
646    
647     sub end_tag ($) {
648     my $self = shift;
649     if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) {
650     '</' . $self->qname . '>';
651     } elsif ($self->{type} eq '#comment') {
652 wakaba 1.4 '-->';
653 wakaba 1.1 } elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) {
654     '?>';
655 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
656     ';';
657 wakaba 1.4 } elsif ($self->{type} eq '#declaration' && $self->{namespace_uri}) {
658     '>';
659 wakaba 1.1 } elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) {
660 wakaba 1.4 '>';
661 wakaba 1.2 } elsif ($self->{type} eq '#section') {
662 wakaba 1.9 ']]>';
663 wakaba 1.1 } else {
664     '';
665     }
666     }
667    
668     =item $tag = $x->attribute_name
669    
670     Returns the attribute name.
671    
672     =cut
673    
674     sub attribute_name ($) {
675     my $self = shift;
676     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
677     ($self->{namespace_uri} ?
678     (ref $self->{parent} ? $self->{parent} : $self)
679     ->_get_namespace_prefix ($self->{namespace_uri}, use_no_prefix => 0) : '')
680     .$self->{local_name};
681     } else {
682     '';
683     }
684     }
685    
686     =item $tag = $x->attribute_value
687    
688     Returns the attribute value.
689    
690     =cut
691    
692 wakaba 1.4 sub attribute_value ($;%) {
693 wakaba 1.10 my ($self, %o) = @_;
694 wakaba 1.1 if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
695 wakaba 1.5 my $r = '"';
696 wakaba 1.10 my $isc = $self->_is_same_class ($self->{value});
697     $r .= $self->_entitize ($self->{value}) unless $isc;
698     for (($isc?$self->{value}:()), @{$self->{node}}) {
699     my $nt = $_->{type};
700 wakaba 1.4 if ($nt eq '#reference' || $nt eq '#xml') {
701     $r .= $_->outer_xml;
702     } elsif ($nt ne '#attribute') {
703 wakaba 1.10 $r .= $self->_entitize ($_->inner_text);
704 wakaba 1.4 }
705     }
706     return $r . '"';
707     } else {
708     '';
709     }
710     }
711    
712     sub entity_value ($;%) {
713 wakaba 1.10 my ($self, %o) = @_;
714 wakaba 1.4 my $_entitize = sub {
715     my $s = shift;
716 wakaba 1.7 $s =~ s/&/&#x26;/g;
717     $s =~ s/&#x26;(\p{InXML_NameStartChar}\p{InXMLNameChar}*);/&$1;/g;
718 wakaba 1.4 $s =~ s/%/&#x25;/g;
719 wakaba 1.7 $s =~ s/"/&#x22;/g;
720 wakaba 1.4 $s;
721     };
722     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
723     my $r = '"' . &$_entitize ($self->{value});
724     for (@{$self->{node}}) {
725 wakaba 1.10 my $nt = $_->{type};
726 wakaba 1.4 if ($nt eq '#reference' || $nt eq '#xml') {
727     $r .= $_->outer_xml;
728     } elsif ($nt ne '#attribute') {
729     $r .= &$_entitize ($_->inner_text);
730 wakaba 1.1 }
731     }
732 wakaba 1.4 return $r . '"';
733 wakaba 1.1 } else {
734     '';
735     }
736     }
737    
738 wakaba 1.7 ## This method should be called only from SuikaWiki::Markup::XML::* family modules,
739     ## since this is NOT a FORMAL interface.
740     sub _entity_parameter_literal_value ($;%) {
741     my $self = shift;
742 wakaba 1.10 my $r = '';
743     my $isc = $self->_is_same_class ($self->{value});
744     $r = $self->{value} unless $isc;
745     for (($isc?$self->{value}:()), @{$self->{node}}) {
746     my $nt = $_->{type};
747     if ($nt eq '#xml') {
748     $r .= $_->outer_xml;
749     } elsif ($nt ne '#attribute') {
750     $r .= $_->inner_text;
751 wakaba 1.7 }
752 wakaba 1.10 }
753     $r;
754 wakaba 1.7 }
755    
756 wakaba 1.1 =item $tag = $x->attribute
757    
758     Returns the attribute (name and value pair).
759    
760     =cut
761    
762     sub attribute ($) {
763     my $self = shift;
764     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
765     $self->attribute_name . '=' . $self->attribute_value;
766     } else {
767     '';
768     }
769     }
770    
771 wakaba 1.2 sub external_id ($;%) {
772     my $self = shift;
773     my %o = @_;
774     my ($pubid, $sysid, $ndata);
775     for (@{$self->{node}}) {
776 wakaba 1.4 if ($_->{type} eq '#attribute' && !$_->{namespace_uri}) {
777 wakaba 1.2 if ($_->{local_name} eq 'PUBLIC') {
778 wakaba 1.4 $pubid = $_->inner_text;
779 wakaba 1.2 } elsif ($_->{local_name} eq 'SYSTEM') {
780 wakaba 1.4 $sysid = $_->inner_text;
781 wakaba 1.2 } elsif ($_->{local_name} eq 'NDATA') {
782 wakaba 1.4 $ndata = $_->inner_text;
783     undef $ndata unless $self->_check_ncname ($ndata);
784 wakaba 1.2 }
785     }
786     }
787     my $r = '';
788 wakaba 1.9 if (defined $pubid) {
789 wakaba 1.4 $pubid =~ s|([^\x0A\x0D\x20A-Za-z0-9'()+,./:=?;!*#\@\$_%-])|sprintf '%%%02X', ord $1|ges;
790 wakaba 1.9 $pubid = '"' . $pubid . '"';
791 wakaba 1.4 }
792 wakaba 1.9 if (defined $sysid) {
793 wakaba 1.10 if (index ($sysid, '"') > -1) {
794     if (index ($sysid, "'") > -1) {
795     $sysid =~ s/"/%22/; $sysid = '"' . $sysid . '"';
796 wakaba 1.4 } else {
797     $sysid = "'" . $sysid . "'";
798     }
799     } else {
800     $sysid = '"' . $sysid . '"';
801     }
802     }
803 wakaba 1.2 if ($pubid && $sysid) {
804 wakaba 1.9 $r = 'PUBLIC ' . $pubid . ' ' . $sysid;
805 wakaba 1.2 } elsif ($sysid) {
806 wakaba 1.4 $r = 'SYSTEM ' . $sysid;
807 wakaba 1.2 } elsif ($pubid && $o{allow_pubid_only}) {
808 wakaba 1.9 $r = 'PUBLIC ' . $pubid;
809 wakaba 1.2 }
810     if ($r && $ndata && $o{use_ndata}) {
811 wakaba 1.4 $r .= ' NDATA ' . $ndata;
812 wakaba 1.2 }
813     $r;
814     }
815    
816     =item $s = $x->content_spec
817    
818     Generates contentspec of element type declaration (ex. C<(E1 | E2 | E3)>)
819     or AttDef of attribute declaration (ex. C<name CDATA #REQUIRED>).
820    
821     =cut
822    
823     sub content_spec ($) {
824     my $self = shift;
825     if ($self->{type} eq '#element') {
826 wakaba 1.10 my $text = 0;
827     my $contentspec = join ' | ', map {$_->qname} grep {$text = 1 if $_->{type} eq '#text'; $_->{type} eq '#element'} @{$self->{node}};
828 wakaba 1.2 $contentspec = '#PCDATA' . ($contentspec ? ' | ' . $contentspec : '') if $text;
829    
830     return $contentspec ? '(' . $contentspec . ')' : 'EMPTY';
831     } elsif ($self->{type} eq '#attribute') {
832     my $attdef = $self->qname . "\t" . ($self->{data_type} || 'CDATA') . "\t";
833     my $default = $self->{default_decl};
834     $default .= ' ' . $self->attribute_value if $default eq '#FIXED';
835     unless ($default) {
836     $default = defined $self->{value} ? $self->attribute_value : '#IMPLIED';
837     }
838     return $attdef . $default;
839     }
840     }
841    
842 wakaba 1.1 =item $tag = $x->inner_xml
843    
844     Returns the content of the node in XML syntax. (In case of the C<#element> nodes,
845     element content without start- and end-tags is returned.)
846    
847     Note that for not all node types the behavior of this method is defined.
848     For example, returned value of C<#attribute> might be unexpected one
849     in this version of this module.
850    
851     =cut
852    
853 wakaba 1.3 sub inner_xml ($;%) {
854 wakaba 1.10 my ($self, %o) = @_;
855 wakaba 1.1 my $r = '';
856     if ($self->{type} eq '#comment') {
857     $r = $self->inner_text;
858     $r =~ s/--/-&#x45;/g;
859     } elsif ($self->{type} eq '#pi') {
860 wakaba 1.10 my $isc = $self->_is_same_class ($self->{value});
861     if (!$isc && length ($self->{value})) {
862 wakaba 1.1 $r = ' ' . $self->{value};
863 wakaba 1.2 $r =~ s/\?>/? >/g; # Same replacement as of the recommendation of XSLT:p.i.
864 wakaba 1.1 }
865 wakaba 1.10 for (($isc?$self->{value}:()), @{$self->{node}}) {
866 wakaba 1.1 if ($_->node_type eq '#attribute') {
867     $r .= ' ' . $_->attribute;
868     } else {
869     my $s = $_->inner_text;
870 wakaba 1.10 if (length $s) {
871     $s =~ s/\?>/? >/g;
872     $r .= ' ' . $s;
873     }
874 wakaba 1.1 }
875     }
876 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
877 wakaba 1.10 if ($self->{namespace_uri} eq $NS{SGML}.'char:ref:hex') {
878 wakaba 1.2 $r = sprintf '%02X', $self->{value};
879 wakaba 1.10 } elsif ($self->{namespace_uri} eq $NS{SGML}.'char:ref') {
880 wakaba 1.2 $r = sprintf '%02d', $self->{value};
881 wakaba 1.10 } elsif (ref ($self->{local_name}) && $self->{local_name}->{type} eq '#declaration') {
882 wakaba 1.4 $r = $self->{local_name}->{local_name};
883     } elsif ($self->_check_ncname ($self->{local_name})) {
884     $r = ($self->{local_name});
885 wakaba 1.2 } else { # error
886     $r = '';
887     }
888 wakaba 1.1 } elsif ($self->{type} eq '#declaration') {
889 wakaba 1.8 if ($self->{namespace_uri} eq $NS{SGML}.'doctype') {
890 wakaba 1.4 my $root = $self->get_attribute ('qname');
891     ref $root ? ($root = $root->inner_text) : (ref $self->{parent} ? (do {
892     for (@{$self->{parent}->{node}}) {
893     if ($_->{type} eq '#element') {
894     $root = $_->qname;
895     last if $root;
896     }
897     }
898     $root = '#IMPLIED';
899     }) : ($root = '#IMPLIED')); ## error!
900     my ($isub, $xid) = ('', $self->external_id);
901     for (@{$self->{node}}) {
902     $isub .= $_->outer_xml if $_->{type} ne '#attribute';
903 wakaba 1.2 }
904     if ($xid) {
905 wakaba 1.4 $r = $xid;
906     if ($isub) {
907 wakaba 1.7 $r .= " [" . $isub . "]";
908 wakaba 1.4 }
909     } else {
910     if ($isub) {
911 wakaba 1.7 $r = "[" . $isub . "]";
912 wakaba 1.4 } else {
913     $r = "[]";
914     }
915     }
916     $r = $root . ' ' . $r;
917 wakaba 1.9 } elsif ($self->{namespace_uri} eq $NS{SGML}.'entity'
918     || $self->{namespace_uri} eq $NS{SGML}.'entity:parameter'
919     || $self->{namespace_uri} eq $NS{SGML}.'notation') {
920 wakaba 1.4 my %xid_opt;
921 wakaba 1.7 $r = $self->{local_name} . ' ' if !$self->{flag}->{smxp__defined_with_param_ref}
922     && $self->_check_ncname ($self->{local_name});
923 wakaba 1.8 if ($self->{namespace_uri} eq $NS{SGML}.'entity:parameter') {
924 wakaba 1.4 #$r = '% ' . $r;
925 wakaba 1.8 } elsif ($self->{namespace_uri} eq $NS{SGML}.'entity') {
926 wakaba 1.4 $xid_opt{use_ndata} = 1;
927 wakaba 1.8 } elsif ($self->{namespace_uri} eq $NS{SGML}.'notation') {
928 wakaba 1.4 $xid_opt{allow_pubid_only} = 1;
929     }
930    
931 wakaba 1.10 my ($v, $xid) = ($self->{value});
932     $xid = $self->external_id (%xid_opt) unless $self->{flag}->{smxp__defined_with_param_ref};
933 wakaba 1.4 if ($xid) { ## External ID
934 wakaba 1.2 $r .= $xid;
935 wakaba 1.4 } else { ## EntityValue
936     my $entity_value = $self->get_attribute ('value');
937 wakaba 1.7 undef $entity_value if $self->{flag}->{smxp__defined_with_param_ref};
938 wakaba 1.4 if ($entity_value) { # <!ENTITY foo "bar">
939     $r .= $entity_value->entity_value;
940     } else { ## Parameter entity reference
941 wakaba 1.9 my $isc = $self->_is_same_class ($self->{value});
942     $r .= $self->{value} unless $isc;
943     for (($isc?$self->{value}:()), @{$self->{node}}) {
944 wakaba 1.7 $r .= $_->outer_xml unless $_->{type} eq '#attribute';
945 wakaba 1.4 }
946 wakaba 1.2 }
947 wakaba 1.1 }
948 wakaba 1.8 } elsif ($self->{namespace_uri} eq $NS{SGML}.'element') {
949 wakaba 1.4 my $qname = $self->get_attribute ('qname');
950     ref $qname ? $qname = $qname->inner_text : undef;
951     if ($qname && $self->_check_name ($qname)) {
952     ## Element type name is defined
953 wakaba 1.10 ## TODO: fix
954 wakaba 1.4 $r = $qname . ' ';
955     my $rs = $self->inner_text (output_ref_as_is => 1);
956     $r .= $rs || 'ANY';
957 wakaba 1.2 } else {
958 wakaba 1.4 #if (ref $self->{node}->[0] && $self->{node}->[0]->{type} eq '#element') {
959     ### Element prototype is given
960     # $r = $self->{node}->[0]->qname . ' ' . &$to . $self->{node}->[0]->content_spec;
961     #} elsif () {
962     ### Element type name and contentspec is given
963     # $r = $self->{target_name} . ' ' . &$to . ($self->inner_text || 'ANY');
964     #} else {
965 wakaba 1.2 ## (Element type name and contetnspac) is given
966 wakaba 1.4 $r .= $self->inner_text (output_ref_as_is => 1);
967     # || 'Name ANY'; # error
968     #}
969 wakaba 1.2 }
970 wakaba 1.7 ## TODO: reform
971 wakaba 1.11 } elsif ($self->{namespace_uri} eq $NS{SGML}.'attlist') {
972     if ($self->_check_name ($self->{local_name})) {
973     $r = $self->{local_name};
974 wakaba 1.2 }
975     my $t = $self->inner_text (output_ref_as_is => 1);
976     $r .= "\n\t" . $t if $t;
977     $r ||= 'Name'; # error!
978     for (@{$self->{node}}) {
979     if ($_->{type} eq '#attribute') {
980     $r .= "\n\t" . $_->content_spec;
981     }
982 wakaba 1.1 }
983 wakaba 1.2 } else { # unknown
984 wakaba 1.3 for (@{$self->{node}}) {
985     $r .= $_->outer_xml;
986     }
987 wakaba 1.1 }
988 wakaba 1.9 } elsif ($self->{type} eq '#section') {
989     my $status = $self->get_attribute ('status', make_new_node => 1)->inner_text;
990     if ($status eq 'CDATA') {
991     $r = $self->inner_text;
992     $r =~ s/]]>/]]>]]<![CDATA[>/g;
993     $r = 'CDATA['.$r;
994     } else {
995     my $sl = $self->get_attribute ('status_list', make_new_node => 1);
996     if ($sl->{flag}->{smxp__defined_with_param_ref}) {
997     my $isc = $self->_is_same_class ($self->{value});
998     $status = '';
999     $status = $sl->{value} unless $isc;
1000     for (($isc?$sl->{value}:()), @{$sl->{node}}) {
1001     $status .= $_->outer_xml unless $_->{type} eq '#attribute';
1002     }
1003     $r = $status.'['.$r;
1004     } elsif ($status) {
1005     $r = $status.'['.$r;
1006     } else {
1007     ## Must be an ignored section
1008     }
1009     my $isc = $self->_is_same_class ($self->{value});
1010     $r .= join '', map {s/\]\]>/]]&gt;/g; $_} $self->{value} unless $isc;
1011     for (($isc?$self->{value}:()), @{$self->{node}}) {
1012     if ($_->{type} eq '#text') {
1013     $r .= join '', map {s/\]\]>/]]&gt;/g; $_} $_->inner_text; ## Anyway, this is error
1014     } elsif ($_->{type} ne '#attribute') {
1015     $r .= $_->outer_xml;
1016     }
1017     }
1018     }
1019 wakaba 1.1 } else {
1020 wakaba 1.10 my $isc = $self->_is_same_class ($self->{value});
1021     unless ($isc) {
1022     if ($self->{type} ne '#xml') {
1023     $r = $self->_entitize ($self->{value});
1024     } else {
1025     $r = $self->{value};
1026     }
1027 wakaba 1.1 }
1028 wakaba 1.10 for (($isc?$self->{value}:()), @{$self->{node}}) {
1029     my $nt = $_->{type};
1030     if (($self->{option}->{indent})
1031     && ($nt eq '#element' || $nt eq '#comment' || $nt eq '#pi' || $nt eq '#declaration')) {
1032 wakaba 1.1 $r .= "\n";
1033     }
1034     $r .= $_->outer_xml unless $_->node_type eq '#attribute';
1035     }
1036     }
1037     $r;
1038     }
1039    
1040    
1041     =item $tag = $x->outer_xml
1042    
1043     Returns the node in XML syntax.
1044    
1045     =cut
1046    
1047     sub outer_xml ($) {
1048     my $self = shift;
1049     if ($self->{type} eq '#attribute') {
1050 wakaba 1.10 return $self->attribute;
1051 wakaba 1.1 } else {
1052 wakaba 1.4 if ($self->{option}->{indent} && $self->{type} eq '#element') {
1053 wakaba 1.1 my $r = $self->start_tag;
1054     my $c = $self->inner_xml;
1055 wakaba 1.10 if (!length ($c) && $self->{option}->{use_EmptyElemTag}) {
1056 wakaba 1.4 substr ($r, -1) = ' />';
1057     } else {
1058     if ($c) {
1059     $c =~ s/\n/\n /g;
1060     $r .= "\n " . $c . "\n";
1061     }
1062     $r .= $self->end_tag;
1063 wakaba 1.1 }
1064 wakaba 1.10 return $r;
1065 wakaba 1.1 } else {
1066 wakaba 1.4 my $r = $self->start_tag;
1067     my $c = $self->inner_xml;
1068 wakaba 1.8 if ($self->{type} eq '#element' && !length ($c) && $self->{option}->{use_EmptyElemTag}) {
1069 wakaba 1.4 substr ($r, -1) = ' />';
1070     } else {
1071     $r .= $c . $self->end_tag;
1072     }
1073 wakaba 1.10 return $r;
1074 wakaba 1.11 #return '{'.$self->{type}.': '.$r.'}'; ## DEBUG: show structure
1075 wakaba 1.1 }
1076     }
1077     }
1078    
1079     =item $tag = $x->inner_text
1080    
1081     Returns the text content of the node. (In many case the returned value is same
1082     as WinIE DOM C<inner_text ()> function's or XPath C<text()> function's.
1083     But some classes that inherits this module might implement to return other
1084     value (eg. to return the value of the alt attribute of html:img element).
1085    
1086 wakaba 1.2 Available options: C<output_ref_as_is>.
1087    
1088 wakaba 1.1 =cut
1089    
1090 wakaba 1.2 sub inner_text ($;%) {
1091 wakaba 1.1 my $self = shift;
1092 wakaba 1.2 my %o = @_;
1093 wakaba 1.5 my $r = '';
1094 wakaba 1.10 if ($self->{type} eq '#reference'
1095     && ($self->{namespace_uri} eq $NS{SGML}.'char:ref'
1096     || $self->{namespace_uri} eq $NS{SGML}.'char:ref:hex')) {
1097     $r = chr $self->{value};
1098 wakaba 1.7 } elsif ($self->{type} eq '#declaration'
1099     && ($self->{namespace_uri} eq $NS{SGML}.'entity'
1100     || $self->{namespace_uri} eq $NS{SGML}.'entity:parameter')) {
1101 wakaba 1.10 ## TODO:
1102     $r = $self->set_attribute ('value')->inner_text;
1103 wakaba 1.7 } else { # not #reference nor #declaration(ENTITY)
1104 wakaba 1.10 my $isc = $self->_is_same_class ($self->{value});
1105     $r = $self->{value} unless $isc;
1106 wakaba 1.7 if ($o{output_ref_as_is}) { ## output as if RCDATA
1107     $r =~ s/&/&amp;/g;
1108 wakaba 1.10 for my $node (($isc?$self->{value}:()), @{$self->{node}}) {
1109 wakaba 1.7 my $nt = $node->node_type;
1110     if ($nt eq '#reference' || $nt eq '#xml') {
1111     $r .= $node->outer_xml;
1112     } elsif ($nt ne '#attribute') {
1113     $r .= map {s/&/&amp;/g; $_} $node->inner_text;
1114     }
1115     }
1116     } else {
1117 wakaba 1.10 for (($isc?$self->{value}:()), @{$self->{node}}) {
1118     $r .= $_->inner_text unless $_->{type} eq '#attribute';
1119 wakaba 1.2 }
1120     }
1121 wakaba 1.1 }
1122     $r;
1123     }
1124    
1125 wakaba 1.9 sub _is_same_class ($$) {
1126     my ($self, $something) = @_;
1127 wakaba 1.10 return 0 if {qw/ARRAY 1 HASH 1 CODE 1 :nonref: 1/}->{ref ($something) || ':nonref:'};
1128     eval q{$self->_CLASS_NAME eq $something->_CLASS_NAME} ? 1 : 0;
1129 wakaba 1.9 }
1130    
1131     sub root_node ($) {
1132     my $self = shift;
1133     if ($self->{type} eq '#document') {
1134     return $self;
1135     } elsif (ref $self->{parent}) {
1136     return $self->{parent}->root_node;
1137     } else {
1138     return $self;
1139     }
1140 wakaba 1.1 }
1141    
1142 wakaba 1.7 sub _get_entity_manager ($) {
1143     my $self = shift;
1144     if ($self->{type} eq '#document') {
1145     unless ($self->{flag}->{smx__entity_manager}) {
1146     require SuikaWiki::Markup::XML::EntityManager;
1147     $self->{flag}->{smx__entity_manager} = SuikaWiki::Markup::XML::EntityManager->new ($self);
1148     }
1149     return $self->{flag}->{smx__entity_manager};
1150     } elsif (ref $self->{parent}) {
1151     return $self->{parent}->_get_entity_manager;
1152     } else {
1153     unless ($self->{flag}->{smx__entity_manager}) {
1154     require SuikaWiki::Markup::XML::EntityManager;
1155     $self->{flag}->{smx__entity_manager} = SuikaWiki::Markup::XML::EntityManager->new ($self);
1156     }
1157     return $self->{flag}->{smx__entity_manager};
1158     }
1159     }
1160    
1161 wakaba 1.10 sub _CLASS_NAME ($) { 'SuikaWiki::Markup::XML' }
1162 wakaba 1.5
1163 wakaba 1.1 # $s = $x->_entitize ($s)
1164 wakaba 1.4 sub _entitize ($$;%) {
1165     my ($self, $s, %o) = (shift, shift, @_);
1166 wakaba 1.1 $s =~ s/&/&amp;/g;
1167     $s =~ s/</&lt;/g;
1168     $s =~ s/>/&gt;/g;
1169     $s =~ s/"/&quot;/g;
1170 wakaba 1.7 $s =~ s/([\x00-\x08\x0B\x0C\x0E-\x1F])/sprintf '&amp;#%d;', ord $1/ge;
1171 wakaba 1.1 $s;
1172     }
1173    
1174 wakaba 1.2 # 1/0 = $x->_check_name ($s)
1175     sub _check_name ($$) {
1176     my $self = shift;
1177     my $s = shift;
1178     return $Cache{name}->{$s} if defined $Cache{name}->{$s};
1179     if ($s =~ /^\p{InXML_NameStartChar}/ && $s !~ /\P{InXMLNameChar}/) {
1180     # \p{...}('*'/'+'/'{n,}') does not work...
1181     $Cache{name}->{$s} = 1;
1182     1;
1183     } else {
1184     $Cache{name}->{$s} = 0;
1185     0;
1186     }
1187     }
1188 wakaba 1.1 # 1/0 = $x->_check_ncname ($s)
1189     sub _check_ncname ($$) {
1190     my $self = shift;
1191     my $s = shift;
1192     return $Cache{ncname}->{$s} if defined $Cache{ncname}->{$s};
1193     if ($s =~ /^\p{InXML_NCNameStartChar}/ && $s !~ /\P{InXMLNCNameChar}/) {
1194     # \p{...}('*'/'+'/'{n,}') does not work...
1195     $Cache{ncname}->{$s} = 1;
1196     1;
1197     } else {
1198     $Cache{ncname}->{$s} = 0;
1199     0;
1200     }
1201     }
1202    
1203     # 1/0 = $x->_check_namespace_prefix ($s)
1204     sub _check_namespace_prefix ($$) {
1205     my $self = shift;
1206     my $s = shift;
1207     return 0 unless defined $s;
1208     return 1 if $s eq '';
1209     substr ($s, -1, 1) = '' if substr ($s, -1, 1) eq ':';
1210     $self->_check_ncname ($s);
1211     }
1212    
1213 wakaba 1.2 ## TODO: cleaning $self->{node} before outputing, to ensure nodes not to have
1214     ## multiple parents.
1215     ## TODO: normalize namespace URI (removing non URI chars)
1216    
1217     sub flag ($$;$) {
1218     my ($self, $name, $value) = @_;
1219     if (defined $value) {
1220     $self->{flag}->{$name} = $value;
1221     }
1222     $self->{flag}->{$name};
1223     }
1224    
1225 wakaba 1.3 sub option ($$;$) {
1226     my ($self, $name, $value) = @_;
1227     if (defined $value) {
1228     $self->{option}->{$name} = $value;
1229     }
1230     $self->{option}->{$name};
1231     }
1232    
1233 wakaba 1.1 =back
1234    
1235 wakaba 1.2 =head1 NODE TYPES
1236    
1237     =over 4
1238    
1239     =item #attribute
1240    
1241     Attribute. Its XML representation takes the form of NAME="VALUE".
1242    
1243     =item #comment
1244    
1245     Comment declarement. <!-- -->
1246    
1247     =item #declarement
1248    
1249     SGML's declarements, such as SGML, DOCTYPE, ENTITY, etc.
1250     <!SGML ...>, <!DOCTYPE root []>, <!ENTITY % name "value">
1251    
1252     =item #element
1253    
1254     Element. Its XML representation consists of start tag, content and end tag,
1255     like <TYPE>content</TYPE>.
1256    
1257     =item #fragment
1258    
1259     Fragment of nodes. It's similar to DOM's fragment node.
1260    
1261     =item #pi
1262    
1263     Prosessing instruction. <?NAME VALUE?>
1264    
1265     =item #reference
1266    
1267     Character reference or general or parameter entity reference.
1268     &#nnnn;, &#xhhhh;, &name;, %name;.
1269    
1270     =item #section
1271    
1272     Markup section. CDATA, INCLUDE and IGNORE are supported by XML.
1273     <![%type;[...]]>
1274    
1275     =item #text
1276    
1277     Text.
1278    
1279     =item #xml
1280    
1281     Preformatted XML text.
1282    
1283     =cut
1284    
1285 wakaba 1.4 =head1 RESTRICTIONS
1286    
1287     =over 4
1288    
1289     =item XML without XML Namespace is not supported.
1290    
1291     =item Before default namespace without bounded URI (xmlns="") is outputed, it must be declared.
1292    
1293     For example, next code generates invalid (non-well-formed) XML Namespace document.
1294    
1295     my $x = SuikaWiki::Markup::XML->new (local_name => 'elementType');
1296     print $x # <ns1:elementType xmlns:ns1=""></ns1:elementType>
1297    
1298     So you must write like:
1299    
1300     my $x = SuikaWiki::Markup::XML->new (local_name => 'elementType');
1301     $x->define_new_namespace ('' => '');
1302     print $x; # <elementType xmlns=""></elementType>
1303    
1304     =back
1305    
1306 wakaba 1.1 =head1 LICENSE
1307    
1308     Copyright 2003 Wakaba <w@suika.fam.cx>
1309    
1310     This program is free software; you can redistribute it and/or
1311     modify it under the same terms as Perl itself.
1312    
1313     =cut
1314    
1315 wakaba 1.11 1; # $Date: 2003/06/30 11:06:28 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24