/[pub]/suikawiki/script/lib/SuikaWiki/Markup/XML.pm
Suika

Contents of /suikawiki/script/lib/SuikaWiki/Markup/XML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.17 - (hide annotations) (download)
Fri Jan 16 08:23:20 2004 UTC (21 years, 6 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.16: +2 -2 lines
FILE REMOVED
Removed

1 wakaba 1.1
2     =head1 NAME
3    
4     SuikaWiki::Markup::XML --- SuikaWiki: Simple well-formed document fragment generator
5    
6     =head1 DESCRIPTION
7    
8     This module can be used to generate the document fragment of XML, SGML or
9     other well-formed (in XML meaning) data formats with the object oriented manner.
10    
11     This module cannot be used to parse XML (or other marked-up) document (or its fragment)
12     by itself, nor is compatible with other huge packages such as XML::Parser. The only purpose
13     of this module is to make it easy for tiny perl scripts to GENERATE well-formed
14     markup constructures. (SuikaWiki is not "tiny"? Oh, yes, I see:-))
15    
16     =cut
17    
18     package SuikaWiki::Markup::XML;
19     use strict;
20 wakaba 1.17 our $VERSION = do{my @r=(q$Revision: 1.16 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
21 wakaba 1.9 use overload '""' => \&outer_xml,
22 wakaba 1.2 fallback => 1;
23     use Char::Class::XML qw!InXML_NameStartChar InXMLNameChar InXML_NCNameStartChar InXMLNCNameChar!;
24 wakaba 1.16 use Carp;
25     Carp::carp "Obsoleted module SuikaWiki::Markup::XML loaded";
26    
27 wakaba 1.1 our %Namespace_URI_to_prefix = (
28     'DAV:' => [qw/dav webdav/],
29     'http://greenbytes.de/2002/rfcedit' => [qw/ed/],
30     'http://icl.com/saxon' => [qw/saxon/],
31     'http://members.jcom.home.ne.jp/jintrick/2003/02/site-concept.xml#' => ['', qw/sitemap/],
32     'http://purl.org/dc/elements/1.1/' => [qw/dc dc11/],
33     'http://purl.org/rss/1.0/' => ['', qw/rss rss10/],
34     'http://suika.fam.cx/~wakaba/lang/rfc/translation/' => [qw/ja/],
35     'http://www.mozilla.org/xbl' => ['', qw/xbl/],
36 wakaba 1.3 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => [qw/rdf/],
37 wakaba 1.1 'http://www.w3.org/1999/xhtml' => ['', qw/h h1 xhtml xhtml1/],
38     'http://www.w3.org/1999/xlink' => [qw/l xlink/],
39     'http://www.w3.org/1999/XSL/Format' => [qw/fo xslfo xsl-fo xsl/],
40     'http://www.w3.org/1999/XSL/Transform' => [qw/t s xslt xsl/],
41     'http://www.w3.org/1999/XSL/TransformAlias' => [qw/axslt axsl xslt xsl/],
42     'http://www.w3.org/2000/01/rdf-schema#' => [qw/rdfs/],
43     'http://www.w3.org/2000/svg' => ['', qw/s svg/],
44     'http://www.w3.org/2002/06/hlink' => [qw/h hlink/],
45     'http://www.w3.org/2002/06/xhtml2' => ['', qw/h h2 xhtml xhtml2/],
46 wakaba 1.3 'http://www.w3.org/2002/07/owl' => [qw/owl/],
47 wakaba 1.6 'http://www.w3.org/2002/xforms/cr' => [qw/f xforms/],
48 wakaba 1.1 'http://www.w3.org/TR/REC-smil' => ['', qw/smil smil1/],
49 wakaba 1.3 'http://www.wapforum.org/2001/wml' => [qw/wap/],
50 wakaba 1.1 'http://xml.apache.org/xalan' => [qw/xalan/],
51     'mailto:julian.reschke@greenbytes.de?subject=rcf2629.xslt' => [qw/myns/],
52     'urn:schemas-microsoft-com:vml' => [qw/v vml/],
53     'urn:schemas-microsoft-com:xslt' => [qw/ms msxsl msxslt/],
54     'urn:x-suika-fam-cx:markup:ietf:html:3:draft:00' => ['', qw/H HTML HTML3/],
55 wakaba 1.7 'urn:x-suika-fam-cx:markup:ietf:rfc:2629' => ['', qw/rfc rfc2629/],
56 wakaba 1.1 );
57     my %Cache;
58 wakaba 1.10 our %NS = (
59 wakaba 1.7 SGML => 'urn:x-suika-fam-cx:markup:sgml:',
60 w 1.12 XML => 'urn:x-suika-fam-cx:markup:xml:',
61 wakaba 1.10 internal_attr_duplicate => 'http://suika.fam.cx/~wakaba/-temp/2003/05/17/invalid-attr#',
62     internal_invalid_sysid => 'http://system.identifier.invalid/',
63     internal_ns_invalid => 'http://suika.fam.cx/~wakaba/-temp/2003/05/17/unknown-namespace#',
64 wakaba 1.8 xml => 'http://www.w3.org/XML/1998/namespace',
65     xmlns => 'http://www.w3.org/2000/xmlns/',
66 wakaba 1.7 );
67 wakaba 1.1
68     =head1 METHODS
69    
70     =over 4
71    
72     =item $x = SuikaWiki::Markup::XML->new (%options)
73    
74     Returns new instance of the module. It is itself a node.
75    
76 wakaba 1.4 Available options: C<data_type>, C<default_decl>, C<type> (default: C<#element>), C<local_name>, C<namespace_uri> and C<value>.
77 wakaba 1.1
78     =cut
79    
80     sub new ($;%) {
81     my $class = shift;
82     my $self = bless {@_}, $class;
83     $self->{type} ||= '#element';
84 wakaba 1.5 if ($self->{qname}) {
85     ($self->{namespace_prefix}, $self->{local_name}) = $self->_ns_parse_qname ($self->{qname});
86     $self->{_qname} = $self->{qname};
87     }
88     if (defined $self->{namespace_prefix}) {
89     $self->{namespace_prefix} .= ':' if $self->{namespace_prefix}
90     && substr ($self->{namespace_prefix}, -1) ne ':';
91 wakaba 1.10 $self->{ns}->{$self->{namespace_prefix}||''} = $self->{namespace_uri}
92     if defined $self->{namespace_uri};
93 wakaba 1.5 }
94 wakaba 1.4 for (qw/local_name value/) {
95 wakaba 1.10 if ($self->_is_same_class ($self->{$_})) {
96 wakaba 1.2 $self->{$_}->{parent} = $self;
97     }
98     }
99 wakaba 1.1 $self->{node} ||= [];
100     $self;
101     }
102    
103 wakaba 1.5 sub _ns_parse_qname ($$) {
104 wakaba 1.10 my $qname = $_[1];
105 wakaba 1.5 if ($qname =~ /:/) {
106     return split /:/, $qname, 2;
107     } else {
108     return (undef, $qname);
109     }
110     }
111 wakaba 1.1
112     =item $x->append_node ($node)
113    
114     Appending given node to the object (as the last child).
115     If the type of given node is C<#fragment>, its all children, not the node
116     itself, are appended.
117    
118     This method returns the appended node unless the type of given node is C<#fragment>.
119     In such cases, this node (C<$x>) is returned.
120    
121 wakaba 1.2 Available options: C<node_or_text>.
122    
123 wakaba 1.1 =cut
124    
125 wakaba 1.2 sub append_node ($$;%) {
126 wakaba 1.1 my $self = shift;
127 wakaba 1.2 my ($new_node, %o) = @_;
128     unless (ref $new_node) {
129     if ($o{node_or_text}) {
130     return $self->append_text ($new_node);
131     } else {
132 wakaba 1.10 die "append_node: Invalid node";
133 wakaba 1.2 }
134     }
135 wakaba 1.1 if ($new_node->{type} eq '#fragment') {
136     for (@{$new_node->{node}}) {
137     push @{$self->{node}}, $_;
138     $_->{parent} = $self;
139     }
140     $self;
141     } else {
142     push @{$self->{node}}, $new_node;
143     $new_node->{parent} = $self;
144     $new_node;
145     }
146     }
147    
148     =item $new_node = $x->append_new_node (%options)
149    
150     Appending a new node. The new node is returned.
151    
152     Available options: C<type>, C<namespace_uri>, C<local_name>, C<value>.
153    
154     =cut
155    
156     sub append_new_node ($;%) {
157     my $self = shift;
158 wakaba 1.10 my $new_node = __PACKAGE__->new (@_);
159 wakaba 1.1 push @{$self->{node}}, $new_node;
160     $new_node->{parent} = $self;
161     $new_node;
162     }
163    
164     =item $new_node = $x->append_text ($text)
165    
166     Appending given text as a new text node. The new text node is returned.
167    
168     =cut
169    
170     sub append_text ($$;%) {
171     my $self = shift;
172     my $s = shift;
173 wakaba 1.10 $self->append_new_node (type => '#text', value => $s);
174 wakaba 1.1 }
175    
176 wakaba 1.2 sub append_baretext ($$;%) {
177     my $self = shift;
178     my $s = shift;
179     $self->append_new_node (type => '#xml', value => $s);
180     }
181    
182 wakaba 1.9 sub remove_child_node ($$) {
183     my ($self, $node) = @_;
184     return unless ref $node;
185     $node = overload::StrVal ($node);
186     $self->{node} = [grep { overload::StrVal ($_) ne $node } @{$self->{node}}];
187     }
188    
189 wakaba 1.1 =item $attr_node = $x->get_attribute ($local_name, %options)
190    
191     Returns the attribute node whose local-name is C<$local_name>.
192    
193     Available options: C<namespace_uri>, C<make_new_node>.
194    
195     =cut
196    
197     sub get_attribute ($$;%) {
198 wakaba 1.10 my ($self, $name, %o) = @_;
199 wakaba 1.1 for (@{$self->{node}}) {
200 wakaba 1.10 if ($_->{type} eq '#attribute'
201     && $_->{local_name} eq $name
202     && $o{namespace_uri} eq $_->{namespace_uri}) {
203 wakaba 1.1 return $_;
204     }
205     }
206     ## Node is not exist
207     if ($o{make_new_node}) {
208 wakaba 1.10 return $self->append_new_node (type => '#attribute', local_name => $name,
209     namespace_uri => $o{namespace_uri});
210 wakaba 1.1 } else {
211     return undef;
212     }
213     }
214    
215     =item $attr_node = $x->set_attribute ($local_name => $value, %options)
216    
217     Set the value of the attribute. The attribute node is returned.
218    
219     Available options: C<namespace_uri>.
220    
221     =cut
222    
223     sub set_attribute ($$$;%) {
224 wakaba 1.10 my ($self, $name, $val, %o) = @_;
225     if ({qw/ARRAY 1 HASH 1 CODE 1/}->{ref ($val)}) {
226 wakaba 1.7 ## TODO: common error handling
227     require Carp;
228     Carp::croak "set_attribute: new attribute value must be string or blessed object";
229     }
230 wakaba 1.1 for (@{$self->{node}}) {
231 wakaba 1.10 if ($_->{type} eq '#attribute'
232     && $_->{local_name} eq $name
233     && $o{namespace_uri} eq $_->{namespace_uri}) {
234 wakaba 1.1 $_->{value} = $val;
235     $_->{node} = [];
236     return $_;
237     }
238     }
239 wakaba 1.10 return $self->append_new_node (type => '#attribute', local_name => $name,
240     value => $val, namespace_uri => $o{namespace_uri});
241 wakaba 1.1 }
242    
243     =item \@children = $x->child_nodes
244    
245     Returns an array reference to child nodes.
246    
247 wakaba 1.4 =item $local_name = $x->local_name ([$new_name])
248 wakaba 1.1
249 wakaba 1.4 Returns or set the local-name.
250    
251     =item $uri = $x->namespace_uri ([$new_uri])
252    
253     Returns or set namespace name (URI) of the element or the attribute
254 wakaba 1.1
255 wakaba 1.5 =item $uri = $x->namespace_prefix ([$new_prefix])
256    
257     Returns or set namespace prefix of the element or the attribute.
258     You may give C<$new_prefix> in form either 'foo' or 'foo:'.
259     To indicate "default" prefix, use '' (length == 0 string).
260    
261 wakaba 1.15 =item $uri or ($uri, $name) = $x->expanded_name
262    
263     Returns expanded name of the node (element or attribute).
264     In array context, array of namespace name (URI) and local part
265     is returned; otherwise, a URI which identify name of the node
266     (in RDF or WebDAV) is returned.
267    
268 wakaba 1.1 =item $type = $x->node_type
269    
270     Returns the node type.
271    
272     =item $node = $x->parent_node
273    
274     Returns the parent node. If there is no parent node, undef is returned.
275    
276     =cut
277    
278 wakaba 1.10 sub child_nodes ($) { $_[0]->{node} }
279 wakaba 1.4 sub local_name ($;$) {
280     my ($self, $newname) = @_;
281 wakaba 1.10 $self->{local_name} = $newname if $newname;
282 wakaba 1.4 if (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') {
283     $self->{local_name}->{local_name};
284     } else {
285     $self->{local_name}
286     }
287     }
288 wakaba 1.10 sub node_type ($) { $_[0]->{type} }
289     sub parent_node ($) { $_[0]->{parent} }
290 wakaba 1.1
291 wakaba 1.4 sub namespace_uri ($;$) {
292     my ($self, $new_uri) = @_;
293 wakaba 1.10 $self->{namespace_uri} = $new_uri if defined $new_uri;
294 wakaba 1.4 $self->{namespace_uri};
295     }
296 wakaba 1.5 sub namespace_prefix ($;$) {
297     my ($self, $new_pfx) = @_;
298     if (defined $new_pfx && $self->{namespace_uri}) {
299     $new_pfx .= ':' if $new_pfx;
300     $self->{namespace_prefix} = $new_pfx;
301     $self->{ns}->{$new_pfx} = $self->{namespace_uri};
302     }
303     $self->_get_namespace_prefix ($self->{namespace_uri});
304     }
305 wakaba 1.4
306 wakaba 1.15 sub expanded_name ($) {
307     my $self = shift;
308     wantarray ? ($self->{namespace_uri}, $self->{local_name})
309     : $self->{namespace_uri} . $self->{local_name};
310     }
311    
312 wakaba 1.1 =item $i = $x->count
313    
314     Returns the number of child nodes.
315    
316     =cut
317    
318     # TODO: support counting by type
319     sub count ($;@) {
320 wakaba 1.10 (defined $_[0]->{value} ? 1 : 0) + scalar @{$_[0]->{node}};
321 wakaba 1.1 }
322    
323     # $prefix = $x->_get_namespace_prefix ($namespace_uri)
324     sub _get_namespace_prefix ($$;%) {
325 wakaba 1.10 my ($self, $uri, %o) = @_;
326 wakaba 1.1 if (defined (my $p = $self->_uri_to_prefix ($uri, undef, %o))) {
327     return $p if $self->_prefix_to_uri ($p) eq $uri;
328     } if ($Namespace_URI_to_prefix{$uri}) {
329     for (@{$Namespace_URI_to_prefix{$uri}}) {
330     my $pfx = $_; $pfx .= ':' if $pfx;
331     if ($self->_check_namespace_prefix ($pfx) && !$self->_prefix_to_uri ($pfx)) {
332     return $self->_uri_to_prefix ($uri => $pfx, %o);
333     }
334     }
335     } else {
336     my ($u_r_i, $pfx) = ($uri);
337 wakaba 1.4 $u_r_i =~ s/[^0-9A-Za-z._-]+/ /g;
338     my @u_r_i = split / /, $u_r_i;
339     for (reverse @u_r_i) {
340     if (s/([A-Za-z][0-9A-Za-z._-]+)//) {
341     my $p_f_x = $1 . ':';
342     next if lc (substr ($p_f_x, 0, 3)) eq 'xml';
343     unless ($self->_prefix_to_uri ($p_f_x)) {
344     $pfx = $p_f_x;
345     last;
346     }
347 wakaba 1.1 }
348     }
349     if ($pfx) {
350     return $self->_uri_to_prefix ($uri => $pfx, %o);
351     } else {
352     while (1) {
353     my $pfx = 'ns'.(++$self->{ns}->{-anonymous}).':';
354     unless ($self->_prefix_to_uri ($pfx)) {
355     return $self->_uri_to_prefix ($uri => $pfx, %o);
356     }
357     }
358     }
359     }
360     }
361    
362 wakaba 1.4 sub _set_prefix_to_uri ($$$;%) {
363     my ($self, $prefix => $uri, %o) = @_;
364     return undef unless $self->_check_namespace_prefix ($prefix);
365     $self->{ns}->{$prefix} = $uri;
366     $self->_prefix_to_uri ($prefix);
367     }
368    
369     ## TODO: removing ns declare (1.1) support
370     # $uri or undef = $x->_prefix_to_uri ($prefix)
371 wakaba 1.1 sub _prefix_to_uri ($$;$%) {
372 wakaba 1.4 my ($self, $prefix, %o) = @_;
373 wakaba 1.1 return undef unless $self->_check_namespace_prefix ($prefix);
374     if (uc (substr $prefix, 0, 3) eq 'XML') {
375 wakaba 1.8 return $NS{xml} if $prefix eq 'xml:';
376     return $NS{xmlns} if $prefix eq 'xmlns:';
377 wakaba 1.2 }
378     if (defined $self->{ns}->{$prefix}) {
379 wakaba 1.1 $self->{ns}->{$prefix};
380     } elsif (ref $self->{parent}) {
381     shift; # $self
382     $self->{parent}->_prefix_to_uri (@_);
383     } else {
384     undef;
385     }
386     }
387    
388     # $prefix or undef = $x->_uri_to_prefix ($uri => [$new_prefix], %options)
389     # use_no_prefix (default: 1): Allow default namespace (no prefix).
390     sub _uri_to_prefix ($$;$%) {
391     my ($self, $uri, $new_prefix, %o) = @_;
392     if (defined $new_prefix && $self->_check_namespace_prefix ($new_prefix)) {
393     $self->{ns}->{$new_prefix} = $uri;
394     $new_prefix;
395     } else {
396 wakaba 1.8 return 'xml:' if $uri eq $NS{xml};
397     return 'xmlns:' if $uri eq $NS{xmlns};
398 wakaba 1.1 for (keys %{$self->{ns}||{}}) {
399     next if ($_ eq '') && !(!defined $o{use_no_prefix} || $o{use_no_prefix});
400     return $_ if $self->{ns}->{$_} eq $uri;
401     }
402 wakaba 1.2 if (ref ($self->{parent}) && $self->{parent}->{type} ne '#declaration') {
403 wakaba 1.1 shift; # $self
404     $self->{parent}->_uri_to_prefix (@_);
405     } else {
406     undef;
407     }
408     }
409     }
410    
411     =item $x->define_new_namespace ($prefix => $uri)
412    
413     Defines a new XML Namespace. This method is useful for root or section-level
414     element node.
415    
416     Returned value is unspecified in this version of this module.
417    
418     =cut
419    
420 wakaba 1.4 ## TODO: structured URI (such as http://&server;/) support
421 wakaba 1.1 sub define_new_namespace ($$$) {
422     my ($self, $prefix, $uri) = @_;
423     if ($prefix eq '' || $self->_check_ncname ($prefix)) {
424     $prefix .= ':' if $prefix && substr ($prefix, -1) ne ':';
425 wakaba 1.4 $self->_set_prefix_to_uri ($prefix => $uri);
426 wakaba 1.1 } else {
427     undef;
428     }
429     }
430    
431 wakaba 1.4 =item $uri = $x->defined_namespace_prefix ($prefix)
432    
433     Query whether the namespace prefix is defined or not.
434     If defined, return namespace name (URI).
435    
436     =cut
437    
438     sub defined_namespace_prefix ($$) {
439     my ($self, $prefix) = @_;
440 wakaba 1.10 $prefix .= ':' if $prefix;
441 wakaba 1.4 $self->_prefix_to_uri ($prefix);
442     }
443    
444 wakaba 1.1 =item $qname = $x->qname
445    
446     Returns QName ((namespace-)qualified name) of the element type.
447     Undef is retuened when the type does not have its QName
448     (ie. when type is neither C<#element> or C<#attribute>).
449    
450     =cut
451    
452     sub qname ($) {
453     my $self = shift;
454     if ($self->_check_ncname ($self->{local_name})) {
455     if ($self->{type} eq '#element') {
456     $self->{_qname} = $self->_get_namespace_prefix ($self->{namespace_uri}) . $self->{local_name}
457     unless $self->{_qname};
458     return $self->{_qname};
459     } elsif ($self->{type} eq '#attribute') {
460     return $self->attribute_name;
461     }
462     }
463     undef;
464     }
465    
466 wakaba 1.9 sub merge_external_subset ($) {
467     my $self = shift;
468     unless ($self->{type} eq '#declaration' && $self->{namespace_uri} eq $NS{SGML}.'doctype') {
469     return unless $self->{type} eq '#document' || $self->{type} eq '#fragment';
470     for (@{$self->{node}}) {
471     $_->merge_external_subset;
472     }
473     return;
474     }
475     my $xsub = $self->get_attribute ('external-subset');
476     return unless ref $xsub;
477     for (@{$xsub->{node}}) {
478     $_->{parent} = $self;
479     }
480     push @{$self->{node}}, @{$xsub->{node}};
481     $self->remove_child_node ($xsub);
482     $self->remove_child_node ($self->get_attribute ('PUBLIC'));
483     $self->remove_child_node ($self->get_attribute ('SYSTEM'));
484     $self->remove_marked_section;
485     }
486    
487     sub remove_marked_section ($) {
488     my $self = shift;
489     my @node;
490     for (@{$self->{node}}) {
491     $_->remove_marked_section;
492     }
493     for (@{$self->{node}}) {
494     if ($_->{type} ne '#section') {
495     push @node, $_;
496     } else {
497     my $status = $_->get_attribute ('status', make_new_node => 1)->inner_text;
498     if ($status eq 'CDATA') {
499     $_->{type} = '#text';
500     $_->remove_child_node ($_->get_attribute ('status'));
501     push @node, $_;
502     } elsif ($status ne 'IGNORE') { # INCLUDE
503     for my $e (@{$_->{node}}) {
504     if ($e->{type} ne '#attribute') {
505     $e->{parent} = $self;
506     push @node, $e;
507     }
508     }
509     }
510     }
511     }
512     $self->{node} = \@node;
513     }
514    
515 wakaba 1.7 sub remove_references ($) {
516     my $self = shift;
517     my @node;
518 wakaba 1.8 for (@{$self->{node}}) {
519     $_->remove_references;
520     }
521 wakaba 1.7 for (@{$self->{node}}) {
522     if ($_->{type} ne '#reference'
523     || ($self->{type} eq '#declaration' && $_->{namespace_uri} eq $NS{SGML}.'entity')) {
524     push @node, $_;
525     } else {
526     if ($_->{namespace_uri} =~ /char/) {
527     my $e = ref ($_)->new (type => '#text', value => chr $_->{value});
528     $e->{parent} = $self;
529     push @node, $e;
530     } elsif ($_->{flag}->{smxp__ref_expanded}) {
531     for my $e (@{$_->{node}}) {
532 wakaba 1.8 if ($e->{type} ne '#attribute') {
533     $e->{parent} = $self;
534     push @node, $e;
535     }
536 wakaba 1.7 }
537     } else { ## reference is not expanded
538     push @node, $_;
539     }
540     }
541 wakaba 1.8 $_->{flag}->{smxp__defined_with_param_ref} = 0
542 w 1.13 if $_->{flag}->{smxp__defined_with_param_ref}
543     && !$_->{flag}->{smxp__non_processed_declaration};
544 wakaba 1.7 }
545     $self->{node} = \@node;
546     }
547    
548 wakaba 1.8 sub resolve_relative_uri ($;$%) {
549     require URI;
550     my ($self, $rel, %o) = @_;
551     my $base = $self->get_attribute ('base', namespace_uri => $NS{xml});
552     $base = ref ($base) ? $base->inner_text : undef;
553     if ($base !~ /^(?:[0-9A-Za-z.+-]|%[0-9A-Fa-f]{2})+:/) { # $base is relative
554     $base = $self->_resolve_relative_uri_by_parent ($base, \%o);
555     }
556     eval q{ ## Catch error such as $base is 'data:,foo' (non hierarchic scheme,...)
557     return URI->new ($rel)->abs ($base || '.'); ## BUG (or spec) of URI: $base == false
558     } or return $rel;
559     }
560     sub _resolve_relative_uri_by_parent ($$$) {
561     my ($self, $rel, $o) = @_;
562     if (ref $self->{parent}) {
563     if (!$o->{use_references_base_uri} && $self->{parent}->{type} eq '#reference') {
564     ## This case is necessary to work with
565     ## <element> <!-- element can have base URI -->
566     ## text <!-- text cannot have base URI -->
567     ## &ent; <!-- ref's base URI is referred entity's one (in this module) -->
568     ## <!-- expantion of ent -->
569     ## entity's text <!-- text cannot have base URI, so use <element>'s one -->
570     ## <entitys-element/> <!-- element can have base URI, otherwise ENTITY's one -->
571     ## </element>
572     return $self->{parent}->_resolve_relative_uri_by_parent ($rel, $o);
573     } else {
574     return $self->{parent}->resolve_relative_uri ($rel, %$o);
575     }
576     } else {
577     return $rel;
578     }
579     }
580     sub base_uri ($;$) {
581     my ($self, $new_uri) = @_;
582     my $base;
583     if (defined $new_uri) {
584     $base = $self->set_attribute (base => $new_uri, namespace_uri => $NS{xml});
585     }
586     $base ||= $self->get_attribute ('base', namespace_uri => $NS{xml});
587     ref ($base) ? $base->inner_text : undef;
588     }
589    
590 wakaba 1.1 =item $tag = $x->start_tag
591    
592 wakaba 1.4 Returns the start tag (or something that marks the start of something, such as '<!--'
593 wakaba 1.1 for C<#comment> nodes).
594    
595     =cut
596    
597     sub start_tag ($) {
598     my $self = shift;
599     if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) {
600     my $r = '<';
601     $r .= $self->qname;
602     for (@{$self->{node}}) {
603     $r .= ' ' . $_->outer_xml if $_->node_type eq '#attribute';
604     }
605     for my $prefix (grep !/^-/, keys %{$self->{ns}||{}}) {
606     if ($prefix) {
607     $r .= ' xmlns:'.substr ($prefix, 0, length ($prefix)-1);
608     } else {
609     $r .= ' xmlns';
610     }
611     $r .= '="'.$self->_entitize ($self->{ns}->{$prefix}).'"';
612     }
613     $r .= '>';
614     $r;
615     } elsif ($self->{type} eq '#comment') {
616 wakaba 1.4 '<!--';
617     } elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) {
618     '<?' . ($self->{local_name});
619 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
620 wakaba 1.10 if ($self->{namespace_uri} eq $NS{SGML}.'char:ref:hex') {
621 wakaba 1.2 '&#x';
622 wakaba 1.10 } elsif ($self->{namespace_uri} eq $NS{SGML}.'char:ref') {
623 wakaba 1.2 '&#';
624 wakaba 1.4 } elsif ($self->_check_ncname ($self->{local_name})) {
625 wakaba 1.10 if ($self->{namespace_uri} eq $NS{SGML}.'entity:parameter') {
626 wakaba 1.2 '%';
627     } else {
628     '&';
629     }
630     } else { # error
631     '';
632     }
633 wakaba 1.4 } elsif ($self->{type} eq '#declaration' && $self->{namespace_uri}) {
634     '<!' . {
635 wakaba 1.8 $NS{SGML}.'attlist' => 'ATTLIST',
636     $NS{SGML}.'doctype' => 'DOCTYPE',
637     $NS{SGML}.'element' => 'ELEMENT',
638     $NS{SGML}.'entity' => 'ENTITY',
639     $NS{SGML}.'entity:parameter' => 'ENTITY',
640     $NS{SGML}.'notation' => 'NOTATION',
641 wakaba 1.10 }->{$self->{namespace_uri}} . ' ' .
642     ($self->{namespace_uri} eq $NS{SGML}.'entity:parameter' ?
643 wakaba 1.7 ($self->{flag}->{smxp__defined_with_param_ref}?'':'% '):'');
644 wakaba 1.2 } elsif ($self->{type} eq '#section') {
645 wakaba 1.9 '<![';
646 wakaba 1.1 } else {
647     '';
648     }
649     }
650    
651     =item $tag = $x->end_tag
652    
653 wakaba 1.4 Returns the end tag (or something that marks the end of something, such as '-->'
654 wakaba 1.1 for C<#comment> nodes).
655    
656     =cut
657    
658     sub end_tag ($) {
659     my $self = shift;
660     if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) {
661     '</' . $self->qname . '>';
662     } elsif ($self->{type} eq '#comment') {
663 wakaba 1.4 '-->';
664 wakaba 1.1 } elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) {
665     '?>';
666 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
667     ';';
668 wakaba 1.4 } elsif ($self->{type} eq '#declaration' && $self->{namespace_uri}) {
669     '>';
670 wakaba 1.1 } elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) {
671 wakaba 1.4 '>';
672 wakaba 1.2 } elsif ($self->{type} eq '#section') {
673 wakaba 1.9 ']]>';
674 wakaba 1.1 } else {
675     '';
676     }
677     }
678    
679     =item $tag = $x->attribute_name
680    
681     Returns the attribute name.
682    
683     =cut
684    
685     sub attribute_name ($) {
686     my $self = shift;
687     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
688     ($self->{namespace_uri} ?
689     (ref $self->{parent} ? $self->{parent} : $self)
690     ->_get_namespace_prefix ($self->{namespace_uri}, use_no_prefix => 0) : '')
691     .$self->{local_name};
692     } else {
693     '';
694     }
695     }
696    
697     =item $tag = $x->attribute_value
698    
699     Returns the attribute value.
700    
701     =cut
702    
703 wakaba 1.4 sub attribute_value ($;%) {
704 wakaba 1.10 my ($self, %o) = @_;
705 wakaba 1.1 if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
706 wakaba 1.5 my $r = '"';
707 wakaba 1.10 my $isc = $self->_is_same_class ($self->{value});
708 w 1.12 $r .= $self->_entitize ($self->{value}, keep_wsp => 1) unless $isc;
709 wakaba 1.10 for (($isc?$self->{value}:()), @{$self->{node}}) {
710     my $nt = $_->{type};
711 wakaba 1.4 if ($nt eq '#reference' || $nt eq '#xml') {
712     $r .= $_->outer_xml;
713     } elsif ($nt ne '#attribute') {
714 w 1.12 $r .= $self->_entitize ($_->inner_text, keep_wsp => 1);
715 wakaba 1.4 }
716     }
717     return $r . '"';
718     } else {
719     '';
720     }
721     }
722    
723     sub entity_value ($;%) {
724 wakaba 1.10 my ($self, %o) = @_;
725 wakaba 1.4 my $_entitize = sub {
726     my $s = shift;
727 wakaba 1.7 $s =~ s/&/&#x26;/g;
728     $s =~ s/&#x26;(\p{InXML_NameStartChar}\p{InXMLNameChar}*);/&$1;/g;
729 w 1.12 $s =~ s/([\x0D%"])/sprintf '&#x%02X;', ord $1/ge;
730     $s =~ s/([\x00-\x08\x0B\x0C\x0E-\x1F\x7F])/sprintf '&amp;#x%02X;', ord $1/ge;
731 wakaba 1.4 $s;
732     };
733     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
734     my $r = '"' . &$_entitize ($self->{value});
735     for (@{$self->{node}}) {
736 wakaba 1.10 my $nt = $_->{type};
737 wakaba 1.4 if ($nt eq '#reference' || $nt eq '#xml') {
738     $r .= $_->outer_xml;
739     } elsif ($nt ne '#attribute') {
740     $r .= &$_entitize ($_->inner_text);
741 wakaba 1.1 }
742     }
743 wakaba 1.4 return $r . '"';
744 wakaba 1.1 } else {
745     '';
746     }
747     }
748    
749 wakaba 1.7 ## This method should be called only from SuikaWiki::Markup::XML::* family modules,
750     ## since this is NOT a FORMAL interface.
751     sub _entity_parameter_literal_value ($;%) {
752     my $self = shift;
753 wakaba 1.10 my $r = '';
754     my $isc = $self->_is_same_class ($self->{value});
755     $r = $self->{value} unless $isc;
756     for (($isc?$self->{value}:()), @{$self->{node}}) {
757     my $nt = $_->{type};
758 w 1.12 ## Bare node and general entity reference node
759     if ($nt eq '#xml' || ($nt eq '#reference' && $_->{namespace_uri} eq $NS{SGML}.'entity')) {
760 wakaba 1.10 $r .= $_->outer_xml;
761 w 1.12 ## Text node and parameter entity reference node
762 wakaba 1.10 } elsif ($nt ne '#attribute') {
763     $r .= $_->inner_text;
764 wakaba 1.7 }
765 wakaba 1.10 }
766     $r;
767 wakaba 1.7 }
768    
769 wakaba 1.1 =item $tag = $x->attribute
770    
771     Returns the attribute (name and value pair).
772    
773     =cut
774    
775     sub attribute ($) {
776     my $self = shift;
777     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
778     $self->attribute_name . '=' . $self->attribute_value;
779     } else {
780     '';
781     }
782     }
783    
784 wakaba 1.2 sub external_id ($;%) {
785     my $self = shift;
786     my %o = @_;
787     my ($pubid, $sysid, $ndata);
788     for (@{$self->{node}}) {
789 wakaba 1.4 if ($_->{type} eq '#attribute' && !$_->{namespace_uri}) {
790 wakaba 1.2 if ($_->{local_name} eq 'PUBLIC') {
791 wakaba 1.4 $pubid = $_->inner_text;
792 wakaba 1.2 } elsif ($_->{local_name} eq 'SYSTEM') {
793 wakaba 1.4 $sysid = $_->inner_text;
794 wakaba 1.2 } elsif ($_->{local_name} eq 'NDATA') {
795 wakaba 1.4 $ndata = $_->inner_text;
796     undef $ndata unless $self->_check_ncname ($ndata);
797 wakaba 1.2 }
798     }
799     }
800     my $r = '';
801 wakaba 1.9 if (defined $pubid) {
802 wakaba 1.4 $pubid =~ s|([^\x0A\x0D\x20A-Za-z0-9'()+,./:=?;!*#\@\$_%-])|sprintf '%%%02X', ord $1|ges;
803 wakaba 1.9 $pubid = '"' . $pubid . '"';
804 wakaba 1.4 }
805 wakaba 1.9 if (defined $sysid) {
806 wakaba 1.10 if (index ($sysid, '"') > -1) {
807     if (index ($sysid, "'") > -1) {
808     $sysid =~ s/"/%22/; $sysid = '"' . $sysid . '"';
809 wakaba 1.4 } else {
810     $sysid = "'" . $sysid . "'";
811     }
812     } else {
813     $sysid = '"' . $sysid . '"';
814     }
815     }
816 wakaba 1.2 if ($pubid && $sysid) {
817 wakaba 1.9 $r = 'PUBLIC ' . $pubid . ' ' . $sysid;
818 wakaba 1.2 } elsif ($sysid) {
819 wakaba 1.4 $r = 'SYSTEM ' . $sysid;
820 wakaba 1.2 } elsif ($pubid && $o{allow_pubid_only}) {
821 wakaba 1.9 $r = 'PUBLIC ' . $pubid;
822 wakaba 1.2 }
823     if ($r && $ndata && $o{use_ndata}) {
824 wakaba 1.4 $r .= ' NDATA ' . $ndata;
825 wakaba 1.2 }
826     $r;
827     }
828    
829 wakaba 1.1 =item $tag = $x->inner_xml
830    
831     Returns the content of the node in XML syntax. (In case of the C<#element> nodes,
832     element content without start- and end-tags is returned.)
833    
834     Note that for not all node types the behavior of this method is defined.
835     For example, returned value of C<#attribute> might be unexpected one
836     in this version of this module.
837    
838     =cut
839    
840 wakaba 1.3 sub inner_xml ($;%) {
841 wakaba 1.10 my ($self, %o) = @_;
842 wakaba 1.1 my $r = '';
843     if ($self->{type} eq '#comment') {
844     $r = $self->inner_text;
845     $r =~ s/--/-&#x45;/g;
846     } elsif ($self->{type} eq '#pi') {
847 wakaba 1.10 my $isc = $self->_is_same_class ($self->{value});
848     if (!$isc && length ($self->{value})) {
849 wakaba 1.1 $r = ' ' . $self->{value};
850 wakaba 1.2 $r =~ s/\?>/? >/g; # Same replacement as of the recommendation of XSLT:p.i.
851 wakaba 1.1 }
852 wakaba 1.10 for (($isc?$self->{value}:()), @{$self->{node}}) {
853 wakaba 1.1 if ($_->node_type eq '#attribute') {
854     $r .= ' ' . $_->attribute;
855     } else {
856     my $s = $_->inner_text;
857 wakaba 1.10 if (length $s) {
858     $s =~ s/\?>/? >/g;
859     $r .= ' ' . $s;
860     }
861 wakaba 1.1 }
862     }
863 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
864 wakaba 1.10 if ($self->{namespace_uri} eq $NS{SGML}.'char:ref:hex') {
865 wakaba 1.2 $r = sprintf '%02X', $self->{value};
866 wakaba 1.10 } elsif ($self->{namespace_uri} eq $NS{SGML}.'char:ref') {
867 wakaba 1.2 $r = sprintf '%02d', $self->{value};
868 wakaba 1.10 } elsif (ref ($self->{local_name}) && $self->{local_name}->{type} eq '#declaration') {
869 wakaba 1.4 $r = $self->{local_name}->{local_name};
870     } elsif ($self->_check_ncname ($self->{local_name})) {
871     $r = ($self->{local_name});
872 wakaba 1.2 } else { # error
873     $r = '';
874     }
875 wakaba 1.1 } elsif ($self->{type} eq '#declaration') {
876 wakaba 1.8 if ($self->{namespace_uri} eq $NS{SGML}.'doctype') {
877 wakaba 1.4 my $root = $self->get_attribute ('qname');
878     ref $root ? ($root = $root->inner_text) : (ref $self->{parent} ? (do {
879     for (@{$self->{parent}->{node}}) {
880     if ($_->{type} eq '#element') {
881     $root = $_->qname;
882     last if $root;
883     }
884     }
885     $root = '#IMPLIED';
886     }) : ($root = '#IMPLIED')); ## error!
887     my ($isub, $xid) = ('', $self->external_id);
888     for (@{$self->{node}}) {
889     $isub .= $_->outer_xml if $_->{type} ne '#attribute';
890 wakaba 1.2 }
891     if ($xid) {
892 wakaba 1.4 $r = $xid;
893     if ($isub) {
894 wakaba 1.7 $r .= " [" . $isub . "]";
895 wakaba 1.4 }
896     } else {
897     if ($isub) {
898 wakaba 1.7 $r = "[" . $isub . "]";
899 wakaba 1.4 } else {
900     $r = "[]";
901     }
902     }
903     $r = $root . ' ' . $r;
904 wakaba 1.9 } elsif ($self->{namespace_uri} eq $NS{SGML}.'entity'
905     || $self->{namespace_uri} eq $NS{SGML}.'entity:parameter'
906     || $self->{namespace_uri} eq $NS{SGML}.'notation') {
907 wakaba 1.4 my %xid_opt;
908 wakaba 1.7 $r = $self->{local_name} . ' ' if !$self->{flag}->{smxp__defined_with_param_ref}
909     && $self->_check_ncname ($self->{local_name});
910 wakaba 1.8 if ($self->{namespace_uri} eq $NS{SGML}.'entity:parameter') {
911 wakaba 1.4 #$r = '% ' . $r;
912 wakaba 1.8 } elsif ($self->{namespace_uri} eq $NS{SGML}.'entity') {
913 wakaba 1.4 $xid_opt{use_ndata} = 1;
914 wakaba 1.8 } elsif ($self->{namespace_uri} eq $NS{SGML}.'notation') {
915 wakaba 1.4 $xid_opt{allow_pubid_only} = 1;
916     }
917    
918 wakaba 1.10 my ($v, $xid) = ($self->{value});
919     $xid = $self->external_id (%xid_opt) unless $self->{flag}->{smxp__defined_with_param_ref};
920 wakaba 1.4 if ($xid) { ## External ID
921 wakaba 1.2 $r .= $xid;
922 wakaba 1.4 } else { ## EntityValue
923     my $entity_value = $self->get_attribute ('value');
924 wakaba 1.7 undef $entity_value if $self->{flag}->{smxp__defined_with_param_ref};
925 wakaba 1.4 if ($entity_value) { # <!ENTITY foo "bar">
926     $r .= $entity_value->entity_value;
927     } else { ## Parameter entity reference
928 wakaba 1.9 my $isc = $self->_is_same_class ($self->{value});
929     $r .= $self->{value} unless $isc;
930     for (($isc?$self->{value}:()), @{$self->{node}}) {
931 wakaba 1.7 $r .= $_->outer_xml unless $_->{type} eq '#attribute';
932 wakaba 1.4 }
933 wakaba 1.2 }
934 wakaba 1.1 }
935 wakaba 1.8 } elsif ($self->{namespace_uri} eq $NS{SGML}.'element') {
936 w 1.12 if (!$self->{flag}->{smxp__defined_with_param_ref}) {
937     $r = $self->get_attribute ('qname');
938     $r = $r->inner_text if $r;
939     unless ($self->_check_ncname ($r)) {
940     $r = undef;
941     } else {
942     $r .= ' ';
943     }
944    
945     my $cmodel = $self->get_attribute ('content', make_new_node => 1)->inner_text;
946     if ($cmodel && $cmodel ne 'mixed') {
947     $r .= $cmodel;
948     } else {
949     my $make_cmodel;
950     $make_cmodel = sub {
951     my $c = shift;
952     my @tt;
953     for (@{$c->child_nodes}) {
954     if ($_->node_type eq '#element' && $_->namespace_uri eq $NS{SGML}.'element') {
955     if ($_->local_name eq 'group') {
956     my $tt = &$make_cmodel ($_);
957     push @tt, '(' . $tt . ')'
958     . ($_->get_attribute ('occurence', make_new_node => 1)->inner_text)
959     if $tt;
960     } elsif ($_->local_name eq 'element') {
961 w 1.14 push @tt, $_->get_attribute ('qname', make_new_node => 1)->inner_text
962     . ($_->get_attribute ('occurence', make_new_node => 1)->inner_text);
963 w 1.12 }
964     }
965     }
966     return join scalar ($c->get_attribute ('connector', make_new_node => 1)->inner_text
967     || '|'),
968     grep {$_} @tt;
969     };
970     my $tt;
971     my $grp_node;
972     for (@{$self->child_nodes}) {
973     if ($_->node_type eq '#element' && $_->namespace_uri eq $NS{SGML}.'element'
974     && $_->local_name eq 'group') {
975     $grp_node = $_;
976     $tt = &$make_cmodel ($grp_node);
977     last;
978     }
979     }
980     if ($cmodel eq 'mixed') { ## mixed content
981     if ($tt) {
982     $r .= '(#PCDATA|' . $tt . ')*';
983     } else {
984     $r .= '(#PCDATA)'
985 w 1.14 . ($grp_node->get_attribute ('occurence', make_new_node => 1)->inner_text eq '*'
986 w 1.12 ? '*' : '');
987     }
988     } else { ## element content
989     if ($tt) {
990     $r .= '(' . $tt . ')'
991     . ($grp_node->get_attribute ('occurence', make_new_node => 1)->inner_text);
992     } else {
993     $r .= 'EMPTY';
994     }
995     } # mixed or element content
996     } # content model group
997     } else { ## Save source doc's description as far as possible
998     my $isc = $self->_is_same_class ($self->{value});
999     $r .= $self->{value} unless $isc;
1000     for (($isc?$self->{value}:()), @{$self->{node}}) {
1001     unless ($_->{type} eq '#attribute' || $_->{type} eq '#element') {
1002     $r .= $_->outer_xml;
1003     } elsif ($_->{type} eq '#element' && $_->{namespace_uri} eq $NS{SGML}.'group') {
1004     $r .= $_->outer_xml;
1005     }
1006     }
1007 wakaba 1.2 }
1008 wakaba 1.11 } elsif ($self->{namespace_uri} eq $NS{SGML}.'attlist') {
1009 w 1.12 if (!$self->{flag}->{smxp__defined_with_param_ref}) {
1010     $r = $self->get_attribute ('qname');
1011     $r = $r->inner_text if $r;
1012     unless ($self->_check_ncname ($r)) {
1013     $r = undef;
1014     } else {
1015     $r .= ' ';
1016     }
1017     for (@{$self->{node}}) {
1018     if ($_->{type} eq '#element' && $_->{namespace_uri} eq $NS{XML}.'attlist'
1019     && $_->{local_name} eq 'AttDef') {
1020     $r .= "\n\t" . $_->get_attribute ('qname', make_new_node => 1)->inner_text;
1021     my $attr_type = $_->get_attribute ('type', make_new_node => 1)->inner_text;
1022     if ($attr_type ne 'enum') {
1023     $r .= "\t" . $attr_type;
1024     }
1025     if ($attr_type eq 'enum' || $attr_type eq 'NOTATION') {
1026     my @l;
1027     for my $item (@{$_->{node}}) {
1028     if ($item->{type} eq '#element' && $item->{namespace_uri} eq $NS{XML}.'attlist'
1029     && $item->{local_name} eq 'enum') {
1030     push @l, $item->inner_text;
1031     }
1032     }
1033     $r .= "\t(" . join ('|', @l) . ')';
1034     }
1035     ## DefaultDecl
1036     my $deftype = $_->get_attribute ('default_type', make_new_node => 1)->inner_text;
1037     if ($deftype) {
1038     $r .= "\t#" . $deftype;
1039     }
1040     if (!$deftype || $deftype eq 'FIXED') {
1041     $r .= "\t" . $_->get_attribute ('default_value', make_new_node => 1)
1042     ->attribute_value;
1043     }
1044     } # AttDef
1045 wakaba 1.2 }
1046 w 1.12 } else { ## Save source doc's description as far as possible
1047     my $isc = $self->_is_same_class ($self->{value});
1048     $r .= $self->{value} unless $isc;
1049     for (($isc?$self->{value}:()), @{$self->{node}}) {
1050     unless ($_->{type} eq '#attribute' || $_->{type} eq '#element') {
1051     $r .= $_->outer_xml;
1052     } elsif ($_->{type} eq '#element' && $_->{namespace_uri} eq $NS{SGML}.'group') {
1053     $r .= $_->outer_xml;
1054     }
1055     }
1056 wakaba 1.1 }
1057 wakaba 1.2 } else { # unknown
1058 wakaba 1.3 for (@{$self->{node}}) {
1059     $r .= $_->outer_xml;
1060     }
1061 wakaba 1.1 }
1062 wakaba 1.9 } elsif ($self->{type} eq '#section') {
1063     my $status = $self->get_attribute ('status', make_new_node => 1)->inner_text;
1064     if ($status eq 'CDATA') {
1065     $r = $self->inner_text;
1066     $r =~ s/]]>/]]>]]<![CDATA[>/g;
1067     $r = 'CDATA['.$r;
1068     } else {
1069     my $sl = $self->get_attribute ('status_list', make_new_node => 1);
1070     if ($sl->{flag}->{smxp__defined_with_param_ref}) {
1071     my $isc = $self->_is_same_class ($self->{value});
1072     $status = '';
1073     $status = $sl->{value} unless $isc;
1074     for (($isc?$sl->{value}:()), @{$sl->{node}}) {
1075     $status .= $_->outer_xml unless $_->{type} eq '#attribute';
1076     }
1077     $r = $status.'['.$r;
1078     } elsif ($status) {
1079     $r = $status.'['.$r;
1080     } else {
1081     ## Must be an ignored section
1082     }
1083     my $isc = $self->_is_same_class ($self->{value});
1084     $r .= join '', map {s/\]\]>/]]&gt;/g; $_} $self->{value} unless $isc;
1085     for (($isc?$self->{value}:()), @{$self->{node}}) {
1086     if ($_->{type} eq '#text') {
1087     $r .= join '', map {s/\]\]>/]]&gt;/g; $_} $_->inner_text; ## Anyway, this is error
1088     } elsif ($_->{type} ne '#attribute') {
1089     $r .= $_->outer_xml;
1090     }
1091     }
1092     }
1093 wakaba 1.1 } else {
1094 wakaba 1.10 my $isc = $self->_is_same_class ($self->{value});
1095     unless ($isc) {
1096     if ($self->{type} ne '#xml') {
1097     $r = $self->_entitize ($self->{value});
1098     } else {
1099     $r = $self->{value};
1100     }
1101 wakaba 1.1 }
1102 wakaba 1.10 for (($isc?$self->{value}:()), @{$self->{node}}) {
1103     my $nt = $_->{type};
1104     if (($self->{option}->{indent})
1105     && ($nt eq '#element' || $nt eq '#comment' || $nt eq '#pi' || $nt eq '#declaration')) {
1106 wakaba 1.1 $r .= "\n";
1107     }
1108     $r .= $_->outer_xml unless $_->node_type eq '#attribute';
1109     }
1110     }
1111     $r;
1112     }
1113    
1114    
1115     =item $tag = $x->outer_xml
1116    
1117     Returns the node in XML syntax.
1118    
1119     =cut
1120    
1121     sub outer_xml ($) {
1122     my $self = shift;
1123     if ($self->{type} eq '#attribute') {
1124 wakaba 1.10 return $self->attribute;
1125 wakaba 1.1 } else {
1126 wakaba 1.4 if ($self->{option}->{indent} && $self->{type} eq '#element') {
1127 wakaba 1.1 my $r = $self->start_tag;
1128     my $c = $self->inner_xml;
1129 wakaba 1.10 if (!length ($c) && $self->{option}->{use_EmptyElemTag}) {
1130 wakaba 1.4 substr ($r, -1) = ' />';
1131     } else {
1132     if ($c) {
1133     $c =~ s/\n/\n /g;
1134     $r .= "\n " . $c . "\n";
1135     }
1136     $r .= $self->end_tag;
1137 wakaba 1.1 }
1138 wakaba 1.10 return $r;
1139 wakaba 1.1 } else {
1140 wakaba 1.4 my $r = $self->start_tag;
1141     my $c = $self->inner_xml;
1142 wakaba 1.8 if ($self->{type} eq '#element' && !length ($c) && $self->{option}->{use_EmptyElemTag}) {
1143 wakaba 1.4 substr ($r, -1) = ' />';
1144     } else {
1145     $r .= $c . $self->end_tag;
1146     }
1147 wakaba 1.10 return $r;
1148 wakaba 1.11 #return '{'.$self->{type}.': '.$r.'}'; ## DEBUG: show structure
1149 wakaba 1.1 }
1150     }
1151     }
1152    
1153     =item $tag = $x->inner_text
1154    
1155     Returns the text content of the node. (In many case the returned value is same
1156     as WinIE DOM C<inner_text ()> function's or XPath C<text()> function's.
1157     But some classes that inherits this module might implement to return other
1158     value (eg. to return the value of the alt attribute of html:img element).
1159    
1160 wakaba 1.2 Available options: C<output_ref_as_is>.
1161    
1162 wakaba 1.1 =cut
1163    
1164 wakaba 1.2 sub inner_text ($;%) {
1165 wakaba 1.1 my $self = shift;
1166 wakaba 1.2 my %o = @_;
1167 wakaba 1.5 my $r = '';
1168 wakaba 1.10 if ($self->{type} eq '#reference'
1169     && ($self->{namespace_uri} eq $NS{SGML}.'char:ref'
1170     || $self->{namespace_uri} eq $NS{SGML}.'char:ref:hex')) {
1171     $r = chr $self->{value};
1172 wakaba 1.7 } elsif ($self->{type} eq '#declaration'
1173     && ($self->{namespace_uri} eq $NS{SGML}.'entity'
1174     || $self->{namespace_uri} eq $NS{SGML}.'entity:parameter')) {
1175 wakaba 1.10 ## TODO:
1176     $r = $self->set_attribute ('value')->inner_text;
1177 wakaba 1.7 } else { # not #reference nor #declaration(ENTITY)
1178 wakaba 1.10 my $isc = $self->_is_same_class ($self->{value});
1179 w 1.14 $r = $self->{value} if !$isc && defined $self->{value};
1180 wakaba 1.7 if ($o{output_ref_as_is}) { ## output as if RCDATA
1181     $r =~ s/&/&amp;/g;
1182 wakaba 1.10 for my $node (($isc?$self->{value}:()), @{$self->{node}}) {
1183 wakaba 1.7 my $nt = $node->node_type;
1184     if ($nt eq '#reference' || $nt eq '#xml') {
1185     $r .= $node->outer_xml;
1186     } elsif ($nt ne '#attribute') {
1187     $r .= map {s/&/&amp;/g; $_} $node->inner_text;
1188     }
1189     }
1190     } else {
1191 wakaba 1.10 for (($isc?$self->{value}:()), @{$self->{node}}) {
1192     $r .= $_->inner_text unless $_->{type} eq '#attribute';
1193 wakaba 1.2 }
1194     }
1195 wakaba 1.1 }
1196     $r;
1197     }
1198    
1199 wakaba 1.9 sub _is_same_class ($$) {
1200     my ($self, $something) = @_;
1201 wakaba 1.10 return 0 if {qw/ARRAY 1 HASH 1 CODE 1 :nonref: 1/}->{ref ($something) || ':nonref:'};
1202     eval q{$self->_CLASS_NAME eq $something->_CLASS_NAME} ? 1 : 0;
1203 wakaba 1.9 }
1204    
1205     sub root_node ($) {
1206     my $self = shift;
1207     if ($self->{type} eq '#document') {
1208     return $self;
1209     } elsif (ref $self->{parent}) {
1210     return $self->{parent}->root_node;
1211     } else {
1212     return $self;
1213     }
1214 wakaba 1.1 }
1215    
1216 wakaba 1.7 sub _get_entity_manager ($) {
1217     my $self = shift;
1218     if ($self->{type} eq '#document') {
1219     unless ($self->{flag}->{smx__entity_manager}) {
1220     require SuikaWiki::Markup::XML::EntityManager;
1221     $self->{flag}->{smx__entity_manager} = SuikaWiki::Markup::XML::EntityManager->new ($self);
1222     }
1223     return $self->{flag}->{smx__entity_manager};
1224     } elsif (ref $self->{parent}) {
1225     return $self->{parent}->_get_entity_manager;
1226     } else {
1227     unless ($self->{flag}->{smx__entity_manager}) {
1228     require SuikaWiki::Markup::XML::EntityManager;
1229     $self->{flag}->{smx__entity_manager} = SuikaWiki::Markup::XML::EntityManager->new ($self);
1230     }
1231     return $self->{flag}->{smx__entity_manager};
1232     }
1233     }
1234    
1235 wakaba 1.10 sub _CLASS_NAME ($) { 'SuikaWiki::Markup::XML' }
1236 wakaba 1.5
1237 wakaba 1.1 # $s = $x->_entitize ($s)
1238 wakaba 1.4 sub _entitize ($$;%) {
1239     my ($self, $s, %o) = (shift, shift, @_);
1240 wakaba 1.1 $s =~ s/&/&amp;/g;
1241     $s =~ s/</&lt;/g;
1242     $s =~ s/>/&gt;/g;
1243     $s =~ s/"/&quot;/g;
1244 wakaba 1.7 $s =~ s/([\x00-\x08\x0B\x0C\x0E-\x1F])/sprintf '&amp;#%d;', ord $1/ge;
1245 w 1.12 $s =~ s/([\x09\x0A\x0D])/sprintf '&#%d;', ord $1/ge if $o{keep_wsp};
1246 wakaba 1.1 $s;
1247     }
1248    
1249 wakaba 1.2 # 1/0 = $x->_check_name ($s)
1250     sub _check_name ($$) {
1251     my $self = shift;
1252     my $s = shift;
1253     return $Cache{name}->{$s} if defined $Cache{name}->{$s};
1254     if ($s =~ /^\p{InXML_NameStartChar}/ && $s !~ /\P{InXMLNameChar}/) {
1255     # \p{...}('*'/'+'/'{n,}') does not work...
1256     $Cache{name}->{$s} = 1;
1257     1;
1258     } else {
1259     $Cache{name}->{$s} = 0;
1260     0;
1261     }
1262     }
1263 wakaba 1.1 # 1/0 = $x->_check_ncname ($s)
1264     sub _check_ncname ($$) {
1265     my $self = shift;
1266     my $s = shift;
1267     return $Cache{ncname}->{$s} if defined $Cache{ncname}->{$s};
1268     if ($s =~ /^\p{InXML_NCNameStartChar}/ && $s !~ /\P{InXMLNCNameChar}/) {
1269     # \p{...}('*'/'+'/'{n,}') does not work...
1270     $Cache{ncname}->{$s} = 1;
1271     1;
1272     } else {
1273     $Cache{ncname}->{$s} = 0;
1274     0;
1275     }
1276     }
1277    
1278     # 1/0 = $x->_check_namespace_prefix ($s)
1279     sub _check_namespace_prefix ($$) {
1280     my $self = shift;
1281     my $s = shift;
1282     return 0 unless defined $s;
1283     return 1 if $s eq '';
1284     substr ($s, -1, 1) = '' if substr ($s, -1, 1) eq ':';
1285     $self->_check_ncname ($s);
1286     }
1287    
1288 wakaba 1.2 ## TODO: cleaning $self->{node} before outputing, to ensure nodes not to have
1289     ## multiple parents.
1290     ## TODO: normalize namespace URI (removing non URI chars)
1291    
1292     sub flag ($$;$) {
1293     my ($self, $name, $value) = @_;
1294     if (defined $value) {
1295     $self->{flag}->{$name} = $value;
1296     }
1297     $self->{flag}->{$name};
1298     }
1299    
1300 wakaba 1.3 sub option ($$;$) {
1301     my ($self, $name, $value) = @_;
1302     if (defined $value) {
1303     $self->{option}->{$name} = $value;
1304     }
1305     $self->{option}->{$name};
1306     }
1307    
1308 wakaba 1.1 =back
1309    
1310 wakaba 1.2 =head1 NODE TYPES
1311    
1312     =over 4
1313    
1314     =item #attribute
1315    
1316     Attribute. Its XML representation takes the form of NAME="VALUE".
1317    
1318     =item #comment
1319    
1320     Comment declarement. <!-- -->
1321    
1322     =item #declarement
1323    
1324     SGML's declarements, such as SGML, DOCTYPE, ENTITY, etc.
1325     <!SGML ...>, <!DOCTYPE root []>, <!ENTITY % name "value">
1326    
1327     =item #element
1328    
1329     Element. Its XML representation consists of start tag, content and end tag,
1330     like <TYPE>content</TYPE>.
1331    
1332     =item #fragment
1333    
1334     Fragment of nodes. It's similar to DOM's fragment node.
1335    
1336     =item #pi
1337    
1338     Prosessing instruction. <?NAME VALUE?>
1339    
1340     =item #reference
1341    
1342     Character reference or general or parameter entity reference.
1343     &#nnnn;, &#xhhhh;, &name;, %name;.
1344    
1345     =item #section
1346    
1347     Markup section. CDATA, INCLUDE and IGNORE are supported by XML.
1348     <![%type;[...]]>
1349    
1350     =item #text
1351    
1352     Text.
1353    
1354     =item #xml
1355    
1356     Preformatted XML text.
1357    
1358     =cut
1359    
1360 wakaba 1.4 =head1 RESTRICTIONS
1361    
1362     =over 4
1363    
1364     =item XML without XML Namespace is not supported.
1365    
1366     =item Before default namespace without bounded URI (xmlns="") is outputed, it must be declared.
1367    
1368     For example, next code generates invalid (non-well-formed) XML Namespace document.
1369    
1370     my $x = SuikaWiki::Markup::XML->new (local_name => 'elementType');
1371     print $x # <ns1:elementType xmlns:ns1=""></ns1:elementType>
1372    
1373     So you must write like:
1374    
1375     my $x = SuikaWiki::Markup::XML->new (local_name => 'elementType');
1376     $x->define_new_namespace ('' => '');
1377     print $x; # <elementType xmlns=""></elementType>
1378    
1379     =back
1380    
1381 wakaba 1.1 =head1 LICENSE
1382    
1383     Copyright 2003 Wakaba <w@suika.fam.cx>
1384    
1385     This program is free software; you can redistribute it and/or
1386     modify it under the same terms as Perl itself.
1387    
1388     =cut
1389    
1390 wakaba 1.17 1; # $Date: 2004/01/16 08:04:59 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24