/[pub]/suikawiki/script/lib/SuikaWiki/Markup/XML.pm
Suika

Contents of /suikawiki/script/lib/SuikaWiki/Markup/XML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.13 - (hide annotations) (download)
Sun Jul 13 02:32:24 2003 UTC (22 years ago) by w
Branch: MAIN
Changes since 1.12: +4 -3 lines
More attribute default support

1 wakaba 1.1
2     =head1 NAME
3    
4     SuikaWiki::Markup::XML --- SuikaWiki: Simple well-formed document fragment generator
5    
6     =head1 DESCRIPTION
7    
8     This module can be used to generate the document fragment of XML, SGML or
9     other well-formed (in XML meaning) data formats with the object oriented manner.
10    
11     This module cannot be used to parse XML (or other marked-up) document (or its fragment)
12     by itself, nor is compatible with other huge packages such as XML::Parser. The only purpose
13     of this module is to make it easy for tiny perl scripts to GENERATE well-formed
14     markup constructures. (SuikaWiki is not "tiny"? Oh, yes, I see:-))
15    
16     =cut
17    
18     package SuikaWiki::Markup::XML;
19     use strict;
20 w 1.13 our $VERSION = do{my @r=(q$Revision: 1.12 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
21 wakaba 1.9 use overload '""' => \&outer_xml,
22 wakaba 1.2 fallback => 1;
23     use Char::Class::XML qw!InXML_NameStartChar InXMLNameChar InXML_NCNameStartChar InXMLNCNameChar!;
24 wakaba 1.1 our %Namespace_URI_to_prefix = (
25     'DAV:' => [qw/dav webdav/],
26     'http://greenbytes.de/2002/rfcedit' => [qw/ed/],
27     'http://icl.com/saxon' => [qw/saxon/],
28     'http://members.jcom.home.ne.jp/jintrick/2003/02/site-concept.xml#' => ['', qw/sitemap/],
29     'http://purl.org/dc/elements/1.1/' => [qw/dc dc11/],
30     'http://purl.org/rss/1.0/' => ['', qw/rss rss10/],
31     'http://suika.fam.cx/~wakaba/lang/rfc/translation/' => [qw/ja/],
32     'http://www.mozilla.org/xbl' => ['', qw/xbl/],
33 wakaba 1.3 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => [qw/rdf/],
34 wakaba 1.1 'http://www.w3.org/1999/xhtml' => ['', qw/h h1 xhtml xhtml1/],
35     'http://www.w3.org/1999/xlink' => [qw/l xlink/],
36     'http://www.w3.org/1999/XSL/Format' => [qw/fo xslfo xsl-fo xsl/],
37     'http://www.w3.org/1999/XSL/Transform' => [qw/t s xslt xsl/],
38     'http://www.w3.org/1999/XSL/TransformAlias' => [qw/axslt axsl xslt xsl/],
39     'http://www.w3.org/2000/01/rdf-schema#' => [qw/rdfs/],
40     'http://www.w3.org/2000/svg' => ['', qw/s svg/],
41     'http://www.w3.org/2002/06/hlink' => [qw/h hlink/],
42     'http://www.w3.org/2002/06/xhtml2' => ['', qw/h h2 xhtml xhtml2/],
43 wakaba 1.3 'http://www.w3.org/2002/07/owl' => [qw/owl/],
44 wakaba 1.6 'http://www.w3.org/2002/xforms/cr' => [qw/f xforms/],
45 wakaba 1.1 'http://www.w3.org/TR/REC-smil' => ['', qw/smil smil1/],
46 wakaba 1.3 'http://www.wapforum.org/2001/wml' => [qw/wap/],
47 wakaba 1.1 'http://xml.apache.org/xalan' => [qw/xalan/],
48     'mailto:julian.reschke@greenbytes.de?subject=rcf2629.xslt' => [qw/myns/],
49     'urn:schemas-microsoft-com:vml' => [qw/v vml/],
50     'urn:schemas-microsoft-com:xslt' => [qw/ms msxsl msxslt/],
51     'urn:x-suika-fam-cx:markup:ietf:html:3:draft:00' => ['', qw/H HTML HTML3/],
52 wakaba 1.7 'urn:x-suika-fam-cx:markup:ietf:rfc:2629' => ['', qw/rfc rfc2629/],
53 wakaba 1.1 );
54     my %Cache;
55 wakaba 1.10 our %NS = (
56 wakaba 1.7 SGML => 'urn:x-suika-fam-cx:markup:sgml:',
57 w 1.12 XML => 'urn:x-suika-fam-cx:markup:xml:',
58 wakaba 1.10 internal_attr_duplicate => 'http://suika.fam.cx/~wakaba/-temp/2003/05/17/invalid-attr#',
59     internal_invalid_sysid => 'http://system.identifier.invalid/',
60     internal_ns_invalid => 'http://suika.fam.cx/~wakaba/-temp/2003/05/17/unknown-namespace#',
61 wakaba 1.8 xml => 'http://www.w3.org/XML/1998/namespace',
62     xmlns => 'http://www.w3.org/2000/xmlns/',
63 wakaba 1.7 );
64 wakaba 1.1
65     =head1 METHODS
66    
67     =over 4
68    
69     =item $x = SuikaWiki::Markup::XML->new (%options)
70    
71     Returns new instance of the module. It is itself a node.
72    
73 wakaba 1.4 Available options: C<data_type>, C<default_decl>, C<type> (default: C<#element>), C<local_name>, C<namespace_uri> and C<value>.
74 wakaba 1.1
75     =cut
76    
77     sub new ($;%) {
78     my $class = shift;
79     my $self = bless {@_}, $class;
80     $self->{type} ||= '#element';
81 wakaba 1.5 if ($self->{qname}) {
82     ($self->{namespace_prefix}, $self->{local_name}) = $self->_ns_parse_qname ($self->{qname});
83     $self->{_qname} = $self->{qname};
84     }
85     if (defined $self->{namespace_prefix}) {
86     $self->{namespace_prefix} .= ':' if $self->{namespace_prefix}
87     && substr ($self->{namespace_prefix}, -1) ne ':';
88 wakaba 1.10 $self->{ns}->{$self->{namespace_prefix}||''} = $self->{namespace_uri}
89     if defined $self->{namespace_uri};
90 wakaba 1.5 }
91 wakaba 1.4 for (qw/local_name value/) {
92 wakaba 1.10 if ($self->_is_same_class ($self->{$_})) {
93 wakaba 1.2 $self->{$_}->{parent} = $self;
94     }
95     }
96 wakaba 1.1 $self->{node} ||= [];
97     $self;
98     }
99    
100 wakaba 1.5 sub _ns_parse_qname ($$) {
101 wakaba 1.10 my $qname = $_[1];
102 wakaba 1.5 if ($qname =~ /:/) {
103     return split /:/, $qname, 2;
104     } else {
105     return (undef, $qname);
106     }
107     }
108 wakaba 1.1
109     =item $x->append_node ($node)
110    
111     Appending given node to the object (as the last child).
112     If the type of given node is C<#fragment>, its all children, not the node
113     itself, are appended.
114    
115     This method returns the appended node unless the type of given node is C<#fragment>.
116     In such cases, this node (C<$x>) is returned.
117    
118 wakaba 1.2 Available options: C<node_or_text>.
119    
120 wakaba 1.1 =cut
121    
122 wakaba 1.2 sub append_node ($$;%) {
123 wakaba 1.1 my $self = shift;
124 wakaba 1.2 my ($new_node, %o) = @_;
125     unless (ref $new_node) {
126     if ($o{node_or_text}) {
127     return $self->append_text ($new_node);
128     } else {
129 wakaba 1.10 die "append_node: Invalid node";
130 wakaba 1.2 }
131     }
132 wakaba 1.1 if ($new_node->{type} eq '#fragment') {
133     for (@{$new_node->{node}}) {
134     push @{$self->{node}}, $_;
135     $_->{parent} = $self;
136     }
137     $self;
138     } else {
139     push @{$self->{node}}, $new_node;
140     $new_node->{parent} = $self;
141     $new_node;
142     }
143     }
144    
145     =item $new_node = $x->append_new_node (%options)
146    
147     Appending a new node. The new node is returned.
148    
149     Available options: C<type>, C<namespace_uri>, C<local_name>, C<value>.
150    
151     =cut
152    
153     sub append_new_node ($;%) {
154     my $self = shift;
155 wakaba 1.10 my $new_node = __PACKAGE__->new (@_);
156 wakaba 1.1 push @{$self->{node}}, $new_node;
157     $new_node->{parent} = $self;
158     $new_node;
159     }
160    
161     =item $new_node = $x->append_text ($text)
162    
163     Appending given text as a new text node. The new text node is returned.
164    
165     =cut
166    
167     sub append_text ($$;%) {
168     my $self = shift;
169     my $s = shift;
170 wakaba 1.10 $self->append_new_node (type => '#text', value => $s);
171 wakaba 1.1 }
172    
173 wakaba 1.2 sub append_baretext ($$;%) {
174     my $self = shift;
175     my $s = shift;
176     $self->append_new_node (type => '#xml', value => $s);
177     }
178    
179 wakaba 1.9 sub remove_child_node ($$) {
180     my ($self, $node) = @_;
181     return unless ref $node;
182     $node = overload::StrVal ($node);
183     $self->{node} = [grep { overload::StrVal ($_) ne $node } @{$self->{node}}];
184     }
185    
186 wakaba 1.1 =item $attr_node = $x->get_attribute ($local_name, %options)
187    
188     Returns the attribute node whose local-name is C<$local_name>.
189    
190     Available options: C<namespace_uri>, C<make_new_node>.
191    
192     =cut
193    
194     sub get_attribute ($$;%) {
195 wakaba 1.10 my ($self, $name, %o) = @_;
196 wakaba 1.1 for (@{$self->{node}}) {
197 wakaba 1.10 if ($_->{type} eq '#attribute'
198     && $_->{local_name} eq $name
199     && $o{namespace_uri} eq $_->{namespace_uri}) {
200 wakaba 1.1 return $_;
201     }
202     }
203     ## Node is not exist
204     if ($o{make_new_node}) {
205 wakaba 1.10 return $self->append_new_node (type => '#attribute', local_name => $name,
206     namespace_uri => $o{namespace_uri});
207 wakaba 1.1 } else {
208     return undef;
209     }
210     }
211    
212     =item $attr_node = $x->set_attribute ($local_name => $value, %options)
213    
214     Set the value of the attribute. The attribute node is returned.
215    
216     Available options: C<namespace_uri>.
217    
218     =cut
219    
220     sub set_attribute ($$$;%) {
221 wakaba 1.10 my ($self, $name, $val, %o) = @_;
222     if ({qw/ARRAY 1 HASH 1 CODE 1/}->{ref ($val)}) {
223 wakaba 1.7 ## TODO: common error handling
224     require Carp;
225     Carp::croak "set_attribute: new attribute value must be string or blessed object";
226     }
227 wakaba 1.1 for (@{$self->{node}}) {
228 wakaba 1.10 if ($_->{type} eq '#attribute'
229     && $_->{local_name} eq $name
230     && $o{namespace_uri} eq $_->{namespace_uri}) {
231 wakaba 1.1 $_->{value} = $val;
232     $_->{node} = [];
233     return $_;
234     }
235     }
236 wakaba 1.10 return $self->append_new_node (type => '#attribute', local_name => $name,
237     value => $val, namespace_uri => $o{namespace_uri});
238 wakaba 1.1 }
239    
240     =item \@children = $x->child_nodes
241    
242     Returns an array reference to child nodes.
243    
244 wakaba 1.4 =item $local_name = $x->local_name ([$new_name])
245 wakaba 1.1
246 wakaba 1.4 Returns or set the local-name.
247    
248     =item $uri = $x->namespace_uri ([$new_uri])
249    
250     Returns or set namespace name (URI) of the element or the attribute
251 wakaba 1.1
252 wakaba 1.5 =item $uri = $x->namespace_prefix ([$new_prefix])
253    
254     Returns or set namespace prefix of the element or the attribute.
255     You may give C<$new_prefix> in form either 'foo' or 'foo:'.
256     To indicate "default" prefix, use '' (length == 0 string).
257    
258 wakaba 1.1 =item $type = $x->node_type
259    
260     Returns the node type.
261    
262     =item $node = $x->parent_node
263    
264     Returns the parent node. If there is no parent node, undef is returned.
265    
266     =cut
267    
268 wakaba 1.10 sub child_nodes ($) { $_[0]->{node} }
269 wakaba 1.4 sub local_name ($;$) {
270     my ($self, $newname) = @_;
271 wakaba 1.10 $self->{local_name} = $newname if $newname;
272 wakaba 1.4 if (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') {
273     $self->{local_name}->{local_name};
274     } else {
275     $self->{local_name}
276     }
277     }
278 wakaba 1.10 sub node_type ($) { $_[0]->{type} }
279     sub parent_node ($) { $_[0]->{parent} }
280 wakaba 1.1
281 wakaba 1.4 sub namespace_uri ($;$) {
282     my ($self, $new_uri) = @_;
283 wakaba 1.10 $self->{namespace_uri} = $new_uri if defined $new_uri;
284 wakaba 1.4 $self->{namespace_uri};
285     }
286 wakaba 1.5 sub namespace_prefix ($;$) {
287     my ($self, $new_pfx) = @_;
288     if (defined $new_pfx && $self->{namespace_uri}) {
289     $new_pfx .= ':' if $new_pfx;
290     $self->{namespace_prefix} = $new_pfx;
291     $self->{ns}->{$new_pfx} = $self->{namespace_uri};
292     }
293     $self->_get_namespace_prefix ($self->{namespace_uri});
294     }
295 wakaba 1.4
296 wakaba 1.1 =item $i = $x->count
297    
298     Returns the number of child nodes.
299    
300     =cut
301    
302     # TODO: support counting by type
303     sub count ($;@) {
304 wakaba 1.10 (defined $_[0]->{value} ? 1 : 0) + scalar @{$_[0]->{node}};
305 wakaba 1.1 }
306    
307     # $prefix = $x->_get_namespace_prefix ($namespace_uri)
308     sub _get_namespace_prefix ($$;%) {
309 wakaba 1.10 my ($self, $uri, %o) = @_;
310 wakaba 1.1 if (defined (my $p = $self->_uri_to_prefix ($uri, undef, %o))) {
311     return $p if $self->_prefix_to_uri ($p) eq $uri;
312     } if ($Namespace_URI_to_prefix{$uri}) {
313     for (@{$Namespace_URI_to_prefix{$uri}}) {
314     my $pfx = $_; $pfx .= ':' if $pfx;
315     if ($self->_check_namespace_prefix ($pfx) && !$self->_prefix_to_uri ($pfx)) {
316     return $self->_uri_to_prefix ($uri => $pfx, %o);
317     }
318     }
319     } else {
320     my ($u_r_i, $pfx) = ($uri);
321 wakaba 1.4 $u_r_i =~ s/[^0-9A-Za-z._-]+/ /g;
322     my @u_r_i = split / /, $u_r_i;
323     for (reverse @u_r_i) {
324     if (s/([A-Za-z][0-9A-Za-z._-]+)//) {
325     my $p_f_x = $1 . ':';
326     next if lc (substr ($p_f_x, 0, 3)) eq 'xml';
327     unless ($self->_prefix_to_uri ($p_f_x)) {
328     $pfx = $p_f_x;
329     last;
330     }
331 wakaba 1.1 }
332     }
333     if ($pfx) {
334     return $self->_uri_to_prefix ($uri => $pfx, %o);
335     } else {
336     while (1) {
337     my $pfx = 'ns'.(++$self->{ns}->{-anonymous}).':';
338     unless ($self->_prefix_to_uri ($pfx)) {
339     return $self->_uri_to_prefix ($uri => $pfx, %o);
340     }
341     }
342     }
343     }
344     }
345    
346 wakaba 1.4 sub _set_prefix_to_uri ($$$;%) {
347     my ($self, $prefix => $uri, %o) = @_;
348     return undef unless $self->_check_namespace_prefix ($prefix);
349     $self->{ns}->{$prefix} = $uri;
350     $self->_prefix_to_uri ($prefix);
351     }
352    
353     ## TODO: removing ns declare (1.1) support
354     # $uri or undef = $x->_prefix_to_uri ($prefix)
355 wakaba 1.1 sub _prefix_to_uri ($$;$%) {
356 wakaba 1.4 my ($self, $prefix, %o) = @_;
357 wakaba 1.1 return undef unless $self->_check_namespace_prefix ($prefix);
358     if (uc (substr $prefix, 0, 3) eq 'XML') {
359 wakaba 1.8 return $NS{xml} if $prefix eq 'xml:';
360     return $NS{xmlns} if $prefix eq 'xmlns:';
361 wakaba 1.2 }
362     if (defined $self->{ns}->{$prefix}) {
363 wakaba 1.1 $self->{ns}->{$prefix};
364     } elsif (ref $self->{parent}) {
365     shift; # $self
366     $self->{parent}->_prefix_to_uri (@_);
367     } else {
368     undef;
369     }
370     }
371    
372     # $prefix or undef = $x->_uri_to_prefix ($uri => [$new_prefix], %options)
373     # use_no_prefix (default: 1): Allow default namespace (no prefix).
374     sub _uri_to_prefix ($$;$%) {
375     my ($self, $uri, $new_prefix, %o) = @_;
376     if (defined $new_prefix && $self->_check_namespace_prefix ($new_prefix)) {
377     $self->{ns}->{$new_prefix} = $uri;
378     $new_prefix;
379     } else {
380 wakaba 1.8 return 'xml:' if $uri eq $NS{xml};
381     return 'xmlns:' if $uri eq $NS{xmlns};
382 wakaba 1.1 for (keys %{$self->{ns}||{}}) {
383     next if ($_ eq '') && !(!defined $o{use_no_prefix} || $o{use_no_prefix});
384     return $_ if $self->{ns}->{$_} eq $uri;
385     }
386 wakaba 1.2 if (ref ($self->{parent}) && $self->{parent}->{type} ne '#declaration') {
387 wakaba 1.1 shift; # $self
388     $self->{parent}->_uri_to_prefix (@_);
389     } else {
390     undef;
391     }
392     }
393     }
394    
395     =item $x->define_new_namespace ($prefix => $uri)
396    
397     Defines a new XML Namespace. This method is useful for root or section-level
398     element node.
399    
400     Returned value is unspecified in this version of this module.
401    
402     =cut
403    
404 wakaba 1.4 ## TODO: structured URI (such as http://&server;/) support
405 wakaba 1.1 sub define_new_namespace ($$$) {
406     my ($self, $prefix, $uri) = @_;
407     if ($prefix eq '' || $self->_check_ncname ($prefix)) {
408     $prefix .= ':' if $prefix && substr ($prefix, -1) ne ':';
409 wakaba 1.4 $self->_set_prefix_to_uri ($prefix => $uri);
410 wakaba 1.1 } else {
411     undef;
412     }
413     }
414    
415 wakaba 1.4 =item $uri = $x->defined_namespace_prefix ($prefix)
416    
417     Query whether the namespace prefix is defined or not.
418     If defined, return namespace name (URI).
419    
420     =cut
421    
422     sub defined_namespace_prefix ($$) {
423     my ($self, $prefix) = @_;
424 wakaba 1.10 $prefix .= ':' if $prefix;
425 wakaba 1.4 $self->_prefix_to_uri ($prefix);
426     }
427    
428 wakaba 1.1 =item $qname = $x->qname
429    
430     Returns QName ((namespace-)qualified name) of the element type.
431     Undef is retuened when the type does not have its QName
432     (ie. when type is neither C<#element> or C<#attribute>).
433    
434     =cut
435    
436     sub qname ($) {
437     my $self = shift;
438     if ($self->_check_ncname ($self->{local_name})) {
439     if ($self->{type} eq '#element') {
440     $self->{_qname} = $self->_get_namespace_prefix ($self->{namespace_uri}) . $self->{local_name}
441     unless $self->{_qname};
442     return $self->{_qname};
443     } elsif ($self->{type} eq '#attribute') {
444     return $self->attribute_name;
445     }
446     }
447     undef;
448     }
449    
450 wakaba 1.9 sub merge_external_subset ($) {
451     my $self = shift;
452     unless ($self->{type} eq '#declaration' && $self->{namespace_uri} eq $NS{SGML}.'doctype') {
453     return unless $self->{type} eq '#document' || $self->{type} eq '#fragment';
454     for (@{$self->{node}}) {
455     $_->merge_external_subset;
456     }
457     return;
458     }
459     my $xsub = $self->get_attribute ('external-subset');
460     return unless ref $xsub;
461     for (@{$xsub->{node}}) {
462     $_->{parent} = $self;
463     }
464     push @{$self->{node}}, @{$xsub->{node}};
465     $self->remove_child_node ($xsub);
466     $self->remove_child_node ($self->get_attribute ('PUBLIC'));
467     $self->remove_child_node ($self->get_attribute ('SYSTEM'));
468     $self->remove_marked_section;
469     }
470    
471     sub remove_marked_section ($) {
472     my $self = shift;
473     my @node;
474     for (@{$self->{node}}) {
475     $_->remove_marked_section;
476     }
477     for (@{$self->{node}}) {
478     if ($_->{type} ne '#section') {
479     push @node, $_;
480     } else {
481     my $status = $_->get_attribute ('status', make_new_node => 1)->inner_text;
482     if ($status eq 'CDATA') {
483     $_->{type} = '#text';
484     $_->remove_child_node ($_->get_attribute ('status'));
485     push @node, $_;
486     } elsif ($status ne 'IGNORE') { # INCLUDE
487     for my $e (@{$_->{node}}) {
488     if ($e->{type} ne '#attribute') {
489     $e->{parent} = $self;
490     push @node, $e;
491     }
492     }
493     }
494     }
495     }
496     $self->{node} = \@node;
497     }
498    
499 wakaba 1.7 sub remove_references ($) {
500     my $self = shift;
501     my @node;
502 wakaba 1.8 for (@{$self->{node}}) {
503     $_->remove_references;
504     }
505 wakaba 1.7 for (@{$self->{node}}) {
506     if ($_->{type} ne '#reference'
507     || ($self->{type} eq '#declaration' && $_->{namespace_uri} eq $NS{SGML}.'entity')) {
508     push @node, $_;
509     } else {
510     if ($_->{namespace_uri} =~ /char/) {
511     my $e = ref ($_)->new (type => '#text', value => chr $_->{value});
512     $e->{parent} = $self;
513     push @node, $e;
514     } elsif ($_->{flag}->{smxp__ref_expanded}) {
515     for my $e (@{$_->{node}}) {
516 wakaba 1.8 if ($e->{type} ne '#attribute') {
517     $e->{parent} = $self;
518     push @node, $e;
519     }
520 wakaba 1.7 }
521     } else { ## reference is not expanded
522     push @node, $_;
523     }
524     }
525 wakaba 1.8 $_->{flag}->{smxp__defined_with_param_ref} = 0
526 w 1.13 if $_->{flag}->{smxp__defined_with_param_ref}
527     && !$_->{flag}->{smxp__non_processed_declaration};
528 wakaba 1.7 }
529     $self->{node} = \@node;
530     }
531    
532 wakaba 1.8 sub resolve_relative_uri ($;$%) {
533     require URI;
534     my ($self, $rel, %o) = @_;
535     my $base = $self->get_attribute ('base', namespace_uri => $NS{xml});
536     $base = ref ($base) ? $base->inner_text : undef;
537     if ($base !~ /^(?:[0-9A-Za-z.+-]|%[0-9A-Fa-f]{2})+:/) { # $base is relative
538     $base = $self->_resolve_relative_uri_by_parent ($base, \%o);
539     }
540     eval q{ ## Catch error such as $base is 'data:,foo' (non hierarchic scheme,...)
541     return URI->new ($rel)->abs ($base || '.'); ## BUG (or spec) of URI: $base == false
542     } or return $rel;
543     }
544     sub _resolve_relative_uri_by_parent ($$$) {
545     my ($self, $rel, $o) = @_;
546     if (ref $self->{parent}) {
547     if (!$o->{use_references_base_uri} && $self->{parent}->{type} eq '#reference') {
548     ## This case is necessary to work with
549     ## <element> <!-- element can have base URI -->
550     ## text <!-- text cannot have base URI -->
551     ## &ent; <!-- ref's base URI is referred entity's one (in this module) -->
552     ## <!-- expantion of ent -->
553     ## entity's text <!-- text cannot have base URI, so use <element>'s one -->
554     ## <entitys-element/> <!-- element can have base URI, otherwise ENTITY's one -->
555     ## </element>
556     return $self->{parent}->_resolve_relative_uri_by_parent ($rel, $o);
557     } else {
558     return $self->{parent}->resolve_relative_uri ($rel, %$o);
559     }
560     } else {
561     return $rel;
562     }
563     }
564     sub base_uri ($;$) {
565     my ($self, $new_uri) = @_;
566     my $base;
567     if (defined $new_uri) {
568     $base = $self->set_attribute (base => $new_uri, namespace_uri => $NS{xml});
569     }
570     $base ||= $self->get_attribute ('base', namespace_uri => $NS{xml});
571     ref ($base) ? $base->inner_text : undef;
572     }
573    
574 wakaba 1.1 =item $tag = $x->start_tag
575    
576 wakaba 1.4 Returns the start tag (or something that marks the start of something, such as '<!--'
577 wakaba 1.1 for C<#comment> nodes).
578    
579     =cut
580    
581     sub start_tag ($) {
582     my $self = shift;
583     if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) {
584     my $r = '<';
585     $r .= $self->qname;
586     for (@{$self->{node}}) {
587     $r .= ' ' . $_->outer_xml if $_->node_type eq '#attribute';
588     }
589     for my $prefix (grep !/^-/, keys %{$self->{ns}||{}}) {
590     if ($prefix) {
591     $r .= ' xmlns:'.substr ($prefix, 0, length ($prefix)-1);
592     } else {
593     $r .= ' xmlns';
594     }
595     $r .= '="'.$self->_entitize ($self->{ns}->{$prefix}).'"';
596     }
597     $r .= '>';
598     $r;
599     } elsif ($self->{type} eq '#comment') {
600 wakaba 1.4 '<!--';
601     } elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) {
602     '<?' . ($self->{local_name});
603 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
604 wakaba 1.10 if ($self->{namespace_uri} eq $NS{SGML}.'char:ref:hex') {
605 wakaba 1.2 '&#x';
606 wakaba 1.10 } elsif ($self->{namespace_uri} eq $NS{SGML}.'char:ref') {
607 wakaba 1.2 '&#';
608 wakaba 1.4 } elsif ($self->_check_ncname ($self->{local_name})) {
609 wakaba 1.10 if ($self->{namespace_uri} eq $NS{SGML}.'entity:parameter') {
610 wakaba 1.2 '%';
611     } else {
612     '&';
613     }
614     } else { # error
615     '';
616     }
617 wakaba 1.4 } elsif ($self->{type} eq '#declaration' && $self->{namespace_uri}) {
618     '<!' . {
619 wakaba 1.8 $NS{SGML}.'attlist' => 'ATTLIST',
620     $NS{SGML}.'doctype' => 'DOCTYPE',
621     $NS{SGML}.'element' => 'ELEMENT',
622     $NS{SGML}.'entity' => 'ENTITY',
623     $NS{SGML}.'entity:parameter' => 'ENTITY',
624     $NS{SGML}.'notation' => 'NOTATION',
625 wakaba 1.10 }->{$self->{namespace_uri}} . ' ' .
626     ($self->{namespace_uri} eq $NS{SGML}.'entity:parameter' ?
627 wakaba 1.7 ($self->{flag}->{smxp__defined_with_param_ref}?'':'% '):'');
628 wakaba 1.2 } elsif ($self->{type} eq '#section') {
629 wakaba 1.9 '<![';
630 wakaba 1.1 } else {
631     '';
632     }
633     }
634    
635     =item $tag = $x->end_tag
636    
637 wakaba 1.4 Returns the end tag (or something that marks the end of something, such as '-->'
638 wakaba 1.1 for C<#comment> nodes).
639    
640     =cut
641    
642     sub end_tag ($) {
643     my $self = shift;
644     if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) {
645     '</' . $self->qname . '>';
646     } elsif ($self->{type} eq '#comment') {
647 wakaba 1.4 '-->';
648 wakaba 1.1 } elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) {
649     '?>';
650 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
651     ';';
652 wakaba 1.4 } elsif ($self->{type} eq '#declaration' && $self->{namespace_uri}) {
653     '>';
654 wakaba 1.1 } elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) {
655 wakaba 1.4 '>';
656 wakaba 1.2 } elsif ($self->{type} eq '#section') {
657 wakaba 1.9 ']]>';
658 wakaba 1.1 } else {
659     '';
660     }
661     }
662    
663     =item $tag = $x->attribute_name
664    
665     Returns the attribute name.
666    
667     =cut
668    
669     sub attribute_name ($) {
670     my $self = shift;
671     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
672     ($self->{namespace_uri} ?
673     (ref $self->{parent} ? $self->{parent} : $self)
674     ->_get_namespace_prefix ($self->{namespace_uri}, use_no_prefix => 0) : '')
675     .$self->{local_name};
676     } else {
677     '';
678     }
679     }
680    
681     =item $tag = $x->attribute_value
682    
683     Returns the attribute value.
684    
685     =cut
686    
687 wakaba 1.4 sub attribute_value ($;%) {
688 wakaba 1.10 my ($self, %o) = @_;
689 wakaba 1.1 if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
690 wakaba 1.5 my $r = '"';
691 wakaba 1.10 my $isc = $self->_is_same_class ($self->{value});
692 w 1.12 $r .= $self->_entitize ($self->{value}, keep_wsp => 1) unless $isc;
693 wakaba 1.10 for (($isc?$self->{value}:()), @{$self->{node}}) {
694     my $nt = $_->{type};
695 wakaba 1.4 if ($nt eq '#reference' || $nt eq '#xml') {
696     $r .= $_->outer_xml;
697     } elsif ($nt ne '#attribute') {
698 w 1.12 $r .= $self->_entitize ($_->inner_text, keep_wsp => 1);
699 wakaba 1.4 }
700     }
701     return $r . '"';
702     } else {
703     '';
704     }
705     }
706    
707     sub entity_value ($;%) {
708 wakaba 1.10 my ($self, %o) = @_;
709 wakaba 1.4 my $_entitize = sub {
710     my $s = shift;
711 wakaba 1.7 $s =~ s/&/&#x26;/g;
712     $s =~ s/&#x26;(\p{InXML_NameStartChar}\p{InXMLNameChar}*);/&$1;/g;
713 w 1.12 $s =~ s/([\x0D%"])/sprintf '&#x%02X;', ord $1/ge;
714     $s =~ s/([\x00-\x08\x0B\x0C\x0E-\x1F\x7F])/sprintf '&amp;#x%02X;', ord $1/ge;
715 wakaba 1.4 $s;
716     };
717     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
718     my $r = '"' . &$_entitize ($self->{value});
719     for (@{$self->{node}}) {
720 wakaba 1.10 my $nt = $_->{type};
721 wakaba 1.4 if ($nt eq '#reference' || $nt eq '#xml') {
722     $r .= $_->outer_xml;
723     } elsif ($nt ne '#attribute') {
724     $r .= &$_entitize ($_->inner_text);
725 wakaba 1.1 }
726     }
727 wakaba 1.4 return $r . '"';
728 wakaba 1.1 } else {
729     '';
730     }
731     }
732    
733 wakaba 1.7 ## This method should be called only from SuikaWiki::Markup::XML::* family modules,
734     ## since this is NOT a FORMAL interface.
735     sub _entity_parameter_literal_value ($;%) {
736     my $self = shift;
737 wakaba 1.10 my $r = '';
738     my $isc = $self->_is_same_class ($self->{value});
739     $r = $self->{value} unless $isc;
740     for (($isc?$self->{value}:()), @{$self->{node}}) {
741     my $nt = $_->{type};
742 w 1.12 ## Bare node and general entity reference node
743     if ($nt eq '#xml' || ($nt eq '#reference' && $_->{namespace_uri} eq $NS{SGML}.'entity')) {
744 wakaba 1.10 $r .= $_->outer_xml;
745 w 1.12 ## Text node and parameter entity reference node
746 wakaba 1.10 } elsif ($nt ne '#attribute') {
747     $r .= $_->inner_text;
748 wakaba 1.7 }
749 wakaba 1.10 }
750     $r;
751 wakaba 1.7 }
752    
753 wakaba 1.1 =item $tag = $x->attribute
754    
755     Returns the attribute (name and value pair).
756    
757     =cut
758    
759     sub attribute ($) {
760     my $self = shift;
761     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
762     $self->attribute_name . '=' . $self->attribute_value;
763     } else {
764     '';
765     }
766     }
767    
768 wakaba 1.2 sub external_id ($;%) {
769     my $self = shift;
770     my %o = @_;
771     my ($pubid, $sysid, $ndata);
772     for (@{$self->{node}}) {
773 wakaba 1.4 if ($_->{type} eq '#attribute' && !$_->{namespace_uri}) {
774 wakaba 1.2 if ($_->{local_name} eq 'PUBLIC') {
775 wakaba 1.4 $pubid = $_->inner_text;
776 wakaba 1.2 } elsif ($_->{local_name} eq 'SYSTEM') {
777 wakaba 1.4 $sysid = $_->inner_text;
778 wakaba 1.2 } elsif ($_->{local_name} eq 'NDATA') {
779 wakaba 1.4 $ndata = $_->inner_text;
780     undef $ndata unless $self->_check_ncname ($ndata);
781 wakaba 1.2 }
782     }
783     }
784     my $r = '';
785 wakaba 1.9 if (defined $pubid) {
786 wakaba 1.4 $pubid =~ s|([^\x0A\x0D\x20A-Za-z0-9'()+,./:=?;!*#\@\$_%-])|sprintf '%%%02X', ord $1|ges;
787 wakaba 1.9 $pubid = '"' . $pubid . '"';
788 wakaba 1.4 }
789 wakaba 1.9 if (defined $sysid) {
790 wakaba 1.10 if (index ($sysid, '"') > -1) {
791     if (index ($sysid, "'") > -1) {
792     $sysid =~ s/"/%22/; $sysid = '"' . $sysid . '"';
793 wakaba 1.4 } else {
794     $sysid = "'" . $sysid . "'";
795     }
796     } else {
797     $sysid = '"' . $sysid . '"';
798     }
799     }
800 wakaba 1.2 if ($pubid && $sysid) {
801 wakaba 1.9 $r = 'PUBLIC ' . $pubid . ' ' . $sysid;
802 wakaba 1.2 } elsif ($sysid) {
803 wakaba 1.4 $r = 'SYSTEM ' . $sysid;
804 wakaba 1.2 } elsif ($pubid && $o{allow_pubid_only}) {
805 wakaba 1.9 $r = 'PUBLIC ' . $pubid;
806 wakaba 1.2 }
807     if ($r && $ndata && $o{use_ndata}) {
808 wakaba 1.4 $r .= ' NDATA ' . $ndata;
809 wakaba 1.2 }
810     $r;
811     }
812    
813     =item $s = $x->content_spec
814    
815     Generates contentspec of element type declaration (ex. C<(E1 | E2 | E3)>)
816     or AttDef of attribute declaration (ex. C<name CDATA #REQUIRED>).
817    
818     =cut
819    
820     sub content_spec ($) {
821     my $self = shift;
822     if ($self->{type} eq '#element') {
823 wakaba 1.10 my $text = 0;
824     my $contentspec = join ' | ', map {$_->qname} grep {$text = 1 if $_->{type} eq '#text'; $_->{type} eq '#element'} @{$self->{node}};
825 wakaba 1.2 $contentspec = '#PCDATA' . ($contentspec ? ' | ' . $contentspec : '') if $text;
826    
827     return $contentspec ? '(' . $contentspec . ')' : 'EMPTY';
828     } elsif ($self->{type} eq '#attribute') {
829     my $attdef = $self->qname . "\t" . ($self->{data_type} || 'CDATA') . "\t";
830     my $default = $self->{default_decl};
831     $default .= ' ' . $self->attribute_value if $default eq '#FIXED';
832     unless ($default) {
833     $default = defined $self->{value} ? $self->attribute_value : '#IMPLIED';
834     }
835     return $attdef . $default;
836     }
837     }
838    
839 wakaba 1.1 =item $tag = $x->inner_xml
840    
841     Returns the content of the node in XML syntax. (In case of the C<#element> nodes,
842     element content without start- and end-tags is returned.)
843    
844     Note that for not all node types the behavior of this method is defined.
845     For example, returned value of C<#attribute> might be unexpected one
846     in this version of this module.
847    
848     =cut
849    
850 wakaba 1.3 sub inner_xml ($;%) {
851 wakaba 1.10 my ($self, %o) = @_;
852 wakaba 1.1 my $r = '';
853     if ($self->{type} eq '#comment') {
854     $r = $self->inner_text;
855     $r =~ s/--/-&#x45;/g;
856     } elsif ($self->{type} eq '#pi') {
857 wakaba 1.10 my $isc = $self->_is_same_class ($self->{value});
858     if (!$isc && length ($self->{value})) {
859 wakaba 1.1 $r = ' ' . $self->{value};
860 wakaba 1.2 $r =~ s/\?>/? >/g; # Same replacement as of the recommendation of XSLT:p.i.
861 wakaba 1.1 }
862 wakaba 1.10 for (($isc?$self->{value}:()), @{$self->{node}}) {
863 wakaba 1.1 if ($_->node_type eq '#attribute') {
864     $r .= ' ' . $_->attribute;
865     } else {
866     my $s = $_->inner_text;
867 wakaba 1.10 if (length $s) {
868     $s =~ s/\?>/? >/g;
869     $r .= ' ' . $s;
870     }
871 wakaba 1.1 }
872     }
873 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
874 wakaba 1.10 if ($self->{namespace_uri} eq $NS{SGML}.'char:ref:hex') {
875 wakaba 1.2 $r = sprintf '%02X', $self->{value};
876 wakaba 1.10 } elsif ($self->{namespace_uri} eq $NS{SGML}.'char:ref') {
877 wakaba 1.2 $r = sprintf '%02d', $self->{value};
878 wakaba 1.10 } elsif (ref ($self->{local_name}) && $self->{local_name}->{type} eq '#declaration') {
879 wakaba 1.4 $r = $self->{local_name}->{local_name};
880     } elsif ($self->_check_ncname ($self->{local_name})) {
881     $r = ($self->{local_name});
882 wakaba 1.2 } else { # error
883     $r = '';
884     }
885 wakaba 1.1 } elsif ($self->{type} eq '#declaration') {
886 wakaba 1.8 if ($self->{namespace_uri} eq $NS{SGML}.'doctype') {
887 wakaba 1.4 my $root = $self->get_attribute ('qname');
888     ref $root ? ($root = $root->inner_text) : (ref $self->{parent} ? (do {
889     for (@{$self->{parent}->{node}}) {
890     if ($_->{type} eq '#element') {
891     $root = $_->qname;
892     last if $root;
893     }
894     }
895     $root = '#IMPLIED';
896     }) : ($root = '#IMPLIED')); ## error!
897     my ($isub, $xid) = ('', $self->external_id);
898     for (@{$self->{node}}) {
899     $isub .= $_->outer_xml if $_->{type} ne '#attribute';
900 wakaba 1.2 }
901     if ($xid) {
902 wakaba 1.4 $r = $xid;
903     if ($isub) {
904 wakaba 1.7 $r .= " [" . $isub . "]";
905 wakaba 1.4 }
906     } else {
907     if ($isub) {
908 wakaba 1.7 $r = "[" . $isub . "]";
909 wakaba 1.4 } else {
910     $r = "[]";
911     }
912     }
913     $r = $root . ' ' . $r;
914 wakaba 1.9 } elsif ($self->{namespace_uri} eq $NS{SGML}.'entity'
915     || $self->{namespace_uri} eq $NS{SGML}.'entity:parameter'
916     || $self->{namespace_uri} eq $NS{SGML}.'notation') {
917 wakaba 1.4 my %xid_opt;
918 wakaba 1.7 $r = $self->{local_name} . ' ' if !$self->{flag}->{smxp__defined_with_param_ref}
919     && $self->_check_ncname ($self->{local_name});
920 wakaba 1.8 if ($self->{namespace_uri} eq $NS{SGML}.'entity:parameter') {
921 wakaba 1.4 #$r = '% ' . $r;
922 wakaba 1.8 } elsif ($self->{namespace_uri} eq $NS{SGML}.'entity') {
923 wakaba 1.4 $xid_opt{use_ndata} = 1;
924 wakaba 1.8 } elsif ($self->{namespace_uri} eq $NS{SGML}.'notation') {
925 wakaba 1.4 $xid_opt{allow_pubid_only} = 1;
926     }
927    
928 wakaba 1.10 my ($v, $xid) = ($self->{value});
929     $xid = $self->external_id (%xid_opt) unless $self->{flag}->{smxp__defined_with_param_ref};
930 wakaba 1.4 if ($xid) { ## External ID
931 wakaba 1.2 $r .= $xid;
932 wakaba 1.4 } else { ## EntityValue
933     my $entity_value = $self->get_attribute ('value');
934 wakaba 1.7 undef $entity_value if $self->{flag}->{smxp__defined_with_param_ref};
935 wakaba 1.4 if ($entity_value) { # <!ENTITY foo "bar">
936     $r .= $entity_value->entity_value;
937     } else { ## Parameter entity reference
938 wakaba 1.9 my $isc = $self->_is_same_class ($self->{value});
939     $r .= $self->{value} unless $isc;
940     for (($isc?$self->{value}:()), @{$self->{node}}) {
941 wakaba 1.7 $r .= $_->outer_xml unless $_->{type} eq '#attribute';
942 wakaba 1.4 }
943 wakaba 1.2 }
944 wakaba 1.1 }
945 wakaba 1.8 } elsif ($self->{namespace_uri} eq $NS{SGML}.'element') {
946 w 1.12 if (!$self->{flag}->{smxp__defined_with_param_ref}) {
947     $r = $self->get_attribute ('qname');
948     $r = $r->inner_text if $r;
949     unless ($self->_check_ncname ($r)) {
950     $r = undef;
951     } else {
952     $r .= ' ';
953     }
954    
955     my $cmodel = $self->get_attribute ('content', make_new_node => 1)->inner_text;
956     if ($cmodel && $cmodel ne 'mixed') {
957     $r .= $cmodel;
958     } else {
959     my $make_cmodel;
960     $make_cmodel = sub {
961     my $c = shift;
962     my @tt;
963     for (@{$c->child_nodes}) {
964     if ($_->node_type eq '#element' && $_->namespace_uri eq $NS{SGML}.'element') {
965     if ($_->local_name eq 'group') {
966     my $tt = &$make_cmodel ($_);
967     push @tt, '(' . $tt . ')'
968     . ($_->get_attribute ('occurence', make_new_node => 1)->inner_text)
969     if $tt;
970     } elsif ($_->local_name eq 'element') {
971     push @tt, $_->get_attribute ('qname', make_new_node => 1)->inner_text;
972     }
973     }
974     }
975     return join scalar ($c->get_attribute ('connector', make_new_node => 1)->inner_text
976     || '|'),
977     grep {$_} @tt;
978     };
979     my $tt;
980     my $grp_node;
981     for (@{$self->child_nodes}) {
982     if ($_->node_type eq '#element' && $_->namespace_uri eq $NS{SGML}.'element'
983     && $_->local_name eq 'group') {
984     $grp_node = $_;
985     $tt = &$make_cmodel ($grp_node);
986     last;
987     }
988     }
989     if ($cmodel eq 'mixed') { ## mixed content
990     if ($tt) {
991     $r .= '(#PCDATA|' . $tt . ')*';
992     } else {
993     $r .= '(#PCDATA)'
994     . ($grp_node->get_attribute ('connector', make_new_node => 1)->inner_text eq '*'
995     ? '*' : '');
996     }
997     } else { ## element content
998     if ($tt) {
999     $r .= '(' . $tt . ')'
1000     . ($grp_node->get_attribute ('occurence', make_new_node => 1)->inner_text);
1001     } else {
1002     $r .= 'EMPTY';
1003     }
1004     } # mixed or element content
1005     } # content model group
1006     } else { ## Save source doc's description as far as possible
1007     my $isc = $self->_is_same_class ($self->{value});
1008     $r .= $self->{value} unless $isc;
1009     for (($isc?$self->{value}:()), @{$self->{node}}) {
1010     unless ($_->{type} eq '#attribute' || $_->{type} eq '#element') {
1011     $r .= $_->outer_xml;
1012     } elsif ($_->{type} eq '#element' && $_->{namespace_uri} eq $NS{SGML}.'group') {
1013     $r .= $_->outer_xml;
1014     }
1015     }
1016 wakaba 1.2 }
1017 wakaba 1.11 } elsif ($self->{namespace_uri} eq $NS{SGML}.'attlist') {
1018 w 1.12 if (!$self->{flag}->{smxp__defined_with_param_ref}) {
1019     $r = $self->get_attribute ('qname');
1020     $r = $r->inner_text if $r;
1021     unless ($self->_check_ncname ($r)) {
1022     $r = undef;
1023     } else {
1024     $r .= ' ';
1025     }
1026     for (@{$self->{node}}) {
1027     if ($_->{type} eq '#element' && $_->{namespace_uri} eq $NS{XML}.'attlist'
1028     && $_->{local_name} eq 'AttDef') {
1029     $r .= "\n\t" . $_->get_attribute ('qname', make_new_node => 1)->inner_text;
1030     my $attr_type = $_->get_attribute ('type', make_new_node => 1)->inner_text;
1031     if ($attr_type ne 'enum') {
1032     $r .= "\t" . $attr_type;
1033     }
1034     if ($attr_type eq 'enum' || $attr_type eq 'NOTATION') {
1035     my @l;
1036     for my $item (@{$_->{node}}) {
1037     if ($item->{type} eq '#element' && $item->{namespace_uri} eq $NS{XML}.'attlist'
1038     && $item->{local_name} eq 'enum') {
1039     push @l, $item->inner_text;
1040     }
1041     }
1042     $r .= "\t(" . join ('|', @l) . ')';
1043     }
1044     ## DefaultDecl
1045     my $deftype = $_->get_attribute ('default_type', make_new_node => 1)->inner_text;
1046     if ($deftype) {
1047     $r .= "\t#" . $deftype;
1048     }
1049     if (!$deftype || $deftype eq 'FIXED') {
1050     $r .= "\t" . $_->get_attribute ('default_value', make_new_node => 1)
1051     ->attribute_value;
1052     }
1053     } # AttDef
1054 wakaba 1.2 }
1055 w 1.12 } else { ## Save source doc's description as far as possible
1056     my $isc = $self->_is_same_class ($self->{value});
1057     $r .= $self->{value} unless $isc;
1058     for (($isc?$self->{value}:()), @{$self->{node}}) {
1059     unless ($_->{type} eq '#attribute' || $_->{type} eq '#element') {
1060     $r .= $_->outer_xml;
1061     } elsif ($_->{type} eq '#element' && $_->{namespace_uri} eq $NS{SGML}.'group') {
1062     $r .= $_->outer_xml;
1063     }
1064     }
1065 wakaba 1.1 }
1066 wakaba 1.2 } else { # unknown
1067 wakaba 1.3 for (@{$self->{node}}) {
1068     $r .= $_->outer_xml;
1069     }
1070 wakaba 1.1 }
1071 wakaba 1.9 } elsif ($self->{type} eq '#section') {
1072     my $status = $self->get_attribute ('status', make_new_node => 1)->inner_text;
1073     if ($status eq 'CDATA') {
1074     $r = $self->inner_text;
1075     $r =~ s/]]>/]]>]]<![CDATA[>/g;
1076     $r = 'CDATA['.$r;
1077     } else {
1078     my $sl = $self->get_attribute ('status_list', make_new_node => 1);
1079     if ($sl->{flag}->{smxp__defined_with_param_ref}) {
1080     my $isc = $self->_is_same_class ($self->{value});
1081     $status = '';
1082     $status = $sl->{value} unless $isc;
1083     for (($isc?$sl->{value}:()), @{$sl->{node}}) {
1084     $status .= $_->outer_xml unless $_->{type} eq '#attribute';
1085     }
1086     $r = $status.'['.$r;
1087     } elsif ($status) {
1088     $r = $status.'['.$r;
1089     } else {
1090     ## Must be an ignored section
1091     }
1092     my $isc = $self->_is_same_class ($self->{value});
1093     $r .= join '', map {s/\]\]>/]]&gt;/g; $_} $self->{value} unless $isc;
1094     for (($isc?$self->{value}:()), @{$self->{node}}) {
1095     if ($_->{type} eq '#text') {
1096     $r .= join '', map {s/\]\]>/]]&gt;/g; $_} $_->inner_text; ## Anyway, this is error
1097     } elsif ($_->{type} ne '#attribute') {
1098     $r .= $_->outer_xml;
1099     }
1100     }
1101     }
1102 wakaba 1.1 } else {
1103 wakaba 1.10 my $isc = $self->_is_same_class ($self->{value});
1104     unless ($isc) {
1105     if ($self->{type} ne '#xml') {
1106     $r = $self->_entitize ($self->{value});
1107     } else {
1108     $r = $self->{value};
1109     }
1110 wakaba 1.1 }
1111 wakaba 1.10 for (($isc?$self->{value}:()), @{$self->{node}}) {
1112     my $nt = $_->{type};
1113     if (($self->{option}->{indent})
1114     && ($nt eq '#element' || $nt eq '#comment' || $nt eq '#pi' || $nt eq '#declaration')) {
1115 wakaba 1.1 $r .= "\n";
1116     }
1117     $r .= $_->outer_xml unless $_->node_type eq '#attribute';
1118     }
1119     }
1120     $r;
1121     }
1122    
1123    
1124     =item $tag = $x->outer_xml
1125    
1126     Returns the node in XML syntax.
1127    
1128     =cut
1129    
1130     sub outer_xml ($) {
1131     my $self = shift;
1132     if ($self->{type} eq '#attribute') {
1133 wakaba 1.10 return $self->attribute;
1134 wakaba 1.1 } else {
1135 wakaba 1.4 if ($self->{option}->{indent} && $self->{type} eq '#element') {
1136 wakaba 1.1 my $r = $self->start_tag;
1137     my $c = $self->inner_xml;
1138 wakaba 1.10 if (!length ($c) && $self->{option}->{use_EmptyElemTag}) {
1139 wakaba 1.4 substr ($r, -1) = ' />';
1140     } else {
1141     if ($c) {
1142     $c =~ s/\n/\n /g;
1143     $r .= "\n " . $c . "\n";
1144     }
1145     $r .= $self->end_tag;
1146 wakaba 1.1 }
1147 wakaba 1.10 return $r;
1148 wakaba 1.1 } else {
1149 wakaba 1.4 my $r = $self->start_tag;
1150     my $c = $self->inner_xml;
1151 wakaba 1.8 if ($self->{type} eq '#element' && !length ($c) && $self->{option}->{use_EmptyElemTag}) {
1152 wakaba 1.4 substr ($r, -1) = ' />';
1153     } else {
1154     $r .= $c . $self->end_tag;
1155     }
1156 wakaba 1.10 return $r;
1157 wakaba 1.11 #return '{'.$self->{type}.': '.$r.'}'; ## DEBUG: show structure
1158 wakaba 1.1 }
1159     }
1160     }
1161    
1162     =item $tag = $x->inner_text
1163    
1164     Returns the text content of the node. (In many case the returned value is same
1165     as WinIE DOM C<inner_text ()> function's or XPath C<text()> function's.
1166     But some classes that inherits this module might implement to return other
1167     value (eg. to return the value of the alt attribute of html:img element).
1168    
1169 wakaba 1.2 Available options: C<output_ref_as_is>.
1170    
1171 wakaba 1.1 =cut
1172    
1173 wakaba 1.2 sub inner_text ($;%) {
1174 wakaba 1.1 my $self = shift;
1175 wakaba 1.2 my %o = @_;
1176 wakaba 1.5 my $r = '';
1177 wakaba 1.10 if ($self->{type} eq '#reference'
1178     && ($self->{namespace_uri} eq $NS{SGML}.'char:ref'
1179     || $self->{namespace_uri} eq $NS{SGML}.'char:ref:hex')) {
1180     $r = chr $self->{value};
1181 wakaba 1.7 } elsif ($self->{type} eq '#declaration'
1182     && ($self->{namespace_uri} eq $NS{SGML}.'entity'
1183     || $self->{namespace_uri} eq $NS{SGML}.'entity:parameter')) {
1184 wakaba 1.10 ## TODO:
1185     $r = $self->set_attribute ('value')->inner_text;
1186 wakaba 1.7 } else { # not #reference nor #declaration(ENTITY)
1187 wakaba 1.10 my $isc = $self->_is_same_class ($self->{value});
1188     $r = $self->{value} unless $isc;
1189 wakaba 1.7 if ($o{output_ref_as_is}) { ## output as if RCDATA
1190     $r =~ s/&/&amp;/g;
1191 wakaba 1.10 for my $node (($isc?$self->{value}:()), @{$self->{node}}) {
1192 wakaba 1.7 my $nt = $node->node_type;
1193     if ($nt eq '#reference' || $nt eq '#xml') {
1194     $r .= $node->outer_xml;
1195     } elsif ($nt ne '#attribute') {
1196     $r .= map {s/&/&amp;/g; $_} $node->inner_text;
1197     }
1198     }
1199     } else {
1200 wakaba 1.10 for (($isc?$self->{value}:()), @{$self->{node}}) {
1201     $r .= $_->inner_text unless $_->{type} eq '#attribute';
1202 wakaba 1.2 }
1203     }
1204 wakaba 1.1 }
1205     $r;
1206     }
1207    
1208 wakaba 1.9 sub _is_same_class ($$) {
1209     my ($self, $something) = @_;
1210 wakaba 1.10 return 0 if {qw/ARRAY 1 HASH 1 CODE 1 :nonref: 1/}->{ref ($something) || ':nonref:'};
1211     eval q{$self->_CLASS_NAME eq $something->_CLASS_NAME} ? 1 : 0;
1212 wakaba 1.9 }
1213    
1214     sub root_node ($) {
1215     my $self = shift;
1216     if ($self->{type} eq '#document') {
1217     return $self;
1218     } elsif (ref $self->{parent}) {
1219     return $self->{parent}->root_node;
1220     } else {
1221     return $self;
1222     }
1223 wakaba 1.1 }
1224    
1225 wakaba 1.7 sub _get_entity_manager ($) {
1226     my $self = shift;
1227     if ($self->{type} eq '#document') {
1228     unless ($self->{flag}->{smx__entity_manager}) {
1229     require SuikaWiki::Markup::XML::EntityManager;
1230     $self->{flag}->{smx__entity_manager} = SuikaWiki::Markup::XML::EntityManager->new ($self);
1231     }
1232     return $self->{flag}->{smx__entity_manager};
1233     } elsif (ref $self->{parent}) {
1234     return $self->{parent}->_get_entity_manager;
1235     } else {
1236     unless ($self->{flag}->{smx__entity_manager}) {
1237     require SuikaWiki::Markup::XML::EntityManager;
1238     $self->{flag}->{smx__entity_manager} = SuikaWiki::Markup::XML::EntityManager->new ($self);
1239     }
1240     return $self->{flag}->{smx__entity_manager};
1241     }
1242     }
1243    
1244 wakaba 1.10 sub _CLASS_NAME ($) { 'SuikaWiki::Markup::XML' }
1245 wakaba 1.5
1246 wakaba 1.1 # $s = $x->_entitize ($s)
1247 wakaba 1.4 sub _entitize ($$;%) {
1248     my ($self, $s, %o) = (shift, shift, @_);
1249 wakaba 1.1 $s =~ s/&/&amp;/g;
1250     $s =~ s/</&lt;/g;
1251     $s =~ s/>/&gt;/g;
1252     $s =~ s/"/&quot;/g;
1253 wakaba 1.7 $s =~ s/([\x00-\x08\x0B\x0C\x0E-\x1F])/sprintf '&amp;#%d;', ord $1/ge;
1254 w 1.12 $s =~ s/([\x09\x0A\x0D])/sprintf '&#%d;', ord $1/ge if $o{keep_wsp};
1255 wakaba 1.1 $s;
1256     }
1257    
1258 wakaba 1.2 # 1/0 = $x->_check_name ($s)
1259     sub _check_name ($$) {
1260     my $self = shift;
1261     my $s = shift;
1262     return $Cache{name}->{$s} if defined $Cache{name}->{$s};
1263     if ($s =~ /^\p{InXML_NameStartChar}/ && $s !~ /\P{InXMLNameChar}/) {
1264     # \p{...}('*'/'+'/'{n,}') does not work...
1265     $Cache{name}->{$s} = 1;
1266     1;
1267     } else {
1268     $Cache{name}->{$s} = 0;
1269     0;
1270     }
1271     }
1272 wakaba 1.1 # 1/0 = $x->_check_ncname ($s)
1273     sub _check_ncname ($$) {
1274     my $self = shift;
1275     my $s = shift;
1276     return $Cache{ncname}->{$s} if defined $Cache{ncname}->{$s};
1277     if ($s =~ /^\p{InXML_NCNameStartChar}/ && $s !~ /\P{InXMLNCNameChar}/) {
1278     # \p{...}('*'/'+'/'{n,}') does not work...
1279     $Cache{ncname}->{$s} = 1;
1280     1;
1281     } else {
1282     $Cache{ncname}->{$s} = 0;
1283     0;
1284     }
1285     }
1286    
1287     # 1/0 = $x->_check_namespace_prefix ($s)
1288     sub _check_namespace_prefix ($$) {
1289     my $self = shift;
1290     my $s = shift;
1291     return 0 unless defined $s;
1292     return 1 if $s eq '';
1293     substr ($s, -1, 1) = '' if substr ($s, -1, 1) eq ':';
1294     $self->_check_ncname ($s);
1295     }
1296    
1297 wakaba 1.2 ## TODO: cleaning $self->{node} before outputing, to ensure nodes not to have
1298     ## multiple parents.
1299     ## TODO: normalize namespace URI (removing non URI chars)
1300    
1301     sub flag ($$;$) {
1302     my ($self, $name, $value) = @_;
1303     if (defined $value) {
1304     $self->{flag}->{$name} = $value;
1305     }
1306     $self->{flag}->{$name};
1307     }
1308    
1309 wakaba 1.3 sub option ($$;$) {
1310     my ($self, $name, $value) = @_;
1311     if (defined $value) {
1312     $self->{option}->{$name} = $value;
1313     }
1314     $self->{option}->{$name};
1315     }
1316    
1317 wakaba 1.1 =back
1318    
1319 wakaba 1.2 =head1 NODE TYPES
1320    
1321     =over 4
1322    
1323     =item #attribute
1324    
1325     Attribute. Its XML representation takes the form of NAME="VALUE".
1326    
1327     =item #comment
1328    
1329     Comment declarement. <!-- -->
1330    
1331     =item #declarement
1332    
1333     SGML's declarements, such as SGML, DOCTYPE, ENTITY, etc.
1334     <!SGML ...>, <!DOCTYPE root []>, <!ENTITY % name "value">
1335    
1336     =item #element
1337    
1338     Element. Its XML representation consists of start tag, content and end tag,
1339     like <TYPE>content</TYPE>.
1340    
1341     =item #fragment
1342    
1343     Fragment of nodes. It's similar to DOM's fragment node.
1344    
1345     =item #pi
1346    
1347     Prosessing instruction. <?NAME VALUE?>
1348    
1349     =item #reference
1350    
1351     Character reference or general or parameter entity reference.
1352     &#nnnn;, &#xhhhh;, &name;, %name;.
1353    
1354     =item #section
1355    
1356     Markup section. CDATA, INCLUDE and IGNORE are supported by XML.
1357     <![%type;[...]]>
1358    
1359     =item #text
1360    
1361     Text.
1362    
1363     =item #xml
1364    
1365     Preformatted XML text.
1366    
1367     =cut
1368    
1369 wakaba 1.4 =head1 RESTRICTIONS
1370    
1371     =over 4
1372    
1373     =item XML without XML Namespace is not supported.
1374    
1375     =item Before default namespace without bounded URI (xmlns="") is outputed, it must be declared.
1376    
1377     For example, next code generates invalid (non-well-formed) XML Namespace document.
1378    
1379     my $x = SuikaWiki::Markup::XML->new (local_name => 'elementType');
1380     print $x # <ns1:elementType xmlns:ns1=""></ns1:elementType>
1381    
1382     So you must write like:
1383    
1384     my $x = SuikaWiki::Markup::XML->new (local_name => 'elementType');
1385     $x->define_new_namespace ('' => '');
1386     print $x; # <elementType xmlns=""></elementType>
1387    
1388     =back
1389    
1390 wakaba 1.1 =head1 LICENSE
1391    
1392     Copyright 2003 Wakaba <w@suika.fam.cx>
1393    
1394     This program is free software; you can redistribute it and/or
1395     modify it under the same terms as Perl itself.
1396    
1397     =cut
1398    
1399 w 1.13 1; # $Date: 2003/07/12 06:12:00 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24