/[pub]/suikawiki/script/lib/SuikaWiki/Markup/XML.pm
Suika

Contents of /suikawiki/script/lib/SuikaWiki/Markup/XML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.9 - (hide annotations) (download)
Sun Jun 29 08:34:36 2003 UTC (22 years ago) by wakaba
Branch: MAIN
Changes since 1.8: +125 -31 lines
Marked section support

1 wakaba 1.1
2     =head1 NAME
3    
4     SuikaWiki::Markup::XML --- SuikaWiki: Simple well-formed document fragment generator
5    
6     =head1 DESCRIPTION
7    
8     This module can be used to generate the document fragment of XML, SGML or
9     other well-formed (in XML meaning) data formats with the object oriented manner.
10    
11     This module cannot be used to parse XML (or other marked-up) document (or its fragment)
12     by itself, nor is compatible with other huge packages such as XML::Parser. The only purpose
13     of this module is to make it easy for tiny perl scripts to GENERATE well-formed
14     markup constructures. (SuikaWiki is not "tiny"? Oh, yes, I see:-))
15    
16     =cut
17    
18     package SuikaWiki::Markup::XML;
19     use strict;
20 wakaba 1.9 our $VERSION = do{my @r=(q$Revision: 1.8 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
21     use overload '""' => \&outer_xml,
22 wakaba 1.2 fallback => 1;
23     use Char::Class::XML qw!InXML_NameStartChar InXMLNameChar InXML_NCNameStartChar InXMLNCNameChar!;
24 wakaba 1.1 our %Namespace_URI_to_prefix = (
25     'DAV:' => [qw/dav webdav/],
26     'http://greenbytes.de/2002/rfcedit' => [qw/ed/],
27     'http://icl.com/saxon' => [qw/saxon/],
28     'http://members.jcom.home.ne.jp/jintrick/2003/02/site-concept.xml#' => ['', qw/sitemap/],
29     'http://purl.org/dc/elements/1.1/' => [qw/dc dc11/],
30     'http://purl.org/rss/1.0/' => ['', qw/rss rss10/],
31     'http://suika.fam.cx/~wakaba/lang/rfc/translation/' => [qw/ja/],
32     'http://www.mozilla.org/xbl' => ['', qw/xbl/],
33 wakaba 1.3 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => [qw/rdf/],
34 wakaba 1.1 'http://www.w3.org/1999/xhtml' => ['', qw/h h1 xhtml xhtml1/],
35     'http://www.w3.org/1999/xlink' => [qw/l xlink/],
36     'http://www.w3.org/1999/XSL/Format' => [qw/fo xslfo xsl-fo xsl/],
37     'http://www.w3.org/1999/XSL/Transform' => [qw/t s xslt xsl/],
38     'http://www.w3.org/1999/XSL/TransformAlias' => [qw/axslt axsl xslt xsl/],
39     'http://www.w3.org/2000/01/rdf-schema#' => [qw/rdfs/],
40     'http://www.w3.org/2000/svg' => ['', qw/s svg/],
41     'http://www.w3.org/2002/06/hlink' => [qw/h hlink/],
42     'http://www.w3.org/2002/06/xhtml2' => ['', qw/h h2 xhtml xhtml2/],
43 wakaba 1.3 'http://www.w3.org/2002/07/owl' => [qw/owl/],
44 wakaba 1.6 'http://www.w3.org/2002/xforms/cr' => [qw/f xforms/],
45 wakaba 1.1 'http://www.w3.org/TR/REC-smil' => ['', qw/smil smil1/],
46 wakaba 1.3 'http://www.wapforum.org/2001/wml' => [qw/wap/],
47 wakaba 1.1 'http://xml.apache.org/xalan' => [qw/xalan/],
48     'mailto:julian.reschke@greenbytes.de?subject=rcf2629.xslt' => [qw/myns/],
49     'urn:schemas-microsoft-com:vml' => [qw/v vml/],
50     'urn:schemas-microsoft-com:xslt' => [qw/ms msxsl msxslt/],
51     'urn:x-suika-fam-cx:markup:ietf:html:3:draft:00' => ['', qw/H HTML HTML3/],
52 wakaba 1.7 'urn:x-suika-fam-cx:markup:ietf:rfc:2629' => ['', qw/rfc rfc2629/],
53 wakaba 1.1 );
54     my %Cache;
55 wakaba 1.7 my %NS = (
56     SGML => 'urn:x-suika-fam-cx:markup:sgml:',
57 wakaba 1.8 xml => 'http://www.w3.org/XML/1998/namespace',
58     xmlns => 'http://www.w3.org/2000/xmlns/',
59 wakaba 1.7 );
60 wakaba 1.1
61     =head1 METHODS
62    
63     =over 4
64    
65     =item $x = SuikaWiki::Markup::XML->new (%options)
66    
67     Returns new instance of the module. It is itself a node.
68    
69 wakaba 1.4 Available options: C<data_type>, C<default_decl>, C<type> (default: C<#element>), C<local_name>, C<namespace_uri> and C<value>.
70 wakaba 1.1
71     =cut
72    
73     sub new ($;%) {
74     my $class = shift;
75     my $self = bless {@_}, $class;
76     $self->{type} ||= '#element';
77 wakaba 1.5 if ($self->{qname}) {
78     ($self->{namespace_prefix}, $self->{local_name}) = $self->_ns_parse_qname ($self->{qname});
79     $self->{_qname} = $self->{qname};
80     }
81     if (defined $self->{namespace_prefix}) {
82     $self->{namespace_prefix} .= ':' if $self->{namespace_prefix}
83     && substr ($self->{namespace_prefix}, -1) ne ':';
84     $self->{ns}->{$self->{namespace_prefix}||''} = $self->{namespace_uri} if defined $self->{namespace_uri};
85     }
86 wakaba 1.4 for (qw/local_name value/) {
87 wakaba 1.8 if (ref ($self->{$_}) && eval q($self->{$_}->_CLASS_NAME eq $self->_CLASS_NAME)) {
88 wakaba 1.2 $self->{$_}->{parent} = $self;
89     }
90     }
91 wakaba 1.1 $self->{node} ||= [];
92     $self;
93     }
94    
95 wakaba 1.5 sub _ns_parse_qname ($$) {
96     shift;
97     my $qname = shift;
98     if ($qname =~ /:/) {
99     return split /:/, $qname, 2;
100     } else {
101     return (undef, $qname);
102     }
103     }
104 wakaba 1.1
105     =item $x->append_node ($node)
106    
107     Appending given node to the object (as the last child).
108     If the type of given node is C<#fragment>, its all children, not the node
109     itself, are appended.
110    
111     This method returns the appended node unless the type of given node is C<#fragment>.
112     In such cases, this node (C<$x>) is returned.
113    
114 wakaba 1.2 Available options: C<node_or_text>.
115    
116 wakaba 1.1 =cut
117    
118 wakaba 1.2 sub append_node ($$;%) {
119 wakaba 1.1 my $self = shift;
120 wakaba 1.2 my ($new_node, %o) = @_;
121     unless (ref $new_node) {
122     if ($o{node_or_text}) {
123     return $self->append_text ($new_node);
124     } else {
125     die "append_node: Invalid node" unless ref $new_node;
126     }
127     }
128 wakaba 1.1 if ($new_node->{type} eq '#fragment') {
129     for (@{$new_node->{node}}) {
130     push @{$self->{node}}, $_;
131     $_->{parent} = $self;
132     }
133     $self;
134     } else {
135     push @{$self->{node}}, $new_node;
136     $new_node->{parent} = $self;
137     $new_node;
138     }
139     }
140    
141     =item $new_node = $x->append_new_node (%options)
142    
143     Appending a new node. The new node is returned.
144    
145     Available options: C<type>, C<namespace_uri>, C<local_name>, C<value>.
146    
147     =cut
148    
149     sub append_new_node ($;%) {
150     my $self = shift;
151     my %o = @_;
152     my $new_node = __PACKAGE__->new (%o);
153     push @{$self->{node}}, $new_node;
154     $new_node->{parent} = $self;
155     $new_node;
156     }
157    
158     =item $new_node = $x->append_text ($text)
159    
160     Appending given text as a new text node. The new text node is returned.
161    
162     =cut
163    
164     sub append_text ($$;%) {
165     my $self = shift;
166     my $s = shift;
167     #if (@{$self->{node}}[-1]->{type} eq '#text') {
168     # $self->{node}}[-1]->append_new_node (type => '#text', value => $s);
169     #} else {
170     $self->append_new_node (type => '#text', value => $s);
171     #}
172     }
173    
174 wakaba 1.2 sub append_baretext ($$;%) {
175     my $self = shift;
176     my $s = shift;
177     $self->append_new_node (type => '#xml', value => $s);
178     }
179    
180 wakaba 1.9 sub remove_child_node ($$) {
181     my ($self, $node) = @_;
182     return unless ref $node;
183     $node = overload::StrVal ($node);
184     $self->{node} = [grep { overload::StrVal ($_) ne $node } @{$self->{node}}];
185     }
186    
187 wakaba 1.1 =item $attr_node = $x->get_attribute ($local_name, %options)
188    
189     Returns the attribute node whose local-name is C<$local_name>.
190    
191     Available options: C<namespace_uri>, C<make_new_node>.
192    
193     =cut
194    
195     sub get_attribute ($$;%) {
196     my $self = shift;
197     my ($name, %o) = @_;
198     for (@{$self->{node}}) {
199     if ($_->{type} eq '#attribute' && $_->{local_name} eq $name && $o{namespace_uri} eq $_->{namespace_uri}) {
200     return $_;
201     }
202     }
203     ## Node is not exist
204     if ($o{make_new_node}) {
205     return $self->append_new_node (type => '#attribute', local_name => $name, namespace_uri => $o{namespace_uri});
206     } else {
207     return undef;
208     }
209     }
210    
211     =item $attr_node = $x->set_attribute ($local_name => $value, %options)
212    
213     Set the value of the attribute. The attribute node is returned.
214    
215     Available options: C<namespace_uri>.
216    
217     =cut
218    
219     sub set_attribute ($$$;%) {
220     my $self = shift;
221     my ($name, $val, %o) = @_;
222 wakaba 1.7 if (ref ($val) eq 'ARRAY' || ref ($val) eq 'HASH' || ref ($val) eq 'CODE') {
223     ## TODO: common error handling
224     require Carp;
225     Carp::croak "set_attribute: new attribute value must be string or blessed object";
226     }
227 wakaba 1.1 for (@{$self->{node}}) {
228     if ($_->{type} eq '#attribute' && $_->{local_name} eq $name && $o{namespace_uri} eq $_->{namespace_uri}) {
229     $_->{value} = $val;
230     $_->{node} = [];
231     return $_;
232     }
233     }
234     return $self->append_new_node (type => '#attribute', local_name => $name, value => $val, namespace_uri => $o{namespace_uri});
235     }
236    
237     =item \@children = $x->child_nodes
238    
239     Returns an array reference to child nodes.
240    
241 wakaba 1.4 =item $local_name = $x->local_name ([$new_name])
242 wakaba 1.1
243 wakaba 1.4 Returns or set the local-name.
244    
245     =item $uri = $x->namespace_uri ([$new_uri])
246    
247     Returns or set namespace name (URI) of the element or the attribute
248 wakaba 1.1
249 wakaba 1.5 =item $uri = $x->namespace_prefix ([$new_prefix])
250    
251     Returns or set namespace prefix of the element or the attribute.
252     You may give C<$new_prefix> in form either 'foo' or 'foo:'.
253     To indicate "default" prefix, use '' (length == 0 string).
254    
255 wakaba 1.1 =item $type = $x->node_type
256    
257     Returns the node type.
258    
259     =item $node = $x->parent_node
260    
261     Returns the parent node. If there is no parent node, undef is returned.
262    
263     =cut
264    
265     sub child_nodes ($) { shift->{node} }
266 wakaba 1.4 sub local_name ($;$) {
267     my ($self, $newname) = @_;
268     if ($newname) {
269     $self->{local_name} = $newname;
270     }
271     if (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') {
272     $self->{local_name}->{local_name};
273     } else {
274     $self->{local_name}
275     }
276     }
277 wakaba 1.1 sub node_type ($) { shift->{type} }
278     sub parent_node ($) { shift->{parent} }
279    
280 wakaba 1.4 ## TODO: obsolete
281     sub target_name ($;$) {
282     my ($self, $new) = @_;
283     if (defined $new) {
284     $self->{target_name} = $new;
285     }
286     $self->{target_name};
287     }
288    
289     sub namespace_uri ($;$) {
290     my ($self, $new_uri) = @_;
291     if (defined $new_uri) {
292     $self->{namespace_uri} = $new_uri;
293     }
294     $self->{namespace_uri};
295     }
296 wakaba 1.5 sub namespace_prefix ($;$) {
297     my ($self, $new_pfx) = @_;
298     if (defined $new_pfx && $self->{namespace_uri}) {
299     $new_pfx .= ':' if $new_pfx;
300     $self->{namespace_prefix} = $new_pfx;
301     $self->{ns}->{$new_pfx} = $self->{namespace_uri};
302     }
303     $self->_get_namespace_prefix ($self->{namespace_uri});
304     }
305 wakaba 1.4
306 wakaba 1.1 =item $i = $x->count
307    
308     Returns the number of child nodes.
309    
310     =cut
311    
312     # TODO: support counting by type
313     sub count ($;@) {
314 wakaba 1.2 my $self = shift;
315     (defined $self->{value} ? 1 : 0) + scalar @{$self->{node}};
316 wakaba 1.1 }
317    
318     # $prefix = $x->_get_namespace_prefix ($namespace_uri)
319     sub _get_namespace_prefix ($$;%) {
320     my ($self, $uri) = (shift, shift);
321     my %o = @_;
322     if (defined (my $p = $self->_uri_to_prefix ($uri, undef, %o))) {
323     return $p if $self->_prefix_to_uri ($p) eq $uri;
324     } if ($Namespace_URI_to_prefix{$uri}) {
325     for (@{$Namespace_URI_to_prefix{$uri}}) {
326     my $pfx = $_; $pfx .= ':' if $pfx;
327     if ($self->_check_namespace_prefix ($pfx) && !$self->_prefix_to_uri ($pfx)) {
328     return $self->_uri_to_prefix ($uri => $pfx, %o);
329     }
330     }
331     } else {
332     my ($u_r_i, $pfx) = ($uri);
333 wakaba 1.4 $u_r_i =~ s/[^0-9A-Za-z._-]+/ /g;
334     my @u_r_i = split / /, $u_r_i;
335     for (reverse @u_r_i) {
336     if (s/([A-Za-z][0-9A-Za-z._-]+)//) {
337     my $p_f_x = $1 . ':';
338     next if lc (substr ($p_f_x, 0, 3)) eq 'xml';
339     unless ($self->_prefix_to_uri ($p_f_x)) {
340     $pfx = $p_f_x;
341     last;
342     }
343 wakaba 1.1 }
344     }
345     if ($pfx) {
346     return $self->_uri_to_prefix ($uri => $pfx, %o);
347     } else {
348     while (1) {
349     my $pfx = 'ns'.(++$self->{ns}->{-anonymous}).':';
350     unless ($self->_prefix_to_uri ($pfx)) {
351     return $self->_uri_to_prefix ($uri => $pfx, %o);
352     }
353     }
354     }
355     }
356     }
357    
358 wakaba 1.4 sub _set_prefix_to_uri ($$$;%) {
359     my ($self, $prefix => $uri, %o) = @_;
360     return undef unless $self->_check_namespace_prefix ($prefix);
361     $self->{ns}->{$prefix} = $uri;
362     $self->_prefix_to_uri ($prefix);
363     }
364    
365     ## TODO: removing ns declare (1.1) support
366     # $uri or undef = $x->_prefix_to_uri ($prefix)
367 wakaba 1.1 sub _prefix_to_uri ($$;$%) {
368 wakaba 1.4 my ($self, $prefix, %o) = @_;
369 wakaba 1.1 return undef unless $self->_check_namespace_prefix ($prefix);
370     if (uc (substr $prefix, 0, 3) eq 'XML') {
371 wakaba 1.8 return $NS{xml} if $prefix eq 'xml:';
372     return $NS{xmlns} if $prefix eq 'xmlns:';
373 wakaba 1.2 }
374     if (defined $self->{ns}->{$prefix}) {
375 wakaba 1.1 $self->{ns}->{$prefix};
376     } elsif (ref $self->{parent}) {
377     shift; # $self
378     $self->{parent}->_prefix_to_uri (@_);
379     } else {
380     undef;
381     }
382     }
383    
384     # $prefix or undef = $x->_uri_to_prefix ($uri => [$new_prefix], %options)
385     # use_no_prefix (default: 1): Allow default namespace (no prefix).
386     sub _uri_to_prefix ($$;$%) {
387     my ($self, $uri, $new_prefix, %o) = @_;
388     if (defined $new_prefix && $self->_check_namespace_prefix ($new_prefix)) {
389     $self->{ns}->{$new_prefix} = $uri;
390     $new_prefix;
391     } else {
392 wakaba 1.8 return 'xml:' if $uri eq $NS{xml};
393     return 'xmlns:' if $uri eq $NS{xmlns};
394 wakaba 1.1 for (keys %{$self->{ns}||{}}) {
395     next if ($_ eq '') && !(!defined $o{use_no_prefix} || $o{use_no_prefix});
396     return $_ if $self->{ns}->{$_} eq $uri;
397     }
398 wakaba 1.2 if (ref ($self->{parent}) && $self->{parent}->{type} ne '#declaration') {
399 wakaba 1.1 shift; # $self
400     $self->{parent}->_uri_to_prefix (@_);
401     } else {
402     undef;
403     }
404     }
405     }
406    
407     =item $x->define_new_namespace ($prefix => $uri)
408    
409     Defines a new XML Namespace. This method is useful for root or section-level
410     element node.
411    
412     Returned value is unspecified in this version of this module.
413    
414     =cut
415    
416 wakaba 1.4 ## TODO: structured URI (such as http://&server;/) support
417 wakaba 1.1 sub define_new_namespace ($$$) {
418     my ($self, $prefix, $uri) = @_;
419     if ($prefix eq '' || $self->_check_ncname ($prefix)) {
420     $prefix .= ':' if $prefix && substr ($prefix, -1) ne ':';
421 wakaba 1.4 $self->_set_prefix_to_uri ($prefix => $uri);
422 wakaba 1.1 } else {
423     undef;
424     }
425     }
426    
427 wakaba 1.4 =item $uri = $x->defined_namespace_prefix ($prefix)
428    
429     Query whether the namespace prefix is defined or not.
430     If defined, return namespace name (URI).
431    
432     =cut
433    
434     sub defined_namespace_prefix ($$) {
435     my ($self, $prefix) = @_;
436     $prefix .= ':' if $prefix;#Carp::carp join ",", ';',@_;
437     $self->_prefix_to_uri ($prefix);
438     }
439    
440 wakaba 1.1 =item $qname = $x->qname
441    
442     Returns QName ((namespace-)qualified name) of the element type.
443     Undef is retuened when the type does not have its QName
444     (ie. when type is neither C<#element> or C<#attribute>).
445    
446     =cut
447    
448     sub qname ($) {
449     my $self = shift;
450     if ($self->_check_ncname ($self->{local_name})) {
451     if ($self->{type} eq '#element') {
452     $self->{_qname} = $self->_get_namespace_prefix ($self->{namespace_uri}) . $self->{local_name}
453     unless $self->{_qname};
454     return $self->{_qname};
455     } elsif ($self->{type} eq '#attribute') {
456     return $self->attribute_name;
457     }
458     }
459     undef;
460     }
461    
462 wakaba 1.9 sub merge_external_subset ($) {
463     my $self = shift;
464     unless ($self->{type} eq '#declaration' && $self->{namespace_uri} eq $NS{SGML}.'doctype') {
465     return unless $self->{type} eq '#document' || $self->{type} eq '#fragment';
466     for (@{$self->{node}}) {
467     $_->merge_external_subset;
468     }
469     return;
470     }
471     my $xsub = $self->get_attribute ('external-subset');
472     return unless ref $xsub;
473     for (@{$xsub->{node}}) {
474     $_->{parent} = $self;
475     }
476     push @{$self->{node}}, @{$xsub->{node}};
477     $self->remove_child_node ($xsub);
478     $self->remove_child_node ($self->get_attribute ('PUBLIC'));
479     $self->remove_child_node ($self->get_attribute ('SYSTEM'));
480     $self->remove_marked_section;
481     }
482    
483     sub remove_marked_section ($) {
484     my $self = shift;
485     my @node;
486     for (@{$self->{node}}) {
487     $_->remove_marked_section;
488     }
489     for (@{$self->{node}}) {
490     if ($_->{type} ne '#section') {
491     push @node, $_;
492     } else {
493     my $status = $_->get_attribute ('status', make_new_node => 1)->inner_text;
494     if ($status eq 'CDATA') {
495     $_->{type} = '#text';
496     $_->remove_child_node ($_->get_attribute ('status'));
497     push @node, $_;
498     } elsif ($status ne 'IGNORE') { # INCLUDE
499     for my $e (@{$_->{node}}) {
500     if ($e->{type} ne '#attribute') {
501     $e->{parent} = $self;
502     push @node, $e;
503     }
504     }
505     }
506     }
507     }
508     $self->{node} = \@node;
509     }
510    
511 wakaba 1.7 sub remove_references ($) {
512     my $self = shift;
513     my @node;
514 wakaba 1.8 for (@{$self->{node}}) {
515     $_->remove_references;
516     }
517 wakaba 1.7 for (@{$self->{node}}) {
518     if ($_->{type} ne '#reference'
519     || ($self->{type} eq '#declaration' && $_->{namespace_uri} eq $NS{SGML}.'entity')) {
520     push @node, $_;
521     } else {
522     if ($_->{namespace_uri} =~ /char/) {
523     my $e = ref ($_)->new (type => '#text', value => chr $_->{value});
524     $e->{parent} = $self;
525     push @node, $e;
526     } elsif ($_->{flag}->{smxp__ref_expanded}) {
527     for my $e (@{$_->{node}}) {
528 wakaba 1.8 if ($e->{type} ne '#attribute') {
529     $e->{parent} = $self;
530     push @node, $e;
531     }
532 wakaba 1.7 }
533     } else { ## reference is not expanded
534     push @node, $_;
535     }
536     }
537 wakaba 1.8 $_->{flag}->{smxp__defined_with_param_ref} = 0
538     if $_->{flag}->{smxp__defined_with_param_ref};
539 wakaba 1.7 }
540     $self->{node} = \@node;
541     }
542    
543 wakaba 1.8 sub resolve_relative_uri ($;$%) {
544     require URI;
545     my ($self, $rel, %o) = @_;
546     my $base = $self->get_attribute ('base', namespace_uri => $NS{xml});
547     $base = ref ($base) ? $base->inner_text : undef;
548     if ($base !~ /^(?:[0-9A-Za-z.+-]|%[0-9A-Fa-f]{2})+:/) { # $base is relative
549     $base = $self->_resolve_relative_uri_by_parent ($base, \%o);
550     }
551     eval q{ ## Catch error such as $base is 'data:,foo' (non hierarchic scheme,...)
552     return URI->new ($rel)->abs ($base || '.'); ## BUG (or spec) of URI: $base == false
553     } or return $rel;
554     }
555     sub _resolve_relative_uri_by_parent ($$$) {
556     my ($self, $rel, $o) = @_;
557     if (ref $self->{parent}) {
558     if (!$o->{use_references_base_uri} && $self->{parent}->{type} eq '#reference') {
559     ## This case is necessary to work with
560     ## <element> <!-- element can have base URI -->
561     ## text <!-- text cannot have base URI -->
562     ## &ent; <!-- ref's base URI is referred entity's one (in this module) -->
563     ## <!-- expantion of ent -->
564     ## entity's text <!-- text cannot have base URI, so use <element>'s one -->
565     ## <entitys-element/> <!-- element can have base URI, otherwise ENTITY's one -->
566     ## </element>
567     return $self->{parent}->_resolve_relative_uri_by_parent ($rel, $o);
568     } else {
569     return $self->{parent}->resolve_relative_uri ($rel, %$o);
570     }
571     } else {
572     return $rel;
573     }
574     }
575     sub base_uri ($;$) {
576     my ($self, $new_uri) = @_;
577     my $base;
578     if (defined $new_uri) {
579     $base = $self->set_attribute (base => $new_uri, namespace_uri => $NS{xml});
580     }
581     $base ||= $self->get_attribute ('base', namespace_uri => $NS{xml});
582     ref ($base) ? $base->inner_text : undef;
583     }
584    
585 wakaba 1.1 =item $tag = $x->start_tag
586    
587 wakaba 1.4 Returns the start tag (or something that marks the start of something, such as '<!--'
588 wakaba 1.1 for C<#comment> nodes).
589    
590     =cut
591    
592     sub start_tag ($) {
593     my $self = shift;
594     if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) {
595     my $r = '<';
596     $r .= $self->qname;
597     for (@{$self->{node}}) {
598     $r .= ' ' . $_->outer_xml if $_->node_type eq '#attribute';
599     }
600     for my $prefix (grep !/^-/, keys %{$self->{ns}||{}}) {
601     if ($prefix) {
602     $r .= ' xmlns:'.substr ($prefix, 0, length ($prefix)-1);
603     } else {
604     $r .= ' xmlns';
605     }
606     $r .= '="'.$self->_entitize ($self->{ns}->{$prefix}).'"';
607     }
608     $r .= '>';
609     $r;
610     } elsif ($self->{type} eq '#comment') {
611 wakaba 1.4 '<!--';
612     } elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) {
613     '<?' . ($self->{local_name});
614 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
615     if ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref:hex') {
616     '&#x';
617     } elsif ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref') {
618     '&#';
619 wakaba 1.4 } elsif (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') {
620     if ($self->{local_name}->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:entity:parameter') {
621 wakaba 1.2 '%';
622     } else {
623     '&';
624     }
625 wakaba 1.4 } elsif ($self->_check_ncname ($self->{local_name})) {
626 wakaba 1.2 if ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:entity:parameter') {
627     '%';
628     } else {
629     '&';
630     }
631     } else { # error
632     '';
633     }
634 wakaba 1.4 } elsif ($self->{type} eq '#declaration' && $self->{namespace_uri}) {
635     '<!' . {
636 wakaba 1.8 $NS{SGML}.'attlist' => 'ATTLIST',
637     $NS{SGML}.'doctype' => 'DOCTYPE',
638     $NS{SGML}.'element' => 'ELEMENT',
639     $NS{SGML}.'entity' => 'ENTITY',
640     $NS{SGML}.'entity:parameter' => 'ENTITY',
641     $NS{SGML}.'notation' => 'NOTATION',
642 wakaba 1.7 }->{$self->{namespace_uri}} . ' ' . ($self->{namespace_uri} eq $NS{SGML}.'entity:parameter' ?
643     ($self->{flag}->{smxp__defined_with_param_ref}?'':'% '):'');
644 wakaba 1.1 } elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) {
645     my $r = '<!' . $self->{local_name} . ' ';
646     if ($self->{local_name} eq 'DOCTYPE' && ref $self->{parent}) {
647     my $qname;
648     for (@{$self->{parent}->{node}}) {
649     if ($_->{type} eq '#element') {
650     $qname = $_->qname;
651     last if $qname;
652     }
653     }
654     $r .= ($qname ? $qname : '#IMPLIED') . ' ';
655 wakaba 1.2 }
656     $r;
657     } elsif ($self->{type} eq '#section') {
658 wakaba 1.9 '<![';
659 wakaba 1.1 } else {
660     '';
661     }
662     }
663    
664     =item $tag = $x->end_tag
665    
666 wakaba 1.4 Returns the end tag (or something that marks the end of something, such as '-->'
667 wakaba 1.1 for C<#comment> nodes).
668    
669     =cut
670    
671     sub end_tag ($) {
672     my $self = shift;
673     if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) {
674     '</' . $self->qname . '>';
675     } elsif ($self->{type} eq '#comment') {
676 wakaba 1.4 '-->';
677 wakaba 1.1 } elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) {
678     '?>';
679 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
680     ';';
681 wakaba 1.4 } elsif ($self->{type} eq '#declaration' && $self->{namespace_uri}) {
682     '>';
683 wakaba 1.1 } elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) {
684 wakaba 1.4 '>';
685 wakaba 1.2 } elsif ($self->{type} eq '#section') {
686 wakaba 1.9 ']]>';
687 wakaba 1.1 } else {
688     '';
689     }
690     }
691    
692     =item $tag = $x->attribute_name
693    
694     Returns the attribute name.
695    
696     =cut
697    
698     sub attribute_name ($) {
699     my $self = shift;
700     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
701     ($self->{namespace_uri} ?
702     (ref $self->{parent} ? $self->{parent} : $self)
703     ->_get_namespace_prefix ($self->{namespace_uri}, use_no_prefix => 0) : '')
704     .$self->{local_name};
705     } else {
706     '';
707     }
708     }
709    
710     =item $tag = $x->attribute_value
711    
712     Returns the attribute value.
713    
714     =cut
715    
716 wakaba 1.4 sub attribute_value ($;%) {
717 wakaba 1.1 my $self = shift;
718 wakaba 1.4 my %o = @_;
719 wakaba 1.1 if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
720 wakaba 1.5 my $r = '"';
721 wakaba 1.8 if (ref ($self->{value}) && eval q($self->{value}->_CLASS_NAME eq $self->_CLASS_NAME)) {
722 wakaba 1.5 unshift @{$self->{node}}, $self->{value};
723     undef $self->{value};
724     } else {
725     $r .= $self->_entitize ($self->{value});
726     }
727 wakaba 1.4 for (@{$self->{node}}) {
728     my $nt = $_->node_type;
729     if ($nt eq '#reference' || $nt eq '#xml') {
730     $r .= $_->outer_xml;
731     } elsif ($nt ne '#attribute') {
732     $r .= $self->_entitize ($_->inner_text, percent => $o{escape_percent});
733     }
734     }
735     return $r . '"';
736     } else {
737     '';
738     }
739     }
740    
741     sub entity_value ($;%) {
742     my $self = shift;
743     my %o = @_;
744     my $_entitize = sub {
745     my $s = shift;
746 wakaba 1.7 $s =~ s/&/&#x26;/g;
747     $s =~ s/&#x26;(\p{InXML_NameStartChar}\p{InXMLNameChar}*);/&$1;/g;
748 wakaba 1.4 $s =~ s/%/&#x25;/g;
749 wakaba 1.7 $s =~ s/"/&#x22;/g;
750 wakaba 1.4 $s;
751     };
752     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
753     my $r = '"' . &$_entitize ($self->{value});
754     for (@{$self->{node}}) {
755     my $nt = $_->node_type;
756     if ($nt eq '#reference' || $nt eq '#xml') {
757     $r .= $_->outer_xml;
758     } elsif ($nt ne '#attribute') {
759     $r .= &$_entitize ($_->inner_text);
760 wakaba 1.1 }
761     }
762 wakaba 1.4 return $r . '"';
763 wakaba 1.1 } else {
764     '';
765     }
766     }
767    
768 wakaba 1.7 ## This method should be called only from SuikaWiki::Markup::XML::* family modules,
769     ## since this is NOT a FORMAL interface.
770     sub _entity_parameter_literal_value ($;%) {
771     my $self = shift;
772     my $r = $self->{value};
773     for (@{$self->{node}}) {
774     my $nt = $_->node_type;
775 wakaba 1.8 #if ($nt eq '#reference' || $nt eq '#xml') {
776     if ($nt eq '#xml') {
777 wakaba 1.7 $r .= $_->outer_xml;
778     } elsif ($nt ne '#attribute') {
779     $r .= $_->inner_text;
780     }
781     }
782     return $r;
783     }
784    
785 wakaba 1.1 =item $tag = $x->attribute
786    
787     Returns the attribute (name and value pair).
788    
789     =cut
790    
791     sub attribute ($) {
792     my $self = shift;
793     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
794     $self->attribute_name . '=' . $self->attribute_value;
795     } else {
796     '';
797     }
798     }
799    
800 wakaba 1.2 sub external_id ($;%) {
801     my $self = shift;
802     my %o = @_;
803     my ($pubid, $sysid, $ndata);
804     for (@{$self->{node}}) {
805 wakaba 1.4 if ($_->{type} eq '#attribute' && !$_->{namespace_uri}) {
806 wakaba 1.2 if ($_->{local_name} eq 'PUBLIC') {
807 wakaba 1.4 $pubid = $_->inner_text;
808 wakaba 1.2 } elsif ($_->{local_name} eq 'SYSTEM') {
809 wakaba 1.4 $sysid = $_->inner_text;
810 wakaba 1.2 } elsif ($_->{local_name} eq 'NDATA') {
811 wakaba 1.4 $ndata = $_->inner_text;
812     undef $ndata unless $self->_check_ncname ($ndata);
813 wakaba 1.2 }
814     }
815     }
816     my $r = '';
817 wakaba 1.9 if (defined $pubid) {
818 wakaba 1.4 $pubid =~ s|([^\x0A\x0D\x20A-Za-z0-9'()+,./:=?;!*#\@\$_%-])|sprintf '%%%02X', ord $1|ges;
819 wakaba 1.9 $pubid = '"' . $pubid . '"';
820 wakaba 1.4 }
821 wakaba 1.9 if (defined $sysid) {
822 wakaba 1.4 if ($sysid =~ /"/) {
823     if ($sysid =~ /'/) {
824     $sysid =~ s/"/&quot;/; $sysid = '"' . $sysid . '"';
825     } else {
826     $sysid = "'" . $sysid . "'";
827     }
828     } else {
829     $sysid = '"' . $sysid . '"';
830     }
831     }
832 wakaba 1.2 if ($pubid && $sysid) {
833 wakaba 1.9 $r = 'PUBLIC ' . $pubid . ' ' . $sysid;
834 wakaba 1.2 } elsif ($sysid) {
835 wakaba 1.4 $r = 'SYSTEM ' . $sysid;
836 wakaba 1.2 } elsif ($pubid && $o{allow_pubid_only}) {
837 wakaba 1.9 $r = 'PUBLIC ' . $pubid;
838 wakaba 1.2 }
839     if ($r && $ndata && $o{use_ndata}) {
840 wakaba 1.4 $r .= ' NDATA ' . $ndata;
841 wakaba 1.2 }
842     $r;
843     }
844    
845     =item $s = $x->content_spec
846    
847     Generates contentspec of element type declaration (ex. C<(E1 | E2 | E3)>)
848     or AttDef of attribute declaration (ex. C<name CDATA #REQUIRED>).
849    
850     =cut
851    
852     sub content_spec ($) {
853     my $self = shift;
854     if ($self->{type} eq '#element') {
855     my $text = 0;
856     my $contentspec = join ' | ', map {$_->qname} grep {$text = 1 if $_->{type} eq '#text'; $_->{type} eq '#element'} @{$self->{node}};
857     $contentspec = '#PCDATA' . ($contentspec ? ' | ' . $contentspec : '') if $text;
858    
859     return $contentspec ? '(' . $contentspec . ')' : 'EMPTY';
860     } elsif ($self->{type} eq '#attribute') {
861     my $attdef = $self->qname . "\t" . ($self->{data_type} || 'CDATA') . "\t";
862     my $default = $self->{default_decl};
863     $default .= ' ' . $self->attribute_value if $default eq '#FIXED';
864     unless ($default) {
865     $default = defined $self->{value} ? $self->attribute_value : '#IMPLIED';
866     }
867     return $attdef . $default;
868     }
869     }
870    
871 wakaba 1.1 =item $tag = $x->inner_xml
872    
873     Returns the content of the node in XML syntax. (In case of the C<#element> nodes,
874     element content without start- and end-tags is returned.)
875    
876     Note that for not all node types the behavior of this method is defined.
877     For example, returned value of C<#attribute> might be unexpected one
878     in this version of this module.
879    
880     =cut
881    
882 wakaba 1.3 sub inner_xml ($;%) {
883 wakaba 1.1 my $self = shift;
884 wakaba 1.3 my %o = @_;
885 wakaba 1.1 my $r = '';
886     if ($self->{type} eq '#comment') {
887     $r = $self->inner_text;
888     $r =~ s/--/-&#x45;/g;
889     } elsif ($self->{type} eq '#pi') {
890     if (length $self->{value}) {
891     $r = ' ' . $self->{value};
892 wakaba 1.2 $r =~ s/\?>/? >/g; # Same replacement as of the recommendation of XSLT:p.i.
893 wakaba 1.1 }
894     for (@{$self->{node}}) {
895     if ($_->node_type eq '#attribute') {
896     $r .= ' ' . $_->attribute;
897     } else {
898     my $s = $_->inner_text;
899     $s =~ s/\?>/?&gt;/g;
900     $r .= ' ' . $s if length $s;
901     }
902     }
903 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
904     if ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref:hex') {
905     $r = sprintf '%02X', $self->{value};
906     } elsif ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref') {
907     $r = sprintf '%02d', $self->{value};
908 wakaba 1.4 } elsif (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') {
909     $r = $self->{local_name}->{local_name};
910     } elsif ($self->_check_ncname ($self->{local_name})) {
911     $r = ($self->{local_name});
912 wakaba 1.2 } else { # error
913     $r = '';
914     }
915 wakaba 1.1 } elsif ($self->{type} eq '#declaration') {
916 wakaba 1.8 if ($self->{namespace_uri} eq $NS{SGML}.'doctype') {
917 wakaba 1.4 my $root = $self->get_attribute ('qname');
918     ref $root ? ($root = $root->inner_text) : (ref $self->{parent} ? (do {
919     for (@{$self->{parent}->{node}}) {
920     if ($_->{type} eq '#element') {
921     $root = $_->qname;
922     last if $root;
923     }
924     }
925     $root = '#IMPLIED';
926     }) : ($root = '#IMPLIED')); ## error!
927     my ($isub, $xid) = ('', $self->external_id);
928     for (@{$self->{node}}) {
929     $isub .= $_->outer_xml if $_->{type} ne '#attribute';
930 wakaba 1.2 }
931     if ($xid) {
932 wakaba 1.4 $r = $xid;
933     if ($isub) {
934 wakaba 1.7 $r .= " [" . $isub . "]";
935 wakaba 1.4 }
936     } else {
937     if ($isub) {
938 wakaba 1.7 $r = "[" . $isub . "]";
939 wakaba 1.4 } else {
940     $r = "[]";
941     }
942     }
943     $r = $root . ' ' . $r;
944 wakaba 1.9 } elsif ($self->{namespace_uri} eq $NS{SGML}.'entity'
945     || $self->{namespace_uri} eq $NS{SGML}.'entity:parameter'
946     || $self->{namespace_uri} eq $NS{SGML}.'notation') {
947 wakaba 1.4 my %xid_opt;
948 wakaba 1.7 $r = $self->{local_name} . ' ' if !$self->{flag}->{smxp__defined_with_param_ref}
949     && $self->_check_ncname ($self->{local_name});
950 wakaba 1.8 if ($self->{namespace_uri} eq $NS{SGML}.'entity:parameter') {
951 wakaba 1.4 #$r = '% ' . $r;
952 wakaba 1.8 } elsif ($self->{namespace_uri} eq $NS{SGML}.'entity') {
953 wakaba 1.4 $xid_opt{use_ndata} = 1;
954 wakaba 1.8 } elsif ($self->{namespace_uri} eq $NS{SGML}.'notation') {
955 wakaba 1.4 $xid_opt{allow_pubid_only} = 1;
956     }
957    
958     my ($v, $xid) = ($self->{value}, $self->external_id (%xid_opt));
959 wakaba 1.7 undef $xid if $self->{flag}->{smxp__defined_with_param_ref};
960 wakaba 1.4 if ($xid) { ## External ID
961 wakaba 1.2 $r .= $xid;
962 wakaba 1.4 } else { ## EntityValue
963     my $entity_value = $self->get_attribute ('value');
964 wakaba 1.7 undef $entity_value if $self->{flag}->{smxp__defined_with_param_ref};
965 wakaba 1.4 if ($entity_value) { # <!ENTITY foo "bar">
966     $r .= $entity_value->entity_value;
967     } else { ## Parameter entity reference
968 wakaba 1.9 my $isc = $self->_is_same_class ($self->{value});
969     $r .= $self->{value} unless $isc;
970     for (($isc?$self->{value}:()), @{$self->{node}}) {
971 wakaba 1.7 $r .= $_->outer_xml unless $_->{type} eq '#attribute';
972 wakaba 1.4 }
973 wakaba 1.2 }
974 wakaba 1.1 }
975 wakaba 1.8 } elsif ($self->{namespace_uri} eq $NS{SGML}.'element') {
976 wakaba 1.4 my $qname = $self->get_attribute ('qname');
977     ref $qname ? $qname = $qname->inner_text : undef;
978     if ($qname && $self->_check_name ($qname)) {
979     ## Element type name is defined
980     $r = $qname . ' ';
981 wakaba 1.3 if ($o{output_tag_omit_declaration}) {
982 wakaba 1.4 $r .= ($self->{flag}->{element_tag_start_omitable} ? 'o ' : '- ')
983     .
984     ($self->{flag}->{element_tag_end_omitable} ? 'o ' : '- ');
985 wakaba 1.3 }
986 wakaba 1.4 my $rs = $self->inner_text (output_ref_as_is => 1);
987     $r .= $rs || 'ANY';
988 wakaba 1.2 } else {
989 wakaba 1.4 #if (ref $self->{node}->[0] && $self->{node}->[0]->{type} eq '#element') {
990     ### Element prototype is given
991     # $r = $self->{node}->[0]->qname . ' ' . &$to . $self->{node}->[0]->content_spec;
992     #} elsif () {
993     ### Element type name and contentspec is given
994     # $r = $self->{target_name} . ' ' . &$to . ($self->inner_text || 'ANY');
995     #} else {
996 wakaba 1.2 ## (Element type name and contetnspac) is given
997 wakaba 1.4 $r .= $self->inner_text (output_ref_as_is => 1);
998     # || 'Name ANY'; # error
999     #}
1000 wakaba 1.2 }
1001 wakaba 1.7 ## TODO: reform
1002 wakaba 1.2 } elsif ($self->{local_name} eq 'ATTLIST') {
1003     if ($self->_check_name ($self->{target_name})) {
1004     $r = $self->{target_name};
1005     }
1006     my $t = $self->inner_text (output_ref_as_is => 1);
1007     $r .= "\n\t" . $t if $t;
1008     $r ||= 'Name'; # error!
1009     for (@{$self->{node}}) {
1010     if ($_->{type} eq '#attribute') {
1011     $r .= "\n\t" . $_->content_spec;
1012     }
1013 wakaba 1.1 }
1014 wakaba 1.2 } else { # unknown
1015 wakaba 1.3 for (@{$self->{node}}) {
1016     $r .= $_->outer_xml;
1017     }
1018 wakaba 1.1 }
1019 wakaba 1.9 } elsif ($self->{type} eq '#section') {
1020     my $status = $self->get_attribute ('status', make_new_node => 1)->inner_text;
1021     if ($status eq 'CDATA') {
1022     $r = $self->inner_text;
1023     $r =~ s/]]>/]]>]]<![CDATA[>/g;
1024     $r = 'CDATA['.$r;
1025     } else {
1026     my $sl = $self->get_attribute ('status_list', make_new_node => 1);
1027     if ($sl->{flag}->{smxp__defined_with_param_ref}) {
1028     my $isc = $self->_is_same_class ($self->{value});
1029     $status = '';
1030     $status = $sl->{value} unless $isc;
1031     for (($isc?$sl->{value}:()), @{$sl->{node}}) {
1032     $status .= $_->outer_xml unless $_->{type} eq '#attribute';
1033     }
1034     $r = $status.'['.$r;
1035     } elsif ($status) {
1036     $r = $status.'['.$r;
1037     } else {
1038     ## Must be an ignored section
1039     }
1040     my $isc = $self->_is_same_class ($self->{value});
1041     $r .= join '', map {s/\]\]>/]]&gt;/g; $_} $self->{value} unless $isc;
1042     for (($isc?$self->{value}:()), @{$self->{node}}) {
1043     if ($_->{type} eq '#text') {
1044     $r .= join '', map {s/\]\]>/]]&gt;/g; $_} $_->inner_text; ## Anyway, this is error
1045     } elsif ($_->{type} ne '#attribute') {
1046     $r .= $_->outer_xml;
1047     }
1048     }
1049     }
1050 wakaba 1.1 } else {
1051     if ($self->{type} ne '#xml') {
1052     $r = $self->_entitize ($self->{value});
1053     } else {
1054     $r = $self->{value};
1055     }
1056     for (@{$self->{node}}) {
1057     my $nt = $_->node_type;
1058 wakaba 1.4 if (($self->{option}->{indent}) && ($nt eq '#element' || $nt eq '#comment' || $nt eq '#pi' || $nt eq '#declaration')) {
1059 wakaba 1.1 $r .= "\n";
1060     }
1061     $r .= $_->outer_xml unless $_->node_type eq '#attribute';
1062     }
1063     }
1064     $r;
1065     }
1066    
1067    
1068     =item $tag = $x->outer_xml
1069    
1070     Returns the node in XML syntax.
1071    
1072     =cut
1073    
1074     sub outer_xml ($) {
1075     my $self = shift;
1076     if ($self->{type} eq '#attribute') {
1077     $self->attribute;
1078     } else {
1079 wakaba 1.4 if ($self->{option}->{indent} && $self->{type} eq '#element') {
1080 wakaba 1.1 my $r = $self->start_tag;
1081     my $c = $self->inner_xml;
1082 wakaba 1.4 if (!length $c && $self->{option}->{use_EmptyElemTag}) {
1083     substr ($r, -1) = ' />';
1084     } else {
1085     if ($c) {
1086     $c =~ s/\n/\n /g;
1087     $r .= "\n " . $c . "\n";
1088     }
1089     $r .= $self->end_tag;
1090 wakaba 1.1 }
1091     $r;
1092     } else {
1093 wakaba 1.4 my $r = $self->start_tag;
1094     my $c = $self->inner_xml;
1095 wakaba 1.8 if ($self->{type} eq '#element' && !length ($c) && $self->{option}->{use_EmptyElemTag}) {
1096 wakaba 1.4 substr ($r, -1) = ' />';
1097     } else {
1098     $r .= $c . $self->end_tag;
1099     #$r .= "\n" if $self->{type} eq '#declaration';
1100     }
1101 wakaba 1.2 $r;
1102     #'{'.$self->{type}.': '.$r.'}'; # for debug
1103 wakaba 1.1 }
1104     }
1105     }
1106    
1107     =item $tag = $x->inner_text
1108    
1109     Returns the text content of the node. (In many case the returned value is same
1110     as WinIE DOM C<inner_text ()> function's or XPath C<text()> function's.
1111     But some classes that inherits this module might implement to return other
1112     value (eg. to return the value of the alt attribute of html:img element).
1113    
1114 wakaba 1.2 Available options: C<output_ref_as_is>.
1115    
1116 wakaba 1.1 =cut
1117    
1118 wakaba 1.2 sub inner_text ($;%) {
1119 wakaba 1.1 my $self = shift;
1120 wakaba 1.2 my %o = @_;
1121 wakaba 1.5 my $r = '';
1122 wakaba 1.7 if ($self->{type} eq '#reference' && #) {
1123     #if (
1124 wakaba 1.8 ($self->{namespace_uri} eq $NS{SGML}.'char:ref' ||
1125     $self->{namespace_uri} eq $NS{SGML}.'char:ref:hex')) {
1126 wakaba 1.7 $r = chr $self->{value};
1127     #} else { # general entity ref. or parameter entity ref.
1128     # ## TODO: how implement? is this ok?
1129     # my $em = $self->_get_entity_manager;
1130     # $r = $em->get_entity_value ($self->{local_name}, namespace_uri => $self->{namespace_uri});
1131     #}
1132     } elsif ($self->{type} eq '#declaration'
1133     && ($self->{namespace_uri} eq $NS{SGML}.'entity'
1134     || $self->{namespace_uri} eq $NS{SGML}.'entity:parameter')) {
1135     ## TODO:
1136     $r = $self->get_attribute ('value')->inner_text;
1137     } else { # not #reference nor #declaration(ENTITY)
1138 wakaba 1.8 if (ref ($self->{value}) && eval q($self->{value}->_CLASS_NAME eq $self->_CLASS_NAME)) {
1139 wakaba 1.7 unshift @{$self->{node}}, $self->{value};
1140     undef $self->{value};
1141     } else {
1142     $r = $self->{value};
1143     }
1144     if ($o{output_ref_as_is}) { ## output as if RCDATA
1145     $r =~ s/&/&amp;/g;
1146     for my $node (@{$self->{node}}) {
1147     my $nt = $node->node_type;
1148     if ($nt eq '#reference' || $nt eq '#xml') {
1149     $r .= $node->outer_xml;
1150     } elsif ($nt ne '#attribute') {
1151     $r .= map {s/&/&amp;/g; $_} $node->inner_text;
1152     }
1153     }
1154     } else {
1155     for (@{$self->{node}}) {
1156     $r .= $_->inner_text unless $_->node_type eq '#attribute';
1157 wakaba 1.2 }
1158     }
1159 wakaba 1.1 }
1160     $r;
1161     }
1162    
1163 wakaba 1.9 sub _is_same_class ($$) {
1164     my ($self, $something) = @_;
1165     return 0 if !ref $something
1166     || ref ($something) eq 'ARRAY'
1167     || ref ($something) eq 'HASH'
1168     || ref ($something) eq 'CODE';
1169     if (eval q{$self->_CLASS_NAME eq $something->_CLASS_NAME}) {
1170     return 1;
1171     } else {
1172     return 0;
1173     }
1174     }
1175    
1176     sub root_node ($) {
1177     my $self = shift;
1178     if ($self->{type} eq '#document') {
1179     return $self;
1180     } elsif (ref $self->{parent}) {
1181     return $self->{parent}->root_node;
1182     } else {
1183     return $self;
1184     }
1185 wakaba 1.1 }
1186    
1187 wakaba 1.7 sub _get_entity_manager ($) {
1188     my $self = shift;
1189     if ($self->{type} eq '#document') {
1190     unless ($self->{flag}->{smx__entity_manager}) {
1191     require SuikaWiki::Markup::XML::EntityManager;
1192     $self->{flag}->{smx__entity_manager} = SuikaWiki::Markup::XML::EntityManager->new ($self);
1193     }
1194     return $self->{flag}->{smx__entity_manager};
1195     } elsif (ref $self->{parent}) {
1196     return $self->{parent}->_get_entity_manager;
1197     } else {
1198     unless ($self->{flag}->{smx__entity_manager}) {
1199     require SuikaWiki::Markup::XML::EntityManager;
1200     $self->{flag}->{smx__entity_manager} = SuikaWiki::Markup::XML::EntityManager->new ($self);
1201     }
1202     return $self->{flag}->{smx__entity_manager};
1203     }
1204     }
1205    
1206 wakaba 1.5 sub _CLASS_NAME ($) {
1207     'SuikaWiki::Markup::XML';
1208     }
1209    
1210 wakaba 1.1 # $s = $x->_entitize ($s)
1211 wakaba 1.4 sub _entitize ($$;%) {
1212     my ($self, $s, %o) = (shift, shift, @_);
1213 wakaba 1.1 $s =~ s/&/&amp;/g;
1214     $s =~ s/</&lt;/g;
1215     $s =~ s/>/&gt;/g;
1216     $s =~ s/"/&quot;/g;
1217 wakaba 1.4 $s =~ s/%/&#x25;/g if $o{percent};
1218     $s =~ s/'/&#x27;/g if $o{apos};
1219 wakaba 1.7 $s =~ s/([\x00-\x08\x0B\x0C\x0E-\x1F])/sprintf '&amp;#%d;', ord $1/ge;
1220 wakaba 1.1 $s;
1221     }
1222    
1223 wakaba 1.2 # 1/0 = $x->_check_name ($s)
1224     sub _check_name ($$) {
1225     my $self = shift;
1226     my $s = shift;
1227     return $Cache{name}->{$s} if defined $Cache{name}->{$s};
1228     if ($s =~ /^\p{InXML_NameStartChar}/ && $s !~ /\P{InXMLNameChar}/) {
1229     # \p{...}('*'/'+'/'{n,}') does not work...
1230     $Cache{name}->{$s} = 1;
1231     1;
1232     } else {
1233     $Cache{name}->{$s} = 0;
1234     0;
1235     }
1236     }
1237 wakaba 1.1 # 1/0 = $x->_check_ncname ($s)
1238     sub _check_ncname ($$) {
1239     my $self = shift;
1240     my $s = shift;
1241     return $Cache{ncname}->{$s} if defined $Cache{ncname}->{$s};
1242     if ($s =~ /^\p{InXML_NCNameStartChar}/ && $s !~ /\P{InXMLNCNameChar}/) {
1243     # \p{...}('*'/'+'/'{n,}') does not work...
1244     $Cache{ncname}->{$s} = 1;
1245     1;
1246     } else {
1247     $Cache{ncname}->{$s} = 0;
1248     0;
1249     }
1250     }
1251    
1252     # 1/0 = $x->_check_namespace_prefix ($s)
1253     sub _check_namespace_prefix ($$) {
1254     my $self = shift;
1255     my $s = shift;
1256     return 0 unless defined $s;
1257     return 1 if $s eq '';
1258     substr ($s, -1, 1) = '' if substr ($s, -1, 1) eq ':';
1259     $self->_check_ncname ($s);
1260     }
1261    
1262 wakaba 1.2 ## TODO: cleaning $self->{node} before outputing, to ensure nodes not to have
1263     ## multiple parents.
1264     ## TODO: normalize namespace URI (removing non URI chars)
1265    
1266     sub flag ($$;$) {
1267     my ($self, $name, $value) = @_;
1268     if (defined $value) {
1269     $self->{flag}->{$name} = $value;
1270     }
1271     $self->{flag}->{$name};
1272     }
1273    
1274 wakaba 1.3 sub option ($$;$) {
1275     my ($self, $name, $value) = @_;
1276     if (defined $value) {
1277     $self->{option}->{$name} = $value;
1278     }
1279     $self->{option}->{$name};
1280     }
1281    
1282 wakaba 1.1 =back
1283    
1284 wakaba 1.2 =head1 NODE TYPES
1285    
1286     =over 4
1287    
1288     =item #attribute
1289    
1290     Attribute. Its XML representation takes the form of NAME="VALUE".
1291    
1292     =item #comment
1293    
1294     Comment declarement. <!-- -->
1295    
1296     =item #declarement
1297    
1298     SGML's declarements, such as SGML, DOCTYPE, ENTITY, etc.
1299     <!SGML ...>, <!DOCTYPE root []>, <!ENTITY % name "value">
1300    
1301     =item #element
1302    
1303     Element. Its XML representation consists of start tag, content and end tag,
1304     like <TYPE>content</TYPE>.
1305    
1306     =item #fragment
1307    
1308     Fragment of nodes. It's similar to DOM's fragment node.
1309    
1310     =item #pi
1311    
1312     Prosessing instruction. <?NAME VALUE?>
1313    
1314     =item #reference
1315    
1316     Character reference or general or parameter entity reference.
1317     &#nnnn;, &#xhhhh;, &name;, %name;.
1318    
1319     =item #section
1320    
1321     Markup section. CDATA, INCLUDE and IGNORE are supported by XML.
1322     <![%type;[...]]>
1323    
1324     =item #text
1325    
1326     Text.
1327    
1328     =item #xml
1329    
1330     Preformatted XML text.
1331    
1332     =cut
1333    
1334 wakaba 1.4 =head1 RESTRICTIONS
1335    
1336     =over 4
1337    
1338     =item XML without XML Namespace is not supported.
1339    
1340     =item Before default namespace without bounded URI (xmlns="") is outputed, it must be declared.
1341    
1342     For example, next code generates invalid (non-well-formed) XML Namespace document.
1343    
1344     my $x = SuikaWiki::Markup::XML->new (local_name => 'elementType');
1345     print $x # <ns1:elementType xmlns:ns1=""></ns1:elementType>
1346    
1347     So you must write like:
1348    
1349     my $x = SuikaWiki::Markup::XML->new (local_name => 'elementType');
1350     $x->define_new_namespace ('' => '');
1351     print $x; # <elementType xmlns=""></elementType>
1352    
1353     =back
1354    
1355 wakaba 1.1 =head1 LICENSE
1356    
1357     Copyright 2003 Wakaba <w@suika.fam.cx>
1358    
1359     This program is free software; you can redistribute it and/or
1360     modify it under the same terms as Perl itself.
1361    
1362     =cut
1363    
1364 wakaba 1.9 1; # $Date: 2003/06/27 13:07:44 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24