/[pub]/suikawiki/script/lib/SuikaWiki/Markup/XML.pm
Suika

Contents of /suikawiki/script/lib/SuikaWiki/Markup/XML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.17 - (show annotations) (download)
Fri Jan 16 08:23:20 2004 UTC (21 years, 6 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.16: +2 -2 lines
FILE REMOVED
Removed

1
2 =head1 NAME
3
4 SuikaWiki::Markup::XML --- SuikaWiki: Simple well-formed document fragment generator
5
6 =head1 DESCRIPTION
7
8 This module can be used to generate the document fragment of XML, SGML or
9 other well-formed (in XML meaning) data formats with the object oriented manner.
10
11 This module cannot be used to parse XML (or other marked-up) document (or its fragment)
12 by itself, nor is compatible with other huge packages such as XML::Parser. The only purpose
13 of this module is to make it easy for tiny perl scripts to GENERATE well-formed
14 markup constructures. (SuikaWiki is not "tiny"? Oh, yes, I see:-))
15
16 =cut
17
18 package SuikaWiki::Markup::XML;
19 use strict;
20 our $VERSION = do{my @r=(q$Revision: 1.16 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
21 use overload '""' => \&outer_xml,
22 fallback => 1;
23 use Char::Class::XML qw!InXML_NameStartChar InXMLNameChar InXML_NCNameStartChar InXMLNCNameChar!;
24 use Carp;
25 Carp::carp "Obsoleted module SuikaWiki::Markup::XML loaded";
26
27 our %Namespace_URI_to_prefix = (
28 'DAV:' => [qw/dav webdav/],
29 'http://greenbytes.de/2002/rfcedit' => [qw/ed/],
30 'http://icl.com/saxon' => [qw/saxon/],
31 'http://members.jcom.home.ne.jp/jintrick/2003/02/site-concept.xml#' => ['', qw/sitemap/],
32 'http://purl.org/dc/elements/1.1/' => [qw/dc dc11/],
33 'http://purl.org/rss/1.0/' => ['', qw/rss rss10/],
34 'http://suika.fam.cx/~wakaba/lang/rfc/translation/' => [qw/ja/],
35 'http://www.mozilla.org/xbl' => ['', qw/xbl/],
36 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => [qw/rdf/],
37 'http://www.w3.org/1999/xhtml' => ['', qw/h h1 xhtml xhtml1/],
38 'http://www.w3.org/1999/xlink' => [qw/l xlink/],
39 'http://www.w3.org/1999/XSL/Format' => [qw/fo xslfo xsl-fo xsl/],
40 'http://www.w3.org/1999/XSL/Transform' => [qw/t s xslt xsl/],
41 'http://www.w3.org/1999/XSL/TransformAlias' => [qw/axslt axsl xslt xsl/],
42 'http://www.w3.org/2000/01/rdf-schema#' => [qw/rdfs/],
43 'http://www.w3.org/2000/svg' => ['', qw/s svg/],
44 'http://www.w3.org/2002/06/hlink' => [qw/h hlink/],
45 'http://www.w3.org/2002/06/xhtml2' => ['', qw/h h2 xhtml xhtml2/],
46 'http://www.w3.org/2002/07/owl' => [qw/owl/],
47 'http://www.w3.org/2002/xforms/cr' => [qw/f xforms/],
48 'http://www.w3.org/TR/REC-smil' => ['', qw/smil smil1/],
49 'http://www.wapforum.org/2001/wml' => [qw/wap/],
50 'http://xml.apache.org/xalan' => [qw/xalan/],
51 'mailto:julian.reschke@greenbytes.de?subject=rcf2629.xslt' => [qw/myns/],
52 'urn:schemas-microsoft-com:vml' => [qw/v vml/],
53 'urn:schemas-microsoft-com:xslt' => [qw/ms msxsl msxslt/],
54 'urn:x-suika-fam-cx:markup:ietf:html:3:draft:00' => ['', qw/H HTML HTML3/],
55 'urn:x-suika-fam-cx:markup:ietf:rfc:2629' => ['', qw/rfc rfc2629/],
56 );
57 my %Cache;
58 our %NS = (
59 SGML => 'urn:x-suika-fam-cx:markup:sgml:',
60 XML => 'urn:x-suika-fam-cx:markup:xml:',
61 internal_attr_duplicate => 'http://suika.fam.cx/~wakaba/-temp/2003/05/17/invalid-attr#',
62 internal_invalid_sysid => 'http://system.identifier.invalid/',
63 internal_ns_invalid => 'http://suika.fam.cx/~wakaba/-temp/2003/05/17/unknown-namespace#',
64 xml => 'http://www.w3.org/XML/1998/namespace',
65 xmlns => 'http://www.w3.org/2000/xmlns/',
66 );
67
68 =head1 METHODS
69
70 =over 4
71
72 =item $x = SuikaWiki::Markup::XML->new (%options)
73
74 Returns new instance of the module. It is itself a node.
75
76 Available options: C<data_type>, C<default_decl>, C<type> (default: C<#element>), C<local_name>, C<namespace_uri> and C<value>.
77
78 =cut
79
80 sub new ($;%) {
81 my $class = shift;
82 my $self = bless {@_}, $class;
83 $self->{type} ||= '#element';
84 if ($self->{qname}) {
85 ($self->{namespace_prefix}, $self->{local_name}) = $self->_ns_parse_qname ($self->{qname});
86 $self->{_qname} = $self->{qname};
87 }
88 if (defined $self->{namespace_prefix}) {
89 $self->{namespace_prefix} .= ':' if $self->{namespace_prefix}
90 && substr ($self->{namespace_prefix}, -1) ne ':';
91 $self->{ns}->{$self->{namespace_prefix}||''} = $self->{namespace_uri}
92 if defined $self->{namespace_uri};
93 }
94 for (qw/local_name value/) {
95 if ($self->_is_same_class ($self->{$_})) {
96 $self->{$_}->{parent} = $self;
97 }
98 }
99 $self->{node} ||= [];
100 $self;
101 }
102
103 sub _ns_parse_qname ($$) {
104 my $qname = $_[1];
105 if ($qname =~ /:/) {
106 return split /:/, $qname, 2;
107 } else {
108 return (undef, $qname);
109 }
110 }
111
112 =item $x->append_node ($node)
113
114 Appending given node to the object (as the last child).
115 If the type of given node is C<#fragment>, its all children, not the node
116 itself, are appended.
117
118 This method returns the appended node unless the type of given node is C<#fragment>.
119 In such cases, this node (C<$x>) is returned.
120
121 Available options: C<node_or_text>.
122
123 =cut
124
125 sub append_node ($$;%) {
126 my $self = shift;
127 my ($new_node, %o) = @_;
128 unless (ref $new_node) {
129 if ($o{node_or_text}) {
130 return $self->append_text ($new_node);
131 } else {
132 die "append_node: Invalid node";
133 }
134 }
135 if ($new_node->{type} eq '#fragment') {
136 for (@{$new_node->{node}}) {
137 push @{$self->{node}}, $_;
138 $_->{parent} = $self;
139 }
140 $self;
141 } else {
142 push @{$self->{node}}, $new_node;
143 $new_node->{parent} = $self;
144 $new_node;
145 }
146 }
147
148 =item $new_node = $x->append_new_node (%options)
149
150 Appending a new node. The new node is returned.
151
152 Available options: C<type>, C<namespace_uri>, C<local_name>, C<value>.
153
154 =cut
155
156 sub append_new_node ($;%) {
157 my $self = shift;
158 my $new_node = __PACKAGE__->new (@_);
159 push @{$self->{node}}, $new_node;
160 $new_node->{parent} = $self;
161 $new_node;
162 }
163
164 =item $new_node = $x->append_text ($text)
165
166 Appending given text as a new text node. The new text node is returned.
167
168 =cut
169
170 sub append_text ($$;%) {
171 my $self = shift;
172 my $s = shift;
173 $self->append_new_node (type => '#text', value => $s);
174 }
175
176 sub append_baretext ($$;%) {
177 my $self = shift;
178 my $s = shift;
179 $self->append_new_node (type => '#xml', value => $s);
180 }
181
182 sub remove_child_node ($$) {
183 my ($self, $node) = @_;
184 return unless ref $node;
185 $node = overload::StrVal ($node);
186 $self->{node} = [grep { overload::StrVal ($_) ne $node } @{$self->{node}}];
187 }
188
189 =item $attr_node = $x->get_attribute ($local_name, %options)
190
191 Returns the attribute node whose local-name is C<$local_name>.
192
193 Available options: C<namespace_uri>, C<make_new_node>.
194
195 =cut
196
197 sub get_attribute ($$;%) {
198 my ($self, $name, %o) = @_;
199 for (@{$self->{node}}) {
200 if ($_->{type} eq '#attribute'
201 && $_->{local_name} eq $name
202 && $o{namespace_uri} eq $_->{namespace_uri}) {
203 return $_;
204 }
205 }
206 ## Node is not exist
207 if ($o{make_new_node}) {
208 return $self->append_new_node (type => '#attribute', local_name => $name,
209 namespace_uri => $o{namespace_uri});
210 } else {
211 return undef;
212 }
213 }
214
215 =item $attr_node = $x->set_attribute ($local_name => $value, %options)
216
217 Set the value of the attribute. The attribute node is returned.
218
219 Available options: C<namespace_uri>.
220
221 =cut
222
223 sub set_attribute ($$$;%) {
224 my ($self, $name, $val, %o) = @_;
225 if ({qw/ARRAY 1 HASH 1 CODE 1/}->{ref ($val)}) {
226 ## TODO: common error handling
227 require Carp;
228 Carp::croak "set_attribute: new attribute value must be string or blessed object";
229 }
230 for (@{$self->{node}}) {
231 if ($_->{type} eq '#attribute'
232 && $_->{local_name} eq $name
233 && $o{namespace_uri} eq $_->{namespace_uri}) {
234 $_->{value} = $val;
235 $_->{node} = [];
236 return $_;
237 }
238 }
239 return $self->append_new_node (type => '#attribute', local_name => $name,
240 value => $val, namespace_uri => $o{namespace_uri});
241 }
242
243 =item \@children = $x->child_nodes
244
245 Returns an array reference to child nodes.
246
247 =item $local_name = $x->local_name ([$new_name])
248
249 Returns or set the local-name.
250
251 =item $uri = $x->namespace_uri ([$new_uri])
252
253 Returns or set namespace name (URI) of the element or the attribute
254
255 =item $uri = $x->namespace_prefix ([$new_prefix])
256
257 Returns or set namespace prefix of the element or the attribute.
258 You may give C<$new_prefix> in form either 'foo' or 'foo:'.
259 To indicate "default" prefix, use '' (length == 0 string).
260
261 =item $uri or ($uri, $name) = $x->expanded_name
262
263 Returns expanded name of the node (element or attribute).
264 In array context, array of namespace name (URI) and local part
265 is returned; otherwise, a URI which identify name of the node
266 (in RDF or WebDAV) is returned.
267
268 =item $type = $x->node_type
269
270 Returns the node type.
271
272 =item $node = $x->parent_node
273
274 Returns the parent node. If there is no parent node, undef is returned.
275
276 =cut
277
278 sub child_nodes ($) { $_[0]->{node} }
279 sub local_name ($;$) {
280 my ($self, $newname) = @_;
281 $self->{local_name} = $newname if $newname;
282 if (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') {
283 $self->{local_name}->{local_name};
284 } else {
285 $self->{local_name}
286 }
287 }
288 sub node_type ($) { $_[0]->{type} }
289 sub parent_node ($) { $_[0]->{parent} }
290
291 sub namespace_uri ($;$) {
292 my ($self, $new_uri) = @_;
293 $self->{namespace_uri} = $new_uri if defined $new_uri;
294 $self->{namespace_uri};
295 }
296 sub namespace_prefix ($;$) {
297 my ($self, $new_pfx) = @_;
298 if (defined $new_pfx && $self->{namespace_uri}) {
299 $new_pfx .= ':' if $new_pfx;
300 $self->{namespace_prefix} = $new_pfx;
301 $self->{ns}->{$new_pfx} = $self->{namespace_uri};
302 }
303 $self->_get_namespace_prefix ($self->{namespace_uri});
304 }
305
306 sub expanded_name ($) {
307 my $self = shift;
308 wantarray ? ($self->{namespace_uri}, $self->{local_name})
309 : $self->{namespace_uri} . $self->{local_name};
310 }
311
312 =item $i = $x->count
313
314 Returns the number of child nodes.
315
316 =cut
317
318 # TODO: support counting by type
319 sub count ($;@) {
320 (defined $_[0]->{value} ? 1 : 0) + scalar @{$_[0]->{node}};
321 }
322
323 # $prefix = $x->_get_namespace_prefix ($namespace_uri)
324 sub _get_namespace_prefix ($$;%) {
325 my ($self, $uri, %o) = @_;
326 if (defined (my $p = $self->_uri_to_prefix ($uri, undef, %o))) {
327 return $p if $self->_prefix_to_uri ($p) eq $uri;
328 } if ($Namespace_URI_to_prefix{$uri}) {
329 for (@{$Namespace_URI_to_prefix{$uri}}) {
330 my $pfx = $_; $pfx .= ':' if $pfx;
331 if ($self->_check_namespace_prefix ($pfx) && !$self->_prefix_to_uri ($pfx)) {
332 return $self->_uri_to_prefix ($uri => $pfx, %o);
333 }
334 }
335 } else {
336 my ($u_r_i, $pfx) = ($uri);
337 $u_r_i =~ s/[^0-9A-Za-z._-]+/ /g;
338 my @u_r_i = split / /, $u_r_i;
339 for (reverse @u_r_i) {
340 if (s/([A-Za-z][0-9A-Za-z._-]+)//) {
341 my $p_f_x = $1 . ':';
342 next if lc (substr ($p_f_x, 0, 3)) eq 'xml';
343 unless ($self->_prefix_to_uri ($p_f_x)) {
344 $pfx = $p_f_x;
345 last;
346 }
347 }
348 }
349 if ($pfx) {
350 return $self->_uri_to_prefix ($uri => $pfx, %o);
351 } else {
352 while (1) {
353 my $pfx = 'ns'.(++$self->{ns}->{-anonymous}).':';
354 unless ($self->_prefix_to_uri ($pfx)) {
355 return $self->_uri_to_prefix ($uri => $pfx, %o);
356 }
357 }
358 }
359 }
360 }
361
362 sub _set_prefix_to_uri ($$$;%) {
363 my ($self, $prefix => $uri, %o) = @_;
364 return undef unless $self->_check_namespace_prefix ($prefix);
365 $self->{ns}->{$prefix} = $uri;
366 $self->_prefix_to_uri ($prefix);
367 }
368
369 ## TODO: removing ns declare (1.1) support
370 # $uri or undef = $x->_prefix_to_uri ($prefix)
371 sub _prefix_to_uri ($$;$%) {
372 my ($self, $prefix, %o) = @_;
373 return undef unless $self->_check_namespace_prefix ($prefix);
374 if (uc (substr $prefix, 0, 3) eq 'XML') {
375 return $NS{xml} if $prefix eq 'xml:';
376 return $NS{xmlns} if $prefix eq 'xmlns:';
377 }
378 if (defined $self->{ns}->{$prefix}) {
379 $self->{ns}->{$prefix};
380 } elsif (ref $self->{parent}) {
381 shift; # $self
382 $self->{parent}->_prefix_to_uri (@_);
383 } else {
384 undef;
385 }
386 }
387
388 # $prefix or undef = $x->_uri_to_prefix ($uri => [$new_prefix], %options)
389 # use_no_prefix (default: 1): Allow default namespace (no prefix).
390 sub _uri_to_prefix ($$;$%) {
391 my ($self, $uri, $new_prefix, %o) = @_;
392 if (defined $new_prefix && $self->_check_namespace_prefix ($new_prefix)) {
393 $self->{ns}->{$new_prefix} = $uri;
394 $new_prefix;
395 } else {
396 return 'xml:' if $uri eq $NS{xml};
397 return 'xmlns:' if $uri eq $NS{xmlns};
398 for (keys %{$self->{ns}||{}}) {
399 next if ($_ eq '') && !(!defined $o{use_no_prefix} || $o{use_no_prefix});
400 return $_ if $self->{ns}->{$_} eq $uri;
401 }
402 if (ref ($self->{parent}) && $self->{parent}->{type} ne '#declaration') {
403 shift; # $self
404 $self->{parent}->_uri_to_prefix (@_);
405 } else {
406 undef;
407 }
408 }
409 }
410
411 =item $x->define_new_namespace ($prefix => $uri)
412
413 Defines a new XML Namespace. This method is useful for root or section-level
414 element node.
415
416 Returned value is unspecified in this version of this module.
417
418 =cut
419
420 ## TODO: structured URI (such as http://&server;/) support
421 sub define_new_namespace ($$$) {
422 my ($self, $prefix, $uri) = @_;
423 if ($prefix eq '' || $self->_check_ncname ($prefix)) {
424 $prefix .= ':' if $prefix && substr ($prefix, -1) ne ':';
425 $self->_set_prefix_to_uri ($prefix => $uri);
426 } else {
427 undef;
428 }
429 }
430
431 =item $uri = $x->defined_namespace_prefix ($prefix)
432
433 Query whether the namespace prefix is defined or not.
434 If defined, return namespace name (URI).
435
436 =cut
437
438 sub defined_namespace_prefix ($$) {
439 my ($self, $prefix) = @_;
440 $prefix .= ':' if $prefix;
441 $self->_prefix_to_uri ($prefix);
442 }
443
444 =item $qname = $x->qname
445
446 Returns QName ((namespace-)qualified name) of the element type.
447 Undef is retuened when the type does not have its QName
448 (ie. when type is neither C<#element> or C<#attribute>).
449
450 =cut
451
452 sub qname ($) {
453 my $self = shift;
454 if ($self->_check_ncname ($self->{local_name})) {
455 if ($self->{type} eq '#element') {
456 $self->{_qname} = $self->_get_namespace_prefix ($self->{namespace_uri}) . $self->{local_name}
457 unless $self->{_qname};
458 return $self->{_qname};
459 } elsif ($self->{type} eq '#attribute') {
460 return $self->attribute_name;
461 }
462 }
463 undef;
464 }
465
466 sub merge_external_subset ($) {
467 my $self = shift;
468 unless ($self->{type} eq '#declaration' && $self->{namespace_uri} eq $NS{SGML}.'doctype') {
469 return unless $self->{type} eq '#document' || $self->{type} eq '#fragment';
470 for (@{$self->{node}}) {
471 $_->merge_external_subset;
472 }
473 return;
474 }
475 my $xsub = $self->get_attribute ('external-subset');
476 return unless ref $xsub;
477 for (@{$xsub->{node}}) {
478 $_->{parent} = $self;
479 }
480 push @{$self->{node}}, @{$xsub->{node}};
481 $self->remove_child_node ($xsub);
482 $self->remove_child_node ($self->get_attribute ('PUBLIC'));
483 $self->remove_child_node ($self->get_attribute ('SYSTEM'));
484 $self->remove_marked_section;
485 }
486
487 sub remove_marked_section ($) {
488 my $self = shift;
489 my @node;
490 for (@{$self->{node}}) {
491 $_->remove_marked_section;
492 }
493 for (@{$self->{node}}) {
494 if ($_->{type} ne '#section') {
495 push @node, $_;
496 } else {
497 my $status = $_->get_attribute ('status', make_new_node => 1)->inner_text;
498 if ($status eq 'CDATA') {
499 $_->{type} = '#text';
500 $_->remove_child_node ($_->get_attribute ('status'));
501 push @node, $_;
502 } elsif ($status ne 'IGNORE') { # INCLUDE
503 for my $e (@{$_->{node}}) {
504 if ($e->{type} ne '#attribute') {
505 $e->{parent} = $self;
506 push @node, $e;
507 }
508 }
509 }
510 }
511 }
512 $self->{node} = \@node;
513 }
514
515 sub remove_references ($) {
516 my $self = shift;
517 my @node;
518 for (@{$self->{node}}) {
519 $_->remove_references;
520 }
521 for (@{$self->{node}}) {
522 if ($_->{type} ne '#reference'
523 || ($self->{type} eq '#declaration' && $_->{namespace_uri} eq $NS{SGML}.'entity')) {
524 push @node, $_;
525 } else {
526 if ($_->{namespace_uri} =~ /char/) {
527 my $e = ref ($_)->new (type => '#text', value => chr $_->{value});
528 $e->{parent} = $self;
529 push @node, $e;
530 } elsif ($_->{flag}->{smxp__ref_expanded}) {
531 for my $e (@{$_->{node}}) {
532 if ($e->{type} ne '#attribute') {
533 $e->{parent} = $self;
534 push @node, $e;
535 }
536 }
537 } else { ## reference is not expanded
538 push @node, $_;
539 }
540 }
541 $_->{flag}->{smxp__defined_with_param_ref} = 0
542 if $_->{flag}->{smxp__defined_with_param_ref}
543 && !$_->{flag}->{smxp__non_processed_declaration};
544 }
545 $self->{node} = \@node;
546 }
547
548 sub resolve_relative_uri ($;$%) {
549 require URI;
550 my ($self, $rel, %o) = @_;
551 my $base = $self->get_attribute ('base', namespace_uri => $NS{xml});
552 $base = ref ($base) ? $base->inner_text : undef;
553 if ($base !~ /^(?:[0-9A-Za-z.+-]|%[0-9A-Fa-f]{2})+:/) { # $base is relative
554 $base = $self->_resolve_relative_uri_by_parent ($base, \%o);
555 }
556 eval q{ ## Catch error such as $base is 'data:,foo' (non hierarchic scheme,...)
557 return URI->new ($rel)->abs ($base || '.'); ## BUG (or spec) of URI: $base == false
558 } or return $rel;
559 }
560 sub _resolve_relative_uri_by_parent ($$$) {
561 my ($self, $rel, $o) = @_;
562 if (ref $self->{parent}) {
563 if (!$o->{use_references_base_uri} && $self->{parent}->{type} eq '#reference') {
564 ## This case is necessary to work with
565 ## <element> <!-- element can have base URI -->
566 ## text <!-- text cannot have base URI -->
567 ## &ent; <!-- ref's base URI is referred entity's one (in this module) -->
568 ## <!-- expantion of ent -->
569 ## entity's text <!-- text cannot have base URI, so use <element>'s one -->
570 ## <entitys-element/> <!-- element can have base URI, otherwise ENTITY's one -->
571 ## </element>
572 return $self->{parent}->_resolve_relative_uri_by_parent ($rel, $o);
573 } else {
574 return $self->{parent}->resolve_relative_uri ($rel, %$o);
575 }
576 } else {
577 return $rel;
578 }
579 }
580 sub base_uri ($;$) {
581 my ($self, $new_uri) = @_;
582 my $base;
583 if (defined $new_uri) {
584 $base = $self->set_attribute (base => $new_uri, namespace_uri => $NS{xml});
585 }
586 $base ||= $self->get_attribute ('base', namespace_uri => $NS{xml});
587 ref ($base) ? $base->inner_text : undef;
588 }
589
590 =item $tag = $x->start_tag
591
592 Returns the start tag (or something that marks the start of something, such as '<!--'
593 for C<#comment> nodes).
594
595 =cut
596
597 sub start_tag ($) {
598 my $self = shift;
599 if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) {
600 my $r = '<';
601 $r .= $self->qname;
602 for (@{$self->{node}}) {
603 $r .= ' ' . $_->outer_xml if $_->node_type eq '#attribute';
604 }
605 for my $prefix (grep !/^-/, keys %{$self->{ns}||{}}) {
606 if ($prefix) {
607 $r .= ' xmlns:'.substr ($prefix, 0, length ($prefix)-1);
608 } else {
609 $r .= ' xmlns';
610 }
611 $r .= '="'.$self->_entitize ($self->{ns}->{$prefix}).'"';
612 }
613 $r .= '>';
614 $r;
615 } elsif ($self->{type} eq '#comment') {
616 '<!--';
617 } elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) {
618 '<?' . ($self->{local_name});
619 } elsif ($self->{type} eq '#reference') {
620 if ($self->{namespace_uri} eq $NS{SGML}.'char:ref:hex') {
621 '&#x';
622 } elsif ($self->{namespace_uri} eq $NS{SGML}.'char:ref') {
623 '&#';
624 } elsif ($self->_check_ncname ($self->{local_name})) {
625 if ($self->{namespace_uri} eq $NS{SGML}.'entity:parameter') {
626 '%';
627 } else {
628 '&';
629 }
630 } else { # error
631 '';
632 }
633 } elsif ($self->{type} eq '#declaration' && $self->{namespace_uri}) {
634 '<!' . {
635 $NS{SGML}.'attlist' => 'ATTLIST',
636 $NS{SGML}.'doctype' => 'DOCTYPE',
637 $NS{SGML}.'element' => 'ELEMENT',
638 $NS{SGML}.'entity' => 'ENTITY',
639 $NS{SGML}.'entity:parameter' => 'ENTITY',
640 $NS{SGML}.'notation' => 'NOTATION',
641 }->{$self->{namespace_uri}} . ' ' .
642 ($self->{namespace_uri} eq $NS{SGML}.'entity:parameter' ?
643 ($self->{flag}->{smxp__defined_with_param_ref}?'':'% '):'');
644 } elsif ($self->{type} eq '#section') {
645 '<![';
646 } else {
647 '';
648 }
649 }
650
651 =item $tag = $x->end_tag
652
653 Returns the end tag (or something that marks the end of something, such as '-->'
654 for C<#comment> nodes).
655
656 =cut
657
658 sub end_tag ($) {
659 my $self = shift;
660 if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) {
661 '</' . $self->qname . '>';
662 } elsif ($self->{type} eq '#comment') {
663 '-->';
664 } elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) {
665 '?>';
666 } elsif ($self->{type} eq '#reference') {
667 ';';
668 } elsif ($self->{type} eq '#declaration' && $self->{namespace_uri}) {
669 '>';
670 } elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) {
671 '>';
672 } elsif ($self->{type} eq '#section') {
673 ']]>';
674 } else {
675 '';
676 }
677 }
678
679 =item $tag = $x->attribute_name
680
681 Returns the attribute name.
682
683 =cut
684
685 sub attribute_name ($) {
686 my $self = shift;
687 if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
688 ($self->{namespace_uri} ?
689 (ref $self->{parent} ? $self->{parent} : $self)
690 ->_get_namespace_prefix ($self->{namespace_uri}, use_no_prefix => 0) : '')
691 .$self->{local_name};
692 } else {
693 '';
694 }
695 }
696
697 =item $tag = $x->attribute_value
698
699 Returns the attribute value.
700
701 =cut
702
703 sub attribute_value ($;%) {
704 my ($self, %o) = @_;
705 if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
706 my $r = '"';
707 my $isc = $self->_is_same_class ($self->{value});
708 $r .= $self->_entitize ($self->{value}, keep_wsp => 1) unless $isc;
709 for (($isc?$self->{value}:()), @{$self->{node}}) {
710 my $nt = $_->{type};
711 if ($nt eq '#reference' || $nt eq '#xml') {
712 $r .= $_->outer_xml;
713 } elsif ($nt ne '#attribute') {
714 $r .= $self->_entitize ($_->inner_text, keep_wsp => 1);
715 }
716 }
717 return $r . '"';
718 } else {
719 '';
720 }
721 }
722
723 sub entity_value ($;%) {
724 my ($self, %o) = @_;
725 my $_entitize = sub {
726 my $s = shift;
727 $s =~ s/&/&#x26;/g;
728 $s =~ s/&#x26;(\p{InXML_NameStartChar}\p{InXMLNameChar}*);/&$1;/g;
729 $s =~ s/([\x0D%"])/sprintf '&#x%02X;', ord $1/ge;
730 $s =~ s/([\x00-\x08\x0B\x0C\x0E-\x1F\x7F])/sprintf '&amp;#x%02X;', ord $1/ge;
731 $s;
732 };
733 if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
734 my $r = '"' . &$_entitize ($self->{value});
735 for (@{$self->{node}}) {
736 my $nt = $_->{type};
737 if ($nt eq '#reference' || $nt eq '#xml') {
738 $r .= $_->outer_xml;
739 } elsif ($nt ne '#attribute') {
740 $r .= &$_entitize ($_->inner_text);
741 }
742 }
743 return $r . '"';
744 } else {
745 '';
746 }
747 }
748
749 ## This method should be called only from SuikaWiki::Markup::XML::* family modules,
750 ## since this is NOT a FORMAL interface.
751 sub _entity_parameter_literal_value ($;%) {
752 my $self = shift;
753 my $r = '';
754 my $isc = $self->_is_same_class ($self->{value});
755 $r = $self->{value} unless $isc;
756 for (($isc?$self->{value}:()), @{$self->{node}}) {
757 my $nt = $_->{type};
758 ## Bare node and general entity reference node
759 if ($nt eq '#xml' || ($nt eq '#reference' && $_->{namespace_uri} eq $NS{SGML}.'entity')) {
760 $r .= $_->outer_xml;
761 ## Text node and parameter entity reference node
762 } elsif ($nt ne '#attribute') {
763 $r .= $_->inner_text;
764 }
765 }
766 $r;
767 }
768
769 =item $tag = $x->attribute
770
771 Returns the attribute (name and value pair).
772
773 =cut
774
775 sub attribute ($) {
776 my $self = shift;
777 if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
778 $self->attribute_name . '=' . $self->attribute_value;
779 } else {
780 '';
781 }
782 }
783
784 sub external_id ($;%) {
785 my $self = shift;
786 my %o = @_;
787 my ($pubid, $sysid, $ndata);
788 for (@{$self->{node}}) {
789 if ($_->{type} eq '#attribute' && !$_->{namespace_uri}) {
790 if ($_->{local_name} eq 'PUBLIC') {
791 $pubid = $_->inner_text;
792 } elsif ($_->{local_name} eq 'SYSTEM') {
793 $sysid = $_->inner_text;
794 } elsif ($_->{local_name} eq 'NDATA') {
795 $ndata = $_->inner_text;
796 undef $ndata unless $self->_check_ncname ($ndata);
797 }
798 }
799 }
800 my $r = '';
801 if (defined $pubid) {
802 $pubid =~ s|([^\x0A\x0D\x20A-Za-z0-9'()+,./:=?;!*#\@\$_%-])|sprintf '%%%02X', ord $1|ges;
803 $pubid = '"' . $pubid . '"';
804 }
805 if (defined $sysid) {
806 if (index ($sysid, '"') > -1) {
807 if (index ($sysid, "'") > -1) {
808 $sysid =~ s/"/%22/; $sysid = '"' . $sysid . '"';
809 } else {
810 $sysid = "'" . $sysid . "'";
811 }
812 } else {
813 $sysid = '"' . $sysid . '"';
814 }
815 }
816 if ($pubid && $sysid) {
817 $r = 'PUBLIC ' . $pubid . ' ' . $sysid;
818 } elsif ($sysid) {
819 $r = 'SYSTEM ' . $sysid;
820 } elsif ($pubid && $o{allow_pubid_only}) {
821 $r = 'PUBLIC ' . $pubid;
822 }
823 if ($r && $ndata && $o{use_ndata}) {
824 $r .= ' NDATA ' . $ndata;
825 }
826 $r;
827 }
828
829 =item $tag = $x->inner_xml
830
831 Returns the content of the node in XML syntax. (In case of the C<#element> nodes,
832 element content without start- and end-tags is returned.)
833
834 Note that for not all node types the behavior of this method is defined.
835 For example, returned value of C<#attribute> might be unexpected one
836 in this version of this module.
837
838 =cut
839
840 sub inner_xml ($;%) {
841 my ($self, %o) = @_;
842 my $r = '';
843 if ($self->{type} eq '#comment') {
844 $r = $self->inner_text;
845 $r =~ s/--/-&#x45;/g;
846 } elsif ($self->{type} eq '#pi') {
847 my $isc = $self->_is_same_class ($self->{value});
848 if (!$isc && length ($self->{value})) {
849 $r = ' ' . $self->{value};
850 $r =~ s/\?>/? >/g; # Same replacement as of the recommendation of XSLT:p.i.
851 }
852 for (($isc?$self->{value}:()), @{$self->{node}}) {
853 if ($_->node_type eq '#attribute') {
854 $r .= ' ' . $_->attribute;
855 } else {
856 my $s = $_->inner_text;
857 if (length $s) {
858 $s =~ s/\?>/? >/g;
859 $r .= ' ' . $s;
860 }
861 }
862 }
863 } elsif ($self->{type} eq '#reference') {
864 if ($self->{namespace_uri} eq $NS{SGML}.'char:ref:hex') {
865 $r = sprintf '%02X', $self->{value};
866 } elsif ($self->{namespace_uri} eq $NS{SGML}.'char:ref') {
867 $r = sprintf '%02d', $self->{value};
868 } elsif (ref ($self->{local_name}) && $self->{local_name}->{type} eq '#declaration') {
869 $r = $self->{local_name}->{local_name};
870 } elsif ($self->_check_ncname ($self->{local_name})) {
871 $r = ($self->{local_name});
872 } else { # error
873 $r = '';
874 }
875 } elsif ($self->{type} eq '#declaration') {
876 if ($self->{namespace_uri} eq $NS{SGML}.'doctype') {
877 my $root = $self->get_attribute ('qname');
878 ref $root ? ($root = $root->inner_text) : (ref $self->{parent} ? (do {
879 for (@{$self->{parent}->{node}}) {
880 if ($_->{type} eq '#element') {
881 $root = $_->qname;
882 last if $root;
883 }
884 }
885 $root = '#IMPLIED';
886 }) : ($root = '#IMPLIED')); ## error!
887 my ($isub, $xid) = ('', $self->external_id);
888 for (@{$self->{node}}) {
889 $isub .= $_->outer_xml if $_->{type} ne '#attribute';
890 }
891 if ($xid) {
892 $r = $xid;
893 if ($isub) {
894 $r .= " [" . $isub . "]";
895 }
896 } else {
897 if ($isub) {
898 $r = "[" . $isub . "]";
899 } else {
900 $r = "[]";
901 }
902 }
903 $r = $root . ' ' . $r;
904 } elsif ($self->{namespace_uri} eq $NS{SGML}.'entity'
905 || $self->{namespace_uri} eq $NS{SGML}.'entity:parameter'
906 || $self->{namespace_uri} eq $NS{SGML}.'notation') {
907 my %xid_opt;
908 $r = $self->{local_name} . ' ' if !$self->{flag}->{smxp__defined_with_param_ref}
909 && $self->_check_ncname ($self->{local_name});
910 if ($self->{namespace_uri} eq $NS{SGML}.'entity:parameter') {
911 #$r = '% ' . $r;
912 } elsif ($self->{namespace_uri} eq $NS{SGML}.'entity') {
913 $xid_opt{use_ndata} = 1;
914 } elsif ($self->{namespace_uri} eq $NS{SGML}.'notation') {
915 $xid_opt{allow_pubid_only} = 1;
916 }
917
918 my ($v, $xid) = ($self->{value});
919 $xid = $self->external_id (%xid_opt) unless $self->{flag}->{smxp__defined_with_param_ref};
920 if ($xid) { ## External ID
921 $r .= $xid;
922 } else { ## EntityValue
923 my $entity_value = $self->get_attribute ('value');
924 undef $entity_value if $self->{flag}->{smxp__defined_with_param_ref};
925 if ($entity_value) { # <!ENTITY foo "bar">
926 $r .= $entity_value->entity_value;
927 } else { ## Parameter entity reference
928 my $isc = $self->_is_same_class ($self->{value});
929 $r .= $self->{value} unless $isc;
930 for (($isc?$self->{value}:()), @{$self->{node}}) {
931 $r .= $_->outer_xml unless $_->{type} eq '#attribute';
932 }
933 }
934 }
935 } elsif ($self->{namespace_uri} eq $NS{SGML}.'element') {
936 if (!$self->{flag}->{smxp__defined_with_param_ref}) {
937 $r = $self->get_attribute ('qname');
938 $r = $r->inner_text if $r;
939 unless ($self->_check_ncname ($r)) {
940 $r = undef;
941 } else {
942 $r .= ' ';
943 }
944
945 my $cmodel = $self->get_attribute ('content', make_new_node => 1)->inner_text;
946 if ($cmodel && $cmodel ne 'mixed') {
947 $r .= $cmodel;
948 } else {
949 my $make_cmodel;
950 $make_cmodel = sub {
951 my $c = shift;
952 my @tt;
953 for (@{$c->child_nodes}) {
954 if ($_->node_type eq '#element' && $_->namespace_uri eq $NS{SGML}.'element') {
955 if ($_->local_name eq 'group') {
956 my $tt = &$make_cmodel ($_);
957 push @tt, '(' . $tt . ')'
958 . ($_->get_attribute ('occurence', make_new_node => 1)->inner_text)
959 if $tt;
960 } elsif ($_->local_name eq 'element') {
961 push @tt, $_->get_attribute ('qname', make_new_node => 1)->inner_text
962 . ($_->get_attribute ('occurence', make_new_node => 1)->inner_text);
963 }
964 }
965 }
966 return join scalar ($c->get_attribute ('connector', make_new_node => 1)->inner_text
967 || '|'),
968 grep {$_} @tt;
969 };
970 my $tt;
971 my $grp_node;
972 for (@{$self->child_nodes}) {
973 if ($_->node_type eq '#element' && $_->namespace_uri eq $NS{SGML}.'element'
974 && $_->local_name eq 'group') {
975 $grp_node = $_;
976 $tt = &$make_cmodel ($grp_node);
977 last;
978 }
979 }
980 if ($cmodel eq 'mixed') { ## mixed content
981 if ($tt) {
982 $r .= '(#PCDATA|' . $tt . ')*';
983 } else {
984 $r .= '(#PCDATA)'
985 . ($grp_node->get_attribute ('occurence', make_new_node => 1)->inner_text eq '*'
986 ? '*' : '');
987 }
988 } else { ## element content
989 if ($tt) {
990 $r .= '(' . $tt . ')'
991 . ($grp_node->get_attribute ('occurence', make_new_node => 1)->inner_text);
992 } else {
993 $r .= 'EMPTY';
994 }
995 } # mixed or element content
996 } # content model group
997 } else { ## Save source doc's description as far as possible
998 my $isc = $self->_is_same_class ($self->{value});
999 $r .= $self->{value} unless $isc;
1000 for (($isc?$self->{value}:()), @{$self->{node}}) {
1001 unless ($_->{type} eq '#attribute' || $_->{type} eq '#element') {
1002 $r .= $_->outer_xml;
1003 } elsif ($_->{type} eq '#element' && $_->{namespace_uri} eq $NS{SGML}.'group') {
1004 $r .= $_->outer_xml;
1005 }
1006 }
1007 }
1008 } elsif ($self->{namespace_uri} eq $NS{SGML}.'attlist') {
1009 if (!$self->{flag}->{smxp__defined_with_param_ref}) {
1010 $r = $self->get_attribute ('qname');
1011 $r = $r->inner_text if $r;
1012 unless ($self->_check_ncname ($r)) {
1013 $r = undef;
1014 } else {
1015 $r .= ' ';
1016 }
1017 for (@{$self->{node}}) {
1018 if ($_->{type} eq '#element' && $_->{namespace_uri} eq $NS{XML}.'attlist'
1019 && $_->{local_name} eq 'AttDef') {
1020 $r .= "\n\t" . $_->get_attribute ('qname', make_new_node => 1)->inner_text;
1021 my $attr_type = $_->get_attribute ('type', make_new_node => 1)->inner_text;
1022 if ($attr_type ne 'enum') {
1023 $r .= "\t" . $attr_type;
1024 }
1025 if ($attr_type eq 'enum' || $attr_type eq 'NOTATION') {
1026 my @l;
1027 for my $item (@{$_->{node}}) {
1028 if ($item->{type} eq '#element' && $item->{namespace_uri} eq $NS{XML}.'attlist'
1029 && $item->{local_name} eq 'enum') {
1030 push @l, $item->inner_text;
1031 }
1032 }
1033 $r .= "\t(" . join ('|', @l) . ')';
1034 }
1035 ## DefaultDecl
1036 my $deftype = $_->get_attribute ('default_type', make_new_node => 1)->inner_text;
1037 if ($deftype) {
1038 $r .= "\t#" . $deftype;
1039 }
1040 if (!$deftype || $deftype eq 'FIXED') {
1041 $r .= "\t" . $_->get_attribute ('default_value', make_new_node => 1)
1042 ->attribute_value;
1043 }
1044 } # AttDef
1045 }
1046 } else { ## Save source doc's description as far as possible
1047 my $isc = $self->_is_same_class ($self->{value});
1048 $r .= $self->{value} unless $isc;
1049 for (($isc?$self->{value}:()), @{$self->{node}}) {
1050 unless ($_->{type} eq '#attribute' || $_->{type} eq '#element') {
1051 $r .= $_->outer_xml;
1052 } elsif ($_->{type} eq '#element' && $_->{namespace_uri} eq $NS{SGML}.'group') {
1053 $r .= $_->outer_xml;
1054 }
1055 }
1056 }
1057 } else { # unknown
1058 for (@{$self->{node}}) {
1059 $r .= $_->outer_xml;
1060 }
1061 }
1062 } elsif ($self->{type} eq '#section') {
1063 my $status = $self->get_attribute ('status', make_new_node => 1)->inner_text;
1064 if ($status eq 'CDATA') {
1065 $r = $self->inner_text;
1066 $r =~ s/]]>/]]>]]<![CDATA[>/g;
1067 $r = 'CDATA['.$r;
1068 } else {
1069 my $sl = $self->get_attribute ('status_list', make_new_node => 1);
1070 if ($sl->{flag}->{smxp__defined_with_param_ref}) {
1071 my $isc = $self->_is_same_class ($self->{value});
1072 $status = '';
1073 $status = $sl->{value} unless $isc;
1074 for (($isc?$sl->{value}:()), @{$sl->{node}}) {
1075 $status .= $_->outer_xml unless $_->{type} eq '#attribute';
1076 }
1077 $r = $status.'['.$r;
1078 } elsif ($status) {
1079 $r = $status.'['.$r;
1080 } else {
1081 ## Must be an ignored section
1082 }
1083 my $isc = $self->_is_same_class ($self->{value});
1084 $r .= join '', map {s/\]\]>/]]&gt;/g; $_} $self->{value} unless $isc;
1085 for (($isc?$self->{value}:()), @{$self->{node}}) {
1086 if ($_->{type} eq '#text') {
1087 $r .= join '', map {s/\]\]>/]]&gt;/g; $_} $_->inner_text; ## Anyway, this is error
1088 } elsif ($_->{type} ne '#attribute') {
1089 $r .= $_->outer_xml;
1090 }
1091 }
1092 }
1093 } else {
1094 my $isc = $self->_is_same_class ($self->{value});
1095 unless ($isc) {
1096 if ($self->{type} ne '#xml') {
1097 $r = $self->_entitize ($self->{value});
1098 } else {
1099 $r = $self->{value};
1100 }
1101 }
1102 for (($isc?$self->{value}:()), @{$self->{node}}) {
1103 my $nt = $_->{type};
1104 if (($self->{option}->{indent})
1105 && ($nt eq '#element' || $nt eq '#comment' || $nt eq '#pi' || $nt eq '#declaration')) {
1106 $r .= "\n";
1107 }
1108 $r .= $_->outer_xml unless $_->node_type eq '#attribute';
1109 }
1110 }
1111 $r;
1112 }
1113
1114
1115 =item $tag = $x->outer_xml
1116
1117 Returns the node in XML syntax.
1118
1119 =cut
1120
1121 sub outer_xml ($) {
1122 my $self = shift;
1123 if ($self->{type} eq '#attribute') {
1124 return $self->attribute;
1125 } else {
1126 if ($self->{option}->{indent} && $self->{type} eq '#element') {
1127 my $r = $self->start_tag;
1128 my $c = $self->inner_xml;
1129 if (!length ($c) && $self->{option}->{use_EmptyElemTag}) {
1130 substr ($r, -1) = ' />';
1131 } else {
1132 if ($c) {
1133 $c =~ s/\n/\n /g;
1134 $r .= "\n " . $c . "\n";
1135 }
1136 $r .= $self->end_tag;
1137 }
1138 return $r;
1139 } else {
1140 my $r = $self->start_tag;
1141 my $c = $self->inner_xml;
1142 if ($self->{type} eq '#element' && !length ($c) && $self->{option}->{use_EmptyElemTag}) {
1143 substr ($r, -1) = ' />';
1144 } else {
1145 $r .= $c . $self->end_tag;
1146 }
1147 return $r;
1148 #return '{'.$self->{type}.': '.$r.'}'; ## DEBUG: show structure
1149 }
1150 }
1151 }
1152
1153 =item $tag = $x->inner_text
1154
1155 Returns the text content of the node. (In many case the returned value is same
1156 as WinIE DOM C<inner_text ()> function's or XPath C<text()> function's.
1157 But some classes that inherits this module might implement to return other
1158 value (eg. to return the value of the alt attribute of html:img element).
1159
1160 Available options: C<output_ref_as_is>.
1161
1162 =cut
1163
1164 sub inner_text ($;%) {
1165 my $self = shift;
1166 my %o = @_;
1167 my $r = '';
1168 if ($self->{type} eq '#reference'
1169 && ($self->{namespace_uri} eq $NS{SGML}.'char:ref'
1170 || $self->{namespace_uri} eq $NS{SGML}.'char:ref:hex')) {
1171 $r = chr $self->{value};
1172 } elsif ($self->{type} eq '#declaration'
1173 && ($self->{namespace_uri} eq $NS{SGML}.'entity'
1174 || $self->{namespace_uri} eq $NS{SGML}.'entity:parameter')) {
1175 ## TODO:
1176 $r = $self->set_attribute ('value')->inner_text;
1177 } else { # not #reference nor #declaration(ENTITY)
1178 my $isc = $self->_is_same_class ($self->{value});
1179 $r = $self->{value} if !$isc && defined $self->{value};
1180 if ($o{output_ref_as_is}) { ## output as if RCDATA
1181 $r =~ s/&/&amp;/g;
1182 for my $node (($isc?$self->{value}:()), @{$self->{node}}) {
1183 my $nt = $node->node_type;
1184 if ($nt eq '#reference' || $nt eq '#xml') {
1185 $r .= $node->outer_xml;
1186 } elsif ($nt ne '#attribute') {
1187 $r .= map {s/&/&amp;/g; $_} $node->inner_text;
1188 }
1189 }
1190 } else {
1191 for (($isc?$self->{value}:()), @{$self->{node}}) {
1192 $r .= $_->inner_text unless $_->{type} eq '#attribute';
1193 }
1194 }
1195 }
1196 $r;
1197 }
1198
1199 sub _is_same_class ($$) {
1200 my ($self, $something) = @_;
1201 return 0 if {qw/ARRAY 1 HASH 1 CODE 1 :nonref: 1/}->{ref ($something) || ':nonref:'};
1202 eval q{$self->_CLASS_NAME eq $something->_CLASS_NAME} ? 1 : 0;
1203 }
1204
1205 sub root_node ($) {
1206 my $self = shift;
1207 if ($self->{type} eq '#document') {
1208 return $self;
1209 } elsif (ref $self->{parent}) {
1210 return $self->{parent}->root_node;
1211 } else {
1212 return $self;
1213 }
1214 }
1215
1216 sub _get_entity_manager ($) {
1217 my $self = shift;
1218 if ($self->{type} eq '#document') {
1219 unless ($self->{flag}->{smx__entity_manager}) {
1220 require SuikaWiki::Markup::XML::EntityManager;
1221 $self->{flag}->{smx__entity_manager} = SuikaWiki::Markup::XML::EntityManager->new ($self);
1222 }
1223 return $self->{flag}->{smx__entity_manager};
1224 } elsif (ref $self->{parent}) {
1225 return $self->{parent}->_get_entity_manager;
1226 } else {
1227 unless ($self->{flag}->{smx__entity_manager}) {
1228 require SuikaWiki::Markup::XML::EntityManager;
1229 $self->{flag}->{smx__entity_manager} = SuikaWiki::Markup::XML::EntityManager->new ($self);
1230 }
1231 return $self->{flag}->{smx__entity_manager};
1232 }
1233 }
1234
1235 sub _CLASS_NAME ($) { 'SuikaWiki::Markup::XML' }
1236
1237 # $s = $x->_entitize ($s)
1238 sub _entitize ($$;%) {
1239 my ($self, $s, %o) = (shift, shift, @_);
1240 $s =~ s/&/&amp;/g;
1241 $s =~ s/</&lt;/g;
1242 $s =~ s/>/&gt;/g;
1243 $s =~ s/"/&quot;/g;
1244 $s =~ s/([\x00-\x08\x0B\x0C\x0E-\x1F])/sprintf '&amp;#%d;', ord $1/ge;
1245 $s =~ s/([\x09\x0A\x0D])/sprintf '&#%d;', ord $1/ge if $o{keep_wsp};
1246 $s;
1247 }
1248
1249 # 1/0 = $x->_check_name ($s)
1250 sub _check_name ($$) {
1251 my $self = shift;
1252 my $s = shift;
1253 return $Cache{name}->{$s} if defined $Cache{name}->{$s};
1254 if ($s =~ /^\p{InXML_NameStartChar}/ && $s !~ /\P{InXMLNameChar}/) {
1255 # \p{...}('*'/'+'/'{n,}') does not work...
1256 $Cache{name}->{$s} = 1;
1257 1;
1258 } else {
1259 $Cache{name}->{$s} = 0;
1260 0;
1261 }
1262 }
1263 # 1/0 = $x->_check_ncname ($s)
1264 sub _check_ncname ($$) {
1265 my $self = shift;
1266 my $s = shift;
1267 return $Cache{ncname}->{$s} if defined $Cache{ncname}->{$s};
1268 if ($s =~ /^\p{InXML_NCNameStartChar}/ && $s !~ /\P{InXMLNCNameChar}/) {
1269 # \p{...}('*'/'+'/'{n,}') does not work...
1270 $Cache{ncname}->{$s} = 1;
1271 1;
1272 } else {
1273 $Cache{ncname}->{$s} = 0;
1274 0;
1275 }
1276 }
1277
1278 # 1/0 = $x->_check_namespace_prefix ($s)
1279 sub _check_namespace_prefix ($$) {
1280 my $self = shift;
1281 my $s = shift;
1282 return 0 unless defined $s;
1283 return 1 if $s eq '';
1284 substr ($s, -1, 1) = '' if substr ($s, -1, 1) eq ':';
1285 $self->_check_ncname ($s);
1286 }
1287
1288 ## TODO: cleaning $self->{node} before outputing, to ensure nodes not to have
1289 ## multiple parents.
1290 ## TODO: normalize namespace URI (removing non URI chars)
1291
1292 sub flag ($$;$) {
1293 my ($self, $name, $value) = @_;
1294 if (defined $value) {
1295 $self->{flag}->{$name} = $value;
1296 }
1297 $self->{flag}->{$name};
1298 }
1299
1300 sub option ($$;$) {
1301 my ($self, $name, $value) = @_;
1302 if (defined $value) {
1303 $self->{option}->{$name} = $value;
1304 }
1305 $self->{option}->{$name};
1306 }
1307
1308 =back
1309
1310 =head1 NODE TYPES
1311
1312 =over 4
1313
1314 =item #attribute
1315
1316 Attribute. Its XML representation takes the form of NAME="VALUE".
1317
1318 =item #comment
1319
1320 Comment declarement. <!-- -->
1321
1322 =item #declarement
1323
1324 SGML's declarements, such as SGML, DOCTYPE, ENTITY, etc.
1325 <!SGML ...>, <!DOCTYPE root []>, <!ENTITY % name "value">
1326
1327 =item #element
1328
1329 Element. Its XML representation consists of start tag, content and end tag,
1330 like <TYPE>content</TYPE>.
1331
1332 =item #fragment
1333
1334 Fragment of nodes. It's similar to DOM's fragment node.
1335
1336 =item #pi
1337
1338 Prosessing instruction. <?NAME VALUE?>
1339
1340 =item #reference
1341
1342 Character reference or general or parameter entity reference.
1343 &#nnnn;, &#xhhhh;, &name;, %name;.
1344
1345 =item #section
1346
1347 Markup section. CDATA, INCLUDE and IGNORE are supported by XML.
1348 <![%type;[...]]>
1349
1350 =item #text
1351
1352 Text.
1353
1354 =item #xml
1355
1356 Preformatted XML text.
1357
1358 =cut
1359
1360 =head1 RESTRICTIONS
1361
1362 =over 4
1363
1364 =item XML without XML Namespace is not supported.
1365
1366 =item Before default namespace without bounded URI (xmlns="") is outputed, it must be declared.
1367
1368 For example, next code generates invalid (non-well-formed) XML Namespace document.
1369
1370 my $x = SuikaWiki::Markup::XML->new (local_name => 'elementType');
1371 print $x # <ns1:elementType xmlns:ns1=""></ns1:elementType>
1372
1373 So you must write like:
1374
1375 my $x = SuikaWiki::Markup::XML->new (local_name => 'elementType');
1376 $x->define_new_namespace ('' => '');
1377 print $x; # <elementType xmlns=""></elementType>
1378
1379 =back
1380
1381 =head1 LICENSE
1382
1383 Copyright 2003 Wakaba <w@suika.fam.cx>
1384
1385 This program is free software; you can redistribute it and/or
1386 modify it under the same terms as Perl itself.
1387
1388 =cut
1389
1390 1; # $Date: 2004/01/16 08:04:59 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24