/[pub]/suikawiki/script/lib/SuikaWiki/Markup/XML.pm
Suika

Contents of /suikawiki/script/lib/SuikaWiki/Markup/XML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (hide annotations) (download)
Sat May 24 04:52:19 2003 UTC (22 years, 1 month ago) by wakaba
Branch: MAIN
Changes since 1.3: +280 -117 lines
Parser: New

1 wakaba 1.1
2     =head1 NAME
3    
4     SuikaWiki::Markup::XML --- SuikaWiki: Simple well-formed document fragment generator
5    
6     =head1 DESCRIPTION
7    
8     This module can be used to generate the document fragment of XML, SGML or
9     other well-formed (in XML meaning) data formats with the object oriented manner.
10    
11     This module cannot be used to parse XML (or other marked-up) document (or its fragment)
12     by itself, nor is compatible with other huge packages such as XML::Parser. The only purpose
13     of this module is to make it easy for tiny perl scripts to GENERATE well-formed
14     markup constructures. (SuikaWiki is not "tiny"? Oh, yes, I see:-))
15    
16     =cut
17    
18     package SuikaWiki::Markup::XML;
19     use strict;
20 wakaba 1.4 our $VERSION = do{my @r=(q$Revision: 1.2 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
21 wakaba 1.2 use overload '""' => \&stringify,
22     fallback => 1;
23     use Char::Class::XML qw!InXML_NameStartChar InXMLNameChar InXML_NCNameStartChar InXMLNCNameChar!;
24 wakaba 1.1 our %Namespace_URI_to_prefix = (
25     'DAV:' => [qw/dav webdav/],
26     'http://greenbytes.de/2002/rfcedit' => [qw/ed/],
27     'http://icl.com/saxon' => [qw/saxon/],
28     'http://members.jcom.home.ne.jp/jintrick/2003/02/site-concept.xml#' => ['', qw/sitemap/],
29     'http://purl.org/dc/elements/1.1/' => [qw/dc dc11/],
30     'http://purl.org/rss/1.0/' => ['', qw/rss rss10/],
31     'http://suika.fam.cx/~wakaba/lang/rfc/translation/' => [qw/ja/],
32     'http://www.mozilla.org/xbl' => ['', qw/xbl/],
33 wakaba 1.3 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => [qw/rdf/],
34 wakaba 1.1 'http://www.w3.org/1999/xhtml' => ['', qw/h h1 xhtml xhtml1/],
35     'http://www.w3.org/1999/xlink' => [qw/l xlink/],
36     'http://www.w3.org/1999/XSL/Format' => [qw/fo xslfo xsl-fo xsl/],
37     'http://www.w3.org/1999/XSL/Transform' => [qw/t s xslt xsl/],
38     'http://www.w3.org/1999/XSL/TransformAlias' => [qw/axslt axsl xslt xsl/],
39     'http://www.w3.org/2000/01/rdf-schema#' => [qw/rdfs/],
40     'http://www.w3.org/2000/svg' => ['', qw/s svg/],
41     'http://www.w3.org/2002/06/hlink' => [qw/h hlink/],
42     'http://www.w3.org/2002/06/xhtml2' => ['', qw/h h2 xhtml xhtml2/],
43 wakaba 1.3 'http://www.w3.org/2002/07/owl' => [qw/owl/],
44 wakaba 1.1 'http://www.w3.org/TR/REC-smil' => ['', qw/smil smil1/],
45 wakaba 1.3 'http://www.wapforum.org/2001/wml' => [qw/wap/],
46 wakaba 1.1 'http://xml.apache.org/xalan' => [qw/xalan/],
47     'mailto:julian.reschke@greenbytes.de?subject=rcf2629.xslt' => [qw/myns/],
48     'urn:schemas-microsoft-com:vml' => [qw/v vml/],
49     'urn:schemas-microsoft-com:xslt' => [qw/ms msxsl msxslt/],
50     'urn:x-suika-fam-cx:markup:ietf:html:3:draft:00' => ['', qw/H HTML HTML3/],
51     );
52     my %Cache;
53 wakaba 1.4 my $NS_SGML = 'urn:x-suika-fam-cx:markup:sgml:';
54 wakaba 1.1
55     =head1 METHODS
56    
57     =over 4
58    
59     =item $x = SuikaWiki::Markup::XML->new (%options)
60    
61     Returns new instance of the module. It is itself a node.
62    
63 wakaba 1.4 Available options: C<data_type>, C<default_decl>, C<type> (default: C<#element>), C<local_name>, C<namespace_uri> and C<value>.
64 wakaba 1.1
65     =cut
66    
67     sub new ($;%) {
68     my $class = shift;
69     my $self = bless {@_}, $class;
70     $self->{type} ||= '#element';
71 wakaba 1.4 for (qw/local_name value/) {
72 wakaba 1.2 if (ref $self->{$_}) {
73     $self->{$_}->{parent} = $self;
74     }
75     }
76 wakaba 1.1 $self->{node} ||= [];
77     $self;
78     }
79    
80    
81     =item $x->append_node ($node)
82    
83     Appending given node to the object (as the last child).
84     If the type of given node is C<#fragment>, its all children, not the node
85     itself, are appended.
86    
87     This method returns the appended node unless the type of given node is C<#fragment>.
88     In such cases, this node (C<$x>) is returned.
89    
90 wakaba 1.2 Available options: C<node_or_text>.
91    
92 wakaba 1.1 =cut
93    
94 wakaba 1.2 sub append_node ($$;%) {
95 wakaba 1.1 my $self = shift;
96 wakaba 1.2 my ($new_node, %o) = @_;
97     unless (ref $new_node) {
98     if ($o{node_or_text}) {
99     return $self->append_text ($new_node);
100     } else {
101     die "append_node: Invalid node" unless ref $new_node;
102     }
103     }
104 wakaba 1.1 if ($new_node->{type} eq '#fragment') {
105     for (@{$new_node->{node}}) {
106     push @{$self->{node}}, $_;
107     $_->{parent} = $self;
108     }
109     $self;
110     } else {
111     push @{$self->{node}}, $new_node;
112     $new_node->{parent} = $self;
113     $new_node;
114     }
115     }
116    
117     =item $new_node = $x->append_new_node (%options)
118    
119     Appending a new node. The new node is returned.
120    
121     Available options: C<type>, C<namespace_uri>, C<local_name>, C<value>.
122    
123     =cut
124    
125     sub append_new_node ($;%) {
126     my $self = shift;
127     my %o = @_;
128     my $new_node = __PACKAGE__->new (%o);
129     push @{$self->{node}}, $new_node;
130     $new_node->{parent} = $self;
131     $new_node;
132     }
133    
134     =item $new_node = $x->append_text ($text)
135    
136     Appending given text as a new text node. The new text node is returned.
137    
138     =cut
139    
140     sub append_text ($$;%) {
141     my $self = shift;
142     my $s = shift;
143     #if (@{$self->{node}}[-1]->{type} eq '#text') {
144     # $self->{node}}[-1]->append_new_node (type => '#text', value => $s);
145     #} else {
146     $self->append_new_node (type => '#text', value => $s);
147     #}
148     }
149    
150 wakaba 1.2 sub append_baretext ($$;%) {
151     my $self = shift;
152     my $s = shift;
153     $self->append_new_node (type => '#xml', value => $s);
154     }
155    
156 wakaba 1.1 =item $attr_node = $x->get_attribute ($local_name, %options)
157    
158     Returns the attribute node whose local-name is C<$local_name>.
159    
160     Available options: C<namespace_uri>, C<make_new_node>.
161    
162     =cut
163    
164     sub get_attribute ($$;%) {
165     my $self = shift;
166     my ($name, %o) = @_;
167     for (@{$self->{node}}) {
168     if ($_->{type} eq '#attribute' && $_->{local_name} eq $name && $o{namespace_uri} eq $_->{namespace_uri}) {
169     return $_;
170     }
171     }
172     ## Node is not exist
173     if ($o{make_new_node}) {
174     return $self->append_new_node (type => '#attribute', local_name => $name, namespace_uri => $o{namespace_uri});
175     } else {
176     return undef;
177     }
178     }
179    
180     =item $attr_node = $x->set_attribute ($local_name => $value, %options)
181    
182     Set the value of the attribute. The attribute node is returned.
183    
184     Available options: C<namespace_uri>.
185    
186     =cut
187    
188     sub set_attribute ($$$;%) {
189     my $self = shift;
190     my ($name, $val, %o) = @_;
191     for (@{$self->{node}}) {
192     if ($_->{type} eq '#attribute' && $_->{local_name} eq $name && $o{namespace_uri} eq $_->{namespace_uri}) {
193     $_->{value} = $val;
194     $_->{node} = [];
195     return $_;
196     }
197     }
198     return $self->append_new_node (type => '#attribute', local_name => $name, value => $val, namespace_uri => $o{namespace_uri});
199     }
200    
201     =item \@children = $x->child_nodes
202    
203     Returns an array reference to child nodes.
204    
205 wakaba 1.4 =item $local_name = $x->local_name ([$new_name])
206 wakaba 1.1
207 wakaba 1.4 Returns or set the local-name.
208    
209     =item $uri = $x->namespace_uri ([$new_uri])
210    
211     Returns or set namespace name (URI) of the element or the attribute
212 wakaba 1.1
213     =item $type = $x->node_type
214    
215     Returns the node type.
216    
217     =item $node = $x->parent_node
218    
219     Returns the parent node. If there is no parent node, undef is returned.
220    
221     =cut
222    
223     sub child_nodes ($) { shift->{node} }
224 wakaba 1.4 sub local_name ($;$) {
225     my ($self, $newname) = @_;
226     if ($newname) {
227     $self->{local_name} = $newname;
228     }
229     if (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') {
230     $self->{local_name}->{local_name};
231     } else {
232     $self->{local_name}
233     }
234     }
235 wakaba 1.1 sub node_type ($) { shift->{type} }
236     sub parent_node ($) { shift->{parent} }
237    
238 wakaba 1.4 ## TODO: obsolete
239     sub target_name ($;$) {
240     my ($self, $new) = @_;
241     if (defined $new) {
242     $self->{target_name} = $new;
243     }
244     $self->{target_name};
245     }
246    
247     sub namespace_uri ($;$) {
248     my ($self, $new_uri) = @_;
249     if (defined $new_uri) {
250     $self->{namespace_uri} = $new_uri;
251     }
252     $self->{namespace_uri};
253     }
254    
255 wakaba 1.1 =item $i = $x->count
256    
257     Returns the number of child nodes.
258    
259     =cut
260    
261     # TODO: support counting by type
262     sub count ($;@) {
263 wakaba 1.2 my $self = shift;
264     (defined $self->{value} ? 1 : 0) + scalar @{$self->{node}};
265 wakaba 1.1 }
266    
267     # $prefix = $x->_get_namespace_prefix ($namespace_uri)
268     sub _get_namespace_prefix ($$;%) {
269     my ($self, $uri) = (shift, shift);
270     my %o = @_;
271     if (defined (my $p = $self->_uri_to_prefix ($uri, undef, %o))) {
272     return $p if $self->_prefix_to_uri ($p) eq $uri;
273     } if ($Namespace_URI_to_prefix{$uri}) {
274     for (@{$Namespace_URI_to_prefix{$uri}}) {
275     my $pfx = $_; $pfx .= ':' if $pfx;
276     if ($self->_check_namespace_prefix ($pfx) && !$self->_prefix_to_uri ($pfx)) {
277     return $self->_uri_to_prefix ($uri => $pfx, %o);
278     }
279     }
280     } else {
281     my ($u_r_i, $pfx) = ($uri);
282 wakaba 1.4 $u_r_i =~ s/[^0-9A-Za-z._-]+/ /g;
283     my @u_r_i = split / /, $u_r_i;
284     for (reverse @u_r_i) {
285     if (s/([A-Za-z][0-9A-Za-z._-]+)//) {
286     my $p_f_x = $1 . ':';
287     next if lc (substr ($p_f_x, 0, 3)) eq 'xml';
288     unless ($self->_prefix_to_uri ($p_f_x)) {
289     $pfx = $p_f_x;
290     last;
291     }
292 wakaba 1.1 }
293     }
294     if ($pfx) {
295     return $self->_uri_to_prefix ($uri => $pfx, %o);
296     } else {
297     while (1) {
298     my $pfx = 'ns'.(++$self->{ns}->{-anonymous}).':';
299     unless ($self->_prefix_to_uri ($pfx)) {
300     return $self->_uri_to_prefix ($uri => $pfx, %o);
301     }
302     }
303     }
304     }
305     }
306    
307 wakaba 1.4 sub _set_prefix_to_uri ($$$;%) {
308     my ($self, $prefix => $uri, %o) = @_;
309     return undef unless $self->_check_namespace_prefix ($prefix);
310     $self->{ns}->{$prefix} = $uri;
311     $self->_prefix_to_uri ($prefix);
312     }
313    
314     ## TODO: removing ns declare (1.1) support
315     # $uri or undef = $x->_prefix_to_uri ($prefix)
316 wakaba 1.1 sub _prefix_to_uri ($$;$%) {
317 wakaba 1.4 my ($self, $prefix, %o) = @_;
318 wakaba 1.1 return undef unless $self->_check_namespace_prefix ($prefix);
319     if (uc (substr $prefix, 0, 3) eq 'XML') {
320     return 'http://www.w3.org/XML/1998/namespace' if $prefix eq 'xml:';
321     return 'http://www.w3.org/2000/xmlns/' if $prefix eq 'xmlns:';
322 wakaba 1.2 }
323     if (defined $self->{ns}->{$prefix}) {
324 wakaba 1.1 $self->{ns}->{$prefix};
325     } elsif (ref $self->{parent}) {
326     shift; # $self
327     $self->{parent}->_prefix_to_uri (@_);
328     } else {
329     undef;
330     }
331     }
332    
333     # $prefix or undef = $x->_uri_to_prefix ($uri => [$new_prefix], %options)
334     # use_no_prefix (default: 1): Allow default namespace (no prefix).
335     sub _uri_to_prefix ($$;$%) {
336     my ($self, $uri, $new_prefix, %o) = @_;
337     if (defined $new_prefix && $self->_check_namespace_prefix ($new_prefix)) {
338     $self->{ns}->{$new_prefix} = $uri;
339     $new_prefix;
340     } else {
341     return 'xml:' if $uri eq 'http://www.w3.org/XML/1998/namespace';
342     return 'xmlns:' if $uri eq 'http://www.w3.org/2000/xmlns/';
343     for (keys %{$self->{ns}||{}}) {
344     next if ($_ eq '') && !(!defined $o{use_no_prefix} || $o{use_no_prefix});
345     return $_ if $self->{ns}->{$_} eq $uri;
346     }
347 wakaba 1.2 if (ref ($self->{parent}) && $self->{parent}->{type} ne '#declaration') {
348 wakaba 1.1 shift; # $self
349     $self->{parent}->_uri_to_prefix (@_);
350     } else {
351     undef;
352     }
353     }
354     }
355    
356     =item $x->define_new_namespace ($prefix => $uri)
357    
358     Defines a new XML Namespace. This method is useful for root or section-level
359     element node.
360    
361     Returned value is unspecified in this version of this module.
362    
363     =cut
364    
365 wakaba 1.4 ## TODO: structured URI (such as http://&server;/) support
366 wakaba 1.1 sub define_new_namespace ($$$) {
367     my ($self, $prefix, $uri) = @_;
368     if ($prefix eq '' || $self->_check_ncname ($prefix)) {
369     $prefix .= ':' if $prefix && substr ($prefix, -1) ne ':';
370 wakaba 1.4 $self->_set_prefix_to_uri ($prefix => $uri);
371 wakaba 1.1 } else {
372     undef;
373     }
374     }
375    
376 wakaba 1.4 =item $uri = $x->defined_namespace_prefix ($prefix)
377    
378     Query whether the namespace prefix is defined or not.
379     If defined, return namespace name (URI).
380    
381     =cut
382    
383     sub defined_namespace_prefix ($$) {
384     my ($self, $prefix) = @_;
385     $prefix .= ':' if $prefix;#Carp::carp join ",", ';',@_;
386     $self->_prefix_to_uri ($prefix);
387     }
388    
389 wakaba 1.1 =item $qname = $x->qname
390    
391     Returns QName ((namespace-)qualified name) of the element type.
392     Undef is retuened when the type does not have its QName
393     (ie. when type is neither C<#element> or C<#attribute>).
394    
395     =cut
396    
397     sub qname ($) {
398     my $self = shift;
399     if ($self->_check_ncname ($self->{local_name})) {
400     if ($self->{type} eq '#element') {
401     $self->{_qname} = $self->_get_namespace_prefix ($self->{namespace_uri}) . $self->{local_name}
402     unless $self->{_qname};
403     return $self->{_qname};
404     } elsif ($self->{type} eq '#attribute') {
405     return $self->attribute_name;
406     }
407     }
408     undef;
409     }
410    
411     =item $tag = $x->start_tag
412    
413 wakaba 1.4 Returns the start tag (or something that marks the start of something, such as '<!--'
414 wakaba 1.1 for C<#comment> nodes).
415    
416     =cut
417    
418     sub start_tag ($) {
419     my $self = shift;
420     if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) {
421     my $r = '<';
422     $r .= $self->qname;
423     for (@{$self->{node}}) {
424     $r .= ' ' . $_->outer_xml if $_->node_type eq '#attribute';
425     }
426     for my $prefix (grep !/^-/, keys %{$self->{ns}||{}}) {
427     if ($prefix) {
428     $r .= ' xmlns:'.substr ($prefix, 0, length ($prefix)-1);
429     } else {
430     $r .= ' xmlns';
431     }
432     $r .= '="'.$self->_entitize ($self->{ns}->{$prefix}).'"';
433     }
434     $r .= '>';
435     $r;
436     } elsif ($self->{type} eq '#comment') {
437 wakaba 1.4 '<!--';
438     } elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) {
439     '<?' . ($self->{local_name});
440 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
441     if ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref:hex') {
442     '&#x';
443     } elsif ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref') {
444     '&#';
445 wakaba 1.4 } elsif (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') {
446     if ($self->{local_name}->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:entity:parameter') {
447 wakaba 1.2 '%';
448     } else {
449     '&';
450     }
451 wakaba 1.4 } elsif ($self->_check_ncname ($self->{local_name})) {
452 wakaba 1.2 if ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:entity:parameter') {
453     '%';
454     } else {
455     '&';
456     }
457     } else { # error
458     '';
459     }
460 wakaba 1.4 } elsif ($self->{type} eq '#declaration' && $self->{namespace_uri}) {
461     '<!' . {
462     $NS_SGML.'attlist' => 'ATTLIST',
463     $NS_SGML.'doctype' => 'DOCTYPE',
464     $NS_SGML.'element' => 'ELEMENT',
465     $NS_SGML.'entity' => 'ENTITY',
466     $NS_SGML.'entity:parameter' => 'ENTITY %',
467     $NS_SGML.'notation' => 'NOTATION',
468     $NS_SGML.'sgml' => 'SGML',
469     }->{$self->{namespace_uri}} . ' ';
470 wakaba 1.1 } elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) {
471     my $r = '<!' . $self->{local_name} . ' ';
472     if ($self->{local_name} eq 'DOCTYPE' && ref $self->{parent}) {
473     my $qname;
474     for (@{$self->{parent}->{node}}) {
475     if ($_->{type} eq '#element') {
476     $qname = $_->qname;
477     last if $qname;
478     }
479     }
480     $r .= ($qname ? $qname : '#IMPLIED') . ' ';
481 wakaba 1.2 }
482     $r;
483     } elsif ($self->{type} eq '#section') {
484     if (ref $self->{local_name} && $self->{local_name}->{type} eq '#reference') {
485     '<![' . $self->{local_name} . '[';
486     } elsif ($self->_check_ncname ($self->{local_name})) {
487     '<![' . $self->{local_name} . '[';
488     } else { # error
489     '';
490 wakaba 1.1 }
491     } else {
492     '';
493     }
494     }
495    
496     =item $tag = $x->end_tag
497    
498 wakaba 1.4 Returns the end tag (or something that marks the end of something, such as '-->'
499 wakaba 1.1 for C<#comment> nodes).
500    
501     =cut
502    
503     sub end_tag ($) {
504     my $self = shift;
505     if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) {
506     '</' . $self->qname . '>';
507     } elsif ($self->{type} eq '#comment') {
508 wakaba 1.4 '-->';
509 wakaba 1.1 } elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) {
510     '?>';
511 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
512     ';';
513 wakaba 1.4 } elsif ($self->{type} eq '#declaration' && $self->{namespace_uri}) {
514     '>';
515 wakaba 1.1 } elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) {
516 wakaba 1.4 '>';
517 wakaba 1.2 } elsif ($self->{type} eq '#section') {
518     if (ref $self->{local_name} && $self->{local_name}->{type} eq '#reference') {
519     ']]>';
520     } elsif ($self->_check_ncname ($self->{local_name})) {
521     ']]>';
522     } else { # error
523     '';
524     }
525 wakaba 1.1 } else {
526     '';
527     }
528     }
529    
530     =item $tag = $x->attribute_name
531    
532     Returns the attribute name.
533    
534     =cut
535    
536     sub attribute_name ($) {
537     my $self = shift;
538     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
539     ($self->{namespace_uri} ?
540     (ref $self->{parent} ? $self->{parent} : $self)
541     ->_get_namespace_prefix ($self->{namespace_uri}, use_no_prefix => 0) : '')
542     .$self->{local_name};
543     } else {
544     '';
545     }
546     }
547    
548     =item $tag = $x->attribute_value
549    
550     Returns the attribute value.
551    
552     =cut
553    
554 wakaba 1.4 sub attribute_value ($;%) {
555 wakaba 1.1 my $self = shift;
556 wakaba 1.4 my %o = @_;
557 wakaba 1.1 if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
558 wakaba 1.4 my $r = '"' . $self->_entitize ($self->{value}, percent => $o{escape_percent});
559     for (@{$self->{node}}) {
560     my $nt = $_->node_type;
561     if ($nt eq '#reference' || $nt eq '#xml') {
562     $r .= $_->outer_xml;
563     } elsif ($nt ne '#attribute') {
564     $r .= $self->_entitize ($_->inner_text, percent => $o{escape_percent});
565     }
566     }
567     return $r . '"';
568     } else {
569     '';
570     }
571     }
572    
573     sub entity_value ($;%) {
574     my $self = shift;
575     my %o = @_;
576     my $_entitize = sub {
577     my $s = shift;
578     $s =~ s/&/&amp;/g;
579     $s =~ s/%/&#x25;/g;
580     $s =~ s/"/&quot;/g;
581     $s;
582     };
583     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
584     my $r = '"' . &$_entitize ($self->{value});
585     for (@{$self->{node}}) {
586     my $nt = $_->node_type;
587     if ($nt eq '#reference' || $nt eq '#xml') {
588     $r .= $_->outer_xml;
589     } elsif ($nt ne '#attribute') {
590     $r .= &$_entitize ($_->inner_text);
591 wakaba 1.1 }
592     }
593 wakaba 1.4 return $r . '"';
594 wakaba 1.1 } else {
595     '';
596     }
597     }
598    
599     =item $tag = $x->attribute
600    
601     Returns the attribute (name and value pair).
602    
603     =cut
604    
605     sub attribute ($) {
606     my $self = shift;
607     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
608     $self->attribute_name . '=' . $self->attribute_value;
609     } else {
610     '';
611     }
612     }
613    
614 wakaba 1.2 sub external_id ($;%) {
615     my $self = shift;
616     my %o = @_;
617     my ($pubid, $sysid, $ndata);
618     for (@{$self->{node}}) {
619 wakaba 1.4 if ($_->{type} eq '#attribute' && !$_->{namespace_uri}) {
620 wakaba 1.2 if ($_->{local_name} eq 'PUBLIC') {
621 wakaba 1.4 $pubid = $_->inner_text;
622 wakaba 1.2 } elsif ($_->{local_name} eq 'SYSTEM') {
623 wakaba 1.4 $sysid = $_->inner_text;
624 wakaba 1.2 } elsif ($_->{local_name} eq 'NDATA') {
625 wakaba 1.4 $ndata = $_->inner_text;
626     undef $ndata unless $self->_check_ncname ($ndata);
627 wakaba 1.2 }
628     }
629     }
630     my $r = '';
631 wakaba 1.4 if (length $pubid) {
632     $pubid =~ s|([^\x0A\x0D\x20A-Za-z0-9'()+,./:=?;!*#\@\$_%-])|sprintf '%%%02X', ord $1|ges;
633     }
634     if (length $sysid) {
635     if ($sysid =~ /"/) {
636     if ($sysid =~ /'/) {
637     $sysid =~ s/"/&quot;/; $sysid = '"' . $sysid . '"';
638     } else {
639     $sysid = "'" . $sysid . "'";
640     }
641     } else {
642     $sysid = '"' . $sysid . '"';
643     }
644     }
645 wakaba 1.2 if ($pubid && $sysid) {
646 wakaba 1.4 $r = 'PUBLIC "' . $pubid . '" ' . $sysid;
647 wakaba 1.2 } elsif ($sysid) {
648 wakaba 1.4 $r = 'SYSTEM ' . $sysid;
649 wakaba 1.2 } elsif ($pubid && $o{allow_pubid_only}) {
650 wakaba 1.4 $r = 'PUBLIC "' . $pubid . '"';
651 wakaba 1.2 }
652     if ($r && $ndata && $o{use_ndata}) {
653 wakaba 1.4 $r .= ' NDATA ' . $ndata;
654 wakaba 1.2 }
655     $r;
656     }
657    
658     =item $s = $x->content_spec
659    
660     Generates contentspec of element type declaration (ex. C<(E1 | E2 | E3)>)
661     or AttDef of attribute declaration (ex. C<name CDATA #REQUIRED>).
662    
663     =cut
664    
665     sub content_spec ($) {
666     my $self = shift;
667     if ($self->{type} eq '#element') {
668     my $text = 0;
669     my $contentspec = join ' | ', map {$_->qname} grep {$text = 1 if $_->{type} eq '#text'; $_->{type} eq '#element'} @{$self->{node}};
670     $contentspec = '#PCDATA' . ($contentspec ? ' | ' . $contentspec : '') if $text;
671    
672     return $contentspec ? '(' . $contentspec . ')' : 'EMPTY';
673     } elsif ($self->{type} eq '#attribute') {
674     my $attdef = $self->qname . "\t" . ($self->{data_type} || 'CDATA') . "\t";
675     my $default = $self->{default_decl};
676     $default .= ' ' . $self->attribute_value if $default eq '#FIXED';
677     unless ($default) {
678     $default = defined $self->{value} ? $self->attribute_value : '#IMPLIED';
679     }
680     return $attdef . $default;
681     }
682     }
683    
684 wakaba 1.1 =item $tag = $x->inner_xml
685    
686     Returns the content of the node in XML syntax. (In case of the C<#element> nodes,
687     element content without start- and end-tags is returned.)
688    
689     Note that for not all node types the behavior of this method is defined.
690     For example, returned value of C<#attribute> might be unexpected one
691     in this version of this module.
692    
693     =cut
694    
695 wakaba 1.3 sub inner_xml ($;%) {
696 wakaba 1.1 my $self = shift;
697 wakaba 1.3 my %o = @_;
698 wakaba 1.1 my $r = '';
699     if ($self->{type} eq '#comment') {
700     $r = $self->inner_text;
701     $r =~ s/--/-&#x45;/g;
702     } elsif ($self->{type} eq '#pi') {
703     if (length $self->{value}) {
704     $r = ' ' . $self->{value};
705 wakaba 1.2 $r =~ s/\?>/? >/g; # Same replacement as of the recommendation of XSLT:p.i.
706 wakaba 1.1 }
707     for (@{$self->{node}}) {
708     if ($_->node_type eq '#attribute') {
709     $r .= ' ' . $_->attribute;
710     } else {
711     my $s = $_->inner_text;
712     $s =~ s/\?>/?&gt;/g;
713     $r .= ' ' . $s if length $s;
714     }
715     }
716 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
717     if ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref:hex') {
718     $r = sprintf '%02X', $self->{value};
719     } elsif ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref') {
720     $r = sprintf '%02d', $self->{value};
721 wakaba 1.4 } elsif (ref $self->{local_name} && $self->{local_name}->{type} eq '#declaration') {
722     $r = $self->{local_name}->{local_name};
723     } elsif ($self->_check_ncname ($self->{local_name})) {
724     $r = ($self->{local_name});
725 wakaba 1.2 } else { # error
726     $r = '';
727     }
728 wakaba 1.1 } elsif ($self->{type} eq '#declaration') {
729 wakaba 1.2 if ($self->{local_name} eq 'DOCTYPE') {
730     my ($isub, $xid) = ('', $self->external_id);
731     for (@{$self->{node}}) {
732 wakaba 1.4 $isub .= $_->outer_xml if $_->{type} ne '#attribute';
733 wakaba 1.2 }
734     if ($xid) {
735     $r = $xid;
736     if ($isub) {
737     $r .= " [\n" . $isub . "]";
738 wakaba 1.1 }
739     } else {
740 wakaba 1.2 if ($isub) {
741     $r = "[\n" . $isub . "]";
742     } else {
743     $r = "[]";
744     }
745 wakaba 1.1 }
746 wakaba 1.4 } elsif ($self->{namespace_uri} eq $NS_SGML.'doctype') {
747     my $root = $self->get_attribute ('qname');
748     ref $root ? ($root = $root->inner_text) : (ref $self->{parent} ? (do {
749     for (@{$self->{parent}->{node}}) {
750     if ($_->{type} eq '#element') {
751     $root = $_->qname;
752     last if $root;
753     }
754     }
755     $root = '#IMPLIED';
756     }) : ($root = '#IMPLIED')); ## error!
757     my ($isub, $xid) = ('', $self->external_id);
758     for (@{$self->{node}}) {
759     $isub .= $_->outer_xml if $_->{type} ne '#attribute';
760 wakaba 1.2 }
761     if ($xid) {
762 wakaba 1.4 $r = $xid;
763     if ($isub) {
764     $r .= " [\n" . $isub . "]";
765     }
766     } else {
767     if ($isub) {
768     $r = "[\n" . $isub . "]";
769     } else {
770     $r = "[]";
771     }
772     }
773     $r = $root . ' ' . $r;
774     } elsif ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:entity'
775     || $self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:entity:parameter'
776     || $self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:notation') {
777     my %xid_opt;
778     $r = $self->{local_name} . ' ' if $self->_check_ncname ($self->{local_name});
779     if ($self->{namespace_uri} eq $NS_SGML.'entity:parameter') {
780     #$r = '% ' . $r;
781     } elsif ($self->{namespace_uri} eq $NS_SGML.'entity') {
782     $xid_opt{use_ndata} = 1;
783     } elsif ($self->{namespace_uri} eq $NS_SGML.'notation') {
784     $xid_opt{allow_pubid_only} = 1;
785     }
786    
787     my ($v, $xid) = ($self->{value}, $self->external_id (%xid_opt));
788     if ($xid) { ## External ID
789 wakaba 1.2 $r .= $xid;
790 wakaba 1.4 } else { ## EntityValue
791     my $entity_value = $self->get_attribute ('value');
792     if ($entity_value) { # <!ENTITY foo "bar">
793     $r .= $entity_value->entity_value;
794     } else { ## Parameter entity reference
795     $r .= $self->{value};
796     for (@{$self->{node}}) {
797     $r .= $_->outer_xml;
798     }
799 wakaba 1.2 }
800 wakaba 1.1 }
801 wakaba 1.4 } elsif ($self->{namespace_uri} eq $NS_SGML.'element') {
802     my $qname = $self->get_attribute ('qname');
803     ref $qname ? $qname = $qname->inner_text : undef;
804     if ($qname && $self->_check_name ($qname)) {
805     ## Element type name is defined
806     $r = $qname . ' ';
807 wakaba 1.3 if ($o{output_tag_omit_declaration}) {
808 wakaba 1.4 $r .= ($self->{flag}->{element_tag_start_omitable} ? 'o ' : '- ')
809     .
810     ($self->{flag}->{element_tag_end_omitable} ? 'o ' : '- ');
811 wakaba 1.3 }
812 wakaba 1.4 my $rs = $self->inner_text (output_ref_as_is => 1);
813     $r .= $rs || 'ANY';
814 wakaba 1.2 } else {
815 wakaba 1.4 #if (ref $self->{node}->[0] && $self->{node}->[0]->{type} eq '#element') {
816     ### Element prototype is given
817     # $r = $self->{node}->[0]->qname . ' ' . &$to . $self->{node}->[0]->content_spec;
818     #} elsif () {
819     ### Element type name and contentspec is given
820     # $r = $self->{target_name} . ' ' . &$to . ($self->inner_text || 'ANY');
821     #} else {
822 wakaba 1.2 ## (Element type name and contetnspac) is given
823 wakaba 1.4 $r .= $self->inner_text (output_ref_as_is => 1);
824     # || 'Name ANY'; # error
825     #}
826 wakaba 1.2 }
827     } elsif ($self->{local_name} eq 'ATTLIST') {
828     if ($self->_check_name ($self->{target_name})) {
829     $r = $self->{target_name};
830     }
831     my $t = $self->inner_text (output_ref_as_is => 1);
832     $r .= "\n\t" . $t if $t;
833     $r ||= 'Name'; # error!
834     for (@{$self->{node}}) {
835     if ($_->{type} eq '#attribute') {
836     $r .= "\n\t" . $_->content_spec;
837     }
838 wakaba 1.1 }
839 wakaba 1.2 } else { # unknown
840 wakaba 1.3 for (@{$self->{node}}) {
841     $r .= $_->outer_xml;
842     }
843 wakaba 1.1 }
844 wakaba 1.2 } elsif ($self->{type} eq '#section' && !ref $self->{local_name} && $self->{local_name} eq 'CDATA') {
845     $r = $self->inner_text;
846     $r =~ s/]]>/]]>]]<![CDATA[>/g;
847 wakaba 1.1 } else {
848     if ($self->{type} ne '#xml') {
849     $r = $self->_entitize ($self->{value});
850     } else {
851     $r = $self->{value};
852     }
853     for (@{$self->{node}}) {
854     my $nt = $_->node_type;
855 wakaba 1.4 if (($self->{option}->{indent}) && ($nt eq '#element' || $nt eq '#comment' || $nt eq '#pi' || $nt eq '#declaration')) {
856 wakaba 1.1 $r .= "\n";
857     }
858     $r .= $_->outer_xml unless $_->node_type eq '#attribute';
859     }
860     }
861     $r;
862     }
863    
864    
865     =item $tag = $x->outer_xml
866    
867     Returns the node in XML syntax.
868    
869     =cut
870    
871     sub outer_xml ($) {
872     my $self = shift;
873     if ($self->{type} eq '#attribute') {
874     $self->attribute;
875     } else {
876 wakaba 1.4 if ($self->{option}->{indent} && $self->{type} eq '#element') {
877 wakaba 1.1 my $r = $self->start_tag;
878     my $c = $self->inner_xml;
879 wakaba 1.4 if (!length $c && $self->{option}->{use_EmptyElemTag}) {
880     substr ($r, -1) = ' />';
881     } else {
882     if ($c) {
883     $c =~ s/\n/\n /g;
884     $r .= "\n " . $c . "\n";
885     }
886     $r .= $self->end_tag;
887 wakaba 1.1 }
888     $r;
889     } else {
890 wakaba 1.4 my $r = $self->start_tag;
891     my $c = $self->inner_xml;
892     if ($self->{type} eq '#element' && !length $c && $self->{option}->{use_EmptyElemTag}) {
893     substr ($r, -1) = ' />';
894     } else {
895     $r .= $c . $self->end_tag;
896     #$r .= "\n" if $self->{type} eq '#declaration';
897     }
898 wakaba 1.2 $r;
899     #'{'.$self->{type}.': '.$r.'}'; # for debug
900 wakaba 1.1 }
901     }
902     }
903    
904     =item $tag = $x->inner_text
905    
906     Returns the text content of the node. (In many case the returned value is same
907     as WinIE DOM C<inner_text ()> function's or XPath C<text()> function's.
908     But some classes that inherits this module might implement to return other
909     value (eg. to return the value of the alt attribute of html:img element).
910    
911 wakaba 1.2 Available options: C<output_ref_as_is>.
912    
913 wakaba 1.1 =cut
914    
915 wakaba 1.2 sub inner_text ($;%) {
916 wakaba 1.1 my $self = shift;
917 wakaba 1.2 my %o = @_;
918 wakaba 1.1 my $r = $self->{value};
919 wakaba 1.2 if ($o{output_ref_as_is}) { ## output as if RCDATA
920 wakaba 1.4 $r =~ s/&/&amp;/g;
921     for my $node (@{$self->{node}}) {
922     my $nt = $node->node_type;
923     if ($nt eq '#reference' || $nt eq '#xml') {
924     $r .= $node->outer_xml;
925 wakaba 1.2 } elsif ($nt ne '#attribute') {
926 wakaba 1.4 $r .= map {s/&/&amp;/g; $_} $node->inner_text;
927 wakaba 1.2 }
928     }
929     } else {
930     for (@{$self->{node}}) {
931     $r .= $_->inner_text unless $_->node_type eq '#attribute';
932     }
933 wakaba 1.1 }
934     $r;
935     }
936    
937     {no warnings; # prototype mismatch
938     *stringify = \&outer_xml;
939     }
940    
941     # $s = $x->_entitize ($s)
942 wakaba 1.4 sub _entitize ($$;%) {
943     my ($self, $s, %o) = (shift, shift, @_);
944 wakaba 1.1 $s =~ s/&/&amp;/g;
945     $s =~ s/</&lt;/g;
946     $s =~ s/>/&gt;/g;
947     $s =~ s/"/&quot;/g;
948 wakaba 1.4 $s =~ s/%/&#x25;/g if $o{percent};
949     $s =~ s/'/&#x27;/g if $o{apos};
950 wakaba 1.1 $s;
951     }
952    
953 wakaba 1.2 # 1/0 = $x->_check_name ($s)
954     sub _check_name ($$) {
955     my $self = shift;
956     my $s = shift;
957     return $Cache{name}->{$s} if defined $Cache{name}->{$s};
958     if ($s =~ /^\p{InXML_NameStartChar}/ && $s !~ /\P{InXMLNameChar}/) {
959     # \p{...}('*'/'+'/'{n,}') does not work...
960     $Cache{name}->{$s} = 1;
961     1;
962     } else {
963     $Cache{name}->{$s} = 0;
964     0;
965     }
966     }
967 wakaba 1.1 # 1/0 = $x->_check_ncname ($s)
968     sub _check_ncname ($$) {
969     my $self = shift;
970     my $s = shift;
971     return $Cache{ncname}->{$s} if defined $Cache{ncname}->{$s};
972     if ($s =~ /^\p{InXML_NCNameStartChar}/ && $s !~ /\P{InXMLNCNameChar}/) {
973     # \p{...}('*'/'+'/'{n,}') does not work...
974     $Cache{ncname}->{$s} = 1;
975     1;
976     } else {
977     $Cache{ncname}->{$s} = 0;
978     0;
979     }
980     }
981    
982     # 1/0 = $x->_check_namespace_prefix ($s)
983     sub _check_namespace_prefix ($$) {
984     my $self = shift;
985     my $s = shift;
986     return 0 unless defined $s;
987     return 1 if $s eq '';
988     substr ($s, -1, 1) = '' if substr ($s, -1, 1) eq ':';
989     $self->_check_ncname ($s);
990     }
991    
992 wakaba 1.2 ## TODO: cleaning $self->{node} before outputing, to ensure nodes not to have
993     ## multiple parents.
994     ## TODO: normalize namespace URI (removing non URI chars)
995    
996     sub flag ($$;$) {
997     my ($self, $name, $value) = @_;
998     if (defined $value) {
999     $self->{flag}->{$name} = $value;
1000     }
1001     $self->{flag}->{$name};
1002     }
1003    
1004 wakaba 1.3 sub option ($$;$) {
1005     my ($self, $name, $value) = @_;
1006     if (defined $value) {
1007     $self->{option}->{$name} = $value;
1008     }
1009     $self->{option}->{$name};
1010     }
1011    
1012 wakaba 1.1 =back
1013    
1014 wakaba 1.2 =head1 NODE TYPES
1015    
1016     =over 4
1017    
1018     =item #attribute
1019    
1020     Attribute. Its XML representation takes the form of NAME="VALUE".
1021    
1022     =item #comment
1023    
1024     Comment declarement. <!-- -->
1025    
1026     =item #declarement
1027    
1028     SGML's declarements, such as SGML, DOCTYPE, ENTITY, etc.
1029     <!SGML ...>, <!DOCTYPE root []>, <!ENTITY % name "value">
1030    
1031     =item #element
1032    
1033     Element. Its XML representation consists of start tag, content and end tag,
1034     like <TYPE>content</TYPE>.
1035    
1036     =item #fragment
1037    
1038     Fragment of nodes. It's similar to DOM's fragment node.
1039    
1040     =item #pi
1041    
1042     Prosessing instruction. <?NAME VALUE?>
1043    
1044     =item #reference
1045    
1046     Character reference or general or parameter entity reference.
1047     &#nnnn;, &#xhhhh;, &name;, %name;.
1048    
1049     =item #section
1050    
1051     Markup section. CDATA, INCLUDE and IGNORE are supported by XML.
1052     <![%type;[...]]>
1053    
1054     =item #text
1055    
1056     Text.
1057    
1058     =item #xml
1059    
1060     Preformatted XML text.
1061    
1062     =cut
1063    
1064 wakaba 1.4 =head1 RESTRICTIONS
1065    
1066     =over 4
1067    
1068     =item XML without XML Namespace is not supported.
1069    
1070     =item Before default namespace without bounded URI (xmlns="") is outputed, it must be declared.
1071    
1072     For example, next code generates invalid (non-well-formed) XML Namespace document.
1073    
1074     my $x = SuikaWiki::Markup::XML->new (local_name => 'elementType');
1075     print $x # <ns1:elementType xmlns:ns1=""></ns1:elementType>
1076    
1077     So you must write like:
1078    
1079     my $x = SuikaWiki::Markup::XML->new (local_name => 'elementType');
1080     $x->define_new_namespace ('' => '');
1081     print $x; # <elementType xmlns=""></elementType>
1082    
1083     =back
1084    
1085 wakaba 1.1 =head1 LICENSE
1086    
1087     Copyright 2003 Wakaba <w@suika.fam.cx>
1088    
1089     This program is free software; you can redistribute it and/or
1090     modify it under the same terms as Perl itself.
1091    
1092     =cut
1093    
1094 wakaba 1.4 1; # $Date: 2003/05/10 05:58:06 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24