/[pub]/suikawiki/script/lib/SuikaWiki/Markup/XML.pm
Suika

Contents of /suikawiki/script/lib/SuikaWiki/Markup/XML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (hide annotations) (download)
Sat May 10 05:58:06 2003 UTC (22 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.2: +28 -10 lines
Rewrite import_plugins

1 wakaba 1.1
2     =head1 NAME
3    
4     SuikaWiki::Markup::XML --- SuikaWiki: Simple well-formed document fragment generator
5    
6     =head1 DESCRIPTION
7    
8     This module can be used to generate the document fragment of XML, SGML or
9     other well-formed (in XML meaning) data formats with the object oriented manner.
10    
11     This module cannot be used to parse XML (or other marked-up) document (or its fragment)
12     by itself, nor is compatible with other huge packages such as XML::Parser. The only purpose
13     of this module is to make it easy for tiny perl scripts to GENERATE well-formed
14     markup constructures. (SuikaWiki is not "tiny"? Oh, yes, I see:-))
15    
16     =cut
17    
18     package SuikaWiki::Markup::XML;
19     use strict;
20 wakaba 1.2 use overload '""' => \&stringify,
21     fallback => 1;
22     use Char::Class::XML qw!InXML_NameStartChar InXMLNameChar InXML_NCNameStartChar InXMLNCNameChar!;
23 wakaba 1.1 our %Namespace_URI_to_prefix = (
24     'DAV:' => [qw/dav webdav/],
25     'http://greenbytes.de/2002/rfcedit' => [qw/ed/],
26     'http://icl.com/saxon' => [qw/saxon/],
27     'http://members.jcom.home.ne.jp/jintrick/2003/02/site-concept.xml#' => ['', qw/sitemap/],
28     'http://purl.org/dc/elements/1.1/' => [qw/dc dc11/],
29     'http://purl.org/rss/1.0/' => ['', qw/rss rss10/],
30     'http://suika.fam.cx/~wakaba/lang/rfc/translation/' => [qw/ja/],
31     'http://www.mozilla.org/xbl' => ['', qw/xbl/],
32 wakaba 1.3 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => [qw/rdf/],
33 wakaba 1.1 'http://www.w3.org/1999/xhtml' => ['', qw/h h1 xhtml xhtml1/],
34     'http://www.w3.org/1999/xlink' => [qw/l xlink/],
35     'http://www.w3.org/1999/XSL/Format' => [qw/fo xslfo xsl-fo xsl/],
36     'http://www.w3.org/1999/XSL/Transform' => [qw/t s xslt xsl/],
37     'http://www.w3.org/1999/XSL/TransformAlias' => [qw/axslt axsl xslt xsl/],
38     'http://www.w3.org/2000/01/rdf-schema#' => [qw/rdfs/],
39     'http://www.w3.org/2000/svg' => ['', qw/s svg/],
40     'http://www.w3.org/2002/06/hlink' => [qw/h hlink/],
41     'http://www.w3.org/2002/06/xhtml2' => ['', qw/h h2 xhtml xhtml2/],
42 wakaba 1.3 'http://www.w3.org/2002/07/owl' => [qw/owl/],
43 wakaba 1.1 'http://www.w3.org/TR/REC-smil' => ['', qw/smil smil1/],
44 wakaba 1.3 'http://www.wapforum.org/2001/wml' => [qw/wap/],
45 wakaba 1.1 'http://xml.apache.org/xalan' => [qw/xalan/],
46     'mailto:julian.reschke@greenbytes.de?subject=rcf2629.xslt' => [qw/myns/],
47     'urn:schemas-microsoft-com:vml' => [qw/v vml/],
48     'urn:schemas-microsoft-com:xslt' => [qw/ms msxsl msxslt/],
49     'urn:x-suika-fam-cx:markup:ietf:html:3:draft:00' => ['', qw/H HTML HTML3/],
50     );
51     my %Cache;
52    
53     =head1 METHODS
54    
55     =over 4
56    
57     =item $x = SuikaWiki::Markup::XML->new (%options)
58    
59     Returns new instance of the module. It is itself a node.
60    
61 wakaba 1.2 Available options: C<data_type>, C<default_decl>, C<type> (default: C<#element>), C<local_name>, C<namespace_uri>, C<target_name> and C<value>.
62 wakaba 1.1
63     =cut
64    
65     sub new ($;%) {
66     my $class = shift;
67     my $self = bless {@_}, $class;
68     $self->{type} ||= '#element';
69 wakaba 1.2 for (qw/target_name value/) {
70     if (ref $self->{$_}) {
71     $self->{$_}->{parent} = $self;
72     }
73     }
74 wakaba 1.1 $self->{node} ||= [];
75     $self;
76     }
77    
78    
79     =item $x->append_node ($node)
80    
81     Appending given node to the object (as the last child).
82     If the type of given node is C<#fragment>, its all children, not the node
83     itself, are appended.
84    
85     This method returns the appended node unless the type of given node is C<#fragment>.
86     In such cases, this node (C<$x>) is returned.
87    
88 wakaba 1.2 Available options: C<node_or_text>.
89    
90 wakaba 1.1 =cut
91    
92 wakaba 1.2 sub append_node ($$;%) {
93 wakaba 1.1 my $self = shift;
94 wakaba 1.2 my ($new_node, %o) = @_;
95     unless (ref $new_node) {
96     if ($o{node_or_text}) {
97     return $self->append_text ($new_node);
98     } else {
99     die "append_node: Invalid node" unless ref $new_node;
100     }
101     }
102 wakaba 1.1 if ($new_node->{type} eq '#fragment') {
103     for (@{$new_node->{node}}) {
104     push @{$self->{node}}, $_;
105     $_->{parent} = $self;
106     }
107     $self;
108     } else {
109     push @{$self->{node}}, $new_node;
110     $new_node->{parent} = $self;
111     $new_node;
112     }
113     }
114    
115     =item $new_node = $x->append_new_node (%options)
116    
117     Appending a new node. The new node is returned.
118    
119     Available options: C<type>, C<namespace_uri>, C<local_name>, C<value>.
120    
121     =cut
122    
123     sub append_new_node ($;%) {
124     my $self = shift;
125     my %o = @_;
126     my $new_node = __PACKAGE__->new (%o);
127     push @{$self->{node}}, $new_node;
128     $new_node->{parent} = $self;
129     $new_node;
130     }
131    
132     =item $new_node = $x->append_text ($text)
133    
134     Appending given text as a new text node. The new text node is returned.
135    
136     =cut
137    
138     sub append_text ($$;%) {
139     my $self = shift;
140     my $s = shift;
141     #if (@{$self->{node}}[-1]->{type} eq '#text') {
142     # $self->{node}}[-1]->append_new_node (type => '#text', value => $s);
143     #} else {
144     $self->append_new_node (type => '#text', value => $s);
145     #}
146     }
147    
148 wakaba 1.2 sub append_baretext ($$;%) {
149     my $self = shift;
150     my $s = shift;
151     $self->append_new_node (type => '#xml', value => $s);
152     }
153    
154 wakaba 1.1 =item $attr_node = $x->get_attribute ($local_name, %options)
155    
156     Returns the attribute node whose local-name is C<$local_name>.
157    
158     Available options: C<namespace_uri>, C<make_new_node>.
159    
160     =cut
161    
162     sub get_attribute ($$;%) {
163     my $self = shift;
164     my ($name, %o) = @_;
165     for (@{$self->{node}}) {
166     if ($_->{type} eq '#attribute' && $_->{local_name} eq $name && $o{namespace_uri} eq $_->{namespace_uri}) {
167     return $_;
168     }
169     }
170     ## Node is not exist
171     if ($o{make_new_node}) {
172     return $self->append_new_node (type => '#attribute', local_name => $name, namespace_uri => $o{namespace_uri});
173     } else {
174     return undef;
175     }
176     }
177    
178     =item $attr_node = $x->set_attribute ($local_name => $value, %options)
179    
180     Set the value of the attribute. The attribute node is returned.
181    
182     Available options: C<namespace_uri>.
183    
184     =cut
185    
186     sub set_attribute ($$$;%) {
187     my $self = shift;
188     my ($name, $val, %o) = @_;
189     for (@{$self->{node}}) {
190     if ($_->{type} eq '#attribute' && $_->{local_name} eq $name && $o{namespace_uri} eq $_->{namespace_uri}) {
191     $_->{value} = $val;
192     $_->{node} = [];
193     return $_;
194     }
195     }
196     return $self->append_new_node (type => '#attribute', local_name => $name, value => $val, namespace_uri => $o{namespace_uri});
197     }
198    
199     =item \@children = $x->child_nodes
200    
201     Returns an array reference to child nodes.
202    
203     =item $local_name = $x->local_name
204    
205     Returns the local-name.
206    
207     =item $type = $x->node_type
208    
209     Returns the node type.
210    
211     =item $node = $x->parent_node
212    
213     Returns the parent node. If there is no parent node, undef is returned.
214    
215     =cut
216    
217     sub child_nodes ($) { shift->{node} }
218     sub local_name ($) { shift->{local_name} }
219     sub node_type ($) { shift->{type} }
220     sub parent_node ($) { shift->{parent} }
221    
222     =item $i = $x->count
223    
224     Returns the number of child nodes.
225    
226     =cut
227    
228     # TODO: support counting by type
229     sub count ($;@) {
230 wakaba 1.2 my $self = shift;
231     (defined $self->{value} ? 1 : 0) + scalar @{$self->{node}};
232 wakaba 1.1 }
233    
234     # $prefix = $x->_get_namespace_prefix ($namespace_uri)
235     sub _get_namespace_prefix ($$;%) {
236     my ($self, $uri) = (shift, shift);
237     my %o = @_;
238     if (defined (my $p = $self->_uri_to_prefix ($uri, undef, %o))) {
239     return $p if $self->_prefix_to_uri ($p) eq $uri;
240     } if ($Namespace_URI_to_prefix{$uri}) {
241     for (@{$Namespace_URI_to_prefix{$uri}}) {
242     my $pfx = $_; $pfx .= ':' if $pfx;
243     if ($self->_check_namespace_prefix ($pfx) && !$self->_prefix_to_uri ($pfx)) {
244     return $self->_uri_to_prefix ($uri => $pfx, %o);
245     }
246     }
247     } else {
248     my ($u_r_i, $pfx) = ($uri);
249     while ($u_r_i =~ s/([A-Za-z][0-9A-Za-z-]+)[^0-9A-Za-z-]*$//) {
250     my $p_f_x = $1 . ':';
251     next if uc (substr ($p_f_x, 0, 3)) eq 'XML';
252     unless ($self->_prefix_to_uri ($p_f_x)) {
253     $pfx = $p_f_x;
254     last;
255     }
256     }
257     if ($pfx) {
258     return $self->_uri_to_prefix ($uri => $pfx, %o);
259     } else {
260     while (1) {
261     my $pfx = 'ns'.(++$self->{ns}->{-anonymous}).':';
262     unless ($self->_prefix_to_uri ($pfx)) {
263     return $self->_uri_to_prefix ($uri => $pfx, %o);
264     }
265     }
266     }
267     }
268     }
269    
270     # $uri or undef = $x->_prefix_to_uri ($prefix => [$new_uri])
271     sub _prefix_to_uri ($$;$%) {
272     my ($self, $prefix, $new_uri, %o) = @_;
273     return undef unless $self->_check_namespace_prefix ($prefix);
274     if ($new_uri) {
275     $self->{ns}->{$prefix} = $new_uri;
276     }
277     if (uc (substr $prefix, 0, 3) eq 'XML') {
278     return 'http://www.w3.org/XML/1998/namespace' if $prefix eq 'xml:';
279     return 'http://www.w3.org/2000/xmlns/' if $prefix eq 'xmlns:';
280 wakaba 1.2 }
281     if (defined $self->{ns}->{$prefix}) {
282 wakaba 1.1 $self->{ns}->{$prefix};
283     } elsif (ref $self->{parent}) {
284     shift; # $self
285     $self->{parent}->_prefix_to_uri (@_);
286     } else {
287     undef;
288     }
289     }
290    
291     # $prefix or undef = $x->_uri_to_prefix ($uri => [$new_prefix], %options)
292     # use_no_prefix (default: 1): Allow default namespace (no prefix).
293     sub _uri_to_prefix ($$;$%) {
294     my ($self, $uri, $new_prefix, %o) = @_;
295     if (defined $new_prefix && $self->_check_namespace_prefix ($new_prefix)) {
296     $self->{ns}->{$new_prefix} = $uri;
297     $new_prefix;
298     } else {
299     return 'xml:' if $uri eq 'http://www.w3.org/XML/1998/namespace';
300     return 'xmlns:' if $uri eq 'http://www.w3.org/2000/xmlns/';
301     for (keys %{$self->{ns}||{}}) {
302     next if ($_ eq '') && !(!defined $o{use_no_prefix} || $o{use_no_prefix});
303     return $_ if $self->{ns}->{$_} eq $uri;
304     }
305 wakaba 1.2 if (ref ($self->{parent}) && $self->{parent}->{type} ne '#declaration') {
306 wakaba 1.1 shift; # $self
307     $self->{parent}->_uri_to_prefix (@_);
308     } else {
309     undef;
310     }
311     }
312     }
313    
314     =item $x->define_new_namespace ($prefix => $uri)
315    
316     Defines a new XML Namespace. This method is useful for root or section-level
317     element node.
318    
319     Returned value is unspecified in this version of this module.
320    
321     =cut
322    
323     sub define_new_namespace ($$$) {
324     my ($self, $prefix, $uri) = @_;
325     if ($prefix eq '' || $self->_check_ncname ($prefix)) {
326     $prefix .= ':' if $prefix && substr ($prefix, -1) ne ':';
327     $self->_prefix_to_uri ($prefix => $uri);
328     } else {
329     undef;
330     }
331     }
332    
333     =item $qname = $x->qname
334    
335     Returns QName ((namespace-)qualified name) of the element type.
336     Undef is retuened when the type does not have its QName
337     (ie. when type is neither C<#element> or C<#attribute>).
338    
339     =cut
340    
341     sub qname ($) {
342     my $self = shift;
343     if ($self->_check_ncname ($self->{local_name})) {
344     if ($self->{type} eq '#element') {
345     $self->{_qname} = $self->_get_namespace_prefix ($self->{namespace_uri}) . $self->{local_name}
346     unless $self->{_qname};
347     return $self->{_qname};
348     } elsif ($self->{type} eq '#attribute') {
349     return $self->attribute_name;
350     }
351     }
352     undef;
353     }
354    
355     =item $tag = $x->start_tag
356    
357     Returns the start tag (or something that marks the start of something, such as '<!-- '
358     for C<#comment> nodes).
359    
360     =cut
361    
362     sub start_tag ($) {
363     my $self = shift;
364     if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) {
365     my $r = '<';
366     $r .= $self->qname;
367     for (@{$self->{node}}) {
368     $r .= ' ' . $_->outer_xml if $_->node_type eq '#attribute';
369     }
370     for my $prefix (grep !/^-/, keys %{$self->{ns}||{}}) {
371     if ($prefix) {
372     $r .= ' xmlns:'.substr ($prefix, 0, length ($prefix)-1);
373     } else {
374     $r .= ' xmlns';
375     }
376     $r .= '="'.$self->_entitize ($self->{ns}->{$prefix}).'"';
377     }
378     $r .= '>';
379     $r;
380     } elsif ($self->{type} eq '#comment') {
381     '<!-- ';
382 wakaba 1.2 } elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{target_name} || $self->{local_name})) {
383     '<?' . ($self->{target_name} || $self->{local_name});
384     } elsif ($self->{type} eq '#reference') {
385     if ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref:hex') {
386     '&#x';
387     } elsif ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref') {
388     '&#';
389     } elsif (ref $self->{target_name} && $self->{target_name}->{type} eq '#declaration') {
390     if ($self->{target_name}->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:entity:parameter') {
391     '%';
392     } else {
393     '&';
394     }
395     } elsif ($self->_check_ncname ($self->{target_name} || $self->{local_name})) {
396     if ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:entity:parameter') {
397     '%';
398     } else {
399     '&';
400     }
401     } else { # error
402     '';
403     }
404 wakaba 1.1 } elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) {
405     my $r = '<!' . $self->{local_name} . ' ';
406     if ($self->{local_name} eq 'DOCTYPE' && ref $self->{parent}) {
407     my $qname;
408     for (@{$self->{parent}->{node}}) {
409     if ($_->{type} eq '#element') {
410     $qname = $_->qname;
411     last if $qname;
412     }
413     }
414     $r .= ($qname ? $qname : '#IMPLIED') . ' ';
415 wakaba 1.2 }
416     $r;
417     } elsif ($self->{type} eq '#section') {
418     if (ref $self->{local_name} && $self->{local_name}->{type} eq '#reference') {
419     '<![' . $self->{local_name} . '[';
420     } elsif ($self->_check_ncname ($self->{local_name})) {
421     '<![' . $self->{local_name} . '[';
422     } else { # error
423     '';
424 wakaba 1.1 }
425     } else {
426     '';
427     }
428     }
429    
430     =item $tag = $x->end_tag
431    
432     Returns the end tag (or something that marks the end of something, such as ' -->'
433     for C<#comment> nodes).
434    
435     =cut
436    
437     sub end_tag ($) {
438     my $self = shift;
439     if ($self->{type} eq '#element' && $self->_check_ncname ($self->{local_name})) {
440     '</' . $self->qname . '>';
441     } elsif ($self->{type} eq '#comment') {
442     ' -->';
443     } elsif ($self->{type} eq '#pi' && $self->_check_ncname ($self->{local_name})) {
444     '?>';
445 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
446     ';';
447 wakaba 1.1 } elsif ($self->{type} eq '#declaration' && $self->_check_ncname ($self->{local_name})) {
448     my $r = '';
449     $r .= '>';
450 wakaba 1.2 } elsif ($self->{type} eq '#section') {
451     if (ref $self->{local_name} && $self->{local_name}->{type} eq '#reference') {
452     ']]>';
453     } elsif ($self->_check_ncname ($self->{local_name})) {
454     ']]>';
455     } else { # error
456     '';
457     }
458 wakaba 1.1 } else {
459     '';
460     }
461     }
462    
463     =item $tag = $x->attribute_name
464    
465     Returns the attribute name.
466    
467     =cut
468    
469     sub attribute_name ($) {
470     my $self = shift;
471     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
472     ($self->{namespace_uri} ?
473     (ref $self->{parent} ? $self->{parent} : $self)
474     ->_get_namespace_prefix ($self->{namespace_uri}, use_no_prefix => 0) : '')
475     .$self->{local_name};
476     } elsif ($self->{type} eq '#pair' && $self->_check_ncname ($self->{local_name})) {
477     $self->{local_name};
478     } else {
479     '';
480     }
481     }
482    
483     =item $tag = $x->attribute_value
484    
485     Returns the attribute value.
486    
487     =cut
488    
489     sub attribute_value ($) {
490     my $self = shift;
491     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
492     '"' . $self->_entitize ($self->inner_text) . '"';
493     } elsif ($self->{type} eq '#pair' && $self->_check_ncname ($self->{local_name})) {
494 wakaba 1.2 if (ref $self->{value} && $self->{value}->{type} eq '#declaration'
495     && $self->{value}->_check_ncname ($self->{value}->{target_name})) {
496     return $self->{value}->{target_name};
497     }
498 wakaba 1.1 my $t = $self->inner_text;
499     if ($t =~ /"/) {
500     if ($t =~ /'/) {
501     $t =~ s/"/&quot;/g;
502     return '"' . $t . '"';
503     } else {
504     return "'" . $t . "'";
505     }
506     } else {
507     return '"' . $t . '"';
508     }
509     } else {
510     '';
511     }
512     }
513    
514     =item $tag = $x->attribute
515    
516     Returns the attribute (name and value pair).
517    
518     =cut
519    
520     sub attribute ($) {
521     my $self = shift;
522     if ($self->{type} eq '#attribute' && $self->_check_ncname ($self->{local_name})) {
523     $self->attribute_name . '=' . $self->attribute_value;
524     } elsif ($self->{type} eq '#pair' && $self->_check_ncname ($self->{local_name})) {
525     $self->attribute_name . ' ' . $self->attribute_value;
526     } else {
527     '';
528     }
529     }
530    
531 wakaba 1.2 sub external_id ($;%) {
532     my $self = shift;
533     my %o = @_;
534     my ($pubid, $sysid, $ndata);
535     for (@{$self->{node}}) {
536     if ($_->{type} eq '#pair') {
537     if ($_->{local_name} eq 'PUBLIC') {
538     $pubid = $_;
539     } elsif ($_->{local_name} eq 'SYSTEM') {
540     $sysid = $_;
541     } elsif ($_->{local_name} eq 'NDATA') {
542     $ndata = $_;
543     }
544     }
545     }
546     my $r = '';
547     if ($pubid && $sysid) {
548     $r = $pubid->attribute . ' ' . $sysid->attribute_value;
549     } elsif ($sysid) {
550     $r = $sysid->attribute;
551     } elsif ($pubid && $o{allow_pubid_only}) {
552     $r = $pubid->attribute;
553     }
554     if ($r && $ndata && $o{use_ndata}) {
555     $r .= ' ' . $ndata->attribute;
556     }
557     $r;
558     }
559    
560     =item $s = $x->content_spec
561    
562     Generates contentspec of element type declaration (ex. C<(E1 | E2 | E3)>)
563     or AttDef of attribute declaration (ex. C<name CDATA #REQUIRED>).
564    
565     =cut
566    
567     sub content_spec ($) {
568     my $self = shift;
569     if ($self->{type} eq '#element') {
570     my $text = 0;
571     my $contentspec = join ' | ', map {$_->qname} grep {$text = 1 if $_->{type} eq '#text'; $_->{type} eq '#element'} @{$self->{node}};
572     $contentspec = '#PCDATA' . ($contentspec ? ' | ' . $contentspec : '') if $text;
573    
574     return $contentspec ? '(' . $contentspec . ')' : 'EMPTY';
575     } elsif ($self->{type} eq '#attribute') {
576     my $attdef = $self->qname . "\t" . ($self->{data_type} || 'CDATA') . "\t";
577     my $default = $self->{default_decl};
578     $default .= ' ' . $self->attribute_value if $default eq '#FIXED';
579     unless ($default) {
580     $default = defined $self->{value} ? $self->attribute_value : '#IMPLIED';
581     }
582     return $attdef . $default;
583     }
584     }
585    
586 wakaba 1.1 =item $tag = $x->inner_xml
587    
588     Returns the content of the node in XML syntax. (In case of the C<#element> nodes,
589     element content without start- and end-tags is returned.)
590    
591     Note that for not all node types the behavior of this method is defined.
592     For example, returned value of C<#attribute> might be unexpected one
593     in this version of this module.
594    
595     =cut
596    
597 wakaba 1.3 sub inner_xml ($;%) {
598 wakaba 1.1 my $self = shift;
599 wakaba 1.3 my %o = @_;
600 wakaba 1.1 my $r = '';
601     if ($self->{type} eq '#comment') {
602     $r = $self->inner_text;
603     $r =~ s/--/-&#x45;/g;
604     } elsif ($self->{type} eq '#pi') {
605     if (length $self->{value}) {
606     $r = ' ' . $self->{value};
607 wakaba 1.2 $r =~ s/\?>/? >/g; # Same replacement as of the recommendation of XSLT:p.i.
608 wakaba 1.1 }
609     for (@{$self->{node}}) {
610     if ($_->node_type eq '#attribute') {
611     $r .= ' ' . $_->attribute;
612     } else {
613     my $s = $_->inner_text;
614     $s =~ s/\?>/?&gt;/g;
615     $r .= ' ' . $s if length $s;
616     }
617     }
618 wakaba 1.2 } elsif ($self->{type} eq '#reference') {
619     if ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref:hex') {
620     $r = sprintf '%02X', $self->{value};
621     } elsif ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:char:ref') {
622     $r = sprintf '%02d', $self->{value};
623     } elsif (ref $self->{target_name} && $self->{target_name}->{type} eq '#declaration') {
624     $r = $self->{target_name}->{target_name};
625     } elsif ($self->_check_ncname ($self->{target_name} || $self->{local_name})) {
626     $r = ($self->{target_name} || $self->{local_name});
627     } else { # error
628     $r = '';
629     }
630 wakaba 1.1 } elsif ($self->{type} eq '#declaration') {
631 wakaba 1.2 if ($self->{local_name} eq 'DOCTYPE') {
632     my ($isub, $xid) = ('', $self->external_id);
633     for (@{$self->{node}}) {
634     $isub .= $_->outer_xml if $_->{type} ne '#pair';
635     }
636     if ($xid) {
637     $r = $xid;
638     if ($isub) {
639     $r .= " [\n" . $isub . "]";
640 wakaba 1.1 }
641     } else {
642 wakaba 1.2 if ($isub) {
643     $r = "[\n" . $isub . "]";
644     } else {
645     $r = "[]";
646     }
647 wakaba 1.1 }
648 wakaba 1.2 } elsif ($self->{local_name} eq 'ENTITY' && $self->_check_ncname ($self->{target_name})) {
649     my $ndatable = 1;
650     $r = $self->{target_name} . ' ';
651     if ($self->{namespace_uri} eq 'urn:x-suika-fam-cx:markup:sgml:entity:parameter') {
652     $r = '% ' . $r;
653     $ndatable = 0;
654     }
655     my ($v, $xid) = ($self->{value}, $self->external_id (use_ndata => $ndatable));
656     if ($xid) {
657     $r .= $xid;
658 wakaba 1.1 } else {
659 wakaba 1.2 #$v = $self->_entitize ($v);
660     for (@{$self->{node}}) {
661     $v .= $_->outer_xml if $_->{type} ne '#pair';
662     }
663     $r .= '"' . $self->_entitize ($v) . '"'; # BUG: implement this correctly
664 wakaba 1.1 }
665 wakaba 1.2 } elsif ($self->{local_name} eq 'ELEMENT') {
666 wakaba 1.3 my $to = sub {
667     if ($o{output_tag_omit_declaration}) {
668     ($self->{flag}->{element_tag_start_omitable} ? 'o ' : '- ')
669     .
670     ($self->{flag}->{element_tag_end_omitable} ? 'o ' : '- ');
671     } else {
672     '';
673     }
674     };
675 wakaba 1.2 if (ref $self->{node}->[0] && $self->{node}->[0]->{type} eq '#element') {
676     ## Element prototype is given
677 wakaba 1.3 $r = $self->{node}->[0]->qname . ' ' . &$to . $self->{node}->[0]->content_spec;
678 wakaba 1.2 } elsif ($self->_check_name ($self->{target_name})) {
679     ## Element type name and contentspec is given
680 wakaba 1.3 $r = $self->{target_name} . ' ' . &$to . ($self->inner_text || 'ANY');
681 wakaba 1.2 } else {
682     ## (Element type name and contetnspac) is given
683     $r = $self->inner_text (output_ref_as_is => 1)
684     || 'Name ANY'; # error
685     }
686     } elsif ($self->{local_name} eq 'ATTLIST') {
687     if ($self->_check_name ($self->{target_name})) {
688     $r = $self->{target_name};
689     }
690     my $t = $self->inner_text (output_ref_as_is => 1);
691     $r .= "\n\t" . $t if $t;
692     $r ||= 'Name'; # error!
693     for (@{$self->{node}}) {
694     if ($_->{type} eq '#attribute') {
695     $r .= "\n\t" . $_->content_spec;
696     }
697 wakaba 1.1 }
698 wakaba 1.2 } elsif ($self->{local_name} eq 'NOTATION' && $self->_check_ncname ($self->{target_name})) {
699     $r = $self->{target_name} . ' ';
700     my ($v, $xid) = ($self->{value}, $self->external_id (allow_pubid_only => 1));
701     if ($xid) {
702     $r .= $xid;
703 wakaba 1.1 } else {
704 wakaba 1.2 $r .= '""';
705 wakaba 1.1 }
706 wakaba 1.2 } else { # unknown
707 wakaba 1.3 for (@{$self->{node}}) {
708     $r .= $_->outer_xml;
709     }
710 wakaba 1.1 }
711 wakaba 1.2 } elsif ($self->{type} eq '#section' && !ref $self->{local_name} && $self->{local_name} eq 'CDATA') {
712     $r = $self->inner_text;
713     $r =~ s/]]>/]]>]]<![CDATA[>/g;
714 wakaba 1.1 } else {
715     if ($self->{type} ne '#xml') {
716     $r = $self->_entitize ($self->{value});
717     } else {
718     $r = $self->{value};
719     }
720     for (@{$self->{node}}) {
721     my $nt = $_->node_type;
722     if ((0||$self->{option}->{indent}) && ($nt eq '#element' || $nt eq '#comment' || $nt eq '#pi' || $nt eq '#declaration')) {
723     $r .= "\n";
724     }
725     $r .= $_->outer_xml unless $_->node_type eq '#attribute';
726     }
727     }
728     $r;
729     }
730    
731    
732     =item $tag = $x->outer_xml
733    
734     Returns the node in XML syntax.
735    
736     =cut
737    
738     sub outer_xml ($) {
739     my $self = shift;
740     if ($self->{type} eq '#attribute') {
741     $self->attribute;
742     } else {
743     if ((0||$self->{option}->{indent}) && $self->{type} eq '#element') {
744     my $r = $self->start_tag;
745     my $c = $self->inner_xml;
746     if ($c) {
747     $c =~ s/\n/\n /g;
748     $r .= "\n " . $c . "\n";
749     }
750     $r .= $self->end_tag;
751     $r;
752     } else {
753 wakaba 1.2 my $r = $self->start_tag . $self->inner_xml . $self->end_tag;
754     $r .= "\n" if $self->{type} eq '#declaration';
755     $r;
756     #'{'.$self->{type}.': '.$r.'}'; # for debug
757 wakaba 1.1 }
758     }
759     }
760    
761     =item $tag = $x->inner_text
762    
763     Returns the text content of the node. (In many case the returned value is same
764     as WinIE DOM C<inner_text ()> function's or XPath C<text()> function's.
765     But some classes that inherits this module might implement to return other
766     value (eg. to return the value of the alt attribute of html:img element).
767    
768 wakaba 1.2 Available options: C<output_ref_as_is>.
769    
770 wakaba 1.1 =cut
771    
772 wakaba 1.2 sub inner_text ($;%) {
773 wakaba 1.1 my $self = shift;
774 wakaba 1.2 my %o = @_;
775 wakaba 1.1 my $r = $self->{value};
776 wakaba 1.2 if ($o{output_ref_as_is}) { ## output as if RCDATA
777     for (@{$self->{node}}) {
778     my $nt = $_->node_type;
779     if ($nt eq '#reference') {
780     $r .= $_->outer_xml;
781     } elsif ($nt ne '#attribute') {
782     $r .= map {s/&/&amp;/g; $_} $_->inner_text;
783     }
784     }
785     } else {
786     for (@{$self->{node}}) {
787     $r .= $_->inner_text unless $_->node_type eq '#attribute';
788     }
789 wakaba 1.1 }
790     $r;
791     }
792    
793     {no warnings; # prototype mismatch
794     *stringify = \&outer_xml;
795     }
796    
797     # $s = $x->_entitize ($s)
798     sub _entitize ($$) {
799     my ($self, $s) = (shift, shift);
800     $s =~ s/&/&amp;/g;
801     $s =~ s/</&lt;/g;
802     $s =~ s/>/&gt;/g;
803     $s =~ s/"/&quot;/g;
804     $s;
805     }
806    
807 wakaba 1.2 # 1/0 = $x->_check_name ($s)
808     sub _check_name ($$) {
809     my $self = shift;
810     my $s = shift;
811     return $Cache{name}->{$s} if defined $Cache{name}->{$s};
812     if ($s =~ /^\p{InXML_NameStartChar}/ && $s !~ /\P{InXMLNameChar}/) {
813     # \p{...}('*'/'+'/'{n,}') does not work...
814     $Cache{name}->{$s} = 1;
815     1;
816     } else {
817     $Cache{name}->{$s} = 0;
818     0;
819     }
820     }
821 wakaba 1.1 # 1/0 = $x->_check_ncname ($s)
822     sub _check_ncname ($$) {
823     my $self = shift;
824     my $s = shift;
825     return $Cache{ncname}->{$s} if defined $Cache{ncname}->{$s};
826     if ($s =~ /^\p{InXML_NCNameStartChar}/ && $s !~ /\P{InXMLNCNameChar}/) {
827     # \p{...}('*'/'+'/'{n,}') does not work...
828     $Cache{ncname}->{$s} = 1;
829     1;
830     } else {
831     $Cache{ncname}->{$s} = 0;
832     0;
833     }
834     }
835    
836     # 1/0 = $x->_check_namespace_prefix ($s)
837     sub _check_namespace_prefix ($$) {
838     my $self = shift;
839     my $s = shift;
840     return 0 unless defined $s;
841     return 1 if $s eq '';
842     substr ($s, -1, 1) = '' if substr ($s, -1, 1) eq ':';
843     $self->_check_ncname ($s);
844     }
845    
846 wakaba 1.2 ## TODO: cleaning $self->{node} before outputing, to ensure nodes not to have
847     ## multiple parents.
848     ## TODO: normalize namespace URI (removing non URI chars)
849    
850     sub flag ($$;$) {
851     my ($self, $name, $value) = @_;
852     if (defined $value) {
853     $self->{flag}->{$name} = $value;
854     }
855     $self->{flag}->{$name};
856     }
857    
858 wakaba 1.3 sub option ($$;$) {
859     my ($self, $name, $value) = @_;
860     if (defined $value) {
861     $self->{option}->{$name} = $value;
862     }
863     $self->{option}->{$name};
864     }
865    
866 wakaba 1.1 =back
867    
868 wakaba 1.2 =head1 NODE TYPES
869    
870     =over 4
871    
872     =item #attribute
873    
874     Attribute. Its XML representation takes the form of NAME="VALUE".
875    
876     =item #comment
877    
878     Comment declarement. <!-- -->
879    
880     =item #declarement
881    
882     SGML's declarements, such as SGML, DOCTYPE, ENTITY, etc.
883     <!SGML ...>, <!DOCTYPE root []>, <!ENTITY % name "value">
884    
885     =item #element
886    
887     Element. Its XML representation consists of start tag, content and end tag,
888     like <TYPE>content</TYPE>.
889    
890     =item #fragment
891    
892     Fragment of nodes. It's similar to DOM's fragment node.
893    
894     =item #pair
895    
896     A name-value pair.
897    
898     =item #pi
899    
900     Prosessing instruction. <?NAME VALUE?>
901    
902     =item #reference
903    
904     Character reference or general or parameter entity reference.
905     &#nnnn;, &#xhhhh;, &name;, %name;.
906    
907     =item #section
908    
909     Markup section. CDATA, INCLUDE and IGNORE are supported by XML.
910     <![%type;[...]]>
911    
912     =item #text
913    
914     Text.
915    
916     =item #xml
917    
918     Preformatted XML text.
919    
920     =cut
921    
922 wakaba 1.1 =head1 LICENSE
923    
924     Copyright 2003 Wakaba <w@suika.fam.cx>
925    
926     This program is free software; you can redistribute it and/or
927     modify it under the same terms as Perl itself.
928    
929     =cut
930    
931 wakaba 1.3 1; # $Date: 2003/04/29 10:35:53 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24