1 |
package Whatpm::XMLSerializer; |
2 |
use strict; |
3 |
|
4 |
sub get_outer_xml ($$;$) { |
5 |
my $r = ''; |
6 |
my @src = ($_[1]); |
7 |
my $onerror = $_[2] || sub { }; |
8 |
my $nsbind = [{'' => '', xml => q<http://www.w3.org/XML/1998/namespace>, |
9 |
xmlns => q<http://suika.fam.cx/~wakaba/-temp/2003/09/27/undef>}]; |
10 |
my $xescape = sub ($) { |
11 |
my $s = shift; |
12 |
$s =~ s/&/&/g; |
13 |
$s =~ s/</</g; |
14 |
$s =~ s/>/>/g; |
15 |
$s =~ s/"/"/g; |
16 |
return $s; |
17 |
}; |
18 |
while (defined (my $src = shift @src)) { |
19 |
if (ref $src eq 'ARRAY') { |
20 |
pop @$nsbind; ## End tag |
21 |
} elsif (ref $src) { |
22 |
my $srcnt = $src->node_type; |
23 |
if ($srcnt == 1) { # ELEMENT_NODE |
24 |
my @csrc; |
25 |
my $etag; |
26 |
push @$nsbind, my $ns = {%{$nsbind->[-1]}}; |
27 |
my %attrr; |
28 |
|
29 |
my @attrs = @{$src->attributes}; |
30 |
my @nsattrs; |
31 |
my @gattrs; |
32 |
my @lattrs; |
33 |
|
34 |
for my $attr (@attrs) { |
35 |
my $nsuri = $attr->namespace_uri; |
36 |
if (not defined $nsuri) { |
37 |
push @lattrs, $attr; |
38 |
} elsif ($nsuri eq q<http://www.w3.org/2000/xmlns/>) { |
39 |
push @nsattrs, $attr; |
40 |
} else { |
41 |
push @gattrs, $attr; |
42 |
} |
43 |
} |
44 |
|
45 |
## Implied namespace prefixes |
46 |
my $etns = $src->namespace_uri; |
47 |
my $etpfx = $src->prefix; |
48 |
if (defined $etns and defined $etpfx and |
49 |
not (defined $ns->{$etpfx} and $ns->{$etpfx} eq $etns)) { |
50 |
$ns->{$etpfx} = $etns; |
51 |
$attrr{'xmlns:'.$etpfx} = [$xescape->($etns)]; |
52 |
} |
53 |
|
54 |
for my $attr (@gattrs) { |
55 |
my $atns = $attr->namespace_uri; |
56 |
my $atpfx = $attr->prefix; |
57 |
if (defined $atpfx and |
58 |
not (defined $ns->{$atpfx} and $ns->{$atpfx} eq $atns)) { |
59 |
$ns->{$atpfx} = $atns; |
60 |
$attrr{'xmlns:'.$atpfx} = [$xescape->($atns)]; |
61 |
} |
62 |
} |
63 |
|
64 |
## Namespace attributes |
65 |
XA: for my $attr (@nsattrs) { |
66 |
my $attrval = $attr->value; |
67 |
my $lname = $attr->local_name; |
68 |
if ($lname eq 'xmlns') { |
69 |
$ns->{''} = $attrval; |
70 |
$attrr{xmlns} = [@{$attr->child_nodes}]; |
71 |
} else { |
72 |
if (length $attrval) { |
73 |
$ns->{$lname} = $attrval; |
74 |
} else { |
75 |
$ns->{$lname} = q<http://suika.fam.cx/~wakaba/-temp/2003/09/27/undef>; |
76 |
} |
77 |
$attrr{'xmlns:'.$lname} = [@{$attr->child_nodes}]; |
78 |
} |
79 |
} # XA |
80 |
|
81 |
## Per-element partition attributes |
82 |
for my $attr (@lattrs) { |
83 |
$attrr{$attr->local_name} = [@{$attr->child_nodes}]; |
84 |
} |
85 |
|
86 |
## Global partition attributes |
87 |
my $dns = $ns->{''}; |
88 |
delete $ns->{''}; |
89 |
my $nsrev = {reverse %$ns}; |
90 |
$ns->{''} = $dns; |
91 |
delete $nsrev->{q<http://suika.fam.cx/~wakaba/-temp/2003/09/27/undef>}; # for security reason |
92 |
for my $attr (@gattrs) { |
93 |
my $atns = $attr->namespace_uri; |
94 |
my $atpfx = $attr->prefix; |
95 |
if (not defined $atpfx or |
96 |
$ns->{$atpfx} ne $atns) { |
97 |
if (defined $nsrev->{$atns}) { |
98 |
$atpfx = $nsrev->{$atns}; |
99 |
} else { |
100 |
## Prefix is not registered |
101 |
my @uritxt = grep {/\A[A-Za-z][A-Za-z0-9_.-]*\z/} |
102 |
split /\W+/, $atns; |
103 |
P: { |
104 |
for my $pfx (reverse @uritxt) { |
105 |
if (not defined $ns->{$pfx}) { |
106 |
$atpfx = $pfx; |
107 |
$ns->{$pfx} = $atns; |
108 |
$nsrev->{$atns} = $atpfx; |
109 |
$attrr{'xmlns:'.$atpfx} = [$xescape->($atns)]; |
110 |
last P; |
111 |
} |
112 |
} |
113 |
|
114 |
my $i = 1; |
115 |
$i++ while exists $ns->{'ns'.$i}; |
116 |
$atpfx = 'ns'.$i; |
117 |
$ns->{$atpfx} = $atns; |
118 |
$nsrev->{$atns} = $atpfx; |
119 |
$attrr{'xmlns:ns'.$i} = [$xescape->($atns)]; |
120 |
} # P |
121 |
} |
122 |
} |
123 |
|
124 |
$attrr{$atpfx.':'.$attr->local_name} = [@{$attr->child_nodes}]; |
125 |
} |
126 |
|
127 |
## Element type name |
128 |
if (defined $etns) { |
129 |
if (not defined $etpfx or |
130 |
(defined $ns->{$etpfx} and $ns->{$etpfx} ne $etns)) { |
131 |
if ($ns->{''} eq $etns) { |
132 |
$etpfx = undef; |
133 |
} else { |
134 |
$etpfx = $nsrev->{$etns}; |
135 |
unless (defined $etpfx) { |
136 |
## Prefix is not registered |
137 |
my @uritxt = grep {/\A[A-Za-z][A-Za-z0-9_.-]*\z/} |
138 |
split /\W+/, $etns; |
139 |
P: { |
140 |
for my $pfx (reverse @uritxt) { |
141 |
if (not defined $ns->{$pfx}) { |
142 |
$etpfx = $pfx; |
143 |
$ns->{$pfx} = $etns; |
144 |
$nsrev->{$etns} = $etpfx; |
145 |
$attrr{'xmlns:'.$etpfx} = [$xescape->($etns)]; |
146 |
last P; |
147 |
} |
148 |
} |
149 |
|
150 |
my $i = 1; |
151 |
$i++ while exists $ns->{'ns'.$i}; |
152 |
$etpfx = 'ns'.$i; |
153 |
$ns->{$etpfx} = $etns; |
154 |
$nsrev->{$etns} = $etpfx; |
155 |
$attrr{'xmlns:ns'.$i} = [$xescape->($etns)]; |
156 |
} # P |
157 |
} |
158 |
} |
159 |
} |
160 |
} else { |
161 |
if ($ns->{''} ne '') { |
162 |
$ns->{''} = ''; |
163 |
$attrr{xmlns} = ['']; |
164 |
} |
165 |
} |
166 |
|
167 |
$r .= '<'; |
168 |
$etag = '</'; |
169 |
if (defined $etpfx and defined $etns) { |
170 |
$r .= $etpfx . ':'; |
171 |
$etag .= $etpfx . ':'; |
172 |
} |
173 |
my $etln = $src->local_name; |
174 |
$r .= $etln; |
175 |
$etag .= $etln . '>'; |
176 |
|
177 |
## Attribute specifications |
178 |
for my $an (sort keys %attrr) { |
179 |
push @csrc, ' ' . $an . '="', @{$attrr{$an}}, '"'; |
180 |
} |
181 |
|
182 |
## Children |
183 |
push @csrc, '>', @{$src->child_nodes}, $etag, []; |
184 |
unshift @src, @csrc; |
185 |
} elsif ($srcnt == 3) { # TEXT_NODE |
186 |
$r .= $xescape->($src->node_value); |
187 |
} elsif ($srcnt == 4) { # CDATA_SECTION_NODE |
188 |
my $text = $src->node_value; |
189 |
$text =~ s/]]>/]]]]>><![CDATA[/g; |
190 |
$r .= '<![CDATA[' . $text . ']]>'; |
191 |
} elsif ($srcnt == 5) { # ENTITY_REFERENCE_NODE |
192 |
if ($src->manakai_expanded) { |
193 |
push @src, @{$src->child_nodes}; |
194 |
} else { |
195 |
$r .= '&' . $src->node_name . ';'; |
196 |
} |
197 |
} elsif ($srcnt == 7) { # PROCESSING_INSTRUCTION_NODE |
198 |
$r .= '<?' . $src->node_name; |
199 |
my $data = $src->node_value; |
200 |
if (length $data) { |
201 |
$data =~ s/\?>/?>/g; |
202 |
$r .= ' ' . $data; |
203 |
} |
204 |
$r .= '?>'; |
205 |
} elsif ($srcnt == 8) { # COMMENT_NODE |
206 |
my $data = $src->node_value; |
207 |
$data =~ s/--/- - /g; |
208 |
$r .= '<!--' . $data . '-->'; |
209 |
} elsif ($srcnt == 9) { # DOCUMENT_NODE |
210 |
unshift @src, map {$_, "\x0A"} @{$src->child_nodes}; |
211 |
## ISSUE: |cfg:strict-document-children| cparam |
212 |
} |
213 |
# document type, entity, notation, etdef, atdef, df |
214 |
} else { |
215 |
$r .= $src; |
216 |
} |
217 |
} |
218 |
|
219 |
return \$r; |
220 |
} # get_outer_xml |
221 |
|
222 |
1; |
223 |
## $Date: 2007/06/30 13:12:33 $ |