/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Serializer.pm
Suika

Contents of /markup/html/whatpm/Whatpm/HTML/Serializer.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (hide annotations) (download)
Sat Mar 1 00:42:53 2008 UTC (16 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.1: +6 -3 lines
++ whatpm/t/ChangeLog	1 Mar 2008 00:26:59 -0000
2008-03-01  Wakaba  <wakaba@suika.fam.cx>

	* tokenizer-test-1.test: Updated (HTML5 revision 1286).

	* content-model-2.dat: Updated (HTML5 revision 1275).

++ whatpm/Whatpm/ChangeLog	1 Mar 2008 00:19:36 -0000
2008-03-01  Wakaba  <wakaba@suika.fam.cx>

	* _NamedEntityList.pm: Updated (HTML5 revision 1286).

	* HTML.pm.src: |charset| in |content| attribute is
	case-insensitive (HTML5 revision 1270).

++ whatpm/Whatpm/HTML/ChangeLog	1 Mar 2008 00:07:44 -0000
2008-03-01  Wakaba  <wakaba@suika.fam.cx>

	* Serializer.pm (get_inner_html): Escape NBSP (HTML5 revision
	1277).

++ whatpm/Whatpm/ContentChecker/ChangeLog	29 Feb 2008 23:29:54 -0000
2008-03-01  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: Sectioning root category added.  |blockquote|
	is no longer a sectioning content.

1 wakaba 1.1 package Whatpm::HTML::Serializer;
2     use strict;
3     our $VERSION=do{my @r=(q$Revision: 1.1 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4    
5     sub get_inner_html ($$$) {
6     my (undef, $node, $on_error) = @_;
7    
8     ## Step 1
9     my $s = '';
10    
11     my $in_cdata;
12     my $parent = $node;
13     while (defined $parent) {
14     if ($parent->node_type == 1 and
15     $parent->namespace_uri eq 'http://www.w3.org/1999/xhtml' and
16     {
17     style => 1, script => 1, xmp => 1, iframe => 1,
18     noembed => 1, noframes => 1, noscript => 1,
19     }->{$parent->local_name}) { ## TODO: case thingy
20     $in_cdata = 1;
21     }
22     $parent = $parent->parent_node;
23     }
24    
25     ## Step 2
26     my @node = @{$node->child_nodes};
27     C: while (@node) {
28     my $child = shift @node;
29     unless (ref $child) {
30     if ($child eq 'cdata-out') {
31     $in_cdata = 0;
32     } else {
33     $s .= $child; # end tag
34     }
35     next C;
36     }
37    
38     my $nt = $child->node_type;
39     if ($nt == 1) { # Element
40     my $tag_name = $child->tag_name; ## TODO: manakai_tag_name
41     $s .= '<' . $tag_name;
42     ## NOTE: Non-HTML case:
43     ## <http://permalink.gmane.org/gmane.org.w3c.whatwg.discuss/11191>
44    
45     my @attrs = @{$child->attributes}; # sort order MUST be stable
46     for my $attr (@attrs) { # order is implementation dependent
47     my $attr_name = $attr->name; ## TODO: manakai_name
48     $s .= ' ' . $attr_name . '="';
49     my $attr_value = $attr->value;
50     ## escape
51     $attr_value =~ s/&/&amp;/g;
52     $attr_value =~ s/</&lt;/g;
53     $attr_value =~ s/>/&gt;/g;
54     $attr_value =~ s/"/&quot;/g;
55 wakaba 1.2 $attr_value =~ s/\xA0/&nbsp;/g;
56 wakaba 1.1 $s .= $attr_value . '"';
57     }
58     $s .= '>';
59    
60     next C if {
61     area => 1, base => 1, basefont => 1, bgsound => 1,
62     br => 1, col => 1, embed => 1, frame => 1, hr => 1,
63     img => 1, input => 1, link => 1, meta => 1, param => 1,
64     spacer => 1, wbr => 1,
65     }->{$tag_name};
66    
67     $s .= "\x0A" if $tag_name eq 'pre' or $tag_name eq 'textarea';
68    
69     if (not $in_cdata and {
70     style => 1, script => 1, xmp => 1, iframe => 1,
71     noembed => 1, noframes => 1, noscript => 1,
72     plaintext => 1,
73     }->{$tag_name}) {
74     unshift @node, 'cdata-out';
75     $in_cdata = 1;
76     }
77    
78     unshift @node, @{$child->child_nodes}, '</' . $tag_name . '>';
79     } elsif ($nt == 3 or $nt == 4) {
80     if ($in_cdata) {
81     $s .= $child->data;
82     } else {
83     my $value = $child->data;
84     $value =~ s/&/&amp;/g;
85     $value =~ s/</&lt;/g;
86     $value =~ s/>/&gt;/g;
87     $value =~ s/"/&quot;/g;
88 wakaba 1.2 $value =~ s/\xA0/&nbsp;/g;
89 wakaba 1.1 $s .= $value;
90     }
91     } elsif ($nt == 8) {
92     $s .= '<!--' . $child->data . '-->';
93     } elsif ($nt == 10) {
94     $s .= '<!DOCTYPE ' . $child->name . '>';
95     } elsif ($nt == 5) { # entrefs
96     push @node, @{$child->child_nodes};
97 wakaba 1.2 } elsif ($nt == 7) { # PIs
98     $s .= '<?' . $child->target . ' ' . $target->data . '>';
99 wakaba 1.1 } else {
100     $on_error->($child) if defined $on_error;
101     }
102     } # C
103    
104     ## Step 3
105     return \$s;
106     } # get_inner_html
107    
108     =head1 LICENSE
109    
110 wakaba 1.2 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
111 wakaba 1.1
112     This library is free software; you can redistribute it
113     and/or modify it under the same terms as Perl itself.
114    
115     =cut
116    
117     1;
118 wakaba 1.2 ## $Date: 2007/11/11 04:59:36 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24