1 |
package Whatpm::ContentChecker; |
package Whatpm::ContentChecker; |
2 |
use strict; |
use strict; |
3 |
|
|
4 |
|
## ISSUE: How XML and XML Namespaces conformance can (or cannot) |
5 |
|
## be applied to an in-memory representation (i.e. DOM)? |
6 |
|
|
7 |
my $XML_NS = q<http://www.w3.org/XML/1998/namespace>; |
my $XML_NS = q<http://www.w3.org/XML/1998/namespace>; |
8 |
my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>; |
my $XMLNS_NS = q<http://www.w3.org/2000/xmlns/>; |
9 |
|
|
10 |
my $AttrChecker = { |
my $AttrChecker = { |
11 |
$XML_NS => { |
$XML_NS => { |
12 |
## TODO: xml:space, xml:base, xml:lang, xml:id |
space => sub { |
13 |
## TODO: xml:lang MUST NOT in HTML document |
my ($self, $attr) = @_; |
14 |
|
my $value = $attr->value; |
15 |
|
if ($value eq 'default' or $value eq 'preserve') { |
16 |
|
# |
17 |
|
} else { |
18 |
|
## NOTE: An XML "error" |
19 |
|
$self->{onerror}->(node => $attr, |
20 |
|
type => 'XML error:invalid xml:space value'); |
21 |
|
} |
22 |
|
}, |
23 |
|
lang => sub { |
24 |
|
## NOTE: "The values of the attribute are language identifiers |
25 |
|
## as defined by [IETF RFC 3066], Tags for the Identification |
26 |
|
## of Languages, or its successor; in addition, the empty string |
27 |
|
## may be specified." ("may" in lower case) |
28 |
|
## TODO: xml:lang MUST NOT in HTML document |
29 |
|
}, |
30 |
|
base => sub { |
31 |
|
my ($self, $attr) = @_; |
32 |
|
my $value = $attr->value; |
33 |
|
if ($value =~ /[^\x{0000}-\x{10FFFF}]/) { ## ISSUE: Should we disallow noncharacters? |
34 |
|
$self->{onerror}->(node => $attr, |
35 |
|
type => 'syntax error'); |
36 |
|
} |
37 |
|
## NOTE: Conformance to URI standard is not checked. |
38 |
|
}, |
39 |
|
id => sub { |
40 |
|
my ($self, $attr) = @_; |
41 |
|
my $value = $attr->value; |
42 |
|
$value =~ s/[\x09\x0A\x0D\x20]+/ /g; |
43 |
|
$value =~ s/^\x20//; |
44 |
|
$value =~ s/\x20$//; |
45 |
|
## TODO: NCName in XML 1.0 or 1.1 |
46 |
|
## TODO: declared type is ID? |
47 |
|
if ($self->{id}->{$value}) { |
48 |
|
$self->{onerror}->(node => $attr, type => 'xml:id error:duplicate ID'); |
49 |
|
} else { |
50 |
|
$self->{id}->{$value} = 1; |
51 |
|
} |
52 |
|
}, |
53 |
}, |
}, |
54 |
$XMLNS_NS => { |
$XMLNS_NS => { |
55 |
'' => sub {}, ## TODO: implement |
'' => sub { |
56 |
xmlns => sub {}, ## TODO: implement |
my ($self, $attr) = @_; |
57 |
|
my $ln = $attr->manakai_local_name; |
58 |
|
my $value = $attr->value; |
59 |
|
if ($value eq $XML_NS and $ln ne 'xml') { |
60 |
|
$self->{onerror} |
61 |
|
->(node => $attr, |
62 |
|
type => 'NC:Reserved Prefixes and Namespace Names:=xml'); |
63 |
|
} elsif ($value eq $XMLNS_NS) { |
64 |
|
$self->{onerror} |
65 |
|
->(node => $attr, |
66 |
|
type => 'NC:Reserved Prefixes and Namespace Names:=xmlns'); |
67 |
|
} |
68 |
|
if ($ln eq 'xml' and $value ne $XML_NS) { |
69 |
|
$self->{onerror} |
70 |
|
->(node => $attr, |
71 |
|
type => 'NC:Reserved Prefixes and Namespace Names:xmlns:xml='); |
72 |
|
} elsif ($ln eq 'xmlns') { |
73 |
|
$self->{onerror} |
74 |
|
->(node => $attr, |
75 |
|
type => 'NC:Reserved Prefixes and Namespace Names:xmlns:xmlns='); |
76 |
|
} |
77 |
|
## TODO: If XML 1.0 and empty |
78 |
|
}, |
79 |
|
xmlns => sub { |
80 |
|
my ($self, $attr) = @_; |
81 |
|
## TODO: In XML 1.0, URI reference [RFC 3986] or an empty string |
82 |
|
## TODO: In XML 1.1, IRI reference [RFC 3987] or an empty string |
83 |
|
my $value = $attr->value; |
84 |
|
if ($value eq $XML_NS) { |
85 |
|
$self->{onerror} |
86 |
|
->(node => $attr, |
87 |
|
type => 'NC:Reserved Prefixes and Namespace Names:=xml'); |
88 |
|
} elsif ($value eq $XMLNS_NS) { |
89 |
|
$self->{onerror} |
90 |
|
->(node => $attr, |
91 |
|
type => 'NC:Reserved Prefixes and Namespace Names:=xmlns'); |
92 |
|
} |
93 |
|
}, |
94 |
}, |
}, |
95 |
}; |
}; |
96 |
|
|
97 |
|
$AttrChecker->{''}->{'xml:space'} = $AttrChecker->{$XML_NS}->{space}; |
98 |
|
$AttrChecker->{''}->{'xml:lang'} = $AttrChecker->{$XML_NS}->{lang}; |
99 |
|
$AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base}; |
100 |
|
$AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id}; |
101 |
|
|
102 |
## ANY |
## ANY |
103 |
my $AnyChecker = sub { |
my $AnyChecker = sub { |
104 |
my ($self, $todo) = @_; |
my ($self, $todo) = @_; |
2153 |
while (@todo) { |
while (@todo) { |
2154 |
my $todo = shift @todo; |
my $todo = shift @todo; |
2155 |
if ($todo->{type} eq 'element') { |
if ($todo->{type} eq 'element') { |
2156 |
|
my $prefix = $todo->{node}->prefix; |
2157 |
|
if (defined $prefix and $prefix eq 'xmlns') { |
2158 |
|
$self->{onerror} |
2159 |
|
->(node => $todo->{node}, |
2160 |
|
type => 'NC:Reserved Prefixes and Namespace Names:<xmlns:>'); |
2161 |
|
} |
2162 |
my $nsuri = $todo->{node}->namespace_uri; |
my $nsuri = $todo->{node}->namespace_uri; |
2163 |
$nsuri = '' unless defined $nsuri; |
$nsuri = '' unless defined $nsuri; |
2164 |
my $ln = $todo->{node}->manakai_local_name; |
my $ln = $todo->{node}->manakai_local_name; |
2169 |
my ($new_todos) = $eldef->{checker}->($self, $todo); |
my ($new_todos) = $eldef->{checker}->($self, $todo); |
2170 |
push @todo, @$new_todos; |
push @todo, @$new_todos; |
2171 |
} elsif ($todo->{type} eq 'element-attributes') { |
} elsif ($todo->{type} eq 'element-attributes') { |
2172 |
|
my $prefix = $todo->{node}->prefix; |
2173 |
|
if (defined $prefix and $prefix eq 'xmlns') { |
2174 |
|
$self->{onerror} |
2175 |
|
->(node => $todo->{node}, |
2176 |
|
type => 'NC:Reserved Prefixes and Namespace Names:<xmlns:>'); |
2177 |
|
} |
2178 |
my $nsuri = $todo->{node}->namespace_uri; |
my $nsuri = $todo->{node}->namespace_uri; |
2179 |
$nsuri = '' unless defined $nsuri; |
$nsuri = '' unless defined $nsuri; |
2180 |
my $ln = $todo->{node}->manakai_local_name; |
my $ln = $todo->{node}->manakai_local_name; |