140 |
$AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base}; |
$AttrChecker->{''}->{'xml:base'} = $AttrChecker->{$XML_NS}->{base}; |
141 |
$AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id}; |
$AttrChecker->{''}->{'xml:id'} = $AttrChecker->{$XML_NS}->{id}; |
142 |
|
|
143 |
## ANY |
our %AnyChecker = ( |
144 |
our $AnyChecker = sub { |
check_start => sub { }, |
145 |
my ($self, $todo) = @_; |
check_attrs => sub { |
146 |
my $el = $todo->{node}; |
my ($self, $item, $element_state) = @_; |
147 |
my $new_todos = []; |
for my $attr (@{$item->{node}->attributes}) { |
|
my @nodes = (@{$el->child_nodes}); |
|
|
while (@nodes) { |
|
|
my $node = shift @nodes; |
|
|
$self->_remove_minuses ($node) and next if ref $node eq 'HASH'; |
|
|
|
|
|
my $nt = $node->node_type; |
|
|
if ($nt == 1) { |
|
|
my $node_ns = $node->namespace_uri; |
|
|
$node_ns = '' unless defined $node_ns; |
|
|
my $node_ln = $node->manakai_local_name; |
|
|
if ($self->{minuses}->{$node_ns}->{$node_ln}) { |
|
|
$self->{onerror}->(node => $node, type => 'element not allowed'); |
|
|
} |
|
|
my ($sib, $ch) = $self->_check_get_children ($node, $todo); |
|
|
unshift @nodes, @$sib; |
|
|
push @$new_todos, @$ch; |
|
|
} elsif ($nt == 3 or $nt == 4) { |
|
|
if ($node->data =~ /[^\x09-\x0D\x20]/) { |
|
|
$todo->{flag}->{has_descendant}->{significant} = 1; |
|
|
} |
|
|
} elsif ($nt == 5) { |
|
|
unshift @nodes, @{$node->child_nodes}; |
|
|
} |
|
|
} |
|
|
return ($new_todos); |
|
|
}; # $AnyChecker |
|
|
|
|
|
our $ElementDefault = { |
|
|
checker => sub { |
|
|
my ($self, $todo) = @_; |
|
|
$self->{onerror}->(node => $todo->{node}, level => 'unsupported', |
|
|
type => 'element'); |
|
|
return $AnyChecker->($self, $todo); |
|
|
}, |
|
|
attrs_checker => sub { |
|
|
my ($self, $todo) = @_; |
|
|
for my $attr (@{$todo->{node}->attributes}) { |
|
148 |
my $attr_ns = $attr->namespace_uri; |
my $attr_ns = $attr->namespace_uri; |
149 |
$attr_ns = '' unless defined $attr_ns; |
$attr_ns = '' unless defined $attr_ns; |
150 |
my $attr_ln = $attr->manakai_local_name; |
my $attr_ln = $attr->manakai_local_name; |
151 |
my $checker = $AttrChecker->{$attr_ns}->{$attr_ln} |
my $checker = $AttrChecker->{$attr_ns}->{$attr_ln} |
152 |
|| $AttrChecker->{$attr_ns}->{''}; |
|| $AttrChecker->{$attr_ns}->{''}; |
153 |
if ($checker) { |
if ($checker) { |
154 |
$checker->($self, $attr); |
$checker->($self, $attr); |
155 |
} else { |
} else { |
158 |
} |
} |
159 |
} |
} |
160 |
}, |
}, |
161 |
|
check_child_element => sub { |
162 |
|
my ($self, $item, $child_el, $child_nsuri, $child_ln, |
163 |
|
$child_is_transparent, $element_state) = @_; |
164 |
|
if ($self->{minus_elements}->{$child_nsuri}->{$child_ln}) { |
165 |
|
$self->{onerror}->(node => $child_el, |
166 |
|
type => 'element not allowed:minus', |
167 |
|
level => $self->{must_level}); |
168 |
|
} elsif ($self->{plus_elements}->{$child_nsuri}->{$child_ln}) { |
169 |
|
# |
170 |
|
} else { |
171 |
|
# |
172 |
|
} |
173 |
|
}, |
174 |
|
check_child_text => sub { }, |
175 |
|
check_end => sub { |
176 |
|
my ($self, $item, $element_state) = @_; |
177 |
|
if ($element_state->{has_significant}) { |
178 |
|
$item->{parent_state}->{has_significant} = 1; |
179 |
|
} |
180 |
|
}, |
181 |
|
); |
182 |
|
|
183 |
|
our $ElementDefault = { |
184 |
|
%AnyChecker, |
185 |
|
check_start => sub { |
186 |
|
my ($self, $item, $element_state) = @_; |
187 |
|
$self->{onerror}->(node => $item->{node}, level => 'unsupported', |
188 |
|
type => 'element'); |
189 |
|
}, |
190 |
}; |
}; |
191 |
|
|
192 |
|
our $HTMLEmbeddedContent = { |
193 |
|
## NOTE: All embedded content is also phrasing content. |
194 |
|
$HTML_NS => { |
195 |
|
img => 1, iframe => 1, embed => 1, object => 1, video => 1, audio => 1, |
196 |
|
canvas => 1, |
197 |
|
}, |
198 |
|
## NOTE: MathML is mentioned in the HTML5 spec. |
199 |
|
q<http://www.w3.org/1998/Math/MathML> => {math => 1}, |
200 |
|
## NOTE: SVG is mentioned in the HTML5 spec. |
201 |
|
q<http://www.w3.org/2000/svg> => {svg => 1}, |
202 |
|
## NOTE: Foreign elements with content (but no metadata) are |
203 |
|
## embedded content. |
204 |
|
}; |
205 |
|
|
206 |
my $HTMLTransparentElements = { |
my $HTMLTransparentElements = { |
207 |
$HTML_NS => {qw/ins 1 del 1 font 1 noscript 1 canvas 1/}, |
$HTML_NS => {qw/ins 1 del 1 font 1 noscript 1 canvas 1/}, |
208 |
## NOTE: |html:noscript| is transparent if scripting is disabled |
## NOTE: |html:noscript| is transparent if scripting is disabled |
345 |
$self->{map} = {}; |
$self->{map} = {}; |
346 |
$self->{menu} = {}; |
$self->{menu} = {}; |
347 |
$self->{has_link_type} = {}; |
$self->{has_link_type} = {}; |
348 |
|
$self->{flag} = {}; |
349 |
#$self->{has_uri_attr}; |
#$self->{has_uri_attr}; |
350 |
#$self->{has_hyperlink_element}; |
#$self->{has_hyperlink_element}; |
351 |
#$self->{has_charset}; |
#$self->{has_charset}; |
355 |
id => $self->{id}, table => [], term => $self->{term}, |
id => $self->{id}, table => [], term => $self->{term}, |
356 |
}; |
}; |
357 |
|
|
358 |
my @todo = ({type => 'element', node => $el}); |
my @item = ({type => 'element', node => $el, parent_state => {}}); |
359 |
while (@todo) { |
while (@item) { |
360 |
my $todo = shift @todo; |
my $item = shift @item; |
361 |
if ($todo->{type} eq 'element') { |
if (ref $item eq 'ARRAY') { |
362 |
my $prefix = $todo->{node}->prefix; |
my $code = shift @$item; |
363 |
if (defined $prefix and $prefix eq 'xmlns') { |
next unless $code;## TODO: temp. |
364 |
$self->{onerror} |
$code->(@$item); |
365 |
->(node => $todo->{node}, level => 'NC', |
} elsif ($item->{type} eq 'element') { |
366 |
type => 'Reserved Prefixes and Namespace Names:<xmlns:>'); |
my $el_nsuri = $item->{node}->namespace_uri; |
367 |
} |
$el_nsuri = '' unless defined $el_nsuri; |
368 |
my $nsuri = $todo->{node}->namespace_uri; |
my $el_ln = $item->{node}->manakai_local_name; |
369 |
$nsuri = '' unless defined $nsuri; |
|
370 |
unless ($Namespace->{$nsuri}->{loaded}) { |
unless ($Namespace->{$el_nsuri}->{loaded}) { |
371 |
if ($Namespace->{$nsuri}->{module}) { |
if ($Namespace->{$el_nsuri}->{module}) { |
372 |
eval qq{ require $Namespace->{$nsuri}->{module} } or die $@; |
eval qq{ require $Namespace->{$el_nsuri}->{module} } or die $@; |
|
} else { |
|
|
$Namespace->{$nsuri}->{loaded} = 1; |
|
|
} |
|
|
} |
|
|
my $ln = $todo->{node}->manakai_local_name; |
|
|
my $eldef = $Element->{$nsuri}->{$ln} || |
|
|
$Element->{$nsuri}->{''} || |
|
|
$ElementDefault; |
|
|
$eldef->{attrs_checker}->($self, $todo); |
|
|
my ($new_todos) = $eldef->{checker}->($self, $todo); |
|
|
unshift @todo, @$new_todos; |
|
|
} elsif ($todo->{type} eq 'element-attributes') { |
|
|
my $prefix = $todo->{node}->prefix; |
|
|
if (defined $prefix and $prefix eq 'xmlns') { |
|
|
$self->{onerror} |
|
|
->(node => $todo->{node}, level => 'NC', |
|
|
type => 'Reserved Prefixes and Namespace Names:<xmlns:>'); |
|
|
} |
|
|
my $nsuri = $todo->{node}->namespace_uri; |
|
|
$nsuri = '' unless defined $nsuri; |
|
|
unless ($Namespace->{$nsuri}->{loaded}) { |
|
|
if ($Namespace->{$nsuri}->{module}) { |
|
|
eval qq{ require $Namespace->{$nsuri}->{module} } or die $@; |
|
373 |
} else { |
} else { |
374 |
$Namespace->{$nsuri}->{loaded} = 1; |
$Namespace->{$el_nsuri}->{loaded} = 1; |
375 |
} |
} |
376 |
} |
} |
377 |
my $ln = $todo->{node}->manakai_local_name; |
my $eldef = $Element->{$el_nsuri}->{$el_ln} || |
378 |
my $eldef = $Element->{$nsuri}->{$ln} || |
$Element->{$el_nsuri}->{''} || |
|
$Element->{$nsuri}->{''} || |
|
379 |
$ElementDefault; |
$ElementDefault; |
380 |
$eldef->{attrs_checker}->($self, $todo); |
my $content_def = $item->{parent_def} || $eldef; |
381 |
} elsif ($todo->{type} eq 'descendant') { |
|
382 |
for my $key (keys %{$todo->{errors}}) { |
my $element_state = {}; |
383 |
unless ($todo->{flag}->{has_descendant}->{$key}) { |
my @new_item; |
384 |
$todo->{errors}->{$key}->($self, $todo); |
push @new_item, [$eldef->{check_start}, $self, $item, $element_state]; |
385 |
} |
push @new_item, [$eldef->{check_attrs}, $self, $item, $element_state]; |
386 |
for my $key (keys %{$todo->{old_values}}) { |
|
387 |
$todo->{flag}->{has_descendant}->{$key} |
my @child = @{$item->{node}->child_nodes}; |
388 |
||= $todo->{old_values}->{$key}; |
while (@child) { |
389 |
|
my $child = shift @child; |
390 |
|
my $child_nt = $child->node_type; |
391 |
|
if ($child_nt == 1) { # ELEMENT_NODE |
392 |
|
my $child_nsuri = $child->namespace_uri; |
393 |
|
$child_nsuri = '' unless defined $child_nsuri; |
394 |
|
my $child_ln = $child->manakai_local_name; |
395 |
|
if ($HTMLTransparentElements->{$child_nsuri}->{$child_ln} and |
396 |
|
not (($self->{flag}->{in_head} or |
397 |
|
($el_nsuri eq q<http://www.w3.org/1999/xhtml> and |
398 |
|
$el_ln eq 'head')) and |
399 |
|
$child_nsuri eq q<http://www.w3.org/1999/xhtml> and |
400 |
|
$child_ln eq 'noscript')) { |
401 |
|
push @new_item, [$content_def->{check_child_element}, |
402 |
|
$self, $item, $child, |
403 |
|
$child_nsuri, $child_ln, 1, $element_state]; |
404 |
|
push @new_item, {type => 'element', node => $child, |
405 |
|
parent_state => $element_state, |
406 |
|
parent_def => $item->{parent_def} || $eldef, |
407 |
|
transparent => 1}; |
408 |
|
} else { |
409 |
|
push @new_item, [$content_def->{check_child_element}, |
410 |
|
$self, $item, $child, |
411 |
|
$child_nsuri, $child_ln, 0, $element_state]; |
412 |
|
push @new_item, {type => 'element', node => $child, |
413 |
|
parent_state => $element_state}; |
414 |
|
} |
415 |
|
|
416 |
|
if ($HTMLEmbeddedContent->{$child_nsuri}->{$child_ln}) { |
417 |
|
$element_state->{has_significant} = 1; |
418 |
|
} |
419 |
|
} elsif ($child_nt == 3 or # TEXT_NODE |
420 |
|
$child_nt == 4) { # CDATA_SECTION_NODE |
421 |
|
my $has_significant = ($child->data =~ /[^\x09-\x0D\x20]/); |
422 |
|
push @new_item, [$content_def->{check_child_text}, |
423 |
|
$self, $item, $child, $has_significant, |
424 |
|
$element_state]; |
425 |
|
$element_state->{has_significant} ||= $has_significant; |
426 |
|
} elsif ($child_nt == 5) { # ENTITY_REFERENCE_NODE |
427 |
|
push @child, @{$child->child_nodes}; |
428 |
} |
} |
429 |
|
## TODO: PI_NODE |
430 |
|
## TODO: Unknown node type |
431 |
} |
} |
432 |
} elsif ($todo->{type} eq 'plus' or $todo->{type} eq 'minus') { |
|
433 |
$self->_remove_minuses ($todo); |
push @new_item, [$eldef->{check_end}, $self, $item, $element_state]; |
434 |
} elsif ($todo->{type} eq 'code') { |
|
435 |
$todo->{code}->(); |
unshift @item, @new_item; |
436 |
} else { |
} else { |
437 |
die "$0: Internal error: Unsupported checking action type |$todo->{type}|"; |
die "$0: Internal error: Unsupported checking action type |$item->{type}|"; |
438 |
} |
} |
439 |
} |
} |
440 |
|
|
459 |
return $self->{return}; |
return $self->{return}; |
460 |
} # check_element |
} # check_element |
461 |
|
|
462 |
|
sub _add_minus_elements ($$@) { |
463 |
|
my $self = shift; |
464 |
|
my $element_state = shift; |
465 |
|
for my $elements (@_) { |
466 |
|
for my $nsuri (keys %$elements) { |
467 |
|
for my $ln (keys %{$elements->{$nsuri}}) { |
468 |
|
unless ($self->{minus_elements}->{$nsuri}->{$ln}) { |
469 |
|
$element_state->{minus_elements_original}->{$nsuri}->{$ln} = 0; |
470 |
|
$self->{minus_elements}->{$nsuri}->{$ln} = 1; |
471 |
|
} |
472 |
|
} |
473 |
|
} |
474 |
|
} |
475 |
|
} # _add_minus_elements |
476 |
|
|
477 |
|
sub _remove_minus_elements ($$) { |
478 |
|
my $self = shift; |
479 |
|
my $element_state = shift; |
480 |
|
for my $nsuri (keys %{$element_state->{minus_elements_original}}) { |
481 |
|
for my $ln (keys %{$element_state->{minus_elements_original}->{$nsuri}}) { |
482 |
|
delete $self->{minus_elements}->{$nsuri}->{$ln}; |
483 |
|
} |
484 |
|
} |
485 |
|
} # _remove_minus_elements |
486 |
|
|
487 |
|
sub _add_plus_elements ($$@) { |
488 |
|
my $self = shift; |
489 |
|
my $element_state = shift; |
490 |
|
for my $elements (@_) { |
491 |
|
for my $nsuri (keys %$elements) { |
492 |
|
for my $ln (keys %{$elements->{$nsuri}}) { |
493 |
|
unless ($self->{plus_elements}->{$nsuri}->{$ln}) { |
494 |
|
$element_state->{plus_elements_original}->{$nsuri}->{$ln} = 0; |
495 |
|
$self->{plus_elements}->{$nsuri}->{$ln} = 1; |
496 |
|
} |
497 |
|
} |
498 |
|
} |
499 |
|
} |
500 |
|
} # _add_plus_elements |
501 |
|
|
502 |
|
sub _remove_plus_elements ($$) { |
503 |
|
my $self = shift; |
504 |
|
my $element_state = shift; |
505 |
|
for my $nsuri (keys %{$element_state->{plus_elements_original}}) { |
506 |
|
for my $ln (keys %{$element_state->{plus_elements_original}->{$nsuri}}) { |
507 |
|
delete $self->{plus_elements}->{$nsuri}->{$ln}; |
508 |
|
} |
509 |
|
} |
510 |
|
} # _remove_plus_elements |
511 |
|
|
512 |
sub _add_minuses ($@) { |
sub _add_minuses ($@) { |
513 |
my $self = shift; |
my $self = shift; |
514 |
my $r = {}; |
my $r = {}; |