8 |
## doc.write (''); |
## doc.write (''); |
9 |
## alert (doc.compatMode); |
## alert (doc.compatMode); |
10 |
|
|
|
## ISSUE: HTML5 revision 967 says that the encoding layer MUST NOT |
|
|
## strip BOM and the HTML layer MUST ignore it. Whether we can do it |
|
|
## is not yet clear. |
|
|
## "{U+FEFF}..." in UTF-16BE/UTF-16LE is three or four characters? |
|
|
## "{U+FEFF}..." in GB18030? |
|
|
|
|
11 |
## TODO: Control characters and noncharacters are not allowed (HTML5 revision 1263) |
## TODO: Control characters and noncharacters are not allowed (HTML5 revision 1263) |
12 |
## TODO: 1252 parse error (revision 1264) |
## TODO: 1252 parse error (revision 1264) |
13 |
## TODO: 8859-11 = 874 (revision 1271) |
## TODO: 8859-11 = 874 (revision 1271) |
18 |
meta => 1, |
meta => 1, |
19 |
hr => 1, |
hr => 1, |
20 |
br => 1, |
br => 1, |
21 |
img=> 1, |
img => 1, |
22 |
embed => 1, |
embed => 1, |
23 |
param => 1, |
param => 1, |
24 |
area => 1, |
area => 1, |
153 |
return $return; |
return $return; |
154 |
} # parse_byte_string |
} # parse_byte_string |
155 |
|
|
156 |
|
## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM |
157 |
|
## and the HTML layer MUST ignore it. However, we do strip the BOM in |
158 |
|
## the encoding layer and the HTML layer does not ignore any U+FEFF, |
159 |
|
## because the core part of our HTML parser expects a string of characters, |
160 |
|
## not a string of bytes or code units or anything which might contain a BOM. |
161 |
|
## Therefore, any parser interface that accepts a string of bytes, |
162 |
|
## such as |parse_byte_string| in this module, must ensure that it does |
163 |
|
## strip the BOM and never strip any ZWNBSP. |
164 |
|
|
165 |
*parse_char_string = \&parse_string; |
*parse_char_string = \&parse_string; |
166 |
|
|
167 |
sub parse_string ($$$;$) { |
sub parse_string ($$$;$) { |
5467 |
$p->_initialize_tree_constructor; |
$p->_initialize_tree_constructor; |
5468 |
|
|
5469 |
## Step 2 |
## Step 2 |
5470 |
my $node_ln = $node->local_name; |
my $node_ln = $node->manakai_local_name; |
5471 |
$p->{content_model} = { |
$p->{content_model} = { |
5472 |
title => RCDATA_CONTENT_MODEL, |
title => RCDATA_CONTENT_MODEL, |
5473 |
textarea => RCDATA_CONTENT_MODEL, |
textarea => RCDATA_CONTENT_MODEL, |
5507 |
if ($anode->node_type == 1) { |
if ($anode->node_type == 1) { |
5508 |
my $nsuri = $anode->namespace_uri; |
my $nsuri = $anode->namespace_uri; |
5509 |
if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') { |
if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') { |
5510 |
if ($anode->local_name eq 'form') { ## TODO: case? |
if ($anode->manakai_local_name eq 'form') { |
5511 |
$p->{form_element} = $anode; |
$p->{form_element} = $anode; |
5512 |
last AN; |
last AN; |
5513 |
} |
} |