| 8 |
## doc.write (''); |
## doc.write (''); |
| 9 |
## alert (doc.compatMode); |
## alert (doc.compatMode); |
| 10 |
|
|
|
## ISSUE: HTML5 revision 967 says that the encoding layer MUST NOT |
|
|
## strip BOM and the HTML layer MUST ignore it. Whether we can do it |
|
|
## is not yet clear. |
|
|
## "{U+FEFF}..." in UTF-16BE/UTF-16LE is three or four characters? |
|
|
## "{U+FEFF}..." in GB18030? |
|
|
|
|
| 11 |
## TODO: Control characters and noncharacters are not allowed (HTML5 revision 1263) |
## TODO: Control characters and noncharacters are not allowed (HTML5 revision 1263) |
| 12 |
## TODO: 1252 parse error (revision 1264) |
## TODO: 1252 parse error (revision 1264) |
| 13 |
## TODO: 8859-11 = 874 (revision 1271) |
## TODO: 8859-11 = 874 (revision 1271) |
| 18 |
meta => 1, |
meta => 1, |
| 19 |
hr => 1, |
hr => 1, |
| 20 |
br => 1, |
br => 1, |
| 21 |
img=> 1, |
img => 1, |
| 22 |
embed => 1, |
embed => 1, |
| 23 |
param => 1, |
param => 1, |
| 24 |
area => 1, |
area => 1, |
| 153 |
return $return; |
return $return; |
| 154 |
} # parse_byte_string |
} # parse_byte_string |
| 155 |
|
|
| 156 |
|
## NOTE: HTML5 spec says that the encoding layer MUST NOT strip BOM |
| 157 |
|
## and the HTML layer MUST ignore it. However, we do strip BOM in
| 158 |
|
## the encoding layer and the HTML layer does not ignore any U+FEFF, |
| 159 |
|
## because the core part of our HTML parser expects a string of characters,
| 160 |
|
## not a string of bytes or code units or anything which might contain a BOM. |
| 161 |
|
## Therefore, any parser interface that accepts a string of bytes, |
| 162 |
|
## such as |parse_byte_string| in this module, must ensure that it does |
| 163 |
|
## strip the BOM and never strip any ZWNBSP. |
| 164 |
|
|
| 165 |
*parse_char_string = \&parse_string; |
*parse_char_string = \&parse_string; |
| 166 |
|
|
| 167 |
sub parse_string ($$$;$) { |
sub parse_string ($$$;$) { |
| 5467 |
$p->_initialize_tree_constructor; |
$p->_initialize_tree_constructor; |
| 5468 |
|
|
| 5469 |
## Step 2 |
## Step 2 |
| 5470 |
my $node_ln = $node->local_name; |
my $node_ln = $node->manakai_local_name; |
| 5471 |
$p->{content_model} = { |
$p->{content_model} = { |
| 5472 |
title => RCDATA_CONTENT_MODEL, |
title => RCDATA_CONTENT_MODEL, |
| 5473 |
textarea => RCDATA_CONTENT_MODEL, |
textarea => RCDATA_CONTENT_MODEL, |
| 5507 |
if ($anode->node_type == 1) { |
if ($anode->node_type == 1) { |
| 5508 |
my $nsuri = $anode->namespace_uri; |
my $nsuri = $anode->namespace_uri; |
| 5509 |
if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') { |
if (defined $nsuri and $nsuri eq 'http://www.w3.org/1999/xhtml') { |
| 5510 |
if ($anode->local_name eq 'form') { ## TODO: case? |
if ($anode->manakai_local_name eq 'form') { |
| 5511 |
$p->{form_element} = $anode; |
$p->{form_element} = $anode; |
| 5512 |
last AN; |
last AN; |
| 5513 |
} |
} |