/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log | View Patch

revision 1.185 by wakaba, Mon Sep 15 09:27:53 2008 UTC | revision 1.195 by wakaba, Sat Oct 4 06:30:34 2008 UTC
# Line 66  sub TABLE_ROWS_EL () { Line 66  sub TABLE_ROWS_EL () {
66  }  }
67    
68  ## NOTE: Used in "generate implied end tags" algorithm.  ## NOTE: Used in "generate implied end tags" algorithm.
69  ## NOTE: There is a code where a modified version of END_TAG_OPTIONAL_EL  ## NOTE: There is a code where a modified version of
70  ## is used in "generate implied end tags" implementation (search for the  ## END_TAG_OPTIONAL_EL is used in "generate implied end tags"
71  ## function mae).  ## implementation (search for the algorithm name).
72  sub END_TAG_OPTIONAL_EL () {  sub END_TAG_OPTIONAL_EL () {
73    DD_EL |    DD_EL |
74    DT_EL |    DT_EL |
75    LI_EL |    LI_EL |
76      OPTION_EL |
77      OPTGROUP_EL |
78    P_EL |    P_EL |
79    RUBY_COMPONENT_EL    RUBY_COMPONENT_EL
80  }  }
# Line 141  my $el_category = { Line 143  my $el_category = {
143    address => ADDRESS_EL,    address => ADDRESS_EL,
144    applet => MISC_SCOPING_EL,    applet => MISC_SCOPING_EL,
145    area => MISC_SPECIAL_EL,    area => MISC_SPECIAL_EL,
146      article => MISC_SPECIAL_EL,
147      aside => MISC_SPECIAL_EL,
148    b => FORMATTING_EL,    b => FORMATTING_EL,
149    base => MISC_SPECIAL_EL,    base => MISC_SPECIAL_EL,
150    basefont => MISC_SPECIAL_EL,    basefont => MISC_SPECIAL_EL,
# Line 154  my $el_category = { Line 158  my $el_category = {
158    center => MISC_SPECIAL_EL,    center => MISC_SPECIAL_EL,
159    col => MISC_SPECIAL_EL,    col => MISC_SPECIAL_EL,
160    colgroup => MISC_SPECIAL_EL,    colgroup => MISC_SPECIAL_EL,
161      command => MISC_SPECIAL_EL,
162      datagrid => MISC_SPECIAL_EL,
163    dd => DD_EL,    dd => DD_EL,
164      details => MISC_SPECIAL_EL,
165      dialog => MISC_SPECIAL_EL,
166    dir => MISC_SPECIAL_EL,    dir => MISC_SPECIAL_EL,
167    div => DIV_EL,    div => DIV_EL,
168    dl => MISC_SPECIAL_EL,    dl => MISC_SPECIAL_EL,
169    dt => DT_EL,    dt => DT_EL,
170    em => FORMATTING_EL,    em => FORMATTING_EL,
171    embed => MISC_SPECIAL_EL,    embed => MISC_SPECIAL_EL,
172      eventsource => MISC_SPECIAL_EL,
173    fieldset => MISC_SPECIAL_EL,    fieldset => MISC_SPECIAL_EL,
174      figure => MISC_SPECIAL_EL,
175    font => FORMATTING_EL,    font => FORMATTING_EL,
176      footer => MISC_SPECIAL_EL,
177    form => FORM_EL,    form => FORM_EL,
178    frame => MISC_SPECIAL_EL,    frame => MISC_SPECIAL_EL,
179    frameset => FRAMESET_EL,    frameset => FRAMESET_EL,
# Line 173  my $el_category = { Line 184  my $el_category = {
184    h5 => HEADING_EL,    h5 => HEADING_EL,
185    h6 => HEADING_EL,    h6 => HEADING_EL,
186    head => MISC_SPECIAL_EL,    head => MISC_SPECIAL_EL,
187      header => MISC_SPECIAL_EL,
188    hr => MISC_SPECIAL_EL,    hr => MISC_SPECIAL_EL,
189    html => HTML_EL,    html => HTML_EL,
190    i => FORMATTING_EL,    i => FORMATTING_EL,
191    iframe => MISC_SPECIAL_EL,    iframe => MISC_SPECIAL_EL,
192    img => MISC_SPECIAL_EL,    img => MISC_SPECIAL_EL,
193      #image => MISC_SPECIAL_EL, ## NOTE: Commented out in the spec.
194    input => MISC_SPECIAL_EL,    input => MISC_SPECIAL_EL,
195    isindex => MISC_SPECIAL_EL,    isindex => MISC_SPECIAL_EL,
196    li => LI_EL,    li => LI_EL,
# Line 186  my $el_category = { Line 199  my $el_category = {
199    marquee => MISC_SCOPING_EL,    marquee => MISC_SCOPING_EL,
200    menu => MISC_SPECIAL_EL,    menu => MISC_SPECIAL_EL,
201    meta => MISC_SPECIAL_EL,    meta => MISC_SPECIAL_EL,
202      nav => MISC_SPECIAL_EL,
203    nobr => NOBR_EL | FORMATTING_EL,    nobr => NOBR_EL | FORMATTING_EL,
204    noembed => MISC_SPECIAL_EL,    noembed => MISC_SPECIAL_EL,
205    noframes => MISC_SPECIAL_EL,    noframes => MISC_SPECIAL_EL,
# Line 204  my $el_category = { Line 218  my $el_category = {
218    s => FORMATTING_EL,    s => FORMATTING_EL,
219    script => MISC_SPECIAL_EL,    script => MISC_SPECIAL_EL,
220    select => SELECT_EL,    select => SELECT_EL,
221      section => MISC_SPECIAL_EL,
222    small => FORMATTING_EL,    small => FORMATTING_EL,
223    spacer => MISC_SPECIAL_EL,    spacer => MISC_SPECIAL_EL,
224    strike => FORMATTING_EL,    strike => FORMATTING_EL,
# Line 323  my $foreign_attr_xname = { Line 338  my $foreign_attr_xname = {
338    
339  ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.  ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.
340    
341  my $c1_entity_char = {  my $charref_map = {
342      0x0D => 0x000A,
343    0x80 => 0x20AC,    0x80 => 0x20AC,
344    0x81 => 0xFFFD,    0x81 => 0xFFFD,
345    0x82 => 0x201A,    0x82 => 0x201A,
# Line 356  my $c1_entity_char = { Line 372  my $c1_entity_char = {
372    0x9D => 0xFFFD,    0x9D => 0xFFFD,
373    0x9E => 0x017E,    0x9E => 0x017E,
374    0x9F => 0x0178,    0x9F => 0x0178,
375  }; # $c1_entity_char  }; # $charref_map
376    $charref_map->{$_} = 0xFFFD
377        for 0x0000..0x0008, 0x000B, 0x000E..0x001F, 0x007F,
378            0xD800..0xDFFF, 0xFDD0..0xFDDF, ## ISSUE: 0xFDEF
379            0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF,
380            0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE,
381            0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
382            0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE,
383            0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, 0x10FFFF;
384    
385    ## TODO: Invoke the reset algorithm when a resettable element is
386    ## created (cf. HTML5 revision 2259).
387    
388  sub parse_byte_string ($$$$;$) {  sub parse_byte_string ($$$$;$) {
389    my $self = shift;    my $self = shift;
# Line 401  sub parse_byte_stream ($$$$;$$) { Line 428  sub parse_byte_stream ($$$$;$$) {
428            ## TODO: Is this ok?  Transfer protocol's parameter should be            ## TODO: Is this ok?  Transfer protocol's parameter should be
429            ## interpreted in its semantics?            ## interpreted in its semantics?
430    
       ## ISSUE: Unsupported encoding is not ignored according to the spec.  
431        ($char_stream, $e_status) = $charset->get_decode_handle        ($char_stream, $e_status) = $charset->get_decode_handle
432            ($byte_stream, allow_error_reporting => 1,            ($byte_stream, allow_error_reporting => 1,
433             allow_fallback => 1);             allow_fallback => 1);
# Line 409  sub parse_byte_stream ($$$$;$$) { Line 435  sub parse_byte_stream ($$$$;$$) {
435          $self->{confident} = 1;          $self->{confident} = 1;
436          last SNIFFING;          last SNIFFING;
437        } else {        } else {
438          ## TODO: unsupported error          !!!parse-error (type => 'charset:not supported',
439                            layer => 'encode',
440                            line => 1, column => 1,
441                            value => $charset_name,
442                            level => $self->{level}->{uncertain});
443        }        }
444      }      }
445    
# Line 966  sub _initialize_tokenizer ($) { Line 996  sub _initialize_tokenizer ($) {
996  ## TODO: Polytheistic slash SHOULD NOT be used. (Applied only to atheists.)  ## TODO: Polytheistic slash SHOULD NOT be used. (Applied only to atheists.)
997  ## (This requirement was dropped from HTML5 spec, unfortunately.)  ## (This requirement was dropped from HTML5 spec, unfortunately.)
998    
999    my $is_space = {
1000      0x0009 => 1, # CHARACTER TABULATION (HT)
1001      0x000A => 1, # LINE FEED (LF)
1002      #0x000B => 0, # LINE TABULATION (VT)
1003      0x000C => 1, # FORM FEED (FF)
1004      #0x000D => 1, # CARRIAGE RETURN (CR)
1005      0x0020 => 1, # SPACE (SP)
1006    };
1007    
1008  sub _get_next_token ($) {  sub _get_next_token ($) {
1009    my $self = shift;    my $self = shift;
1010    
# Line 1336  sub _get_next_token ($) { Line 1375  sub _get_next_token ($) {
1375            redo A;            redo A;
1376          }          }
1377        } else { # after "<{tag-name}"        } else { # after "<{tag-name}"
1378          unless ({          unless ($is_space->{$self->{nc}} or
1379                   0x0009 => 1, # HT                  {
                  0x000A => 1, # LF  
                  0x000B => 1, # VT  
                  0x000C => 1, # FF  
                  0x0020 => 1, # SP  
1380                   0x003E => 1, # >                   0x003E => 1, # >
1381                   0x002F => 1, # /                   0x002F => 1, # /
1382                   -1 => 1, # EOF                   -1 => 1, # EOF
# Line 1368  sub _get_next_token ($) { Line 1403  sub _get_next_token ($) {
1403          }          }
1404        }        }
1405      } elsif ($self->{state} == TAG_NAME_STATE) {      } elsif ($self->{state} == TAG_NAME_STATE) {
1406        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
1407          !!!cp (34);          !!!cp (34);
1408          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1409          !!!next-input-character;          !!!next-input-character;
# Line 1444  sub _get_next_token ($) { Line 1475  sub _get_next_token ($) {
1475          redo A;          redo A;
1476        }        }
1477      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
1478        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
1479          !!!cp (45);          !!!cp (45);
1480          ## Stay in the state          ## Stay in the state
1481          !!!next-input-character;          !!!next-input-character;
# Line 1544  sub _get_next_token ($) { Line 1571  sub _get_next_token ($) {
1571          }          }
1572        }; # $before_leave        }; # $before_leave
1573    
1574        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
1575          !!!cp (59);          !!!cp (59);
1576          $before_leave->();          $before_leave->();
1577          $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;          $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
# Line 1631  sub _get_next_token ($) { Line 1654  sub _get_next_token ($) {
1654          redo A;          redo A;
1655        }        }
1656      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1657        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
1658          !!!cp (71);          !!!cp (71);
1659          ## Stay in the state          ## Stay in the state
1660          !!!next-input-character;          !!!next-input-character;
# Line 1722  sub _get_next_token ($) { Line 1741  sub _get_next_token ($) {
1741          redo A;                  redo A;        
1742        }        }
1743      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1744        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP        
1745          !!!cp (83);          !!!cp (83);
1746          ## Stay in the state          ## Stay in the state
1747          !!!next-input-character;          !!!next-input-character;
# Line 1907  sub _get_next_token ($) { Line 1922  sub _get_next_token ($) {
1922          redo A;          redo A;
1923        }        }
1924      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1925        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # HT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
1926          !!!cp (107);          !!!cp (107);
1927          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1928          !!!next-input-character;          !!!next-input-character;
# Line 1993  sub _get_next_token ($) { Line 2004  sub _get_next_token ($) {
2004          redo A;          redo A;
2005        }        }
2006      } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {      } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
2007        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
2008          !!!cp (118);          !!!cp (118);
2009          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
2010          !!!next-input-character;          !!!next-input-character;
# Line 2438  sub _get_next_token ($) { Line 2445  sub _get_next_token ($) {
2445          redo A;          redo A;
2446        }        }
2447      } elsif ($self->{state} == DOCTYPE_STATE) {      } elsif ($self->{state} == DOCTYPE_STATE) {
2448        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
2449          !!!cp (155);          !!!cp (155);
2450          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2451          !!!next-input-character;          !!!next-input-character;
# Line 2455  sub _get_next_token ($) { Line 2458  sub _get_next_token ($) {
2458          redo A;          redo A;
2459        }        }
2460      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
2461        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
2462          !!!cp (157);          !!!cp (157);
2463          ## Stay in the state          ## Stay in the state
2464          !!!next-input-character;          !!!next-input-character;
# Line 2493  sub _get_next_token ($) { Line 2492  sub _get_next_token ($) {
2492        }        }
2493      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
2494  ## ISSUE: Redundant "First," in the spec.  ## ISSUE: Redundant "First," in the spec.
2495        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
2496          !!!cp (161);          !!!cp (161);
2497          $self->{state} = AFTER_DOCTYPE_NAME_STATE;          $self->{state} = AFTER_DOCTYPE_NAME_STATE;
2498          !!!next-input-character;          !!!next-input-character;
# Line 2529  sub _get_next_token ($) { Line 2524  sub _get_next_token ($) {
2524          redo A;          redo A;
2525        }        }
2526      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
2527        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
2528          !!!cp (165);          !!!cp (165);
2529          ## Stay in the state          ## Stay in the state
2530          !!!next-input-character;          !!!next-input-character;
# Line 2656  sub _get_next_token ($) { Line 2647  sub _get_next_token ($) {
2647          redo A;          redo A;
2648        }        }
2649      } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {      } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2650        if ({        if ($is_space->{$self->{nc}}) {
             0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,  
             #0x000D => 1, # HT, LF, VT, FF, SP, CR  
           }->{$self->{nc}}) {  
2651          !!!cp (181);          !!!cp (181);
2652          ## Stay in the state          ## Stay in the state
2653          !!!next-input-character;          !!!next-input-character;
# Line 2786  sub _get_next_token ($) { Line 2774  sub _get_next_token ($) {
2774          redo A;          redo A;
2775        }        }
2776      } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2777        if ({        if ($is_space->{$self->{nc}}) {
             0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,  
             #0x000D => 1, # HT, LF, VT, FF, SP, CR  
           }->{$self->{nc}}) {  
2778          !!!cp (195);          !!!cp (195);
2779          ## Stay in the state          ## Stay in the state
2780          !!!next-input-character;          !!!next-input-character;
# Line 2835  sub _get_next_token ($) { Line 2820  sub _get_next_token ($) {
2820          redo A;          redo A;
2821        }        }
2822      } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {      } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2823        if ({        if ($is_space->{$self->{nc}}) {
             0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,  
             #0x000D => 1, # HT, LF, VT, FF, SP, CR  
           }->{$self->{nc}}) {  
2824          !!!cp (201);          !!!cp (201);
2825          ## Stay in the state          ## Stay in the state
2826          !!!next-input-character;          !!!next-input-character;
# Line 2964  sub _get_next_token ($) { Line 2946  sub _get_next_token ($) {
2946          redo A;          redo A;
2947        }        }
2948      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2949        if ({        if ($is_space->{$self->{nc}}) {
             0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,  
             #0x000D => 1, # HT, LF, VT, FF, SP, CR  
           }->{$self->{nc}}) {  
2950          !!!cp (215);          !!!cp (215);
2951          ## Stay in the state          ## Stay in the state
2952          !!!next-input-character;          !!!next-input-character;
# Line 3099  sub _get_next_token ($) { Line 3078  sub _get_next_token ($) {
3078          redo A;          redo A;
3079        }        }
3080      } elsif ($self->{state} == ENTITY_STATE) {      } elsif ($self->{state} == ENTITY_STATE) {
3081        if ({        if ($is_space->{$self->{nc}} or
3082          0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,            {
3083          0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, &              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
3084          $self->{entity_add} => 1,              $self->{entity_add} => 1,
3085        }->{$self->{nc}}) {            }->{$self->{nc}}) {
3086          !!!cp (1001);          !!!cp (1001);
3087          ## Don't consume          ## Don't consume
3088          ## No error          ## No error
# Line 3222  sub _get_next_token ($) { Line 3201  sub _get_next_token ($) {
3201        my $code = $self->{s_kwd};        my $code = $self->{s_kwd};
3202        my $l = $self->{line_prev};        my $l = $self->{line_prev};
3203        my $c = $self->{column_prev};        my $c = $self->{column_prev};
3204        if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {        if ($charref_map->{$code}) {
3205          !!!cp (1015);          !!!cp (1015);
3206          !!!parse-error (type => 'invalid character reference',          !!!parse-error (type => 'invalid character reference',
3207                          text => (sprintf 'U+%04X', $code),                          text => (sprintf 'U+%04X', $code),
3208                          line => $l, column => $c);                          line => $l, column => $c);
3209          $code = 0xFFFD;          $code = $charref_map->{$code};
3210        } elsif ($code > 0x10FFFF) {        } elsif ($code > 0x10FFFF) {
3211          !!!cp (1016);          !!!cp (1016);
3212          !!!parse-error (type => 'invalid character reference',          !!!parse-error (type => 'invalid character reference',
3213                          text => (sprintf 'U-%08X', $code),                          text => (sprintf 'U-%08X', $code),
3214                          line => $l, column => $c);                          line => $l, column => $c);
3215          $code = 0xFFFD;          $code = 0xFFFD;
       } elsif ($code == 0x000D) {  
         !!!cp (1017);  
         !!!parse-error (type => 'CR character reference',  
                         line => $l, column => $c);  
         $code = 0x000A;  
       } elsif (0x80 <= $code and $code <= 0x9F) {  
         !!!cp (1018);  
         !!!parse-error (type => 'C1 character reference',  
                         text => (sprintf 'U+%04X', $code),  
                         line => $l, column => $c);  
         $code = $c1_entity_char->{$code};  
3216        }        }
3217    
3218        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
# Line 3341  sub _get_next_token ($) { Line 3309  sub _get_next_token ($) {
3309        my $code = $self->{s_kwd};        my $code = $self->{s_kwd};
3310        my $l = $self->{line_prev};        my $l = $self->{line_prev};
3311        my $c = $self->{column_prev};        my $c = $self->{column_prev};
3312        if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {        if ($charref_map->{$code}) {
3313          !!!cp (1008);          !!!cp (1008);
3314          !!!parse-error (type => 'invalid character reference',          !!!parse-error (type => 'invalid character reference',
3315                          text => (sprintf 'U+%04X', $code),                          text => (sprintf 'U+%04X', $code),
3316                          line => $l, column => $c);                          line => $l, column => $c);
3317          $code = 0xFFFD;          $code = $charref_map->{$code};
3318        } elsif ($code > 0x10FFFF) {        } elsif ($code > 0x10FFFF) {
3319          !!!cp (1009);          !!!cp (1009);
3320          !!!parse-error (type => 'invalid character reference',          !!!parse-error (type => 'invalid character reference',
3321                          text => (sprintf 'U-%08X', $code),                          text => (sprintf 'U-%08X', $code),
3322                          line => $l, column => $c);                          line => $l, column => $c);
3323          $code = 0xFFFD;          $code = 0xFFFD;
       } elsif ($code == 0x000D) {  
         !!!cp (1010);  
         !!!parse-error (type => 'CR character reference', line => $l, column => $c);  
         $code = 0x000A;  
       } elsif (0x80 <= $code and $code <= 0x9F) {  
         !!!cp (1011);  
         !!!parse-error (type => 'C1 character reference', text => (sprintf 'U+%04X', $code), line => $l, column => $c);  
         $code = $c1_entity_char->{$code};  
3324        }        }
3325    
3326        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
# Line 3701  sub _tree_construction_initial ($) { Line 3661  sub _tree_construction_initial ($) {
3661        !!!ack-later;        !!!ack-later;
3662        return;        return;
3663      } elsif ($token->{type} == CHARACTER_TOKEN) {      } elsif ($token->{type} == CHARACTER_TOKEN) {
3664        if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D        if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
3665          ## Ignore the token          ## Ignore the token
3666    
3667          unless (length $token->{data}) {          unless (length $token->{data}) {
# Line 3758  sub _tree_construction_root_element ($) Line 3718  sub _tree_construction_root_element ($)
3718          !!!next-token;          !!!next-token;
3719          redo B;          redo B;
3720        } elsif ($token->{type} == CHARACTER_TOKEN) {        } elsif ($token->{type} == CHARACTER_TOKEN) {
3721          if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D          if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
3722            ## Ignore the token.            ## Ignore the token.
3723    
3724            unless (length $token->{data}) {            unless (length $token->{data}) {
# Line 4572  sub _tree_construction_main ($) { Line 4532  sub _tree_construction_main ($) {
4532    
4533      if ($self->{insertion_mode} & HEAD_IMS) {      if ($self->{insertion_mode} & HEAD_IMS) {
4534        if ($token->{type} == CHARACTER_TOKEN) {        if ($token->{type} == CHARACTER_TOKEN) {
4535          if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {          if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4536            unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {            unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4537              !!!cp ('t88.2');              !!!cp ('t88.2');
4538              $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);              $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
# Line 4697  sub _tree_construction_main ($) { Line 4657  sub _tree_construction_main ($) {
4657                  !!!cp ('t101');                  !!!cp ('t101');
4658                }                }
4659                !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);                !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4660                pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.                pop @{$self->{open_elements}};
4661                pop @{$self->{open_elements}} # <head>                pop @{$self->{open_elements}} # <head>
4662                    if $self->{insertion_mode} == AFTER_HEAD_IM;                    if $self->{insertion_mode} == AFTER_HEAD_IM;
4663                !!!nack ('t101.1');                !!!nack ('t101.1');
4664                !!!next-token;                !!!next-token;
4665                next B;                next B;
4666              } elsif ($token->{tag_name} eq 'link') {          } elsif ($token->{tag_name} eq 'link') {
4667                ## NOTE: There is a "as if in head" code clone.            ## NOTE: There is a "as if in head" code clone.
4668                if ($self->{insertion_mode} == AFTER_HEAD_IM) {            if ($self->{insertion_mode} == AFTER_HEAD_IM) {
4669                  !!!cp ('t102');              !!!cp ('t102');
4670                  !!!parse-error (type => 'after head',              !!!parse-error (type => 'after head',
4671                                  text => $token->{tag_name}, token => $token);                              text => $token->{tag_name}, token => $token);
4672                  push @{$self->{open_elements}},              push @{$self->{open_elements}},
4673                      [$self->{head_element}, $el_category->{head}];                  [$self->{head_element}, $el_category->{head}];
4674                } else {            } else {
4675                  !!!cp ('t103');              !!!cp ('t103');
4676                }            }
4677                !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);            !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4678                pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.            pop @{$self->{open_elements}};
4679                pop @{$self->{open_elements}} # <head>            pop @{$self->{open_elements}} # <head>
4680                    if $self->{insertion_mode} == AFTER_HEAD_IM;                if $self->{insertion_mode} == AFTER_HEAD_IM;
4681                !!!ack ('t103.1');            !!!ack ('t103.1');
4682                !!!next-token;            !!!next-token;
4683                next B;            next B;
4684            } elsif ($token->{tag_name} eq 'command' or
4685                     $token->{tag_name} eq 'eventsource') {
4686              if ($self->{insertion_mode} == IN_HEAD_IM) {
4687                ## NOTE: If the insertion mode at the time of the emission
4688                ## of the token was "before head", $self->{insertion_mode}
4689                ## is already changed to |IN_HEAD_IM|.
4690    
4691                ## NOTE: There is a "as if in head" code clone.
4692                !!!insert-element ($token->{tag_name}, $token->{attributes}, $token);
4693                pop @{$self->{open_elements}};
4694                pop @{$self->{open_elements}} # <head>
4695                    if $self->{insertion_mode} == AFTER_HEAD_IM;
4696                !!!ack ('t103.2');
4697                !!!next-token;
4698                next B;
4699              } else {
4700                ## NOTE: "in head noscript" or "after head" insertion mode
4701                ## - in these cases, these tags are treated as same as
4702                ## normal in-body tags.
4703                !!!cp ('t103.3');
4704                #
4705              }
4706              } elsif ($token->{tag_name} eq 'meta') {              } elsif ($token->{tag_name} eq 'meta') {
4707                ## NOTE: There is a "as if in head" code clone.                ## NOTE: There is a "as if in head" code clone.
4708                if ($self->{insertion_mode} == AFTER_HEAD_IM) {                if ($self->{insertion_mode} == AFTER_HEAD_IM) {
# Line 4751  sub _tree_construction_main ($) { Line 4733  sub _tree_construction_main ($) {
4733                  } elsif ($token->{attributes}->{content}) {                  } elsif ($token->{attributes}->{content}) {
4734                    if ($token->{attributes}->{content}->{value}                    if ($token->{attributes}->{content}->{value}
4735                        =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]                        =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4736                            [\x09-\x0D\x20]*=                            [\x09\x0A\x0C\x0D\x20]*=
4737                            [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|                            [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4738                            ([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) {                            ([^"'\x09\x0A\x0C\x0D\x20]
4739                               [^\x09\x0A\x0C\x0D\x20\x3B]*))/x) {
4740                      !!!cp ('t107');                      !!!cp ('t107');
4741                      ## NOTE: Whether the encoding is supported or not is handled                      ## NOTE: Whether the encoding is supported or not is handled
4742                      ## in the {change_encoding} callback.                      ## in the {change_encoding} callback.
# Line 5562  sub _tree_construction_main ($) { Line 5545  sub _tree_construction_main ($) {
5545      } elsif ($self->{insertion_mode} & TABLE_IMS) {      } elsif ($self->{insertion_mode} & TABLE_IMS) {
5546        if ($token->{type} == CHARACTER_TOKEN) {        if ($token->{type} == CHARACTER_TOKEN) {
5547          if (not $open_tables->[-1]->[1] and # tainted          if (not $open_tables->[-1]->[1] and # tainted
5548              $token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {              $token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
5549            $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);            $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5550                                
5551            unless (length $token->{data}) {            unless (length $token->{data}) {
# Line 6246  sub _tree_construction_main ($) { Line 6229  sub _tree_construction_main ($) {
6229        }        }
6230      } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {      } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
6231            if ($token->{type} == CHARACTER_TOKEN) {            if ($token->{type} == CHARACTER_TOKEN) {
6232              if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {              if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
6233                $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);                $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6234                unless (length $token->{data}) {                unless (length $token->{data}) {
6235                  !!!cp ('t260');                  !!!cp ('t260');
# Line 6587  sub _tree_construction_main ($) { Line 6570  sub _tree_construction_main ($) {
6570        }        }
6571      } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {      } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
6572        if ($token->{type} == CHARACTER_TOKEN) {        if ($token->{type} == CHARACTER_TOKEN) {
6573          if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {          if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
6574            my $data = $1;            my $data = $1;
6575            ## As if in body            ## As if in body
6576            $reconstruct_active_formatting_elements->($insert_to_current);            $reconstruct_active_formatting_elements->($insert_to_current);
# Line 6604  sub _tree_construction_main ($) { Line 6587  sub _tree_construction_main ($) {
6587          if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {          if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6588            !!!cp ('t301');            !!!cp ('t301');
6589            !!!parse-error (type => 'after html:#text', token => $token);            !!!parse-error (type => 'after html:#text', token => $token);
6590              #
           ## Reprocess in the "after body" insertion mode.  
6591          } else {          } else {
6592            !!!cp ('t302');            !!!cp ('t302');
6593              ## "after body" insertion mode
6594              !!!parse-error (type => 'after body:#text', token => $token);
6595              #
6596          }          }
           
         ## "after body" insertion mode  
         !!!parse-error (type => 'after body:#text', token => $token);  
6597    
6598          $self->{insertion_mode} = IN_BODY_IM;          $self->{insertion_mode} = IN_BODY_IM;
6599          ## reprocess          ## reprocess
# Line 6621  sub _tree_construction_main ($) { Line 6603  sub _tree_construction_main ($) {
6603            !!!cp ('t303');            !!!cp ('t303');
6604            !!!parse-error (type => 'after html',            !!!parse-error (type => 'after html',
6605                            text => $token->{tag_name}, token => $token);                            text => $token->{tag_name}, token => $token);
6606                        #
           ## Reprocess in the "after body" insertion mode.  
6607          } else {          } else {
6608            !!!cp ('t304');            !!!cp ('t304');
6609              ## "after body" insertion mode
6610              !!!parse-error (type => 'after body',
6611                              text => $token->{tag_name}, token => $token);
6612              #
6613          }          }
6614    
         ## "after body" insertion mode  
         !!!parse-error (type => 'after body',  
                         text => $token->{tag_name}, token => $token);  
   
6615          $self->{insertion_mode} = IN_BODY_IM;          $self->{insertion_mode} = IN_BODY_IM;
6616          !!!ack-later;          !!!ack-later;
6617          ## reprocess          ## reprocess
# Line 6641  sub _tree_construction_main ($) { Line 6622  sub _tree_construction_main ($) {
6622            !!!parse-error (type => 'after html:/',            !!!parse-error (type => 'after html:/',
6623                            text => $token->{tag_name}, token => $token);                            text => $token->{tag_name}, token => $token);
6624                        
6625            $self->{insertion_mode} = AFTER_BODY_IM;            $self->{insertion_mode} = IN_BODY_IM;
6626            ## Reprocess in the "after body" insertion mode.            ## Reprocess.
6627              next B;
6628          } else {          } else {
6629            !!!cp ('t306');            !!!cp ('t306');
6630          }          }
# Line 6680  sub _tree_construction_main ($) { Line 6662  sub _tree_construction_main ($) {
6662        }        }
6663      } elsif ($self->{insertion_mode} & FRAME_IMS) {      } elsif ($self->{insertion_mode} & FRAME_IMS) {
6664        if ($token->{type} == CHARACTER_TOKEN) {        if ($token->{type} == CHARACTER_TOKEN) {
6665          if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {          if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
6666            $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);            $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6667                        
6668            unless (length $token->{data}) {            unless (length $token->{data}) {
# Line 6690  sub _tree_construction_main ($) { Line 6672  sub _tree_construction_main ($) {
6672            }            }
6673          }          }
6674                    
6675          if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) {          if ($token->{data} =~ s/^[^\x09\x0A\x0C\x20]+//) {
6676            if ($self->{insertion_mode} == IN_FRAMESET_IM) {            if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6677              !!!cp ('t311');              !!!cp ('t311');
6678              !!!parse-error (type => 'in frameset:#text', token => $token);              !!!parse-error (type => 'in frameset:#text', token => $token);
# Line 6838  sub _tree_construction_main ($) { Line 6820  sub _tree_construction_main ($) {
6820          $parse_rcdata->(CDATA_CONTENT_MODEL);          $parse_rcdata->(CDATA_CONTENT_MODEL);
6821          next B;          next B;
6822        } elsif ({        } elsif ({
6823                  base => 1, link => 1,                  base => 1, command => 1, eventsource => 1, link => 1,
6824                 }->{$token->{tag_name}}) {                 }->{$token->{tag_name}}) {
6825          !!!cp ('t334');          !!!cp ('t334');
6826          ## NOTE: This is an "as if in head" code clone, only "-t" differs          ## NOTE: This is an "as if in head" code clone, only "-t" differs
6827          !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);          !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6828          pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.          pop @{$self->{open_elements}};
6829          !!!ack ('t334.1');          !!!ack ('t334.1');
6830          !!!next-token;          !!!next-token;
6831          next B;          next B;
6832        } elsif ($token->{tag_name} eq 'meta') {        } elsif ($token->{tag_name} eq 'meta') {
6833          ## NOTE: This is an "as if in head" code clone, only "-t" differs          ## NOTE: This is an "as if in head" code clone, only "-t" differs
6834          !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);          !!!insert-element-t ($token->{tag_name}, $token->{attributes}, $token);
6835          my $meta_el = pop @{$self->{open_elements}}; ## ISSUE: This step is missing in the spec.          my $meta_el = pop @{$self->{open_elements}};
6836    
6837          unless ($self->{confident}) {          unless ($self->{confident}) {
6838            if ($token->{attributes}->{charset}) {            if ($token->{attributes}->{charset}) {
# Line 6867  sub _tree_construction_main ($) { Line 6849  sub _tree_construction_main ($) {
6849            } elsif ($token->{attributes}->{content}) {            } elsif ($token->{attributes}->{content}) {
6850              if ($token->{attributes}->{content}->{value}              if ($token->{attributes}->{content}->{value}
6851                  =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]                  =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
6852                      [\x09-\x0D\x20]*=                      [\x09\x0A\x0C\x0D\x20]*=
6853                      [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|                      [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
6854                      ([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) {                      ([^"'\x09\x0A\x0C\x0D\x20][^\x09\x0A\x0C\x0D\x20\x3B]*))
6855                       /x) {
6856                !!!cp ('t336');                !!!cp ('t336');
6857                ## NOTE: Whether the encoding is supported or not is handled                ## NOTE: Whether the encoding is supported or not is handled
6858                ## in the {change_encoding} callback.                ## in the {change_encoding} callback.
# Line 6928  sub _tree_construction_main ($) { Line 6911  sub _tree_construction_main ($) {
6911          !!!next-token;          !!!next-token;
6912          next B;          next B;
6913        } elsif ({        } elsif ({
6914                  address => 1, blockquote => 1, center => 1, dir => 1,                  ## NOTE: Start tags for non-phrasing flow content elements
6915                  div => 1, dl => 1, fieldset => 1,  
6916                  h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,                  ## NOTE: The normal one
6917                  menu => 1, ol => 1, p => 1, ul => 1,                  address => 1, article => 1, aside => 1, blockquote => 1,
6918                    center => 1, datagrid => 1, details => 1, dialog => 1,
6919                    dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
6920                    footer => 1, h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1,
6921                    h6 => 1, header => 1, menu => 1, nav => 1, ol => 1, p => 1,
6922                    section => 1, ul => 1,
6923                    ## NOTE: As normal, but drops leading newline
6924                  pre => 1, listing => 1,                  pre => 1, listing => 1,
6925                    ## NOTE: As normal, but interacts with the form element pointer
6926                  form => 1,                  form => 1,
6927                    
6928                  table => 1,                  table => 1,
6929                  hr => 1,                  hr => 1,
6930                 }->{$token->{tag_name}}) {                 }->{$token->{tag_name}}) {
# Line 7000  sub _tree_construction_main ($) { Line 6991  sub _tree_construction_main ($) {
6991            !!!next-token;            !!!next-token;
6992          }          }
6993          next B;          next B;
6994        } elsif ({li => 1, dt => 1, dd => 1}->{$token->{tag_name}}) {        } elsif ({
6995                    ## NOTE: As normal, but imply </li> when there's another <li>
6996                    li => 1,
6997                    ## NOTE: As normal, but imply </dt> or </dd> when ...
6998                    dt => 1, dd => 1,
6999                   }->{$token->{tag_name}}) {
7000          ## has a p element in scope          ## has a p element in scope
7001          INSCOPE: for (reverse @{$self->{open_elements}}) {          INSCOPE: for (reverse @{$self->{open_elements}}) {
7002            if ($_->[1] & P_EL) {            if ($_->[1] & P_EL) {
# Line 7014  sub _tree_construction_main ($) { Line 7010  sub _tree_construction_main ($) {
7010              last INSCOPE;              last INSCOPE;
7011            }            }
7012          } # INSCOPE          } # INSCOPE
7013    
7014            ## NOTE: Special, Scope (<li><foo><li> == <li><foo><li/></foo></li>)
7015              ## Interpreted as <li><foo/></li><li/> (non-conforming)
7016              ## blockquote (O9.27), center (O), dd (Fx3, O, S3.1.2, IE7),
7017              ## dt (Fx, O, S, IE), dl (O), fieldset (O, S, IE), form (Fx, O, S),
7018              ## hn (O), pre (O), applet (O, S), button (O, S), marquee (Fx, O, S),
7019              ## object (Fx)
7020              ## Generate non-tree (non-conforming)
7021              ## basefont (IE7 (where basefont is non-void)), center (IE),
7022              ## form (IE), hn (IE)
7023            ## address, div, p (<li><foo><li> == <li><foo/></li><li/>)
7024              ## Interpreted as <li><foo><li/></foo></li> (non-conforming)
7025              ## div (Fx, S)
7026                        
7027          ## Step 1          ## Step 1
7028          my $i = -1;          my $i = -1;
# Line 7062  sub _tree_construction_main ($) { Line 7071  sub _tree_construction_main ($) {
7071          !!!next-token;          !!!next-token;
7072          next B;          next B;
7073        } elsif ($token->{tag_name} eq 'plaintext') {        } elsif ($token->{tag_name} eq 'plaintext') {
7074            ## NOTE: As normal, but effectively ends parsing
7075    
7076          ## has a p element in scope          ## has a p element in scope
7077          INSCOPE: for (reverse @{$self->{open_elements}}) {          INSCOPE: for (reverse @{$self->{open_elements}}) {
7078            if ($_->[1] & P_EL) {            if ($_->[1] & P_EL) {
# Line 7394  sub _tree_construction_main ($) { Line 7405  sub _tree_construction_main ($) {
7405            !!!nack ('t380.1');            !!!nack ('t380.1');
7406          } elsif ({          } elsif ({
7407                    b => 1, big => 1, em => 1, font => 1, i => 1,                    b => 1, big => 1, em => 1, font => 1, i => 1,
7408                    s => 1, small => 1, strile => 1,                    s => 1, small => 1, strike => 1,
7409                    strong => 1, tt => 1, u => 1,                    strong => 1, tt => 1, u => 1,
7410                   }->{$token->{tag_name}}) {                   }->{$token->{tag_name}}) {
7411            !!!cp ('t375');            !!!cp ('t375');
# Line 7498  sub _tree_construction_main ($) { Line 7509  sub _tree_construction_main ($) {
7509            next B;            next B;
7510          }          }
7511        } elsif ({        } elsif ({
7512                  address => 1, blockquote => 1, center => 1, dir => 1,                  ## NOTE: End tags for non-phrasing flow content elements
7513                  div => 1, dl => 1, fieldset => 1, listing => 1,  
7514                  menu => 1, ol => 1, pre => 1, ul => 1,                  ## NOTE: The normal ones
7515                    address => 1, article => 1, aside => 1, blockquote => 1,
7516                    center => 1, datagrid => 1, details => 1, dialog => 1,
7517                    dir => 1, div => 1, dl => 1, fieldset => 1, figure => 1,
7518                    footer => 1, header => 1, listing => 1, menu => 1, nav => 1,
7519                    ol => 1, pre => 1, section => 1, ul => 1,
7520    
7521                    ## NOTE: As normal, but ... optional tags
7522                  dd => 1, dt => 1, li => 1,                  dd => 1, dt => 1, li => 1,
7523    
7524                  applet => 1, button => 1, marquee => 1, object => 1,                  applet => 1, button => 1, marquee => 1, object => 1,
7525                 }->{$token->{tag_name}}) {                 }->{$token->{tag_name}}) {
7526          ## has an element in scope          ## has an element in scope
# Line 7530  sub _tree_construction_main ($) { Line 7549  sub _tree_construction_main ($) {
7549                    dd => ($token->{tag_name} ne 'dd'),                    dd => ($token->{tag_name} ne 'dd'),
7550                    dt => ($token->{tag_name} ne 'dt'),                    dt => ($token->{tag_name} ne 'dt'),
7551                    li => ($token->{tag_name} ne 'li'),                    li => ($token->{tag_name} ne 'li'),
7552                      option => 1,
7553                      optgroup => 1,
7554                    p => 1,                    p => 1,
7555                    rt => 1,                    rt => 1,
7556                    rp => 1,                    rp => 1,
# Line 7562  sub _tree_construction_main ($) { Line 7583  sub _tree_construction_main ($) {
7583          !!!next-token;          !!!next-token;
7584          next B;          next B;
7585        } elsif ($token->{tag_name} eq 'form') {        } elsif ($token->{tag_name} eq 'form') {
7586            ## NOTE: As normal, but interacts with the form element pointer
7587    
7588          undef $self->{form_element};          undef $self->{form_element};
7589    
7590          ## has an element in scope          ## has an element in scope
# Line 7609  sub _tree_construction_main ($) { Line 7632  sub _tree_construction_main ($) {
7632          !!!next-token;          !!!next-token;
7633          next B;          next B;
7634        } elsif ({        } elsif ({
7635                    ## NOTE: As normal, except acts as a closer for any ...
7636                  h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,                  h1 => 1, h2 => 1, h3 => 1, h4 => 1, h5 => 1, h6 => 1,
7637                 }->{$token->{tag_name}}) {                 }->{$token->{tag_name}}) {
7638          ## has an element in scope          ## has an element in scope
# Line 7654  sub _tree_construction_main ($) { Line 7678  sub _tree_construction_main ($) {
7678          !!!next-token;          !!!next-token;
7679          next B;          next B;
7680        } elsif ($token->{tag_name} eq 'p') {        } elsif ($token->{tag_name} eq 'p') {
7681            ## NOTE: As normal, except </p> implies <p> and ...
7682    
7683          ## has an element in scope          ## has an element in scope
7684          my $i;          my $i;
7685          INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {          INSCOPE: for (reverse 0..$#{$self->{open_elements}}) {
# Line 7699  sub _tree_construction_main ($) { Line 7725  sub _tree_construction_main ($) {
7725        } elsif ({        } elsif ({
7726                  a => 1,                  a => 1,
7727                  b => 1, big => 1, em => 1, font => 1, i => 1,                  b => 1, big => 1, em => 1, font => 1, i => 1,
7728                  nobr => 1, s => 1, small => 1, strile => 1,                  nobr => 1, s => 1, small => 1, strike => 1,
7729                  strong => 1, tt => 1, u => 1,                  strong => 1, tt => 1, u => 1,
7730                 }->{$token->{tag_name}}) {                 }->{$token->{tag_name}}) {
7731          !!!cp ('t427');          !!!cp ('t427');
# Line 7738  sub _tree_construction_main ($) { Line 7764  sub _tree_construction_main ($) {
7764          ## Ignore the token          ## Ignore the token
7765          !!!next-token;          !!!next-token;
7766          next B;          next B;
           
         ## ISSUE: Issue on HTML5 new elements in spec  
           
7767        } else {        } else {
7768            if ($token->{tag_name} eq 'sarcasm') {
7769              sleep 0.001; # take a deep breath
7770            }
7771    
7772          ## Step 1          ## Step 1
7773          my $node_i = -1;          my $node_i = -1;
7774          my $node = $self->{open_elements}->[$node_i];          my $node = $self->{open_elements}->[$node_i];
# Line 7790  sub _tree_construction_main ($) { Line 7817  sub _tree_construction_main ($) {
7817                ## Ignore the token                ## Ignore the token
7818                !!!next-token;                !!!next-token;
7819                last S2;                last S2;
             }  
7820    
7821                  ## NOTE: |<span><dd></span>a|: In Safari 3.1.2 and Opera
7822                  ## 9.27, "a" is a child of <dd> (conforming).  In
7823                  ## Firefox 3.0.2, "a" is a child of <body>.  In WinIE 7,
7824                  ## "a" is a child of both <body> and <dd>.
7825                }
7826                
7827              !!!cp ('t434');              !!!cp ('t434');
7828            }            }
7829                        

Legend:
Removed from v.1.185  
changed lines
  Added in v.1.195

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24