/[suikacvs]/markup/html/whatpm/Whatpm/HTML.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML.pm.src

Parent Directory | Revision Log | View Patch

revision 1.185 by wakaba, Mon Sep 15 09:27:53 2008 UTC | revision 1.191 by wakaba, Mon Sep 22 06:04:29 2008 UTC
# Line 323  my $foreign_attr_xname = { Line 323  my $foreign_attr_xname = {
323    
324  ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.  ## ISSUE: xmlns:xlink="non-xlink-ns" is not an error.
325    
326  my $c1_entity_char = {  my $charref_map = {
327      0x0D => 0x000A,
328    0x80 => 0x20AC,    0x80 => 0x20AC,
329    0x81 => 0xFFFD,    0x81 => 0xFFFD,
330    0x82 => 0x201A,    0x82 => 0x201A,
# Line 356  my $c1_entity_char = { Line 357  my $c1_entity_char = {
357    0x9D => 0xFFFD,    0x9D => 0xFFFD,
358    0x9E => 0x017E,    0x9E => 0x017E,
359    0x9F => 0x0178,    0x9F => 0x0178,
360  }; # $c1_entity_char  }; # $charref_map
361    $charref_map->{$_} = 0xFFFD
362        for 0x0000..0x0008, 0x000B, 0x000E..0x001F, 0x007F,
363            0xD800..0xDFFF, 0xFDD0..0xFDDF, ## ISSUE: 0xFDEF
364            0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF,
365            0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE,
366            0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
367            0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE,
368            0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, 0x10FFFF;
369    
370  sub parse_byte_string ($$$$;$) {  sub parse_byte_string ($$$$;$) {
371    my $self = shift;    my $self = shift;
# Line 401  sub parse_byte_stream ($$$$;$$) { Line 410  sub parse_byte_stream ($$$$;$$) {
410            ## TODO: Is this ok?  Transfer protocol's parameter should be            ## TODO: Is this ok?  Transfer protocol's parameter should be
411            ## interpreted in its semantics?            ## interpreted in its semantics?
412    
       ## ISSUE: Unsupported encoding is not ignored according to the spec.  
413        ($char_stream, $e_status) = $charset->get_decode_handle        ($char_stream, $e_status) = $charset->get_decode_handle
414            ($byte_stream, allow_error_reporting => 1,            ($byte_stream, allow_error_reporting => 1,
415             allow_fallback => 1);             allow_fallback => 1);
# Line 409  sub parse_byte_stream ($$$$;$$) { Line 417  sub parse_byte_stream ($$$$;$$) {
417          $self->{confident} = 1;          $self->{confident} = 1;
418          last SNIFFING;          last SNIFFING;
419        } else {        } else {
420          ## TODO: unsupported error          !!!parse-error (type => 'charset:not supported',
421                            layer => 'encode',
422                            line => 1, column => 1,
423                            value => $charset_name,
424                            level => $self->{level}->{uncertain});
425        }        }
426      }      }
427    
# Line 966  sub _initialize_tokenizer ($) { Line 978  sub _initialize_tokenizer ($) {
978  ## TODO: Polytheistic slash SHOULD NOT be used. (Applied only to atheists.)  ## TODO: Polytheistic slash SHOULD NOT be used. (Applied only to atheists.)
979  ## (This requirement was dropped from HTML5 spec, unfortunately.)  ## (This requirement was dropped from HTML5 spec, unfortunately.)
980    
981    my $is_space = {
982      0x0009 => 1, # CHARACTER TABULATION (HT)
983      0x000A => 1, # LINE FEED (LF)
984      #0x000B => 0, # LINE TABULATION (VT)
985      0x000C => 1, # FORM FEED (FF)
986      #0x000D => 1, # CARRIAGE RETURN (CR)
987      0x0020 => 1, # SPACE (SP)
988    };
989    
990  sub _get_next_token ($) {  sub _get_next_token ($) {
991    my $self = shift;    my $self = shift;
992    
# Line 1336  sub _get_next_token ($) { Line 1357  sub _get_next_token ($) {
1357            redo A;            redo A;
1358          }          }
1359        } else { # after "<{tag-name}"        } else { # after "<{tag-name}"
1360          unless ({          unless ($is_space->{$self->{nc}} or
1361                   0x0009 => 1, # HT                  {
                  0x000A => 1, # LF  
                  0x000B => 1, # VT  
                  0x000C => 1, # FF  
                  0x0020 => 1, # SP  
1362                   0x003E => 1, # >                   0x003E => 1, # >
1363                   0x002F => 1, # /                   0x002F => 1, # /
1364                   -1 => 1, # EOF                   -1 => 1, # EOF
# Line 1368  sub _get_next_token ($) { Line 1385  sub _get_next_token ($) {
1385          }          }
1386        }        }
1387      } elsif ($self->{state} == TAG_NAME_STATE) {      } elsif ($self->{state} == TAG_NAME_STATE) {
1388        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
1389          !!!cp (34);          !!!cp (34);
1390          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1391          !!!next-input-character;          !!!next-input-character;
# Line 1444  sub _get_next_token ($) { Line 1457  sub _get_next_token ($) {
1457          redo A;          redo A;
1458        }        }
1459      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_NAME_STATE) {
1460        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
1461          !!!cp (45);          !!!cp (45);
1462          ## Stay in the state          ## Stay in the state
1463          !!!next-input-character;          !!!next-input-character;
# Line 1544  sub _get_next_token ($) { Line 1553  sub _get_next_token ($) {
1553          }          }
1554        }; # $before_leave        }; # $before_leave
1555    
1556        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
1557          !!!cp (59);          !!!cp (59);
1558          $before_leave->();          $before_leave->();
1559          $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;          $self->{state} = AFTER_ATTRIBUTE_NAME_STATE;
# Line 1631  sub _get_next_token ($) { Line 1636  sub _get_next_token ($) {
1636          redo A;          redo A;
1637        }        }
1638      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {      } elsif ($self->{state} == AFTER_ATTRIBUTE_NAME_STATE) {
1639        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
1640          !!!cp (71);          !!!cp (71);
1641          ## Stay in the state          ## Stay in the state
1642          !!!next-input-character;          !!!next-input-character;
# Line 1722  sub _get_next_token ($) { Line 1723  sub _get_next_token ($) {
1723          redo A;                  redo A;        
1724        }        }
1725      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {      } elsif ($self->{state} == BEFORE_ATTRIBUTE_VALUE_STATE) {
1726        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP        
1727          !!!cp (83);          !!!cp (83);
1728          ## Stay in the state          ## Stay in the state
1729          !!!next-input-character;          !!!next-input-character;
# Line 1907  sub _get_next_token ($) { Line 1904  sub _get_next_token ($) {
1904          redo A;          redo A;
1905        }        }
1906      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {      } elsif ($self->{state} == ATTRIBUTE_VALUE_UNQUOTED_STATE) {
1907        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # HT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
1908          !!!cp (107);          !!!cp (107);
1909          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1910          !!!next-input-character;          !!!next-input-character;
# Line 1993  sub _get_next_token ($) { Line 1986  sub _get_next_token ($) {
1986          redo A;          redo A;
1987        }        }
1988      } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {      } elsif ($self->{state} == AFTER_ATTRIBUTE_VALUE_QUOTED_STATE) {
1989        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
1990          !!!cp (118);          !!!cp (118);
1991          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;          $self->{state} = BEFORE_ATTRIBUTE_NAME_STATE;
1992          !!!next-input-character;          !!!next-input-character;
# Line 2438  sub _get_next_token ($) { Line 2427  sub _get_next_token ($) {
2427          redo A;          redo A;
2428        }        }
2429      } elsif ($self->{state} == DOCTYPE_STATE) {      } elsif ($self->{state} == DOCTYPE_STATE) {
2430        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
2431          !!!cp (155);          !!!cp (155);
2432          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;          $self->{state} = BEFORE_DOCTYPE_NAME_STATE;
2433          !!!next-input-character;          !!!next-input-character;
# Line 2455  sub _get_next_token ($) { Line 2440  sub _get_next_token ($) {
2440          redo A;          redo A;
2441        }        }
2442      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == BEFORE_DOCTYPE_NAME_STATE) {
2443        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
2444          !!!cp (157);          !!!cp (157);
2445          ## Stay in the state          ## Stay in the state
2446          !!!next-input-character;          !!!next-input-character;
# Line 2493  sub _get_next_token ($) { Line 2474  sub _get_next_token ($) {
2474        }        }
2475      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == DOCTYPE_NAME_STATE) {
2476  ## ISSUE: Redundant "First," in the spec.  ## ISSUE: Redundant "First," in the spec.
2477        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
2478          !!!cp (161);          !!!cp (161);
2479          $self->{state} = AFTER_DOCTYPE_NAME_STATE;          $self->{state} = AFTER_DOCTYPE_NAME_STATE;
2480          !!!next-input-character;          !!!next-input-character;
# Line 2529  sub _get_next_token ($) { Line 2506  sub _get_next_token ($) {
2506          redo A;          redo A;
2507        }        }
2508      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_NAME_STATE) {
2509        if ($self->{nc} == 0x0009 or # HT        if ($is_space->{$self->{nc}}) {
           $self->{nc} == 0x000A or # LF  
           $self->{nc} == 0x000B or # VT  
           $self->{nc} == 0x000C or # FF  
           $self->{nc} == 0x0020) { # SP  
2510          !!!cp (165);          !!!cp (165);
2511          ## Stay in the state          ## Stay in the state
2512          !!!next-input-character;          !!!next-input-character;
# Line 2656  sub _get_next_token ($) { Line 2629  sub _get_next_token ($) {
2629          redo A;          redo A;
2630        }        }
2631      } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {      } elsif ($self->{state} == BEFORE_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2632        if ({        if ($is_space->{$self->{nc}}) {
             0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,  
             #0x000D => 1, # HT, LF, VT, FF, SP, CR  
           }->{$self->{nc}}) {  
2633          !!!cp (181);          !!!cp (181);
2634          ## Stay in the state          ## Stay in the state
2635          !!!next-input-character;          !!!next-input-character;
# Line 2786  sub _get_next_token ($) { Line 2756  sub _get_next_token ($) {
2756          redo A;          redo A;
2757        }        }
2758      } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_PUBLIC_IDENTIFIER_STATE) {
2759        if ({        if ($is_space->{$self->{nc}}) {
             0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,  
             #0x000D => 1, # HT, LF, VT, FF, SP, CR  
           }->{$self->{nc}}) {  
2760          !!!cp (195);          !!!cp (195);
2761          ## Stay in the state          ## Stay in the state
2762          !!!next-input-character;          !!!next-input-character;
# Line 2835  sub _get_next_token ($) { Line 2802  sub _get_next_token ($) {
2802          redo A;          redo A;
2803        }        }
2804      } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {      } elsif ($self->{state} == BEFORE_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2805        if ({        if ($is_space->{$self->{nc}}) {
             0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,  
             #0x000D => 1, # HT, LF, VT, FF, SP, CR  
           }->{$self->{nc}}) {  
2806          !!!cp (201);          !!!cp (201);
2807          ## Stay in the state          ## Stay in the state
2808          !!!next-input-character;          !!!next-input-character;
# Line 2964  sub _get_next_token ($) { Line 2928  sub _get_next_token ($) {
2928          redo A;          redo A;
2929        }        }
2930      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {      } elsif ($self->{state} == AFTER_DOCTYPE_SYSTEM_IDENTIFIER_STATE) {
2931        if ({        if ($is_space->{$self->{nc}}) {
             0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, 0x0020 => 1,  
             #0x000D => 1, # HT, LF, VT, FF, SP, CR  
           }->{$self->{nc}}) {  
2932          !!!cp (215);          !!!cp (215);
2933          ## Stay in the state          ## Stay in the state
2934          !!!next-input-character;          !!!next-input-character;
# Line 3099  sub _get_next_token ($) { Line 3060  sub _get_next_token ($) {
3060          redo A;          redo A;
3061        }        }
3062      } elsif ($self->{state} == ENTITY_STATE) {      } elsif ($self->{state} == ENTITY_STATE) {
3063        if ({        if ($is_space->{$self->{nc}} or
3064          0x0009 => 1, 0x000A => 1, 0x000B => 1, 0x000C => 1, # HT, LF, VT, FF,            {
3065          0x0020 => 1, 0x003C => 1, 0x0026 => 1, -1 => 1, # SP, <, &              0x003C => 1, 0x0026 => 1, -1 => 1, # <, &
3066          $self->{entity_add} => 1,              $self->{entity_add} => 1,
3067        }->{$self->{nc}}) {            }->{$self->{nc}}) {
3068          !!!cp (1001);          !!!cp (1001);
3069          ## Don't consume          ## Don't consume
3070          ## No error          ## No error
# Line 3222  sub _get_next_token ($) { Line 3183  sub _get_next_token ($) {
3183        my $code = $self->{s_kwd};        my $code = $self->{s_kwd};
3184        my $l = $self->{line_prev};        my $l = $self->{line_prev};
3185        my $c = $self->{column_prev};        my $c = $self->{column_prev};
3186        if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {        if ($charref_map->{$code}) {
3187          !!!cp (1015);          !!!cp (1015);
3188          !!!parse-error (type => 'invalid character reference',          !!!parse-error (type => 'invalid character reference',
3189                          text => (sprintf 'U+%04X', $code),                          text => (sprintf 'U+%04X', $code),
3190                          line => $l, column => $c);                          line => $l, column => $c);
3191          $code = 0xFFFD;          $code = $charref_map->{$code};
3192        } elsif ($code > 0x10FFFF) {        } elsif ($code > 0x10FFFF) {
3193          !!!cp (1016);          !!!cp (1016);
3194          !!!parse-error (type => 'invalid character reference',          !!!parse-error (type => 'invalid character reference',
3195                          text => (sprintf 'U-%08X', $code),                          text => (sprintf 'U-%08X', $code),
3196                          line => $l, column => $c);                          line => $l, column => $c);
3197          $code = 0xFFFD;          $code = 0xFFFD;
       } elsif ($code == 0x000D) {  
         !!!cp (1017);  
         !!!parse-error (type => 'CR character reference',  
                         line => $l, column => $c);  
         $code = 0x000A;  
       } elsif (0x80 <= $code and $code <= 0x9F) {  
         !!!cp (1018);  
         !!!parse-error (type => 'C1 character reference',  
                         text => (sprintf 'U+%04X', $code),  
                         line => $l, column => $c);  
         $code = $c1_entity_char->{$code};  
3198        }        }
3199    
3200        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
# Line 3341  sub _get_next_token ($) { Line 3291  sub _get_next_token ($) {
3291        my $code = $self->{s_kwd};        my $code = $self->{s_kwd};
3292        my $l = $self->{line_prev};        my $l = $self->{line_prev};
3293        my $c = $self->{column_prev};        my $c = $self->{column_prev};
3294        if ($code == 0 or (0xD800 <= $code and $code <= 0xDFFF)) {        if ($charref_map->{$code}) {
3295          !!!cp (1008);          !!!cp (1008);
3296          !!!parse-error (type => 'invalid character reference',          !!!parse-error (type => 'invalid character reference',
3297                          text => (sprintf 'U+%04X', $code),                          text => (sprintf 'U+%04X', $code),
3298                          line => $l, column => $c);                          line => $l, column => $c);
3299          $code = 0xFFFD;          $code = $charref_map->{$code};
3300        } elsif ($code > 0x10FFFF) {        } elsif ($code > 0x10FFFF) {
3301          !!!cp (1009);          !!!cp (1009);
3302          !!!parse-error (type => 'invalid character reference',          !!!parse-error (type => 'invalid character reference',
3303                          text => (sprintf 'U-%08X', $code),                          text => (sprintf 'U-%08X', $code),
3304                          line => $l, column => $c);                          line => $l, column => $c);
3305          $code = 0xFFFD;          $code = 0xFFFD;
       } elsif ($code == 0x000D) {  
         !!!cp (1010);  
         !!!parse-error (type => 'CR character reference', line => $l, column => $c);  
         $code = 0x000A;  
       } elsif (0x80 <= $code and $code <= 0x9F) {  
         !!!cp (1011);  
         !!!parse-error (type => 'C1 character reference', text => (sprintf 'U+%04X', $code), line => $l, column => $c);  
         $code = $c1_entity_char->{$code};  
3306        }        }
3307    
3308        if ($self->{prev_state} == DATA_STATE) {        if ($self->{prev_state} == DATA_STATE) {
# Line 3701  sub _tree_construction_initial ($) { Line 3643  sub _tree_construction_initial ($) {
3643        !!!ack-later;        !!!ack-later;
3644        return;        return;
3645      } elsif ($token->{type} == CHARACTER_TOKEN) {      } elsif ($token->{type} == CHARACTER_TOKEN) {
3646        if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D        if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
3647          ## Ignore the token          ## Ignore the token
3648    
3649          unless (length $token->{data}) {          unless (length $token->{data}) {
# Line 3758  sub _tree_construction_root_element ($) Line 3700  sub _tree_construction_root_element ($)
3700          !!!next-token;          !!!next-token;
3701          redo B;          redo B;
3702        } elsif ($token->{type} == CHARACTER_TOKEN) {        } elsif ($token->{type} == CHARACTER_TOKEN) {
3703          if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) { # \x0D          if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
3704            ## Ignore the token.            ## Ignore the token.
3705    
3706            unless (length $token->{data}) {            unless (length $token->{data}) {
# Line 4572  sub _tree_construction_main ($) { Line 4514  sub _tree_construction_main ($) {
4514    
4515      if ($self->{insertion_mode} & HEAD_IMS) {      if ($self->{insertion_mode} & HEAD_IMS) {
4516        if ($token->{type} == CHARACTER_TOKEN) {        if ($token->{type} == CHARACTER_TOKEN) {
4517          if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {          if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
4518            unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {            unless ($self->{insertion_mode} == BEFORE_HEAD_IM) {
4519              !!!cp ('t88.2');              !!!cp ('t88.2');
4520              $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);              $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
# Line 4751  sub _tree_construction_main ($) { Line 4693  sub _tree_construction_main ($) {
4693                  } elsif ($token->{attributes}->{content}) {                  } elsif ($token->{attributes}->{content}) {
4694                    if ($token->{attributes}->{content}->{value}                    if ($token->{attributes}->{content}->{value}
4695                        =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]                        =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
4696                            [\x09-\x0D\x20]*=                            [\x09\x0A\x0C\x0D\x20]*=
4697                            [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|                            [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
4698                            ([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) {                            ([^"'\x09\x0A\x0C\x0D\x20]
4699                               [^\x09\x0A\x0C\x0D\x20\x3B]*))/x) {
4700                      !!!cp ('t107');                      !!!cp ('t107');
4701                      ## NOTE: Whether the encoding is supported or not is handled                      ## NOTE: Whether the encoding is supported or not is handled
4702                      ## in the {change_encoding} callback.                      ## in the {change_encoding} callback.
# Line 5562  sub _tree_construction_main ($) { Line 5505  sub _tree_construction_main ($) {
5505      } elsif ($self->{insertion_mode} & TABLE_IMS) {      } elsif ($self->{insertion_mode} & TABLE_IMS) {
5506        if ($token->{type} == CHARACTER_TOKEN) {        if ($token->{type} == CHARACTER_TOKEN) {
5507          if (not $open_tables->[-1]->[1] and # tainted          if (not $open_tables->[-1]->[1] and # tainted
5508              $token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {              $token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
5509            $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);            $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
5510                                
5511            unless (length $token->{data}) {            unless (length $token->{data}) {
# Line 6246  sub _tree_construction_main ($) { Line 6189  sub _tree_construction_main ($) {
6189        }        }
6190      } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {      } elsif ($self->{insertion_mode} == IN_COLUMN_GROUP_IM) {
6191            if ($token->{type} == CHARACTER_TOKEN) {            if ($token->{type} == CHARACTER_TOKEN) {
6192              if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {              if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
6193                $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);                $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6194                unless (length $token->{data}) {                unless (length $token->{data}) {
6195                  !!!cp ('t260');                  !!!cp ('t260');
# Line 6587  sub _tree_construction_main ($) { Line 6530  sub _tree_construction_main ($) {
6530        }        }
6531      } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {      } elsif ($self->{insertion_mode} & BODY_AFTER_IMS) {
6532        if ($token->{type} == CHARACTER_TOKEN) {        if ($token->{type} == CHARACTER_TOKEN) {
6533          if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {          if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
6534            my $data = $1;            my $data = $1;
6535            ## As if in body            ## As if in body
6536            $reconstruct_active_formatting_elements->($insert_to_current);            $reconstruct_active_formatting_elements->($insert_to_current);
# Line 6604  sub _tree_construction_main ($) { Line 6547  sub _tree_construction_main ($) {
6547          if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {          if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) {
6548            !!!cp ('t301');            !!!cp ('t301');
6549            !!!parse-error (type => 'after html:#text', token => $token);            !!!parse-error (type => 'after html:#text', token => $token);
6550              #
           ## Reprocess in the "after body" insertion mode.  
6551          } else {          } else {
6552            !!!cp ('t302');            !!!cp ('t302');
6553              ## "after body" insertion mode
6554              !!!parse-error (type => 'after body:#text', token => $token);
6555              #
6556          }          }
           
         ## "after body" insertion mode  
         !!!parse-error (type => 'after body:#text', token => $token);  
6557    
6558          $self->{insertion_mode} = IN_BODY_IM;          $self->{insertion_mode} = IN_BODY_IM;
6559          ## reprocess          ## reprocess
# Line 6621  sub _tree_construction_main ($) { Line 6563  sub _tree_construction_main ($) {
6563            !!!cp ('t303');            !!!cp ('t303');
6564            !!!parse-error (type => 'after html',            !!!parse-error (type => 'after html',
6565                            text => $token->{tag_name}, token => $token);                            text => $token->{tag_name}, token => $token);
6566                        #
           ## Reprocess in the "after body" insertion mode.  
6567          } else {          } else {
6568            !!!cp ('t304');            !!!cp ('t304');
6569              ## "after body" insertion mode
6570              !!!parse-error (type => 'after body',
6571                              text => $token->{tag_name}, token => $token);
6572              #
6573          }          }
6574    
         ## "after body" insertion mode  
         !!!parse-error (type => 'after body',  
                         text => $token->{tag_name}, token => $token);  
   
6575          $self->{insertion_mode} = IN_BODY_IM;          $self->{insertion_mode} = IN_BODY_IM;
6576          !!!ack-later;          !!!ack-later;
6577          ## reprocess          ## reprocess
# Line 6641  sub _tree_construction_main ($) { Line 6582  sub _tree_construction_main ($) {
6582            !!!parse-error (type => 'after html:/',            !!!parse-error (type => 'after html:/',
6583                            text => $token->{tag_name}, token => $token);                            text => $token->{tag_name}, token => $token);
6584                        
6585            $self->{insertion_mode} = AFTER_BODY_IM;            $self->{insertion_mode} = IN_BODY_IM;
6586            ## Reprocess in the "after body" insertion mode.            ## Reprocess.
6587              next B;
6588          } else {          } else {
6589            !!!cp ('t306');            !!!cp ('t306');
6590          }          }
# Line 6680  sub _tree_construction_main ($) { Line 6622  sub _tree_construction_main ($) {
6622        }        }
6623      } elsif ($self->{insertion_mode} & FRAME_IMS) {      } elsif ($self->{insertion_mode} & FRAME_IMS) {
6624        if ($token->{type} == CHARACTER_TOKEN) {        if ($token->{type} == CHARACTER_TOKEN) {
6625          if ($token->{data} =~ s/^([\x09\x0A\x0B\x0C\x20]+)//) {          if ($token->{data} =~ s/^([\x09\x0A\x0C\x20]+)//) {
6626            $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);            $self->{open_elements}->[-1]->[0]->manakai_append_text ($1);
6627                        
6628            unless (length $token->{data}) {            unless (length $token->{data}) {
# Line 6690  sub _tree_construction_main ($) { Line 6632  sub _tree_construction_main ($) {
6632            }            }
6633          }          }
6634                    
6635          if ($token->{data} =~ s/^[^\x09\x0A\x0B\x0C\x20]+//) {          if ($token->{data} =~ s/^[^\x09\x0A\x0C\x20]+//) {
6636            if ($self->{insertion_mode} == IN_FRAMESET_IM) {            if ($self->{insertion_mode} == IN_FRAMESET_IM) {
6637              !!!cp ('t311');              !!!cp ('t311');
6638              !!!parse-error (type => 'in frameset:#text', token => $token);              !!!parse-error (type => 'in frameset:#text', token => $token);
# Line 6867  sub _tree_construction_main ($) { Line 6809  sub _tree_construction_main ($) {
6809            } elsif ($token->{attributes}->{content}) {            } elsif ($token->{attributes}->{content}) {
6810              if ($token->{attributes}->{content}->{value}              if ($token->{attributes}->{content}->{value}
6811                  =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]                  =~ /[Cc][Hh][Aa][Rr][Ss][Ee][Tt]
6812                      [\x09-\x0D\x20]*=                      [\x09\x0A\x0C\x0D\x20]*=
6813                      [\x09-\x0D\x20]*(?>"([^"]*)"|'([^']*)'|                      [\x09\x0A\x0C\x0D\x20]*(?>"([^"]*)"|'([^']*)'|
6814                      ([^"'\x09-\x0D\x20][^\x09-\x0D\x20\x3B]*))/x) {                      ([^"'\x09\x0A\x0C\x0D\x20][^\x09\x0A\x0C\x0D\x20\x3B]*))
6815                       /x) {
6816                !!!cp ('t336');                !!!cp ('t336');
6817                ## NOTE: Whether the encoding is supported or not is handled                ## NOTE: Whether the encoding is supported or not is handled
6818                ## in the {change_encoding} callback.                ## in the {change_encoding} callback.

Legend:
Removed from v.1.185  
changed lines
  Added in v.1.191

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24