/[suikacvs]/markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src
Suika

Diff of /markup/html/whatpm/Whatpm/HTML/Tokenizer.pm.src

Parent Directory | Revision Log | View Patch

revision 1.22 by wakaba, Sun Oct 19 10:12:54 2008 UTC | revision 1.26 by wakaba, Thu Jul 2 21:42:43 2009 UTC
# Line 1270  sub _get_next_token ($) { Line 1270  sub _get_next_token ($) {
1270    
1271          redo A;          redo A;
1272        } else {        } else {
1273          if ($self->{nc} == 0x003D) { # =          if ($self->{nc} == 0x003D or $self->{nc} == 0x003C) { # =, <
1274            !!!cp (93);            !!!cp (93);
1275            ## XML5: Not a parse error.            ## XML5: Not a parse error.
1276            !!!parse-error (type => 'bad attribute value');            !!!parse-error (type => 'bad attribute value');
# Line 1316  sub _get_next_token ($) { Line 1316  sub _get_next_token ($) {
1316          $self->{state} = ENTITY_STATE;          $self->{state} = ENTITY_STATE;
1317          !!!next-input-character;          !!!next-input-character;
1318          redo A;          redo A;
1319          } elsif ($self->{is_xml} and
1320                   $is_space->{$self->{nc}}) {
1321            !!!cp (97.1);
1322            $self->{ca}->{value} .= ' ';
1323            ## Stay in the state.
1324            !!!next-input-character;
1325            redo A;
1326        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
1327          !!!parse-error (type => 'unclosed attribute value');          !!!parse-error (type => 'unclosed attribute value');
1328          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
# Line 1363  sub _get_next_token ($) { Line 1370  sub _get_next_token ($) {
1370          }          }
1371          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
1372          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
1373                                q["&<],                                qq["&<\x09\x0C\x20],
1374                                length $self->{ca}->{value});                                length $self->{ca}->{value});
1375    
1376          ## Stay in the state          ## Stay in the state
# Line 1400  sub _get_next_token ($) { Line 1407  sub _get_next_token ($) {
1407          $self->{state} = ENTITY_STATE;          $self->{state} = ENTITY_STATE;
1408          !!!next-input-character;          !!!next-input-character;
1409          redo A;          redo A;
1410          } elsif ($self->{is_xml} and
1411                   $is_space->{$self->{nc}}) {
1412            !!!cp (103.1);
1413            $self->{ca}->{value} .= ' ';
1414            ## Stay in the state.
1415            !!!next-input-character;
1416            redo A;
1417        } elsif ($self->{nc} == -1) {        } elsif ($self->{nc} == -1) {
1418          !!!parse-error (type => 'unclosed attribute value');          !!!parse-error (type => 'unclosed attribute value');
1419          if ($self->{ct}->{type} == START_TAG_TOKEN) {          if ($self->{ct}->{type} == START_TAG_TOKEN) {
# Line 1447  sub _get_next_token ($) { Line 1461  sub _get_next_token ($) {
1461          }          }
1462          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
1463          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
1464                                q['&<],                                qq['&<\x09\x0C\x20],
1465                                length $self->{ca}->{value});                                length $self->{ca}->{value});
1466    
1467          ## Stay in the state          ## Stay in the state
# Line 1559  sub _get_next_token ($) { Line 1573  sub _get_next_token ($) {
1573               0x0022 => 1, # "               0x0022 => 1, # "
1574               0x0027 => 1, # '               0x0027 => 1, # '
1575               0x003D => 1, # =               0x003D => 1, # =
1576                 0x003C => 1, # <
1577              }->{$self->{nc}}) {              }->{$self->{nc}}) {
1578            !!!cp (115);            !!!cp (115);
1579            ## XML5: Not a parse error.            ## XML5: Not a parse error.
# Line 1568  sub _get_next_token ($) { Line 1583  sub _get_next_token ($) {
1583          }          }
1584          $self->{ca}->{value} .= chr ($self->{nc});          $self->{ca}->{value} .= chr ($self->{nc});
1585          $self->{read_until}->($self->{ca}->{value},          $self->{read_until}->($self->{ca}->{value},
1586                                q["'=& >],                                qq["'=& \x09\x0C>],
1587                                length $self->{ca}->{value});                                length $self->{ca}->{value});
1588    
1589          ## Stay in the state          ## Stay in the state
# Line 3217  sub _get_next_token ($) { Line 3232  sub _get_next_token ($) {
3232        my $code = $self->{kwd};        my $code = $self->{kwd};
3233        my $l = $self->{line_prev};        my $l = $self->{line_prev};
3234        my $c = $self->{column_prev};        my $c = $self->{column_prev};
3235        if ($charref_map->{$code}) {        if ((not $self->{is_xml} and $charref_map->{$code}) or
3236              ($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or
3237              ($self->{is_xml} and $code == 0x0000)) {
3238          !!!cp (1015);          !!!cp (1015);
3239          !!!parse-error (type => 'invalid character reference',          !!!parse-error (type => 'invalid character reference',
3240                          text => (sprintf 'U+%04X', $code),                          text => (sprintf 'U+%04X', $code),
# Line 3330  sub _get_next_token ($) { Line 3347  sub _get_next_token ($) {
3347        my $code = $self->{kwd};        my $code = $self->{kwd};
3348        my $l = $self->{line_prev};        my $l = $self->{line_prev};
3349        my $c = $self->{column_prev};        my $c = $self->{column_prev};
3350        if ($charref_map->{$code}) {        if ((not $self->{is_xml} and $charref_map->{$code}) or
3351              ($self->{is_xml} and 0xD800 <= $code and $code <= 0xDFFF) or
3352              ($self->{is_xml} and $code == 0x0000)) {
3353          !!!cp (1008);          !!!cp (1008);
3354          !!!parse-error (type => 'invalid character reference',          !!!parse-error (type => 'invalid character reference',
3355                          text => (sprintf 'U+%04X', $code),                          text => (sprintf 'U+%04X', $code),
# Line 3659  sub _get_next_token ($) { Line 3678  sub _get_next_token ($) {
3678          ## XML5: Not defined yet.          ## XML5: Not defined yet.
3679    
3680          ## TODO:          ## TODO:
3681    
3682            if (not $self->{stop_processing} and
3683                not $self->{document}->xml_standalone) {
3684              !!!parse-error (type => 'stop processing', ## TODO: type
3685                              level => $self->{level}->{info});
3686              $self->{stop_processing} = 1;
3687            }
3688    
3689          !!!next-input-character;          !!!next-input-character;
3690          redo A;          redo A;
3691        } elsif ($self->{nc} == 0x005D) { # ]        } elsif ($self->{nc} == 0x005D) { # ]
# Line 3893  sub _get_next_token ($) { Line 3920  sub _get_next_token ($) {
3920          }          }
3921          $self->{ct} = {type => ELEMENT_TOKEN, name => '',          $self->{ct} = {type => ELEMENT_TOKEN, name => '',
3922                         line => $self->{line_prev},                         line => $self->{line_prev},
3923                         column => $self->{column_prev} - 6};                         column => $self->{column_prev} - 7};
3924          $self->{state} = DOCTYPE_MD_STATE;          $self->{state} = DOCTYPE_MD_STATE;
3925          !!!next-input-character;          !!!next-input-character;
3926          redo A;          redo A;
# Line 3941  sub _get_next_token ($) { Line 3968  sub _get_next_token ($) {
3968          $self->{ct} = {type => ATTLIST_TOKEN, name => '',          $self->{ct} = {type => ATTLIST_TOKEN, name => '',
3969                         attrdefs => [],                         attrdefs => [],
3970                         line => $self->{line_prev},                         line => $self->{line_prev},
3971                         column => $self->{column_prev} - 6};                         column => $self->{column_prev} - 7};
3972          $self->{state} = DOCTYPE_MD_STATE;          $self->{state} = DOCTYPE_MD_STATE;
3973          !!!next-input-character;          !!!next-input-character;
3974          redo A;          redo A;
# Line 3990  sub _get_next_token ($) { Line 4017  sub _get_next_token ($) {
4017          }          }
4018          $self->{ct} = {type => NOTATION_TOKEN, name => '',          $self->{ct} = {type => NOTATION_TOKEN, name => '',
4019                         line => $self->{line_prev},                         line => $self->{line_prev},
4020                         column => $self->{column_prev} - 6};                         column => $self->{column_prev} - 8};
4021          $self->{state} = DOCTYPE_MD_STATE;          $self->{state} = DOCTYPE_MD_STATE;
4022          !!!next-input-character;          !!!next-input-character;
4023          redo A;          redo A;

Legend:
Removed from v.1.22  
changed lines
  Added in v.1.26

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24