| 105 |
sub COMMENT_START_DASH_STATE () { 15 } |
sub COMMENT_START_DASH_STATE () { 15 } |
| 106 |
sub COMMENT_STATE () { 16 } |
sub COMMENT_STATE () { 16 } |
| 107 |
sub COMMENT_END_STATE () { 17 } |
sub COMMENT_END_STATE () { 17 } |
| 108 |
|
sub COMMENT_END_BANG_STATE () { 102 } ## LAST |
| 109 |
sub COMMENT_END_DASH_STATE () { 18 } |
sub COMMENT_END_DASH_STATE () { 18 } |
| 110 |
sub BOGUS_COMMENT_STATE () { 19 } |
sub BOGUS_COMMENT_STATE () { 19 } |
| 111 |
sub DOCTYPE_STATE () { 20 } |
sub DOCTYPE_STATE () { 20 } |
| 1249 |
if ({ |
if ({ |
| 1250 |
0x0022 => 1, # " |
0x0022 => 1, # " |
| 1251 |
0x0027 => 1, # ' |
0x0027 => 1, # ' |
| 1252 |
|
0x003C => 1, # < |
| 1253 |
0x003D => 1, # = |
0x003D => 1, # = |
| 1254 |
}->{$self->{nc}}) { |
}->{$self->{nc}}) { |
| 1255 |
|
|
| 1432 |
|
|
| 1433 |
redo A; |
redo A; |
| 1434 |
} else { |
} else { |
| 1435 |
if ($self->{nc} == 0x0022 or # " |
if ({ |
| 1436 |
$self->{nc} == 0x0027) { # ' |
0x0022 => 1, # " |
| 1437 |
|
0x0027 => 1, # ' |
| 1438 |
|
0x003C => 1, # < |
| 1439 |
|
}->{$self->{nc}}) { |
| 1440 |
|
|
| 1441 |
## XML5: Not a parse error. |
## XML5: Not a parse error. |
| 1442 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name'); |
| 1607 |
|
|
| 1608 |
} |
} |
| 1609 |
|
|
| 1610 |
if ($self->{nc} == 0x0022 or # " |
if ({ |
| 1611 |
$self->{nc} == 0x0027) { # ' |
0x0022 => 1, # " |
| 1612 |
|
0x0027 => 1, # ' |
| 1613 |
|
0x003C => 1, # < |
| 1614 |
|
}->{$self->{nc}}) { |
| 1615 |
|
|
| 1616 |
## XML5: Not a parse error. |
## XML5: Not a parse error. |
| 1617 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'bad attribute name'); |
| 2943 |
|
|
| 2944 |
redo A; |
redo A; |
| 2945 |
} |
} |
| 2946 |
} elsif ($self->{state} == COMMENT_END_STATE) { |
} elsif ($self->{state} == COMMENT_END_STATE or |
| 2947 |
|
$self->{state} == COMMENT_END_BANG_STATE) { |
| 2948 |
## XML5: "Comment end state" and "DOCTYPE comment end state". |
## XML5: "Comment end state" and "DOCTYPE comment end state". |
| 2949 |
|
## (No comment end bang state.) |
| 2950 |
|
|
| 2951 |
if ($self->{nc} == 0x003E) { # > |
if ($self->{nc} == 0x003E) { # > |
| 2952 |
if ($self->{in_subset}) { |
if ($self->{in_subset}) { |
| 2973 |
|
|
| 2974 |
redo A; |
redo A; |
| 2975 |
} elsif ($self->{nc} == 0x002D) { # - |
} elsif ($self->{nc} == 0x002D) { # - |
| 2976 |
|
if ($self->{state} == COMMENT_END_BANG_STATE) { |
| 2977 |
|
|
| 2978 |
|
$self->{ct}->{data} .= '--!'; # comment |
| 2979 |
|
$self->{state} = COMMENT_END_DASH_STATE; |
| 2980 |
|
} else { |
| 2981 |
|
|
| 2982 |
|
## XML5: Not a parse error. |
| 2983 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment', |
| 2984 |
|
line => $self->{line_prev}, |
| 2985 |
|
column => $self->{column_prev}); |
| 2986 |
|
$self->{ct}->{data} .= '-'; # comment |
| 2987 |
|
## Stay in the state |
| 2988 |
|
} |
| 2989 |
|
|
| 2990 |
## XML5: Not a parse error. |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 2991 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment', |
$self->{line_prev} = $self->{line}; |
| 2992 |
line => $self->{line_prev}, |
$self->{column_prev} = $self->{column}; |
| 2993 |
column => $self->{column_prev}); |
$self->{column}++; |
| 2994 |
$self->{ct}->{data} .= '-'; # comment |
$self->{nc} |
| 2995 |
## Stay in the state |
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 2996 |
|
} else { |
| 2997 |
|
$self->{set_nc}->($self); |
| 2998 |
|
} |
| 2999 |
|
|
| 3000 |
|
redo A; |
| 3001 |
|
} elsif ($self->{nc} == 0x0021 and # ! |
| 3002 |
|
$self->{state} != COMMENT_END_BANG_STATE) { |
| 3003 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'comment end bang'); # XXX error type |
| 3004 |
|
$self->{state} = COMMENT_END_BANG_STATE; |
| 3005 |
|
|
| 3006 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3007 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 3024 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
| 3025 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
| 3026 |
} |
} |
| 3027 |
## reconsume |
## Reconsume. |
| 3028 |
|
|
| 3029 |
return ($self->{ct}); # comment |
return ($self->{ct}); # comment |
| 3030 |
|
|
| 3031 |
redo A; |
redo A; |
| 3032 |
} else { |
} else { |
| 3033 |
|
|
| 3034 |
## XML5: Not a parse error. |
if ($self->{state} == COMMENT_END_BANG_STATE) { |
| 3035 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'dash in comment', |
$self->{ct}->{data} .= '--!' . chr ($self->{nc}); # comment |
| 3036 |
line => $self->{line_prev}, |
} else { |
| 3037 |
column => $self->{column_prev}); |
$self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment |
| 3038 |
$self->{ct}->{data} .= '--' . chr ($self->{nc}); # comment |
} |
| 3039 |
$self->{state} = COMMENT_STATE; |
$self->{state} = COMMENT_STATE; |
| 3040 |
|
|
| 3041 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3066 |
} |
} |
| 3067 |
|
|
| 3068 |
redo A; |
redo A; |
| 3069 |
|
} elsif ($self->{nc} == -1) { |
| 3070 |
|
|
| 3071 |
|
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
| 3072 |
|
$self->{ct}->{quirks} = 1; |
| 3073 |
|
|
| 3074 |
|
$self->{state} = DATA_STATE; |
| 3075 |
|
## Reconsume. |
| 3076 |
|
return ($self->{ct}); # DOCTYPE (quirks) |
| 3077 |
|
|
| 3078 |
|
redo A; |
| 3079 |
} else { |
} else { |
| 3080 |
|
|
| 3081 |
## XML5: Unless EOF, swith to the bogus comment state. |
## XML5: Swith to the bogus comment state. |
| 3082 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no space before DOCTYPE name'); |
| 3083 |
$self->{state} = BEFORE_DOCTYPE_NAME_STATE; |
$self->{state} = BEFORE_DOCTYPE_NAME_STATE; |
| 3084 |
## reconsume |
## reconsume |
| 3123 |
return ($self->{ct}); # DOCTYPE (quirks) |
return ($self->{ct}); # DOCTYPE (quirks) |
| 3124 |
|
|
| 3125 |
redo A; |
redo A; |
| 3126 |
|
} elsif (0x0041 <= $self->{nc} and $self->{nc} <= 0x005A) { # A..Z |
| 3127 |
|
|
| 3128 |
|
$self->{ct}->{name} # DOCTYPE |
| 3129 |
|
= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)); |
| 3130 |
|
delete $self->{ct}->{quirks}; |
| 3131 |
|
$self->{state} = DOCTYPE_NAME_STATE; |
| 3132 |
|
|
| 3133 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3134 |
|
$self->{line_prev} = $self->{line}; |
| 3135 |
|
$self->{column_prev} = $self->{column}; |
| 3136 |
|
$self->{column}++; |
| 3137 |
|
$self->{nc} |
| 3138 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 3139 |
|
} else { |
| 3140 |
|
$self->{set_nc}->($self); |
| 3141 |
|
} |
| 3142 |
|
|
| 3143 |
|
redo A; |
| 3144 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 3145 |
|
|
| 3146 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'no DOCTYPE name'); |
| 3227 |
return ($self->{ct}); # DOCTYPE |
return ($self->{ct}); # DOCTYPE |
| 3228 |
|
|
| 3229 |
redo A; |
redo A; |
| 3230 |
|
} elsif (0x0041 <= $self->{nc} and $self->{nc} <= 0x005A) { # A..Z |
| 3231 |
|
|
| 3232 |
|
$self->{ct}->{name} # DOCTYPE |
| 3233 |
|
.= chr ($self->{nc} + ($self->{is_xml} ? 0 : 0x0020)); |
| 3234 |
|
delete $self->{ct}->{quirks}; |
| 3235 |
|
## Stay in the state. |
| 3236 |
|
|
| 3237 |
|
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3238 |
|
$self->{line_prev} = $self->{line}; |
| 3239 |
|
$self->{column_prev} = $self->{column}; |
| 3240 |
|
$self->{column}++; |
| 3241 |
|
$self->{nc} |
| 3242 |
|
= ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); |
| 3243 |
|
} else { |
| 3244 |
|
$self->{set_nc}->($self); |
| 3245 |
|
} |
| 3246 |
|
|
| 3247 |
|
redo A; |
| 3248 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 3249 |
|
|
| 3250 |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
$self->{parse_error}->(level => $self->{level}->{must}, type => 'unclosed DOCTYPE'); |
| 3276 |
redo A; |
redo A; |
| 3277 |
} else { |
} else { |
| 3278 |
|
|
| 3279 |
$self->{ct}->{name} |
$self->{ct}->{name} .= chr ($self->{nc}); # DOCTYPE |
| 3280 |
.= chr ($self->{nc}); # DOCTYPE |
## Stay in the state. |
|
## Stay in the state |
|
| 3281 |
|
|
| 3282 |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { |
| 3283 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |