| 726 |
$self->{char_buffer_pos} += $count; |
$self->{char_buffer_pos} += $count; |
| 727 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 728 |
$self->{column_prev} = $self->{column} - 1; |
$self->{column_prev} = $self->{column} - 1; |
|
$self->{prev_char} = [-1, -1, -1]; |
|
| 729 |
$self->{nc} = -1; |
$self->{nc} = -1; |
| 730 |
} |
} |
| 731 |
return $count; |
return $count; |
| 738 |
$self->{column} += $count; |
$self->{column} += $count; |
| 739 |
$self->{line_prev} = $self->{line}; |
$self->{line_prev} = $self->{line}; |
| 740 |
$self->{column_prev} = $self->{column} - 1; |
$self->{column_prev} = $self->{column} - 1; |
|
$self->{prev_char} = [-1, -1, -1]; |
|
| 741 |
$self->{nc} = -1; |
$self->{nc} = -1; |
| 742 |
} |
} |
| 743 |
return $count; |
return $count; |
| 930 |
delete $self->{self_closing}; |
delete $self->{self_closing}; |
| 931 |
$self->{char_buffer} = ''; |
$self->{char_buffer} = ''; |
| 932 |
$self->{char_buffer_pos} = 0; |
$self->{char_buffer_pos} = 0; |
|
$self->{prev_char} = [-1, -1, -1]; |
|
| 933 |
$self->{nc} = -1; # next input character |
$self->{nc} = -1; # next input character |
| 934 |
#$self->{next_nc} |
#$self->{next_nc} |
| 935 |
!!!next-input-character; |
!!!next-input-character; |
| 984 |
A: { |
A: { |
| 985 |
if ($self->{state} == DATA_STATE) { |
if ($self->{state} == DATA_STATE) { |
| 986 |
if ($self->{nc} == 0x0026) { # & |
if ($self->{nc} == 0x0026) { # & |
| 987 |
|
delete $self->{s_kwd}; |
| 988 |
if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA |
if ($self->{content_model} & CM_ENTITY and # PCDATA | RCDATA |
| 989 |
not $self->{escape}) { |
not $self->{escape}) { |
| 990 |
!!!cp (1); |
!!!cp (1); |
| 1003 |
} |
} |
| 1004 |
} elsif ($self->{nc} == 0x002D) { # - |
} elsif ($self->{nc} == 0x002D) { # - |
| 1005 |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
| 1006 |
unless ($self->{escape}) { |
if (defined $self->{s_kwd}) { |
| 1007 |
if ($self->{prev_char}->[0] == 0x002D and # - |
!!!cp (2.1); |
| 1008 |
$self->{prev_char}->[1] == 0x0021 and # ! |
$self->{s_kwd} .= '-'; |
| 1009 |
$self->{prev_char}->[2] == 0x003C) { # < |
} else { |
| 1010 |
!!!cp (3); |
!!!cp (2.2); |
| 1011 |
$self->{escape} = 1; |
$self->{s_kwd} = '-'; |
| 1012 |
} else { |
} |
| 1013 |
!!!cp (4); |
|
| 1014 |
} |
if ($self->{s_kwd} eq '<!--') { |
| 1015 |
|
!!!cp (3); |
| 1016 |
|
$self->{escape} = 1; # unless $self->{escape}; |
| 1017 |
|
$self->{s_kwd} = '--'; |
| 1018 |
|
# |
| 1019 |
|
} elsif ($self->{s_kwd} eq '---') { |
| 1020 |
|
!!!cp (4); |
| 1021 |
|
$self->{s_kwd} = '--'; |
| 1022 |
|
# |
| 1023 |
} else { |
} else { |
| 1024 |
!!!cp (5); |
!!!cp (5); |
| 1025 |
|
# |
| 1026 |
} |
} |
| 1027 |
} |
} |
| 1028 |
|
|
| 1029 |
# |
# |
| 1030 |
|
} elsif ($self->{nc} == 0x0021) { # ! |
| 1031 |
|
if (defined $self->{s_kwd}) { |
| 1032 |
|
!!!cp (5.1); |
| 1033 |
|
$self->{s_kwd} .= '!'; |
| 1034 |
|
# |
| 1035 |
|
} else { |
| 1036 |
|
!!!cp (5.2); |
| 1037 |
|
# |
| 1038 |
|
} |
| 1039 |
|
# |
| 1040 |
} elsif ($self->{nc} == 0x003C) { # < |
} elsif ($self->{nc} == 0x003C) { # < |
| 1041 |
|
delete $self->{s_kwd}; |
| 1042 |
if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA |
if ($self->{content_model} & CM_FULL_MARKUP or # PCDATA |
| 1043 |
(($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA |
(($self->{content_model} & CM_LIMITED_MARKUP) and # CDATA | RCDATA |
| 1044 |
not $self->{escape})) { |
not $self->{escape})) { |
| 1053 |
} elsif ($self->{nc} == 0x003E) { # > |
} elsif ($self->{nc} == 0x003E) { # > |
| 1054 |
if ($self->{escape} and |
if ($self->{escape} and |
| 1055 |
($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA |
($self->{content_model} & CM_LIMITED_MARKUP)) { # RCDATA | CDATA |
| 1056 |
if ($self->{prev_char}->[0] == 0x002D and # - |
if (defined $self->{s_kwd} and $self->{s_kwd} eq '--') { |
|
$self->{prev_char}->[1] == 0x002D) { # - |
|
| 1057 |
!!!cp (8); |
!!!cp (8); |
| 1058 |
delete $self->{escape}; |
delete $self->{escape}; |
| 1059 |
} else { |
} else { |
| 1063 |
!!!cp (10); |
!!!cp (10); |
| 1064 |
} |
} |
| 1065 |
|
|
| 1066 |
|
delete $self->{s_kwd}; |
| 1067 |
# |
# |
| 1068 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
| 1069 |
!!!cp (11); |
!!!cp (11); |
| 1070 |
|
delete $self->{s_kwd}; |
| 1071 |
!!!emit ({type => END_OF_FILE_TOKEN, |
!!!emit ({type => END_OF_FILE_TOKEN, |
| 1072 |
line => $self->{line}, column => $self->{column}}); |
line => $self->{line}, column => $self->{column}}); |
| 1073 |
last A; ## TODO: ok? |
last A; ## TODO: ok? |
| 1074 |
} else { |
} else { |
| 1075 |
!!!cp (12); |
!!!cp (12); |
| 1076 |
|
delete $self->{s_kwd}; |
| 1077 |
|
# |
| 1078 |
} |
} |
| 1079 |
|
|
| 1080 |
# Anything else |
# Anything else |
| 1081 |
my $token = {type => CHARACTER_TOKEN, |
my $token = {type => CHARACTER_TOKEN, |
| 1082 |
data => chr $self->{nc}, |
data => chr $self->{nc}, |
| 1083 |
line => $self->{line}, column => $self->{column}, |
line => $self->{line}, column => $self->{column}, |
| 1084 |
}; |
}; |
| 1085 |
$self->{read_until}->($token->{data}, q[-!<>&], length $token->{data}); |
if ($self->{read_until}->($token->{data}, q[-!<>&], |
| 1086 |
|
length $token->{data})) { |
| 1087 |
|
delete $self->{s_kwd}; |
| 1088 |
|
} |
| 1089 |
|
|
| 1090 |
## Stay in the data state |
## Stay in the data state |
| 1091 |
!!!next-input-character; |
!!!next-input-character; |
|
|
|
| 1092 |
!!!emit ($token); |
!!!emit ($token); |
|
|
|
| 1093 |
redo A; |
redo A; |
| 1094 |
} elsif ($self->{state} == TAG_OPEN_STATE) { |
} elsif ($self->{state} == TAG_OPEN_STATE) { |
| 1095 |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
if ($self->{content_model} & CM_LIMITED_MARKUP) { # RCDATA | CDATA |
| 1098 |
!!!next-input-character; |
!!!next-input-character; |
| 1099 |
$self->{state} = CLOSE_TAG_OPEN_STATE; |
$self->{state} = CLOSE_TAG_OPEN_STATE; |
| 1100 |
redo A; |
redo A; |
| 1101 |
|
} elsif ($self->{nc} == 0x0021) { # ! |
| 1102 |
|
!!!cp (15.1); |
| 1103 |
|
$self->{s_kwd} = '<' unless $self->{escape}; |
| 1104 |
|
# |
| 1105 |
} else { |
} else { |
| 1106 |
!!!cp (16); |
!!!cp (16); |
| 1107 |
## reconsume |
# |
|
$self->{state} = DATA_STATE; |
|
|
|
|
|
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
|
|
line => $self->{line_prev}, |
|
|
column => $self->{column_prev}, |
|
|
}); |
|
|
|
|
|
redo A; |
|
| 1108 |
} |
} |
| 1109 |
|
|
| 1110 |
|
## reconsume |
| 1111 |
|
$self->{state} = DATA_STATE; |
| 1112 |
|
!!!emit ({type => CHARACTER_TOKEN, data => '<', |
| 1113 |
|
line => $self->{line_prev}, |
| 1114 |
|
column => $self->{column_prev}, |
| 1115 |
|
}); |
| 1116 |
|
redo A; |
| 1117 |
} elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA |
} elsif ($self->{content_model} & CM_FULL_MARKUP) { # PCDATA |
| 1118 |
if ($self->{nc} == 0x0021) { # ! |
if ($self->{nc} == 0x0021) { # ! |
| 1119 |
!!!cp (17); |
!!!cp (17); |
| 7909 |
$p->{char_buffer_pos} += $count; |
$p->{char_buffer_pos} += $count; |
| 7910 |
$p->{line_prev} = $p->{line}; |
$p->{line_prev} = $p->{line}; |
| 7911 |
$p->{column_prev} = $p->{column} - 1; |
$p->{column_prev} = $p->{column} - 1; |
|
$p->{prev_char} = [-1, -1, -1]; |
|
| 7912 |
$p->{nc} = -1; |
$p->{nc} = -1; |
| 7913 |
} |
} |
| 7914 |
return $count; |
return $count; |
| 7920 |
if ($count) { |
if ($count) { |
| 7921 |
$p->{column} += $count; |
$p->{column} += $count; |
| 7922 |
$p->{column_prev} += $count; |
$p->{column_prev} += $count; |
|
$p->{prev_char} = [-1, -1, -1]; |
|
| 7923 |
$p->{nc} = -1; |
$p->{nc} = -1; |
| 7924 |
} |
} |
| 7925 |
return $count; |
return $count; |