| 669 |
$self->{column} = 0; |
$self->{column} = 0; |
| 670 |
} elsif ($self->{next_char} == 0x000D) { # CR |
} elsif ($self->{next_char} == 0x000D) { # CR |
| 671 |
!!!cp ('j2'); |
!!!cp ('j2'); |
| 672 |
|
## TODO: support for abort/streaming |
| 673 |
my $next = $input->getc; |
my $next = $input->getc; |
| 674 |
if (defined $next and $next ne "\x0A") { |
if (defined $next and $next ne "\x0A") { |
| 675 |
$self->{next_next_char} = $next; |
$self->{next_next_char} = $next; |
| 2947 |
## Return nothing. |
## Return nothing. |
| 2948 |
# |
# |
| 2949 |
} elsif ($self->{next_char} == 0x0023) { # # |
} elsif ($self->{next_char} == 0x0023) { # # |
| 2950 |
|
!!!cp (999); |
| 2951 |
$self->{state} = ENTITY_HASH_STATE; |
$self->{state} = ENTITY_HASH_STATE; |
| 2952 |
$self->{state_keyword} = '#'; |
$self->{state_keyword} = '#'; |
| 2953 |
!!!next-input-character; |
!!!next-input-character; |
| 2956 |
$self->{next_char} <= 0x005A) or # A..Z |
$self->{next_char} <= 0x005A) or # A..Z |
| 2957 |
(0x0061 <= $self->{next_char} and |
(0x0061 <= $self->{next_char} and |
| 2958 |
$self->{next_char} <= 0x007A)) { # a..z |
$self->{next_char} <= 0x007A)) { # a..z |
| 2959 |
|
!!!cp (998); |
| 2960 |
require Whatpm::_NamedEntityList; |
require Whatpm::_NamedEntityList; |
| 2961 |
$self->{state} = ENTITY_NAME_STATE; |
$self->{state} = ENTITY_NAME_STATE; |
| 2962 |
$self->{state_keyword} = chr $self->{next_char}; |
$self->{state_keyword} = chr $self->{next_char}; |
| 2978 |
## process of the tokenizer. |
## process of the tokenizer. |
| 2979 |
|
|
| 2980 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 2981 |
|
!!!cp (997); |
| 2982 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 2983 |
## Reconsume. |
## Reconsume. |
| 2984 |
!!!emit ({type => CHARACTER_TOKEN, data => '&', |
!!!emit ({type => CHARACTER_TOKEN, data => '&', |
| 2987 |
}); |
}); |
| 2988 |
redo A; |
redo A; |
| 2989 |
} else { |
} else { |
| 2990 |
|
!!!cp (996); |
| 2991 |
$self->{current_attribute}->{value} .= '&'; |
$self->{current_attribute}->{value} .= '&'; |
| 2992 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 2993 |
## Reconsume. |
## Reconsume. |
| 2996 |
} elsif ($self->{state} == ENTITY_HASH_STATE) { |
} elsif ($self->{state} == ENTITY_HASH_STATE) { |
| 2997 |
if ($self->{next_char} == 0x0078 or # x |
if ($self->{next_char} == 0x0078 or # x |
| 2998 |
$self->{next_char} == 0x0058) { # X |
$self->{next_char} == 0x0058) { # X |
| 2999 |
|
!!!cp (995); |
| 3000 |
$self->{state} = HEXREF_X_STATE; |
$self->{state} = HEXREF_X_STATE; |
| 3001 |
$self->{state_keyword} .= chr $self->{next_char}; |
$self->{state_keyword} .= chr $self->{next_char}; |
| 3002 |
!!!next-input-character; |
!!!next-input-character; |
| 3003 |
redo A; |
redo A; |
| 3004 |
} elsif (0x0030 <= $self->{next_char} and |
} elsif (0x0030 <= $self->{next_char} and |
| 3005 |
$self->{next_char} <= 0x0039) { # 0..9 |
$self->{next_char} <= 0x0039) { # 0..9 |
| 3006 |
|
!!!cp (994); |
| 3007 |
$self->{state} = NCR_NUM_STATE; |
$self->{state} = NCR_NUM_STATE; |
| 3008 |
$self->{state_keyword} = $self->{next_char} - 0x0030; |
$self->{state_keyword} = $self->{next_char} - 0x0030; |
| 3009 |
!!!next-input-character; |
!!!next-input-character; |
| 3010 |
redo A; |
redo A; |
| 3011 |
} else { |
} else { |
|
!!!cp (1019); |
|
| 3012 |
!!!parse-error (type => 'bare nero', |
!!!parse-error (type => 'bare nero', |
| 3013 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 3014 |
column => $self->{column_prev} - 1); |
column => $self->{column_prev} - 1); |
| 3018 |
## value in the later processing. |
## value in the later processing. |
| 3019 |
|
|
| 3020 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 3021 |
|
!!!cp (1019); |
| 3022 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 3023 |
## Reconsume. |
## Reconsume. |
| 3024 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
| 3028 |
}); |
}); |
| 3029 |
redo A; |
redo A; |
| 3030 |
} else { |
} else { |
| 3031 |
|
!!!cp (993); |
| 3032 |
$self->{current_attribute}->{value} .= '&#'; |
$self->{current_attribute}->{value} .= '&#'; |
| 3033 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 3034 |
## Reconsume. |
## Reconsume. |
| 3085 |
} |
} |
| 3086 |
|
|
| 3087 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 3088 |
|
!!!cp (992); |
| 3089 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 3090 |
## Reconsume. |
## Reconsume. |
| 3091 |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
| 3093 |
}); |
}); |
| 3094 |
redo A; |
redo A; |
| 3095 |
} else { |
} else { |
| 3096 |
|
!!!cp (991); |
| 3097 |
$self->{current_attribute}->{value} .= chr $code; |
$self->{current_attribute}->{value} .= chr $code; |
| 3098 |
$self->{current_attribute}->{has_reference} = 1; |
$self->{current_attribute}->{has_reference} = 1; |
| 3099 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 3105 |
(0x0041 <= $self->{next_char} and $self->{next_char} <= 0x0046) or |
(0x0041 <= $self->{next_char} and $self->{next_char} <= 0x0046) or |
| 3106 |
(0x0061 <= $self->{next_char} and $self->{next_char} <= 0x0066)) { |
(0x0061 <= $self->{next_char} and $self->{next_char} <= 0x0066)) { |
| 3107 |
# 0..9, A..F, a..f |
# 0..9, A..F, a..f |
| 3108 |
|
!!!cp (990); |
| 3109 |
$self->{state} = HEXREF_HEX_STATE; |
$self->{state} = HEXREF_HEX_STATE; |
| 3110 |
$self->{state_keyword} = 0; |
$self->{state_keyword} = 0; |
| 3111 |
## Reconsume. |
## Reconsume. |
| 3112 |
redo A; |
redo A; |
| 3113 |
} else { |
} else { |
|
!!!cp (1005); |
|
| 3114 |
!!!parse-error (type => 'bare hcro', |
!!!parse-error (type => 'bare hcro', |
| 3115 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
| 3116 |
column => $self->{column_prev} - 2); |
column => $self->{column_prev} - 2); |
| 3120 |
## element or the attribute value in the later processing. |
## element or the attribute value in the later processing. |
| 3121 |
|
|
| 3122 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 3123 |
|
!!!cp (1005); |
| 3124 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 3125 |
## Reconsume. |
## Reconsume. |
| 3126 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
| 3130 |
}); |
}); |
| 3131 |
redo A; |
redo A; |
| 3132 |
} else { |
} else { |
| 3133 |
|
!!!cp (989); |
| 3134 |
$self->{current_attribute}->{value} .= '&' . $self->{state_keyword}; |
$self->{current_attribute}->{value} .= '&' . $self->{state_keyword}; |
| 3135 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 3136 |
## Reconsume. |
## Reconsume. |
| 3201 |
} |
} |
| 3202 |
|
|
| 3203 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 3204 |
|
!!!cp (988); |
| 3205 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 3206 |
## Reconsume. |
## Reconsume. |
| 3207 |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
| 3209 |
}); |
}); |
| 3210 |
redo A; |
redo A; |
| 3211 |
} else { |
} else { |
| 3212 |
|
!!!cp (987); |
| 3213 |
$self->{current_attribute}->{value} .= chr $code; |
$self->{current_attribute}->{value} .= chr $code; |
| 3214 |
$self->{current_attribute}->{has_reference} = 1; |
$self->{current_attribute}->{has_reference} = 1; |
| 3215 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 3293 |
## appropriate attribute value state anyway. |
## appropriate attribute value state anyway. |
| 3294 |
|
|
| 3295 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
| 3296 |
|
!!!cp (986); |
| 3297 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
| 3298 |
## Reconsume. |
## Reconsume. |
| 3299 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
| 3303 |
}); |
}); |
| 3304 |
redo A; |
redo A; |
| 3305 |
} else { |
} else { |
| 3306 |
|
!!!cp (985); |
| 3307 |
$self->{current_attribute}->{value} .= $data; |
$self->{current_attribute}->{value} .= $data; |
| 3308 |
$self->{current_attribute}->{has_reference} = 1 if $has_ref; |
$self->{current_attribute}->{has_reference} = 1 if $has_ref; |
| 3309 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |