669 |
$self->{column} = 0; |
$self->{column} = 0; |
670 |
} elsif ($self->{next_char} == 0x000D) { # CR |
} elsif ($self->{next_char} == 0x000D) { # CR |
671 |
!!!cp ('j2'); |
!!!cp ('j2'); |
672 |
|
## TODO: support for abort/streaming |
673 |
my $next = $input->getc; |
my $next = $input->getc; |
674 |
if (defined $next and $next ne "\x0A") { |
if (defined $next and $next ne "\x0A") { |
675 |
$self->{next_next_char} = $next; |
$self->{next_next_char} = $next; |
2947 |
## Return nothing. |
## Return nothing. |
2948 |
# |
# |
2949 |
} elsif ($self->{next_char} == 0x0023) { # # |
} elsif ($self->{next_char} == 0x0023) { # # |
2950 |
|
!!!cp (999); |
2951 |
$self->{state} = ENTITY_HASH_STATE; |
$self->{state} = ENTITY_HASH_STATE; |
2952 |
$self->{state_keyword} = '#'; |
$self->{state_keyword} = '#'; |
2953 |
!!!next-input-character; |
!!!next-input-character; |
2956 |
$self->{next_char} <= 0x005A) or # A..Z |
$self->{next_char} <= 0x005A) or # A..Z |
2957 |
(0x0061 <= $self->{next_char} and |
(0x0061 <= $self->{next_char} and |
2958 |
$self->{next_char} <= 0x007A)) { # a..z |
$self->{next_char} <= 0x007A)) { # a..z |
2959 |
|
!!!cp (998); |
2960 |
require Whatpm::_NamedEntityList; |
require Whatpm::_NamedEntityList; |
2961 |
$self->{state} = ENTITY_NAME_STATE; |
$self->{state} = ENTITY_NAME_STATE; |
2962 |
$self->{state_keyword} = chr $self->{next_char}; |
$self->{state_keyword} = chr $self->{next_char}; |
2978 |
## process of the tokenizer. |
## process of the tokenizer. |
2979 |
|
|
2980 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
2981 |
|
!!!cp (997); |
2982 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2983 |
## Reconsume. |
## Reconsume. |
2984 |
!!!emit ({type => CHARACTER_TOKEN, data => '&', |
!!!emit ({type => CHARACTER_TOKEN, data => '&', |
2987 |
}); |
}); |
2988 |
redo A; |
redo A; |
2989 |
} else { |
} else { |
2990 |
|
!!!cp (996); |
2991 |
$self->{current_attribute}->{value} .= '&'; |
$self->{current_attribute}->{value} .= '&'; |
2992 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
2993 |
## Reconsume. |
## Reconsume. |
2996 |
} elsif ($self->{state} == ENTITY_HASH_STATE) { |
} elsif ($self->{state} == ENTITY_HASH_STATE) { |
2997 |
if ($self->{next_char} == 0x0078 or # x |
if ($self->{next_char} == 0x0078 or # x |
2998 |
$self->{next_char} == 0x0058) { # X |
$self->{next_char} == 0x0058) { # X |
2999 |
|
!!!cp (995); |
3000 |
$self->{state} = HEXREF_X_STATE; |
$self->{state} = HEXREF_X_STATE; |
3001 |
$self->{state_keyword} .= chr $self->{next_char}; |
$self->{state_keyword} .= chr $self->{next_char}; |
3002 |
!!!next-input-character; |
!!!next-input-character; |
3003 |
redo A; |
redo A; |
3004 |
} elsif (0x0030 <= $self->{next_char} and |
} elsif (0x0030 <= $self->{next_char} and |
3005 |
$self->{next_char} <= 0x0039) { # 0..9 |
$self->{next_char} <= 0x0039) { # 0..9 |
3006 |
|
!!!cp (994); |
3007 |
$self->{state} = NCR_NUM_STATE; |
$self->{state} = NCR_NUM_STATE; |
3008 |
$self->{state_keyword} = $self->{next_char} - 0x0030; |
$self->{state_keyword} = $self->{next_char} - 0x0030; |
3009 |
!!!next-input-character; |
!!!next-input-character; |
3010 |
redo A; |
redo A; |
3011 |
} else { |
} else { |
|
!!!cp (1019); |
|
3012 |
!!!parse-error (type => 'bare nero', |
!!!parse-error (type => 'bare nero', |
3013 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
3014 |
column => $self->{column_prev} - 1); |
column => $self->{column_prev} - 1); |
3018 |
## value in the later processing. |
## value in the later processing. |
3019 |
|
|
3020 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
3021 |
|
!!!cp (1019); |
3022 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
3023 |
## Reconsume. |
## Reconsume. |
3024 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
3028 |
}); |
}); |
3029 |
redo A; |
redo A; |
3030 |
} else { |
} else { |
3031 |
|
!!!cp (993); |
3032 |
$self->{current_attribute}->{value} .= '&#'; |
$self->{current_attribute}->{value} .= '&#'; |
3033 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
3034 |
## Reconsume. |
## Reconsume. |
3085 |
} |
} |
3086 |
|
|
3087 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
3088 |
|
!!!cp (992); |
3089 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
3090 |
## Reconsume. |
## Reconsume. |
3091 |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
3093 |
}); |
}); |
3094 |
redo A; |
redo A; |
3095 |
} else { |
} else { |
3096 |
|
!!!cp (991); |
3097 |
$self->{current_attribute}->{value} .= chr $code; |
$self->{current_attribute}->{value} .= chr $code; |
3098 |
$self->{current_attribute}->{has_reference} = 1; |
$self->{current_attribute}->{has_reference} = 1; |
3099 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
3105 |
(0x0041 <= $self->{next_char} and $self->{next_char} <= 0x0046) or |
(0x0041 <= $self->{next_char} and $self->{next_char} <= 0x0046) or |
3106 |
(0x0061 <= $self->{next_char} and $self->{next_char} <= 0x0066)) { |
(0x0061 <= $self->{next_char} and $self->{next_char} <= 0x0066)) { |
3107 |
# 0..9, A..F, a..f |
# 0..9, A..F, a..f |
3108 |
|
!!!cp (990); |
3109 |
$self->{state} = HEXREF_HEX_STATE; |
$self->{state} = HEXREF_HEX_STATE; |
3110 |
$self->{state_keyword} = 0; |
$self->{state_keyword} = 0; |
3111 |
## Reconsume. |
## Reconsume. |
3112 |
redo A; |
redo A; |
3113 |
} else { |
} else { |
|
!!!cp (1005); |
|
3114 |
!!!parse-error (type => 'bare hcro', |
!!!parse-error (type => 'bare hcro', |
3115 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
3116 |
column => $self->{column_prev} - 2); |
column => $self->{column_prev} - 2); |
3120 |
## element or the attribute value in the later processing. |
## element or the attribute value in the later processing. |
3121 |
|
|
3122 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
3123 |
|
!!!cp (1005); |
3124 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
3125 |
## Reconsume. |
## Reconsume. |
3126 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
3130 |
}); |
}); |
3131 |
redo A; |
redo A; |
3132 |
} else { |
} else { |
3133 |
|
!!!cp (989); |
3134 |
$self->{current_attribute}->{value} .= '&' . $self->{state_keyword}; |
$self->{current_attribute}->{value} .= '&' . $self->{state_keyword}; |
3135 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
3136 |
## Reconsume. |
## Reconsume. |
3201 |
} |
} |
3202 |
|
|
3203 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
3204 |
|
!!!cp (988); |
3205 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
3206 |
## Reconsume. |
## Reconsume. |
3207 |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
3209 |
}); |
}); |
3210 |
redo A; |
redo A; |
3211 |
} else { |
} else { |
3212 |
|
!!!cp (987); |
3213 |
$self->{current_attribute}->{value} .= chr $code; |
$self->{current_attribute}->{value} .= chr $code; |
3214 |
$self->{current_attribute}->{has_reference} = 1; |
$self->{current_attribute}->{has_reference} = 1; |
3215 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
3293 |
## appropriate attribute value state anyway. |
## appropriate attribute value state anyway. |
3294 |
|
|
3295 |
if ($self->{prev_state} == DATA_STATE) { |
if ($self->{prev_state} == DATA_STATE) { |
3296 |
|
!!!cp (986); |
3297 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |
3298 |
## Reconsume. |
## Reconsume. |
3299 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
3303 |
}); |
}); |
3304 |
redo A; |
redo A; |
3305 |
} else { |
} else { |
3306 |
|
!!!cp (985); |
3307 |
$self->{current_attribute}->{value} .= $data; |
$self->{current_attribute}->{value} .= $data; |
3308 |
$self->{current_attribute}->{has_reference} = 1 if $has_ref; |
$self->{current_attribute}->{has_reference} = 1 if $has_ref; |
3309 |
$self->{state} = $self->{prev_state}; |
$self->{state} = $self->{prev_state}; |