114 |
## Tokenizer state identifiers.  Each one is a sub with an empty
## prototype whose body is a single integer literal.
sub ENTITY_NAME_STATE () { 49 } |
sub ENTITY_NAME_STATE () { 49 } |
115 |
sub PCDATA_STATE () { 50 } # "data state" in the spec |
sub PCDATA_STATE () { 50 } # "data state" in the spec |
116 |
|
|
117 |
|
## XML states |
## Identifiers for the processing instruction (PI) states of the
## tokenizer.  Each is an empty-prototype sub returning a fixed integer
## that is compared against |$self->{state}|.
118 |

## Entered when "?" is seen and |$self->{is_xml}| is true.  A space, "?",
## or EOF here is a 'bare pio' parse error and switches to the bogus
## comment state; any other character becomes the first character of a
## new PI_TOKEN's |target|.
sub PI_STATE () { 51 } |
119 |

## Appends characters to |target|.  A space moves to
## PI_TARGET_AFTER_STATE, "?" moves to PI_AFTER_STATE, and EOF is a
## 'no pic' parse error after which the token is emitted.
sub PI_TARGET_STATE () { 52 } |
120 |

## Skips spaces following the target, then switches to PI_DATA_STATE
## without consuming the current character.
sub PI_TARGET_AFTER_STATE () { 53 } |
121 |

## Appends characters to |data|.  "?" moves to PI_DATA_AFTER_STATE;
## EOF is a 'no pic' parse error after which the token is emitted.
sub PI_DATA_STATE () { 54 } |
122 |

## Reached on "?" from PI_TARGET_STATE.  ">" emits the token; anything
## else is a 'no s after target' parse error and "?" is appended to
## |data|.
sub PI_AFTER_STATE () { 55 } |
123 |

## Reached on "?" from PI_DATA_STATE.  ">" emits the token; another "?"
## is appended to |data| and the state is kept; any other character
## appends "?" to |data| and returns to PI_DATA_STATE.
sub PI_DATA_AFTER_STATE () { 56 } |
124 |
|
|
125 |
## Tree constructor state constants (see Whatpm::HTML for the full |
## Tree constructor state constants (see Whatpm::HTML for the full |
126 |
## list and descriptions) |
## list and descriptions) |
127 |
|
|
217 |
## ->{value} |
## ->{value} |
218 |
## ->{has_reference} == 1 or 0 |
## ->{has_reference} == 1 or 0 |
219 |
## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN) |
## ->{data} (COMMENT_TOKEN, CHARACTER_TOKEN) |
220 |
|
## ->{has_reference} == 1 or 0 (CHARACTER_TOKEN) |
221 |
## NOTE: The "self-closing flag" is held as |$self->{self_closing}|. |
## NOTE: The "self-closing flag" is held as |$self->{self_closing}|. |
222 |
## |->{self_closing}| is used to save the value of |$self->{self_closing}| |
## |->{self_closing}| is used to save the value of |$self->{self_closing}| |
223 |
## while the token is pushed back to the stack. |
## while the token is pushed back to the stack. |
508 |
|
|
509 |
redo A; |
redo A; |
510 |
} elsif ($self->{nc} == 0x003F) { # ? |
} elsif ($self->{nc} == 0x003F) { # ? |
511 |
!!!cp (22); |
if ($self->{is_xml}) { |
512 |
!!!parse-error (type => 'pio', |
!!!cp (22.1); |
513 |
line => $self->{line_prev}, |
$self->{state} = PI_STATE; |
514 |
column => $self->{column_prev}); |
!!!next-input-character; |
515 |
$self->{state} = BOGUS_COMMENT_STATE; |
redo A; |
516 |
$self->{ct} = {type => COMMENT_TOKEN, data => '', |
} else { |
517 |
line => $self->{line_prev}, |
!!!cp (22); |
518 |
column => $self->{column_prev}, |
!!!parse-error (type => 'pio', |
519 |
}; |
line => $self->{line_prev}, |
520 |
## $self->{nc} is intentionally left as is |
column => $self->{column_prev}); |
521 |
redo A; |
$self->{state} = BOGUS_COMMENT_STATE; |
522 |
|
$self->{ct} = {type => COMMENT_TOKEN, data => '', |
523 |
|
line => $self->{line_prev}, |
524 |
|
column => $self->{column_prev}, |
525 |
|
}; |
526 |
|
## $self->{nc} is intentionally left as is |
527 |
|
redo A; |
528 |
|
} |
529 |
} else { |
} else { |
530 |
!!!cp (23); |
!!!cp (23); |
531 |
!!!parse-error (type => 'bare stago', |
!!!parse-error (type => 'bare stago', |
1573 |
redo A; |
redo A; |
1574 |
} elsif ($self->{s_kwd} eq '[CDATA' and |
} elsif ($self->{s_kwd} eq '[CDATA' and |
1575 |
$self->{nc} == 0x005B) { # [ |
$self->{nc} == 0x005B) { # [ |
|
!!!cp (135.2); |
|
|
|
|
1576 |
if ($self->{is_xml} and |
if ($self->{is_xml} and |
1577 |
not $self->{tainted} and |
not $self->{tainted} and |
1578 |
@{$self->{open_elements} or []} == 0) { |
@{$self->{open_elements} or []} == 0) { |
1579 |
|
!!!cp (135.2); |
1580 |
!!!parse-error (type => 'cdata outside of root element', |
!!!parse-error (type => 'cdata outside of root element', |
1581 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
1582 |
column => $self->{column_prev} - 7); |
column => $self->{column_prev} - 7); |
1583 |
$self->{tainted} = 1; |
$self->{tainted} = 1; |
1584 |
|
} else { |
1585 |
|
!!!cp (135.21); |
1586 |
} |
} |
1587 |
|
|
1588 |
$self->{ct} = {type => CHARACTER_TOKEN, |
$self->{ct} = {type => CHARACTER_TOKEN, |
2359 |
redo A; |
redo A; |
2360 |
} elsif ($self->{nc} == -1) { |
} elsif ($self->{nc} == -1) { |
2361 |
if ($self->{is_xml}) { |
if ($self->{is_xml}) { |
2362 |
|
!!!cp (221.11); |
2363 |
!!!parse-error (type => 'no mse'); ## TODO: type |
!!!parse-error (type => 'no mse'); ## TODO: type |
2364 |
|
} else { |
2365 |
|
!!!cp (221.12); |
2366 |
} |
} |
2367 |
|
|
2368 |
$self->{state} = DATA_STATE; |
$self->{state} = DATA_STATE; |
2576 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
2577 |
## Reconsume. |
## Reconsume. |
2578 |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
2579 |
|
has_reference => 1, |
2580 |
line => $l, column => $c, |
line => $l, column => $c, |
2581 |
}); |
}); |
2582 |
redo A; |
redo A; |
2689 |
$self->{s_kwd} = ''; |
$self->{s_kwd} = ''; |
2690 |
## Reconsume. |
## Reconsume. |
2691 |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
!!!emit ({type => CHARACTER_TOKEN, data => chr $code, |
2692 |
|
has_reference => 1, |
2693 |
line => $l, column => $c, |
line => $l, column => $c, |
2694 |
}); |
}); |
2695 |
redo A; |
redo A; |
2785 |
## Reconsume. |
## Reconsume. |
2786 |
!!!emit ({type => CHARACTER_TOKEN, |
!!!emit ({type => CHARACTER_TOKEN, |
2787 |
data => $data, |
data => $data, |
2788 |
|
has_reference => $has_ref, |
2789 |
line => $self->{line_prev}, |
line => $self->{line_prev}, |
2790 |
column => $self->{column_prev} + 1 - length $self->{s_kwd}, |
column => $self->{column_prev} + 1 - length $self->{s_kwd}, |
2791 |
}); |
}); |
2799 |
## Reconsume. |
## Reconsume. |
2800 |
redo A; |
redo A; |
2801 |
} |
} |
2802 |
|
|
2803 |
|
## XML-only states |
2804 |
|
|
2805 |
|
} elsif ($self->{state} == PI_STATE) { |
2806 |
|
if ($is_space->{$self->{nc}} or |
2807 |
|
$self->{nc} == 0x003F or # ? ## XML5: Same as "Anything else" |
2808 |
|
$self->{nc} == -1) { |
2809 |
|
!!!parse-error (type => 'bare pio', ## TODO: type |
2810 |
|
line => $self->{line_prev}, |
2811 |
|
column => $self->{column_prev} |
2812 |
|
- 1 * ($self->{nc} != -1)); |
2813 |
|
$self->{state} = BOGUS_COMMENT_STATE; |
2814 |
|
## Reconsume. |
2815 |
|
$self->{ct} = {type => COMMENT_TOKEN, |
2816 |
|
data => '?', |
2817 |
|
line => $self->{line_prev}, |
2818 |
|
column => $self->{column_prev} |
2819 |
|
- 1 * ($self->{nc} != -1), |
2820 |
|
}; |
2821 |
|
redo A; |
2822 |
|
} else { |
2823 |
|
$self->{ct} = {type => PI_TOKEN, |
2824 |
|
target => chr $self->{nc}, |
2825 |
|
data => '', |
2826 |
|
line => $self->{line_prev}, |
2827 |
|
column => $self->{column_prev} - 1, |
2828 |
|
}; |
2829 |
|
$self->{state} = PI_TARGET_STATE; |
2830 |
|
!!!next-input-character; |
2831 |
|
redo A; |
2832 |
|
} |
2833 |
|
} elsif ($self->{state} == PI_TARGET_STATE) { |
2834 |
|
if ($is_space->{$self->{nc}}) { |
2835 |
|
$self->{state} = PI_TARGET_AFTER_STATE; |
2836 |
|
!!!next-input-character; |
2837 |
|
redo A; |
2838 |
|
} elsif ($self->{nc} == -1) { |
2839 |
|
!!!parse-error (type => 'no pic'); ## TODO: type |
2840 |
|
$self->{state} = DATA_STATE; |
2841 |
|
$self->{s_kwd} = ''; |
2842 |
|
## Reconsume. |
2843 |
|
!!!emit ($self->{ct}); # pi |
2844 |
|
redo A; |
2845 |
|
} elsif ($self->{nc} == 0x003F) { # ? |
2846 |
|
$self->{state} = PI_AFTER_STATE; |
2847 |
|
!!!next-input-character; |
2848 |
|
redo A; |
2849 |
|
} else { |
2850 |
|
## XML5: typo ("tag name" -> "target") |
2851 |
|
$self->{ct}->{target} .= chr $self->{nc}; # pi |
2852 |
|
!!!next-input-character; |
2853 |
|
redo A; |
2854 |
|
} |
2855 |
|
} elsif ($self->{state} == PI_TARGET_AFTER_STATE) { |
2856 |
|
if ($is_space->{$self->{nc}}) { |
2857 |
|
## Stay in the state. |
2858 |
|
!!!next-input-character; |
2859 |
|
redo A; |
2860 |
|
} else { |
2861 |
|
$self->{state} = PI_DATA_STATE; |
2862 |
|
## Reprocess. |
2863 |
|
redo A; |
2864 |
|
} |
2865 |
|
} elsif ($self->{state} == PI_DATA_STATE) { |
2866 |
|
if ($self->{nc} == 0x003F) { # ? |
2867 |
|
$self->{state} = PI_DATA_AFTER_STATE; |
2868 |
|
!!!next-input-character; |
2869 |
|
redo A; |
2870 |
|
} elsif ($self->{nc} == -1) { |
2871 |
|
!!!parse-error (type => 'no pic'); ## TODO: type |
2872 |
|
$self->{state} = DATA_STATE; |
2873 |
|
$self->{s_kwd} = ''; |
2874 |
|
## Reprocess. |
2875 |
|
!!!emit ($self->{ct}); # pi |
2876 |
|
redo A; |
2877 |
|
} else { |
2878 |
|
$self->{ct}->{data} .= chr $self->{nc}; # pi |
2879 |
|
$self->{read_until}->($self->{ct}->{data}, q[?], |
2880 |
|
length $self->{ct}->{data}); |
2881 |
|
## Stay in the state. |
2882 |
|
!!!next-input-character; |
2883 |
|
## Reprocess. |
2884 |
|
redo A; |
2885 |
|
} |
2886 |
|
} elsif ($self->{state} == PI_AFTER_STATE) { |
2887 |
|
if ($self->{nc} == 0x003E) { # > |
2888 |
|
$self->{state} = DATA_STATE; |
2889 |
|
$self->{s_kwd} = ''; |
2890 |
|
!!!next-input-character; |
2891 |
|
!!!emit ($self->{ct}); # pi |
2892 |
|
redo A; |
2893 |
|
} elsif ($self->{nc} == 0x003F) { # ? |
2894 |
|
!!!parse-error (type => 'no s after target', ## TODO: type |
2895 |
|
line => $self->{line_prev}, |
2896 |
|
column => $self->{column_prev}); ## XML5: no error |
2897 |
|
$self->{ct}->{data} .= '?'; |
2898 |
|
$self->{state} = PI_DATA_AFTER_STATE; |
2899 |
|
!!!next-input-character; |
2900 |
|
redo A; |
2901 |
|
} else { |
2902 |
|
!!!parse-error (type => 'no s after target', ## TODO: type |
2903 |
|
line => $self->{line_prev}, |
2904 |
|
column => $self->{column_prev} |
2905 |
|
+ 1 * ($self->{nc} == -1)); ## XML5: no error |
2906 |
|
$self->{ct}->{data} .= '?'; ## XML5: not appended |
2907 |
|
$self->{state} = PI_DATA_STATE; |
2908 |
|
## Reprocess. |
2909 |
|
redo A; |
2910 |
|
} |
2911 |
|
} elsif ($self->{state} == PI_DATA_AFTER_STATE) { |
2912 |
|
## XML5: Same as "pi after state" in XML5 |
2913 |
|
if ($self->{nc} == 0x003E) { # > |
2914 |
|
$self->{state} = DATA_STATE; |
2915 |
|
$self->{s_kwd} = ''; |
2916 |
|
!!!next-input-character; |
2917 |
|
!!!emit ($self->{ct}); # pi |
2918 |
|
redo A; |
2919 |
|
} elsif ($self->{nc} == 0x003F) { # ? |
2920 |
|
$self->{ct}->{data} .= '?'; |
2921 |
|
## Stay in the state. |
2922 |
|
!!!next-input-character; |
2923 |
|
redo A; |
2924 |
|
} else { |
2925 |
|
$self->{ct}->{data} .= '?'; ## XML5: not appended |
2926 |
|
$self->{state} = PI_DATA_STATE; |
2927 |
|
## Reprocess. |
2928 |
|
redo A; |
2929 |
|
} |
2930 |
|
|
2931 |
} else { |
} else { |
2932 |
die "$0: $self->{state}: Unknown state"; |
die "$0: $self->{state}: Unknown state"; |
2933 |
} |
} |