164 |
|
|
165 |
@DISLang:role: DOMLS|ParserRole |
@DISLang:role: DOMLS|ParserRole |
166 |
|
|
167 |
|
@enDesc: |
168 |
|
Note that the <Class::ManakaiXMLParser> reports any XML errors |
169 |
|
(syntax errors and / or well-formedness constraint errors) |
170 |
|
via the <IF::DOMCore:error-handler> registered to |
171 |
|
the <A::DOMLS:LSParser.domConfig> object. Each error has |
172 |
|
its <A::DOMCore:DOMError.severity>, either <C::DOMCore:SEVERITY_ERROR> |
173 |
|
or <C::DOMCore:SEVERITY_FATAL_ERROR>. However, their semantics |
174 |
|
are slightly different from the ones of <QUOTE::error> and |
175 |
|
<QUOTE::fatal error> in XML; in this implementation, |
176 |
|
<C::DOMCore:SEVERITY_ERROR> implies that the parsing process |
177 |
|
can effectively be continued to detect more errors while |
178 |
|
<C::DOMCore:SEVERITY_FATAL_ERROR> implies that the error |
179 |
|
is serious so that the result document tree and any errors |
180 |
|
might be far from the ones that would be obtained if the error |
181 |
|
were not present. |
182 |
|
|
183 |
@Attr: |
@Attr: |
184 |
@@Name: domConfig |
@@Name: domConfig |
185 |
@@enDesc: |
@@enDesc: |
238 |
$self->{char} = []; |
$self->{char} = []; |
239 |
$self->{token} = []; |
$self->{token} = []; |
240 |
$self->{source} = $sourceText; |
$self->{source} = $sourceText; |
241 |
|
$self->{xml_version} = '1.0'; |
242 |
|
$self->{standalone} = false; |
243 |
|
## Well-formedness constraint Entity Declared takes effect? |
244 |
|
$self->{line} = 1; |
245 |
|
$self->{column} = 1; |
246 |
|
|
247 |
__DEEP{ |
__DEEP{ |
248 |
$r = $self->_parse_DocumentEntity |
$r = $self->_parse_DocumentEntity |
269 |
|
|
270 |
if (length $char) { |
if (length $char) { |
271 |
$r = ord $char; |
$r = ord $char; |
272 |
|
if ($r == 0x000A) { |
273 |
|
$self->{line}++; |
274 |
|
$self->{column} = 1; |
275 |
|
} elsif ($r == 0x000D) { |
276 |
|
my $next_char = substr ($self->{source}, |
277 |
|
pos ($self->{source}), 1); |
278 |
|
if ($next_char eq "\x0A") { |
279 |
|
pos ($self->{source})++; |
280 |
|
$self->{column} = 1; |
281 |
|
} elsif ($next_char eq "\x85") { |
282 |
|
if ($self->{xml_version} eq '1.1') { |
283 |
|
pos ($self->{source})++; |
284 |
|
$self->{column} = 1; |
285 |
|
} else { |
286 |
|
$self->{column} = 0; |
287 |
|
} |
288 |
|
} else { |
289 |
|
$self->{column} = 1; |
290 |
|
} |
291 |
|
$r = 0x000A; |
292 |
|
$self->{line}++; |
293 |
|
} elsif ( |
294 |
|
not ((0x0020 <= $r and $r <= 0x007E) or |
295 |
|
(0x00A0 <= $r and $r <= 0xD7FF) or |
296 |
|
(0xE000 <= $r and $r <= 0xFFFD) or |
297 |
|
(0x10000 <= $r and $r <= 0x10FFFF)) and |
298 |
|
$r != 0x0009 and $r != 0x0085 and |
299 |
|
not ($self->{xml_version} eq '1.0' and |
300 |
|
(0x007F <= $r and $r <= 0x009F)) |
301 |
|
) { |
302 |
|
my $location = { |
303 |
|
utf32_offset => pos ($self->{source}), |
304 |
|
line_number => $self->{line}, |
305 |
|
column_number => $self->{column}, |
306 |
|
}; |
307 |
|
my $continue = __DOMCore:ERROR{xp|wf-legal-literal-character:: |
308 |
|
DOMCore|location => {$location}, |
309 |
|
xp|character-number => {$r}, |
310 |
|
}__; |
311 |
|
unless ($continue) { |
312 |
|
__EXCEPTION{DOMLS|PARSE_ERR}__; |
313 |
|
} |
314 |
|
$self->{has_error} = true; |
315 |
|
$self->{column}++; |
316 |
|
} elsif ($r == 0x0085 or $r == 0x2028) { |
317 |
|
$r = 0x000A if $self->{xml_version} eq '1.1'; |
318 |
|
$self->{line}++; |
319 |
|
$self->{column} = 1; |
320 |
|
} else { |
321 |
|
$self->{column}++; |
322 |
|
} |
323 |
} else { |
} else { |
324 |
$r = -1; |
$r = -1; |
325 |
} |
} |
386 |
*/ |
*/ |
387 |
?lexmode 'DocumentStart'; |
?lexmode 'DocumentStart'; |
388 |
|
|
389 |
&XMLDeclarationOpt ($doc => $doc); |
~? (XDO) { |
390 |
|
&_XMLDeclaration_ ($doc => $doc); |
391 |
?lexmode 'DocumentProlog'; |
|
392 |
|
~ (PIC) { |
393 |
|
?lexmode DocumentProlog; |
394 |
|
} else { |
395 |
|
?lexmode DocumentProlog; |
396 |
|
} |
397 |
|
} else { |
398 |
|
?lexmode 'DocumentProlog'; |
399 |
|
} |
400 |
|
|
401 |
// *Misc |
// *Misc |
402 |
~* (CDO) { |
~* (CDO) { |
423 |
~? (MDO) { |
~? (MDO) { |
424 |
&_DocumentTypeDeclaration_ ($doc => $doc); |
&_DocumentTypeDeclaration_ ($doc => $doc); |
425 |
|
|
426 |
~ (MDC) { } |
~ (MDC) { |
427 |
|
?lexmode DocumentMisc; |
428 |
|
} else { |
429 |
|
?lexmode DocumentMisc; |
430 |
|
} |
431 |
|
} else { |
432 |
|
lang:Perl { |
433 |
|
$self->{standalone} = true; |
434 |
|
} |
435 |
|
?lexmode DocumentMisc; |
436 |
} |
} |
437 |
|
|
|
?lexmode 'DocumentMisc'; |
|
|
|
|
438 |
// *Misc |
// *Misc |
439 |
~* (CDO) { |
~* (CDO) { |
440 |
&_CommentDeclaration_ ($doc => $doc, $parent => $doc); |
&_CommentDeclaration_ ($doc => $doc, $parent => $doc); |
512 |
NOTE: XML declaration is optional in XML 1.0 |
NOTE: XML declaration is optional in XML 1.0 |
513 |
while it is required in XML 1.1. |
while it is required in XML 1.1. |
514 |
*/ |
*/ |
515 |
|
rule _XMLDeclaration_ ($doc) { |
516 |
|
?lexmode XMLDeclaration; |
517 |
|
|
518 |
rule XMLDeclarationOpt ($doc) { |
~ (S) { } |
519 |
~? (XDO) { |
|
520 |
&_XMLDeclaration ($doc => $doc); |
~ (Name == 'version') { |
521 |
|
~? (S) { } |
522 |
|
~ (VI) { } |
523 |
|
~? (S) { } |
524 |
|
|
525 |
|
my $ver; |
526 |
|
my $bad_token; |
527 |
|
|
528 |
|
~ (LIT) { |
529 |
|
?lexmode AttributeValueLiteral; |
530 |
|
|
531 |
|
~ (STRING) { |
532 |
|
lang:Perl ($version => $token.value) { |
533 |
|
$ver = $version; |
534 |
|
$bad_token = $token; |
535 |
|
} |
536 |
|
} |
537 |
|
|
538 |
|
~ (LIT) { |
539 |
|
?lexmode XMLDeclaration; |
540 |
|
} |
541 |
|
} (LITA) { |
542 |
|
?lexmode AttributeValueLiteralA; |
543 |
|
|
544 |
|
~ (STRING) { |
545 |
|
lang:Perl ($version => $token.value) { |
546 |
|
$ver = $version; |
547 |
|
$bad_token = $token; |
548 |
|
} |
549 |
|
} |
550 |
|
|
551 |
|
~ (LITA) { |
552 |
|
?lexmode XMLDeclaration; |
553 |
|
} |
554 |
|
} |
555 |
|
|
556 |
|
lang:Perl : has-error { |
557 |
|
unless ($ver eq '1.0' or $ver eq '1.1') { |
558 |
|
my $location; |
559 |
|
__CODE{xp|get-location-from-token:: |
560 |
|
$token => {$bad_token}, |
561 |
|
$result => {$location}, |
562 |
|
}__; |
563 |
|
my $continue = __DOMCore:ERROR{xp|wf-unsupported-xml-version:: |
564 |
|
DOMCore|location => {$location}, |
565 |
|
xp|parent => {$doc}, |
566 |
|
infoset|version => {$ver}, |
567 |
|
xp|error-token => {$bad_token}, |
568 |
|
}__; |
569 |
|
unless ($continue) { |
570 |
|
__EXCEPTION{DOMLS|PARSE_ERR}__; |
571 |
|
} |
572 |
|
$self->{has_error} = true; |
573 |
|
} |
574 |
|
$doc-><AS::Document.xmlVersion> ($ver); |
575 |
|
$self->{xml_version} = $ver; |
576 |
|
} |
577 |
|
|
578 |
|
~? (S) { } |
579 |
} |
} |
|
} // XMLDeclarationOpt |
|
580 |
|
|
581 |
rule XMLDeclaration ($doc) { |
~? (Name == 'encoding') { |
582 |
~ (XDO) { |
~? (S) { } |
583 |
&_XMLDeclaration ($doc => $doc); |
~ (VI) { } |
584 |
|
~? (S) { } |
585 |
|
|
586 |
|
my $ver; |
587 |
|
my $bad_token; |
588 |
|
|
589 |
|
~ (LIT) { |
590 |
|
?lexmode AttributeValueLiteral; |
591 |
|
|
592 |
|
~ (STRING) { |
593 |
|
lang:Perl ($version => $token.value) { |
594 |
|
$ver = $version; |
595 |
|
$bad_token = $token; |
596 |
|
} |
597 |
|
} |
598 |
|
|
599 |
|
~ (LIT) { |
600 |
|
?lexmode XMLDeclaration; |
601 |
|
} |
602 |
|
} (LITA) { |
603 |
|
?lexmode AttributeValueLiteralA; |
604 |
|
|
605 |
|
~ (STRING) { |
606 |
|
lang:Perl ($version => $token.value) { |
607 |
|
$ver = $version; |
608 |
|
$bad_token = $token; |
609 |
|
} |
610 |
|
} |
611 |
|
|
612 |
|
~ (LITA) { |
613 |
|
?lexmode XMLDeclaration; |
614 |
|
} |
615 |
|
} |
616 |
|
|
617 |
|
lang:Perl : has-error { |
618 |
|
unless ($ver =~ /\A[A-Za-z][A-Za-z0-9._-]*\z/) { |
619 |
|
my $location; |
620 |
|
__CODE{xp|get-location-from-token:: |
621 |
|
$token => {$bad_token}, |
622 |
|
$result => {$location}, |
623 |
|
}__; |
624 |
|
my $continue = __DOMCore:ERROR{xp|wf-malformed-enc-name:: |
625 |
|
DOMCore|location => {$location}, |
626 |
|
xp|parent => {$doc}, |
627 |
|
xp|name => {$ver}, |
628 |
|
xp|error-token => {$bad_token}, |
629 |
|
}__; |
630 |
|
unless ($continue) { |
631 |
|
__EXCEPTION{DOMLS|PARSE_ERR}__; |
632 |
|
} |
633 |
|
$self->{has_error} = true; |
634 |
|
} |
635 |
|
$doc-><AS::Document.xmlEncoding> ($ver); |
636 |
|
} |
637 |
|
|
638 |
|
~? (S) { } |
639 |
} |
} |
640 |
} // XMLDeclaration |
|
641 |
|
~? (Name == 'standalone') { |
642 |
|
~? (S) { } |
643 |
|
~ (VI) { } |
644 |
|
~? (S) { } |
645 |
|
|
646 |
|
my $ver; |
647 |
|
my $bad_token; |
648 |
|
|
649 |
|
~ (LIT) { |
650 |
|
?lexmode AttributeValueLiteral; |
651 |
|
|
652 |
|
~ (STRING) { |
653 |
|
lang:Perl ($version => $token.value) { |
654 |
|
$ver = $version; |
655 |
|
$bad_token = $token; |
656 |
|
} |
657 |
|
} |
658 |
|
|
659 |
rule _XMLDeclaration ($doc) { |
~ (LIT) { |
660 |
?lexmode 'TAG'; |
?lexmode XMLDeclaration; |
661 |
|
} |
662 |
// TODO: implement this |
} (LITA) { |
663 |
|
?lexmode AttributeValueLiteralA; |
664 |
} // _XMLDeclaration |
|
665 |
|
~ (STRING) { |
666 |
|
lang:Perl ($version => $token.value) { |
667 |
|
$ver = $version; |
668 |
|
$bad_token = $token; |
669 |
|
} |
670 |
|
} |
671 |
|
|
672 |
|
~ (LITA) { |
673 |
|
?lexmode XMLDeclaration; |
674 |
|
} |
675 |
|
} |
676 |
|
|
677 |
|
lang:Perl : has-error { |
678 |
|
unless ($ver eq 'yes' or $ver eq 'no') { |
679 |
|
my $location; |
680 |
|
__CODE{xp|get-location-from-token:: |
681 |
|
$token => {$bad_token}, |
682 |
|
$result => {$location}, |
683 |
|
}__; |
684 |
|
my $continue = __DOMCore:ERROR{xp|wf-malformed-xml-standalone:: |
685 |
|
DOMCore|location => {$location}, |
686 |
|
xp|parent => {$doc}, |
687 |
|
xp|name => {$ver}, |
688 |
|
xp|error-token => {$bad_token}, |
689 |
|
}__; |
690 |
|
unless ($continue) { |
691 |
|
__EXCEPTION{DOMLS|PARSE_ERR}__; |
692 |
|
} |
693 |
|
$self->{has_error} = true; |
694 |
|
} |
695 |
|
$doc-><AS::Document.xmlStandalone> ($ver); |
696 |
|
$self->{standalone} = true if $ver eq 'yes'; |
697 |
|
} |
698 |
|
|
699 |
|
~? (S) { } |
700 |
|
} |
701 |
|
|
702 |
|
// ~ (PIC) { } |
703 |
|
} // _XMLDeclaration_ |
704 |
|
|
705 |
/* |
/* |
706 |
Document Type Declaration |
Document Type Declaration |
719 |
|
|
720 |
// TODO: Implement this |
// TODO: Implement this |
721 |
|
|
722 |
|
// TODO: set $self->{standalone} true if only internal subset |
723 |
|
// with no param ref |
724 |
|
|
725 |
// ~ (MDC) { } |
// ~ (MDC) { } |
726 |
} // _DocumentTypeDeclaration_ |
} // _DocumentTypeDeclaration_ |
727 |
|
|
767 |
my $pi; |
my $pi; |
768 |
|
|
769 |
~ (Name) { |
~ (Name) { |
770 |
lang:Perl ($name => $token.value) { |
lang:Perl ($name => $token.value) : has-error { |
771 |
if (lc $name eq 'xml') { |
if (lc $name eq 'xml') { |
772 |
## TODO: Well-formedness (syntax) error |
my $location; |
773 |
|
__CODE{xp|get-location-from-token:: |
774 |
|
$token => {$token}, |
775 |
|
$result => {$location}, |
776 |
|
}__; |
777 |
|
my $continue = __DOMCore:ERROR{xp|wf-pi-target-is-xml:: |
778 |
|
xp|name => {$name}, |
779 |
|
DOMCore|location => {$location}, |
780 |
|
xp|parent => {$parent}, |
781 |
|
}__; |
782 |
|
unless ($continue) { |
783 |
|
__EXCEPTION{DOMLS|PARSE_ERR:: |
784 |
|
}__; |
785 |
|
} |
786 |
|
$self->{has_error} = true; |
787 |
} |
} |
788 |
## TODO: Namespace well-formedness |
## TODO: Namespace well-formedness |
789 |
$pi = $doc-><M::Document.createProcessingInstruction> |
$pi = $doc-><M::Document.createProcessingInstruction> |
853 |
(CharData) { |
(CharData) { |
854 |
// Character data |
// Character data |
855 |
lang:Perl ($data => $token.value) { |
lang:Perl ($data => $token.value) { |
|
if (index ($data, ']]>') > -1) { |
|
|
## TODO: Well-formedness (syntax) error |
|
|
} |
|
856 |
$node-><M::Node.appendChild> |
$node-><M::Node.appendChild> |
857 |
($doc-><M::Document.createTextNode> ($data)); |
($doc-><M::Document.createTextNode> ($data)); |
858 |
} |
} |
1000 |
my $is_docel; |
my $is_docel; |
1001 |
|
|
1002 |
~ (Name) { |
~ (Name) { |
1003 |
lang:Perl ($name => $token.value) { |
lang:Perl ($name => $token.value) : has-error { |
1004 |
if ($name eq $type) { |
if ($name eq $type) { |
1005 |
$type = pop @{$types}; |
$type = pop @{$types}; |
1006 |
if ($type eq '') { |
if ($type eq '') { |
1009 |
$node = pop @{$nodes}; |
$node = pop @{$nodes}; |
1010 |
$ns = pop @{$nses}; |
$ns = pop @{$nses}; |
1011 |
} else { |
} else { |
1012 |
## TODO: Element type match well-formedness error |
my $location; |
1013 |
|
__CODE{xp|get-location-from-token:: |
1014 |
|
$token => $token, |
1015 |
|
$result => $location, |
1016 |
|
}__; |
1017 |
|
my $continue = __DOMCore:ERROR{xp|wf-element-type-match:: |
1018 |
|
DOMCore:location => {$location}, |
1019 |
|
xp|token => {$token}, |
1020 |
|
xp|expected-element-type => {$type}, |
1021 |
|
xp|actual-element-type => {$name}, |
1022 |
|
xp|node => {$node}, |
1023 |
|
}__; |
1024 |
|
unless ($continue) { |
1025 |
|
__EXCEPTION{DOMLS|PARSE_ERR}__; |
1026 |
|
} |
1027 |
|
$self->{has_error} = true; |
1028 |
} |
} |
1029 |
} |
} |
1030 |
} |
} |
1032 |
~? (S) { } |
~? (S) { } |
1033 |
|
|
1034 |
if-true ($is_docel) { |
if-true ($is_docel) { |
1035 |
lang:Perl { |
lang:Perl : has-error { |
1036 |
if (@{$types}) { |
if (@{$types}) { |
1037 |
## WF error |
my $location; |
1038 |
|
__CODE{xp|get-location-from-token:: |
1039 |
|
$token => $token, |
1040 |
|
$result => $location, |
1041 |
|
}__; |
1042 |
|
for my $type (reverse @{$types}) { |
1043 |
|
my $continue = __DOMCore:ERROR{xp|wf-no-end-tag:: |
1044 |
|
DOMCore:location => {$location}, |
1045 |
|
xp|token => {$token}, |
1046 |
|
xp|expected-element-type => {$type}, |
1047 |
|
xp|node => {$node}, |
1048 |
|
}__; |
1049 |
|
unless ($continue) { |
1050 |
|
__EXCEPTION{DOMLS|PARSE_ERR}__; |
1051 |
|
} |
1052 |
|
$node = shift @{$nodes}; |
1053 |
|
} |
1054 |
|
$self->{has_error} = true; |
1055 |
} |
} |
1056 |
} |
} |
1057 |
return; |
return; |
1115 |
?lexmode ElementContent; |
?lexmode ElementContent; |
1116 |
} |
} |
1117 |
} |
} |
1118 |
|
|
1119 |
|
~ (#NONE) { } |
1120 |
} // Element_ |
} // Element_ |
1121 |
|
|
1122 |
rule AttributeSpecificationList ($doc, $attrs) |
rule AttributeSpecificationList ($doc, $attrs) |
1135 |
$atqname = $name; |
$atqname = $name; |
1136 |
} |
} |
1137 |
|
|
|
~? (S) { } |
|
|
~ (VI) { } |
|
|
~? (S) { } |
|
|
|
|
1138 |
my $vals; |
my $vals; |
1139 |
lang:Perl { |
lang:Perl { |
1140 |
if ($attrs->{$atqname}) { |
if ($attrs->{$atqname}) { |
1141 |
## TODO: Unique attr name well-formedness error |
my $location; |
1142 |
|
__CODE{xp|get-location-from-token:: |
1143 |
|
$token => $token, |
1144 |
|
$result => $location, |
1145 |
|
}__; |
1146 |
|
my $continue = __DOMCore:ERROR{xp|wf-unique-att-spec:: |
1147 |
|
DOMCore:location => {$location}, |
1148 |
|
xp|token => {$token}, |
1149 |
|
xp|name => {$atqname}, |
1150 |
|
}__; |
1151 |
|
unless ($continue) { |
1152 |
|
__EXCEPTION{DOMLS|PARSE_ERR}__; |
1153 |
|
} |
1154 |
|
$self->{has_error} = true; |
1155 |
} |
} |
1156 |
|
|
1157 |
$vals = $attrs->{$atqname} = { |
$vals = $attrs->{$atqname} = { |
1160 |
index => $i++, |
index => $i++, |
1161 |
}; |
}; |
1162 |
} |
} |
1163 |
|
|
1164 |
|
~? (S) { } |
1165 |
|
~ (VI) { } |
1166 |
|
~? (S) { } |
1167 |
|
|
1168 |
~ (LIT) { |
~ (LIT) { |
1169 |
&_AttributeValueSpecification_ |
&_AttributeValueSpecification_ |
1306 |
?lexmode 'NumericCharacterReference'; |
?lexmode 'NumericCharacterReference'; |
1307 |
|
|
1308 |
~ (NUMBER) { |
~ (NUMBER) { |
1309 |
lang:Perl ($num => $token.value) { |
lang:Perl ($num => $token.value) : has-error { |
1310 |
## TODO: [WFC: Lecal Character] |
$num += 0; |
1311 |
my $ncr = $doc-><M::Document.createTextNode> |
unless ( |
1312 |
|
($self->{xml_version} eq '1.0' and |
1313 |
|
((0x0020 <= $num and $num <= 0xD7FF) or |
1314 |
|
(0xE000 <= $num and $num <= 0xFFFD) or |
1315 |
|
(0x10000 <= $num and $num <= 0x10FFFF) or |
1316 |
|
$num == 0x9 or $num == 0xA or $num == 0xD)) or |
1317 |
|
($self->{xml_version} eq '1.1' and |
1318 |
|
((0x0001 <= $num and $num <= 0xD7FF) or |
1319 |
|
(0xE000 <= $num and $num <= 0xFFFD) or |
1320 |
|
(0x10000 <= $num and $num <= 0x10FFFF))) |
1321 |
|
) { |
1322 |
|
my $location; |
1323 |
|
__CODE{xp|get-location-from-token:: |
1324 |
|
$token => $token, |
1325 |
|
$result => $location, |
1326 |
|
}__; |
1327 |
|
my $continue = __DOMCore:ERROR{xp|wf-legal-character:: |
1328 |
|
DOMCore:location => {$location}, |
1329 |
|
xp|token => {$token}, |
1330 |
|
xp|character-number => {$num}, |
1331 |
|
xp|parent => {$parent}, |
1332 |
|
}__; |
1333 |
|
unless ($continue) { |
1334 |
|
__EXCEPTION{DOMLS|PARSE_ERR}__; |
1335 |
|
} |
1336 |
|
$self->{has_error} = true; |
1337 |
|
} |
1338 |
|
my $ncr = $doc-><M::Document.createTextNode> (chr $num); |
1339 |
(chr (0+$num)); |
(chr (0+$num)); |
1340 |
$parent-><M::Node.appendChild> ($ncr); |
$parent-><M::Node.appendChild> ($ncr); |
1341 |
} |
} |
1348 |
?lexmode 'NumericCharacterReference'; |
?lexmode 'NumericCharacterReference'; |
1349 |
|
|
1350 |
~ (NUMBER) { |
~ (NUMBER) { |
1351 |
lang:Perl ($num => $token.value) { |
lang:Perl ($num => $token.value) : has-error { |
1352 |
## TODO: [WFC: Lecal Character] |
$num += 0; |
1353 |
|
unless ( |
1354 |
|
($self->{xml_version} eq '1.0' and |
1355 |
|
((0x0020 <= $num and $num <= 0xD7FF) or |
1356 |
|
(0xE000 <= $num and $num <= 0xFFFD) or |
1357 |
|
(0x10000 <= $num and $num <= 0x10FFFF) or |
1358 |
|
$num == 0x9 or $num == 0xA or $num == 0xD)) or |
1359 |
|
($self->{xml_version} eq '1.1' and |
1360 |
|
((0x0001 <= $num and $num <= 0xD7FF) or |
1361 |
|
(0xE000 <= $num and $num <= 0xFFFD) or |
1362 |
|
(0x10000 <= $num and $num <= 0x10FFFF))) |
1363 |
|
) { |
1364 |
|
my $location; |
1365 |
|
__CODE{xp|get-location-from-token:: |
1366 |
|
$token => $token, |
1367 |
|
$result => $location, |
1368 |
|
}__; |
1369 |
|
my $continue = __DOMCore:ERROR{xp|wf-legal-character:: |
1370 |
|
DOMCore:location => {$location}, |
1371 |
|
xp|token => {$token}, |
1372 |
|
xp|character-number => {$num}, |
1373 |
|
}__; |
1374 |
|
unless ($continue) { |
1375 |
|
__EXCEPTION{DOMLS|PARSE_ERR}__; |
1376 |
|
} |
1377 |
|
$self->{has_error} = true; |
1378 |
|
} |
1379 |
my $ncr = $doc-><M::Document.createTextNode> |
my $ncr = $doc-><M::Document.createTextNode> |
1380 |
(my $char = chr (0+$num)); |
(my $char = chr (0+$num)); |
1381 |
push @{$vals->{nodes}}, $ncr; |
push @{$vals->{nodes}}, $ncr; |
1390 |
?lexmode 'HexadecimalCharacterReference'; |
?lexmode 'HexadecimalCharacterReference'; |
1391 |
|
|
1392 |
~ (Hex) { |
~ (Hex) { |
1393 |
lang:Perl ($num => $token.value) { |
lang:Perl ($num => $token.value) : has-error { |
1394 |
## TODO: [WFC: Lecal Character] |
$num += 0; |
1395 |
|
unless ( |
1396 |
|
($self->{xml_version} eq '1.0' and |
1397 |
|
((0x0020 <= $num and $num <= 0xD7FF) or |
1398 |
|
(0xE000 <= $num and $num <= 0xFFFD) or |
1399 |
|
(0x10000 <= $num and $num <= 0x10FFFF) or |
1400 |
|
$num == 0x9 or $num == 0xA or $num == 0xD)) or |
1401 |
|
($self->{xml_version} eq '1.1' and |
1402 |
|
((0x0001 <= $num and $num <= 0xD7FF) or |
1403 |
|
(0xE000 <= $num and $num <= 0xFFFD) or |
1404 |
|
(0x10000 <= $num and $num <= 0x10FFFF))) |
1405 |
|
) { |
1406 |
|
my $location; |
1407 |
|
__CODE{xp|get-location-from-token:: |
1408 |
|
$token => $token, |
1409 |
|
$result => $location, |
1410 |
|
}__; |
1411 |
|
my $continue = __DOMCore:ERROR{xp|wf-legal-character:: |
1412 |
|
DOMCore:location => {$location}, |
1413 |
|
xp|token => {$token}, |
1414 |
|
xp|character-number => {$num}, |
1415 |
|
xp|parent => {$parent}, |
1416 |
|
}__; |
1417 |
|
unless ($continue) { |
1418 |
|
__EXCEPTION{DOMLS|PARSE_ERR}__; |
1419 |
|
} |
1420 |
|
$self->{has_error} = true; |
1421 |
|
} |
1422 |
my $ncr = $doc-><M::Document.createTextNode> |
my $ncr = $doc-><M::Document.createTextNode> |
1423 |
(chr hex $num); |
(chr hex $num); |
1424 |
$parent-><M::Node.appendChild> ($ncr); |
$parent-><M::Node.appendChild> ($ncr); |
1428 |
// ~ (REFC) { } |
// ~ (REFC) { } |
1429 |
} // _HexadecimalCharacterReference_ |
} // _HexadecimalCharacterReference_ |
1430 |
|
|
1431 |
rule _HexadecimalCharacterReferenceV_ ($doc, $parent) { |
rule _HexadecimalCharacterReferenceV_ ($doc, $vals) { |
1432 |
?lexmode 'HexadecimalCharacterReference'; |
?lexmode 'HexadecimalCharacterReference'; |
1433 |
|
|
1434 |
~ (Hex) { |
~ (Hex) { |
1435 |
lang:Perl ($num => $token.value) { |
lang:Perl ($num => $token.value) : has-error { |
1436 |
## TODO: [WFC: Lecal Character] |
$num += 0; |
1437 |
|
unless ( |
1438 |
|
($self->{xml_version} eq '1.0' and |
1439 |
|
((0x0020 <= $num and $num <= 0xD7FF) or |
1440 |
|
(0xE000 <= $num and $num <= 0xFFFD) or |
1441 |
|
(0x10000 <= $num and $num <= 0x10FFFF) or |
1442 |
|
$num == 0x9 or $num == 0xA or $num == 0xD)) or |
1443 |
|
($self->{xml_version} eq '1.1' and |
1444 |
|
((0x0001 <= $num and $num <= 0xD7FF) or |
1445 |
|
(0xE000 <= $num and $num <= 0xFFFD) or |
1446 |
|
(0x10000 <= $num and $num <= 0x10FFFF))) |
1447 |
|
) { |
1448 |
|
my $location; |
1449 |
|
__CODE{xp|get-location-from-token:: |
1450 |
|
$token => $token, |
1451 |
|
$result => $location, |
1452 |
|
}__; |
1453 |
|
my $continue = __DOMCore:ERROR{xp|wf-legal-character:: |
1454 |
|
DOMCore:location => {$location}, |
1455 |
|
xp|token => {$token}, |
1456 |
|
xp|character-number => {$num}, |
1457 |
|
}__; |
1458 |
|
unless ($continue) { |
1459 |
|
__EXCEPTION{DOMLS|PARSE_ERR}__; |
1460 |
|
} |
1461 |
|
$self->{has_error} = true; |
1462 |
|
} |
1463 |
my $ncr = $doc-><M::Document.createTextNode> |
my $ncr = $doc-><M::Document.createTextNode> |
1464 |
(my $char = chr hex $num); |
(my $char = chr hex $num); |
1465 |
push @{$vals->{nodes}}, $ncr; |
push @{$vals->{nodes}}, $ncr; |
1995 |
REFC := [';']; |
REFC := [';']; |
1996 |
} // HexadecimalCharacterReference |
} // HexadecimalCharacterReference |
1997 |
|
|
1998 |
|
lexmode XMLDeclaration |
1999 |
|
: standalone |
2000 |
|
: extends => 'Name' |
2001 |
|
: extends => 'S' |
2002 |
|
{ |
2003 |
|
// Tokens recognized while lexing the pseudo-attributes of an XML declaration. |
2004 |
|
/* |
2005 |
|
Value indicator: the equals sign between a pseudo-attribute name and its literal |
2006 |
|
*/ |
2007 |
|
VI := ['=']; |
2008 |
|
|
2009 |
|
/* |
2010 |
|
Literal open: LIT is the double quote, LITA is the apostrophe (U+0027) |
2011 |
|
*/ |
2012 |
|
LIT := ['"']; |
2013 |
|
LITA := [U+0027]; |
2014 |
|
|
2015 |
|
/* |
2016 |
|
Processing instruction close: the two-character delimiter "?" followed by ">" |
2017 |
|
*/ |
2018 |
|
PIC := ['?'] ['>']; |
2019 |
|
} // XMLDeclaration |
2020 |
|
|
2021 |
lexmode StartTag |
lexmode StartTag |
2022 |
: standalone |
: standalone |
2023 |
: extends => 'Name' |
: extends => 'Name' |
2068 |
: extends => 'AttributeValueLiteral_' |
: extends => 'AttributeValueLiteral_' |
2069 |
{ |
{ |
2070 |
LIT := ['"']; |
LIT := ['"']; |
2071 |
STRING : value := [^'"' '&' '<']; |
STRING : value := [^'"' '&' '<']+; |
2072 |
} // AttributeValueLiteral |
} // AttributeValueLiteral |
2073 |
|
|
2074 |
lexmode AttributeValueLiteralA |
lexmode AttributeValueLiteralA |
2076 |
: extends => 'AttributeValueLiteral_' |
: extends => 'AttributeValueLiteral_' |
2077 |
{ |
{ |
2078 |
LIT := [U+0027]; |
LIT := [U+0027]; |
2079 |
STRING : value := [^U+0027 '&' '<']; |
STRING : value := [^U+0027 '&' '<']+; |
2080 |
} // AttributeValueLiteralA |
} // AttributeValueLiteralA |
2081 |
|
|
2082 |
token-error default : default { |
token-error default : default { |
2083 |
lang:Perl { |
lang:Perl { |
2084 |
my $location = { |
my $location; |
2085 |
utf32_offset => pos ($self->{source}), |
__CODE{xp|get-location-from-token:: |
2086 |
}; |
$token => {$token}, |
2087 |
my $continue = __DOMCore:ERROR{xp|bad-token-error:: |
$result => {$location}, |
2088 |
|
}__; |
2089 |
|
my $continue = __DOMCore:ERROR{xp|wf-syntax-error:: |
2090 |
xp|error-token => {$token}, |
xp|error-token => {$token}, |
2091 |
DOMCore|location => {$location}, |
DOMCore|location => {$location}, |
|
xp|source-text => {\($self->{source})}, |
|
2092 |
}__; |
}__; |
2093 |
unless ($continue) { |
unless ($continue) { |
2094 |
__EXCEPTION{DOMLS|PARSE_ERR:: |
__EXCEPTION{DOMLS|PARSE_ERR:: |
2099 |
} // default |
} // default |
2100 |
##ManakaiXMLParser |
##ManakaiXMLParser |
2101 |
|
|
2102 |
|
ResourceDef: |
2103 |
|
@QName: xp|get-location-from-token |
2104 |
|
@rdf:type: DISPerl|BlockCode |
2105 |
|
@enDesc: |
2106 |
|
Creates a <IF::DOMCore:DOMLocator> object from a token. |
2107 |
|
@PerlDef: |
2108 |
|
## Only utf32_offset is filled in, taken from the current pos() of the |
## source text; the token argument is not consulted here. |
$result = { |
2109 |
|
utf32_offset => pos ($self->{source}), |
2110 |
|
}; |
2111 |
|
@For: ManakaiDOM|ManakaiDOM3 |
2112 |
|
|
2113 |
ElementTypeBinding: |
ElementTypeBinding: |
2114 |
@Name: RuleDef |
@Name: RuleDef |
2141 |
@@For: ManakaiDOM|DOM3 |
@@For: ManakaiDOM|DOM3 |
2142 |
@@ecore:textFormatter: ManakaiXMLParserExceptionFormatter |
@@ecore:textFormatter: ManakaiXMLParserExceptionFormatter |
2143 |
|
|
2144 |
ErrDef: |
ElementTypeBinding: |
2145 |
@QName: xp|bad-token-error |
@Name: WFErrDef |
2146 |
|
@ElementType: |
2147 |
|
dis:ResourceDef |
2148 |
|
@ShadowContent: |
2149 |
|
@@rdf:type: DOMCore|DOMErrorType |
2150 |
|
@@For: ManakaiDOM|DOM3 |
2151 |
|
@@ecore:textFormatter: ManakaiXMLParserExceptionFormatter |
2152 |
|
|
2153 |
|
WFErrDef: |
2154 |
|
@QName: xp|wf-syntax-error |
2155 |
@enDesc: |
@enDesc: |
2156 |
The parser is encountered to a token whose type is not |
The entity does not match the production rule; it is not |
2157 |
allowed there. |
well-formed. |
2158 |
@DOMCore:severity: DOMCore|SEVERITY_ERROR |
@DOMCore:severity: DOMCore|SEVERITY_FATAL_ERROR |
2159 |
@enMufDef: |
@enMufDef: |
2160 |
Token |%xp-error-token-type;|%xp-error-token-value |
|%xp-error-token-type;|%xp-error-token-value |
2161 |
(prefix => { (|}, suffix => {|)}); is not |
(prefix => { (|}, suffix => {|)}); is not |
2162 |
allowed %xp-error-lines (prefix => {(|}, suffix => {|)}); |
allowed%xp-error-lines (prefix => { (|}, suffix => {|)}); |
2163 |
|
@ecore:hasParameter: |
2164 |
|
@@@: xp|error-token |
2165 |
|
@@enDesc: |
2166 |
|
The token that is not allowed. |
2167 |
|
|
2168 |
|
WFErrDef: |
2169 |
|
@QName: xp|wf-pi-target-is-xml |
2170 |
|
@enDesc: |
2171 |
|
A processing instruction has its <CODE::PITarget> of |
2172 |
|
<XML::xml> (in any case) which is not allowed. |
2173 |
|
@DOMCore:severity: DOMCore|SEVERITY_ERROR |
2174 |
|
@enMufDef: |
2175 |
|
Processing instruction target name cannot be |%p |
2176 |
|
(name => {<Q::xp|name>});| |
2177 |
|
@ecore:hasParameter: |
2178 |
|
@@@: xp|error-token |
2179 |
|
@@enDesc: |
2180 |
|
The token that contains the name. |
2181 |
|
@ecore:hasParameter: |
2182 |
|
@@@: xp|name |
2183 |
|
@@enDesc: |
2184 |
|
A string that is specified as target name of the |
2185 |
|
processing instruction. |
2186 |
|
@ecore:hasParameter: xp|parent |
2187 |
|
|
2188 |
PropDef: |
WFErrDef: |
2189 |
|
@QName: xp|wf-no-end-tag |
2190 |
|
@DOMCore:severity: DOMCore|SEVERITY_ERROR |
2191 |
|
@enDesc: |
2192 |
|
An end-tag is not found. |
2193 |
|
@enMufDef: |
2194 |
|
End-tag |</%p (name => {<Q::xp|expected-element-type>});>| is required |
2195 |
|
@ecore:hasParameter: xp|error-token |
2196 |
|
@ecore:hasParameter: |
2197 |
|
@@@: xp|node |
2198 |
|
@@enDesc: |
2199 |
|
The element node that is not closed. |
2200 |
|
@ecore:hasParameter: |
2201 |
|
@@@: xp|expected-element-type |
2202 |
|
@@enDesc: |
2203 |
|
The element type name of the element that is not closed. |
2204 |
|
|
2205 |
|
WFErrDef: |
2206 |
|
@QName: xp|wf-unsupported-xml-version |
2207 |
|
@DOMCore:severity: DOMCore|SEVERITY_ERROR |
2208 |
|
@enDesc: |
2209 |
|
The XML version specified in the version declaration is not supported. |
2210 |
|
@enMufDef: |
2211 |
|
XML version |%p (name => {<Q::infoset|version>});| is not supported |
2212 |
|
@ecore:hasParameter: xp|error-token |
2213 |
|
@ecore:hasParameter: |
2214 |
|
@@@: xp|parent |
2215 |
|
@@enDesc: |
2216 |
|
The document node. |
2217 |
|
@ecore:hasParameter: |
2218 |
|
@@@: infoset|version |
2219 |
|
@@enDesc: |
2220 |
|
The specified XML version. |
2221 |
|
|
2222 |
|
WFErrDef: |
2223 |
|
@QName: xp|wf-malformed-enc-name |
2224 |
|
@DOMCore:severity: DOMCore|SEVERITY_ERROR |
2225 |
|
@enDesc: |
2226 |
|
An <XA::encoding> pseudo-attribute value does not match |
2227 |
|
the production rule <CODE::EncName>. |
2228 |
|
@enMufDef: |
2229 |
|
Encoding name |%p (name => {<Q::xp|name>});| is not allowed |
2230 |
|
@ecore:hasParameter: xp|error-token |
2231 |
|
@ecore:hasParameter: |
2232 |
|
@@@: xp|parent |
2233 |
|
@@enDesc: The document node. |
2234 |
|
@ecore:hasParameter: |
2235 |
|
@@@: xp|name |
2236 |
|
@@enDesc: |
2237 |
|
The <XA::encoding> value. |
2238 |
|
|
2239 |
|
WFErrDef: |
2240 |
|
@QName: xp|wf-malformed-xml-standalone |
2241 |
|
@DOMCore:severity: DOMCore|SEVERITY_ERROR |
2242 |
|
@enDesc: |
2243 |
|
An <XA::standalone> pseudo-attribute value is neither <XML::yes> |
2244 |
|
nor <XML::no>. |
2245 |
|
@enMufDef: |
2246 |
|
|standalone| pseudo-attribute value |%p (name => {<Q::xp|name>});| |
2247 |
|
is not allowed |
2248 |
|
@ecore:hasParameter: xp|error-token |
2249 |
|
@ecore:hasParameter: |
2250 |
|
@@@: xp|parent |
2251 |
|
@@enDesc: The document node. |
2252 |
|
@ecore:hasParameter: |
2253 |
|
@@@: xp|name |
2254 |
|
@@enDesc: |
2255 |
|
The <XA::standalone> value. |
2256 |
|
|
2257 |
|
WFErrDef: |
2258 |
|
@QName: xp|wf-legal-literal-character |
2259 |
|
@DOMCore:severity: DOMCore|SEVERITY_ERROR |
2260 |
|
@enDesc: |
2261 |
|
Each character in an XML entity must match the production |
2262 |
|
rule <CODE::Char - RestrictedChar>. |
2263 |
|
@enMufDef: |
2264 |
|
Character %character-code-point |
2265 |
|
(v => {<Q::xp|character-number>}); is not allowed |
2266 |
|
@ecore:hasParameter: |
2267 |
|
@@@: xp|character-number |
2268 |
|
@@enDesc: |
2269 |
|
The code position of the character that is not allowed. |
2270 |
|
|
2271 |
|
WFErrDef: |
2272 |
|
@QName: xp|wf-element-type-match |
2273 |
|
@DOMCore:severity: DOMCore|SEVERITY_FATAL_ERROR |
2274 |
|
@enDesc: |
2275 |
|
The <CODE::Name> in an element's end-tag must match the element type |
2276 |
|
in the start-tag. |
2277 |
|
@enMufDef: |
2278 |
|
End-tag |</%p (name => {<Q::xp|actual-element-type>});>| does |
2279 |
|
not match to start-tag |<%p (name => {<Q::xp|expected-element-type>});>| |
2280 |
|
@ecore:hasParameter: xp|error-token |
2281 |
|
@ecore:hasParameter: |
2282 |
|
@@@: xp|node |
2283 |
|
@@enDesc: |
2284 |
|
The current opening element node. |
2285 |
|
@ecore:hasParameter: |
2286 |
|
@@@: xp|expected-element-type |
2287 |
|
@@enDesc: |
2288 |
|
The element type name of the current element. |
2289 |
|
@ecore:hasParameter: |
2290 |
|
@@@: xp|actual-element-type |
2291 |
|
@@enDesc: |
2292 |
|
The <CODE::Name> that occurs in the end-tag. |
2293 |
|
|
2294 |
|
WFErrDef: |
2295 |
|
@QName: xp|wf-unique-att-spec |
2296 |
|
@DOMCore:severity: DOMCore|SEVERITY_ERROR |
2297 |
|
@enDesc: |
2298 |
|
An attribute name <kwd:MUST-NOT> appear more than once in |
2299 |
|
the same start-tag or empty-element tag. |
2300 |
|
@enMufDef: |
2301 |
|
Attribute |%p (name => {<Q::xp|name>});| is specified more |
2302 |
|
than once in the same tag |
2303 |
|
@ecore:hasParameter: xp|error-token |
2304 |
|
@ecore:hasParameter: |
2305 |
|
@@@: xp|name |
2306 |
|
@@enDesc: |
2307 |
|
The name of the attribute. |
2308 |
|
|
2309 |
|
WFErrDef: |
2310 |
|
@QName: xp|wf-legal-character |
2311 |
|
@DOMCore:severity: DOMCore|SEVERITY_ERROR |
2312 |
|
@enDesc: |
2313 |
|
Characters referred to using character references <kwd:MUST> |
2314 |
|
match the production for <CODE::Char>. |
2315 |
|
@enMufDef: |
2316 |
|
Reference to character %character-code-point |
2317 |
|
(v => {<Q::xp|character-number>}); is not allowed |
2318 |
|
@ecore:hasParameter: xp|error-token |
2319 |
|
@ecore:hasParameter: |
2320 |
|
@@@: xp|character-number |
2321 |
|
@@enDesc: |
2322 |
|
The code position of the character being referred. |
2323 |
|
@ecore:hasParameter: |
2324 |
|
@@@: xp|parent |
2325 |
|
@@enDesc: |
2326 |
|
The parent node in which the character reference has |
2327 |
|
occurred, if available. |
2328 |
|
|
2329 |
|
XWParam: |
2330 |
@QName: xp|error-token |
@QName: xp|error-token |
2331 |
@enDesc: |
@enDesc: |
2332 |
The token where the parser found an error. |
The token where the parser found an error. |
2333 |
|
|
2334 |
PropDef: |
XWParam: |
2335 |
@QName: xp|source-text |
@QName: xp|name |
2336 |
|
@enDesc: |
2337 |
|
A name. |
2338 |
|
|
2339 |
|
XWParam: |
2340 |
|
@QName: xp|parent |
2341 |
@enDesc: |
@enDesc: |
2342 |
A reference to the original source text, if available. |
The parent node in which the error occurs. |
2343 |
|
|
2344 |
|
XWParam: |
2345 |
|
@QName: xp|node |
2346 |
|
@enDesc: |
2347 |
|
The current node. |
2348 |
|
|
2349 |
|
XWParam: |
2350 |
|
@QName: xp|actual-element-type |
2351 |
|
@enDesc: |
2352 |
|
The actual element type name that occurred in the source. |
2353 |
|
|
2354 |
|
XWParam: |
2355 |
|
@QName: xp|expected-element-type |
2356 |
|
@enDesc: |
2357 |
|
The element type name expected. |
2358 |
|
|
2359 |
|
XWParam: |
2360 |
|
@QName: xp|character-number |
2361 |
|
@enDesc: |
2362 |
|
The character code position. |
2363 |
|
|
2364 |
|
ElementTypeBinding: |
2365 |
|
@Name: XWParam |
2366 |
|
@ElementType: |
2367 |
|
dis:ResourceDef |
2368 |
|
@ShadowContent: |
2369 |
|
@@For: =ManakaiDOM|all |
2370 |
|
@@rdf:type: ecore|Parameter |
2371 |
|
|
2372 |
ElementTypeBinding: |
ElementTypeBinding: |
2373 |
@Name:enMufDef |
@Name:enMufDef |
2526 |
my $pos = $o-><AG::DOMCore|DOMError.location> |
my $pos = $o-><AG::DOMCore|DOMError.location> |
2527 |
-><AG::DOMCore|DOMLocator.utf32Offset>; |
-><AG::DOMCore|DOMLocator.utf32Offset>; |
2528 |
if ($pos > -1) { |
if ($pos > -1) { |
2529 |
my $src = $o->{<H::xp|source-text>}; |
my $src = \($o->{<H::ecore|object>}->{source}); |
2530 |
my $start = $pos; |
my $start = $pos; |
2531 |
$start = rindex ($$src, "\x0A", $start - 1) for 0..2; |
$start = rindex ($$src, "\x0A", $start - 1) for 0..2; |
2532 |
$start++; |
$start++; |
2535 |
$end = length $$src if $end < 0; |
$end = length $$src if $end < 0; |
2536 |
$p->{-result} = substr $$src, $start, $end - $start; |
$p->{-result} = substr $$src, $start, $end - $start; |
2537 |
} |
} |
2538 |
|
|
2539 |
|
@RuleDef: |
2540 |
|
@@Name: character-code-point |
2541 |
|
@@enDesc: |
2542 |
|
The character code position, in <CODE::U+<VAR::HHHH>> notation. |
2543 |
|
|
2544 |
|
@@Method: |
2545 |
|
@@@Name: after |
2546 |
|
@@@Param: |
2547 |
|
@@@@Name: name |
2548 |
|
@@@@Type: DOMString |
2549 |
|
@@@@enDesc: The name of the method. |
2550 |
|
@@@Param: |
2551 |
|
@@@@Name: p |
2552 |
|
@@@@Type: DISPerl|HASH |
2553 |
|
@@@@enDesc: The set of the parameters to the method. |
2554 |
|
@@@Param: |
2555 |
|
@@@@Name: o |
2556 |
|
@@@@Type: DISPerl|HASH |
2557 |
|
@@@@enDesc: The option value. |
2558 |
|
@@@RuleParam: |
2559 |
|
@@@@Name: v |
2560 |
|
@@@@Type: DISPerl|Number |
2561 |
|
@@@@enDesc: |
2562 |
|
The name of the error parameter that contains the character code. |
2563 |
|
@@@Return: |
2564 |
|
@@@@PerlDef: |
2565 |
|
$p->{-result} = sprintf 'U+%04X', $o->{$p->{v}}; |
2566 |
##XMLParserExceptionFormatter |
##XMLParserExceptionFormatter |