/[suikacvs]/messaging/manakai/lib/Message/DOM/XMLParser.dis
Suika

Contents of /messaging/manakai/lib/Message/DOM/XMLParser.dis

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (hide annotations) (download)
Sat Jan 21 17:37:51 2006 UTC (18 years, 10 months ago) by wakaba
Branch: MAIN
Changes since 1.6: +4 -2 lines
++ manakai/lib/Message/Util/ChangeLog	21 Jan 2006 17:23:17 -0000
2006-01-22  Wakaba  <wakaba@suika.fam.cx>

	* PerlCode.dis: Fixed not |getAttributeNS| to cause
	uninitialized value warnings.

++ manakai/lib/Message/Util/DIS/ChangeLog	21 Jan 2006 17:24:28 -0000
	* DPG.dis (plCodeFragment): A returning |#EOF| case
	was missing so that illegal character was appended to the sequence.
++ manakai/lib/Message/DOM/ChangeLog	21 Jan 2006 17:27:27 -0000
2006-01-22  Wakaba  <wakaba@suika.fam.cx>

	* Tree.dis (getAttribute): Returns |null| if there
	is no attribute in |ManakaiDOM:DOMLatest| for compatibility
	with Web browser implementations.
	(getAttributeNS): It returned |null| if there
	is no attribute in any |For| unintentionally.  It now
	returns an empty string in DOM levels less than or equals
	to three.

	* XMLParser.dis (shiftChar): Fixed not to be warned as
	uninitialized value or substring out of range.

1 wakaba 1.1 Module:
2     @QName: MDOM|XMLParser
3     @Namespace:
4     http://suika.fam.cx/~wakaba/archive/2004/dom/xml-parser#
5    
6     @FullName:
7     @@lang:en
8     @@@: XML Parser
9    
10     @DISCore:author: DISCore|Wakaba
11     @License: license|Perl+MPL
12     @Date:
13 wakaba 1.7 $Date: 2006/01/21 16:28:13 $
14 wakaba 1.1
15     @DefaultFor: ManakaiDOM|ManakaiDOMLatest
16    
17     @Require:
18     @@Module:
19     @@@QName: MDOM|DOMLS
20     @@@WithFor: ManakaiDOM|ManakaiDOMLatest
21 wakaba 1.6 @@Module:
22     @@@QName: MDOM|XDoctype
23     @@@WithFor: ManakaiDOM|ManakaiDOMLatest
24 wakaba 1.1
25     Namespace:
26 wakaba 1.6 @d:
27     http://suika.fam.cx/~wakaba/archive/2004/dom/xdt#
28 wakaba 1.1 @dis:
29     http://suika.fam.cx/~wakaba/archive/2004/8/18/lang#dis--
30     @DOMCore:
31     http://suika.fam.cx/~wakaba/archive/2004/8/18/dom-core#
32     @DOMMain:
33     http://suika.fam.cx/~wakaba/archive/2004/dom/main#
34 wakaba 1.5 @dtest:
35     http://suika.fam.cx/~wakaba/archive/2005/manakai/Util/DIS#Test/
36 wakaba 1.1 @dx:
37     http://suika.fam.cx/~wakaba/archive/2005/manakai/Util/Error/DOMException#
38     @ecore:
39     http://suika.fam.cx/~wakaba/archive/2005/manakai/Util/Error/Core/
40     @f:
41     http://suika.fam.cx/~wakaba/archive/2004/dom/feature#
42     @idl:
43     http://suika.fam.cx/~wakaba/archive/2004/dis/IDL#
44     @infoset:
45     http://www.w3.org/2001/04/infoset#
46     @lang:
47     http://suika.fam.cx/~wakaba/archive/2004/8/18/lang#
48     @license:
49     http://suika.fam.cx/~wakaba/archive/2004/8/18/license#
50     @LSEV:
51     http://www.w3.org/2002/DOMLS
52     @ManakaiDOM:
53     http://suika.fam.cx/~wakaba/archive/2004/8/18/manakai-dom#
54     @ManakaiDOMLS:
55     http://suika.fam.cx/~wakaba/archive/2004/mdom-ls#
56     @MDOM:
57     http://suika.fam.cx/~wakaba/archive/2004/8/18/manakai-dom#ManakaiDOM.
58     @MDOMX:
59     http://suika.fam.cx/~wakaba/archive/2004/8/4/manakai-dom-exception#
60     @rdf:
61     http://www.w3.org/1999/02/22-rdf-syntax-ns#
62     @rdfs:
63     http://www.w3.org/2000/01/rdf-schema#
64     @t:
65     http://suika.fam.cx/~wakaba/archive/2004/dom/tree#
66 wakaba 1.5 @test:
67     http://suika.fam.cx/~wakaba/archive/2004/dis/Test#
68 wakaba 1.1 @xml:
69     http://www.w3.org/XML/1998/namespace
70     @xmlns:
71     http://www.w3.org/2000/xmlns/
72     @xp:
73     http://suika.fam.cx/~wakaba/archive/2004/dom/xml-parser#
74    
75     ## -- Features
76    
77     ElementTypeBinding:
78     @Name: FeatureDef
79     @ElementType:
80     dis:ResourceDef
81     @ShadowContent:
82     @@rdf:type: f|Feature
83     @@For: =ManakaiDOM|all
84    
85     ElementTypeBinding:
86     @Name: FeatureVerDef
87     @ElementType:
88     dis:ResourceDef
89     @ShadowContent:
90     @@rdf:type: f|Feature
91    
92     ElementTypeBinding:
93     @Name: featureQName
94     @ElementType:
95     f:name
96     @ShadowContent:
97     @@ContentType: DISCore|QName
98    
99     ResourceDef:
100     @QName: DOMString
101     @AliasFor: DOMMain|DOMString
102     @For: ManakaiDOM|DOM
103    
104     ResourceDef:
105     @QName: Node
106     @AliasFor: t|Node
107     @For: ManakaiDOM|DOM
108    
109     ResourceDef:
110     @QName: Element
111     @AliasFor: t|Element
112     @For: ManakaiDOM|DOM
113    
114     ResourceDef:
115     @QName: Document
116     @AliasFor: t|Document
117     @For: ManakaiDOM|DOM
118    
119 wakaba 1.6 ResourceDef:
120     @QName: DocumentXDoctype
121     @AliasFor: d|DocumentXDoctype
122     @For: ManakaiDOM|DOM
123    
124 wakaba 1.1 ElementTypeBinding:
125     @Name: ClsDef
126     @ElementType:
127     dis:ResourceDef
128     @ShadowContent:
129     @@rdf:type:
130     @@@@: dis|MultipleResource
131     @@@ForCheck: !ManakaiDOM|ForIF !ManakaiDOM|ForClass
132     @@resourceFor:
133     @@@@: ManakaiDOM|ForClass
134     @@@ForCheck: ManakaiDOM|ManakaiDOM !=ManakaiDOM|ManakaiDOM
135     @@For: ManakaiDOM|DOM3
136     @@For: =ManakaiDOM|ManakaiDOM
137    
138     @@rdf:type:
139     @@@@: DISLang|Class
140     @@@ForCheck: ManakaiDOM|ForClass
141    
142     ElementTypeBinding:
143     @Name: ClsQName
144     @ElementType:
145     dis:QName
146     @ShadowContent:
147     @@ForCheck: ManakaiDOM|ForClass
148    
149     ElementTypeBinding:
150     @Name: ClsISA
151     @ElementType:
152     dis:ISA
153     @ShadowContent:
154     @@ForCheck: ManakaiDOM|ForClass
155    
156     ElementTypeBinding:
157     @Name: nullCase
158     @ElementType:
159     dis:ResourceDef
160     @ShadowContent:
161     @@rdf:type: ManakaiDOM|InCase
162     @@Value:
163     @@@is-null:1
164    
165     ResourceDef:
166     @QName: LSParser
167     @AliasFor: DOMLS|LSParser
168     @For: ManakaiDOM|DOM3
169    
170     ClsDef:
171     @ClsQName: ManakaiXMLParser
172    
173     @Implement: DOMLS|LSParser
174    
175     @f:implements:
176     @@@: DOMLS|LSFeature30
177     @@For: ManakaiDOM|DOM3
178    
179     @DISLang:role: DOMLS|ParserRole
180    
181 wakaba 1.3 @enDesc:
182     Note that the <Class::ManakaiXMLParser> reports any XML errors
183     (syntax errors and / or well-formedness constraint errors)
184     via the <IF::DOMCore:error-handler> registered to
185     the <A::DOMLS:LSParser.domConfig> object. Each error has
186     its <A::DOMCore:DOMError.severity>, either <C::DOMCore:SEVERITY_ERROR>
187     or <C::DOMCore:SEVERITY_FATAL_ERROR>. However, their semantics
188     are slightly different from the ones of <QUOTE::error> and
189     <QUOTE::fatal error> in XML; in this implementation,
190     <C::DOMCore:SEVERITY_ERROR> implies that the parsing process
191     can effectively be continued to detect more errors while
192     <C::DOMCore:SEVERITY_FATAL_ERROR> implies that the error
193     is so serious that the resulting document tree and any later errors
194     might be far from the ones that would be obtained if the error had
195     not occurred.
196    
197 wakaba 1.1 @Attr:
198     @@Name: domConfig
199     @@enDesc:
200     The configuration of the parser.
201    
202     @@Get:
203     @@@Type: DOMCore|DOMConfiguration
204     @@@enDesc: The DOM configuration object.
205     @@@PerlDef:
        ## Delegate to the shared DOMCore getConfigObject code fragment.
        ## This parser object is passed as both the target and the target
        ## hash, and the fragment's result is stored in $r.
206     __CODE{DOMCore|getConfigObject::
207     $target => $self,
208     $targetHash => $self,
209     $targetType => {<IFName::LSParser>},
210     $result => $r,
211     }__;
212    
213     @Method:
214     @@ManakaiDOM:isForInternal:1
215     @@ForCheck: ManakaiDOM|ForClass
216     @@Operator: DISPerl|NewMethod
217     @@enDesc:
218     Creates a new instance of the object.
219     @@Param:
220     @@@Name: impl
221     @@@Type: DOMLS|GLSImplementation
222     @@@enDesc:
223     The implementation from which the parser is created.
224     @@Param:
225     @@@Name: features
226     @@@Type: DOMString
227     @@@dis:actualType: f|FeaturesString
228     @@@enDesc:
229     The set of features requested for the parser.
230     @@Return:
231     @@@Type: DOMMain|DOMObject
232     @@@dis:actualType: LSParser
233     @@@enDesc:
234     The newly created parser.
235     @@@PerlDef:
        ## Bless a fresh hash whose only initial entry remembers the
        ## implementation object that created the parser.
        ## NOTE(review): $features is not consulted by this code.
        ## The hash is blessed into $self; presumably $self holds the class
        ## name when this runs as the constructor -- confirm.
236     $r = bless {
237     <H::DOMCore:implementation> => $impl,
238     }, $self;
239    
240     @Method:
241     @@Name: parseString
242     @@enImplNote:
243     Non-standard - to be removed
244    
245     @@Param:
246     @@@Name: sourceText
247     @@@Type: DOMString
248     @@Return:
249     @@@Type: Document
250     @@@PerlDef:
251    
        ## Reset all per-parse state on the parser object, then run the
        ## document entity rule and return the document it produces.
252     $self->{char} = []; ## characters queued here are returned by shiftChar before new input is read
253     $self->{token} = [];
254 wakaba 1.6 $self->{entity} = [{ ## entity stack; shiftChar reads from its last element
255     reptxt => \$sourceText, ## reference to the text to be parsed (not a copy)
256     line => 1,
257     column => 1,
258     pos => 0, ## offset of the next character to read from the text
259     }];
260     $self->{entity_char} = [];
261     $self->{entity_token} = [];
262 wakaba 1.3 $self->{xml_version} = '1.0'; ## default until an XML declaration says otherwise
263     $self->{standalone} = false;
264 wakaba 1.6 $self->{general_entity} = {};
265     $self->{param_entity} = {};
266     $self->{has_error} = false; ## set to true whenever a reported error is continued past
267 wakaba 1.3 ## Well-formedness constraint Entity Declared takes effect?
268 wakaba 1.1
269     __DEEP{
270     $r = $self->_parse_DocumentEntity
271     ($self->{<H::DOMCore:implementation>});
272     }__;
273    
274     @Method:
275     @@Name: shiftChar
276     @@ManakaiDOM:isForInternal:1
277     @@ForCheck: ManakaiDOM|ForClass
278     @@enDesc:
279     Returns the next character.
280     @@Return:
281     @@@Type: idl|long||ManakaiDOM|all
282     @@@enDesc:
283     The code position number of the next character, if any,
284 wakaba 1.6 or <CODE::-1>.
285 wakaba 1.1 @@@PerlDef:
286     if (@{$self->{char}}) { ## queued characters take priority over the entity text
287     $r = shift @{$self->{char}};
288     } else {
289 wakaba 1.7 no warnings 'substr';
290     ## substr outside length warnings at the end of the string
291 wakaba 1.6 GETCHAR: {
292     my $char = substr (${$self->{entity}->[-1]->{reptxt}}, ## one character from the last entity on the stack
293     $self->{entity}->[-1]->{pos}, 1);
294     $self->{entity}->[-1]->{pos}++; ## advanced unconditionally, also when nothing was read
295    
296 wakaba 1.7 if (defined $char and length $char) {
297 wakaba 1.6 $r = ord $char;
298     if ($r == 0x000A) { ## LINE FEED: next line, column back to 1
299     $self->{entity}->[-1]->{line}++;
300     $self->{entity}->[-1]->{column} = 1;
301     } elsif ($r == 0x000D) { ## CARRIAGE RETURN: peek at the character that follows
302     my $next_char = substr (${$self->{entity}->[-1]->{reptxt}},
303     $self->{entity}->[-1]->{pos}, 1);
304     if ($next_char eq "\x0A") { ## CR LF is consumed as a single line break
305     $self->{entity}->[-1]->{pos}++;
306     $self->{entity}->[-1]->{column} = 1;
307     } elsif ($next_char eq "\x85") {
308     if ($self->{xml_version} eq '1.1') { ## CR U+0085 is consumed as a pair only for version 1.1
309     $self->{entity}->[-1]->{pos}++;
310     $self->{entity}->[-1]->{column} = 1;
311     } else {
312     $self->{entity}->[-1]->{column} = 0; ## NOTE(review): U+0085 stays unread here; why the column becomes 0 rather than 1 is not evident -- confirm
313     }
314 wakaba 1.3 } else {
315 wakaba 1.6 $self->{entity}->[-1]->{column} = 1;
316     }
317     $r = 0x000A; ## a CR (alone or as part of a pair) is always returned as LINE FEED
318     $self->{entity}->[-1]->{line}++;
319     } elsif ( ## not in any accepted range below, and not TAB or U+0085
320     not ((0x0020 <= $r and $r <= 0x007E) or
321     (0x00A0 <= $r and $r <= 0xD7FF) or
322     (0xE000 <= $r and $r <= 0xFFFD) or
323     (0x10000 <= $r and $r <= 0x10FFFF)) and
324     $r != 0x0009 and $r != 0x0085 and
325     not ($self->{xml_version} eq '1.0' and ## U+007F..U+009F are not reported when the version is 1.0
326     (0x007F <= $r and $r <= 0x009F))
327     ) {
328     my $location = {
329     utf32_offset => $self->{entity}->[-1]->{pos}, ## pos has already been advanced past the character
330     line_number => $self->{entity}->[-1]->{line},
331     column_number => $self->{entity}->[-1]->{column},
332     };
333     my $continue = __DOMCore:ERROR{xp|wf-legal-literal-character::
334     DOMCore|location => {$location},
335     xp|character-number => {$r},
336     }__;
337     unless ($continue) { ## a false result from the error report aborts the parse
338     __EXCEPTION{DOMLS|PARSE_ERR}__;
339 wakaba 1.3 }
340 wakaba 1.6 $self->{has_error} = true; ## continue, but remember that an error was seen
341     $self->{entity}->[-1]->{column}++;
342     } elsif ($r == 0x0085 or $r == 0x2028) { ## counted as a line break for any version
343     $r = 0x000A if $self->{xml_version} eq '1.1'; ## returned as LINE FEED only for version 1.1
344     $self->{entity}->[-1]->{line}++;
345     $self->{entity}->[-1]->{column} = 1;
346 wakaba 1.3 } else {
347 wakaba 1.6 $self->{entity}->[-1]->{column}++; ## any other accepted character
348 wakaba 1.3 }
349 wakaba 1.6 # } elsif (@{$self->{entity}} > 1) {
350     # pop @{$self->{entity}};
351     # redo GETCHAR;
352     # ## ISSUE: How cope with delimiter scanning &
353     # ## self-containedness constraints??
354 wakaba 1.3 } else {
355 wakaba 1.6 $r = -1; ## nothing left to read in the current entity
356 wakaba 1.3 }
357 wakaba 1.6 } # GETCHAR
358 wakaba 1.1 }
359    
360     @Method:
361     @@ManakaiDOM:isForInternal: 1
362     @@Operator: ManakaiDOM|MUErrorHandler
363     @@enDesc:
364     When a <IF::ecore|ErrorInterface||ManakaiDOM|Perl> is <Perl::report>ed,
365     then this method is invoked.
366    
367     The method calls the <cfg::DOMCore|error-handler> if the error is of
368     <IF::DOMCore|DOMError>. Otherwise, the error is re-thrown so that
369     the corresponding <Perl::catch> clause, if any, can catch the error.
370     @@Param:
371     @@@Name: err
372     @@@Type: ecore|ErrorInterface||ManakaiDOM|Perl
373     @@@enDesc:
374     The reported error object.
375     @@Return:
376     @@@Type: DISPerl|Any
377     @@@enDesc:
378     If the <P::err> is a <IF::DOMCore|DOMError>, then the return value
379     of the error handler.
380    
381     {NOTE:: If the error is thrown, the method never returns.
382     }
383     @@@nullCase:
384     @@@@enDesc:
385     No error handler.
386     @@@PerlDef:
        ## DOM errors are routed to the configured error handler; anything
        ## else is re-thrown for the caller's catch clauses.
387     if ($err->isa (<IFName::DOMCore|DOMError||ManakaiDOM|ManakaiDOM>)) {
388     __DEEP{
389     A: {
390     my $cfg = $self-><AG::LSParser.domConfig>;
391     my $h = $cfg-><M::DOMCore|DOMConfiguration.getParameter>
392     ('error-handler');
        ## The error-handler parameter may be unset.  In that case $r is
        ## left untouched (the documented "No error handler" null case)
        ## instead of dying on a method call on an undefined value.
393     $r = $h-><M::DOMCore|DOMErrorHandler.handleError> ($err)
        if defined $h;
394     } # A
395     }__;
396     } else {
397     $err-><M::ecore|ErrorInterface||ManakaiDOM|Perl.throw>;
398     }
399    
400     @DISPerl:dpgDef:
401    
402     /*
403     XML Document Entity
404    
405     document := prolog element *Misc
406     - *Char RestrictedChar *Char ;; [1]

        Creates a document from $impl and parses, in order: an optional
        XML declaration, prolog miscellany (comments, processing
        instructions, white space), an optional document type
        declaration, more miscellany, the document element, trailing
        miscellany, and finally the end of input.  $doc is the rule's
        return value.  Strict error checking on the document is switched
        off while parsing and switched back on at the end; before that,
        a PARSE_ERR exception is raised if $self->{has_error} is set.
407     */
408     rule DocumentEntity ($impl) : standalone {
409     my $doc : return;
410    
411     lang:Perl {
412     $doc = $impl-><M::DOMImpl.createDocument>;
413     $doc-><AS::Document.strictErrorChecking> (false); ## re-enabled at the end of the rule
414     }
415    
416     /*
417     prolog := XMLDecl? *Misc [doctypedecl *Misc] ;; [22]
418     */
419     ?lexmode 'DocumentStart';
420    
421 wakaba 1.3 ~? (XDO) { // XML declaration, if present
422     &_XMLDeclaration_ ($doc => $doc);
423    
424     ~ (PIC) { // both branches select the same lexical mode, whether or not PIC matched
425     ?lexmode DocumentProlog;
426     } else {
427     ?lexmode DocumentProlog;
428     }
429     } else {
430     ?lexmode 'DocumentProlog';
431     }
432 wakaba 1.1
433     // *Misc (before the document type declaration)
434 wakaba 1.2 ~* (CDO) {
435 wakaba 1.1 &_CommentDeclaration_ ($doc => $doc, $parent => $doc);
436    
437     ~ (MDC) {
438     ?lexmode DocumentProlog;
439     } else {
440     ?lexmode DocumentProlog;
441     }
442     } (PIO) {
443     &_ProcessingInstruction_ ($doc => $doc, $parent => $doc);
444    
445     ~ (PIC) {
446     ?lexmode 'DocumentProlog';
447     } else {
448     ?lexmode DocumentProlog;
449     }
450     } (S) {
451     // white space: no action
452     }
453    
454     // doctypedecl
455     ~? (MDO) {
456     &_DocumentTypeDeclaration_ ($doc => $doc);
457    
458 wakaba 1.3 ~ (MDC) {
459     ?lexmode DocumentMisc;
460     } else {
461     ?lexmode DocumentMisc;
462     }
463     } else {
464     lang:Perl {
465     $self->{standalone} = true; ## no document type declaration was found
466     }
467     ?lexmode DocumentMisc;
468 wakaba 1.1 }
469    
470     // *Misc (after the document type declaration)
471 wakaba 1.2 ~* (CDO) {
472 wakaba 1.1 &_CommentDeclaration_ ($doc => $doc, $parent => $doc);
473    
474     ~ (MDC) {
475     ?lexmode DocumentMisc;
476     } else {
477     ?lexmode DocumentMisc;
478     }
479     } (PIO) {
480     &_ProcessingInstruction_ ($doc => $doc, $parent => $doc);
481    
482     ~ (PIC) {
483     ?lexmode 'DocumentMisc';
484     } else {
485     ?lexmode DocumentMisc;
486     }
487     } (S) {
488     // white space: no action
489     }
490    
491     // Document element
492     ~ (STAGO) {
493     &Element_ ($doc => $doc, $parent => $doc)
494     : unshift-current-token;
495     ~ (TAGC) {
496     ?lexmode DocumentEnd;
497     } else {
498     ?lexmode DocumentEnd;
499     }
500     } else {
501     ?lexmode 'DocumentEnd';
502     }
503    
504     // *Misc (after the document element)
505 wakaba 1.2 ~* (CDO) {
506 wakaba 1.1 &_CommentDeclaration_ ($doc => $doc, $parent => $doc);
507    
508     ~ (MDC) {
509     ?lexmode DocumentEnd;
510     } else {
511     ?lexmode DocumentEnd;
512     }
513     } (PIO) {
514     &_ProcessingInstruction_ ($doc => $doc, $parent => $doc);
515     ~ (PIC) {
516     ?lexmode 'DocumentEnd';
517     } else {
518     ?lexmode DocumentEnd;
519     }
520     } (S) {
521     // white space: no action
522     }
523    
524     ~ (#EOF) { }
525    
526     lang:Perl {
527     if ($self->{has_error}) { ## an error was reported and continued past somewhere during the parse
528     __EXCEPTION{DOMLS|PARSE_ERR::
529     }__;
530     }
531    
532     $doc-><AS::Document.strictErrorChecking> (true);
533     }
534     } // DocumentEntity
535    
536     /*
537     XML Declaration
538    
539     XMLDecl := '<?xml' VersionInfo
540     [EncodingDecl]
541     [SDDecl]
542     [S] '?>' ;; [23]
543    
544     NOTE: XML declaration is optional in XML 1.0
545     while it is required in XML 1.1.

        Parses the version, encoding and standalone pseudo-attributes
        and records them on $doc (xmlVersion, xmlEncoding,
        xmlStandalone) and on the parser ($self->{xml_version},
        $self->{standalone}).  A value that fails its check is reported
        through the error macro; if the report says to continue,
        $self->{has_error} is set.  The closing PIC token is left to
        the calling rule.

        In each of the three sections $ver holds that section's literal
        value and $bad_token the token it came from.  $ver stays
        undefined when no STRING token was matched, so every test on it
        is guarded with defined.
546     */
547 wakaba 1.3 rule _XMLDeclaration_ ($doc) {
548     ?lexmode XMLDeclaration;
549    
550     ~ (S) { }
551    
552     ~ (Name == 'version') {
553     ~? (S) { }
554     ~ (VI) { }
555     ~? (S) { }
556 wakaba 1.1
557 wakaba 1.3 my $ver;
558     my $bad_token;
559    
560     ~ (LIT) {
561     ?lexmode AttributeValueLiteral;
562    
563     ~ (STRING) {
564     lang:Perl ($version => $token.value) {
565     $ver = $version;
566     $bad_token = $token;
567     }
568     }
569    
570     ~ (LIT) {
571     ?lexmode XMLDeclaration;
572     }
573     } (LITA) {
574     ?lexmode AttributeValueLiteralA;
575    
576     ~ (STRING) {
577     lang:Perl ($version => $token.value) {
578     $ver = $version;
579     $bad_token = $token;
580     }
581     }
582    
583     ~ (LITA) {
584     ?lexmode XMLDeclaration;
585     }
586     }
587    
588     lang:Perl : has-error {
        ## Only versions 1.0 and 1.1 are accepted; a missing value is
        ## reported the same way as an unsupported one.
589     unless (defined $ver and ($ver eq '1.0' or $ver eq '1.1')) {
590     my $location;
591     __CODE{xp|get-location-from-token::
592     $token => {$bad_token},
593     $result => {$location},
594     }__;
595     my $continue = __DOMCore:ERROR{xp|wf-unsupported-xml-version::
596     DOMCore|location => {$location},
597     xp|parent => {$doc},
598     infoset|version => {$ver},
599     xp|error-token => {$bad_token},
600     }__;
601     unless ($continue) {
602     __EXCEPTION{DOMLS|PARSE_ERR}__;
603     }
604     $self->{has_error} = true;
605     }
606     $doc-><AS::Document.xmlVersion> ($ver);
607     $self->{xml_version} = $ver; ## shiftChar consults this for its version-dependent character handling
608     }
609    
610     ~? (S) { }
611 wakaba 1.1 }
612    
613 wakaba 1.3 ~? (Name == 'encoding') {
614     ~? (S) { }
615     ~ (VI) { }
616     ~? (S) { }
617    
618     my $ver;
619     my $bad_token;
620    
621     ~ (LIT) {
622     ?lexmode AttributeValueLiteral;
623    
624     ~ (STRING) {
625     lang:Perl ($version => $token.value) {
626     $ver = $version;
627     $bad_token = $token;
628     }
629     }
630    
631     ~ (LIT) {
632     ?lexmode XMLDeclaration;
633     }
634     } (LITA) {
635     ?lexmode AttributeValueLiteralA;
636    
637     ~ (STRING) {
638     lang:Perl ($version => $token.value) {
639     $ver = $version;
640     $bad_token = $token;
641     }
642     }
643    
644     ~ (LITA) {
645     ?lexmode XMLDeclaration;
646     }
647     }
648    
649     lang:Perl : has-error {
        ## The encoding name must start with an ASCII letter and continue
        ## with ASCII letters, digits, ".", "_" or "-".
650     unless (defined $ver and $ver =~ /\A[A-Za-z][A-Za-z0-9._-]*\z/) {
651     my $location;
652     __CODE{xp|get-location-from-token::
653     $token => {$bad_token},
654     $result => {$location},
655     }__;
656     my $continue = __DOMCore:ERROR{xp|wf-malformed-enc-name::
657     DOMCore|location => {$location},
658     xp|parent => {$doc},
659     xp|name => {$ver},
660     xp|error-token => {$bad_token},
661     }__;
662     unless ($continue) {
663     __EXCEPTION{DOMLS|PARSE_ERR}__;
664     }
665     $self->{has_error} = true;
666     }
667     $doc-><AS::Document.xmlEncoding> ($ver);
668     }
669    
670     ~? (S) { }
671 wakaba 1.1 }
672 wakaba 1.3
673     ~? (Name == 'standalone') {
674     ~? (S) { }
675     ~ (VI) { }
676     ~? (S) { }
677    
678     my $ver;
679     my $bad_token;
680    
681     ~ (LIT) {
682     ?lexmode AttributeValueLiteral;
683    
684     ~ (STRING) {
685     lang:Perl ($version => $token.value) {
686     $ver = $version;
687     $bad_token = $token;
688     }
689     }
690    
691     ~ (LIT) {
692     ?lexmode XMLDeclaration;
693     }
694     } (LITA) {
695     ?lexmode AttributeValueLiteralA;
696    
697     ~ (STRING) {
698     lang:Perl ($version => $token.value) {
699     $ver = $version;
700     $bad_token = $token;
701     }
702     }
703 wakaba 1.1
704 wakaba 1.3 ~ (LITA) {
705     ?lexmode XMLDeclaration;
706     }
707     }
708    
709     lang:Perl : has-error {
710     unless (defined $ver and ($ver eq 'yes' or $ver eq 'no')) {
711     my $location;
712     __CODE{xp|get-location-from-token::
713     $token => {$bad_token},
714     $result => {$location},
715     }__;
716     my $continue = __DOMCore:ERROR{xp|wf-malformed-xml-standalone::
717     DOMCore|location => {$location},
718     xp|parent => {$doc},
719     xp|name => {$ver},
720     xp|error-token => {$bad_token},
721     }__;
722     unless ($continue) {
723     __EXCEPTION{DOMLS|PARSE_ERR}__;
724     }
725     $self->{has_error} = true;
726     }
        ## The attribute is a boolean.  Passing the literal through
        ## unchanged would store the string "no", which Perl treats as
        ## true, so only an explicit "yes" counts as standalone.
        my $is_standalone = (defined $ver and $ver eq 'yes');
727     $doc-><AS::Document.xmlStandalone> ($is_standalone ? true : false);
728     $self->{standalone} = true if $is_standalone;
729     }
730    
731     ~? (S) { }
732     }
733    
734     // ~ (PIC) { }
735     } // _XMLDeclaration_
736 wakaba 1.1
737     /*
738     Comment Declaration
739    
740     Comment := '<!--' *(Char - '-' / '-' (Char - '-'))
741     '-->' ;; [15]
742 wakaba 1.4 */
743 wakaba 1.1 rule _CommentDeclaration_ ($doc, $parent) {
        // Appends one comment node to $parent.  Its data is the STRING
        // token's value, or the empty string when no STRING token is
        // present.  The closing MDC token is left to the calling rule.
744     ?lexmode 'CommentDeclaration';
745    
746     ~? (STRING) {
747     lang:Perl ($data => $token.value) {
748     my $com = $doc-><M::Document.createComment> ($data);
749     $parent-><M::Node.appendChild> ($com);
750     }
751     } else {
752     lang:Perl {
        ## No comment text: still create an (empty) comment node.
753     my $com = $doc-><M::Document.createComment> ('');
754     $parent-><M::Node.appendChild> ($com);
755     }
756     }
757    
758     ~ (COM) { // both branches select the same lexical mode, whether or not COM matched
759     ?lexmode MarkupDeclaration;
760     } else {
761     ?lexmode MarkupDeclaration;
762     }
763    
764     // ~ (MDC) { }
765 wakaba 1.4 } // _CommentDeclaration_
766    
767     rule _CommentDeclarationDTD ($doc) {
768     ?lexmode 'CommentDeclaration';
769    
770     ~? (STRING) {
771     // comment text is matched but no action is taken for it
772     }
773    
774     ~ (COM) { // both branches select the same lexical mode, whether or not COM matched
775     ?lexmode MarkupDeclaration;
776     } else {
777     ?lexmode MarkupDeclaration;
778     }
779    
780     ~ (MDC) { // unlike _CommentDeclaration_, this rule matches the closing MDC itself
781     ?lexmode DTD;
782     } else {
783     ?lexmode DTD;
784     }
785     } // _CommentDeclarationDTD
786    
787 wakaba 1.1 /*
788     Processing Instruction
789    
790     PI := '<?' PITarget [S *Char - *Char '?>' *Char]
791     '?>' ;; [16]
792     */
793     rule _ProcessingInstruction_ ($doc, $parent) {
        // Parses the target name and the optional data of a processing
        // instruction and appends the resulting node to $parent.  The
        // closing PIC token is left to the calling rule.
794     ?lexmode 'PIName';
795    
796     my $pi;
797    
798     ~ (Name) {
799 wakaba 1.3 lang:Perl ($name => $token.value) : has-error {
        ## A target spelled "xml" in any letter case is reported.
800 wakaba 1.1 if (lc $name eq 'xml') {
801 wakaba 1.3 my $location;
802     __CODE{xp|get-location-from-token::
803     $token => {$token},
804     $result => {$location},
805     }__;
806     my $continue = __DOMCore:ERROR{xp|wf-pi-target-is-xml::
807     xp|name => {$name},
808     DOMCore|location => {$location},
809     xp|parent => {$parent},
810     }__;
811     unless ($continue) {
812     __EXCEPTION{DOMLS|PARSE_ERR::
813     }__;
814     }
815     $self->{has_error} = true;
816 wakaba 1.1 }
817     ## TODO: Namespace well-formedness
818     $pi = $doc-><M::Document.createProcessingInstruction>
819     ($name);
820     }
821     }
822    
823     ~ (S) {
824     ?lexmode 'PIData';
825    
826     my $tdata;
827    
828     ~? (DATA) {
829     lang:Perl ($data => $token.value) {
830     $tdata = $data;
831     }
832     } else {
833     lang:Perl {
834     $tdata = '';
835     }
836     }
837    
838     lang:Perl {
        ## $pi is still undefined when no target name was matched above;
        ## skip the node instead of calling a method on undef.
        if (defined $pi) {
839     $pi-><AS::Node.nodeValue> ($tdata);
        }
840     }
841     }
842    
843     lang:Perl {
        if (defined $pi) {
844     $parent-><M::Node.appendChild> ($pi);
        }
845     }
846    
847     // ~ (PIC) { }
848     } // _ProcessingInstruction_
849 wakaba 1.4
850 wakaba 1.6 /*
851     Processing instruction in DTD
852     */
853     rule _ProcessingInstructionDTD ($doc, $doctype) {
        // Parses the target name and the optional data of a processing
        // instruction found in the DTD and appends the resulting node
        // to $doctype.  This rule matches the closing PIC token itself
        // and then selects the DTD lexical mode.
854 wakaba 1.4 ?lexmode 'PIName';
855 wakaba 1.6
856     my $pi;
857 wakaba 1.4
858     ~ (Name) {
859 wakaba 1.6 lang:Perl ($name => $token.value) : has-error {
        ## A target spelled "xml" in any letter case is reported.
860     if (lc $name eq 'xml') {
861     my $location;
862     __CODE{xp|get-location-from-token::
863     $token => {$token},
864     $result => {$location},
865     }__;
866     my $continue = __DOMCore:ERROR{xp|wf-pi-target-is-xml::
867     xp|name => {$name},
868     DOMCore|location => {$location},
869     xp|parent => {$doctype},
870     }__;
871     unless ($continue) {
872     __EXCEPTION{DOMLS|PARSE_ERR::
873     }__;
874     }
875     $self->{has_error} = true;
876     }
877     ## TODO: Namespace well-formedness
878     $pi = $doc-><M::Document.createProcessingInstruction>
879     ($name);
880     }
881 wakaba 1.4 }
882    
883     ~ (S) {
884     ?lexmode 'PIData';
885    
886 wakaba 1.6 my $tdata;
887    
888 wakaba 1.4 ~? (DATA) {
889 wakaba 1.6 lang:Perl ($data => $token.value) {
890     $tdata = $data;
891     }
892     } else {
893     lang:Perl {
894     $tdata = '';
895     }
896     }
897    
898     lang:Perl {
        ## $pi is still undefined when no target name was matched above;
        ## skip the node instead of calling a method on undef.
        if (defined $pi) {
899     $pi-><AS::Node.nodeValue> ($tdata);
        }
900 wakaba 1.4 }
901     }
902    
903 wakaba 1.6 lang:Perl {
        if (defined $pi) {
904     $doctype-><M::Node.appendChild> ($pi);
        }
905     }
906    
907 wakaba 1.4 ~ (PIC) { // both branches select the same lexical mode, whether or not PIC matched
908     ?lexmode DTD;
909     } else {
910     ?lexmode DTD;
911     }
912     } // _ProcessingInstructionDTD
913 wakaba 1.1
914     /*
915     Element content parsing mode
916    
917     element := EmptyElemTag /
918     STag content ETag ;; [39]
919     content := (CharData / element / Reference / CDSect /
920     PI / Comment) ;; [43]
921     */
922 wakaba 1.6 rule Element_ ($doc, $parent, $ns) : standalone {
923 wakaba 1.1 ?lexmode 'ElementContent';
924    
925     my $node; // Current "parent" node
926     my $nodes; // Node stack (w/o $current_node)
927     my $type; // Current "parent" element type QName
928     my $types; // Element type stack (w/o $current_type)
929 wakaba 1.6 //my $ns; // Current in-scope namespace bindings
930 wakaba 1.1 my $nses; // Namespace binding stack (w/o $current_ns)
931    
932     lang:Perl {
933     $node = $parent;
934     $nodes = [];
935     $type = '';
936     $types = [];
937 wakaba 1.6 $ns ||= {
938 wakaba 1.1 xml => <Q::xml:>,
939     xmlns => <Q::xmlns:>,
940     };
941     $nses = [];
942     }
943    
944     ~* : name => CONTENT
945     (CharData) {
946     // Character data
947     lang:Perl ($data => $token.value) {
948     $node-><M::Node.appendChild>
949     ($doc-><M::Document.createTextNode> ($data));
950     }
951     } (STAGO) {
952     // Start tag or empty element tag
953    
954     ?lexmode 'StartTag';
955    
956     ~ (Name) {
957     my $attrs;
958     lang:Perl ($name => $token.value) {
959     push @{$types}, $type;
960     $type = $name;
961     $attrs = {};
962     }
963    
964     ~? (S) {
965     &AttributeSpecificationList
966     ($doc => $doc, $attrs => $attrs);
967     }
968    
969     my $el;
970    
971     lang:Perl {
972     push @{$nses}, $ns;
973     $ns = {%$ns};
974    
975     my %gattr;
976     my %lattr;
977     for my $atqname (keys %$attrs) {
978     my ($pfx, $lname) = split /:/, $atqname;
979     if (defined $lname) { ## Global attribute
980     ## TODO: Namespace well-formedness (lname is NCName)
981     if ($pfx eq 'xmlns') {
982     my $nsuri = $attrs->{$atqname}->{value};
983     if ($lname eq 'xml' and
984     $nsuri ne <Q::xml:>) {
985     ## TODO: error
986     } elsif ($lname eq 'xmlns') {
987     ## TODO: error
988     }
989     if ($nsuri eq '') {
990     ## TODO: error in XML 1.0
991     } elsif ($nsuri eq <Q::xml:> and
992     $lname ne 'xml') {
993     ## TODO: error
994     } elsif ($nsuri eq <Q::xmlns:>) {
995     ## TODO: error
996     }
997     $ns->{$lname} = $attrs->{$atqname}->{value};
998     delete $ns->{$lname} unless length $ns->{$lname};
999     } elsif ($pfx eq '') {
1000     ## TODO: pfx is not NCName error
1001     } else {
1002     if ($gattr{$pfx}->{$lname}) {
1003     ## TODO: Namespace well-formedness error
1004     }
1005     }
1006     $gattr{$pfx}->{$lname} = $attrs->{$atqname};
1007     } else { ## Local attribute
1008     if ($pfx eq 'xmlns') {
1009     $ns->{''} = $attrs->{xmlns}->{value};
1010     delete $ns->{''} unless length $ns->{''};
1011     } else {
1012     $lattr{$pfx} = $attrs->{$atqname};
1013     }
1014     }
1015     }
1016    
1017     my ($pfx, $lname) = split /:/, $type;
1018     my $nsuri;
1019     ## TODO: lname is NCName?
1020     if (defined $lname) { ## Prefixed namespace
1021     if ($pfx eq '') {
1022     ## TODO: pfx is not NCName error
1023     }
1024     if (defined $ns->{$pfx}) {
1025     $nsuri = $ns->{$pfx};
1026     } else {
1027     ## TODO: namespace ill-formed
1028     }
1029     } else { ## Default namespace
1030     $nsuri = $ns->{''};
1031     }
1032    
1033     $el = $doc-><M::Document.createElementNS>
1034     ($nsuri, $type);
1035    
1036     if ($attrs->{xmlns}) {
1037     my $attr = $doc-><M::Document.createAttributeNS>
1038     (<Q::xmlns:>, 'xmlns');
1039     for (@{$attrs->{xmlns}->{nodes}}) {
1040     $attr-><M::Node.appendChild> ($_);
1041     }
1042     $el-><M::Element.setAttributeNodeNS> ($attr);
1043     }
1044    
1045     for my $lname (keys %lattr) {
1046     my $attr = $doc-><M::Document.createAttributeNS>
1047     (null, $lname);
1048     for (@{$lattr{$lname}->{nodes}}) {
1049     $attr-><M::Node.appendChild> ($_);
1050     }
1051     $el-><M::Element.setAttributeNodeNS> ($attr);
1052     }
1053    
1054     for my $pfx (keys %gattr) {
1055     for my $lname (keys %{$gattr{$pfx}}) {
1056     my $attr = $doc-><M::Document.createAttributeNS>
1057     ($ns->{$pfx}, $pfx.':'.$lname);
1058     for (@{$gattr{$pfx}->{$lname}->{nodes}}) {
1059     $attr-><M::Node.appendChild> ($_);
1060     }
1061     $el-><M::Element.setAttributeNodeNS> ($attr);
1062     }
1063     }
1064    
1065     $node-><M::Node.appendChild> ($el);
1066     }
1067    
1068     ~ (TAGC) {
1069     lang:Perl {
1070     push @{$nodes}, $node;
1071     $node = $el;
1072     }
1073     ?lexmode ElementContent;
1074 wakaba 1.6 } (NESTC) {
1075     my $is_docel;
1076 wakaba 1.1 lang:Perl {
1077     $ns = pop @{$nses};
1078     $type = pop @{$types};
1079 wakaba 1.6 $is_docel = (@{$types} == 0);
1080     }
1081    
1082     if-true ($is_docel) {
1083     return;
1084     }
1085    
1086     ~ (TAGC) {
1087     ?lexmode ElementContent;
1088     } else {
1089     ?lexmode ElementContent;
1090 wakaba 1.1 }
1091     } else {
1092     ?lexmode ElementContent;
1093     }
1094     } else {
1095     ?lexmode ElementContent;
1096     }
1097    
1098     } (ETAGO) {
1099     // End tag
1100    
1101     ?lexmode 'EndTag';
1102    
1103     my $is_docel;
1104    
1105     ~ (Name) {
1106 wakaba 1.3 lang:Perl ($name => $token.value) : has-error {
1107 wakaba 1.1 if ($name eq $type) {
1108     $type = pop @{$types};
1109     if ($type eq '') {
1110     $is_docel = true;
1111     }
1112     $node = pop @{$nodes};
1113     $ns = pop @{$nses};
1114     } else {
1115 wakaba 1.3 my $location;
1116     __CODE{xp|get-location-from-token::
1117     $token => $token,
1118     $result => $location,
1119     }__;
1120     my $continue = __DOMCore:ERROR{xp|wf-element-type-match::
1121     DOMCore:location => {$location},
1122     xp|token => {$token},
1123     xp|expected-element-type => {$type},
1124     xp|actual-element-type => {$name},
1125     xp|node => {$node},
1126     }__;
1127     unless ($continue) {
1128     __EXCEPTION{DOMLS|PARSE_ERR}__;
1129     }
1130     $self->{has_error} = true;
1131 wakaba 1.1 }
1132     }
1133     }
1134    
1135     ~? (S) { }
1136    
1137     if-true ($is_docel) {
1138 wakaba 1.3 lang:Perl : has-error {
1139 wakaba 1.1 if (@{$types}) {
1140 wakaba 1.3 my $location;
1141     __CODE{xp|get-location-from-token::
1142     $token => $token,
1143     $result => $location,
1144     }__;
1145     for my $type (reverse @{$types}) {
1146     my $continue = __DOMCore:ERROR{xp|wf-no-end-tag::
1147     DOMCore:location => {$location},
1148     xp|token => {$token},
1149     xp|expected-element-type => {$type},
1150     xp|node => {$node},
1151     }__;
1152     unless ($continue) {
1153     __EXCEPTION{DOMLS|PARSE_ERR}__;
1154     }
1155     $node = shift @{$nodes};
1156     }
1157     $self->{has_error} = true;
1158 wakaba 1.1 }
1159     }
1160     return;
1161     }
1162    
1163     ~ (TAGC) {
1164     ?lexmode ElementContent;
1165     } else {
1166     ?lexmode 'ElementContent';
1167     }
1168    
1169     } (HCRO) {
1170     &_HexadecimalCharacterReference_
1171     ($doc => $doc, $parent => $node);
1172    
1173     ~ (REFC) {
1174     ?lexmode 'ElementContent';
1175     } else {
1176     ?lexmode ElementContent;
1177     }
1178     } (CRO) {
1179     &_NumericCharacterReference_
1180     ($doc => $doc, $parent => $node);
1181    
1182     ~ (REFC) {
1183     ?lexmode 'ElementContent';
1184     } else {
1185     ?lexmode ElementContent;
1186     }
1187     } (ERO) {
1188 wakaba 1.6 &_GeneralEntityReferenceEC
1189     ($doc => $doc, $parent => $node, $ns => $ns);
1190 wakaba 1.1 } (CDO) {
1191     &_CommentDeclaration_ ($doc => $doc, $parent => $node);
1192    
1193     ~ (MDC) {
1194     ?lexmode ElementContent;
1195     } else {
1196     ?lexmode ElementContent;
1197     }
1198     } (CDSO) {
1199     &_CDATASection_ ($doc => $doc, $parent => $node);
1200    
1201     ~ (MSE) {
1202     ?lexmode 'ElementContent';
1203     } else {
1204     ?lexmode ElementContent;
1205     }
1206     } (PIO) {
1207     &_ProcessingInstruction_ ($doc => $doc, $parent => $node);
1208    
1209     ~ (PIC) {
1210     ?lexmode 'ElementContent';
1211     } else {
1212     ?lexmode ElementContent;
1213     }
1214     }
1215 wakaba 1.3
1216     ~ (#NONE) { }
1217 wakaba 1.1 } // Element_
1218    
1219     rule AttributeSpecificationList ($doc, $attrs)
1220     : standalone
1221     {
         // Parses the attribute specifications of a start tag.
         //
         // For every attribute name matched, a fresh entry
         // |{nodes => [], value => '', index => N}| is stored in |$attrs|
         // under the attribute's qualified name, where |N| counts the
         // attributes in order of appearance.  The entry is then filled
         // in by the attribute value rules.  A name that already exists
         // in |$attrs| is reported as |xp|wf-unique-att-spec| and its
         // entry is replaced by the new one.
1222     ?lexmode 'StartTag';
1223    
         // Running index of the attribute within this start tag
1224     my $i;
1225     lang:Perl {
1226     $i = 0;
1227     }
1228    
1229     ~* (Name) {
1230     my $atqname;
1231     lang:Perl ($name => $token.value) {
1232     $atqname = $name;
1233     }
1234    
1235     my $vals;
1236     lang:Perl {
         ## Duplicate attribute name: report it; raise |PARSE_ERR| unless
         ## the error handler asks to continue.
1237     if ($attrs->{$atqname}) {
1238 wakaba 1.3 my $location;
1239     __CODE{xp|get-location-from-token::
1240     $token => $token,
1241     $result => $location,
1242     }__;
1243     my $continue = __DOMCore:ERROR{xp|wf-unique-att-spec::
1244     DOMCore:location => {$location},
1245     xp|token => {$token},
1246     xp|name => {$atqname},
1247     }__;
1248     unless ($continue) {
1249     __EXCEPTION{DOMLS|PARSE_ERR}__;
1250     }
1251     $self->{has_error} = true;
1252 wakaba 1.1 }
1253    
         ## The (possibly duplicated) name always gets a new, empty entry.
1254     $vals = $attrs->{$atqname} = {
1255     nodes => [],
1256     value => '',
1257     index => $i++,
1258     };
1259     }
1260 wakaba 1.3
         // Name S? "=" S?
1261     ~? (S) { }
1262     ~ (VI) { }
1263     ~? (S) { }
1264 wakaba 1.1
         // Quoted value; the closing delimiter must be of the same kind
         // as the opening one.  The lexer is put back into |StartTag|
         // mode whether or not the closing delimiter was found.
1265     ~ (LIT) {
1266     &_AttributeValueSpecification_
1267     ($doc => $doc, $vals => $vals);
1268    
1269     ~ (LIT) {
1270     ?lexmode StartTag;
1271     } else {
1272     ?lexmode StartTag;
1273     }
1274     } (LITA) {
1275     &_AttributeValueSpecificationA_
1276     ($doc => $doc, $vals => $vals);
1277    
1278     ~ (LITA) {
1279     ?lexmode StartTag;
1280     } else {
1281     ?lexmode StartTag;
1282     }
1283     }
1284     } (S) : separator : terminator? { }
1285     } // AttributeSpecificationList
1286    
1287     rule _AttributeValueSpecification_ ($doc, $vals) {
         // Parses the content of an attribute value literal in the
         // |AttributeValueLiteral| lexical mode.  The opening and closing
         // delimiters are consumed by the caller (see the commented-out
         // matches below).
         //
         // Every piece of the value is appended to |$vals->{nodes}| as a
         // node; string pieces are also appended to |$vals->{value}|.
1288     // ~ (LIT) { }
1289     ?lexmode 'AttributeValueLiteral';
1290    
1291     ~* (STRING) {
1292     lang:Perl ($value => $token.value) {
         ## Replace each TAB, LF and CR by a single space.
1293     $value =~ s/[\x09\x0A\x0D]/ /g;
1294     my $text = $doc-><M::Document.createTextNode> ($value);
1295     push @{$vals->{nodes}}, $text;
1296     $vals->{value} .= $value;
1297     }
1298     } (HCRO) {
1299     &_HexadecimalCharacterReferenceV_
1300     ($doc => $doc, $vals => $vals);
1301    
         // Return to the literal mode whether or not |REFC| was found.
1302     ~ (REFC) {
1303     ?lexmode AttributeValueLiteral;
1304     } else {
1305     ?lexmode AttributeValueLiteral;
1306     }
1307     } (CRO) {
1308     &_NumericCharacterReferenceV_
1309     ($doc => $doc, $vals => $vals);
1310    
1311     ~ (REFC) {
1312     ?lexmode AttributeValueLiteral;
1313     } else {
1314     ?lexmode AttributeValueLiteral;
1315     }
1316     } (ERO) {
1317     // TODO: Attribute value normalization
1318     &_GeneralEntityReferenceV_
1319     ($doc => $doc, $vals => $vals);
1320    
1321     ~ (REFC) {
1322     ?lexmode AttributeValueLiteral;
1323     } else {
1324     ?lexmode AttributeValueLiteral;
1325     }
1326     }
1327    
1328     // ~ (LIT) { } (LITA) { }
1329     } // _AttributeValueSpecification_
1330    
1331     rule _AttributeValueSpecificationA_ ($doc, $vals) {
         // Same as |_AttributeValueSpecification_| except that the
         // |AttributeValueLiteralA| lexical mode is used; the caller
         // invokes this rule after a |LITA| opening delimiter.  The
         // delimiters themselves are consumed by the caller.
         //
         // Every piece of the value is appended to |$vals->{nodes}| as a
         // node; string pieces are also appended to |$vals->{value}|.
1332     // ~ (LITA) { }
1333     ?lexmode 'AttributeValueLiteralA';
1334    
1335     ~* (STRING) {
1336     lang:Perl ($value => $token.value) {
         ## Replace each TAB, LF and CR by a single space.
1337     $value =~ s/[\x09\x0A\x0D]/ /g;
1338     my $text = $doc-><M::Document.createTextNode> ($value);
1339     push @{$vals->{nodes}}, $text;
1340     $vals->{value} .= $value;
1341     }
1342     } (HCRO) {
1343     &_HexadecimalCharacterReferenceV_
1344     ($doc => $doc, $vals => $vals);
1345    
         // Return to the literal mode whether or not |REFC| was found.
1346     ~ (REFC) {
1347     ?lexmode AttributeValueLiteralA;
1348     } else {
1349     ?lexmode AttributeValueLiteralA;
1350     }
1351     } (CRO) {
1352     &_NumericCharacterReferenceV_
1353     ($doc => $doc, $vals => $vals);
1354    
1355     ~ (REFC) {
1356     ?lexmode AttributeValueLiteralA;
1357     } else {
1358     ?lexmode AttributeValueLiteralA;
1359     }
1360     } (ERO) {
1361     // TODO: Attribute value normalization
1362     &_GeneralEntityReferenceV_
1363     ($doc => $doc, $vals => $vals);
1364    
1365     ~ (REFC) {
1366     ?lexmode AttributeValueLiteralA;
1367     } else {
1368     ?lexmode AttributeValueLiteralA;
1369     }
1370     }
1371    
1372     // ~ (LITA) { }
1373     } // _AttributeValueSpecificationA_
1374    
1375     /*
1376     CDATA Section Content Parsing Mode
         
         Reads the optional |CData| token in the |CDATASectionContent|
         lexical mode, creates a CDATA section node holding its value
         (or the empty string if there is no such token) and appends
         the node to |$parent|.  The closing |MSE| is left to the
         caller.
1377     */
1378     rule _CDATASection_ ($doc, $parent) {
1379     ?lexmode 'CDATASectionContent';
1380    
         // Character data of the section
1381     my $cdata;
1382    
1383 wakaba 1.2 ~? (CData) {
1384 wakaba 1.1 lang:Perl ($data => $token.value) {
1385     $cdata = $data;
1386     }
1387     } else {
         // Empty section
1388     lang:Perl {
1389     $cdata = '';
1390     }
1391     }
1392    
1393     lang:Perl {
1394     my $cdsect = $doc-><M::Document.createCDATASection>
1395     ($cdata);
1396     $parent-><M::Node.appendChild> ($cdsect);
1397     }
1398    
1399     // ~ (MSE) { }
1400     } // _CDATASection_
1401    
1402     rule _NumericCharacterReference_ ($doc, $parent) {
         // Decimal character reference in content.
         //
         // The |NUMBER| token value is converted to a number and checked
         // against the legal character ranges for the XML version in
         // |$self->{xml_version}| ('1.0' or '1.1'); any other version
         // value makes the check fail.  An illegal character is reported
         // as |xp|wf-legal-character|; |PARSE_ERR| is raised unless the
         // error handler asks to continue.  A text node containing the
         // referenced character is then appended to |$parent|, including
         // the case where parsing continues after an error.  The closing
         // |REFC| is left to the caller.
1403     ?lexmode 'NumericCharacterReference';
1404    
1405     ~ (NUMBER) {
1406 wakaba 1.3 lang:Perl ($num => $token.value) : has-error {
         ## Force numeric interpretation of the digit string.
1407     $num += 0;
1408     unless (
1409     ($self->{xml_version} eq '1.0' and
1410     ((0x0020 <= $num and $num <= 0xD7FF) or
1411     (0xE000 <= $num and $num <= 0xFFFD) or
1412     (0x10000 <= $num and $num <= 0x10FFFF) or
1413     $num == 0x9 or $num == 0xA or $num == 0xD)) or
1414     ($self->{xml_version} eq '1.1' and
1415     ((0x0001 <= $num and $num <= 0xD7FF) or
1416     (0xE000 <= $num and $num <= 0xFFFD) or
1417     (0x10000 <= $num and $num <= 0x10FFFF)))
1418     ) {
1419     my $location;
1420     __CODE{xp|get-location-from-token::
1421     $token => $token,
1422     $result => $location,
1423     }__;
1424     my $continue = __DOMCore:ERROR{xp|wf-legal-character::
1425     DOMCore:location => {$location},
1426     xp|token => {$token},
1427     xp|character-number => {$num},
1428     xp|parent => {$parent},
1429     }__;
1430     unless ($continue) {
1431     __EXCEPTION{DOMLS|PARSE_ERR}__;
1432     }
1433     $self->{has_error} = true;
1434     }
1435     my $ncr = $doc-><M::Document.createTextNode> (chr $num);
1436 wakaba 1.1 $parent-><M::Node.appendChild> ($ncr);
1437     }
1438     }
1439    
1440     // ~ (REFC) { }
1441     } // _NumericCharacterReference_
1442    
1443     rule _NumericCharacterReferenceV_ ($doc, $vals) {
         // Decimal character reference in an attribute value.
         //
         // Performs the same legal-character check as
         // |_NumericCharacterReference_| (the error has no |xp|parent|
         // parameter here).  The referenced character is then pushed to
         // |$vals->{nodes}| as a text node and appended to
         // |$vals->{value}|, so |$vals| must be a hash reference with
         // those two members.  The closing |REFC| is left to the caller.
1444     ?lexmode 'NumericCharacterReference';
1445    
1446     ~ (NUMBER) {
1447 wakaba 1.3 lang:Perl ($num => $token.value) : has-error {
         ## Force numeric interpretation of the digit string.
1448     $num += 0;
1449     unless (
1450     ($self->{xml_version} eq '1.0' and
1451     ((0x0020 <= $num and $num <= 0xD7FF) or
1452     (0xE000 <= $num and $num <= 0xFFFD) or
1453     (0x10000 <= $num and $num <= 0x10FFFF) or
1454     $num == 0x9 or $num == 0xA or $num == 0xD)) or
1455     ($self->{xml_version} eq '1.1' and
1456     ((0x0001 <= $num and $num <= 0xD7FF) or
1457     (0xE000 <= $num and $num <= 0xFFFD) or
1458     (0x10000 <= $num and $num <= 0x10FFFF)))
1459     ) {
1460     my $location;
1461     __CODE{xp|get-location-from-token::
1462     $token => $token,
1463     $result => $location,
1464     }__;
1465     my $continue = __DOMCore:ERROR{xp|wf-legal-character::
1466     DOMCore:location => {$location},
1467     xp|token => {$token},
1468     xp|character-number => {$num},
1469     }__;
1470     unless ($continue) {
1471     __EXCEPTION{DOMLS|PARSE_ERR}__;
1472     }
1473     $self->{has_error} = true;
1474     }
1475 wakaba 1.1 my $ncr = $doc-><M::Document.createTextNode>
1476     (my $char = chr (0+$num));
1477     push @{$vals->{nodes}}, $ncr;
1478     $vals->{value} .= $char;
1479     }
1480     }
1481    
1482     // ~ (REFC) { }
1483     } // _NumericCharacterReferenceV_
1484    
1485     rule _HexadecimalCharacterReference_ ($doc, $parent) {
         // Hexadecimal character reference in content.
         //
         // The |Hex| token value (a string of hexadecimal digits) is
         // converted to its code point and checked against the legal
         // character ranges for the XML version in
         // |$self->{xml_version}| ('1.0' or '1.1').  An illegal character
         // is reported as |xp|wf-legal-character|; |PARSE_ERR| is raised
         // unless the error handler asks to continue.  A text node
         // containing the referenced character is then appended to
         // |$parent|.  The closing |REFC| is left to the caller.
1486     ?lexmode 'HexadecimalCharacterReference';
1487    
1488     ~ (Hex) {
1489 wakaba 1.3 lang:Perl ($num => $token.value) : has-error {
         ## Convert the hexadecimal digit string to a number first.
         ## (Numifying it as a decimal string would turn e.g. "A" into 0
         ## and "1F" into 1, so the range check below would test the
         ## wrong code point.)
1490     $num = hex $num;
1491     unless (
1492     ($self->{xml_version} eq '1.0' and
1493     ((0x0020 <= $num and $num <= 0xD7FF) or
1494     (0xE000 <= $num and $num <= 0xFFFD) or
1495     (0x10000 <= $num and $num <= 0x10FFFF) or
1496     $num == 0x9 or $num == 0xA or $num == 0xD)) or
1497     ($self->{xml_version} eq '1.1' and
1498     ((0x0001 <= $num and $num <= 0xD7FF) or
1499     (0xE000 <= $num and $num <= 0xFFFD) or
1500     (0x10000 <= $num and $num <= 0x10FFFF)))
1501     ) {
1502     my $location;
1503     __CODE{xp|get-location-from-token::
1504     $token => $token,
1505     $result => $location,
1506     }__;
1507     my $continue = __DOMCore:ERROR{xp|wf-legal-character::
1508     DOMCore:location => {$location},
1509     xp|token => {$token},
1510     xp|character-number => {$num},
1511     xp|parent => {$parent},
1512     }__;
1513     unless ($continue) {
1514     __EXCEPTION{DOMLS|PARSE_ERR}__;
1515     }
1516     $self->{has_error} = true;
1517     }
         ## |$num| is already the code point here.
1518 wakaba 1.1 my $ncr = $doc-><M::Document.createTextNode>
1519     (chr $num);
1520     $parent-><M::Node.appendChild> ($ncr);
1521     }
1522     }
1523    
1524     // ~ (REFC) { }
1525     } // _HexadecimalCharacterReference_
1526    
1527 wakaba 1.3 rule _HexadecimalCharacterReferenceV_ ($doc, $vals) {
         // Hexadecimal character reference in an attribute value.
         //
         // The |Hex| token value (a string of hexadecimal digits) is
         // converted to its code point and checked against the legal
         // character ranges for the XML version in
         // |$self->{xml_version}|.  An illegal character is reported as
         // |xp|wf-legal-character|; |PARSE_ERR| is raised unless the
         // error handler asks to continue.  The referenced character is
         // then pushed to |$vals->{nodes}| as a text node and appended to
         // |$vals->{value}|, so |$vals| must be a hash reference with
         // those two members.  The closing |REFC| is left to the caller.
1528 wakaba 1.1 ?lexmode 'HexadecimalCharacterReference';
1529    
1530     ~ (Hex) {
1531 wakaba 1.3 lang:Perl ($num => $token.value) : has-error {
         ## Convert the hexadecimal digit string to a number first.
         ## (Numifying it as a decimal string would turn e.g. "A" into 0
         ## and "1F" into 1, so the range check below would test the
         ## wrong code point.)
1532     $num = hex $num;
1533     unless (
1534     ($self->{xml_version} eq '1.0' and
1535     ((0x0020 <= $num and $num <= 0xD7FF) or
1536     (0xE000 <= $num and $num <= 0xFFFD) or
1537     (0x10000 <= $num and $num <= 0x10FFFF) or
1538     $num == 0x9 or $num == 0xA or $num == 0xD)) or
1539     ($self->{xml_version} eq '1.1' and
1540     ((0x0001 <= $num and $num <= 0xD7FF) or
1541     (0xE000 <= $num and $num <= 0xFFFD) or
1542     (0x10000 <= $num and $num <= 0x10FFFF)))
1543     ) {
1544     my $location;
1545     __CODE{xp|get-location-from-token::
1546     $token => $token,
1547     $result => $location,
1548     }__;
1549     my $continue = __DOMCore:ERROR{xp|wf-legal-character::
1550     DOMCore:location => {$location},
1551     xp|token => {$token},
1552     xp|character-number => {$num},
1553     }__;
1554     unless ($continue) {
1555     __EXCEPTION{DOMLS|PARSE_ERR}__;
1556     }
1557     $self->{has_error} = true;
1558     }
         ## |$num| is already the code point here.
1559 wakaba 1.1 my $ncr = $doc-><M::Document.createTextNode>
1560     (my $char = chr $num);
1561     push @{$vals->{nodes}}, $ncr;
1562     $vals->{value} .= $char;
1563     }
1564     }
1565    
1566     // ~ (REFC) { }
1567     } // _HexadecimalCharacterReferenceV_
1568    
1569 wakaba 1.6 /*
1570     General entity reference in element's content
         
         Creates an entity reference node for the referenced name,
         appends it to |$parent| and parses the content that follows
         into that node (character data, elements, character
         references, nested entity references, comments, CDATA
         sections and processing instructions).
         
         While that content is parsed, a copy of the entity state is
         pushed onto |$self->{entity}| and the current |$self->{token}|
         and |$self->{char}| buffers are saved on
         |$self->{entity_token}| and |$self->{entity_char}| and replaced
         by empty ones.  They are restored after |#EOF| has been
         matched.
1571     */
1572     rule _GeneralEntityReferenceEC ($doc, $parent, $ns)
1573     : recursive
1574     {
1575 wakaba 1.1 ?lexmode 'EntityReference';
1576    
1577     ~ (Name) {
         // The entity reference node; receives the parsed content
1578 wakaba 1.6 my $er;
1579 wakaba 1.1 lang:Perl ($name => $token.value) {
1580     ## TODO: Namespace well-formedness
1581     ## TODO: Entity declared constraints
1582 wakaba 1.6 $er = $doc-><M::Document.createEntityReference>
1583 wakaba 1.1 ($name);
1584     $parent-><M::Node.appendChild> ($er);
         ## Enter the entity: push its state and start with empty
         ## token and character buffers.
1585 wakaba 1.6 push @{$self->{entity}}, <Code::getCopyOfEntityState::
1586     $entity_type = 'general_entity',
1587     $entity_name = $name>;
1588     push @{$self->{entity_token}}, $self->{token};
1589     $self->{token} = [];
1590     push @{$self->{entity_char}}, $self->{char};
1591     $self->{char} = [];
1592     }
1593    
         // Content; every node is appended to |$er|.
1594     ?lexmode ElementContent;
1595     ~* (CharData) {
1596     lang:Perl ($data => $token.value) {
1597     $er-><M::Node.appendChild>
1598     ($doc-><M::Document.createTextNode> ($data));
1599     }
1600     } (STAGO) {
1601     &Element_ ($doc => $doc, $parent => $er, $ns => $ns)
1602     : unshift-current-token;
1603     ~ (TAGC) {
1604     ?lexmode ElementContent;
1605     } else {
1606     ?lexmode ElementContent;
1607     }
1608     } (HCRO) {
1609     &_HexadecimalCharacterReference_
1610     ($doc => $doc, $parent => $er);
1611    
1612     ~ (REFC) {
1613     ?lexmode 'ElementContent';
1614     } else {
1615     ?lexmode ElementContent;
1616     }
1617     } (CRO) {
1618     &_NumericCharacterReference_
1619     ($doc => $doc, $parent => $er);
1620    
1621     ~ (REFC) {
1622     ?lexmode 'ElementContent';
1623     } else {
1624     ?lexmode ElementContent;
1625     }
1626     } (ERO) {
         // Nested entity reference
1627     &_GeneralEntityReferenceEC
1628     ($doc => $doc, $parent => $er, $ns => $ns);
1629     } (CDO) {
1630     &_CommentDeclaration_ ($doc => $doc, $parent => $er);
1631    
1632     ~ (MDC) {
1633     ?lexmode ElementContent;
1634     } else {
1635     ?lexmode ElementContent;
1636     }
1637     } (CDSO) {
1638     &_CDATASection_ ($doc => $doc, $parent => $er);
1639    
1640     ~ (MSE) {
1641     ?lexmode 'ElementContent';
1642     } else {
1643     ?lexmode ElementContent;
1644     }
1645     } (PIO) {
1646     &_ProcessingInstruction_ ($doc => $doc, $parent => $er);
1647    
1648     ~ (PIC) {
1649     ?lexmode 'ElementContent';
1650     } else {
1651     ?lexmode ElementContent;
1652     }
1653 wakaba 1.1 }
1654 wakaba 1.6
         // End of the content: leave the entity and restore the saved
         // buffers.
1655     ~ (#EOF) { }
1656     lang:Perl {
1657     $self->{token} = pop @{$self->{entity_token}};
1658     $self->{char} = pop @{$self->{entity_char}};
1659     pop @{$self->{entity}};
1660     }
1661    
1662     ?lexmode EntityReference;
1663     ?requires-next-token;
1664 wakaba 1.1 }
1665    
         // Return to content mode whether or not |REFC| was found.
1666 wakaba 1.6 ~ (REFC) {
1667     ?lexmode ElementContent;
1668     } else {
1669     ?lexmode ElementContent;
1670     }
1671     } // _GeneralEntityReferenceEC
1672 wakaba 1.1
1673     rule _GeneralEntityReferenceV_ ($doc, $vals) {
         // General entity reference in an attribute value.
         //
         // Creates an (unexpanded) entity reference node for the matched
         // name and pushes it to |$vals->{nodes}|.  |$vals->{value}| is
         // not modified.  The closing |REFC| is left to the caller.
1674     // TODO: Expansion
1675     ?lexmode 'EntityReference';
1676    
1677     ~ (Name) {
1678     lang:Perl ($name => $token.value) {
1679     ## TODO: Namespace well-formedness
1680     ## TODO: Entity declared constraints
1681     my $er = $doc-><M::Document.createEntityReference>
1682     ($name);
1683     push @{$vals->{nodes}}, $er;
1684     }
1685     }
1686    
1687     // ~ (REFC) { }
1688     } // _GeneralEntityReferenceV_
1689 wakaba 1.6
1690     /*
1691     General entity reference in literal entity value
         
         Pushes the referenced entity name onto the array referenced by
         |$vals|; no node is created.  Unlike the |...V_| rules for
         attribute values, |$vals| is an array reference here.  The
         closing |REFC| is left to the caller.
1692     */
1693     rule _GeneralEntityReferenceEV_ ($doc, $vals) {
1694     ?lexmode 'EntityReference';
1695    
1696     ~ (Name) {
1697     lang:Perl ($name => $token.value) {
1698     ## TODO: Namespace well-formedness
1699     ## TODO: Entity declared constraints
1700     push @$vals, $name;
1701     }
1702     }
1703    
1704     // ~ (REFC) { }
1705     } // _GeneralEntityReferenceEV_
1706 wakaba 1.1
1707     /*
1708 wakaba 1.4 Document Type Declaration
         
         Parses |DOCTYPE S Name|, an optional external identifier
         (|PUBLIC| or |SYSTEM|) and an optional internal subset.  A
         document type definition node named after the document type
         is created and appended to |$doc|; the same node is passed to
         |InternalSubset| as |$doctype|.  The closing |MDC| is left to
         the caller.
1709 wakaba 1.1 */
1710 wakaba 1.4 rule _DocumentTypeDeclaration_ ($doc) {
1711     ?lexmode MarkupDeclaration;
1712    
1713     ~ (Name == 'DOCTYPE') { }
1714    
1715     ~ (S) { }
1716    
         // The document type definition node
1717 wakaba 1.6 my $node;
1718 wakaba 1.4 // Document type name
1719 wakaba 1.6 my $name;
1720 wakaba 1.4 ~ (Name) {
1721 wakaba 1.6 lang:Perl ($v => $token.value) {
1722     $name = $v;
1723     }
1724     }
1725     lang:Perl {
1726     $node = $doc-><M::DocumentXDoctype.createDocumentTypeDefinition> ($name);
1727     $doc-><M::Node.appendChild> ($node);
1728 wakaba 1.4 }
1729    
         // Optional external identifier
1730     ~? (S) {
1731     ~? (Name == 'PUBLIC') {
1732     ~ (S) { }
1733    
1734     &PubidLiteral ($doc => $doc);
1735    
1736     ~ (S) { }
1737    
1738     &SystemLiteral ($doc => $doc);
1739    
1740     ~? (S) { }
1741     } (Name == 'SYSTEM') {
1742     ~ (S) { }
1743    
1744     &SystemLiteral ($doc => $doc);
1745    
1746     ~? (S) { }
1747     }
1748     }
1749    
         // Optional internal subset
1750     ~? (DSO) {
1751 wakaba 1.6 &InternalSubset ($doc => $doc, $doctype => $node);
1752 wakaba 1.4
         // Return to |MarkupDeclaration| mode whether or not |DSC|
         // was found.
1753     ~ (DSC) {
1754     ?lexmode MarkupDeclaration;
1755     } else {
1756     ?lexmode MarkupDeclaration;
1757     }
1758    
1759     ~? (S) { }
1760     }
1761    
1762     // TODO: set $self->{standalone} true if only internal subset
1763     // with no param ref
1764    
1765     // ~ (MDC) { }
1766     } // _DocumentTypeDeclaration_
1767    
1768     rule PubidLiteral ($doc) {
         // Public identifier literal, delimited by |LIT| or |LITA|.
         // The optional |STRING| content is matched but not yet
         // checked or stored.  The lexer is put back into
         // |MarkupDeclaration| mode whether or not the closing
         // delimiter was found.
1769     ~ (LIT) {
1770     ?lexmode SystemLiteral;
1771    
1772     ~? (STRING) {
1773     // TODO: Charrange check & normalization is required
1774    
1775     }
1776    
1777     ~ (LIT) {
1778     ?lexmode MarkupDeclaration;
1779     } else {
1780     ?lexmode MarkupDeclaration;
1781     }
1782     } (LITA) {
1783     ?lexmode SystemLiteralA;
1784    
1785     ~? (STRING) {
1786     // TODO: Charrange check & normalization is required
1787    
1788     }
1789    
1790     ~ (LITA) {
1791     ?lexmode MarkupDeclaration;
1792     } else {
1793     ?lexmode MarkupDeclaration;
1794     }
1795     }
1796     } // PubidLiteral
1797    
1798     rule SystemLiteral ($doc) {
         // System identifier literal.  Matches the opening delimiter,
         // selects the lexical mode for that delimiter and lets
         // |_SystemLiteral| parse the content and the closing delimiter.
1799     ~ (LIT) {
1800     ?lexmode SystemLiteral;
1801     &_SystemLiteral ($doc => $doc);
1802     } (LITA) {
1803     ?lexmode SystemLiteralA;
1804     &_SystemLiteral ($doc => $doc);
1805     }
1806     } // SystemLiteral
1807    
1808     rule _SystemLiteral ($doc) {
         // Content and closing delimiter of a system literal; the
         // opening delimiter and the lexical mode are handled by the
         // caller.  The optional |STRING| content is matched but not
         // stored.  The lexer is put back into |MarkupDeclaration| mode
         // whether or not a closing delimiter was found.
1809     ~? (STRING) {
1810    
1811     }
1812    
1813     ~ (LIT) {
1814     ?lexmode MarkupDeclaration;
1815     } (LITA) {
1816     ?lexmode MarkupDeclaration;
1817     } else {
1818     ?lexmode MarkupDeclaration;
1819     }
1820     } // _SystemLiteral
1821    
1822     /*
1823     DTD Internal Subset
1824    
1825     intSubset := *(markupdecl / DeclSep) ;; [28b]
         
         Dispatches each markup declaration to its rule by keyword
         (|ELEMENT|, |ATTLIST|, |ENTITY|, |NOTATION|) and also accepts
         white space, comments, processing instructions and parameter
         entity references.  |$doctype| is passed on to the processing
         instruction rule only.
1826     */
1827 wakaba 1.6 rule InternalSubset ($doc, $doctype) {
1828 wakaba 1.4 ?lexmode DTD;
1829    
1830     ~* (MDO) {
1831     ?lexmode MarkupDeclaration;
1832    
1833     ~ (Name == 'ELEMENT') {
1834     &_ElementDeclaration ($doc => $doc);
1835     } (Name == 'ATTLIST') {
1836     &_AttlistDeclaration ($doc => $doc);
1837     } (Name == 'ENTITY') {
1838     &_EntityDeclaration ($doc => $doc);
1839     } (Name == 'NOTATION') {
1840     &_NotationDeclaration ($doc => $doc);
1841     }
1842     } (S) {
1843     //
1844     } (CDO) {
1845     &_CommentDeclarationDTD ($doc => $doc);
1846     } (PIO) {
1847 wakaba 1.6 &_ProcessingInstructionDTD ($doc => $doc, $doctype => $doctype);
1848 wakaba 1.4 } (PERO) {
         // Parameter entity reference; the name is matched but the
         // reference is not expanded.
1849     ?lexmode EntityReference;
1850    
1851     ~ (Name) {
1852    
1853     }
1854    
1855     ~ (REFC) {
1856     ?lexmode DTD;
1857     } else {
1858     ?lexmode DTD;
1859     }
1860     }
1861     } // InternalSubset
1862    
1863     rule _ElementDeclaration ($doc) {
         // Element type declaration.  |MDO| and the |ELEMENT| keyword
         // have been consumed by the caller (|InternalSubset|).  The
         // declaration is only recognized; nothing is stored.
1864     // ~ (MDO) { }
1865     // ?lexmode MarkupDeclaration
1866     // ~ (Name == 'ELEMENT') { }
1867    
1868     ~ (S) { }
1869    
         // Element type name
1870     ~ (Name) {
1871    
1872     }
1873    
1874     ?lexmode ElementDeclaration;
1875    
1876     ~ (S) { }
1877    
1878     // contentspec
1879     ~ (MGO) {
1880     &_ContentModel ($doc => $doc);
1881     } (Name == 'EMPTY') {
1882    
1883     } (Name == 'ANY') {
1884    
1885     }
1886    
1887     ~? (S) { }
1888    
         // Return to |DTD| mode whether or not |MDC| was found.
1889     ~ (MDC) {
1890     ?lexmode DTD;
1891     } else {
1892     ?lexmode DTD;
1893     }
1894     } // _ElementDeclaration
1895    
1896     rule _ContentModel ($doc) {
         // Content model of an element type declaration, after the
         // opening |MGO| (consumed by the caller).
         //
         // If the first token is a |Name| or a nested |MGO|, the model
         // is an element content model: the token is given back and
         // |_ModelGroup| parses the group, followed by an optional
         // occurrence indicator.  If it is |PCDATA|, the model is a
         // mixed content declaration.
1897     // ~ (MGO) { }
1898    
1899     ~? (S) { }
1900    
1901     ~ (Name) {
1902     // Element content model
1903     &_ModelGroup ($doc => $doc)
1904     : unshift-current-token;
1905    
1906     ~? (OPT) {
1907    
1908     } (REP) {
1909    
1910     } (PLUS) {
1911    
1912     }
1913    
         // Element content model that starts with a nested model
         // group.  (A model group opens with |MGO|; |MDO| cannot
         // occur at this position.)
1914     } (MGO) {
1915     &_ModelGroup ($doc => $doc)
1916     : unshift-current-token;
1917    
1918     ~? (OPT) {
1919    
1920     } (REP) {
1921    
1922     } (PLUS) {
1923    
1924     }
1925    
1926     } (PCDATA) {
1927     // Mixed content declaration
1928     ~? (S) { }
1929    
1930     ~* (OR) {
1931     ~? (S) { }
1932    
1933     ~ (Name) {
1934    
1935     }
1936    
1937     ~? (S) { }
1938     }
1939    
1940     ~ (MGC) { }
1941    
1942     ~? (REP) {
1943    
1944     } else {
1945     // TODO: error if |Name|
1946     }
1947    
1948    
1949     }
1950     } // _ContentModel
1951    
1952     rule _ModelGroup ($doc)
1953     : standalone
1954     : recursive
1955     {
         // Model group after its opening |MGO| (and optional white
         // space), both consumed by the caller: one content particle,
         // optionally followed by further particles that are all
         // separated by |OR| or all separated by |SEQ|, and the closing
         // |MGC|.  Nothing is stored.
1956     // ~ (MGO) { }
1957     // ~? (S) { }
1958    
1959     &ContentParticle ($doc => $doc);
1960    
1961     ~? (S) { }
1962    
         // The first connector decides whether this is a choice or a
         // sequence.
1963     ~? (OR) {
1964     ~? (S) { }
1965    
1966     &ContentParticle ($doc => $doc);
1967    
1968     ~? (S) { }
1969    
1970     ~* (OR) {
1971     ~? (S) { }
1972    
1973     &ContentParticle ($doc => $doc);
1974    
1975     ~? (S) { }
1976     }
1977    
1978     } (SEQ) {
1979     ~? (S) { }
1980    
1981     &ContentParticle ($doc => $doc);
1982    
1983     ~? (S) { }
1984    
1985     ~* (SEQ) {
1986     ~? (S) { }
1987    
1988     &ContentParticle ($doc => $doc);
1989    
1990     ~? (S) { }
1991     }
1992    
1993     }
1994    
1995     ~ (MGC) { }
1996     } // _ModelGroup
1997    
1998     rule ContentParticle ($doc) {
         // Content particle: an element type name or a nested model
         // group, followed by an optional occurrence indicator (|OPT|,
         // |REP| or |PLUS|).  Nothing is stored.
1999     ~ (Name) {
2000    
2001     } (MGO) {
2002     ~? (S) { }
2003    
2004     &_ModelGroup ($doc => $doc);
2005     }
2006    
2007     ~? (OPT) {
2008    
2009     } (REP) {
2010    
2011     } (PLUS) {
2012    
2013     }
2014     } // ContentParticle
2015    
2016     rule _AttlistDeclaration ($doc) {
         // Attribute list declaration.  |MDO| and the |ATTLIST| keyword
         // have been consumed by the caller (|InternalSubset|).
         //
         // Parses the element type name and then any number of
         // attribute definitions, each made of a name, an attribute
         // type and a default declaration.  Default values are parsed
         // by the attribute value rules into a local |$vals|, which is
         // not used further; nothing is stored.
2017     // ~ (MDO) { }
2018     // ~ (Name == 'ATTLIST') { }
2019    
2020     ?lexmode AttlistDeclaration;
2021    
2022     ~ (S) { }
2023    
         // Element type name
2024     ~ (Name) {
2025    
2026     }
2027    
2028     ~? (S) { }
2029    
         // Attribute name
2030     ~* (Name) {
2031    
2032     ~ (S) { }
2033    
2034     // AttType
2035     ~ (Name == 'NOTATION') {
2036     ~ (S) { }
2037    
2038     ~ (EGO) {
2039     ~? (S) { }
2040    
2041     ~ (Name) {
2042    
2043     ~? (S) { }
2044     } (OR) : separator {
2045     ~? (S) { }
2046     }
2047    
2048     ~ (EGC) { }
2049     }
2050     } (Name) {
2051     // TODO: check keyword
2052    
2053     } (EGO) {
2054     ?lexmode Enumeration;
2055    
2056     ~? (S) { }
2057    
2058     ~ (Nmtoken) {
2059    
2060     ~? (S) { }
2061     } (OR) : separator {
2062     ~? (S) { }
2063     }
2064    
2065     ~ (EGC) {
2066     ?lexmode AttlistDeclaration;
2067     } else {
2068     ?lexmode AttlistDeclaration;
2069     }
2070    
2071     }
2072    
2073     ~ (S) { }
2074    
2075     // DefaultDecl
2076     ~ (RNI) {
2077     ~ (Name == 'REQUIRED') {
2078    
2079     } (Name == 'IMPLIED') {
2080    
2081     } (Name == 'FIXED') {
2082     ~ (S) { }
2083    
2084     ~ (LIT) {
2085     my $vals;
2086     lang:Perl {
         ## The attribute value rules append to |$vals->{nodes}| and
         ## |$vals->{value}|, so |$vals| has to be a hash reference.
2087     $vals = {nodes => [], value => ''};
2088     }
2089    
2090     &_AttributeValueSpecification_ ($doc => $doc, $vals => $vals);
2091    
2092     ~ (LIT) {
2093     ?lexmode AttlistDeclaration;
2094     } else {
2095     ?lexmode AttlistDeclaration;
2096     }
2097     } (LITA) {
2098     my $vals;
2099     lang:Perl {
2100     $vals = {nodes => [], value => ''};
2101     }
2102    
2103     &_AttributeValueSpecificationA_ ($doc => $doc, $vals => $vals);
2104    
         // A value opened by |LITA| is closed by |LITA|.
2105     ~ (LITA) {
2106     ?lexmode AttlistDeclaration;
2107     } else {
2108     ?lexmode AttlistDeclaration;
2109     }
2110     }
2111     }
2112    
2113     } (LIT) {
2114     my $vals;
2115     lang:Perl {
2116     $vals = {nodes => [], value => ''};
2117     }
2118    
2119     &_AttributeValueSpecification_ ($doc => $doc, $vals => $vals);
2120    
2121     ~ (LIT) {
2122     ?lexmode AttlistDeclaration;
2123     } else {
2124     ?lexmode AttlistDeclaration;
2125     }
2126     } (LITA) {
2127     my $vals;
2128     lang:Perl {
2129     $vals = {nodes => [], value => ''};
2130     }
2131    
2132     &_AttributeValueSpecificationA_ ($doc => $doc, $vals => $vals);
2133    
2134     ~ (LITA) {
2135     ?lexmode AttlistDeclaration;
2136     } else {
2137     ?lexmode AttlistDeclaration;
2138     }
2139     }
2140    
2141     } (S) : separator : terminator? {
2142     //
2143     }
2144    
         // Return to |DTD| mode whether or not |MDC| was found.
2145     ~ (MDC) {
2146     ?lexmode DTD;
2147     } else {
2148     ?lexmode DTD;
2149     }
2150     } // _AttlistDeclaration
2151    
2152     rule _EntityDeclaration ($doc) {
         // Entity declaration.  |MDO| and the |ENTITY| keyword have been
         // consumed by the caller (|InternalSubset|).
         //
         // Information about the entity is collected in the hash
         // |$decl|: |is_param_entity|, |name|, |notation| and whatever
         // the entity value rules add.  At the end |$decl| is registered
         // in |$self->{param_entity}| or |$self->{general_entity}| under
         // the entity name, unless an entry for that name already
         // exists, in which case the existing entry is kept.
2153     // ~ (MDO) { }
2154     // ~ (Name == ENTITY) { }
2155    
2156     ~ (S) { }
2157    
2158 wakaba 1.6 my $decl;
2159     lang:Perl { $decl = {}; }
2160    
         // "%" S marks a parameter entity declaration.
2161 wakaba 1.4 ~? (PERO) {
2162     ~ (S) { }
2163 wakaba 1.6 lang:Perl {
2164     $decl->{is_param_entity} = true;
2165     }
2166 wakaba 1.4 }
2167    
         // Entity name
2168     ~ (Name) {
2169 wakaba 1.6 lang:Perl ($v => $token.value) {
2170     $decl->{name} = $v;
2171     }
2172 wakaba 1.4 }
2173    
2174     ~ (S) { }
2175    
         // Entity value literal or external identifier.
         // NOTE(review): |PubidLiteral| and |SystemLiteral| are declared
         // with the single parameter |$doc| but receive |$decl| as well
         // here; confirm how the extra argument is treated.
2176     ~ (LIT) {
2177 wakaba 1.6 &_EntityValue ($doc => $doc, $decl => $decl);
2178 wakaba 1.4 } (LITA) {
2179 wakaba 1.6 &_EntityValueA ($doc => $doc, $decl => $decl);
2180 wakaba 1.4 } (Name == 'PUBLIC') {
2181     ~ (S) { }
2182    
2183 wakaba 1.6 &PubidLiteral ($doc => $doc, $decl => $decl);
2184 wakaba 1.4
2185     ~ (S) { }
2186    
2187 wakaba 1.6 &SystemLiteral ($doc => $doc, $decl => $decl);
2188 wakaba 1.4 } (Name == 'SYSTEM') {
2189     ~ (S) { }
2190    
2191 wakaba 1.6 &SystemLiteral ($doc => $doc, $decl => $decl);
2192 wakaba 1.4 }
2193    
         // Optional notation name (NDATA)
2194     ~? (S) {
2195     ~? (Name == 'NDATA') {
2196     // TODO: error if parameter entity
2197    
2198     ~ (S) { }
2199    
2200     ~ (Name) {
2201 wakaba 1.6 lang:Perl ($v => $token.value) {
2202     $decl->{notation} = $v;
2203     }
2204 wakaba 1.4 }
2205    
2206     ~? (S) { }
2207     }
2208     }
2209    
2210 wakaba 1.6 lang:Perl {
         ## Only the first declaration of a name is registered.
2211     if ($self->{$decl->{is_param_entity} ? 'param_entity' : 'general_entity'}
2212     ->{$decl->{name}}) {
2213     ## TODO: error
2214     } else {
2215     $self->{$decl->{is_param_entity} ? 'param_entity' : 'general_entity'}
2216     ->{$decl->{name}} = $decl;
2217     }
2218     }
2219    
         // Return to |DTD| mode whether or not |MDC| was found.
2220 wakaba 1.4 ~ (MDC) {
2221     ?lexmode DTD;
2222     } else {
2223     ?lexmode DTD;
2224     }
2225     } // _EntityDeclaration
2226    
2227     rule _NotationDeclaration ($doc) {
         // Notation declaration.  |MDO| and the |NOTATION| keyword have
         // been consumed by the caller (|InternalSubset|).  Parses the
         // notation name and an external or public identifier; after
         // |PUBLIC| the system literal is optional.  Nothing is stored.
2228     // ~ (MDO) { }
2229     // ~ (Name == NOTATION) { }
2230    
2231     ~ (S) { }
2232    
         // Notation name
2233     ~ (Name) {
2234    
2235     }
2236    
2237     ~ (S) { }
2238    
2239     ~ (Name == 'PUBLIC') {
2240     ~ (S) { }
2241    
2242     &PubidLiteral ($doc => $doc);
2243    
         // Optional system literal
2244     ~? (S) {
2245     ~? (LIT) {
2246     ?lexmode SystemLiteral;
2247     &_SystemLiteral ($doc => $doc);
2248    
2249     ~? (S) { }
2250     } (LITA) {
2251     ?lexmode SystemLiteralA;
2252     &_SystemLiteral ($doc => $doc);
2253    
2254     ~? (S) { }
2255     }
2256     }
2257     } (Name == 'SYSTEM') {
2258     ~ (S) { }
2259    
2260     &SystemLiteral ($doc => $doc);
2261    
2262     ~? (S) { }
2263     }
2264    
         // Return to |DTD| mode whether or not |MDC| was found.
2265     ~ (MDC) {
2266     ?lexmode DTD;
2267     } else {
2268     ?lexmode DTD;
2269     }
2270     } // _NotationDeclaration
2271    
2272 wakaba 1.6 rule _EntityValue ($doc, $decl) {
         // Entity value literal opened by |LIT| (consumed by the
         // caller).
         //
         // Builds the replacement text of the entity in |$reptxt|:
         // string pieces are copied as they are, character references
         // are replaced by the referenced character and general entity
         // references are kept in the form |&name;|.  Parameter entity
         // references are matched but not expanded.  A reference to the
         // finished text is stored in |$decl->{reptxt}|.
2273 wakaba 1.4 ?lexmode EntityValue;
2274    
         // Scratch container handed to the reference rules
2275     my $vals;
2276 wakaba 1.6 my $reptxt;
2277 wakaba 1.4 lang:Perl {
2278     $vals = [];
2279 wakaba 1.6 $reptxt = '';
2280 wakaba 1.4 }
2281    
2282     ~* (STRING) {
2283 wakaba 1.6 lang:Perl ($v => $token.value) {
2284     $reptxt .= $v;
2285     }
2286 wakaba 1.4 } (PERO) {
2287     ?lexmode EntityDeclaration;
2288 wakaba 1.6
2289     // TODO: Expand or wferror if internal subset
2290 wakaba 1.4
2291     ~ (Name) {
2292    
2293     }
2294    
2295     ~ (REFC) {
2296     ?lexmode EntityValue;
2297     } else {
2298     ?lexmode EntityValue;
2299     }
2300     } (HCRO) {
         // The character reference rules for values expect a hash
         // with |nodes| and |value| members and put the referenced
         // character into |value|.
         lang:Perl {
         $vals = {nodes => [], value => ''};
         }
         
2301     &_HexadecimalCharacterReferenceV_
2302     ($doc => $doc, $vals => $vals);
2303    
2304 wakaba 1.6 lang:Perl {
2305     $reptxt .= $vals->{value};
2306     $vals = [];
2307     }
2308    
2309 wakaba 1.4 ~ (REFC) {
2310     ?lexmode EntityValue;
2311     } else {
2312     ?lexmode EntityValue;
2313     }
2314     } (CRO) {
         lang:Perl {
         $vals = {nodes => [], value => ''};
         }
         
2315     &_NumericCharacterReferenceV_
2316     ($doc => $doc, $vals => $vals);
2317 wakaba 1.6
2318     lang:Perl {
2319     $reptxt .= $vals->{value};
2320     $vals = [];
2321     }
2322 wakaba 1.4
2323     ~ (REFC) {
2324     ?lexmode EntityValue;
2325     } else {
2326     ?lexmode EntityValue;
2327     }
2328     } (ERO) {
         // |_GeneralEntityReferenceEV_| pushes the entity name onto
         // the array |$vals|.
2329 wakaba 1.6 &_GeneralEntityReferenceEV_
2330 wakaba 1.4 ($doc => $doc, $vals => $vals);
2331    
2332 wakaba 1.6 lang:Perl {
2333     $reptxt .= '&' . $vals->[0] . ';';
2334     $vals = [];
2335     }
2336    
2337 wakaba 1.4 ~ (REFC) {
2338     ?lexmode EntityValue;
2339     } else {
2340     ?lexmode EntityValue;
2341     }
2342     }
2343    
         // Return to |MarkupDeclaration| mode whether or not a closing
         // delimiter was found.
2344     ~ (LIT) {
2345     ?lexmode MarkupDeclaration;
2346     } (LITA) {
2347     ?lexmode MarkupDeclaration;
2348     } else {
2349     ?lexmode MarkupDeclaration;
2350     }
2351 wakaba 1.6
2352     lang:Perl {
2353     $decl->{reptxt} = \$reptxt;
2354     }
2355 wakaba 1.4 } // _EntityValue
2356    
2357 wakaba 1.6 rule _EntityValueA ($doc, $decl) {
         // Entity value literal opened by |LITA| (consumed by the
         // caller).
         //
         // Builds the replacement text of the entity in |$reptxt|:
         // string pieces are copied as they are, character references
         // are replaced by the referenced character and general entity
         // references are kept in the form |&name;|.  Parameter entity
         // references are matched but not expanded.  A reference to the
         // finished text is stored in |$decl->{reptxt}|.
2358 wakaba 1.4 ?lexmode EntityValueA;
2359    
         // Scratch container handed to the reference rules
2360     my $vals;
2361 wakaba 1.6 my $reptxt;
2362 wakaba 1.4 lang:Perl {
2363     $vals = [];
2364 wakaba 1.6 $reptxt = '';
2365 wakaba 1.4 }
2366    
2367     ~* (STRING) {
2368 wakaba 1.6 lang:Perl ($v => $token.value) {
2369     $reptxt .= $v;
2370     }
2371 wakaba 1.4 } (PERO) {
2372     ?lexmode EntityDeclaration;
2373    
2374 wakaba 1.6 // TODO: implement this
2375 wakaba 1.4 ~ (Name) {
2376    
2377     }
2378    
2379     ~ (REFC) {
2380     ?lexmode EntityValueA;
2381     } else {
2382     ?lexmode EntityValueA;
2383     }
2384     } (HCRO) {
         // The character reference rules for values expect a hash
         // with |nodes| and |value| members and put the referenced
         // character into |value|.
         lang:Perl {
         $vals = {nodes => [], value => ''};
         }
         
2385     &_HexadecimalCharacterReferenceV_
2386     ($doc => $doc, $vals => $vals);
2387    
2388 wakaba 1.6 lang:Perl {
2389     $reptxt .= $vals->{value};
2390     $vals = [];
2391     }
2392    
2393    
2394 wakaba 1.4 ~ (REFC) {
2395     ?lexmode EntityValueA;
2396     } else {
2397     ?lexmode EntityValueA;
2398     }
2399     } (CRO) {
         lang:Perl {
         $vals = {nodes => [], value => ''};
         }
         
2400     &_NumericCharacterReferenceV_
2401     ($doc => $doc, $vals => $vals);
2402    
2403 wakaba 1.6 lang:Perl {
2404     $reptxt .= $vals->{value};
2405     $vals = [];
2406     }
2407    
2408 wakaba 1.4 ~ (REFC) {
2409     ?lexmode EntityValueA;
2410     } else {
2411     ?lexmode EntityValueA;
2412     }
2413     } (ERO) {
         // |_GeneralEntityReferenceEV_| pushes the entity name onto
         // the array |$vals|.
2414 wakaba 1.6 &_GeneralEntityReferenceEV_
2415 wakaba 1.4 ($doc => $doc, $vals => $vals);
2416    
2417 wakaba 1.6 lang:Perl {
2418     $reptxt .= '&' . $vals->[0] . ';';
2419     $vals = [];
2420     }
2421    
2422 wakaba 1.4 ~ (REFC) {
2423     ?lexmode EntityValueA;
2424     } else {
2425     ?lexmode EntityValueA;
2426     }
2427     }
2428    
         // Return to |MarkupDeclaration| mode whether or not the
         // closing delimiter was found.
2429     ~ (LITA) {
2430     ?lexmode MarkupDeclaration;
2431     } else {
2432     ?lexmode MarkupDeclaration;
2433     }
2434 wakaba 1.6
2435     lang:Perl {
2436     $decl->{reptxt} = \$reptxt;
2437     }
2438 wakaba 1.4 } // _EntityValueA
2439    
2440    
2441     /*
2442     XML Name character classes (XML 1.0 variants are suffixed |10|, XML 1.1 variants |11|)
2443     */
2444     lexmode NameChar {
2445     $NameStartChar10 := [
2446     '_' ':'
2447     // Letter
2448     // BaseChar
2449     U+0041..U+005A U+0061..U+007A U+00C0..U+00D6
2450     U+00D8..U+00F6 U+00F8..U+00FF U+0100..U+0131
2451     U+0134..U+013E U+0141..U+0148 U+014A..U+017E
2452     U+0180..U+01C3 U+01CD..U+01F0 U+01F4..U+01F5
2453     U+01FA..U+0217 U+0250..U+02A8 U+02BB..U+02C1
2454     U+0386 U+0388..U+038A U+038C U+038E..U+03A1
2455     U+03A3..U+03CE U+03D0..U+03D6 U+03DA U+03DC
2456     U+03DE U+03E0 U+03E2..U+03F3 U+0401..U+040C
2457     U+040E..U+044F U+0451..U+045C U+045E..U+0481
2458     U+0490..U+04C4 U+04C7..U+04C8 U+04CB..U+04CC
2459     U+04D0..U+04EB U+04EE..U+04F5 U+04F8..U+04F9
2460     U+0531..U+0556 U+0559 U+0561..U+0586
2461     U+05D0..U+05EA U+05F0..U+05F2 U+0621..U+063A
2462     U+0641..U+064A U+0671..U+06B7 U+06BA..U+06BE
2463     U+06C0..U+06CE U+06D0..U+06D3 U+06D5
2464     U+06E5..U+06E6 U+0905..U+0939 U+093D
2465     U+0958..U+0961 U+0985..U+098C U+098F..U+0990
2466     U+0993..U+09A8 U+09AA..U+09B0 U+09B2
2467     U+09B6..U+09B9 U+09DC..U+09DD U+09DF..U+09E1
2468     U+09F0..U+09F1 U+0A05..U+0A0A U+0A0F..U+0A10
2469     U+0A13..U+0A28 U+0A2A..U+0A30 U+0A32..U+0A33
2470     U+0A35..U+0A36 U+0A38..U+0A39 U+0A59..U+0A5C
2471     U+0A5E U+0A72..U+0A74 U+0A85..U+0A8B U+0A8D
2472     U+0A8F..U+0A91 U+0A93..U+0AA8 U+0AAA..U+0AB0
2473     U+0AB2..U+0AB3 U+0AB5..U+0AB9 U+0ABD U+0AE0
2474     U+0B05..U+0B0C U+0B0F..U+0B10 U+0B13..U+0B28
2475     U+0B2A..U+0B30 U+0B32..U+0B33 U+0B36..U+0B39
2476     U+0B3D U+0B5C..U+0B5D U+0B5F..U+0B61
2477     U+0B85..U+0B8A U+0B8E..U+0B90 U+0B92..U+0B95
2478     U+0B99..U+0B9A U+0B9C U+0B9E..U+0B9F
2479     U+0BA3..U+0BA4 U+0BA8..U+0BAA U+0BAE..U+0BB5
2480     U+0BB7..U+0BB9 U+0C05..U+0C0C U+0C0E..U+0C10
2481     U+0C12..U+0C28 U+0C2A..U+0C33 U+0C35..U+0C39
2482     U+0C60..U+0C61 U+0C85..U+0C8C U+0C8E..U+0C90
2483     U+0C92..U+0CA8 U+0CAA..U+0CB3 U+0CB5..U+0CB9
2484     U+0CDE U+0CE0..U+0CE1 U+0D05..U+0D0C
2485     U+0D0E..U+0D10 U+0D12..U+0D28 U+0D2A..U+0D39
2486     U+0D60..U+0D61 U+0E01..U+0E2E U+0E30
2487     U+0E32..U+0E33 U+0E40..U+0E45 U+0E81..U+0E82
2488     U+0E84 U+0E87..U+0E88 U+0E8A U+0E8D
2489     U+0E94..U+0E97 U+0E99..U+0E9F U+0EA1..U+0EA3
2490     U+0EA5 U+0EA7 U+0EAA..U+0EAB U+0EAD..U+0EAE
2491     U+0EB0 U+0EB2..U+0EB3 U+0EBD U+0EC0..U+0EC4
2492     U+0F40..U+0F47 U+0F49..U+0F69 U+10A0..U+10C5
2493     U+10D0..U+10F6 U+1100 U+1102..U+1103
2494     U+1105..U+1107 U+1109 U+110B..U+110C
2495     U+110E..U+1112 U+113C U+113E U+1140 U+114C
2496     U+114E U+1150 U+1154..U+1155 U+1159
2497     U+115F..U+1161 U+1163 U+1165 U+1167 U+1169
2498     U+116D..U+116E U+1172..U+1173 U+1175 U+119E
2499     U+11A8 U+11AB U+11AE..U+11AF U+11B7..U+11B8
2500     U+11BA U+11BC..U+11C2 U+11EB U+11F0 U+11F9
2501     U+1E00..U+1E9B U+1EA0..U+1EF9 U+1F00..U+1F15
2502     U+1F18..U+1F1D U+1F20..U+1F45 U+1F48..U+1F4D
2503     U+1F50..U+1F57 U+1F59 U+1F5B U+1F5D
2504     U+1F5F..U+1F7D U+1F80..U+1FB4 U+1FB6..U+1FBC
2505     U+1FBE U+1FC2..U+1FC4 U+1FC6..U+1FCC
2506     U+1FD0..U+1FD3 U+1FD6..U+1FDB U+1FE0..U+1FEC
2507     U+1FF2..U+1FF4 U+1FF6..U+1FFC U+2126
2508     U+212A..U+212B U+212E U+2180..U+2182
2509     U+3041..U+3094 U+30A1..U+30FA U+3105..U+312C
2510     U+AC00..U+D7A3
2511     // Ideographic
2512     U+4E00..U+9FA5 U+3007 U+3021..U+3029
2513     ];
2514     $NameChar10 := [
2515     '.' '-' '_' ':'
2516     // Letter
2517     // BaseChar
2518     U+0041..U+005A U+0061..U+007A U+00C0..U+00D6
2519     U+00D8..U+00F6 U+00F8..U+00FF U+0100..U+0131
2520     U+0134..U+013E U+0141..U+0148 U+014A..U+017E
2521     U+0180..U+01C3 U+01CD..U+01F0 U+01F4..U+01F5
2522     U+01FA..U+0217 U+0250..U+02A8 U+02BB..U+02C1
2523     U+0386 U+0388..U+038A U+038C U+038E..U+03A1
2524     U+03A3..U+03CE U+03D0..U+03D6 U+03DA U+03DC
2525     U+03DE U+03E0 U+03E2..U+03F3 U+0401..U+040C
2526     U+040E..U+044F U+0451..U+045C U+045E..U+0481
2527     U+0490..U+04C4 U+04C7..U+04C8 U+04CB..U+04CC
2528     U+04D0..U+04EB U+04EE..U+04F5 U+04F8..U+04F9
2529     U+0531..U+0556 U+0559 U+0561..U+0586
2530     U+05D0..U+05EA U+05F0..U+05F2 U+0621..U+063A
2531     U+0641..U+064A U+0671..U+06B7 U+06BA..U+06BE
2532     U+06C0..U+06CE U+06D0..U+06D3 U+06D5
2533     U+06E5..U+06E6 U+0905..U+0939 U+093D
2534     U+0958..U+0961 U+0985..U+098C U+098F..U+0990
2535     U+0993..U+09A8 U+09AA..U+09B0 U+09B2
2536     U+09B6..U+09B9 U+09DC..U+09DD U+09DF..U+09E1
2537     U+09F0..U+09F1 U+0A05..U+0A0A U+0A0F..U+0A10
2538     U+0A13..U+0A28 U+0A2A..U+0A30 U+0A32..U+0A33
2539     U+0A35..U+0A36 U+0A38..U+0A39 U+0A59..U+0A5C
2540     U+0A5E U+0A72..U+0A74 U+0A85..U+0A8B U+0A8D
2541     U+0A8F..U+0A91 U+0A93..U+0AA8 U+0AAA..U+0AB0
2542     U+0AB2..U+0AB3 U+0AB5..U+0AB9 U+0ABD U+0AE0
2543     U+0B05..U+0B0C U+0B0F..U+0B10 U+0B13..U+0B28
2544     U+0B2A..U+0B30 U+0B32..U+0B33 U+0B36..U+0B39
2545     U+0B3D U+0B5C..U+0B5D U+0B5F..U+0B61
2546     U+0B85..U+0B8A U+0B8E..U+0B90 U+0B92..U+0B95
2547     U+0B99..U+0B9A U+0B9C U+0B9E..U+0B9F
2548     U+0BA3..U+0BA4 U+0BA8..U+0BAA U+0BAE..U+0BB5
2549     U+0BB7..U+0BB9 U+0C05..U+0C0C U+0C0E..U+0C10
2550     U+0C12..U+0C28 U+0C2A..U+0C33 U+0C35..U+0C39
2551     U+0C60..U+0C61 U+0C85..U+0C8C U+0C8E..U+0C90
2552     U+0C92..U+0CA8 U+0CAA..U+0CB3 U+0CB5..U+0CB9
2553     U+0CDE U+0CE0..U+0CE1 U+0D05..U+0D0C
2554     U+0D0E..U+0D10 U+0D12..U+0D28 U+0D2A..U+0D39
2555     U+0D60..U+0D61 U+0E01..U+0E2E U+0E30
2556     U+0E32..U+0E33 U+0E40..U+0E45 U+0E81..U+0E82
2557     U+0E84 U+0E87..U+0E88 U+0E8A U+0E8D
2558     U+0E94..U+0E97 U+0E99..U+0E9F U+0EA1..U+0EA3
2559     U+0EA5 U+0EA7 U+0EAA..U+0EAB U+0EAD..U+0EAE
2560     U+0EB0 U+0EB2..U+0EB3 U+0EBD U+0EC0..U+0EC4
2561     U+0F40..U+0F47 U+0F49..U+0F69 U+10A0..U+10C5
2562     U+10D0..U+10F6 U+1100 U+1102..U+1103
2563     U+1105..U+1107 U+1109 U+110B..U+110C
2564     U+110E..U+1112 U+113C U+113E U+1140 U+114C
2565     U+114E U+1150 U+1154..U+1155 U+1159
2566     U+115F..U+1161 U+1163 U+1165 U+1167 U+1169
2567     U+116D..U+116E U+1172..U+1173 U+1175 U+119E
2568     U+11A8 U+11AB U+11AE..U+11AF U+11B7..U+11B8
2569     U+11BA U+11BC..U+11C2 U+11EB U+11F0 U+11F9
2570     U+1E00..U+1E9B U+1EA0..U+1EF9 U+1F00..U+1F15
2571     U+1F18..U+1F1D U+1F20..U+1F45 U+1F48..U+1F4D
2572     U+1F50..U+1F57 U+1F59 U+1F5B U+1F5D
2573     U+1F5F..U+1F7D U+1F80..U+1FB4 U+1FB6..U+1FBC
2574     U+1FBE U+1FC2..U+1FC4 U+1FC6..U+1FCC
2575     U+1FD0..U+1FD3 U+1FD6..U+1FDB U+1FE0..U+1FEC
2576     U+1FF2..U+1FF4 U+1FF6..U+1FFC U+2126
2577     U+212A..U+212B U+212E U+2180..U+2182
2578     U+3041..U+3094 U+30A1..U+30FA U+3105..U+312C
2579     U+AC00..U+D7A3
2580     // Ideographic
2581     U+4E00..U+9FA5 U+3007 U+3021..U+3029
2582     // Digit
2583     U+0030..U+0039 U+0660..U+0669 U+06F0..U+06F9
2584     U+0966..U+096F U+09E6..U+09EF U+0A66..U+0A6F
2585     U+0AE6..U+0AEF U+0B66..U+0B6F U+0BE7..U+0BEF
2586     U+0C66..U+0C6F U+0CE6..U+0CEF U+0D66..U+0D6F
2587     U+0E50..U+0E59 U+0ED0..U+0ED9 U+0F20..U+0F29
2588     // CombiningChar
2589     U+0300..U+0345 U+0360..U+0361 U+0483..U+0486
2590     U+0591..U+05A1 U+05A3..U+05B9 U+05BB..U+05BD
2591     U+05BF U+05C1..U+05C2 U+05C4 U+064B..U+0652
2592     U+0670 U+06D6..U+06DC U+06DD..U+06DF
2593     U+06E0..U+06E4 U+06E7..U+06E8 U+06EA..U+06ED
2594     U+0901..U+0903 U+093C U+093E..U+094C U+094D
2595     U+0951..U+0954 U+0962..U+0963 U+0981..U+0983
2596     U+09BC U+09BE U+09BF U+09C0..U+09C4
2597     U+09C7..U+09C8 U+09CB..U+09CD U+09D7
2598     U+09E2..U+09E3 U+0A02 U+0A3C U+0A3E U+0A3F
2599     U+0A40..U+0A42 U+0A47..U+0A48 U+0A4B..U+0A4D
2600     U+0A70..U+0A71 U+0A81..U+0A83 U+0ABC
2601     U+0ABE..U+0AC5 U+0AC7..U+0AC9 U+0ACB..U+0ACD
2602     U+0B01..U+0B03 U+0B3C U+0B3E..U+0B43
2603     U+0B47..U+0B48 U+0B4B..U+0B4D U+0B56..U+0B57
2604     U+0B82..U+0B83 U+0BBE..U+0BC2 U+0BC6..U+0BC8
2605     U+0BCA..U+0BCD U+0BD7 U+0C01..U+0C03
2606     U+0C3E..U+0C44 U+0C46..U+0C48 U+0C4A..U+0C4D
2607     U+0C55..U+0C56 U+0C82..U+0C83 U+0CBE..U+0CC4
2608     U+0CC6..U+0CC8 U+0CCA..U+0CCD U+0CD5..U+0CD6
2609     U+0D02..U+0D03 U+0D3E..U+0D43 U+0D46..U+0D48
2610     U+0D4A..U+0D4D U+0D57 U+0E31 U+0E34..U+0E3A
2611     U+0E47..U+0E4E U+0EB1 U+0EB4..U+0EB9
2612     U+0EBB..U+0EBC U+0EC8..U+0ECD U+0F18..U+0F19
2613     U+0F35 U+0F37 U+0F39 U+0F3E U+0F3F
2614     U+0F71..U+0F84 U+0F86..U+0F8B U+0F90..U+0F95
2615     U+0F97 U+0F99..U+0FAD U+0FB1..U+0FB7 U+0FB9
2616     U+20D0..U+20DC U+20E1 U+302A..U+302F U+3099
2617     U+309A
2618     // Extender
2619     U+00B7 U+02D0 U+02D1 U+0387 U+0640 U+0E46
2620     U+0EC6 U+3005 U+3031..U+3035 U+309D..U+309E
2621     U+30FC..U+30FE
2622     ];
2623 wakaba 1.1
2624     $NameStartChar11 := [
2625     ':' '_'
2626     'A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M'
2627     'N' 'O' 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' 'X' 'Y' 'Z'
2628     'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm'
2629     'n' 'o' 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z'
2630     U+00C0..U+00D6 U+00D8..U+00F6 U+00F8..U+02FF
2631     U+0370..U+037D U+037F..U+1FFF U+200C..U+200D
2632     U+2070..U+218F U+2C00..U+2FEF U+3001..U+D7FF
2633     U+F900..U+FDCF U+FDF0..U+FFFD U+10000..U+EFFFF
2634     ];
2635     $NameChar11 := [
2636     '-' '.' '0' '1' '2' '3' '4' '5' '6' '7' '8' '9'
2637     U+00B7 U+0300..U+036F U+203F..U+2040
2638     // NameStartChar
2639     ':' '_'
2640     'A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M'
2641     'N' 'O' 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' 'X' 'Y' 'Z'
2642     'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm'
2643     'n' 'o' 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z'
2644     U+00C0..U+00D6 U+00D8..U+00F6 U+00F8..U+02FF
2645     U+0370..U+037D U+037F..U+1FFF U+200C..U+200D
2646     U+2070..U+218F U+2C00..U+2FEF U+3001..U+D7FF
2647     U+F900..U+FDCF U+FDF0..U+FFFD U+10000..U+EFFFF
2648     ];
2649 wakaba 1.4 } // NameChar
2650    
2651     lexmode Name
2652     : extends => 'NameChar'
2653     {
2654 wakaba 1.1 Name : value := $NameStartChar11 $NameChar11*; // one name-start character followed by zero or more name characters; only the ...11 classes are referenced here
2655     } // Name
2656 wakaba 1.4
2657     lexmode Nmtoken
2658     : extends => 'NameChar'
2659     {
2660     Nmtoken : value := $NameChar11*;
2661     } // Nmtoken
2662 wakaba 1.1
2663     /*
2664     Space
2665     */
2666     lexmode S {
2667     S := [U+0009 U+000A U+000D U+0020]+;
2668     } // S
2669    
2670     /*
2671     Document end scanning mode
2672     */
2673     lexmode DocumentEnd
2674     : standalone
2675     : extends => 'S'
2676     {
2677     /*
2678     Processing instruction
2679     */
2680     PIO := ['<'] ['?'];
2681    
2682     /*
2683     Comment declaration
2684     */
2685     CDO := ['<'] ['!'] ['-'] ['-'];
2686     } // DocumentEnd
2687    
2688     /*
2689     Document misc scanning mode
2690    
2691     This mode scans |Misc| constructions as well
2692     as the document element's start tag.
2693     */
2694     lexmode DocumentMisc
2695     : standalone
2696     : extends => 'DocumentEnd'
2697     {
2698     /*
2699     Document element start tag
2700     */
2701     STAGO := ['<'];
2702     } // DocumentMisc
2703    
2704     /*
2705     Document prolog scanning mode
2706     */
2707     lexmode DocumentProlog
2708     : standalone
2709     : extends => 'DocumentMisc'
2710     {
2711     /*
2712     |DOCTYPE| declaration
2713     */
2714     MDO := ['<'] ['!'];
2715     } // DocumentProlog
2716    
2717     /*
2718     Document start scanning mode
2719     */
2720     lexmode DocumentStart
2721     : initial
2722     : standalone
2723     : extends => 'DocumentProlog'
2724     {
2725     /*
2726     XML declaration
2727     */
2728     XDO := ['<'] ['?'] ['x'] ['m'] ['l'];
2729     } // DocumentStart
2730    
2731     /*
2732     Comment declaration scanning mode
2733     */
2734     lexmode CommentDeclaration
2735     : standalone
2736     {
2737     /*
2738     Comment close
2739     */
2740     COM := ['-'] ['-'];
2741    
2742     /*
2743     Comment data: each unit is an optional single '-' followed by one non-'-' character, so two adjacent hyphens never fall inside STRING
2744     */
2745     $string := ['-']? [^'-'];
2746     STRING : value := $string+;
2747     } // CommentDeclaration
2748    
2749     /*
2750     Processing instruction name and |S| scanning mode
2751     */
2752     lexmode PIName
2753     : standalone
2754     : extends => 'Name'
2755     : extends => 'S'
2756     {
2757     /*
2758     Processing instruction close
2759     */
2760     PIC := ['?'] ['>'];
2761     } // PIName
2762    
2763     /*
2764     Processing instruction data scanning mode
2765     */
2766     lexmode PIData
2767     : standalone
2768     {
2769     /*
2770     Processing instruction close
2771     */
2772     PIC := ['?'] ['>'];
2773    
2774     /*
2775     Processing instruction target data
2776     */
2777 wakaba 1.2 ?default-token DATA : value;
2778 wakaba 1.1 } // PIData
2779    
2780     /*
2781     Content of element scanning mode
2782     */
2783     lexmode ElementContent
2784     : standalone
2785     {
2786     /*
2787     Start tag open
2788     */
2789     STAGO := ['<'];
2790    
2791     /*
2792     End tag open
2793     */
2794     ETAGO := ['<'] ['/'];
2795    
2796     /*
2797     Hexadecimal character reference open
2798     */
2799     HCRO := ['&'] ['#'] ['x'];
2800    
2801     /*
2802     Numeric character reference open
2803     */
2804     CRO := ['&'] ['#'];
2805    
2806     /*
2807     General entity reference open
2808     */
2809     ERO := ['&'];
2810    
2811     /*
2812     Comment declaration open
2813     */
2814     CDO := ['<'] ['!'] ['-'] ['-'];
2815    
2816     /*
2817     CDATA section open
2818     */
2819     CDSO := ['<'] ['!'] ['[']
2820     ['C'] ['D'] ['A'] ['T'] ['A'] ['['];
2821    
2822     /*
2823     Processing instruction open
2824     */
2825     PIO := ['<'] ['?'];
2826 wakaba 1.2
2827     /*
2828     Markup section end
2829     */
2830     MSE := [']'] [']'] ['>'];
2831    
2832     /*
2833     Character data
2834     */
2835     /*
2836     Character data and/or |MSE|
2837     */
2838     ?default-token CharData : value;
2839 wakaba 1.1 } // ElementContent
2840    
2841     /*
2842     CDATA section content scanning mode
2843     */
2844     lexmode CDATASectionContent
2845     : standalone
2846     {
2847     /*
2848     Markup section end
2849     */
2850     MSE := [']'] [']'] ['>'];
2851    
2852     /*
2853     Character data
2854 wakaba 1.2 */
2855     ?default-token CData : value;
2856 wakaba 1.1 } // CDATASectionContent
2857    
2858     lexmode EntityReference
2859     : standalone
2860     : extends => 'Name'
2861     {
2862     /*
2863     Reference close
2864     */
2865     REFC := [';'];
2866     } // EntityReference
2867    
2868     lexmode NumericCharacterReference
2869     : standalone
2870     {
2871     /*
2872     Decimal number
2873     */
2874     $digit := ['0' '1' '2' '3' '4' '5' '6' '7' '8' '9'];
2875     NUMBER : value := $digit+;
2876    
2877     /*
2878     Reference close
2879     */
2880     REFC := [';'];
2881     } // NumericCharacterReference
2882    
2883     lexmode HexadecimalCharacterReference
2884     : standalone
2885     {
2886     /*
2887     Hexadecimal number
2888     */
2889     $hexdigit := ['0' '1' '2' '3' '4' '5' '6' '7' '8' '9'
2890     'A' 'B' 'C' 'D' 'E' 'F'
2891     'a' 'b' 'c' 'd' 'e' 'f'];
2892     Hex : value := $hexdigit+;
2893    
2894     /*
2895     Reference close
2896     */
2897     REFC := [';'];
2898     } // HexadecimalCharacterReference
2899    
2900 wakaba 1.3 lexmode XMLDeclaration
2901     : standalone
2902     : extends => 'Name'
2903     : extends => 'S'
2904     {
2905    
2906     /*
2907     Value indicator
2908     */
2909     VI := ['='];
2910    
2911     /*
2912     Literal open
2913     */
2914     LIT := ['"'];
2915     LITA := [U+0027];
2916    
2917     /*
2918     Processing instruction close
2919     */
2920     PIC := ['?'] ['>'];
2921     } // XMLDeclaration
2922    
2923 wakaba 1.1 lexmode StartTag
2924     : standalone
2925     : extends => 'Name'
2926     : extends => 'S'
2927     {
2928    
2929     /*
2930     Value indicator
2931     */
2932     VI := ['='];
2933    
2934     /*
2935     Literal open
2936     */
2937     LIT := ['"'];
2938     LITA := [U+0027];
2939    
2940     /*
2941     Tag close
2942     */
2943     TAGC := ['>'];
2944    
2945     /*
2946 wakaba 1.6 Null end-tag enabled start-tag close
2947 wakaba 1.1 */
2948 wakaba 1.6 NESTC := ['/'];
2949 wakaba 1.1 } // StartTag
2950    
2951     lexmode EndTag
2952     : standalone
2953     : extends => 'Name'
2954     : extends => 'S'
2955     {
2956     /*
2957     Tag close
2958     */
2959     TAGC := ['>'];
2960     } // EndTag
2961    
2962     lexmode AttributeValueLiteral_ { // reference-open delimiters shared by the AttributeValueLiteral and AttributeValueLiteralA lexmodes, which extend this one
2963     ERO := ['&'];
2964     CRO := ['&'] ['#'];
2965     HCRO := ['&'] ['#'] ['x'];
2966     } // AttributeValueLiteral_
2967    
2968     lexmode AttributeValueLiteral
2969     : standalone
2970     : extends => 'AttributeValueLiteral_'
2971     {
2972     LIT := ['"']; // double-quote delimiter
2973 wakaba 1.3 STRING : value := [^'"' '&' '<']+; // one or more characters other than '"', '&' and '<'
2974 wakaba 1.1 } // AttributeValueLiteral
2975    
2976     lexmode AttributeValueLiteralA
2977     : standalone
2978     : extends => 'AttributeValueLiteral_'
2979     {
2980 wakaba 1.4 LITA := [U+0027]; // single-quote (apostrophe) delimiter
2981 wakaba 1.3 STRING : value := [^U+0027 '&' '<']+; // one or more characters other than U+0027, '&' and '<'
2982 wakaba 1.1 } // AttributeValueLiteralA
2983 wakaba 1.4
2984     lexmode EntityValue_ { // reference-open delimiters shared by the EntityValue and EntityValueA lexmodes, which extend this one
2985     PERO := ['%'];
2986     ERO := ['&'];
2987     CRO := ['&'] ['#'];
2988     HCRO := ['&'] ['#'] ['x'];
2989     } // EntityValue_
2990    
2991     lexmode EntityValue
2992     : standalone
2993     : extends => 'EntityValue_'
2994     {
2995     LIT := ['"']; // double-quote delimiter
2996     STRING : value := [^'"' '&' '%']+; // one or more characters other than '"', '&' and '%'; unlike the attribute value modes, '<' is allowed
2997     } // EntityValue
2998    
2999     lexmode EntityValueA
3000     : standalone
3001     : extends => 'EntityValue_'
3002     {
3003     LITA := [U+0027]; // single-quote (apostrophe) delimiter
3004     STRING : value := [^U+0027 '&' '%']+; // one or more characters other than U+0027, '&' and '%'; unlike the attribute value modes, '<' is allowed
3005     } // EntityValueA
3006    
3007     lexmode SystemLiteral
3008     : standalone
3009     {
3010     LIT := ['"'];
3011     ?default-token STRING : value;
3012     } // SystemLiteral
3013    
3014     lexmode SystemLiteralA
3015     : standalone
3016     {
3017     LITA := [U+0027];
3018     ?default-token STRING : value;
3019     } // SystemLiteralA
3020    
3021     lexmode DTD
3022     : standalone
3023     : extends => 'S'
3024     {
3025     /*
3026     Markup declaration open
3027     */
3028     MDO := ['<'] ['!'];
3029    
3030     /*
3031     Comment declaration open
3032     */
3033     CDO := ['<'] ['!'] ['-'] ['-'];
3034    
3035     /*
3036     Processing instruction open
3037     */
3038     PIO := ['<'] ['?'];
3039    
3040     /*
3041     Parameter entity reference open
3042     */
3043     PERO := ['%'];
3044    
3045     /*
3046     Declaration subset close
3047     */
3048     DSC := [']'];
3049    
3050     /*
3051     Conditional section open
3052     */
3053     CSO := ['<'] ['!'] ['['];
3054     } // DTD
3055    
3056     /*
3057     Markup declaration scanning mode
3058    
3059     This mode is used to recognize the |MDC| that terminates
3060     a comment declaration, and it also serves as the base |lexmode|
3061     for other modes, e.g. the document type declaration scanning mode.
3062     */
3063     lexmode MarkupDeclaration
3064     : standalone
3065     : extends => 'Name'
3066     : extends => 'S'
3067     {
3068     /*
3069     Markup declaration close
3070     */
3071     MDC := ['>'];
3072    
3073     /*
3074     Literal open
3075     */
3076     LIT := ['"'];
3077    
3078     /*
3079     Alternative literal open
3080     */
3081     LITA := [U+0027];
3082    
3083     /*
3084     Parameter entity reference open
3085     */
3086     PERO := ['%'];
3087    
3088     /*
3089     Declaration subset open
3090     */
3091     DSO := ['['];
3092     } // MarkupDeclaration
3093    
3094     lexmode ElementDeclaration
3095     : standalone
3096     : extends => 'MarkupDeclaration'
3097     {
3098     /*
3099     Model group open
3100     */
3101     MGO := ['('];
3102    
3103     /*
3104     Model group close
3105     */
3106     MGC := [')'];
3107    
3108     /*
3109     Reserved name indicator
3110     */
3111     RNI := ['#'];
3112    
3113     /*
3114     Occurrence indicators
3115     */
3116     OPT := ['?'];
3117     REP := ['*'];
3118     PLUS := ['+'];
3119    
3120     /*
3121     Connectors
3122     */
3123     OR := ['|'];
3124     SEQ := [','];
3125     } // ElementDeclaration
3126    
3127     lexmode AttlistDeclaration
3128     : standalone
3129     : extends => 'MarkupDeclaration'
3130     {
3131     /*
3132     Enumeration group open
3133     */
3134     EGO := ['('];
3135    
3136     /*
3137     Enumeration group close
3138     */
3139     EGC := [')'];
3140    
3141     /*
3142     Enumeration choice delimiter
3143     */
3144     OR := ['|'];
3145    
3146     /*
3147     Reserved name indicator
3148     */
3149     RNI := ['#'];
3150     } // AttlistDeclaration
3151    
3152     lexmode Enumeration
3153     : standalone
3154     : extends => 'Nmtoken'
3155     : extends => 'S'
3156     {
3157     /*
3158     Enumeration group close
3159     */
3160     EGC := [')'];
3161    
3162     /*
3163     Enumeration choice delimiter
3164     */
3165     OR := ['|'];
3166     } // Enumeration
3167    
3168 wakaba 1.1
3169     token-error default : default { // Default token-error handler: builds a location via xp|get-location-from-token, reports xp|wf-syntax-error with the token and that location, raises DOMLS|PARSE_ERR when the report returns a false value, and otherwise sets $self->{has_error}
3170     lang:Perl {
3171 wakaba 1.3 my $location;
3172     __CODE{xp|get-location-from-token::
3173     $token => {$token},
3174     $result => {$location},
3175     }__;
3176     my $continue = __DOMCore:ERROR{xp|wf-syntax-error::
3177 wakaba 1.1 xp|error-token => {$token},
3178     DOMCore|location => {$location},
3179     }__;
3180     unless ($continue) {
3181     __EXCEPTION{DOMLS|PARSE_ERR::
3182     }__;
3183     }
3184     $self->{has_error} = true;
3185     }
3186     } // default
3187 wakaba 1.5
3188     @XMLTests:
3189     @@XMLTest:
3190 wakaba 1.6 @@@QName: xp.only.docel.test
3191 wakaba 1.5 @@@DEnt:
3192     @@@@test:value:
3193     <p></p>
3194     @@@test:domTree:
3195     document {
3196     xml-version: '1.0';
3197 wakaba 1.6 xml-encoding: null;
3198     xml-standalone: false;
3199     element {
3200     namespace-uri: null;
3201     prefix: null;
3202     local-name: 'p';
3203     text-content: '';
3204     }
3205     }
3206     @@XMLTest:
3207     @@@QName: xp.only.docel.emptyelemtag.test
3208     @@@DEnt:
3209     @@@@test:value:
3210     <p/>
3211     @@@test:domTree:
3212     document {
3213     xml-version: '1.0';
3214     xml-encoding: null;
3215     xml-standalone: false;
3216 wakaba 1.5 element {
3217     namespace-uri: null;
3218     prefix: null;
3219     local-name: 'p';
3220     text-content: '';
3221     }
3222     }
3223 wakaba 1.6
3224     @@XMLTest:
3225     @@@QName: xp.doctype.empty.test
3226     @@@DEnt:
3227     @@@@test:value:
3228     <!DOCTYPE a>
3229     <a></a>
3230     @@@test:domTree:
3231     document {
3232     xml-version: '1.0';
3233     xml-encoding: null;
3234     xml-standalone: false;
3235     document-type {
3236     node-name: 'a';
3237     }
3238     element {
3239     namespace-uri: null;
3240     prefix: null;
3241     local-name: 'a';
3242     text-content: '';
3243     }
3244     }
3245    
3246     @@XMLTest:
3247     @@@QName: xp.doctype.intsubset.empty.test
3248     @@@DEnt:
3249     @@@@test:value:
3250     <!DOCTYPE a [
3251    
3252     ]>
3253     <a></a>
3254     @@@test:domTree:
3255     document {
3256     xml-version: '1.0';
3257     xml-encoding: null;
3258     xml-standalone: false;
3259     document-type {
3260     node-name: 'a';
3261     }
3262     element {
3263     namespace-uri: null;
3264     prefix: null;
3265     local-name: 'a';
3266     text-content: '';
3267     }
3268     }
3269    
3270     @@XMLTest:
3271     @@@QName: xp.doctype.intsubset.pi.test
3272     @@@DEnt:
3273     @@@@test:value:
3274     <!DOCTYPE a [
3275     <?pi data ?>
3276     ]>
3277     <a></a>
3278     @@@test:domTree:
3279     document {
3280     xml-version: '1.0';
3281     xml-encoding: null;
3282     xml-standalone: false;
3283     document-type {
3284     node-name: 'a';
3285     pi {
3286     target: 'pi';
3287     data: 'data ';
3288     }
3289     }
3290     element {
3291     namespace-uri: null;
3292     prefix: null;
3293     local-name: 'a';
3294     text-content: '';
3295     }
3296     }
3297     @@@enImplNote:
3298     A DOM PI node in doctype node is a manakai extension.
3299    
3300    
3301     @@XMLTest:
3302     @@@QName: xp.doctype.intsubset.entity.general.internal.test
3303     @@@DEnt:
3304     @@@@test:value:
3305     <!DOCTYPE a [
3306     <!ENTITY entity "entity value">
3307     ]>
3308     <a></a>
3309     @@@test:domTree:
3310     document {
3311     xml-version: '1.0';
3312     xml-encoding: null;
3313     xml-standalone: false;
3314     document-type {
3315     node-name: 'a';
3316     }
3317     element {
3318     namespace-uri: null;
3319     prefix: null;
3320     local-name: 'a';
3321     text-content: '';
3322     }
3323     }
3324     @@XMLTest:
3325     @@@QName: xp.doctype.intsubset.entity.parameter.internal.test
3326     @@@DEnt:
3327     @@@@test:value:
3328     <!DOCTYPE a [
3329     <!ENTITY % entity "entity value">
3330     ]>
3331     <a></a>
3332     @@@test:domTree:
3333     document {
3334     xml-version: '1.0';
3335     xml-encoding: null;
3336     xml-standalone: false;
3337     document-type {
3338     node-name: 'a';
3339     }
3340     element {
3341     namespace-uri: null;
3342     prefix: null;
3343     local-name: 'a';
3344     text-content: '';
3345     }
3346     }
3347    
3348     @@XMLTest:
3349     @@@QName: xp.doctype.internal.entity.root.element.text.only.test
3350     @@@DEnt:
3351     @@@@test:value:
3352     <!DOCTYPE a [
3353     <!ENTITY entity "entity value">
3354     ]>
3355     <a>&entity;</a>
3356     @@@test:domTree:
3357     document {
3358     xml-version: '1.0';
3359     xml-encoding: null;
3360     xml-standalone: false;
3361     document-type {
3362     node-name: 'a';
3363     }
3364     element {
3365     namespace-uri: null;
3366     prefix: null;
3367     local-name: 'a';
3368     general-entity-reference {
3369     node-name: 'entity';
3370     text {
3371     data: 'entity value';
3372     }
3373     }
3374     }
3375     }
3376     @@XMLTest:
3377     @@@QName: xp.doctype.internal.entity.root.element.text.mult.test
3378     @@@DEnt:
3379     @@@@test:value:
3380     <!DOCTYPE a [
3381     <!ENTITY entity "entity value">
3382     ]>
3383     <a>&entity; and &entity;</a>
3384     @@@test:domTree:
3385     document {
3386     xml-version: '1.0';
3387     xml-encoding: null;
3388     xml-standalone: false;
3389     document-type {
3390     node-name: 'a';
3391     }
3392     element {
3393     namespace-uri: null;
3394     prefix: null;
3395     local-name: 'a';
3396     general-entity-reference {
3397     node-name: 'entity';
3398     text {
3399     data: 'entity value';
3400     }
3401     }
3402     text { data: ' and '; }
3403     general-entity-reference {
3404     node-name: 'entity';
3405     text {
3406     data: 'entity value';
3407     }
3408     }
3409     }
3410     }
3411     @@XMLTest:
3412     @@@QName: xp.doctype.internal.entity.root.element.text.element.test
3413     @@@DEnt:
3414     @@@@test:value:
3415     <!DOCTYPE a [
3416     <!ENTITY entity "entity <p>value</p> with <e>element</e> ">
3417     ]>
3418     <a>&entity;</a>
3419     @@@test:domTree:
3420     document {
3421     xml-version: '1.0';
3422     xml-encoding: null;
3423     xml-standalone: false;
3424     document-type {
3425     node-name: 'a';
3426     }
3427     element {
3428     namespace-uri: null;
3429     prefix: null;
3430     local-name: 'a';
3431     general-entity-reference {
3432     node-name: 'entity';
3433     text {
3434     data: 'entity ';
3435     }
3436     element {
3437     namespace-uri: null;
3438     prefix: null;
3439     local-name: 'p';
3440     text {
3441     data: 'value';
3442     }
3443     }
3444     text {
3445     data: ' with ';
3446     }
3447     element {
3448     namespace-uri: null;
3449     prefix: null;
3450     local-name: 'e';
3451     text {
3452     data: 'element';
3453     }
3454     }
3455     text {
3456     data: ' ';
3457     }
3458     }
3459     }
3460     }
3461     @@XMLTest:
3462     @@@QName: xp.doctype.internal.entity.root.element.text.in.ent.test
3463     @@@DEnt:
3464     @@@@test:value:
3465     <!DOCTYPE a [
3466     <!ENTITY entity1 "entity value">
3467     <!ENTITY entity2 "e&entity1;n">
3468     ]>
3469     <a>&entity2;</a>
3470     @@@test:domTree:
3471     document {
3472     xml-version: '1.0';
3473     xml-encoding: null;
3474     xml-standalone: false;
3475     document-type {
3476     node-name: 'a';
3477     }
3478     element {
3479     namespace-uri: null;
3480     prefix: null;
3481     local-name: 'a';
3482     general-entity-reference {
3483     node-name: 'entity2';
3484     text { data: 'e'; }
3485     general-entity-reference {
3486     node-name: 'entity1';
3487     text {
3488     data: 'entity value';
3489     }
3490     }
3491     text { data: 'n'; }
3492     }
3493     }
3494     }
3495    
3496 wakaba 1.5 @@PerlDef:
3497     my $impl = $Message::DOM::ImplementationRegistry->get_implementation ({
3498     'Core' => '3.0',
3499     'XML' => '3.0',
3500     'XMLVersion' => ['1.0', '1.1'],
3501     });
3502     my $parser = <Class::ManakaiXMLParser>->new ($impl);
3503    
3504     for my $test_data (@$TestData) {
3505     $test->start_new_test ($test_data->{uri});
3506     my $doc_ent = $test_data->{entity}->{$test_data->{root_uri}};
3507     my $doc = $parser->parse_string ($doc_ent->{<H::test:value>});
3508     try {
3509     $test->assert_dom_tree_equals
3510     (actual_value => $doc,
3511     expected_hash => $test_data->{dom_tree});
3512     $test->ok;
3513     } catch Message::Util::IF::DTException with { };
3514     }
3515 wakaba 1.6
3516     @ResourceDef:
3517     @@QName: getCopyOfEntityState
3518     @@rdf:type: DISPerl|InlineCode
3519     @@ForCheck: ManakaiDOM|ForClass
3520     @@PerlDef:
3521     {%{$self->{$entity_type}->{$entity_name}},
3522     line => 1, column => 1, pos => 0}
3523 wakaba 1.1 ##ManakaiXMLParser
3524 wakaba 1.5
3525     ElementTypeBinding:
3526     @Name: XMLTests
3527     @ElementType:
3528     dis:ResourceDef
3529     @ShadowContent:
3530     @@ForCheck: ManakaiDOM|ForClass
3531     @@rdf:type: test|ParserTestSet
3532    
3533     ElementTypeBinding:
3534     @Name: XMLTest
3535     @ElementType:
3536     dis:ResourceDef
3537     @ShadowContent:
3538     @@ForCheck: ManakaiDOM|ForClass
3539     @@rdf:type: test|ParserTest
3540    
3541     ElementTypeBinding:
3542     @Name: DEnt
3543     @ElementType:
3544     dis:ResourceDef
3545     @ShadowContent:
3546     @@rdf:type: test|RootEntity
3547    
3548     ElementTypeBinding:
3549     @Name: Ent
3550     @ElementType:
3551     dis:ResourceDef
3552     @ShadowContent:
3553     @@rdf:type: test|Entity
3554 wakaba 1.1
3555 wakaba 1.3 ResourceDef:
3556     @QName: xp|get-location-from-token
3557     @rdf:type: DISPerl|BlockCode
3558     @enDesc:
3559     Creates a <IF::DOMCore:DOMLocator> object from a token.
3560     @PerlDef:
3561     $result = {
3562 wakaba 1.6 utf32_offset => $self->{entity}->[-1]->{pos},
3563 wakaba 1.3 };
3564     @For: ManakaiDOM|ManakaiDOM3
3565 wakaba 1.1
3566     ElementTypeBinding:
3567     @Name: RuleDef
3568     @ElementType:
3569     dis:ResourceDef
3570     @ShadowContent:
3571     @@ForCheck: ManakaiDOM|ForClass
3572     @@rdf:type: Muf2003|RuleDefClass
3573    
3574     ElementTypeBinding:
3575     @Name: RuleParam
3576     @ElementType:
3577     dis:ResourceDef
3578     @ShadowContent:
3579     @@rdf:type: Muf2003|RuleParameter
3580    
3581     ElementTypeBinding:
3582     @Name: enImplNote
3583     @ElementType:
3584     dis:ImplNote
3585     @ShadowContent:
3586     @@lang:en
3587    
3588     ElementTypeBinding:
3589     @Name: ErrDef
3590     @ElementType:
3591     dis:ResourceDef
3592     @ShadowContent:
3593     @@rdf:type: DOMCore|DOMErrorType
3594     @@For: ManakaiDOM|DOM3
3595     @@ecore:textFormatter: ManakaiXMLParserExceptionFormatter
3596    
3597 wakaba 1.3 ElementTypeBinding:
3598     @Name: WFErrDef
3599     @ElementType:
3600     dis:ResourceDef
3601     @ShadowContent:
3602     @@rdf:type: DOMCore|DOMErrorType
3603     @@For: ManakaiDOM|DOM3
3604     @@ecore:textFormatter: ManakaiXMLParserExceptionFormatter
3605    
3606     WFErrDef:
3607     @QName: xp|wf-syntax-error
3608     @enDesc:
3609     The entity does not match the production rule; it is not
3610     well-formed.
3611     @DOMCore:severity: DOMCore|SEVERITY_FATAL_ERROR
3612     @enMufDef:
3613     |%xp-error-token-type;|%xp-error-token-value
3614     (prefix => { (|}, suffix => {|)}); is not
3615     allowed%xp-error-lines (prefix => { (|}, suffix => {|)});
3616     @ecore:hasParameter:
3617     @@@: xp|error-token
3618     @@enDesc:
3619     The token that is not allowed.
3620    
3621     WFErrDef:
3622     @QName: xp|wf-pi-target-is-xml
3623     @enDesc:
3624     A processing instruction has its <CODE::PITarget> of
3625     <XML::xml> (in any case) which is not allowed.
3626     @DOMCore:severity: DOMCore|SEVERITY_ERROR
3627     @enMufDef:
3628     Processing instruction target name cannot be |%p
3629     (name => {<Q::xp|name>});|
3630     @ecore:hasParameter:
3631     @@@: xp|error-token
3632     @@enDesc:
3633     The token that contains the name.
3634     @ecore:hasParameter:
3635     @@@: xp|name
3636     @@enDesc:
3637     A string that is specified as target name of the
3638     processing instruction.
3639     @ecore:hasParameter: xp|parent
3640    
3641     WFErrDef:
3642     @QName: xp|wf-no-end-tag
3643     @DOMCore:severity: DOMCore|SEVERITY_ERROR
3644     @enDesc:
3645     An end-tag is not found.
3646     @enMufDef:
3647     End-tag |</%p (name => {<Q::xp|expected-element-type>});>| is required
3648     @ecore:hasParameter: xp|error-token
3649     @ecore:hasParameter:
3650     @@@: xp|node
3651     @@enDesc:
3652     The element node that is not closed.
3653     @ecore:hasParameter:
3654     @@@: xp|expected-element-type
3655     @@enDesc:
3656     The element type name of the element that is not closed.
3657    
3658     WFErrDef:
3659     @QName: xp|wf-unsupported-xml-version
3660     @DOMCore:severity: DOMCore|SEVERITY_ERROR
3661     @enDesc:
3662     The XML version specified in the version declaration is not supported.
3663     @enMufDef:
3664     XML version |%p (name => {<Q::infoset|version>});| is not supported
3665     @ecore:hasParameter: xp|bad-token
3666     @ecore:hasParameter:
3667     @@@: xp|parent
3668     @@enDesc:
3669     The document node.
3670     @ecore:hasParameter:
3671     @@@: infoset|version
3672     @@enDesc:
3673     The specified XML version.
3674    
3675     WFErrDef:
3676     @QName: xp|wf-malformed-enc-name
3677     @DOMCore:severity: DOMCore|SEVERITY_ERROR
3678     @enDesc:
3679     An <XA::encoding> pseudo-attribute value does not match
3680     the production rule <CODE::EncName>.
3681     @enMufDef:
3682     Encoding name |%p (name => {<Q::xp|name>});| is not allowed
3683     @ecore:hasParameter: xp|error-token
3684     @ecore:hasParameter:
3685     @@@: xp|parent
3686     @@enDesc: The document node.
3687     @ecore:hasParameter:
3688     @@@: xp|name
3689     @@enDesc:
3690     The <XA::encoding> value.
3691    
3692     WFErrDef:
3693     @QName: xp|wf-malformed-xml-standalone
3694     @DOMCore:severity: DOMCore|SEVERITY_ERROR
3695     @enDesc:
3696     An <XA::standalone> pseudo-attribute value is neither <XML::yes>
3697     nor <XML::no>.
3698     @enMufDef:
3699     |standalone| pseudo-attribute value |%p (name => {<Q::xp|name>});|
3700     is not allowed
3701     @ecore:hasParameter: xp|error-token
3702     @ecore:hasParameter:
3703     @@@: xp|parent
3704     @@enDesc: The document node.
3705     @ecore:hasParameter:
3706     @@@: xp|name
3707     @@enDesc:
3708     The <XA::standalone> value.
3709    
3710     WFErrDef:
3711     @QName: xp|wf-legal-literal-character
3712     @DOMCore:severity: DOMCore|SEVERITY_ERROR
3713     @enDesc:
3714     Each character in an XML entity must match the production
3715     rule <CODE::Char - RestrictedChar>.
3716     @enMufDef:
3717     Character %character-code-point
3718     (v => {<Q::xp|character-number>}); is not allowed
3719     @ecore:hasParameter:
3720     @@@: xp|character-number
3721     @@enDesc:
3722     The code position of the character being referred.
3723    
3724     WFErrDef:
3725     @QName: xp|wf-element-type-match
3726     @DOMCore:severity: DOMCore|SEVERITY_FATAL_ERROR
3727 wakaba 1.1 @enDesc:
3728 wakaba 1.3 The <CODE::Name> in an element's end-tag must match the element type
3729     in the start-tag.
3730     @enMufDef:
3731     End-tag |</%p (name => {<Q::xp|actual-element-type>});>| does
3732     not match to start-tag |<%p (name => {<Q::xp|expected-element-type>});>|
3733     @ecore:hasParameter: xp|error-token
3734     @ecore:hasParameter:
3735     @@@: xp|node
3736     @@enDesc:
3737     The current opening element node.
3738     @ecore:hasParameter:
3739     @@@: xp|expected-element-type
3740     @@enDesc:
3741     The element type name of the current element.
3742     @ecore:hasParameter:
3743     @@@: xp|actual-element-type
3744     @@enDesc:
3745     The <CODE::Name> that occurs in the end-tag.
3746    
3747     WFErrDef:
3748     @QName: xp|wf-unique-att-spec
3749 wakaba 1.1 @DOMCore:severity: DOMCore|SEVERITY_ERROR
3750 wakaba 1.3 @enDesc:
3751     An attribute name <kwd:MUST-NOT> appear more than once in
3752     the same start-tag or empty-element tag.
3753 wakaba 1.1 @enMufDef:
3754 wakaba 1.3 Attribute |%p (name => {<Q::xp|name>});| is specified more
3755     than once in the same tag
3756     @ecore:hasParameter: xp|error-token
3757     @ecore:hasParameter:
3758     @@@: xp|name
3759     @@enDesc:
3760     The name of the attribute.
3761    
3762     WFErrDef:
3763     @QName: xp|wf-legal-character
3764     @DOMCore:severity: DOMCore|SEVERITY_ERROR
3765     @enDesc:
3766     Characters referred to using character references <kwd:MUST>
3767     match the production for <CODE::Char>.
3768     @enMufDef:
3769     Reference to character %character-code-point
3770     (v => {<Q::xp|character-number>}); is not allowed
3771     @ecore:hasParameter: xp|error-token
3772     @ecore:hasParameter:
3773     @@@: xp|character-number
3774     @@enDesc:
3775     The code position of the character being referred.
3776     @ecore:hasParameter:
3777     @@@: xp|parent
3778     @@enDesc:
3779     The parent node in which the character reference has
3780     occurred, if available.
3781 wakaba 1.1
3782 wakaba 1.3 XWParam:
3783 wakaba 1.1 @QName: xp|error-token
3784     @enDesc:
3785     The token where the parser found an error.
3786    
3787 wakaba 1.3 XWParam:
3788     @QName: xp|name
3789     @enDesc:
3790     A name.
3791    
3792     XWParam:
3793     @QName: xp|parent
3794     @enDesc:
3795     The parent node in which the error occurs.
3796    
3797     XWParam:
3798     @QName: xp|node
3799     @enDesc:
3800     The current node.
3801    
3802     XWParam:
3803     @QName: xp|actual-element-type
3804     @enDesc:
3805     The actual element type name that occurred in the source.
3806    
3807     XWParam:
3808     @QName: xp|expected-element-type
3809 wakaba 1.1 @enDesc:
3810 wakaba 1.3 The element type name expected.
3811    
3812     XWParam:
3813     @QName: xp|character-number
3814     @enDesc:
3815     The character code position.
3816    
3817     ElementTypeBinding:
3818     @Name: XWParam
3819     @ElementType:
3820     dis:ResourceDef
3821     @ShadowContent:
3822     @@For: =ManakaiDOM|all
3823     @@rdf:type: ecore|Parameter
3824 wakaba 1.1
3825     ElementTypeBinding:
3826     @Name:enMufDef
3827     @ElementType:
3828     ecore:defaultMessage
3829     @ShadowContent:
3830     @@lang:en
3831     @@ContentType:
3832     lang:muf
3833    
3834     ResourceDef:
3835     @QName: DOMImpl
3836     @AliasFor: DOMCore|DOMImplementation
3837     @For: ManakaiDOM|DOM
3838    
3839     ElementTypeBinding:
3840     @Name: Attr
3841     @ElementType:
3842     dis:ResourceDef
3843     @ShadowContent:
3844     @@rdf:type: DISLang|Attribute
3845     @@ForCheck: !=ManakaiDOM|ManakaiDOM
3846    
3847     ElementTypeBinding:
3848     @Name: Get
3849     @ElementType:
3850     dis:ResourceDef
3851     @ShadowContent:
3852     @@rdf:type: DISLang|AttributeGet
3853    
3854     ElementTypeBinding:
3855     @Name: Set
3856     @ElementType:
3857     dis:ResourceDef
3858     @ShadowContent:
3859     @@rdf:type: DISLang|AttributeSet
3860    
3861     ElementTypeBinding:
3862     @Name: enDesc
3863     @ElementType:
3864     dis:Description
3865     @ShadowContent:
3866     @@lang:en
3867    
3868     ElementTypeBinding:
3869     @Name: Method
3870     @ElementType:
3871     dis:ResourceDef
3872     @ShadowContent:
3873     @@rdf:type: DISLang|Method
3874     @@For: !=ManakaiDOM|ManakaiDOM
3875    
3876     ElementTypeBinding:
3877     @Name: Return
3878     @ElementType:
3879     dis:ResourceDef
3880     @ShadowContent:
3881     @@rdf:type: DISLang|MethodReturn
3882    
3883     ElementTypeBinding:
3884     @Name: Param
3885     @ElementType:
3886     dis:ResourceDef
3887     @ShadowContent:
3888     @@rdf:type: DISLang|MethodParameter
3889    
3890     ElementTypeBinding:
3891     @Name: PerlDef
3892     @ElementType:
3893     dis:Def
3894     @ShadowContent:
3895     @@ContentType: lang|Perl
3896    
3897     ElementTypeBinding:
3898     @Name: PropDef
3899     @ElementType:
3900     dis:ResourceDef
3901     @ShadowContent:
3902     @@rdf:type: rdf|Property
3903    
3904     ClsDef:
3905     @ClsQName: ManakaiXMLParserExceptionFormatter
3906    
3907     @ClsISA: ecore|MUErrorFormatter||ManakaiDOM|Perl
3908    
3909     @RuleDef:
3910     @@Name: xp-error-token-type
3911     @@enDesc:
3912     The type of the token the parser has encountered.
3913    
3914     @@Method:
3915     @@@Name: after
3916     @@@Param:
3917     @@@@Name: name
3918     @@@@Type: DOMString
3919     @@@@enDesc: The name of the method.
3920     @@@Param:
3921     @@@@Name: p
3922     @@@@Type: DISPerl|HASH
3923     @@@@enDesc: The set of the parameters to the method.
3924     @@@Param:
3925     @@@@Name: o
3926     @@@@Type: DISPerl|HASH
3927     @@@@enDesc: The option value.
3928     @@@Return:
3929     @@@@PerlDef:
3930     $p->{-result} = $o->{<H::xp|error-token>}->{type}
3931     if defined $o->{<H::xp|error-token>}->{type};
3932    
3933     @RuleDef:
3934     @@Name: xp-error-token-value
3935     @@enDesc:
3936     The value of the token the parser has encountered, if any.
3937    
3938     @@Method:
3939     @@@Name: after
3940     @@@Param:
3941     @@@@Name: name
3942     @@@@Type: DOMString
3943     @@@@enDesc: The name of the method.
3944     @@@Param:
3945     @@@@Name: p
3946     @@@@Type: DISPerl|HASH
3947     @@@@enDesc: The set of the parameters to the method.
3948     @@@Param:
3949     @@@@Name: o
3950     @@@@Type: DISPerl|HASH
3951     @@@@enDesc: The option value.
3952     @@@Return:
3953     @@@@PerlDef:
3954     $p->{-result} = $o->{<H::xp|error-token>}->{value}
3955     if defined $o->{<H::xp|error-token>}->{value};
3956    
3957     @RuleDef:
3958     @@Name: xp-error-lines
3959     @@enDesc:
3960     A copy of fragment of the source text that contains the line
3961     where the error occurred, if available.
3962    
3963     @@Method:
3964     @@@Name: after
3965     @@@Param:
3966     @@@@Name: name
3967     @@@@Type: DOMString
3968     @@@@enDesc: The name of the method.
3969     @@@Param:
3970     @@@@Name: p
3971     @@@@Type: DISPerl|HASH
3972     @@@@enDesc: The set of the parameters to the method.
3973     @@@Param:
3974     @@@@Name: o
3975     @@@@Type: DISPerl|HASH
3976     @@@@enDesc: The option value.
3977     @@@Return:
3978     @@@@PerlDef:
3979     my $pos = $o-><AG::DOMCore|DOMError.location>
3980     -><AG::DOMCore|DOMLocator.utf32Offset>;
3981     if ($pos > -1) { # a negative offset yields no excerpt at all
3982 wakaba 1.6 my $src = $o->{<H::ecore|object>}->{entity}->[-1]->{reptxt};
3983 wakaba 1.1 my $start = $pos;
3984     $start = rindex ($$src, "\x0A", $start - 1) for 0..2; # walk back up to three line feeds
3985     $start++; # begin just after that line feed (or at offset 0 when none was found)
3986     my $end = $pos;
3987     for (0..2) { $end = index ($$src, "\x0A", $end + 1); last if $end < 0 } # stop at first miss; continuing would search again from offset 0
3988     $end = length $$src if $end < 0; # fewer than three line feeds follow: take the rest of the text
3989     $p->{-result} = substr $$src, $start, $end - $start;
3990     }
3991 wakaba 1.3
3992     @RuleDef:
3993     @@Name: character-code-point
3994     @@enDesc:
3995     The character code position, in <CODE::U+<VAR::HHHH>> notation.
3996    
3997     @@Method:
3998     @@@Name: after
3999     @@@Param:
4000     @@@@Name: name
4001     @@@@Type: DOMString
4002     @@@@enDesc: The name of the method.
4003     @@@Param:
4004     @@@@Name: p
4005     @@@@Type: DISPerl|HASH
4006     @@@@enDesc: The set of the parameters to the method.
4007     @@@Param:
4008     @@@@Name: o
4009     @@@@Type: DISPerl|HASH
4010     @@@@enDesc: The option value.
4011     @@@RuleParam:
4012     @@@@Name: v
4013     @@@@Type: DISPerl|Number
4014     @@@@enDesc:
4015     The name of the error parameter that contains the character code.
4016     @@@Return:
4017     @@@@PerlDef:
4018     $p->{-result} = sprintf 'U+%04X', $o->{$p->{v}};
4019 wakaba 1.1 ##XMLParserExceptionFormatter

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24