/[suikacvs]/messaging/manakai/lib/Message/DOM/XMLParser.dis
Suika

Contents of /messaging/manakai/lib/Message/DOM/XMLParser.dis

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (hide annotations) (download)
Thu Dec 29 10:21:42 2005 UTC (18 years, 10 months ago) by wakaba
Branch: MAIN
Changes since 1.2: +757 -64 lines
++ manakai/lib/Message/Util/DIS/ChangeLog	29 Dec 2005 06:12:53 -0000
2005-12-29  Wakaba  <wakaba@suika.fam.cx>

	* DPG.dis (bad-token-error): Severity changed
	to |SEVERITY_FATAL_ERROR|.

++ manakai/lib/Message/DOM/ChangeLog	29 Dec 2005 06:12:17 -0000
2005-12-29  Wakaba  <wakaba@suika.fam.cx>

	* XMLParser.dis (shiftChar): Checks characters are legal
	or not.  Normalize end-of-lines.
	(rule _XMLDeclaration_): Implemented.
	(WFErrDef): Well-formedness error |wf-syntax-error|,
	|wf-pi-target-is-xml|, |wf-no-end-tag|,
	|wf-unsupported-xml-version|, |wf-malformed-enc-name|,
	|wf-malformed-xml-standalone|, |wf-legal-literal-character|,
	|wf-element-type-match|, |wf-unique-att-spec|,
	|wf-legal-character| added.
	(%character-code-point): New formatter rule.

	* Tree.dis (Document.xmlEncoding): It is now read-write attribute.

	* DOMCore.dis (DOMError.stringify): Added.
	(error-handler.default): Returns |false| (don't continue)
	when the error severity is |SEVERITY_FATAL_ERROR|.

1 wakaba 1.1 Module:
2     @QName: MDOM|XMLParser
3     @Namespace:
4     http://suika.fam.cx/~wakaba/archive/2004/dom/xml-parser#
5    
6     @FullName:
7     @@lang:en
8     @@@: XML Parser
9    
10     @DISCore:author: DISCore|Wakaba
11     @License: license|Perl+MPL
12     @Date:
13 wakaba 1.3 $Date: 2005/12/28 11:10:56 $
14 wakaba 1.1
15     @DefaultFor: ManakaiDOM|ManakaiDOMLatest
16    
17     @Require:
18     @@Module:
19     @@@QName: MDOM|DOMLS
20     @@@WithFor: ManakaiDOM|ManakaiDOMLatest
21    
22     Namespace:
23     @dis:
24     http://suika.fam.cx/~wakaba/archive/2004/8/18/lang#dis--
25     @DOMCore:
26     http://suika.fam.cx/~wakaba/archive/2004/8/18/dom-core#
27     @DOMMain:
28     http://suika.fam.cx/~wakaba/archive/2004/dom/main#
29     @dx:
30     http://suika.fam.cx/~wakaba/archive/2005/manakai/Util/Error/DOMException#
31     @ecore:
32     http://suika.fam.cx/~wakaba/archive/2005/manakai/Util/Error/Core/
33     @f:
34     http://suika.fam.cx/~wakaba/archive/2004/dom/feature#
35     @idl:
36     http://suika.fam.cx/~wakaba/archive/2004/dis/IDL#
37     @infoset:
38     http://www.w3.org/2001/04/infoset#
39     @lang:
40     http://suika.fam.cx/~wakaba/archive/2004/8/18/lang#
41     @license:
42     http://suika.fam.cx/~wakaba/archive/2004/8/18/license#
43     @LSEV:
44     http://www.w3.org/2002/DOMLS
45     @ManakaiDOM:
46     http://suika.fam.cx/~wakaba/archive/2004/8/18/manakai-dom#
47     @ManakaiDOMLS:
48     http://suika.fam.cx/~wakaba/archive/2004/mdom-ls#
49     @MDOM:
50     http://suika.fam.cx/~wakaba/archive/2004/8/18/manakai-dom#ManakaiDOM.
51     @MDOMX:
52     http://suika.fam.cx/~wakaba/archive/2004/8/4/manakai-dom-exception#
53     @rdf:
54     http://www.w3.org/1999/02/22-rdf-syntax-ns#
55     @rdfs:
56     http://www.w3.org/2000/01/rdf-schema#
57     @t:
58     http://suika.fam.cx/~wakaba/archive/2004/dom/tree#
59     @xml:
60     http://www.w3.org/XML/1998/namespace
61     @xmlns:
62     http://www.w3.org/2000/xmlns/
63     @xp:
64     http://suika.fam.cx/~wakaba/archive/2004/dom/xml-parser#
65    
66     ## -- Features
67    
68     ElementTypeBinding:
69     @Name: FeatureDef
70     @ElementType:
71     dis:ResourceDef
72     @ShadowContent:
73     @@rdf:type: f|Feature
74     @@For: =ManakaiDOM|all
75    
76     ElementTypeBinding:
77     @Name: FeatureVerDef
78     @ElementType:
79     dis:ResourceDef
80     @ShadowContent:
81     @@rdf:type: f|Feature
82    
83     ElementTypeBinding:
84     @Name: featureQName
85     @ElementType:
86     f:name
87     @ShadowContent:
88     @@ContentType: DISCore|QName
89    
90     ResourceDef:
91     @QName: DOMString
92     @AliasFor: DOMMain|DOMString
93     @For: ManakaiDOM|DOM
94    
95     ResourceDef:
96     @QName: Node
97     @AliasFor: t|Node
98     @For: ManakaiDOM|DOM
99    
100     ResourceDef:
101     @QName: Element
102     @AliasFor: t|Element
103     @For: ManakaiDOM|DOM
104    
105     ResourceDef:
106     @QName: Document
107     @AliasFor: t|Document
108     @For: ManakaiDOM|DOM
109    
110     ElementTypeBinding:
111     @Name: ClsDef
112     @ElementType:
113     dis:ResourceDef
114     @ShadowContent:
115     @@rdf:type:
116     @@@@: dis|MultipleResource
117     @@@ForCheck: !ManakaiDOM|ForIF !ManakaiDOM|ForClass
118     @@resourceFor:
119     @@@@: ManakaiDOM|ForClass
120     @@@ForCheck: ManakaiDOM|ManakaiDOM !=ManakaiDOM|ManakaiDOM
121     @@For: ManakaiDOM|DOM3
122     @@For: =ManakaiDOM|ManakaiDOM
123    
124     @@rdf:type:
125     @@@@: DISLang|Class
126     @@@ForCheck: ManakaiDOM|ForClass
127    
128     ElementTypeBinding:
129     @Name: ClsQName
130     @ElementType:
131     dis:QName
132     @ShadowContent:
133     @@ForCheck: ManakaiDOM|ForClass
134    
135     ElementTypeBinding:
136     @Name: ClsISA
137     @ElementType:
138     dis:ISA
139     @ShadowContent:
140     @@ForCheck: ManakaiDOM|ForClass
141    
142     ElementTypeBinding:
143     @Name: nullCase
144     @ElementType:
145     dis:ResourceDef
146     @ShadowContent:
147     @@rdf:type: ManakaiDOM|InCase
148     @@Value:
149     @@@is-null:1
150    
151     ResourceDef:
152     @QName: LSParser
153     @AliasFor: DOMLS|LSParser
154     @For: ManakaiDOM|DOM3
155    
156     ClsDef:
157     @ClsQName: ManakaiXMLParser
158    
159     @Implement: DOMLS|LSParser
160    
161     @f:implements:
162     @@@: DOMLS|LSFeature30
163     @@For: ManakaiDOM|DOM3
164    
165     @DISLang:role: DOMLS|ParserRole
166    
167 wakaba 1.3 @enDesc:
168     Note that the <Class::ManakaiXMLParser> reports any XML errors
169     (syntax errors and / or well-formedness constraint errors)
170     via the <IF::DOMCore:error-handler> registered to
171     the <A::DOMLS:LSParser.domConfig> object. Each error has
172     its <A::DOMCore:DOMError.severity>, either <C::DOMCore:SEVERITY_ERROR>
173     or <C::DOMCore:SEVERITY_FATAL_ERROR>. However, their semantics
174     are slightly different from the ones of <QUOTE::error> and
175     <QUOTE::fatal error> in XML; in this implementation,
176     <C::DOMCore:SEVERITY_ERROR> implies that the parsing process
177     can effectively be continued to detect more errors while
178     <C::DOMCore:SEVERITY_FATAL_ERROR> implies that the error
179     is so serious that the result document tree and any errors
180     might be far from the ones that would be obtained if the error
181     had not occurred.
182    
183 wakaba 1.1 @Attr:
184     @@Name: domConfig
185     @@enDesc:
186     The configuration of the parser.
187    
188     @@Get:
189     @@@Type: DOMCore|DOMConfiguration
190     @@@enDesc: The DOM configuration object.
191     @@@PerlDef:
        ## Expands the DOMCore|getConfigObject code fragment with this parser
        ## passed as both $target and $targetHash.  The fragment is asked to
        ## put its result into $r, the variable the other PerlDef bodies of
        ## this class assign their return value to.
192     __CODE{DOMCore|getConfigObject::
193     $target => $self,
194     $targetHash => $self,
195     $targetType => {<IFName::LSParser>},
196     $result => $r,
197     }__;
198    
199     @Method:
200     @@ManakaiDOM:isForInternal:1
201     @@ForCheck: ManakaiDOM|ForClass
202     @@Operator: DISPerl|NewMethod
203     @@enDesc:
204     Creates a new instance of the object.
205     @@Param:
206     @@@Name: impl
207     @@@Type: DOMLS|GLSImplementation
208     @@@enDesc:
209     The implementation from which the parser is created.
210     @@Param:
211     @@@Name: features
212     @@@Type: DOMString
213     @@@dis:actualType: f|FeaturesString
214     @@@enDesc:
215     The set of features requested for the parser.
216     @@Return:
217     @@@Type: DOMMain|DOMObject
218     @@@dis:actualType: LSParser
219     @@@enDesc:
220     The newly created parser.
221     @@@PerlDef:
        ## The new object is a hash blessed into $self, holding only the
        ## implementation it was created from.  $features is not consulted
        ## by this body.
222     $r = bless {
223     <H::DOMCore:implementation> => $impl,
224     }, $self;
225    
226     @Method:
227     @@Name: parseString
228     @@enImplNote:
229     Non-standard - to be removed
230    
231     @@Param:
232     @@@Name: sourceText
233     @@@Type: DOMString
234     @@Return:
235     @@@Type: Document
236     @@@PerlDef:
237    
        ## Reset every piece of per-parse state kept on the parser object,
        ## so that the same parser can be used for more than one document.
238     $self->{char} = [];
239     $self->{token} = [];
240     $self->{source} = $sourceText;
        ## |shiftChar| reads the source through |pos|; start explicitly at
        ## offset 0 rather than relying on an undefined position.
        pos ($self->{source}) = 0;
241 wakaba 1.3 $self->{xml_version} = '1.0';
242     $self->{standalone} = false;
243     ## Well-formedness constraint Entity Declared takes effect?
244     $self->{line} = 1;
245     $self->{column} = 1;
        ## Forget errors recorded by a previous parse; otherwise the
        ## |has_error| check at the end of the DocumentEntity rule would
        ## raise PARSE_ERR for a later, error-free document.
        $self->{has_error} = false;
246 wakaba 1.1
247     __DEEP{
248     $r = $self->_parse_DocumentEntity
249     ($self->{<H::DOMCore:implementation>});
250     }__;
251    
252     @Method:
253     @@Name: shiftChar
254     @@ManakaiDOM:isForInternal:1
255     @@ForCheck: ManakaiDOM|ForClass
256     @@enDesc:
257     Returns the next character.
258     @@Return:
259     @@@Type: idl|long||ManakaiDOM|all
260     @@@enDesc:
261     The code position number of the next character, if any,
262     or <CODE::-1> if the end of the source text has been reached.
263     @@@PerlDef:
264     if (@{$self->{char}}) {
        ## Characters already queued in $self->{char} are returned first.
265     $r = shift @{$self->{char}};
266     } else {
        ## |pos| is undefined before the first read; treat that as offset 0.
267     my $char = substr ($self->{source}, pos ($self->{source}) || 0, 1);
268     pos ($self->{source})++;
269    
270     if (length $char) {
271     $r = ord $char;
272 wakaba 1.3 if ($r == 0x000A) {
        ## LINE FEED: start of a new line.
273     $self->{line}++;
274     $self->{column} = 1;
275     } elsif ($r == 0x000D) {
        ## CARRIAGE RETURN: always returned as U+000A.  A directly
        ## following U+000A (or, in XML 1.1 only, U+0085) is consumed
        ## together with it.
276     my $next_char = substr ($self->{source},
277     pos ($self->{source}), 1);
278     if ($next_char eq "\x0A") {
279     pos ($self->{source})++;
280     $self->{column} = 1;
281     } elsif ($next_char eq "\x85") {
282     if ($self->{xml_version} eq '1.1') {
283     pos ($self->{source})++;
284     $self->{column} = 1;
285     } else {
        ## NOTE(review): the U+0085 is left for the next call; the column
        ## is set to 0 here while every other branch uses 1 -- confirm.
286     $self->{column} = 0;
287     }
288     } else {
289     $self->{column} = 1;
290     }
291     $r = 0x000A;
292     $self->{line}++;
293     } elsif (
        ## Not one of the accepted ranges, not TAB or U+0085, and not
        ## (in XML 1.0) one of U+007F..U+009F: the character is reported
        ## as wf-legal-literal-character.
294     not ((0x0020 <= $r and $r <= 0x007E) or
295     (0x00A0 <= $r and $r <= 0xD7FF) or
296     (0xE000 <= $r and $r <= 0xFFFD) or
297     (0x10000 <= $r and $r <= 0x10FFFF)) and
298     $r != 0x0009 and $r != 0x0085 and
299     not ($self->{xml_version} eq '1.0' and
300     (0x007F <= $r and $r <= 0x009F))
301     ) {
302     my $location = {
303     utf32_offset => pos ($self->{source}),
304     line_number => $self->{line},
305     column_number => $self->{column},
306     };
307     my $continue = __DOMCore:ERROR{xp|wf-legal-literal-character::
308     DOMCore|location => {$location},
309     xp|character-number => {$r},
310     }__;
311     unless ($continue) {
312     __EXCEPTION{DOMLS|PARSE_ERR}__;
313     }
        ## The error is remembered, but the character is still returned.
314     $self->{has_error} = true;
315     $self->{column}++;
316     } elsif ($r == 0x0085 or $r == 0x2028) {
        ## Translated to U+000A only in XML 1.1; the line counter is
        ## advanced for either version.
317     $r = 0x000A if $self->{xml_version} eq '1.1';
318     $self->{line}++;
319     $self->{column} = 1;
320     } else {
321     $self->{column}++;
322     }
323 wakaba 1.1 } else {
        ## Nothing left in the source text.
324     $r = -1;
325     }
326     }
327    
328     @Method:
329     @@ManakaiDOM:isForInternal: 1
330     @@Operator: ManakaiDOM|MUErrorHandler
331     @@enDesc:
332     When a <IF::ecore|ErrorInterface||ManakaiDOM|Perl> is <Perl::report>ed,
333     then this method is invoked.
334    
335     The method calls the <cfg::DOMCore|error-handler> if the error is of
336     <IF::DOMCore|DOMError>. Otherwise, the error is re-thrown so that
337     corresponding <Perl::catch> clause, if any, can catch the error.
338     @@Param:
339     @@@Name: err
340     @@@Type: ecore|ErrorInterface||ManakaiDOM|Perl
341     @@@enDesc:
342     The reported error object.
343     @@Return:
344     @@@Type: DISPerl|Any
345     @@@enDesc:
346     If the <P::err> is a <IF::DOMCore|DOMError>, then the return value
347     of the error handler.
348    
349     {NOTE:: If the error is thrown, the method never returns.
350     }
351     @@@nullCase:
352     @@@@enDesc:
353     No error handler.
354     @@@PerlDef:
355     if ($err->isa (<IFName::DOMCore|DOMError||ManakaiDOM|ManakaiDOM>)) {
356     __DEEP{
357     A: {
358     my $cfg = $self-><AG::LSParser.domConfig>;
359     my $h = $cfg-><M::DOMCore|DOMConfiguration.getParameter>
360     ('error-handler');
        ## No handler is registered: leave $r unset (the documented null
        ## case) instead of calling a method on an undefined value.
        last A unless defined $h;
361     $r = $h-><M::DOMCore|DOMErrorHandler.handleError> ($err);
362     } # A
363     }__;
364     } else {
        ## Not a DOMError: hand the error back to the exception mechanism.
365     $err-><M::ecore|ErrorInterface||ManakaiDOM|Perl.throw>;
366     }
367    
368     @DISPerl:dpgDef:
369    
370     /*
371     XML Document Entity
372    
373     document := prolog element *Misc
374     - *Char RestrictedChar *Char ;; [1]
375     */
376     rule DocumentEntity ($impl) : standalone {
        // Parses a whole document entity and returns the document node
        // created from $impl.
377     my $doc : return;
378    
379     lang:Perl {
        ## Strict error checking is switched off while the tree is built
        ## and switched on again at the end of this rule.
380     $doc = $impl-><M::DOMImpl.createDocument>;
381     $doc-><AS::Document.strictErrorChecking> (false);
382     }
383    
384     /*
385     prolog := XMLDecl? *Misc [doctypedecl *Misc] ;; [22]
386     */
387     ?lexmode 'DocumentStart';
388    
        // XML declaration, if the document starts with one
389 wakaba 1.3 ~? (XDO) {
390     &_XMLDeclaration_ ($doc => $doc);
391    
392     ~ (PIC) {
393     ?lexmode DocumentProlog;
394     } else {
395     ?lexmode DocumentProlog;
396     }
397     } else {
398     ?lexmode 'DocumentProlog';
399     }
400 wakaba 1.1
401     // *Misc
402 wakaba 1.2 ~* (CDO) {
403 wakaba 1.1 &_CommentDeclaration_ ($doc => $doc, $parent => $doc);
404    
405     ~ (MDC) {
406     ?lexmode DocumentProlog;
407     } else {
408     ?lexmode DocumentProlog;
409     }
410     } (PIO) {
411     &_ProcessingInstruction_ ($doc => $doc, $parent => $doc);
412    
413     ~ (PIC) {
414     ?lexmode 'DocumentProlog';
415     } else {
416     ?lexmode DocumentProlog;
417     }
418     } (S) {
419     //
420     }
421    
422     // doctypedecl
423     ~? (MDO) {
424     &_DocumentTypeDeclaration_ ($doc => $doc);
425    
426 wakaba 1.3 ~ (MDC) {
427     ?lexmode DocumentMisc;
428     } else {
429     ?lexmode DocumentMisc;
430     }
431     } else {
432     lang:Perl {
        ## No document type declaration at all: the parser's
        ## |standalone| flag is set.
433     $self->{standalone} = true;
434     }
435     ?lexmode DocumentMisc;
436 wakaba 1.1 }
437    
438     // *Misc
439 wakaba 1.2 ~* (CDO) {
440 wakaba 1.1 &_CommentDeclaration_ ($doc => $doc, $parent => $doc);
441    
442     ~ (MDC) {
443     ?lexmode DocumentMisc;
444     } else {
445     ?lexmode DocumentMisc;
446     }
447     } (PIO) {
448     &_ProcessingInstruction_ ($doc => $doc, $parent => $doc);
449    
450     ~ (PIC) {
451     ?lexmode 'DocumentMisc';
452     } else {
453     ?lexmode DocumentMisc;
454     }
455     } (S) {
456     //
457     }
458    
459     // Document element
460     ~ (STAGO) {
461     &Element_ ($doc => $doc, $parent => $doc)
462     : unshift-current-token;
463     ~ (TAGC) {
464     ?lexmode DocumentEnd;
465     } else {
466     ?lexmode DocumentEnd;
467     }
468     } else {
469     ?lexmode 'DocumentEnd';
470     }
471    
472     // *Misc
473 wakaba 1.2 ~* (CDO) {
474 wakaba 1.1 &_CommentDeclaration_ ($doc => $doc, $parent => $doc);
475    
476     ~ (MDC) {
477     ?lexmode DocumentEnd;
478     } else {
479     ?lexmode DocumentEnd;
480     }
481     } (PIO) {
482     &_ProcessingInstruction_ ($doc => $doc, $parent => $doc);
483     ~ (PIC) {
484     ?lexmode 'DocumentEnd';
485     } else {
486     ?lexmode DocumentEnd;
487     }
488     } (S) {
489     //
490     }
491    
492     ~ (#EOF) { }
493    
494     lang:Perl {
        ## Errors that were reported but allowed to continue are turned
        ## into a single PARSE_ERR once the whole entity has been read.
495     if ($self->{has_error}) {
496     __EXCEPTION{DOMLS|PARSE_ERR::
497     }__;
498     }
499    
500     $doc-><AS::Document.strictErrorChecking> (true);
501     }
502     } // DocumentEntity
503    
504     /*
505     XML Declaration
506    
507     XMLDecl := '<?xml' VersionInfo
508     [EncodingDecl]
509     [SDDecl]
510     [S] '?>' ;; [23]
511    
512     NOTE: XML declaration is optional in XML 1.0
513     while it is required in XML 1.1.
514     */
515 wakaba 1.3 rule _XMLDeclaration_ ($doc) {
        // Parses the pseudo-attributes of an XML declaration and records
        // them on $doc.  The closing PIC is left to the caller.
516     ?lexmode XMLDeclaration;
517    
518     ~ (S) { }
519    
        // VersionInfo
520     ~ (Name == 'version') {
521     ~? (S) { }
522     ~ (VI) { }
523     ~? (S) { }
524 wakaba 1.1
525 wakaba 1.3 my $ver;
526     my $bad_token;
527    
528     ~ (LIT) {
529     ?lexmode AttributeValueLiteral;
530    
531     ~ (STRING) {
532     lang:Perl ($version => $token.value) {
533     $ver = $version;
534     $bad_token = $token;
535     }
536     }
537    
538     ~ (LIT) {
539     ?lexmode XMLDeclaration;
540     }
541     } (LITA) {
542     ?lexmode AttributeValueLiteralA;
543    
544     ~ (STRING) {
545     lang:Perl ($version => $token.value) {
546     $ver = $version;
547     $bad_token = $token;
548     }
549     }
550    
551     ~ (LITA) {
552     ?lexmode XMLDeclaration;
553     }
554     }
555    
556     lang:Perl : has-error {
        ## Only '1.0' and '1.1' are accepted without an error.  The value
        ## is recorded on the document and on the parser either way.
557     unless ($ver eq '1.0' or $ver eq '1.1') {
558     my $location;
559     __CODE{xp|get-location-from-token::
560     $token => {$bad_token},
561     $result => {$location},
562     }__;
563     my $continue = __DOMCore:ERROR{xp|wf-unsupported-xml-version::
564     DOMCore|location => {$location},
565     xp|parent => {$doc},
566     infoset|version => {$ver},
567     xp|error-token => {$bad_token},
568     }__;
569     unless ($continue) {
570     __EXCEPTION{DOMLS|PARSE_ERR}__;
571     }
572     $self->{has_error} = true;
573     }
574     $doc-><AS::Document.xmlVersion> ($ver);
575     $self->{xml_version} = $ver;
576     }
577    
578     ~? (S) { }
579 wakaba 1.1 }
580    
        // EncodingDecl; $ver holds the encoding name in this branch
581 wakaba 1.3 ~? (Name == 'encoding') {
582     ~? (S) { }
583     ~ (VI) { }
584     ~? (S) { }
585    
586     my $ver;
587     my $bad_token;
588    
589     ~ (LIT) {
590     ?lexmode AttributeValueLiteral;
591    
592     ~ (STRING) {
593     lang:Perl ($version => $token.value) {
594     $ver = $version;
595     $bad_token = $token;
596     }
597     }
598    
599     ~ (LIT) {
600     ?lexmode XMLDeclaration;
601     }
602     } (LITA) {
603     ?lexmode AttributeValueLiteralA;
604    
605     ~ (STRING) {
606     lang:Perl ($version => $token.value) {
607     $ver = $version;
608     $bad_token = $token;
609     }
610     }
611    
612     ~ (LITA) {
613     ?lexmode XMLDeclaration;
614     }
615     }
616    
617     lang:Perl : has-error {
        ## The name must start with an ASCII letter, followed by ASCII
        ## letters, digits, '.', '_' or '-'.
618     unless ($ver =~ /\A[A-Za-z][A-Za-z0-9._-]*\z/) {
619     my $location;
620     __CODE{xp|get-location-from-token::
621     $token => {$bad_token},
622     $result => {$location},
623     }__;
624     my $continue = __DOMCore:ERROR{xp|wf-malformed-enc-name::
625     DOMCore|location => {$location},
626     xp|parent => {$doc},
627     xp|name => {$ver},
628     xp|error-token => {$bad_token},
629     }__;
630     unless ($continue) {
631     __EXCEPTION{DOMLS|PARSE_ERR}__;
632     }
633     $self->{has_error} = true;
634     }
635     $doc-><AS::Document.xmlEncoding> ($ver);
636     }
637    
638     ~? (S) { }
639 wakaba 1.1 }
640 wakaba 1.3
        // SDDecl; $ver holds the 'yes' / 'no' literal in this branch
641     ~? (Name == 'standalone') {
642     ~? (S) { }
643     ~ (VI) { }
644     ~? (S) { }
645    
646     my $ver;
647     my $bad_token;
648    
649     ~ (LIT) {
650     ?lexmode AttributeValueLiteral;
651    
652     ~ (STRING) {
653     lang:Perl ($version => $token.value) {
654     $ver = $version;
655     $bad_token = $token;
656     }
657     }
658    
659     ~ (LIT) {
660     ?lexmode XMLDeclaration;
661     }
662     } (LITA) {
663     ?lexmode AttributeValueLiteralA;
664    
665     ~ (STRING) {
666     lang:Perl ($version => $token.value) {
667     $ver = $version;
668     $bad_token = $token;
669     }
670     }
671 wakaba 1.1
672 wakaba 1.3 ~ (LITA) {
673     ?lexmode XMLDeclaration;
674     }
675     }
676    
677     lang:Perl : has-error {
678     unless ($ver eq 'yes' or $ver eq 'no') {
679     my $location;
680     __CODE{xp|get-location-from-token::
681     $token => {$bad_token},
682     $result => {$location},
683     }__;
684     my $continue = __DOMCore:ERROR{xp|wf-malformed-xml-standalone::
685     DOMCore|location => {$location},
686     xp|parent => {$doc},
687     xp|name => {$ver},
688     xp|error-token => {$bad_token},
689     }__;
690     unless ($continue) {
691     __EXCEPTION{DOMLS|PARSE_ERR}__;
692     }
693     $self->{has_error} = true;
694     }
        ## |xmlStandalone| is a boolean attribute.  The literal must not
        ## be passed through as is, because the string 'no' is a true
        ## value in Perl; only 'yes' makes the document standalone.
695     if ($ver eq 'yes') {
        $doc-><AS::Document.xmlStandalone> (true);
696     $self->{standalone} = true;
        } else {
        $doc-><AS::Document.xmlStandalone> (false);
        }
697     }
698    
699     ~? (S) { }
700     }
701    
702     // ~ (PIC) { }
703     } // _XMLDeclaration_
704 wakaba 1.1
705     /*
706     Document Type Declaration
707     */
708     rule _DocumentTypeDeclaration_ ($doc) {
        // Placeholder: only the DOCTYPE keyword, one S and the document
        // type name are consumed.  Nothing is added to $doc yet, and the
        // closing MDC is left to the caller.
709     ?lexmode 'DocumentTypeDeclaration';
710    
711     ~ (Name == 'DOCTYPE') { }
712    
713     ~ (S) { }
714    
715     // Document type name
716     ~ (Name) {
717    
718     }
719    
720     // TODO: Implement this
721    
722 wakaba 1.3 // TODO: set $self->{standalone} true if only internal subset
723     // with no param ref
724    
725 wakaba 1.1 // ~ (MDC) { }
726     } // _DocumentTypeDeclaration_
727    
728     /*
729     Comment Declaration
730    
731     Comment := '<!--' *(Char - '-' / '-' (Char - '-'))
732     '-->' ;; [15]
733     */
734     rule _CommentDeclaration_ ($doc, $parent) {
        // Creates a comment node with $doc and appends it to $parent.
        // The closing MDC is left to the caller.
735     ?lexmode 'CommentDeclaration';
736    
737     ~? (STRING) {
738     lang:Perl ($data => $token.value) {
739     my $com = $doc-><M::Document.createComment> ($data);
740     $parent-><M::Node.appendChild> ($com);
741     }
742     } else {
        // No STRING token: an empty comment node is still appended
743     lang:Perl {
744     my $com = $doc-><M::Document.createComment> ('');
745     $parent-><M::Node.appendChild> ($com);
746     }
747     }
748    
749     ~ (COM) {
750     ?lexmode MarkupDeclaration;
751     } else {
752     ?lexmode MarkupDeclaration;
753     }
754    
755     // ~ (MDC) { }
756     } // _CommentDeclaration_
757    
758     /*
759     Processing Instruction
760    
761     PI := '<?' PITarget [S *Char - *Char '?>' *Char]
762     '?>' ;; [16]
763     */
764     rule _ProcessingInstruction_ ($doc, $parent) {
        // Creates a processing instruction node with $doc and appends it
        // to $parent.  The closing PIC is left to the caller.
765     ?lexmode 'PIName';
766    
767     my $pi;
768    
769     ~ (Name) {
770 wakaba 1.3 lang:Perl ($name => $token.value) : has-error {
        ## A target that is "xml" in any letter case is reported as
        ## wf-pi-target-is-xml; the node is created regardless.
771 wakaba 1.1 if (lc $name eq 'xml') {
772 wakaba 1.3 my $location;
773     __CODE{xp|get-location-from-token::
774     $token => {$token},
775     $result => {$location},
776     }__;
777     my $continue = __DOMCore:ERROR{xp|wf-pi-target-is-xml::
778     xp|name => {$name},
779     DOMCore|location => {$location},
780     xp|parent => {$parent},
781     }__;
782     unless ($continue) {
783     __EXCEPTION{DOMLS|PARSE_ERR::
784     }__;
785     }
786     $self->{has_error} = true;
787 wakaba 1.1 }
788     ## TODO: Namespace well-formedness
789     $pi = $doc-><M::Document.createProcessingInstruction>
790     ($name);
791     }
792     }
793    
794     ~ (S) {
795     ?lexmode 'PIData';
796    
797     my $tdata;
798    
799     ~? (DATA) {
800     lang:Perl ($data => $token.value) {
801     $tdata = $data;
802     }
803     } else {
804     lang:Perl {
805     $tdata = '';
806     }
807     }
808    
809     lang:Perl {
        ## NOTE(review): $pi is assigned only inside the (Name) branch
        ## above; this call and the appendChild below assume it is set.
810     $pi-><AS::Node.nodeValue> ($tdata);
811     }
812     }
813    
814     lang:Perl {
815     $parent-><M::Node.appendChild> ($pi);
816     ## TODO: PIs in document type declaration subsets
817     }
818    
819     // ~ (PIC) { }
820     } // _ProcessingInstruction_
821    
822     /*
823     Element content parsing mode
824    
825     element := EmptyElemTag /
826     STag content ETag ;; [39]
827     content := (CharData / element / Reference / CDSect /
828     PI / Comment) ;; [43]
829     */
830     rule Element_ ($doc, $parent) : standalone {
        // Parses element content iteratively: open elements are tracked
        // on the $nodes / $types / $nses stacks declared below, and the
        // rule returns when the end tag of the outermost element is read.
831     ?lexmode 'ElementContent';
832    
833     my $node; // Current "parent" node
834     my $nodes; // Node stack (w/o $current_node)
835     my $type; // Current "parent" element type QName
836     my $types; // Element type stack (w/o $current_type)
837     my $ns; // Current in-scope namespace bindings
838     my $nses; // Namespace binding stack (w/o $current_ns)
839    
840     lang:Perl {
841     $node = $parent;
842     $nodes = [];
843     $type = '';
844     $types = [];
845     $ns = {
846     xml => <Q::xml:>,
847     xmlns => <Q::xmlns:>,
848     };
849     $nses = [];
850     }
851    
852     ~* : name => CONTENT
853     (CharData) {
854     // Character data
855     lang:Perl ($data => $token.value) {
856     $node-><M::Node.appendChild>
857     ($doc-><M::Document.createTextNode> ($data));
858     }
859     } (STAGO) {
860     // Start tag or empty element tag
861    
862     ?lexmode 'StartTag';
863    
864     ~ (Name) {
865     my $attrs;
866     lang:Perl ($name => $token.value) {
867     push @{$types}, $type;
868     $type = $name;
869     $attrs = {};
870     }
871    
872     ~? (S) {
873     &AttributeSpecificationList
874     ($doc => $doc, $attrs => $attrs);
875     }
876    
877     my $el;
878    
879     lang:Perl {
        ## The element gets its own copy of the bindings, so declarations
        ## on it do not alter the saved bindings of its ancestors.
880     push @{$nses}, $ns;
881     $ns = {%$ns};
882    
        ## %gattr: prefixed attributes, keyed by prefix and then local name.
        ## %lattr: unprefixed attributes other than |xmlns|.
883     my %gattr;
884     my %lattr;
885     for my $atqname (keys %$attrs) {
        ## NOTE(review): |split| has no limit here, so for a name with
        ## more than one ':' everything after the second part is dropped.
886     my ($pfx, $lname) = split /:/, $atqname;
887     if (defined $lname) { ## Global attribute
888     ## TODO: Namespace well-formedness (lname is NCName)
889     if ($pfx eq 'xmlns') {
890     my $nsuri = $attrs->{$atqname}->{value};
891     if ($lname eq 'xml' and
892     $nsuri ne <Q::xml:>) {
893     ## TODO: error
894     } elsif ($lname eq 'xmlns') {
895     ## TODO: error
896     }
897     if ($nsuri eq '') {
898     ## TODO: error in XML 1.0
899     } elsif ($nsuri eq <Q::xml:> and
900     $lname ne 'xml') {
901     ## TODO: error
902     } elsif ($nsuri eq <Q::xmlns:>) {
903     ## TODO: error
904     }
        ## An empty value removes the binding for the prefix.
905     $ns->{$lname} = $attrs->{$atqname}->{value};
906     delete $ns->{$lname} unless length $ns->{$lname};
907     } elsif ($pfx eq '') {
908     ## TODO: pfx is not NCName error
909     } else {
910     if ($gattr{$pfx}->{$lname}) {
911     ## TODO: Namespace well-formedness error
912     }
913     }
914     $gattr{$pfx}->{$lname} = $attrs->{$atqname};
915     } else { ## Local attribute
916     if ($pfx eq 'xmlns') {
        ## Default namespace declaration; an empty value removes it.
917     $ns->{''} = $attrs->{xmlns}->{value};
918     delete $ns->{''} unless length $ns->{''};
919     } else {
920     $lattr{$pfx} = $attrs->{$atqname};
921     }
922     }
923     }
924    
925     my ($pfx, $lname) = split /:/, $type;
926     my $nsuri;
927     ## TODO: lname is NCName?
928     if (defined $lname) { ## Prefixed namespace
929     if ($pfx eq '') {
930     ## TODO: pfx is not NCName error
931     }
932     if (defined $ns->{$pfx}) {
933     $nsuri = $ns->{$pfx};
934     } else {
935     ## TODO: namespace ill-formed
936     }
937     } else { ## Default namespace
938     $nsuri = $ns->{''};
939     }
940    
941     $el = $doc-><M::Document.createElementNS>
942     ($nsuri, $type);
943    
        ## Attribute nodes are created in three groups: the |xmlns|
        ## attribute, the other unprefixed ones, then the prefixed ones.
944     if ($attrs->{xmlns}) {
945     my $attr = $doc-><M::Document.createAttributeNS>
946     (<Q::xmlns:>, 'xmlns');
947     for (@{$attrs->{xmlns}->{nodes}}) {
948     $attr-><M::Node.appendChild> ($_);
949     }
950     $el-><M::Element.setAttributeNodeNS> ($attr);
951     }
952    
953     for my $lname (keys %lattr) {
954     my $attr = $doc-><M::Document.createAttributeNS>
955     (null, $lname);
956     for (@{$lattr{$lname}->{nodes}}) {
957     $attr-><M::Node.appendChild> ($_);
958     }
959     $el-><M::Element.setAttributeNodeNS> ($attr);
960     }
961    
962     for my $pfx (keys %gattr) {
963     for my $lname (keys %{$gattr{$pfx}}) {
964     my $attr = $doc-><M::Document.createAttributeNS>
965     ($ns->{$pfx}, $pfx.':'.$lname);
966     for (@{$gattr{$pfx}->{$lname}->{nodes}}) {
967     $attr-><M::Node.appendChild> ($_);
968     }
969     $el-><M::Element.setAttributeNodeNS> ($attr);
970     }
971     }
972    
973     $node-><M::Node.appendChild> ($el);
974     }
975    
976     ~ (TAGC) {
977     lang:Perl {
        ## Start tag: the new element becomes the current parent.
978     push @{$nodes}, $node;
979     $node = $el;
980     }
981     ?lexmode ElementContent;
982     } (MTAGC) {
983     lang:Perl {
        ## Empty-element tag: restore the bindings and element type that
        ## were saved when the tag was opened.
984     $ns = pop @{$nses};
985     $type = pop @{$types};
986     }
987     ?lexmode ElementContent;
988     } else {
989     ?lexmode ElementContent;
990     }
991     } else {
992     ?lexmode ElementContent;
993     }
994    
995     } (ETAGO) {
996     // End tag
997    
998     ?lexmode 'EndTag';
999    
1000     my $is_docel;
1001    
1002     ~ (Name) {
1003 wakaba 1.3 lang:Perl ($name => $token.value) : has-error {
1004 wakaba 1.1 if ($name eq $type) {
        ## Matching end tag: leave the element.  Popping the initial ''
        ## marks the end of the element this rule was entered for.
1005     $type = pop @{$types};
1006     if ($type eq '') {
1007     $is_docel = true;
1008     }
1009     $node = pop @{$nodes};
1010     $ns = pop @{$nses};
1011     } else {
        ## Mismatch: reported, and none of the stacks is changed.
        ## NOTE(review): the macro arguments below are written without
        ## {...}, unlike the same call in the other rules -- confirm that
        ## both forms are accepted.
1012 wakaba 1.3 my $location;
1013     __CODE{xp|get-location-from-token::
1014     $token => $token,
1015     $result => $location,
1016     }__;
1017     my $continue = __DOMCore:ERROR{xp|wf-element-type-match::
1018     DOMCore:location => {$location},
1019     xp|token => {$token},
1020     xp|expected-element-type => {$type},
1021     xp|actual-element-type => {$name},
1022     xp|node => {$node},
1023     }__;
1024     unless ($continue) {
1025     __EXCEPTION{DOMLS|PARSE_ERR}__;
1026     }
1027     $self->{has_error} = true;
1028 wakaba 1.1 }
1029     }
1030     }
1031    
1032     ~? (S) { }
1033    
1034     if-true ($is_docel) {
1035 wakaba 1.3 lang:Perl : has-error {
        ## NOTE(review): $is_docel is set only after '' has been popped
        ## from @{$types}, and '' is pushed first, so this list looks
        ## always empty here -- confirm whether the branch is reachable.
1036 wakaba 1.1 if (@{$types}) {
1037 wakaba 1.3 my $location;
1038     __CODE{xp|get-location-from-token::
1039     $token => $token,
1040     $result => $location,
1041     }__;
1042     for my $type (reverse @{$types}) {
1043     my $continue = __DOMCore:ERROR{xp|wf-no-end-tag::
1044     DOMCore:location => {$location},
1045     xp|token => {$token},
1046     xp|expected-element-type => {$type},
1047     xp|node => {$node},
1048     }__;
1049     unless ($continue) {
1050     __EXCEPTION{DOMLS|PARSE_ERR}__;
1051     }
        ## NOTE(review): |shift| takes from the front of @{$nodes}, while
        ## the rest of this rule uses push / pop on it -- confirm.
1052     $node = shift @{$nodes};
1053     }
1054     $self->{has_error} = true;
1055 wakaba 1.1 }
1056     }
1057     return;
1058     }
1059    
1060     ~ (TAGC) {
1061     ?lexmode ElementContent;
1062     } else {
1063     ?lexmode 'ElementContent';
1064     }
1065    
1066     } (HCRO) {
1067     &_HexadecimalCharacterReference_
1068     ($doc => $doc, $parent => $node);
1069    
1070     ~ (REFC) {
1071     ?lexmode 'ElementContent';
1072     } else {
1073     ?lexmode ElementContent;
1074     }
1075     } (CRO) {
1076     &_NumericCharacterReference_
1077     ($doc => $doc, $parent => $node);
1078    
1079     ~ (REFC) {
1080     ?lexmode 'ElementContent';
1081     } else {
1082     ?lexmode ElementContent;
1083     }
1084     } (ERO) {
1085     &_GeneralEntityReference_
1086     ($doc => $doc, $parent => $node);
1087    
1088     ~ (REFC) {
1089     ?lexmode 'ElementContent';
1090     } else {
1091     ?lexmode ElementContent;
1092     }
1093     } (CDO) {
1094     &_CommentDeclaration_ ($doc => $doc, $parent => $node);
1095    
1096     ~ (MDC) {
1097     ?lexmode ElementContent;
1098     } else {
1099     ?lexmode ElementContent;
1100     }
1101     } (CDSO) {
1102     &_CDATASection_ ($doc => $doc, $parent => $node);
1103    
1104     ~ (MSE) {
1105     ?lexmode 'ElementContent';
1106     } else {
1107     ?lexmode ElementContent;
1108     }
1109     } (PIO) {
1110     &_ProcessingInstruction_ ($doc => $doc, $parent => $node);
1111    
1112     ~ (PIC) {
1113     ?lexmode 'ElementContent';
1114     } else {
1115     ?lexmode ElementContent;
1116     }
1117     }
1118 wakaba 1.3
1119     ~ (#NONE) { }
1120 wakaba 1.1 } // Element_
1121    
1122     rule AttributeSpecificationList ($doc, $attrs)
1123     : standalone
1124     {
        // Fills $attrs with one entry per attribute specification, keyed
        // by the attribute name as written.  Each entry holds the value
        // nodes, the value string and the order of appearance.
1125     ?lexmode 'StartTag';
1126    
1127     my $i;
1128     lang:Perl {
1129     $i = 0;
1130     }
1131    
1132     ~* (Name) {
1133     my $atqname;
1134     lang:Perl ($name => $token.value) {
1135     $atqname = $name;
1136     }
1137    
1138     my $vals;
1139     lang:Perl {
        ## A name seen before is reported as wf-unique-att-spec; the
        ## earlier entry is then replaced by the new one created below.
1140     if ($attrs->{$atqname}) {
1141 wakaba 1.3 my $location;
1142     __CODE{xp|get-location-from-token::
1143     $token => $token,
1144     $result => $location,
1145     }__;
1146     my $continue = __DOMCore:ERROR{xp|wf-unique-att-spec::
1147     DOMCore:location => {$location},
1148     xp|token => {$token},
1149     xp|name => {$atqname},
1150     }__;
1151     unless ($continue) {
1152     __EXCEPTION{DOMLS|PARSE_ERR}__;
1153     }
1154     $self->{has_error} = true;
1155 wakaba 1.1 }
1156    
1157     $vals = $attrs->{$atqname} = {
1158     nodes => [],
1159     value => '',
1160     index => $i++,
1161     };
1162     }
1163 wakaba 1.3
1164     ~? (S) { }
1165     ~ (VI) { }
1166     ~? (S) { }
1167 wakaba 1.1
        // Attribute value literal, delimited by LIT or by LITA
1168     ~ (LIT) {
1169     &_AttributeValueSpecification_
1170     ($doc => $doc, $vals => $vals);
1171    
1172     ~ (LIT) {
1173     ?lexmode StartTag;
1174     } else {
1175     ?lexmode StartTag;
1176     }
1177     } (LITA) {
1178     &_AttributeValueSpecificationA_
1179     ($doc => $doc, $vals => $vals);
1180    
1181     ~ (LITA) {
1182     ?lexmode StartTag;
1183     } else {
1184     ?lexmode StartTag;
1185     }
1186     }
1187     } (S) : separator : terminator? { }
1188     } // AttributeSpecificationList
1189    
1190     rule _AttributeValueSpecification_ ($doc, $vals) {
         // Parses the content of an attribute value in the
         // |AttributeValueLiteral| lexmode.  The |LIT| delimiters are
         // matched by the caller, not here.  Results are accumulated in
         // $vals->{nodes} and $vals->{value}.
1191     // ~ (LIT) { }
1192     ?lexmode 'AttributeValueLiteral';
1193    
1194     ~* (STRING) {
1195     lang:Perl ($value => $token.value) {
         ## Each TAB, LF and CR in literal text is replaced by one space.
1196     $value =~ s/[\x09\x0A\x0D]/ /g;
1197     my $text = $doc-><M::Document.createTextNode> ($value);
1198     push @{$vals->{nodes}}, $text;
1199     $vals->{value} .= $value;
1200     }
1201     } (HCRO) {
1202     &_HexadecimalCharacterReferenceV_
1203     ($doc => $doc, $vals => $vals);
1204    
         // The reference sub-rule changes the lexmode; restore it after
         // the closing |REFC| whether or not that token was found.
1205     ~ (REFC) {
1206     ?lexmode AttributeValueLiteral;
1207     } else {
1208     ?lexmode AttributeValueLiteral;
1209     }
1210     } (CRO) {
1211     &_NumericCharacterReferenceV_
1212     ($doc => $doc, $vals => $vals);
1213    
1214     ~ (REFC) {
1215     ?lexmode AttributeValueLiteral;
1216     } else {
1217     ?lexmode AttributeValueLiteral;
1218     }
1219     } (ERO) {
1220     // TODO: Attribute value normalization
1221     &_GeneralEntityReferenceV_
1222     ($doc => $doc, $vals => $vals);
1223    
1224     ~ (REFC) {
1225     ?lexmode AttributeValueLiteral;
1226     } else {
1227     ?lexmode AttributeValueLiteral;
1228     }
1229     }
1230    
1231     // ~ (LIT) { } (LITA) { }
1232     } // _AttributeValueSpecification_
1233    
1234     rule _AttributeValueSpecificationA_ ($doc, $vals) {
         // Same as |_AttributeValueSpecification_| but for a value opened
         // by |LITA|: it scans in the |AttributeValueLiteralA| lexmode.
         // The |LITA| delimiters are matched by the caller, not here.
1235     // ~ (LITA) { }
1236     ?lexmode 'AttributeValueLiteralA';
1237    
1238     ~* (STRING) {
1239     lang:Perl ($value => $token.value) {
         ## Each TAB, LF and CR in literal text is replaced by one space.
1240     $value =~ s/[\x09\x0A\x0D]/ /g;
1241     my $text = $doc-><M::Document.createTextNode> ($value);
1242     push @{$vals->{nodes}}, $text;
1243     $vals->{value} .= $value;
1244     }
1245     } (HCRO) {
1246     &_HexadecimalCharacterReferenceV_
1247     ($doc => $doc, $vals => $vals);
1248    
         // The reference sub-rule changes the lexmode; restore it after
         // the closing |REFC| whether or not that token was found.
1249     ~ (REFC) {
1250     ?lexmode AttributeValueLiteralA;
1251     } else {
1252     ?lexmode AttributeValueLiteralA;
1253     }
1254     } (CRO) {
1255     &_NumericCharacterReferenceV_
1256     ($doc => $doc, $vals => $vals);
1257    
1258     ~ (REFC) {
1259     ?lexmode AttributeValueLiteralA;
1260     } else {
1261     ?lexmode AttributeValueLiteralA;
1262     }
1263     } (ERO) {
1264     // TODO: Attribute value normalization
1265     &_GeneralEntityReferenceV_
1266     ($doc => $doc, $vals => $vals);
1267    
1268     ~ (REFC) {
1269     ?lexmode AttributeValueLiteralA;
1270     } else {
1271     ?lexmode AttributeValueLiteralA;
1272     }
1273     }
1274    
1275     // ~ (LITA) { }
1276     } // _AttributeValueSpecificationA_
1277    
1278     /*
1279     CDATA Section Content Parsing Mode
         Reads the optional |CData| token in the |CDATASectionContent|
         lexmode, creates a CDATA section node from it (from the empty
         string when the token is absent) and appends it to $parent.
         The closing |MSE| is matched by the caller, not here.
1280     */
1281     rule _CDATASection_ ($doc, $parent) {
1282     ?lexmode 'CDATASectionContent';
1283    
1284     my $cdata;
1285    
1286 wakaba 1.2 ~? (CData) {
1287 wakaba 1.1 lang:Perl ($data => $token.value) {
1288     $cdata = $data;
1289     }
1290     } else {
1291     lang:Perl {
1292     $cdata = '';
1293     }
1294     }
1295    
1296     lang:Perl {
1297     my $cdsect = $doc-><M::Document.createCDATASection>
1298     ($cdata);
1299     $parent-><M::Node.appendChild> ($cdsect);
1300     }
1301    
1302     // ~ (MSE) { }
1303     } // _CDATASection_
1304    
1305     rule _NumericCharacterReference_ ($doc, $parent) {
         // Parses the digits of a decimal character reference in element
         // content and appends the referenced character to $parent as a
         // text node.  The closing |REFC| is matched by the caller.
1306     ?lexmode 'NumericCharacterReference';
1307    
1308     ~ (NUMBER) {
1309 wakaba 1.3 lang:Perl ($num => $token.value) : has-error {
1310     $num += 0;
         ## Report |wf-legal-character| unless $num is allowed for the
         ## current XML version: 1.0 allows #x9, #xA, #xD and #x20 upward;
         ## 1.1 allows #x1 upward; both exclude #xD800-#xDFFF, #xFFFE,
         ## #xFFFF and anything above #x10FFFF.  Any other value of
         ## $self->{xml_version} makes every reference an error.
1311     unless (
1312     ($self->{xml_version} eq '1.0' and
1313     ((0x0020 <= $num and $num <= 0xD7FF) or
1314     (0xE000 <= $num and $num <= 0xFFFD) or
1315     (0x10000 <= $num and $num <= 0x10FFFF) or
1316     $num == 0x9 or $num == 0xA or $num == 0xD)) or
1317     ($self->{xml_version} eq '1.1' and
1318     ((0x0001 <= $num and $num <= 0xD7FF) or
1319     (0xE000 <= $num and $num <= 0xFFFD) or
1320     (0x10000 <= $num and $num <= 0x10FFFF)))
1321     ) {
1322     my $location;
1323     __CODE{xp|get-location-from-token::
1324     $token => $token,
1325     $result => $location,
1326     }__;
1327     my $continue = __DOMCore:ERROR{xp|wf-legal-character::
1328     DOMCore:location => {$location},
1329     xp|token => {$token},
1330     xp|character-number => {$num},
1331     xp|parent => {$parent},
1332     }__;
1333     unless ($continue) {
1334     __EXCEPTION{DOMLS|PARSE_ERR}__;
1335     }
1336     $self->{has_error} = true;
1337     }
         ## $num is already numeric here, so a single |chr| is enough.
1338     my $ncr = $doc-><M::Document.createTextNode>
1339 wakaba 1.1 (chr $num);
1340     $parent-><M::Node.appendChild> ($ncr);
1341     }
1342     }
1343    
1344     // ~ (REFC) { }
1345     } // _NumericCharacterReference_
1346    
1347     rule _NumericCharacterReferenceV_ ($doc, $vals) {
         // Attribute value variant of |_NumericCharacterReference_|: the
         // referenced character is pushed onto $vals->{nodes} as a text
         // node and appended to $vals->{value}.  The closing |REFC| is
         // matched by the caller.
1348     ?lexmode 'NumericCharacterReference';
1349    
1350     ~ (NUMBER) {
1351 wakaba 1.3 lang:Perl ($num => $token.value) : has-error {
1352     $num += 0;
         ## Report |wf-legal-character| unless $num is allowed for the
         ## current XML version (same ranges as in
         ## |_NumericCharacterReference_|).  No |xp|parent| is passed here.
1353     unless (
1354     ($self->{xml_version} eq '1.0' and
1355     ((0x0020 <= $num and $num <= 0xD7FF) or
1356     (0xE000 <= $num and $num <= 0xFFFD) or
1357     (0x10000 <= $num and $num <= 0x10FFFF) or
1358     $num == 0x9 or $num == 0xA or $num == 0xD)) or
1359     ($self->{xml_version} eq '1.1' and
1360     ((0x0001 <= $num and $num <= 0xD7FF) or
1361     (0xE000 <= $num and $num <= 0xFFFD) or
1362     (0x10000 <= $num and $num <= 0x10FFFF)))
1363     ) {
1364     my $location;
1365     __CODE{xp|get-location-from-token::
1366     $token => $token,
1367     $result => $location,
1368     }__;
1369     my $continue = __DOMCore:ERROR{xp|wf-legal-character::
1370     DOMCore:location => {$location},
1371     xp|token => {$token},
1372     xp|character-number => {$num},
1373     }__;
1374     unless ($continue) {
1375     __EXCEPTION{DOMLS|PARSE_ERR}__;
1376     }
1377     $self->{has_error} = true;
1378     }
1379 wakaba 1.1 my $ncr = $doc-><M::Document.createTextNode>
1380     (my $char = chr (0+$num));
1381     push @{$vals->{nodes}}, $ncr;
1382     $vals->{value} .= $char;
1383     }
1384     }
1385    
1386     // ~ (REFC) { }
1387     } // _NumericCharacterReferenceV_
1388    
1389     rule _HexadecimalCharacterReference_ ($doc, $parent) {
         // Parses the digits of a hexadecimal character reference in
         // element content and appends the referenced character to
         // $parent as a text node.  The closing |REFC| is matched by the
         // caller.
1390     ?lexmode 'HexadecimalCharacterReference';
1391    
1392     ~ (Hex) {
1393 wakaba 1.3 lang:Perl ($num => $token.value) : has-error {
         ## The token value is a string of hexadecimal digits.  Convert
         ## it once so that the legality check, the reported character
         ## number and |chr| below all use the real code point.
1394     $num = hex $num;
1395     unless (
1396     ($self->{xml_version} eq '1.0' and
1397     ((0x0020 <= $num and $num <= 0xD7FF) or
1398     (0xE000 <= $num and $num <= 0xFFFD) or
1399     (0x10000 <= $num and $num <= 0x10FFFF) or
1400     $num == 0x9 or $num == 0xA or $num == 0xD)) or
1401     ($self->{xml_version} eq '1.1' and
1402     ((0x0001 <= $num and $num <= 0xD7FF) or
1403     (0xE000 <= $num and $num <= 0xFFFD) or
1404     (0x10000 <= $num and $num <= 0x10FFFF)))
1405     ) {
1406     my $location;
1407     __CODE{xp|get-location-from-token::
1408     $token => $token,
1409     $result => $location,
1410     }__;
1411     my $continue = __DOMCore:ERROR{xp|wf-legal-character::
1412     DOMCore:location => {$location},
1413     xp|token => {$token},
1414     xp|character-number => {$num},
1415     xp|parent => {$parent},
1416     }__;
1417     unless ($continue) {
1418     __EXCEPTION{DOMLS|PARSE_ERR}__;
1419     }
1420     $self->{has_error} = true;
1421     }
1422 wakaba 1.1 my $ncr = $doc-><M::Document.createTextNode>
1423     (chr $num);
1424     $parent-><M::Node.appendChild> ($ncr);
1425     }
1426     }
1427    
1428     // ~ (REFC) { }
1429     } // _HexadecimalCharacterReference_
1430    
1431 wakaba 1.3 rule _HexadecimalCharacterReferenceV_ ($doc, $vals) {
         // Attribute value variant of |_HexadecimalCharacterReference_|:
         // the referenced character is pushed onto $vals->{nodes} as a
         // text node and appended to $vals->{value}.  The closing |REFC|
         // is matched by the caller.
1432 wakaba 1.1 ?lexmode 'HexadecimalCharacterReference';
1433    
1434     ~ (Hex) {
1435 wakaba 1.3 lang:Perl ($num => $token.value) : has-error {
         ## The token value is a string of hexadecimal digits.  Convert
         ## it once so that the legality check, the reported character
         ## number and |chr| below all use the real code point.
1436     $num = hex $num;
1437     unless (
1438     ($self->{xml_version} eq '1.0' and
1439     ((0x0020 <= $num and $num <= 0xD7FF) or
1440     (0xE000 <= $num and $num <= 0xFFFD) or
1441     (0x10000 <= $num and $num <= 0x10FFFF) or
1442     $num == 0x9 or $num == 0xA or $num == 0xD)) or
1443     ($self->{xml_version} eq '1.1' and
1444     ((0x0001 <= $num and $num <= 0xD7FF) or
1445     (0xE000 <= $num and $num <= 0xFFFD) or
1446     (0x10000 <= $num and $num <= 0x10FFFF)))
1447     ) {
1448     my $location;
1449     __CODE{xp|get-location-from-token::
1450     $token => $token,
1451     $result => $location,
1452     }__;
1453     my $continue = __DOMCore:ERROR{xp|wf-legal-character::
1454     DOMCore:location => {$location},
1455     xp|token => {$token},
1456     xp|character-number => {$num},
1457     }__;
1458     unless ($continue) {
1459     __EXCEPTION{DOMLS|PARSE_ERR}__;
1460     }
1461     $self->{has_error} = true;
1462     }
1463 wakaba 1.1 my $ncr = $doc-><M::Document.createTextNode>
1464     (my $char = chr $num);
1465     push @{$vals->{nodes}}, $ncr;
1466     $vals->{value} .= $char;
1467     }
1468     }
1469    
1470     // ~ (REFC) { }
1471     } // _HexadecimalCharacterReferenceV_
1472    
1473     rule _GeneralEntityReference_ ($doc, $parent) {
         // Parses the name of a general entity reference in element
         // content and appends an entity reference node with that name to
         // $parent.  The entity is not expanded.  The closing |REFC| is
         // matched by the caller.
1474     // TODO: Expansion
1475     ?lexmode 'EntityReference';
1476    
1477     ~ (Name) {
1478     lang:Perl ($name => $token.value) {
1479     ## TODO: Namespace well-formedness
1480     ## TODO: Entity declared constraints
1481     my $er = $doc-><M::Document.createEntityReference>
1482     ($name);
1483     $parent-><M::Node.appendChild> ($er);
1484     }
1485     }
1486    
1487     // ~ (REFC) { }
1488     } // _GeneralEntityReference_
1489    
1490     rule _GeneralEntityReferenceV_ ($doc, $vals) {
         // Attribute value variant of |_GeneralEntityReference_|: the
         // entity reference node is pushed onto $vals->{nodes}.  Nothing
         // is appended to $vals->{value} because the entity is not
         // expanded.  The closing |REFC| is matched by the caller.
1491     // TODO: Expansion
1492     ?lexmode 'EntityReference';
1493    
1494     ~ (Name) {
1495     lang:Perl ($name => $token.value) {
1496     ## TODO: Namespace well-formedness
1497     ## TODO: Entity declared constraints
1498     my $er = $doc-><M::Document.createEntityReference>
1499     ($name);
1500     push @{$vals->{nodes}}, $er;
1501     }
1502     }
1503    
1504     // ~ (REFC) { }
1505     } // _GeneralEntityReferenceV_
1506    
1507    
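1508     /*
1509     XML Name: defines the XML 1.0 and XML 1.1 name character classes; the |Name| token itself is built from the XML 1.1 classes only
1510     */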
1511     lexmode Name {
1512     $NameStartChar10 := [
1513     '_' ':'
1514     // Letter
1515     // BaseChar
1516     U+0041..U+005A U+0061..U+007A U+00C0..U+00D6
1517     U+00D8..U+00F6 U+00F8..U+00FF U+0100..U+0131
1518     U+0134..U+013E U+0141..U+0148 U+014A..U+017E
1519     U+0180..U+01C3 U+01CD..U+01F0 U+01F4..U+01F5
1520     U+01FA..U+0217 U+0250..U+02A8 U+02BB..U+02C1
1521     U+0386 U+0388..U+038A U+038C U+038E..U+03A1
1522     U+03A3..U+03CE U+03D0..U+03D6 U+03DA U+03DC
1523     U+03DE U+03E0 U+03E2..U+03F3 U+0401..U+040C
1524     U+040E..U+044F U+0451..U+045C U+045E..U+0481
1525     U+0490..U+04C4 U+04C7..U+04C8 U+04CB..U+04CC
1526     U+04D0..U+04EB U+04EE..U+04F5 U+04F8..U+04F9
1527     U+0531..U+0556 U+0559 U+0561..U+0586
1528     U+05D0..U+05EA U+05F0..U+05F2 U+0621..U+063A
1529     U+0641..U+064A U+0671..U+06B7 U+06BA..U+06BE
1530     U+06C0..U+06CE U+06D0..U+06D3 U+06D5
1531     U+06E5..U+06E6 U+0905..U+0939 U+093D
1532     U+0958..U+0961 U+0985..U+098C U+098F..U+0990
1533     U+0993..U+09A8 U+09AA..U+09B0 U+09B2
1534     U+09B6..U+09B9 U+09DC..U+09DD U+09DF..U+09E1
1535     U+09F0..U+09F1 U+0A05..U+0A0A U+0A0F..U+0A10
1536     U+0A13..U+0A28 U+0A2A..U+0A30 U+0A32..U+0A33
1537     U+0A35..U+0A36 U+0A38..U+0A39 U+0A59..U+0A5C
1538     U+0A5E U+0A72..U+0A74 U+0A85..U+0A8B U+0A8D
1539     U+0A8F..U+0A91 U+0A93..U+0AA8 U+0AAA..U+0AB0
1540     U+0AB2..U+0AB3 U+0AB5..U+0AB9 U+0ABD U+0AE0
1541     U+0B05..U+0B0C U+0B0F..U+0B10 U+0B13..U+0B28
1542     U+0B2A..U+0B30 U+0B32..U+0B33 U+0B36..U+0B39
1543     U+0B3D U+0B5C..U+0B5D U+0B5F..U+0B61
1544     U+0B85..U+0B8A U+0B8E..U+0B90 U+0B92..U+0B95
1545     U+0B99..U+0B9A U+0B9C U+0B9E..U+0B9F
1546     U+0BA3..U+0BA4 U+0BA8..U+0BAA U+0BAE..U+0BB5
1547     U+0BB7..U+0BB9 U+0C05..U+0C0C U+0C0E..U+0C10
1548     U+0C12..U+0C28 U+0C2A..U+0C33 U+0C35..U+0C39
1549     U+0C60..U+0C61 U+0C85..U+0C8C U+0C8E..U+0C90
1550     U+0C92..U+0CA8 U+0CAA..U+0CB3 U+0CB5..U+0CB9
1551     U+0CDE U+0CE0..U+0CE1 U+0D05..U+0D0C
1552     U+0D0E..U+0D10 U+0D12..U+0D28 U+0D2A..U+0D39
1553     U+0D60..U+0D61 U+0E01..U+0E2E U+0E30
1554     U+0E32..U+0E33 U+0E40..U+0E45 U+0E81..U+0E82
1555     U+0E84 U+0E87..U+0E88 U+0E8A U+0E8D
1556     U+0E94..U+0E97 U+0E99..U+0E9F U+0EA1..U+0EA3
1557     U+0EA5 U+0EA7 U+0EAA..U+0EAB U+0EAD..U+0EAE
1558     U+0EB0 U+0EB2..U+0EB3 U+0EBD U+0EC0..U+0EC4
1559     U+0F40..U+0F47 U+0F49..U+0F69 U+10A0..U+10C5
1560     U+10D0..U+10F6 U+1100 U+1102..U+1103
1561     U+1105..U+1107 U+1109 U+110B..U+110C
1562     U+110E..U+1112 U+113C U+113E U+1140 U+114C
1563     U+114E U+1150 U+1154..U+1155 U+1159
1564     U+115F..U+1161 U+1163 U+1165 U+1167 U+1169
1565     U+116D..U+116E U+1172..U+1173 U+1175 U+119E
1566     U+11A8 U+11AB U+11AE..U+11AF U+11B7..U+11B8
1567     U+11BA U+11BC..U+11C2 U+11EB U+11F0 U+11F9
1568     U+1E00..U+1E9B U+1EA0..U+1EF9 U+1F00..U+1F15
1569     U+1F18..U+1F1D U+1F20..U+1F45 U+1F48..U+1F4D
1570     U+1F50..U+1F57 U+1F59 U+1F5B U+1F5D
1571     U+1F5F..U+1F7D U+1F80..U+1FB4 U+1FB6..U+1FBC
1572     U+1FBE U+1FC2..U+1FC4 U+1FC6..U+1FCC
1573     U+1FD0..U+1FD3 U+1FD6..U+1FDB U+1FE0..U+1FEC
1574     U+1FF2..U+1FF4 U+1FF6..U+1FFC U+2126
1575     U+212A..U+212B U+212E U+2180..U+2182
1576     U+3041..U+3094 U+30A1..U+30FA U+3105..U+312C
1577     U+AC00..U+D7A3
1578     // Ideographic
1579     U+4E00..U+9FA5 U+3007 U+3021..U+3029
1580     ];
1581     $NameChar10 := [
1582     '.' '-' '_' ':'
1583     // Letter
1584     // BaseChar
1585     U+0041..U+005A U+0061..U+007A U+00C0..U+00D6
1586     U+00D8..U+00F6 U+00F8..U+00FF U+0100..U+0131
1587     U+0134..U+013E U+0141..U+0148 U+014A..U+017E
1588     U+0180..U+01C3 U+01CD..U+01F0 U+01F4..U+01F5
1589     U+01FA..U+0217 U+0250..U+02A8 U+02BB..U+02C1
1590     U+0386 U+0388..U+038A U+038C U+038E..U+03A1
1591     U+03A3..U+03CE U+03D0..U+03D6 U+03DA U+03DC
1592     U+03DE U+03E0 U+03E2..U+03F3 U+0401..U+040C
1593     U+040E..U+044F U+0451..U+045C U+045E..U+0481
1594     U+0490..U+04C4 U+04C7..U+04C8 U+04CB..U+04CC
1595     U+04D0..U+04EB U+04EE..U+04F5 U+04F8..U+04F9
1596     U+0531..U+0556 U+0559 U+0561..U+0586
1597     U+05D0..U+05EA U+05F0..U+05F2 U+0621..U+063A
1598     U+0641..U+064A U+0671..U+06B7 U+06BA..U+06BE
1599     U+06C0..U+06CE U+06D0..U+06D3 U+06D5
1600     U+06E5..U+06E6 U+0905..U+0939 U+093D
1601     U+0958..U+0961 U+0985..U+098C U+098F..U+0990
1602     U+0993..U+09A8 U+09AA..U+09B0 U+09B2
1603     U+09B6..U+09B9 U+09DC..U+09DD U+09DF..U+09E1
1604     U+09F0..U+09F1 U+0A05..U+0A0A U+0A0F..U+0A10
1605     U+0A13..U+0A28 U+0A2A..U+0A30 U+0A32..U+0A33
1606     U+0A35..U+0A36 U+0A38..U+0A39 U+0A59..U+0A5C
1607     U+0A5E U+0A72..U+0A74 U+0A85..U+0A8B U+0A8D
1608     U+0A8F..U+0A91 U+0A93..U+0AA8 U+0AAA..U+0AB0
1609     U+0AB2..U+0AB3 U+0AB5..U+0AB9 U+0ABD U+0AE0
1610     U+0B05..U+0B0C U+0B0F..U+0B10 U+0B13..U+0B28
1611     U+0B2A..U+0B30 U+0B32..U+0B33 U+0B36..U+0B39
1612     U+0B3D U+0B5C..U+0B5D U+0B5F..U+0B61
1613     U+0B85..U+0B8A U+0B8E..U+0B90 U+0B92..U+0B95
1614     U+0B99..U+0B9A U+0B9C U+0B9E..U+0B9F
1615     U+0BA3..U+0BA4 U+0BA8..U+0BAA U+0BAE..U+0BB5
1616     U+0BB7..U+0BB9 U+0C05..U+0C0C U+0C0E..U+0C10
1617     U+0C12..U+0C28 U+0C2A..U+0C33 U+0C35..U+0C39
1618     U+0C60..U+0C61 U+0C85..U+0C8C U+0C8E..U+0C90
1619     U+0C92..U+0CA8 U+0CAA..U+0CB3 U+0CB5..U+0CB9
1620     U+0CDE U+0CE0..U+0CE1 U+0D05..U+0D0C
1621     U+0D0E..U+0D10 U+0D12..U+0D28 U+0D2A..U+0D39
1622     U+0D60..U+0D61 U+0E01..U+0E2E U+0E30
1623     U+0E32..U+0E33 U+0E40..U+0E45 U+0E81..U+0E82
1624     U+0E84 U+0E87..U+0E88 U+0E8A U+0E8D
1625     U+0E94..U+0E97 U+0E99..U+0E9F U+0EA1..U+0EA3
1626     U+0EA5 U+0EA7 U+0EAA..U+0EAB U+0EAD..U+0EAE
1627     U+0EB0 U+0EB2..U+0EB3 U+0EBD U+0EC0..U+0EC4
1628     U+0F40..U+0F47 U+0F49..U+0F69 U+10A0..U+10C5
1629     U+10D0..U+10F6 U+1100 U+1102..U+1103
1630     U+1105..U+1107 U+1109 U+110B..U+110C
1631     U+110E..U+1112 U+113C U+113E U+1140 U+114C
1632     U+114E U+1150 U+1154..U+1155 U+1159
1633     U+115F..U+1161 U+1163 U+1165 U+1167 U+1169
1634     U+116D..U+116E U+1172..U+1173 U+1175 U+119E
1635     U+11A8 U+11AB U+11AE..U+11AF U+11B7..U+11B8
1636     U+11BA U+11BC..U+11C2 U+11EB U+11F0 U+11F9
1637     U+1E00..U+1E9B U+1EA0..U+1EF9 U+1F00..U+1F15
1638     U+1F18..U+1F1D U+1F20..U+1F45 U+1F48..U+1F4D
1639     U+1F50..U+1F57 U+1F59 U+1F5B U+1F5D
1640     U+1F5F..U+1F7D U+1F80..U+1FB4 U+1FB6..U+1FBC
1641     U+1FBE U+1FC2..U+1FC4 U+1FC6..U+1FCC
1642     U+1FD0..U+1FD3 U+1FD6..U+1FDB U+1FE0..U+1FEC
1643     U+1FF2..U+1FF4 U+1FF6..U+1FFC U+2126
1644     U+212A..U+212B U+212E U+2180..U+2182
1645     U+3041..U+3094 U+30A1..U+30FA U+3105..U+312C
1646     U+AC00..U+D7A3
1647     // Ideographic
1648     U+4E00..U+9FA5 U+3007 U+3021..U+3029
1649     // Digit
1650     U+0030..U+0039 U+0660..U+0669 U+06F0..U+06F9
1651     U+0966..U+096F U+09E6..U+09EF U+0A66..U+0A6F
1652     U+0AE6..U+0AEF U+0B66..U+0B6F U+0BE7..U+0BEF
1653     U+0C66..U+0C6F U+0CE6..U+0CEF U+0D66..U+0D6F
1654     U+0E50..U+0E59 U+0ED0..U+0ED9 U+0F20..U+0F29
1655     // CombiningChar
1656     U+0300..U+0345 U+0360..U+0361 U+0483..U+0486
1657     U+0591..U+05A1 U+05A3..U+05B9 U+05BB..U+05BD
1658     U+05BF U+05C1..U+05C2 U+05C4 U+064B..U+0652
1659     U+0670 U+06D6..U+06DC U+06DD..U+06DF
1660     U+06E0..U+06E4 U+06E7..U+06E8 U+06EA..U+06ED
1661     U+0901..U+0903 U+093C U+093E..U+094C U+094D
1662     U+0951..U+0954 U+0962..U+0963 U+0981..U+0983
1663     U+09BC U+09BE U+09BF U+09C0..U+09C4
1664     U+09C7..U+09C8 U+09CB..U+09CD U+09D7
1665     U+09E2..U+09E3 U+0A02 U+0A3C U+0A3E U+0A3F
1666     U+0A40..U+0A42 U+0A47..U+0A48 U+0A4B..U+0A4D
1667     U+0A70..U+0A71 U+0A81..U+0A83 U+0ABC
1668     U+0ABE..U+0AC5 U+0AC7..U+0AC9 U+0ACB..U+0ACD
1669     U+0B01..U+0B03 U+0B3C U+0B3E..U+0B43
1670     U+0B47..U+0B48 U+0B4B..U+0B4D U+0B56..U+0B57
1671     U+0B82..U+0B83 U+0BBE..U+0BC2 U+0BC6..U+0BC8
1672     U+0BCA..U+0BCD U+0BD7 U+0C01..U+0C03
1673     U+0C3E..U+0C44 U+0C46..U+0C48 U+0C4A..U+0C4D
1674     U+0C55..U+0C56 U+0C82..U+0C83 U+0CBE..U+0CC4
1675     U+0CC6..U+0CC8 U+0CCA..U+0CCD U+0CD5..U+0CD6
1676     U+0D02..U+0D03 U+0D3E..U+0D43 U+0D46..U+0D48
1677     U+0D4A..U+0D4D U+0D57 U+0E31 U+0E34..U+0E3A
1678     U+0E47..U+0E4E U+0EB1 U+0EB4..U+0EB9
1679     U+0EBB..U+0EBC U+0EC8..U+0ECD U+0F18..U+0F19
1680     U+0F35 U+0F37 U+0F39 U+0F3E U+0F3F
1681     U+0F71..U+0F84 U+0F86..U+0F8B U+0F90..U+0F95
1682     U+0F97 U+0F99..U+0FAD U+0FB1..U+0FB7 U+0FB9
1683     U+20D0..U+20DC U+20E1 U+302A..U+302F U+3099
1684     U+309A
1685     // Extender
1686     U+00B7 U+02D0 U+02D1 U+0387 U+0640 U+0E46
1687     U+0EC6 U+3005 U+3031..U+3035 U+309D..U+309E
1688     U+30FC..U+30FE
1689     ];
1690    
1691     $NameStartChar11 := [
1692     ':' '_'
1693     'A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M'
1694     'N' 'O' 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' 'X' 'Y' 'Z'
1695     'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm'
1696     'n' 'o' 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z'
1697     U+00C0..U+00D6 U+00D8..U+00F6 U+00F8..U+02FF
1698     U+0370..U+037D U+037F..U+1FFF U+200C..U+200D
1699     U+2070..U+218F U+2C00..U+2FEF U+3001..U+D7FF
1700     U+F900..U+FDCF U+FDF0..U+FFFD U+10000..U+EFFFF
1701     ];
1702     $NameChar11 := [
1703     '-' '.' '0' '1' '2' '3' '4' '5' '6' '7' '8' '9'
1704     U+00B7 U+0300..U+036F U+203F..U+2040
1705     // NameStartChar
1706     ':' '_'
1707     'A' 'B' 'C' 'D' 'E' 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M'
1708     'N' 'O' 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' 'X' 'Y' 'Z'
1709     'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm'
1710     'n' 'o' 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z'
1711     U+00C0..U+00D6 U+00D8..U+00F6 U+00F8..U+02FF
1712     U+0370..U+037D U+037F..U+1FFF U+200C..U+200D
1713     U+2070..U+218F U+2C00..U+2FEF U+3001..U+D7FF
1714     U+F900..U+FDCF U+FDF0..U+FFFD U+10000..U+EFFFF
1715     ];
1716     Name : value := $NameStartChar11 $NameChar11*;
1717     } // Name
1718    
1719     /*
1720     Space: a run of one or more TAB, LF, CR or SPACE characters
1721     */
1722     lexmode S {
1723     S := [U+0009 U+000A U+000D U+0020]+;
1724     } // S
1725    
1726     /*
1727     Document end scanning mode
         Extends |S| and adds the open delimiters of processing
         instructions and comment declarations.
1728     */
1729     lexmode DocumentEnd
1730     : standalone
1731     : extends => 'S'
1732     {
1733     /*
1734     Processing instruction
1735     */
1736     PIO := ['<'] ['?'];
1737    
1738     /*
1739     Comment declaration
1740     */
1741     CDO := ['<'] ['!'] ['-'] ['-'];
1742     } // DocumentEnd
1743    
1744     /*
1745     Document misc scanning mode
1746    
1747     This mode scans |Misc| constructions as well
1748     as document element's start tag.
         It extends |DocumentEnd| and adds |STAGO|.
1749     */
1750     lexmode DocumentMisc
1751     : standalone
1752     : extends => 'DocumentEnd'
1753     {
1754     /*
1755     Document element start tag
1756     */
1757     STAGO := ['<'];
1758     } // DocumentMisc
1759    
1760     /*
1761     Document prolog scanning mode
         Extends |DocumentMisc| and adds |MDO|.
1762     */
1763     lexmode DocumentProlog
1764     : standalone
1765     : extends => 'DocumentMisc'
1766     {
1767     /*
1768     |DOCTYPE| declaration
1769     */
1770     MDO := ['<'] ['!'];
1771     } // DocumentProlog
1772    
1773     /*
1774     Document start scanning mode
         Declared |initial|.  Extends |DocumentProlog| and adds |XDO|.
1775     */
1776     lexmode DocumentStart
1777     : initial
1778     : standalone
1779     : extends => 'DocumentProlog'
1780     {
1781     /*
1782     XML declaration
1783     */
1784     XDO := ['<'] ['?'] ['x'] ['m'] ['l'];
1785     } // DocumentStart
1786    
1787     /*
1788     Markup declaration scanning mode
1789    
1790     This mode is used to recognize |MDC| that terminates
1791     a comment declaration as well as the base |lexmode|
1792     for e.g. document type declaration scanning mode.
1793     */
1794     lexmode MarkupDeclaration
1795     : standalone
1796     : extends => 'Name'
1797     {
1798     /*
1799     Markup declaration close
1800     */
1801     MDC := ['>'];
1802    
1803     /*
1804     Literal open
1805     */
1806     LIT := ['"'];
1807    
1808     /*
1809     Alternative literal open (apostrophe, U+0027)
1810     */
1811     LITA := [U+0027];
1812     } // MarkupDeclaration
1813    
1814     lexmode DocumentTypeDeclaration
1815     : standalone
1816     : extends => 'MarkupDeclaration'
1817     {
1818     /*
1819     Declaration subset open
1820     */
1821     DSO := ['['];
1822    
1823     /*
1824     Declaration subset close
1825     */
1826     DSC := [']'];
1827     } // DocumentTypeDeclaration
1828    
1829     /*
1830     Comment declaration scanning mode
1831     */
1832     lexmode CommentDeclaration
1833     : standalone
1834     {
1835     /*
1836     Comment close
1837     */
1838     COM := ['-'] ['-'];
1839    
1840     /*
1841     Comment data
         Each unit is an optional single '-' followed by one character
         other than '-', so |STRING| never contains two adjacent
         hyphens and never ends with a hyphen.
1842     */
1843     $string := ['-']? [^'-'];
1844     STRING : value := $string+;
1845     } // CommentDeclaration
1846    
1847     /*
1848     Processing instruction name and |S| scanning mode
         Extends both |Name| and |S| and adds |PIC|.
1849     */
1850     lexmode PIName
1851     : standalone
1852     : extends => 'Name'
1853     : extends => 'S'
1854     {
1855     /*
1856     Processing instruction close
1857     */
1858     PIC := ['?'] ['>'];
1859     } // PIName
1860    
1861     /*
1862     Processing instruction data scanning mode
1863     */
1864     lexmode PIData
1865     : standalone
1866     {
1867     /*
1868     Processing instruction close
1869     */
1870     PIC := ['?'] ['>'];
1871    
1872     /*
1873     Processing instruction target data
         NOTE(review): |?default-token| presumably yields |DATA| for
         input that is not matched by |PIC| -- confirm against the
         grammar generator.
1874     */
1875 wakaba 1.2 ?default-token DATA : value;
1876 wakaba 1.1 } // PIData
1877    
1878     /*
1879     Content of element scanning mode
         Recognizes the open delimiters of tags, references, comments,
         CDATA sections and processing instructions, plus |MSE|;
         |CharData| is declared as the default token.
1880     */
1881     lexmode ElementContent
1882     : standalone
1883     {
1884     /*
1885     Start tag open
1886     */
1887     STAGO := ['<'];
1888    
1889     /*
1890     End tag open
1891     */
1892     ETAGO := ['<'] ['/'];
1893    
1894     /*
1895     Hexadecimal character reference open
1896     */
1897     HCRO := ['&'] ['#'] ['x'];
1898    
1899     /*
1900     Numeric character reference open
1901     */
1902     CRO := ['&'] ['#'];
1903    
1904     /*
1905     General entity reference open
1906     */
1907     ERO := ['&'];
1908    
1909     /*
1910     Comment declaration open
1911     */
1912     CDO := ['<'] ['!'] ['-'] ['-'];
1913    
1914     /*
1915     CDATA section open
1916     */
1917     CDSO := ['<'] ['!'] ['[']
1918     ['C'] ['D'] ['A'] ['T'] ['A'] ['['];
1919    
1920     /*
1921     Processing instruction open
1922     */
1923     PIO := ['<'] ['?'];
1924 wakaba 1.2
1925     /*
1926     Markup section end
1927     */
1928     MSE := [']'] [']'] ['>'];
1929    
1930     /*
1931     Character data
1932     */
1933     /*
1934     Character data and/or |MSE|
1935     */
1936     ?default-token CharData : value;
1937 wakaba 1.1 } // ElementContent
1938    
1939     /*
1940     CDATA section content scanning mode
         Only |MSE| is declared explicitly; |CData| is the default token.
1941     */
1942     lexmode CDATASectionContent
1943     : standalone
1944     {
1945     /*
1946     Markup section end
1947     */
1948     MSE := [']'] [']'] ['>'];
1949    
1950     /*
1951     Character data
1952 wakaba 1.2 */
1953     ?default-token CData : value;
1954 wakaba 1.1 } // CDATASectionContent
1955    
1956     lexmode EntityReference
1957     : standalone
1958     : extends => 'Name'
1959     {
         // Entity reference scanning mode: the tokens of |Name| plus the
         // closing |REFC|.
1960     /*
1961     Reference close
1962     */
1963     REFC := [';'];
1964     } // EntityReference
1965    
1966     lexmode NumericCharacterReference
1967     : standalone
1968     {
         // Decimal character reference scanning mode: a run of ASCII
         // digits (|NUMBER|) and the closing |REFC|.
1969     /*
1970     Decimal number
1971     */
1972     $digit := ['0' '1' '2' '3' '4' '5' '6' '7' '8' '9'];
1973     NUMBER : value := $digit+;
1974    
1975     /*
1976     Reference close
1977     */
1978     REFC := [';'];
1979     } // NumericCharacterReference
1980    
1981     lexmode HexadecimalCharacterReference
1982     : standalone
1983     {
         // Hexadecimal character reference scanning mode: a run of
         // hexadecimal digits in either case (|Hex|) and the closing
         // |REFC|.  The |Hex| token value is the digit string itself.
1984     /*
1985     Hexadecimal number
1986     */
1987     $hexdigit := ['0' '1' '2' '3' '4' '5' '6' '7' '8' '9'
1988     'A' 'B' 'C' 'D' 'E' 'F'
1989     'a' 'b' 'c' 'd' 'e' 'f'];
1990     Hex : value := $hexdigit+;
1991    
1992     /*
1993     Reference close
1994     */
1995     REFC := [';'];
1996     } // HexadecimalCharacterReference
1997    
1998 wakaba 1.3 lexmode XMLDeclaration
1999     : standalone
2000     : extends => 'Name'
2001     : extends => 'S'
2002     {
         // XML declaration scanning mode: the tokens of |Name| and |S|
         // plus |VI|, the two literal delimiters and the closing |PIC|.
2003    
2004     /*
2005     Value indicator
2006     */
2007     VI := ['='];
2008    
2009     /*
2010     Literal open (|LIT|: quotation mark; |LITA|: apostrophe)
2011     */
2012     LIT := ['"'];
2013     LITA := [U+0027];
2014    
2015     /*
2016     Processing instruction close
2017     */
2018     PIC := ['?'] ['>'];
2019     } // XMLDeclaration
2020    
2021 wakaba 1.1 lexmode StartTag
2022     : standalone
2023     : extends => 'Name'
2024     : extends => 'S'
2025     {
         // Start tag scanning mode: the tokens of |Name| and |S| plus
         // |VI|, the two literal delimiters and the two tag closes.
2026    
2027     /*
2028     Value indicator
2029     */
2030     VI := ['='];
2031    
2032     /*
2033     Literal open (|LIT|: quotation mark; |LITA|: apostrophe)
2034     */
2035     LIT := ['"'];
2036     LITA := [U+0027];
2037    
2038     /*
2039     Tag close
2040     */
2041     TAGC := ['>'];
2042    
2043     /*
2044     Empty element tag close
2045     */
2046     MTAGC := ['/'] ['>'];
2047     } // StartTag
2048    
2049     lexmode EndTag
2050     : standalone
2051     : extends => 'Name'
2052     : extends => 'S'
2053     {
         // End tag scanning mode: the tokens of |Name| and |S| plus
         // |TAGC|.
2054     /*
2055     Tag close
2056     */
2057     TAGC := ['>'];
2058     } // EndTag
2059    
2060     lexmode AttributeValueLiteral_ {
         // Reference open delimiters shared by |AttributeValueLiteral|
         // and |AttributeValueLiteralA|, which both extend this lexmode.
2061     ERO := ['&'];
2062     CRO := ['&'] ['#'];
2063     HCRO := ['&'] ['#'] ['x'];
2064     } // AttributeValueLiteral_
2065    
2066     lexmode AttributeValueLiteral
2067     : standalone
2068     : extends => 'AttributeValueLiteral_'
2069     {
         // Content of a value delimited by quotation marks: |LIT| closes
         // it, and |STRING| is a run of characters other than '"', '&'
         // and '<'.
2070     LIT := ['"'];
2071 wakaba 1.3 STRING : value := [^'"' '&' '<']+;
2072 wakaba 1.1 } // AttributeValueLiteral
2073    
2074     lexmode AttributeValueLiteralA
2075     : standalone
2076     : extends => 'AttributeValueLiteral_'
2077     {
         // Content of a value delimited by apostrophes: |STRING| is a run
         // of characters other than U+0027, '&' and '<'.  The closing
         // apostrophe must be named |LITA| because that is the token
         // |AttributeSpecificationList| expects after
         // |_AttributeValueSpecificationA_|.
2078     LITA := [U+0027];
2079 wakaba 1.3 STRING : value := [^U+0027 '&' '<']+;
2080 wakaba 1.1 } // AttributeValueLiteralA
2081    
2082     token-error default : default {
         // Default token error handler: reports |wf-syntax-error| for the
         // offending token, raises |PARSE_ERR| when the error handler
         // does not allow continuation, and otherwise records the error
         // in $self->{has_error}.
2083     lang:Perl {
2084 wakaba 1.3 my $location;
2085     __CODE{xp|get-location-from-token::
2086     $token => {$token},
2087     $result => {$location},
2088     }__;
2089     my $continue = __DOMCore:ERROR{xp|wf-syntax-error::
2090 wakaba 1.1 xp|error-token => {$token},
2091     DOMCore|location => {$location},
2092     }__;
2093     unless ($continue) {
2094     __EXCEPTION{DOMLS|PARSE_ERR::
2095     }__;
2096     }
2097     $self->{has_error} = true;
2098     }
2099     } // default
2100     ##ManakaiXMLParser
2101    
2102 wakaba 1.3 ResourceDef:
2103     @QName: xp|get-location-from-token
2104     @rdf:type: DISPerl|BlockCode
2105     @enDesc:
2106     Creates a <IF::DOMCore:DOMLocator> object from a token.
2107     @PerlDef:
2108     $result = {
2109     utf32_offset => pos ($self->{source}),
2110     };
2111     @For: ManakaiDOM|ManakaiDOM3
2112 wakaba 1.1
2113     ElementTypeBinding:
2114     @Name: RuleDef
2115     @ElementType:
2116     dis:ResourceDef
2117     @ShadowContent:
2118     @@ForCheck: ManakaiDOM|ForClass
2119     @@rdf:type: Muf2003|RuleDefClass
2120    
2121     ElementTypeBinding:
2122     @Name: RuleParam
2123     @ElementType:
2124     dis:ResourceDef
2125     @ShadowContent:
2126     @@rdf:type: Muf2003|RuleParameter
2127    
2128     ElementTypeBinding:
2129     @Name: enImplNote
2130     @ElementType:
2131     dis:ImplNote
2132     @ShadowContent:
2133     @@lang:en
2134    
2135     ElementTypeBinding:
2136     @Name: ErrDef
2137     @ElementType:
2138     dis:ResourceDef
2139     @ShadowContent:
2140     @@rdf:type: DOMCore|DOMErrorType
2141     @@For: ManakaiDOM|DOM3
2142     @@ecore:textFormatter: ManakaiXMLParserExceptionFormatter
2143    
2144 wakaba 1.3 ElementTypeBinding:
2145     @Name: WFErrDef
2146     @ElementType:
2147     dis:ResourceDef
2148     @ShadowContent:
2149     @@rdf:type: DOMCore|DOMErrorType
2150     @@For: ManakaiDOM|DOM3
2151     @@ecore:textFormatter: ManakaiXMLParserExceptionFormatter
2152    
2153     WFErrDef:
2154     @QName: xp|wf-syntax-error
2155     @enDesc:
2156     The entity does not match the production rule; it is not
2157     well-formed.
2158     @DOMCore:severity: DOMCore|SEVERITY_FATAL_ERROR
2159     @enMufDef:
2160     |%xp-error-token-type;|%xp-error-token-value
2161     (prefix => { (|}, suffix => {|)}); is not
2162     allowed%xp-error-lines (prefix => { (|}, suffix => {|)});
2163     @ecore:hasParameter:
2164     @@@: xp|error-token
2165     @@enDesc:
2166     The token that is not allowed.
2167    
2168     WFErrDef:
2169     @QName: xp|wf-pi-target-is-xml
2170     @enDesc:
2171     A processing instruction has its <CODE::PITarget> of
2172     <XML::xml> (in any case) which is not allowed.
2173     @DOMCore:severity: DOMCore|SEVERITY_ERROR
2174     @enMufDef:
2175     Processing instruction target name cannot be |%p
2176     (name => {<Q::xp|name>});|
2177     @ecore:hasParameter:
2178     @@@: xp|error-token
2179     @@enDesc:
2180     The token that contains the name.
2181     @ecore:hasParameter:
2182     @@@: xp|name
2183     @@enDesc:
2184     A string that is specified as target name of the
2185     processing instruction.
2186     @ecore:hasParameter: xp|parent
2187    
2188     WFErrDef:
2189     @QName: xp|wf-no-end-tag
2190     @DOMCore:severity: DOMCore|SEVERITY_ERROR
2191     @enDesc:
2192     An end-tag is not found.
2193     @enMufDef:
2194     End-tag |</%p (name => {<Q::xp|expected-element-type>});>| is required
2195     @ecore:hasParameter: xp|error-token
2196     @ecore:hasParameter:
2197     @@@: xp|node
2198     @@enDesc:
2199     The element node that is not closed.
2200     @ecore:hasParameter:
2201     @@@: xp|expected-element-type
2202     @@enDesc:
2203     The element type name of the element that is not closed.
2204    
2205     WFErrDef:
2206     @QName: xp|wf-unsupported-xml-version
2207     @DOMCore:severity: DOMCore|SEVERITY_ERROR
2208     @enDesc:
2209     The XML version specified in the version declaration is not supported.
2210     @enMufDef:
2211     XML version |%p (name => {<Q::infoset|version>});| is not supported
2212     @ecore:hasParameter: xp|bad-token
2213     @ecore:hasParameter:
2214     @@@: xp|parent
2215     @@enDesc:
2216     The document node.
2217     @ecore:hasParameter:
2218     @@@: infoset|version
2219     @@enDesc:
2220     The specified XML version.
2221    
2222     WFErrDef:
2223     @QName: xp|wf-malformed-enc-name
2224     @DOMCore:severity: DOMCore|SEVERITY_ERROR
2225     @enDesc:
2226     An <XA::encoding> pseudo-attribute value does not match
2227     the production rule <CODE::EncName>.
2228     @enMufDef:
2229     Encoding name |%p (name => {<Q::xp|name>});| is not allowed
2230     @ecore:hasParameter: xp|error-token
2231     @ecore:hasParameter:
2232     @@@: xp|parent
2233     @@enDesc: The document node.
2234     @ecore:hasParameter:
2235     @@@: xp|name
2236     @@enDesc:
2237     The <XA::encoding> value.
2238    
2239     WFErrDef:
2240     @QName: xp|wf-malformed-xml-standalone
2241     @DOMCore:severity: DOMCore|SEVERITY_ERROR
2242     @enDesc:
2243     An <XA::standalone> pseudo-attribute value is neither <XML::yes>
2244     nor <XML::no>.
2245     @enMufDef:
2246     |standalone| pseudo-attribute value |%p (name => {<Q::xp|name>});|
2247     is not allowed
2248     @ecore:hasParameter: xp|error-token
2249     @ecore:hasParameter:
2250     @@@: xp|parent
2251     @@enDesc: The document node.
2252     @ecore:hasParameter:
2253     @@@: xp|name
2254     @@enDesc:
2255     The <XA::standalone> value.
2256    
2257     WFErrDef:
2258     @QName: xp|wf-legal-literal-character
2259     @DOMCore:severity: DOMCore|SEVERITY_ERROR
2260     @enDesc:
2261     Each character in an XML entity must match the production
2262     rule <CODE::Char - RestrictedChar>.
2263     @enMufDef:
2264     Character %character-code-point
2265     (v => {<Q::xp|character-number>}); is not allowed
2266     @ecore:hasParameter:
2267     @@@: xp|character-number
2268     @@enDesc:
2269     The code position of the character that is not allowed.
2270    
2271     WFErrDef:
2272     @QName: xp|wf-element-type-match
2273     @DOMCore:severity: DOMCore|SEVERITY_FATAL_ERROR
2274 wakaba 1.1 @enDesc:
2275 wakaba 1.3 The <CODE::Name> in an element's end-tag must match the element type
2276     in the start-tag.
2277     @enMufDef:
2278     End-tag |</%p (name => {<Q::xp|actual-element-type>});>| does
2279     not match to start-tag |<%p (name => {<Q::xp|expected-element-type>});>|
2280     @ecore:hasParameter: xp|error-token
2281     @ecore:hasParameter:
2282     @@@: xp|node
2283     @@enDesc:
2284     The current opening element node.
2285     @ecore:hasParameter:
2286     @@@: xp|expected-element-type
2287     @@enDesc:
2288     The element type name of the current element.
2289     @ecore:hasParameter:
2290     @@@: xp|actual-element-type
2291     @@enDesc:
2292     The <CODE::Name> that occurs in the end-tag.
2293    
2294     WFErrDef:
2295     @QName: xp|wf-unique-att-spec
2296 wakaba 1.1 @DOMCore:severity: DOMCore|SEVERITY_ERROR
2297 wakaba 1.3 @enDesc:
2298     An attribute name <kwd:MUST-NOT> appear more than once in
2299     the same start-tag or empty-element tag.
2300 wakaba 1.1 @enMufDef:
2301 wakaba 1.3 Attribute |%p (name => {<Q::xp|name>});| is specified more
2302     than once in the same tag
2303     @ecore:hasParameter: xp|error-token
2304     @ecore:hasParameter:
2305     @@@: xp|name
2306     @@enDesc:
2307     The name of the attribute.
2308    
2309     WFErrDef:
2310     @QName: xp|wf-legal-character
2311     @DOMCore:severity: DOMCore|SEVERITY_ERROR
2312     @enDesc:
2313     Characters referred to using character references <kwd:MUST>
2314     match the production for <CODE::Char>.
2315     @enMufDef:
2316     Reference to character %character-code-point
2317     (v => {<Q::xp|character-number>}); is not allowed
2318     @ecore:hasParameter: xp|error-token
2319     @ecore:hasParameter:
2320     @@@: xp|character-number
2321     @@enDesc:
2322     The code position of the character being referred.
2323     @ecore:hasParameter:
2324     @@@: xp|parent
2325     @@enDesc:
2326     The parent node in which the character reference has
2327     occurred, if available.
2328 wakaba 1.1
2329 wakaba 1.3 XWParam:
2330 wakaba 1.1 @QName: xp|error-token
2331     @enDesc:
2332     The token where the parser found an error.
2333    
2334 wakaba 1.3 XWParam:
2335     @QName: xp|name
2336     @enDesc:
2337     A name.
2338    
2339     XWParam:
2340     @QName: xp|parent
2341     @enDesc:
2342     The parent node in which the error occurs.
2343    
2344     XWParam:
2345     @QName: xp|node
2346     @enDesc:
2347     The current node.
2348    
2349     XWParam:
2350     @QName: xp|actual-element-type
2351     @enDesc:
2352     The actual element type name that occurred in the source.
2353    
2354     XWParam:
2355     @QName: xp|expected-element-type
2356 wakaba 1.1 @enDesc:
2357 wakaba 1.3 The element type name expected.
2358    
2359     XWParam:
2360     @QName: xp|character-number
2361     @enDesc:
2362     The character code position.
2363    
2364     ElementTypeBinding:
2365     @Name: XWParam
2366     @ElementType:
2367     dis:ResourceDef
2368     @ShadowContent:
2369     @@For: =ManakaiDOM|all
2370     @@rdf:type: ecore|Parameter
2371 wakaba 1.1
2372     ElementTypeBinding:
2373     @Name:enMufDef
2374     @ElementType:
2375     ecore:defaultMessage
2376     @ShadowContent:
2377     @@lang:en
2378     @@ContentType:
2379     lang:muf
2380    
2381     ResourceDef:
2382     @QName: DOMImpl
2383     @AliasFor: DOMCore|DOMImplementation
2384     @For: ManakaiDOM|DOM
2385    
2386     ElementTypeBinding:
2387     @Name: Attr
2388     @ElementType:
2389     dis:ResourceDef
2390     @ShadowContent:
2391     @@rdf:type: DISLang|Attribute
2392     @@ForCheck: !=ManakaiDOM|ManakaiDOM
2393    
2394     ElementTypeBinding:
2395     @Name: Get
2396     @ElementType:
2397     dis:ResourceDef
2398     @ShadowContent:
2399     @@rdf:type: DISLang|AttributeGet
2400    
2401     ElementTypeBinding:
2402     @Name: Set
2403     @ElementType:
2404     dis:ResourceDef
2405     @ShadowContent:
2406     @@rdf:type: DISLang|AttributeSet
2407    
2408     ElementTypeBinding:
2409     @Name: enDesc
2410     @ElementType:
2411     dis:Description
2412     @ShadowContent:
2413     @@lang:en
2414    
2415     ElementTypeBinding:
2416     @Name: Method
2417     @ElementType:
2418     dis:ResourceDef
2419     @ShadowContent:
2420     @@rdf:type: DISLang|Method
2421     @@For: !=ManakaiDOM|ManakaiDOM
2422    
2423     ElementTypeBinding:
2424     @Name: Return
2425     @ElementType:
2426     dis:ResourceDef
2427     @ShadowContent:
2428     @@rdf:type: DISLang|MethodReturn
2429    
2430     ElementTypeBinding:
2431     @Name: Param
2432     @ElementType:
2433     dis:ResourceDef
2434     @ShadowContent:
2435     @@rdf:type: DISLang|MethodParameter
2436    
2437     ElementTypeBinding:
2438     @Name: PerlDef
2439     @ElementType:
2440     dis:Def
2441     @ShadowContent:
2442     @@ContentType: lang|Perl
2443    
2444     ElementTypeBinding:
2445     @Name: PropDef
2446     @ElementType:
2447     dis:ResourceDef
2448     @ShadowContent:
2449     @@rdf:type: rdf|Property
2450    
2451     ClsDef:
2452     @ClsQName: ManakaiXMLParserExceptionFormatter
2453    
2454     @ClsISA: ecore|MUErrorFormatter||ManakaiDOM|Perl
2455    
2456     @RuleDef:
2457     @@Name: xp-error-token-type
2458     @@enDesc:
2459     The type of the token the parser has encountered.
2460    
2461     @@Method:
2462     @@@Name: after
2463     @@@Param:
2464     @@@@Name: name
2465     @@@@Type: DOMString
2466     @@@@enDesc: The name of the method.
2467     @@@Param:
2468     @@@@Name: p
2469     @@@@Type: DISPerl|HASH
2470     @@@@enDesc: The set of the parameters to the method.
2471     @@@Param:
2472     @@@@Name: o
2473     @@@@Type: DISPerl|HASH
2474     @@@@enDesc: The option value.
2475     @@@Return:
2476     @@@@PerlDef:
2477     my $token_type = $o->{<H::xp|error-token>}->{type};
2478     $p->{-result} = $token_type if defined $token_type; ## result is left untouched when the token has no type
2479    
2480     @RuleDef:
2481     @@Name: xp-error-token-value
2482     @@enDesc:
2483     The value of the token the parser has encountered, if any.
2484    
2485     @@Method:
2486     @@@Name: after
2487     @@@Param:
2488     @@@@Name: name
2489     @@@@Type: DOMString
2490     @@@@enDesc: The name of the method.
2491     @@@Param:
2492     @@@@Name: p
2493     @@@@Type: DISPerl|HASH
2494     @@@@enDesc: The set of the parameters to the method.
2495     @@@Param:
2496     @@@@Name: o
2497     @@@@Type: DISPerl|HASH
2498     @@@@enDesc: The option value.
2499     @@@Return:
2500     @@@@PerlDef:
2501     my $token_value = $o->{<H::xp|error-token>}->{value};
2502     $p->{-result} = $token_value if defined $token_value; ## result is left untouched when the token has no value
2503    
2504     @RuleDef:
2505     @@Name: xp-error-lines
2506     @@enDesc:
2507     A copy of fragment of the source text that contains the line
2508     where the error occurred, if available.
2509    
2510     @@Method:
2511     @@@Name: after
2512     @@@Param:
2513     @@@@Name: name
2514     @@@@Type: DOMString
2515     @@@@enDesc: The name of the method.
2516     @@@Param:
2517     @@@@Name: p
2518     @@@@Type: DISPerl|HASH
2519     @@@@enDesc: The set of the parameters to the method.
2520     @@@Param:
2521     @@@@Name: o
2522     @@@@Type: DISPerl|HASH
2523     @@@@enDesc: The option value.
2524     @@@Return:
2525     @@@@PerlDef:
2526     my $pos = $o-><AG::DOMCore|DOMError.location>
2527     -><AG::DOMCore|DOMLocator.utf32Offset>;
2528     if ($pos > -1) { ## a negative offset leaves the result unset
2529 wakaba 1.3 my $src = \($o->{<H::ecore|object>}->{source});
2530 wakaba 1.1 my $start = $pos;
2531     $start = rindex ($$src, "\x0A", $start - 1) for 0..2; ## step back over up to three line feeds before the error position
2532     $start++; ## first character after that line feed (0 when none was found)
2533     my $end = $pos;
2534     for (0..2) { $end = index ($$src, "\x0A", $end + 1); last if $end < 0 } ## stop on the first miss; searching again from -1 + 1 would rescan from offset 0 and wrap around
2535     $end = length $$src if $end < 0; ## fewer than three line feeds follow: take everything up to the end of the source
2536     $p->{-result} = substr $$src, $start, $end - $start;
2537     }
2538 wakaba 1.3
2539     @RuleDef:
2540     @@Name: character-code-point
2541     @@enDesc:
2542     The character code position, in <CODE::U+<VAR::HHHH>> notation.
2543    
2544     @@Method:
2545     @@@Name: after
2546     @@@Param:
2547     @@@@Name: name
2548     @@@@Type: DOMString
2549     @@@@enDesc: The name of the method.
2550     @@@Param:
2551     @@@@Name: p
2552     @@@@Type: DISPerl|HASH
2553     @@@@enDesc: The set of the parameters to the method.
2554     @@@Param:
2555     @@@@Name: o
2556     @@@@Type: DISPerl|HASH
2557     @@@@enDesc: The option value.
2558     @@@RuleParam:
2559     @@@@Name: v
2560     @@@@Type: DISPerl|Number
2561     @@@@enDesc:
2562     The name of the error parameter that contains the character code.
2563     @@@Return:
2564     @@@@PerlDef:
2565     my $code_point = $o->{$p->{v}}; $p->{-result} = sprintf ('U+%04X', $code_point); ## v names the error parameter holding the code point
2566 wakaba 1.1 ##XMLParserExceptionFormatter

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24