/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory | Revision Log


Revision 1.6 - (hide annotations) (download) (as text)
Fri Apr 25 13:42:51 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.5: +103 -14 lines
File MIME type: text/html
<script src> in parsing algorithm is now supported

1 wakaba 1.1 <!DOCTYPE HTML>
2     <html lang=en>
3     <head>
4     <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>
5     <style>
6     textarea {
7     display: block;
8     width: 80%;
9     margin-left: auto;
10     margin-right: auto;
11     min-height: 20em;
12     }
13     output {
14     display: block;
15     font-family: monospace;
16 wakaba 1.4 white-space: -moz-pre-wrap;
17     white-space: pre-wrap;
18 wakaba 1.1 }
19     </style>
20     <script>
// Re-run the demo: empty the log output, parse the current contents of the
// source textarea with a fresh Parser, then append a dump of the resulting
// tree to the log.
function update () {
  document.logElement.textContent = '';

  var source = document.sourceElement.value;
  var parser = new Parser (new InputStream (source));

  // The document reference is taken before parsing starts.
  var tree = parser.doc;
  parser.parse ();

  var dump = dumpTree (tree, '');
  log (dump);
} // update
28    
// Current nesting depth of log output; callers increment it on entry to a
// nested operation and decrement it on exit.
var logIndentLevel = 0;

// Append one line to the log element, prefixed with one space per level of
// |logIndentLevel|.
function log (s) {
  var prefix = '';
  var remaining = logIndentLevel;
  while (remaining-- > 0) {
    prefix += ' ';
  }
  var line = document.createTextNode (prefix + s + "\n");
  document.logElement.appendChild (line);
} // log
36    
// Input stream wrapper: holds the not-yet-consumed source text in the |s|
// property, which the tokeniser and document.write rewrite in place.
function InputStream (s) {
  var remainingText = s;
  this.s = remainingText;
} // InputStream
40    
// Parser constructor.
//   i   - the InputStream to read from.
//   doc - optional document to build into; when omitted (or falsy) a new
//         JSDocument bound to this parser is created and flagged as HTML.
// The parser starts in 'pcdata' mode with the document as the only open
// element and an empty list of scripts to run after parsing.
function Parser (i, doc) {
  this.parseMode = 'pcdata';

  var target = doc;
  if (!target) {
    target = new JSDocument (this);
    target.manakaiIsHTML = true;
  }

  this.doc = target;
  this.openElements = [target];
  this.in = i;
  this.scriptsExecutedAfterParsing = [];
} // Parser
52    
// Consume and return the next token from the front of the input stream.
//
// Returned tokens are plain objects:
//   {type: 'char', value}              - character data
//   {type: 'start-tag', value, attrs}  - lowercased tag name + attribute map
//   {type: 'end-tag', value}           - lowercased tag name
//   {type: 'abort'}                    - the insertion point was reached
//                                        before a whole token was available;
//                                        nothing is consumed
//   {type: 'eof'}                      - the input is exhausted
//
// |this.insertionPoint| counts the characters at the front of the input that
// may be consumed; it is decremented by the length of every consumed token.
// When it is undefined the subtraction yields NaN, and every comparison
// against it below is false, so tokenising proceeds without limit.
Parser.prototype.getNextToken = function () {
  var p = this;
  var i = this.in;
  var token;

  if (this.parseMode == 'script') {
    if (p.insertionPoint <= 0) {
      return {type: 'abort'};
    }

    // Character data up to the next "<", cut at the insertion point.
    i.s = i.s.replace (/^([^<]+)/, function (s, t) {
      if (0 < p.insertionPoint && p.insertionPoint < t.length) {
        token = {type: 'char', value: t.substring (0, p.insertionPoint)};
        var ip = p.insertionPoint;
        p.insertionPoint = 0;
        return t.substring (ip, t.length);
      }
      token = {type: 'char', value: t};
      p.insertionPoint -= t.length;
      return '';
    });
    if (token) return token;

    // A complete </script> end tag.
    i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {
      if (p.insertionPoint < s.length) {
        token = {type: 'abort'};
        return s;
      }
      token = {type: 'end-tag', value: 'script'};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    // The characters before the insertion point may be a proper prefix of
    // "</script"; in that case wait for more input instead of emitting "<".
    // Only the name characters that lie before the insertion point are
    // compared, i.e. insertionPoint minus the two characters of "</".
    var m;
    if ((p.insertionPoint < '</script'.length) &&
        (m = i.s.match (/^<\/([SCRIPTscript]+)/))) {
      var available = p.insertionPoint - '</'.length;
      var v = m[1].substring (0, available).toLowerCase ();
      if (v == 'script'.substring (0, available)) {
        return {type: 'abort'};
      }
    }

    // A "<" that does not start </script> is plain character data.
    i.s = i.s.replace (/^</, function (s) {
      token = {type: 'char', value: s};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;
    return {type: 'eof'};
  }

  // True when the matched tag text |s| must not be consumed yet: either it
  // extends past the insertion point, or it ends exactly at the insertion
  // point without a closing ">" (so more of the tag may still be written).
  var mustWait = function (s) {
    return p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length &&
         s.charAt (s.length - 1) != '>');
  };

  // End tag.
  i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
    if (mustWait (s)) {
      token = {type: 'abort'};
      return s;
    }
    token = {type: 'end-tag', value: e.toLowerCase ()};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Start tag.
  i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
    if (mustWait (s)) {
      token = {type: 'abort'};
      return s;
    }
    var tagName;
    var attrs = {};
    e = e.replace (/^[\S]+/, function (v) {
      tagName = v.toLowerCase ();
      return '';
    });

    // Peel attributes off the front one at a time until nothing matches.
    // Each match consumes at least the attribute name, so this terminates.
    // NOTE: the unquoted-value alternative still runs to the next quote, as
    // in the original pattern.
    var attrPattern =
        /^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"']+)))?/;
    var consumed = true;
    while (consumed) {
      consumed = false;
      e = e.replace (attrPattern,
          function (x, attrName, attrValue1, attrValue2, attrValue3) {
        consumed = true;
        // A valueless attribute (or an empty quoted value) yields the
        // empty string rather than undefined.
        var v = attrValue1 || attrValue2 || attrValue3 || '';
        v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
            .replace (/&amp;/g, '&');
        attrs[attrName.toLowerCase ()] = v;
        return '';
      });
    }
    if (e.length) {
      log ('Broken start tag: "' + e + '"');
    }
    token = {type: 'start-tag', value: tagName, attrs: attrs};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  if (p.insertionPoint <= 0) {
    return {type: 'abort'};
  }

  // Character data up to the next "<", cut at the insertion point.
  i.s = i.s.replace (/^[^<]+/, function (s) {
    if (p.insertionPoint < s.length) {
      token = {type: 'char', value: s.substring (0, p.insertionPoint)};
      var ip = p.insertionPoint;
      p.insertionPoint = 0;
      return s.substring (ip, s.length);
    }
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Any single remaining character (in practice a "<" that did not match a
  // tag pattern above).
  i.s = i.s.replace (/^[\s\S]/, function (s) {
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  return {type: 'eof'};
}; // getNextToken
167    
// Run the tree construction loop over the tokens produced by getNextToken.
//
// The loop ends in one of three ways:
//   - an 'abort' token: logs and returns, leaving the rest of the input
//     untouched;
//   - a pending <script src> while |this.reentrant| is set: logs and
//     returns so that the outer parse call can execute the script;
//   - an 'eof' token: runs the scripts queued in
//     |this.scriptsExecutedAfterParsing| and logs the final events.
// |logIndentLevel| is incremented on entry and decremented on every exit.
Parser.prototype.parse = function () {
  logIndentLevel++;
  log ('parse: start');

  while (true) {
    var token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);
        if (token.attrs.async != null) el.async = true;
        if (token.attrs.defer != null) el.defer = true;
        if (token.attrs.src != null) el.src = token.attrs.src;

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'script';

        // 4.1. Collect all the character tokens.
        while (true) {
          token = this.getNextToken ();
          log ('token: ' + token.type + ' "' + token.value + '"');

          if (token.type == 'char') {
            // 5. Append a single Text node to the script element node.
            el.manakaiAppendText (token.value);

          // 4.2. Until it returns a token that is not a character token, or
          // until it stops tokenising.
          } else if (token.type == 'eof' ||
                     (token.type == 'end-tag' && token.value == 'script') ||
                     token.type == 'abort') {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            // 7.1. If the next token is not an end tag token with ...
            if (token.type != 'end-tag') {
              // 7.2. This is a parse error.
              log ('Parse error: no </' + 'script>');

              // 7.3. Mark the script element as "already executed".
              el.manakaiAlreadyExecuted = true;
            } else {
              // 7.4. Ignore it.
              //
            }
            break;
          }
        }

        // 8.1. If the parser were originally created for the ...
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and ...
          // The flag name must be the one appendChild checks
          // (manakaiAlreadyExecuted), otherwise the mark has no effect.
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the ...
        var oldInsertionPoint = this.insertionPoint;
        // 9.2. Let the insertion point be just before the next input ...
        this.setInsertionPoint (0);

        // 10. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 11. Let the insertion point have the value of the old ...
        // The current insertion point is added so that text inserted while
        // the script ran is accounted for. If the old value was undefined
        // the sum is NaN, which setInsertionPoint turns back into undefined.
        oldInsertionPoint += this.insertionPoint;
        this.setInsertionPoint (oldInsertionPoint);

        // 12. If there is a script that will execute as soon as ...
        while (this.scriptExecutedWhenParserResumes) {
          // 12.1. If the tree construction stage is being called reentrantly
          if (this.reentrant) {
            log ('parse: abort (reentrance)');
            logIndentLevel--;
            return;

          // 12.2. Otherwise
          } else {
            // 1.
            var script = this.scriptExecutedWhenParserResumes;
            this.scriptExecutedWhenParserResumes = null;

            // 2. Pause until the script has completed loading.
            //

            // 3. Let the insertion point to just before the next input char.
            this.setInsertionPoint (0);

            // 4. Execute the script.
            executeScript (this.doc, script);

            // 5. Let the insertion point be undefined again.
            this.setInsertionPoint (undefined);

            // 6. If there is once again a script that will execute ...
            //
          }
        }
      } else {
        var el = new JSElement (this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild (el);
        this.openElements.push (el);
      }
    } else if (token.type == 'end-tag') {
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'char') {
      this.openElements[this.openElements.length - 1].manakaiAppendText
          (token.value);
    } else if (token.type == 'eof') {
      break;
    } else if (token.type == 'abort') {
      log ('parse: abort');
      logIndentLevel--;
      return;
    }
  }

  log ('stop parsing');

  // readyState = 'interactive'

  // "When a script completes loading" rules start applying.

  // TODO: Handles "list of scripts that will execute as soon as possible"
  // and "list of scripts that will execute asynchronously"

  // Handle "list of scripts that will execute when the document has finished
  // parsing".
  var list = this.scriptsExecutedAfterParsing;
  while (list.length > 0) {
    // TODO: break unless completed loading

    // Step 1.
    //

    // Step 2. and Step 3.
    log ('Executing a |defer|red script...');
    executeScript (this.doc, list.shift ());

    // Step 4.
  }

  log ('DOMContentLoaded event fired');

  // "delays the load event" things has completed:
  // readyState = 'complete'
  log ('load event fired');

  logIndentLevel--;
}; // parse
328    
// Set |this.insertionPoint| and log the change.
// undefined, null and NaN all store undefined; any other value is stored
// as given. The log message distinguishes "end of file" (the value equals
// the remaining input length) from an ordinary position, for which the
// next ten input characters are shown.
Parser.prototype.setInsertionPoint = function (ip) {
  if (ip == null || isNaN (ip)) {
    log ('insertion point: set to undefined');
    this.insertionPoint = undefined;
    return;
  }

  var remaining = this.in.s;
  var message;
  if (ip == remaining.length) {
    message = 'insertion point: end of file';
  } else {
    message = 'insertion point: set to ' + ip +
        ' (before "' + remaining.substring (0, 10) + '")';
  }
  log (message);
  this.insertionPoint = ip;
}; // setInsertionPoint
342    
// Minimal document node: an initially empty child list plus a reference to
// the parser |p| that is building it.
function JSDocument (p) {
  var children = [];
  this.childNodes = children;
  this._parser = p;
} // JSDocument
347    
// Minimal element node: records its local name and owner document and
// starts with an empty child list.
function JSElement (doc, localName) {
  var children = [];
  this.localName = localName;
  this.ownerDocument = doc;
  this.childNodes = children;
} // JSElement
353    
// Append |e| as the last child of this node and return it.
// Appending a |script| element additionally runs the "running a script"
// steps below, which may execute it at once, queue it on the parser, or do
// nothing (the TODO branches).
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push (e);
  e.parentNode = this;

  // Only script elements need further processing.
  if (e.localName != 'script') {
    return e;
  }

  logIndentLevel++;
  log ('Running a script: start');

  var doc = this.ownerDocument || this;
  var p = doc._parser;

  // 1. Script type
  //

  // 2.1. If scripting is disabled
  //
  // 2.2. If the script element was created by an XML ... innerHTML ...
  //
  // 2.3. If the user agent does not support the scripting language ...
  //
  // 2.4. If the script element has its "already executed" flag set
  if (e.manakaiAlreadyExecuted) {
    // 2.5. Abort these steps at this point.
    log ('Running a script: aborted');
    logIndentLevel--;
    return e;
  }

  // 3. Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // 4. If the element has a src attribute, then a load for ...
  // TODO: load an external resource

  // 5. The first of the following options:
  var hasSrc = e.src != null;
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    // 5.1. Deferred: queued until parsing has finished.
    p.scriptsExecutedAfterParsing.push (e);
    log ('Running a script: aborted (defer)');
  } else if (e.async) {
    // TODO: async with src; async without src (when the list of scripts
    // that will execute asynchronously is not empty).
  } else if (hasSrc) {
    if (e.manakaiParserInserted) {
      // Parser-inserted external script: handed to the parser, which
      // executes it when it resumes.
      if (p.scriptExecutedWhenParserResumes) {
        log ('Error: There is a script that will execute as soon as the parser resumes.');
      }
      p.scriptExecutedWhenParserResumes = e;
      log ('Running a script: aborted (src)');
    } else {
      // TODO
    }
  } else {
    executeScript (doc, e); // even if other scripts are already executing.
  }

  log ('Running a script: end');
  logIndentLevel--;
  return e;
}; // appendChild
419    
// Execute the script element |e| in the context of document |doc|.
// The script text comes from getExternalScript when the element has a
// |src|, otherwise from the element's |text|. A null result from
// getExternalScript is reported as an error event and nothing is run.
function executeScript (doc, e) {
  log ('executing a script block: start');

  var isExternal = e.src != null;
  var s = isExternal ? getExternalScript (e.src) : e.text;

  if (isExternal) {
    // If the load resulted in an error, then ... firing an error event ...
    if (s == null) {
      log ('error event fired at the script element');
      return;
    }
    log ('External script loaded: "' + s + '"');
  }

  // If the load was successful
  log ('load event fired at the script element');

  // Scripting is enabled, Document.designMode is disabled,
  // Document is the active document in its browsing context
  parseAndRunScript (doc, s);

  log ('executing a script block: end');
} // executeScript
450    
// Resolve the |src| of a script element to script text.
//
// Only |javascript:| URIs whose body is a single quoted string literal
// ('...' or "...") are supported; the text between the quotes is returned.
// An empty literal yields the empty string. Anything else is logged and
// null is returned, which the caller treats as a load error.
function getExternalScript (uri) {
  if (!/^javascript:/i.test (uri)) {
    log ('URI scheme not supported: <' + uri + '>');
    return null;
  }

  // Both quote styles accept an empty body.
  var m = /^javascript:\s*(?:'([^']*)'|"([^"]*)")\s*$/i.exec (uri);
  if (!m) {
    log ('Complex javascript: URI is not supported: <' + uri + '>');
    return null;
  }

  // Exactly one of the two groups took part in the match. The group that
  // did not is undefined or '' depending on the engine, so fall through to
  // '' when both are empty.
  return m[1] || m[2] || '';
} // getExternalScript
471    
// Interpret the script text |s| against document |doc|.
// The only statement understood is
//   document.write (<string literal> [, <string literal> ...]);
// Statements are taken off the front of |s| one at a time and each is
// turned into a doc.write call with the unquoted literals as arguments.
// Anything else is logged as a script parse error and stops the run.
function parseAndRunScript (doc, s) {
  var statement = /^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/;
  var literal = /('[^']*'|"[^"]*")/g;

  for (;;) {
    var found = statement.exec (s);
    if (found) {
      // Strip the surrounding quotes from every literal in the argument
      // list.
      var quoted = found[1].match (literal) || [];
      var args = [];
      for (var k = 0; k < quoted.length; k++) {
        args.push (quoted[k].substring (1, quoted[k].length - 1));
      }
      doc.write.apply (doc, args);

      // The pattern is anchored, so the statement is always a prefix.
      s = s.substring (found[0].length);
    }

    if (s == '') return;
    if (!found) {
      log ('Script parse error: "' + s + '"');
      return;
    }
  }
} // parseAndRunScript
492    
// Minimal text node: holds its character data in |data|.
function JSText (data) {
  var characters = data;
  this.data = characters;
} // JSText
496    
// Append the character data |s| to this node: extend the last child when
// it is a JSText, otherwise add a new JSText child.
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var kids = this.childNodes;
  var last = kids.length > 0 ? kids[kids.length - 1] : null;

  if (last instanceof JSText) {
    last.data += s;
    return;
  }
  kids.push (new JSText (s));
}; // manakaiAppendText
507 wakaba 1.2
// document.open ([type [, replace]])
//
// Discards the current contents of the document and attaches a new,
// script-created parser with an empty input stream whose insertion point is
// at the end of that stream. Returns the document.
//
// The call is ignored (the document is returned unchanged) while the
// document is being parsed by a parser that was not created by a script,
// i.e. while that parser has a defined insertion point.
JSDocument.prototype.open = function () {
  // Two or fewer arguments

  // Step 1.
  var type = arguments[0] || 'text/html';

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  // The insertion point is kept on the parser itself (see
  // Parser.prototype.setInsertionPoint), not on its input stream.
  if (this._parser &&
      !this._parser.scriptCreated &&
      this._parser.insertionPoint != undefined) {
    log ('document.open () in parsing mode is ignored');
    return this;
  }

  // Step 4.
  log ('onbeforeunload event fired');
  log ('onunload event fired');

  // Step 5.
  if (this._parser) {
    // Discard the parser.
  }

  // Step 6.
  log ('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser (new InputStream (''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  this._parser.setInsertionPoint (this._parser.in.s.length);

  // Step 12.
  return this;
}; // document.open
559    
// document.write (string, ...)
//
// Concatenates the arguments, inserts the result into the parser's input
// stream at the insertion point and advances the insertion point past it.
// Unless a <script src> is waiting for the parser to resume, the inserted
// text is then parsed immediately by a reentrant call to the parser.
// |logIndentLevel| is incremented on entry and decremented on every exit.
JSDocument.prototype.write = function () {
  logIndentLevel++;

  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
    this.open ();
    p = this._parser;
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // |arguments| is not a real array, so borrow Array.prototype.join.
  var s = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + s + '"' +
       ' before "' + p.in.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
  p.in.s = p.in.s.substring (0, p.insertionPoint) + s
      + p.in.s.substring (p.insertionPoint, p.in.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a script that will execute as soon as the parser resumes
  // (the property set by appendChild and consumed by Parser.prototype.parse)
  if (p.scriptExecutedWhenParserResumes) {
    log ('document.write: processed later (there is an unprocessed <script src>)');
    logIndentLevel--;
    return;
  }

  // 4. Process the characters that were inserted, ...
  // The reentrant flag tells parse () to return instead of executing a
  // pending <script src> itself; the previous value is restored afterwards.
  var originalReentrant = p.reentrant;
  p.reentrant = true;
  p.parse ();
  p.reentrant = originalReentrant;
  // TODO: "Abort the processing of any nested invokations of the tokeniser,
  // yielding control back to the caller." (<script> parsing). Do we need
  // to do something here?

  // 5. Return
  log ('document.write: return');

  logIndentLevel--;
  return;
}; // document.write
601    
// |text| accessor on elements: the concatenated data of the element's
// direct JSText children, in order (descendants of child elements are not
// included). Defined with the standard Object.defineProperty; enumerable
// and configurable are set to match what __defineGetter__ produced.
Object.defineProperty (JSElement.prototype, 'text', {
  get: function () {
    var r = '';
    for (var i = 0; i < this.childNodes.length; i++) {
      if (this.childNodes[i] instanceof JSText) {
        r += this.childNodes[i].data;
      }
    }
    return r;
  },
  enumerable: true,
  configurable: true
});
611 wakaba 1.1
// Serialise the children of node |n| into the line-oriented tree dump shown
// in the log. Every line starts with "| " followed by |indent|; element
// lines carry the local name and are followed by lines for the async,
// defer and src properties when those are truthy, then by the element's
// own children at one more space of indent. Text nodes are shown quoted;
// any other child is converted to a string.
function dumpTree (n, indent) {
  var lines = [];
  var children = n.childNodes;

  for (var k = 0; k < children.length; k++) {
    var child = children[k];
    var lead = '| ' + indent;

    if (child instanceof JSElement) {
      lines.push (lead + child.localName + '\n');
      if (child.async) lines.push (lead + ' async=""\n');
      if (child.defer) lines.push (lead + ' defer=""\n');
      if (child.src) lines.push (lead + ' src="' + child.src + '"\n');
      lines.push (dumpTree (child, indent + ' '));
    } else if (child instanceof JSText) {
      lines.push (lead + '"' + child.data + '"\n');
    } else {
      lines.push (lead + child + '\n');
    }
  }

  return lines.join ('');
} // dumpTree
630     </script>
631     </head>
632     <body onload="
633     document.sourceElement = document.getElementsByTagName ('textarea')[0];
634     document.logElement = document.getElementsByTagName ('output')[0];
635     update ();
636     ">
637    
638     <textarea onchange=" update () ">&lt;html>
639     &lt;head>&lt;/head>&lt;body>
640     &lt;p>
641     &lt;script>
642 wakaba 1.3 document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
643 wakaba 1.1 &lt;/script>
644     &lt;p>
645     </textarea>
646    
647     <output></output>
648    
649     </body>
650     </html>

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24