/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (hide annotations) (download) (as text)
Sun Apr 20 12:19:13 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.3: +134 -18 lines
File MIME type: text/html
Attributes support in tokenization; stop parsing support; document.write in deferred script implemented; deferred script support

1 wakaba 1.1 <!DOCTYPE HTML>
2     <html lang=en>
3     <head>
4     <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>
5     <style>
6     textarea {
7     display: block;
8     width: 80%;
9     margin-left: auto;
10     margin-right: auto;
11     min-height: 20em;
12     }
13     output {
14     display: block;
15     font-family: monospace;
16 wakaba 1.4 white-space: -moz-pre-wrap;
17     white-space: pre-wrap;
18 wakaba 1.1 }
19     </style>
20     <script>
// Re-runs the demo: empties the log, parses the current text of the source
// element with a fresh Parser, then logs a dump of the resulting tree.
function update () {
  document.logElement.textContent = '';

  var source = new InputStream (document.sourceElement.value);
  var parser = new Parser (source);

  // The document object is taken from the parser before parsing starts and
  // is the one dumped afterwards.
  var tree = parser.doc;
  parser.parse ();

  log (dumpTree (tree, ''));
} // update
28    
// Appends |s| followed by a newline to the log element as a new text node.
function log (s) {
  var line = document.createTextNode (s + "\n");
  document.logElement.appendChild (line);
} // log
32    
// Input stream wrapper.  |s| is the not-yet-consumed input; the tokenizer
// and document.write both read and replace this property.
function InputStream (s) {
  this.s = s;
} // InputStream
36    
// Parser state holder.
//   i   - the InputStream to tokenize.
//   doc - optional document to parse into; when omitted a new JSDocument
//         bound to this parser is created and flagged as HTML.
function Parser (i, doc) {
  var target = doc;
  if (!target) {
    target = new JSDocument (this);
    target.manakaiIsHTML = true;
  }

  this.parseMode = 'pcdata';
  this.doc = target;
  // The stack of open elements starts with the document itself.
  this.openElements = [target];
  this.in = i;
  // Scripts queued by appendChild to run once parsing has finished.
  this.scriptsExecutedAfterParsing = [];
} // Parser
48    
// Removes the next token from the front of the input stream (this.in.s)
// and returns it.
//
// Returned token shapes:
//   {type: 'char', value: string}
//   {type: 'start-tag', value: lowercased tag name, attrs: {name: value}}
//   {type: 'end-tag', value: lowercased tag name}
//   {type: 'abort'}  - the insertion point was reached; nothing consumed
//   {type: 'eof'}    - no input left
//
// Every consumed character is subtracted from this.insertionPoint.  When
// the insertion point is undefined the comparisons below are all false and
// the subtraction yields NaN, which the rest of the file treats the same
// as undefined.
Parser.prototype.getNextToken = function () {
  var p = this;
  var i = this.in;
  var token;

  // Splits the text between "<" and ">" of a start tag into a tag name and
  // an attribute map.  Attribute names are lowercased, every attribute is
  // read (not only the first), the first occurrence of a name wins, and an
  // attribute without a value (or with an empty one) maps to '' so that
  // callers testing |attrs.name != null| see it as present.
  function readStartTag (e) {
    var tagName;
    var attrs = {};
    var rest = e.replace (/^\S+/, function (v) {
      tagName = v.toLowerCase ();
      return '';
    });
    var attrPattern =
        /\s*([^\s=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"']+)))?/g;
    rest.replace (attrPattern, function (x, name, dq, sq, uq) {
      var key = name.toLowerCase ();
      if (!Object.prototype.hasOwnProperty.call (attrs, key)) {
        attrs[key] = dq != null && dq !== '' ? dq
                   : sq != null && sq !== '' ? sq
                   : uq != null ? uq
                   : '';
      }
      return '';
    });
    return {tagName: tagName, attrs: attrs};
  } // readStartTag

  if (this.parseMode == 'script') {
    if (p.insertionPoint <= 0) {
      return {type: 'abort'};
    }

    // A run of script text up to the next "<".  If the insertion point
    // falls inside the run, only the part before it is emitted.
    i.s = i.s.replace (/^([^<]+)/, function (s, t) {
      if (0 < p.insertionPoint && p.insertionPoint < t.length) {
        token = {type: 'char', value: t.substring (0, p.insertionPoint)};
        var ip = p.insertionPoint;
        p.insertionPoint = 0;
        return t.substring (ip, t.length);
      }
      token = {type: 'char', value: t};
      p.insertionPoint -= t.length;
      return '';
    });
    if (token) return token;

    // The closing tag of the script element, in any letter case.
    i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {
      if (p.insertionPoint < s.length) {
        token = {type: 'abort'};
        return s;
      }
      token = {type: 'end-tag', value: 'script'};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    // Any other "<" is plain script text.
    i.s = i.s.replace (/^</, function (s) {
      token = {type: 'char', value: s};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    return {type: 'eof'};
  }

  // End tag.
  i.s = i.s.replace (/^<\/([^>]+)>/, function (s, e) {
    if (p.insertionPoint < s.length) {
      token = {type: 'abort'};
      return s;
    }
    token = {type: 'end-tag', value: e.toLowerCase ()};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Start tag, with attributes.
  i.s = i.s.replace (/^<([^>]+)>/, function (s, e) {
    if (p.insertionPoint < s.length) {
      token = {type: 'abort'};
      return s;
    }
    var tag = readStartTag (e);
    token = {type: 'start-tag', value: tag.tagName, attrs: tag.attrs};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  if (p.insertionPoint <= 0) {
    return {type: 'abort'};
  }

  // A run of text up to the next "<", cut at the insertion point if the
  // insertion point falls inside it.
  i.s = i.s.replace (/^[^<]+/, function (s) {
    if (p.insertionPoint < s.length) {
      token = {type: 'char', value: s.substring (0, p.insertionPoint)};
      var ip = p.insertionPoint;
      p.insertionPoint = 0;
      return s.substring (ip, s.length);
    }
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // A "<" that did not start a complete tag: emit it as a character.
  i.s = i.s.replace (/^[\s\S]/, function (s) {
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  return {type: 'eof'};
}; // getNextToken
145    
// Runs the tree construction loop: pulls tokens from getNextToken and
// builds the tree under this.doc until the tokenizer reports 'eof' or
// 'abort'.
//
// On 'abort' the method returns immediately.  On 'eof' it goes on to run
// the scripts queued in this.scriptsExecutedAfterParsing and logs the
// DOMContentLoaded and load events.
//
// The numbered comments follow the steps of the HTML5 "script" start tag
// processing this demo is modelled on.
Parser.prototype.parse = function () {
  log ('start parsing');

  while (true) {
    var token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);
        if (token.attrs.async != null) el.async = true;
        if (token.attrs.defer != null) el.defer = true;
        if (token.attrs.src != null) el.src = token.attrs.src;

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'script';

        // 4.1. Collect all the character tokens.
        while (true) {
          token = this.getNextToken ();
          log ('token: ' + token.type + ' "' + token.value + '"');

          if (token.type == 'char') {
            // 5. Append a single Text node to the script element node.
            el.manakaiAppendText (token.value);

          // 4.2. Until it returns a token that is not a character token, or
          // until it stops tokenising.
          } else if (token.type == 'eof' ||
                     (token.type == 'end-tag' && token.value == 'script') ||
                     token.type == 'abort') {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            // 7.1. If the next token is not an end tag token with ...
            if (token.type != 'end-tag') {
              // 7.2. This is a parse error.
              log ('Parse error: no </' + 'script>');

              // 7.3. Mark the script element as "already executed".
              el.manakaiAlreadyExecuted = true;
            } else {
              // 7.4. Ignore it.
              //
            }
            break;
          }
        }

        // 8.1. If the parser were originally created for the ...
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and ...
          // The flag name must be the one appendChild checks.
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the ...
        var oldInsertionPoint = this.insertionPoint;
        // 9.2. Let the insertion point be just before the next input ...
        this.setInsertionPoint (0);

        // 10. Append the new element to the current node.
        // (appendChild is what runs the script.)
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 11. Let the insertion point have the value of the old ...
        this.setInsertionPoint (oldInsertionPoint);

        // 12. If there is a script that will execute as soon as ...


      } else {
        // Any other start tag: append to the current node and make the new
        // element the current node.
        var el = new JSElement (this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild (el);
        this.openElements.push (el);
      }
    } else if (token.type == 'end-tag') {
      // Only an end tag matching the current node closes anything.
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'char') {
      this.openElements[this.openElements.length - 1].manakaiAppendText
          (token.value);
    } else if (token.type == 'eof') {
      break;
    } else if (token.type == 'abort') {
      log ('parse: abort');
      return;
    }
  }

  log ('stop parsing');

  // readyState = 'interactive'

  // "When a script completes loading" rules start applying.

  // TODO: Handles "list of scripts that will execute as soon as possible"
  // and "list of scripts that will execute asynchronously"

  // Handle "list of scripts that will execute when the document has finished
  // parsing".
  var list = this.scriptsExecutedAfterParsing;
  while (list.length > 0) {
    // TODO: break unless completed loading

    // Step 1.
    //

    // Step 2. and Step 3.
    log ('Executing a |defer|red script...');
    executeScript (this.doc, list.shift ());

    // Step 4.
  }

  log ('DOMContentLoaded event fired');

  // Everything that "delays the load event" has completed:
  // readyState = 'complete'
  log ('load event fired');
}; // parse
274    
// Sets this.insertionPoint and logs the change.  undefined, null and
// anything isNaN() rejects are all stored as undefined.
Parser.prototype.setInsertionPoint = function (ip) {
  // |ip == null| is also true for undefined.
  if (ip == null || isNaN (ip)) {
    log ('insertion point: set to undefined');
    this.insertionPoint = undefined;
    return;
  }

  var rest = this.in.s;
  if (ip == rest.length) {
    log ('insertion point: end of file');
  } else {
    log ('insertion point: set to ' + ip +
         ' (before "' + rest.substring (0, 10) + '")');
  }
  this.insertionPoint = ip;
}; // setInsertionPoint
288    
// Minimal document node.  |p| is the Parser currently feeding this
// document; it is stored as |_parser|.
function JSDocument (p) {
  this._parser = p;
  this.childNodes = [];
} // JSDocument
293    
// Minimal element node with a local name, an owner document and an
// initially empty child list.
function JSElement (doc, localName) {
  this.ownerDocument = doc;
  this.localName = localName;
  this.childNodes = [];
} // JSElement
299    
// Appends |e| as the last child of this node and returns it.  Appending a
// script element additionally performs the "running a script" steps: the
// script is skipped if already executed, queued on the parser if deferred,
// or executed at once if it is a plain inline script.  The other cases are
// not implemented.
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push (e);
  e.parentNode = this;

  if (e.localName != 'script') {
    return e;
  }

  log ('Running a script: start');

  // A document has no ownerDocument, so fall back to the node itself.
  var doc = this.ownerDocument || this;
  var p = doc._parser;

  // 1. Script type
  //

  // 2.1. If scripting is disabled
  //
  // 2.2. If the script element was created by an XML ... innerHTML ...
  //
  // 2.3. If the user agent does not support the scripting language ...
  //
  // 2.4. If the script element has its "already executed" flag set
  if (e.manakaiAlreadyExecuted) {
    // 2.5. Abort these steps at this point.
    log ('Running a script: aborted');
    return e;
  }

  // 3. Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // 4. If the element has a src attribute, then a load for ...
  // TODO: load an external resource

  // 5. The first of the following options:
  var hasSrc = e.src != null;

  // 5.1.
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    p.scriptsExecutedAfterParsing.push (e);
    log ('Running a script: aborted (defer)');
  } else if (e.async && hasSrc) {
    // TODO
  } else if (e.async && !hasSrc
      /* && list of scripts that will execute asynchronously is not empty */) {
    // TODO
  } else if (hasSrc && e.manakaiParserInserted) {
    // TODO
  } else if (hasSrc) {
    // TODO
  } else {
    executeScript (doc, e); // even if other scripts are already executing.
  }

  log ('Running a script: end');
  return e;
}; // appendChild
358    
// Executes the script element |e| against |doc|: logs the surrounding
// events and hands the script text to parseAndRunScript.  For an element
// with a src attribute no text is obtained (loading is not implemented),
// so parseAndRunScript receives undefined.
function executeScript (doc, e) {
  log ('executing a script block: start');

  // If the load resulted in an error, then ... firing an error event ...

  // If the load was successful
  log ('load event fired at the script element');

  // Assumed here: scripting is enabled, Document.designMode is disabled,
  // and the Document is the active document in its browsing context.
  var scriptText;
  if (e.src == null) {
    scriptText = e.text;
  } else {
    // TODO: from external file
  }
  parseAndRunScript (doc, scriptText);

  log ('executing a script block: end');
} // executeScript
383    
// Interprets the tiny script language supported by this demo: a sequence of
//   document.write ('...', "...", ...);
// statements whose arguments are quoted string literals without escapes.
// Each statement is executed by calling doc.write with the unquoted
// arguments.  Anything else is logged as a script parse error and stops
// execution.
//
//   doc - object whose |write| method receives the arguments.
//   s   - script source text.  null/undefined (which executeScript passes
//         for scripts with a src attribute, whose loading is not
//         implemented) is treated as an empty script instead of throwing.
function parseAndRunScript (doc, s) {
  if (s == null) s = '';

  while (true) {
    var matched = false;
    // Strip one leading document.write statement, if there is one, and run
    // it from inside the replace callback.
    s = s.replace (/^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/, function (s, t) {
      matched = true;
      var args = [];
      // Collect every string literal in the argument list, minus its quotes.
      t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {
        args.push (v.substring (1, v.length - 1));
        return '';
      });
      doc.write.apply (doc, args);
      return '';
    });
    if (s == '') break;
    if (!matched) {
      log ('Script parse error: "' + s + '"');
      break;
    }
  }
} // parseAndRunScript
404    
// Minimal text node; |data| holds its character data.
function JSText (data) {
  this.data = data;
} // JSText
408    
// Appends the text |s| to this node: extends the last child if it is a
// JSText, otherwise adds a new JSText child.
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var kids = this.childNodes;
  var last = kids.length > 0 ? kids[kids.length - 1] : null;
  if (last instanceof JSText) {
    last.data += s;
    return;
  }
  kids.push (new JSText (s));
}; // manakaiAppendText
419 wakaba 1.2
// document.open ([type [, replace]]): discards the current contents and
// installs a fresh script-created parser with an empty input stream.
//
// The call is ignored while a parser that was not created by a script is
// active and has a defined insertion point.  Returns this document in
// either case.
JSDocument.prototype.open = function () {
  // Two or fewer arguments

  // Step 1.
  var type = arguments[0] || 'text/html'; // only relevant to step 9 (TODO)

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  // The insertion point is stored on the parser itself, not on its input
  // stream.  NaN counts as undefined, matching setInsertionPoint and
  // document.write.
  var parser = this._parser;
  var ip = parser ? parser.insertionPoint : undefined;
  if (parser &&
      !parser.scriptCreated &&
      ip != undefined && !isNaN (ip)) {
    log ('document.open () in parsing mode is ignored');
    return this;
  }

  // Step 4.
  log ('onbeforeunload event fired');
  log ('onunload event fired');

  // Step 5.
  if (this._parser) {
    // Discard the parser.
  }

  // Step 6.
  log ('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser (new InputStream (''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  this._parser.setInsertionPoint (this._parser.in.s.length);

  // Step 12.
  return this;
}; // document.open
471    
// document.write (...strings): concatenates the arguments, splices the
// result into the parser's input stream at the insertion point, advances
// the insertion point past the inserted text, and re-enters the parser to
// process it.  If the insertion point is undefined (or NaN), open () is
// called first and the resulting parser is used.
JSDocument.prototype.write = function () {
  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
    this.open ();
    p = this._parser;
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // |arguments| is not a real array, so join is borrowed from
  // Array.prototype (the static Array.join generic is non-standard).
  var s = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + s + '"' +
       ' before "' + p.in.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
  p.in.s = p.in.s.substring (0, p.insertionPoint) + s
      + p.in.s.substring (p.insertionPoint, p.in.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a script that will execute as soon as the parser resumes
  // TODO

  // 4. Process the characters that were inserted, ...
  p.parse ();

  // 5. Return
  log ('document.write: return');
  return;
}; // document.write
499    
// |text| getter: the concatenated data of this element's direct JSText
// children, in order.  Other child nodes are skipped.
JSElement.prototype.__defineGetter__ ('text', function () {
  var kids = this.childNodes;
  var collected = '';
  var k = 0;
  while (k < kids.length) {
    var kid = kids[k++];
    if (kid instanceof JSText) {
      collected += kid.data;
    }
  }
  return collected;
});
509 wakaba 1.1
// Serialises the children of |n| recursively, one node per line, each line
// starting with '| ' and |indent|.  Elements are followed by their async,
// defer and src values when set and then by their own children; text nodes
// are shown in double quotes; anything else is stringified.
function dumpTree (n, indent) {
  var lines = [];
  var kids = n.childNodes;
  var prefix = '| ' + indent;

  for (var k = 0; k < kids.length; k++) {
    var child = kids[k];

    if (child instanceof JSText) {
      lines.push (prefix + '"' + child.data + '"\n');
      continue;
    }

    if (!(child instanceof JSElement)) {
      lines.push (prefix + child + '\n');
      continue;
    }

    lines.push (prefix + child.localName + '\n');
    if (child.async) lines.push (prefix + ' async=""\n');
    if (child.defer) lines.push (prefix + ' defer=""\n');
    if (child.src) lines.push (prefix + ' src="' + child.src + '"\n');
    lines.push (dumpTree (child, indent + ' '));
  }

  return lines.join ('');
} // dumpTree
528     </script>
529     </head>
<!-- On load, the page script's two hooks (document.sourceElement and
     document.logElement) are bound to the textarea and the output element,
     and the demo is run once.  Editing the textarea re-runs it. -->
<body onload="
  document.sourceElement = document.getElementsByTagName ('textarea')[0];
  document.logElement = document.getElementsByTagName ('output')[0];
  update ();
">

<label for="source">HTML source to parse</label>
<textarea id="source" onchange=" update () ">&lt;html>
&lt;head>&lt;/head>&lt;body>
&lt;p>
&lt;script>
document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
&lt;/script>
&lt;p>
</textarea>

<output></output>

</body>
548     </html>

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24