/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (hide annotations) (download) (as text)
Fri Apr 25 11:40:56 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.4: +17 -4 lines
File MIME type: text/html
Bug fix

1 wakaba 1.1 <!DOCTYPE HTML>
2     <html lang=en>
3     <head>
4     <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>
<style>
  /* Source editor: a centred block, 80% of the page width. */
  textarea {
    display: block;
    min-height: 20em;
    width: 80%;
    margin-right: auto;
    margin-left: auto;
  }

  /* Log and tree dump area: monospace text with preserved line breaks. */
  output {
    display: block;
    font-family: monospace;
    /* Vendor-prefixed value first; the standard value below overrides it
       wherever it is understood. */
    white-space: -moz-pre-wrap;
    white-space: pre-wrap;
  }
</style>
20     <script>
/**
 * Re-runs the demo: empties the log element, parses the current value of
 * the source element with a fresh Parser and logs a dump of the resulting
 * tree.
 */
function update () {
  // Start from an empty log.
  document.logElement.textContent = '';

  var source = document.sourceElement.value;
  var parser = new Parser (new InputStream (source));

  // The document object is captured before parse () runs.
  var tree = parser.doc;
  parser.parse ();

  log (dumpTree (tree, ''));
} // update
28    
/**
 * Appends |s| followed by a newline to the log element
 * (|document.logElement|) as a new text node.
 */
function log (s) {
  var line = document.createTextNode (s + "\n");
  document.logElement.appendChild (line);
} // log
32    
/**
 * Input stream of the parser.
 *
 * @param s  The initial input string.  It is stored in the |s| property,
 *           which the tokenizer shortens from the front and which
 *           document.write () splices new text into.
 */
function InputStream (s) {
  this.s = s;
} // InputStream
36    
/**
 * Parser state.
 *
 * @param i    The InputStream to read from (stored as |this.in|).
 * @param doc  Optional document to parse into.  When omitted, a new
 *             JSDocument bound to this parser is created and flagged as
 *             HTML.
 *
 * The parser starts in the 'pcdata' mode with the document as the only
 * open element and with no deferred scripts.  |insertionPoint| is not
 * initialised here, i.e. it starts out undefined.
 */
function Parser (i, doc) {
  this.parseMode = 'pcdata';

  var target = doc;
  if (!target) {
    target = new JSDocument (this);
    target.manakaiIsHTML = true;
  }

  this.doc = target;
  this.openElements = [target];
  this.in = i;
  this.scriptsExecutedAfterParsing = [];
} // Parser
48    
/**
 * Tokenizer: removes the next token from the front of the input stream
 * (|this.in.s|) and returns it.
 *
 * Returned tokens are plain objects:
 *   {type: 'char', value}              - character data
 *   {type: 'start-tag', value, attrs}  - |value| is the lowercased tag
 *                                        name, |attrs| maps lowercased
 *                                        attribute names to string values
 *                                        ('' when no value is given)
 *   {type: 'end-tag', value}           - |value| is the lowercased name
 *   {type: 'eof'}                      - nothing left that matches
 *   {type: 'abort'}                    - the characters needed to decide
 *                                        lie at or after the insertion
 *                                        point; nothing was consumed
 *
 * |this.insertionPoint| is an offset from the start of the remaining
 * input and is decreased by the number of characters consumed.  When it
 * is undefined or NaN every comparison against it is false, so the
 * tokenizer never aborts.
 */
Parser.prototype.getNextToken = function () {
  var p = this;
  var i = this.in;
  var token;

  if (this.parseMode == 'script') {
    // Nothing in front of the insertion point is available.
    if (p.insertionPoint <= 0) {
      return {type: 'abort'};
    }

    // Text up to the next "<", cut off at the insertion point.
    i.s = i.s.replace (/^([^<]+)/, function (s, t) {
      if (0 < p.insertionPoint && p.insertionPoint < t.length) {
        token = {type: 'char', value: t.substring (0, p.insertionPoint)};
        var ip = p.insertionPoint;
        p.insertionPoint = 0;
        return t.substring (ip, t.length);
      }
      token = {type: 'char', value: t};
      p.insertionPoint -= t.length;
      return '';
    });
    if (token) return token;

    // A complete </script> end tag; it must lie entirely before the
    // insertion point.
    i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {
      if (p.insertionPoint < s.length) {
        token = {type: 'abort'};
        return s;
      }
      token = {type: 'end-tag', value: 'script'};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    // A possible beginning of "</script" that ends at the insertion
    // point: more characters may still be inserted there, so wait.
    // Only the characters in front of the insertion point are compared;
    // anything after it will end up behind the text inserted later.
    var m;
    if ((p.insertionPoint < '</script'.length) &&
        (m = i.s.match (/^<\/([SCRIPTscript]+)/))) {
      var known = Math.max (p.insertionPoint - '</'.length, 0);
      var v = m[1].substring (0, known).toLowerCase ();
      if (v == 'script'.substring (0, known)) {
        return {type: 'abort'};
      }
    }

    // A "<" that does not start the end tag is character data.
    i.s = i.s.replace (/^</, function (s) {
      token = {type: 'char', value: s};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;
    return {type: 'eof'};
  }

  // End tag.  The pattern also matches an unterminated tag at the end of
  // the input; such a tag is only accepted when no further characters can
  // be inserted after it.
  i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
    if (p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length &&
         s.charAt (s.length - 1) != '>')) {
      token = {type: 'abort'};
      return s;
    }
    token = {type: 'end-tag', value: e.toLowerCase ()};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Start tag, with the same insertion point rules as for end tags.
  i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
    if (p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length &&
         s.charAt (s.length - 1) != '>')) {
      token = {type: 'abort'};
      return s;
    }

    var tagName;
    var attrs = {};
    var rest = e.replace (/^[\S]+/, function (v) {
      tagName = v.toLowerCase ();
      return '';
    });

    // Attributes: name, optionally followed by = and a double-quoted,
    // single-quoted or unquoted value.  All attributes are read; the
    // first occurrence of a name wins.
    var attr;
    while ((attr = /^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^\s"']+)))?/.exec (rest))) {
      var attrName = attr[1].toLowerCase ();
      var attrValue = '';
      for (var k = 2; k <= 4; k++) {
        if (attr[k] != null) attrValue = attr[k];
      }
      if (!Object.prototype.hasOwnProperty.call (attrs, attrName)) {
        attrs[attrName] = attrValue;
      }
      rest = rest.substring (attr[0].length);
    }

    token = {type: 'start-tag', value: tagName, attrs: attrs};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  if (p.insertionPoint <= 0) {
    return {type: 'abort'};
  }

  // Text up to the next "<", cut off at the insertion point.
  i.s = i.s.replace (/^[^<]+/, function (s) {
    if (p.insertionPoint < s.length) {
      token = {type: 'char', value: s.substring (0, p.insertionPoint)};
      var ip = p.insertionPoint;
      p.insertionPoint = 0;
      return s.substring (ip, s.length);
    }
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Any other single character (in practice a "<" that matched neither
  // tag pattern).
  i.s = i.s.replace (/^[\s\S]/, function (s) {
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  return {type: 'eof'};
}; // getNextToken
157    
/**
 * Tree construction loop.
 *
 * Pulls tokens from getNextToken () and builds the tree under the current
 * open element.  |script| start tags are handled specially: their
 * character data is collected in 'script' mode and the element is then
 * appended, which may run the script (see appendChild).
 *
 * On an 'abort' token the method returns immediately.  On 'eof' it
 * leaves the loop, runs the scripts queued in
 * |scriptsExecutedAfterParsing| and logs the end-of-parsing events.
 * Every token and every step of note is written to the log.
 */
Parser.prototype.parse = function () {
  log ('start parsing');

  while (true) {
    var token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);
        if (token.attrs.async != null) el.async = true;
        if (token.attrs.defer != null) el.defer = true;
        if (token.attrs.src != null) el.src = token.attrs.src;

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'script';

        // 4.1. Collect all the character tokens.
        while (true) {
          token = this.getNextToken ();
          log ('token: ' + token.type + ' "' + token.value + '"');

          if (token.type == 'char') {
            // 5. Append a single Text node to the script element node.
            el.manakaiAppendText (token.value);

          // 4.2. Until it returns a token that is not a character token, or
          // until it stops tokenising.
          } else if (token.type == 'eof' ||
                     (token.type == 'end-tag' && token.value == 'script') ||
                     token.type == 'abort') {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            // 7.1. If the next token is not an end tag token with ...
            if (token.type != 'end-tag') {
              // 7.2. This is a parse error.
              log ('Parse error: no </' + 'script>');

              // 7.3. Mark the script element as "already executed".
              el.manakaiAlreadyExecuted = true;
            } else {
              // 7.4. Ignore it.
              //
            }
            break;
          }
        }

        // 8.1. If the parser were originally created for the ...
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and ...
          // The flag must be the one appendChild checks
          // (|manakaiAlreadyExecuted|); any other name has no effect.
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the ...
        var oldInsertionPoint = this.insertionPoint;
        // 9.2. Let the insertion point be just before the next input ...
        this.setInsertionPoint (0);

        // 10. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 11. Let the insertion point have the value of the old ...
        // The current insertion point is added so that text written by
        // the script but not yet consumed stays in front of it.
        oldInsertionPoint += this.insertionPoint;
        this.setInsertionPoint (oldInsertionPoint);

        // 12. If there is a script that will execute as soon as ...


      } else {
        var el = new JSElement (this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild (el);
        this.openElements.push (el);
      }
    } else if (token.type == 'end-tag') {
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'char') {
      this.openElements[this.openElements.length - 1].manakaiAppendText
          (token.value);
    } else if (token.type == 'eof') {
      break;
    } else if (token.type == 'abort') {
      log ('parse: abort');
      return;
    }
  }

  log ('stop parsing');

  // readyState = 'interactive'

  // "When a script completes loading" rules start applying.

  // TODO: Handles "list of scripts that will execute as soon as possible"
  // and "list of scripts that will execute asynchronously"

  // Handle "list of scripts that will execute when the document has finished
  // parsing".
  var list = this.scriptsExecutedAfterParsing;
  while (list.length > 0) {
    // TODO: break unless completed loading

    // Step 1.
    //

    // Step 2. and Step 3.
    log ('Executing a |defer|red script...');
    executeScript (this.doc, list.shift ());

    // Step 4.
  }

  log ('DOMContentLoaded event fired');

  // "delays the load event" things have completed:
  // readyState = 'complete'
  log ('load event fired');
}; // parse
287    
/**
 * Sets the insertion point and logs the change.
 *
 * @param ip  Offset from the start of the remaining input (|this.in.s|).
 *            undefined, null and NaN all mean "no insertion point" and
 *            are stored as undefined.
 */
Parser.prototype.setInsertionPoint = function (ip) {
  // |ip == null| is also true for undefined.
  if (ip == null || isNaN (ip)) {
    log ('insertion point: set to undefined');
    this.insertionPoint = undefined;
  } else if (ip == this.in.s.length) {
    log ('insertion point: end of file');
    this.insertionPoint = ip;
  } else {
    // Preview the text that follows the insertion point itself, not the
    // start of the stream.
    log ('insertion point: set to ' + ip +
         ' (before "' + this.in.s.substring (ip, ip + 10) + '")');
    this.insertionPoint = ip;
  }
}; // setInsertionPoint
301    
/**
 * Minimal document node of the demo tree.
 *
 * @param p  The parser bound to this document; kept in |_parser| and
 *           used by appendChild, open () and write ().
 */
function JSDocument (p) {
  this.childNodes = [];
  this._parser = p;
} // JSDocument
306    
/**
 * Minimal element node of the demo tree.
 *
 * @param doc        The owner document.
 * @param localName  The element's local name.
 */
function JSElement (doc, localName) {
  this.localName = localName;
  this.ownerDocument = doc;
  this.childNodes = [];
} // JSElement
312    
/**
 * Appends |e| to this node's children and sets its |parentNode|.
 *
 * When |e| is a |script| element the "running a script" steps are
 * performed: an element already flagged as executed is skipped, a
 * |defer| (and not |async|) script is queued on the parser, a script
 * that is |async| or has a |src| is not handled yet, and any other
 * script is executed immediately.
 *
 * @param e  The node to append.
 * @return   |e|.
 */
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push (e);
  e.parentNode = this;

  // Only script elements need further processing.
  if (e.localName != 'script') return e;

  log ('Running a script: start');

  var doc = this.ownerDocument || this;
  var p = doc._parser;

  // 1. Script type
  // 2.1. - 2.3. Scripting disabled / XML innerHTML / unsupported language
  //   (not implemented)

  // 2.4. - 2.5. "Already executed" scripts are not run again.
  if (e.manakaiAlreadyExecuted) {
    log ('Running a script: aborted');
    return e;
  }

  // 3. Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // 4. If the element has a src attribute, then a load for ...
  // TODO: load an external resource

  // 5. The first of the following options:
  var hasSrc = e.src != null;
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    // 5.1. Deferred: run when parsing has finished.
    p.scriptsExecutedAfterParsing.push (e);
    log ('Running a script: aborted (defer)');
  } else if (e.async || hasSrc) {
    // TODO: the remaining cases are not implemented:
    //   - async with src
    //   - async without src (/* && list of scripts that will execute
    //     asynchronously is not empty */)
    //   - src on a parser-inserted element
    //   - src on any other element
  } else {
    executeScript (doc, e); // even if other scripts are already executing.
  }

  log ('Running a script: end');
  return e;
}; // appendChild
371    
/**
 * Executes the script block |e| in the context of |doc|.
 *
 * Only inline scripts are supported: the element's |text| is handed to
 * parseAndRunScript ().  A script with a |src| is skipped with a log
 * message, because loading external resources is not implemented and
 * there is no source text to run.
 *
 * @param doc  The document the script runs against.
 * @param e    The script element.
 */
function executeScript (doc, e) {
  log ('executing a script block: start');

  // If the load resulted in an error, then ... firing an error event ...

  // If the load was successful
  log ('load event fired at the script element');

  if (true) {
    // Scripting is enabled, Document.designMode is disabled,
    // Document is the active document in its browsing context

    if (e.src != null) {
      // TODO: from external file
      // Do not call parseAndRunScript () without a source string; it
      // would throw on |s.replace|.
      log ('executing a script block: external script skipped');
    } else {
      parseAndRunScript (doc, e.text);
    }
  }

  log ('executing a script block: end');
} // executeScript
396    
/**
 * Interprets the tiny script language of the demo: a sequence of
 * |document.write (...);| statements whose arguments are single- or
 * double-quoted string literals.
 *
 * Each statement is removed from the front of |s| and turned into a call
 * of |doc.write| with the literal contents as arguments.  Processing
 * stops when the script is used up, or with a "Script parse error" log
 * entry at the first position that is not such a statement.
 *
 * @param doc  Object whose |write| method is called.
 * @param s    The script source.
 */
function parseAndRunScript (doc, s) {
  var statement = /^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/;
  var literal = /('[^']*'|"[^"]*")/g;

  for (;;) {
    var found = s.match (statement);
    if (found) {
      // Strip the quotes from every literal in the argument list.
      var quoted = found[1].match (literal) || [];
      var args = [];
      for (var k = 0; k < quoted.length; k++) {
        args.push (quoted[k].substring (1, quoted[k].length - 1));
      }
      doc.write.apply (doc, args);
      s = s.substring (found[0].length);
    }

    if (s == '') break;
    if (!found) {
      log ('Script parse error: "' + s + '"');
      break;
    }
  }
} // parseAndRunScript
417    
/**
 * Minimal text node of the demo tree.
 *
 * @param data  The character data of the node.
 */
function JSText (data) {
  this.data = data;
} // JSText
421    
/**
 * Appends the character data |s| to this node: it is added to the last
 * child if that child is a text node, otherwise a new JSText child is
 * created.
 *
 * @param s  The text to append.
 */
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  // For an empty child list |last| is undefined, which is not a JSText.
  var last = this.childNodes[this.childNodes.length - 1];
  if (last instanceof JSText) {
    last.data += s;
  } else {
    this.childNodes.push (new JSText (s));
  }
}; // manakaiAppendText
432 wakaba 1.2
/**
 * document.open (): discards the current content and attaches a new,
 * script-created parser with an empty input stream.
 *
 * The call is ignored when the document is being parsed by a parser that
 * is not script-created and that parser has an insertion point.
 *
 * @param type     (arguments[0]) MIME type, defaults to 'text/html'.
 * @param replace  (arguments[1]) the string 'replace' to request
 *                 replacement; history handling is not implemented.
 * @return         This document.
 */
JSDocument.prototype.open = function () {
  // Two or fewer arguments

  // Step 1.
  var type = arguments[0] || 'text/html';

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  // The insertion point is a property of the parser, not of its input
  // stream.  NaN counts as "undefined", as it does in write (); the
  // tokenizer produces NaN when it subtracts from an undefined insertion
  // point.
  if (this._parser &&
      !this._parser.scriptCreated &&
      !isNaN (this._parser.insertionPoint)) {
    log ('document.open () in parsing mode is ignored');
    return this;
  }

  // Step 4.
  log ('onbeforeunload event fired');
  log ('onunload event fired');

  // Step 5.
  if (this._parser) {
    // Discard the parser.
  }

  // Step 6.
  log ('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser (new InputStream (''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  this._parser.setInsertionPoint (this._parser.in.s.length);

  // Step 12.
  return this;
}; // document.open
484    
/**
 * document.write (): concatenates all arguments, inserts the result into
 * the parser's input stream at the insertion point, moves the insertion
 * point behind the inserted text and runs the parser on it.
 *
 * When the parser has no insertion point (undefined or NaN), open () is
 * called first and the parser it installs is used.
 */
JSDocument.prototype.write = function () {
  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
    this.open ();
    p = this._parser;
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // |arguments| is not a real array, so borrow Array.prototype.join; the
  // static Array.join () generic is non-standard and no longer available.
  var s = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + s + '"' +
       ' before "' + p.in.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
  p.in.s = p.in.s.substring (0, p.insertionPoint) + s
      + p.in.s.substring (p.insertionPoint, p.in.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a script that will execute as soon as the parser resumes
  // TODO

  // 4. Process the characters that were inserted, ...
  p.parse ();

  // 5. Return
  log ('document.write: return');
  return;
}; // document.write
512    
/**
 * |text| accessor of elements: the concatenated data of all JSText
 * children, in order.  Other children are skipped.
 *
 * Defined with the standard Object.defineProperty () instead of the
 * deprecated __defineGetter__ (); |enumerable| and |configurable| are set
 * so that the property has the same attributes as before.
 */
Object.defineProperty (JSElement.prototype, 'text', {
  get: function () {
    var r = '';
    for (var i = 0; i < this.childNodes.length; i++) {
      if (this.childNodes[i] instanceof JSText) {
        r += this.childNodes[i].data;
      }
    }
    return r;
  },
  enumerable: true,
  configurable: true
});
522 wakaba 1.1
/**
 * Serialises the children of |n| as an indented tree dump.
 *
 * Each line starts with "| " and the current indent.  Elements are shown
 * by local name, followed by their truthy |async|, |defer| and |src|
 * properties and, one level deeper, their children.  Text nodes are shown
 * in double quotes; any other node is converted to a string.
 *
 * @param n       The node whose children are dumped.
 * @param indent  The indent string for this level.
 * @return        The dump; '' when |n| has no children.
 */
function dumpTree (n, indent) {
  var prefix = '| ' + indent;
  var parts = [];

  for (var k = 0; k < n.childNodes.length; k++) {
    var child = n.childNodes[k];

    if (child instanceof JSElement) {
      parts.push (prefix + child.localName + '\n');
      if (child.async) parts.push (prefix + ' async=""\n');
      if (child.defer) parts.push (prefix + ' defer=""\n');
      if (child.src) parts.push (prefix + ' src="' + child.src + '"\n');
      parts.push (dumpTree (child, indent + ' '));
      continue;
    }

    if (child instanceof JSText) {
      parts.push (prefix + '"' + child.data + '"\n');
      continue;
    }

    parts.push (prefix + child + '\n');
  }

  return parts.join ('');
} // dumpTree
541     </script>
542     </head>
543     <body onload="
544     document.sourceElement = document.getElementsByTagName ('textarea')[0];
545     document.logElement = document.getElementsByTagName ('output')[0];
546     update ();
547     ">
548    
<!-- Source editor.  The label gives the control an accessible name; the
     initial content is the sample document parsed on load. -->
<label for="source">HTML source to parse</label>
<textarea id="source" onchange=" update () ">&lt;html>
&lt;head>&lt;/head>&lt;body>
&lt;p>
&lt;script>
document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
&lt;/script>
&lt;p>
</textarea>
557    
558     <output></output>
559    
560     </body>
561     </html>

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24