/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (show annotations) (download) (as text)
Sun Apr 20 12:19:13 2008 UTC (17 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.3: +134 -18 lines
File MIME type: text/html
Attributes support in tokenization; stop parsing support; document.write in deferred script implemented; deferred script support

1 <!DOCTYPE HTML>
2 <html lang=en>
3 <head>
4 <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>
5 <style>
6 textarea {
7 display: block;
8 width: 80%;
9 margin-left: auto;
10 margin-right: auto;
11 min-height: 20em;
12 }
13 output {
14 display: block;
15 font-family: monospace;
16 white-space: -moz-pre-wrap;
17 white-space: pre-wrap;
18 }
19 </style>
20 <script>
// Re-runs the demo: clears the log output, parses the current textarea
// contents with a fresh Parser, then appends a dump of the resulting tree.
function update () {
  // Each run starts from an empty log.
  document.logElement.textContent = '';

  var input = new InputStream (document.sourceElement.value);
  var parser = new Parser (input);
  // The document object is taken from the parser before parsing starts.
  var tree = parser.doc;
  parser.parse ();

  log (dumpTree (tree, ''));
} // update
28
// Appends |s| followed by a newline to the log output element as a text node.
function log (s) {
  var line = document.createTextNode (s + "\n");
  document.logElement.appendChild (line);
} // log
32
// Input stream consumed by the tokenizer.
//
// s: the not-yet-tokenized text.  It is stored in the |s| property, which the
// tokenizer shortens from the front and document.write () splices new text
// into.
function InputStream (s) {
  this.s = s;
} // InputStream
36
// Parser state for one input stream.
//
// i:   the input stream object (its |s| property holds the remaining text).
// doc: optional document to build into; when omitted a new JSDocument bound
//      to this parser is created and flagged as an HTML document.
//
// The insertion point is deliberately left unset here.
function Parser (i, doc) {
  this.parseMode = 'pcdata';

  var target = doc;
  if (!target) {
    target = new JSDocument (this);
    target.manakaiIsHTML = true;
  }

  this.doc = target;
  // The document itself is the bottom entry of the stack of open elements.
  this.openElements = [target];
  this.in = i;
  this.scriptsExecutedAfterParsing = [];
} // Parser
48
// Removes the next token from the front of the input stream (this.in.s) and
// returns it.
//
// Returned token shapes:
//   {type: 'char', value: text}
//   {type: 'start-tag', value: lowercased tag name, attrs: {name: value}}
//   {type: 'end-tag', value: lowercased tag name}
//   {type: 'abort'}  - the insertion point has been reached; nothing consumed
//   {type: 'eof'}    - the input stream is empty
//
// While this.insertionPoint is a number, it counts the characters that may
// still be consumed: character data is split at the insertion point, and a
// tag that would extend past it yields 'abort'.  When the insertion point is
// undefined (or NaN) none of the comparisons below are true, so tokenization
// is unrestricted.
//
// Attribute handling: every attribute in a start tag is collected.  Names are
// lowercased, the first occurrence of a name wins, and an attribute without a
// value (or with an empty value) is stored as the empty string so that
// callers can test presence with |attrs.name != null|.
Parser.prototype.getNextToken = function () {
  var p = this;
  var i = this.in;
  var token;

  if (this.parseMode == 'script') {
    // Inside a script element only character data and the script end tag
    // are recognized.
    if (p.insertionPoint <= 0) {
      return {type: 'abort'};
    }
    i.s = i.s.replace (/^([^<]+)/,
                       function (s, t) {
      if (0 < p.insertionPoint && p.insertionPoint < t.length) {
        // Only the part before the insertion point may be consumed.
        token = {type: 'char', value: t.substring (0, p.insertionPoint)};
        var ip = p.insertionPoint;
        p.insertionPoint = 0;
        return t.substring (ip, t.length);
      }
      token = {type: 'char', value: t};
      p.insertionPoint -= t.length;
      return '';
    });
    if (token) return token;
    i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {
      if (p.insertionPoint < s.length) {
        // The end tag is not completely available yet; leave it in place.
        token = {type: 'abort'};
        return s;
      }
      token = {type: 'end-tag', value: 'script'};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;
    // A "<" that does not start the script end tag is plain character data.
    i.s = i.s.replace (/^</,
                       function (s) {
      token = {type: 'char', value: s};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;
    return {type: 'eof'};
  }

  i.s = i.s.replace (/^<\/([^>]+)>/, function (s, e) {
    if (p.insertionPoint < s.length) {
      token = {type: 'abort'};
      return s;
    }
    token = {type: 'end-tag', value: e.toLowerCase ()};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  i.s = i.s.replace (/^<([^>]+)>/, function (s, e) {
    if (p.insertionPoint < s.length) {
      token = {type: 'abort'};
      return s;
    }
    var tagName;
    var attrs = {};
    // The tag name is the leading run of non-space characters.
    var rest = e.replace (/^[\S]+/, function (v) {
      tagName = v.toLowerCase ();
      return '';
    });
    // name, optionally followed by = and a double-quoted, single-quoted or
    // unquoted value.  The name stops at "=" so that |src="x"| is not
    // swallowed as a single name, and "=" applies to all three value forms.
    var attrPattern =
        /\s*([^\s=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"']+)))?/g;
    rest.replace (attrPattern,
                  function (x, attrName, attrValue1, attrValue2, attrValue3) {
      var value = '';
      if (attrValue1 != null) {
        value = attrValue1;
      } else if (attrValue2 != null) {
        value = attrValue2;
      } else if (attrValue3 != null) {
        value = attrValue3;
      }
      var name = attrName.toLowerCase ();
      // Duplicate attributes: keep the first one.
      if (!Object.prototype.hasOwnProperty.call (attrs, name)) {
        attrs[name] = value;
      }
      return '';
    });
    token = {type: 'start-tag', value: tagName, attrs: attrs};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  if (p.insertionPoint <= 0) {
    return {type: 'abort'};
  }
  i.s = i.s.replace (/^[^<]+/, function (s) {
    if (p.insertionPoint < s.length) {
      // Only the part before the insertion point may be consumed.
      token = {type: 'char', value: s.substring (0, p.insertionPoint)};
      var ip = p.insertionPoint;
      p.insertionPoint = 0;
      return s.substring (ip, s.length);
    }
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  // Anything left at this point (e.g. a "<" that starts no tag) is consumed
  // one character at a time.
  i.s = i.s.replace (/^[\s\S]/, function (s) {
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  return {type: 'eof'};
}; // getNextToken
145
// Runs the tree construction loop: pulls tokens from this.getNextToken ()
// and builds the tree under this.doc until the input is exhausted or the
// tokenizer aborts at the insertion point.
//
// - On 'abort' the method returns immediately; the post-parsing steps below
//   are not run.
// - On 'eof' the scripts queued in this.scriptsExecutedAfterParsing are
//   executed in order and the DOMContentLoaded / load events are logged.
//
// Every token and each milestone is reported through log ().
Parser.prototype.parse = function () {
  log ('start parsing');

  while (true) {
    var token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);
        if (token.attrs.async != null) el.async = true;
        if (token.attrs.defer != null) el.defer = true;
        if (token.attrs.src != null) el.src = token.attrs.src;

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'script';

        // 4.1. Collect all the character tokens.
        while (true) {
          token = this.getNextToken ();
          log ('token: ' + token.type + ' "' + token.value + '"');

          if (token.type == 'char') {
            // 5. Append a single Text node to the script element node.
            el.manakaiAppendText (token.value);

          // 4.2. Until it returns a token that is not a character token, or
          // until it stops tokenising.
          } else if (token.type == 'eof' ||
                     (token.type == 'end-tag' && token.value == 'script') ||
                     token.type == 'abort') {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            // 7.1. If the next token is not an end tag token with ...
            if (token.type != 'end-tag') {
              // 7.2. This is a parse error.
              log ('Parse error: no </' + 'script>');

              // 7.3. Mark the script element as "already executed".
              el.manakaiAlreadyExecuted = true;
            } else {
              // 7.4. Ignore it.
              //
            }
            break;
          }
        }

        // 8.1. If the parser were originally created for the ...
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and ...
          // The flag must be the same |manakaiAlreadyExecuted| property that
          // step 7.3 sets and that appendChild checks.
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the ...
        var oldInsertionPoint = this.insertionPoint;
        // 9.2. Let the insertion point be just before the next input ...
        this.setInsertionPoint (0);

        // 10. Append the new element to the current node.
        // (Appending a script element is what runs it.)
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 11. Let the insertion point have the value of the old ...
        this.setInsertionPoint (oldInsertionPoint);

        // 12. If there is a script that will execute as soon as ...


      } else {
        var el = new JSElement (this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild (el);
        this.openElements.push (el);
      }
    } else if (token.type == 'end-tag') {
      // Only an end tag matching the current node closes anything.
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'char') {
      this.openElements[this.openElements.length - 1].manakaiAppendText
          (token.value);
    } else if (token.type == 'eof') {
      break;
    } else if (token.type == 'abort') {
      log ('parse: abort');
      return;
    }
  }

  log ('stop parsing');

  // readyState = 'interactive'

  // "When a script completes loading" rules start applying.

  // TODO: Handles "list of scripts that will execute as soon as possible"
  // and "list of scripts that will execute asynchronously"

  // Handle "list of scripts that will execute when the document has finished
  // parsing".
  var list = this.scriptsExecutedAfterParsing;
  while (list.length > 0) {
    // TODO: break unless completed loading

    // Step 1.
    //

    // Step 2. and Step 3.
    log ('Executing a |defer|red script...');
    executeScript (this.doc, list.shift ());

    // Step 4.
  }

  log ('DOMContentLoaded event fired');

  // Everything that "delays the load event" has completed:
  // readyState = 'complete'
  log ('load event fired');
}; // parse
274
// Stores a new insertion point on the parser and logs the change.
//
// ip: number of characters from the start of the remaining input, or
//     undefined / null / NaN to clear the insertion point (it is then stored
//     as undefined).
Parser.prototype.setInsertionPoint = function (ip) {
  var cleared = ip == undefined || ip == null || isNaN (ip);
  if (cleared) {
    log ('insertion point: set to undefined');
    this.insertionPoint = undefined;
    return;
  }

  if (ip == this.in.s.length) {
    log ('insertion point: end of file');
  } else {
    // Show the first characters of the remaining input for orientation.
    var preview = this.in.s.substring (0, 10);
    log ('insertion point: set to ' + ip +
         ' (before "' + preview + '")');
  }
  this.insertionPoint = ip;
}; // setInsertionPoint
288
// Document node of the demo tree.
//
// p: the parser this document is bound to; kept in |_parser| so that
//    document.open () / document.write () and script execution can reach it.
function JSDocument (p) {
  // Children are kept in a plain array.
  this.childNodes = [];
  this._parser = p;
} // JSDocument
293
// Element node of the demo tree.
//
// doc:       the owner document, stored as |ownerDocument|.
// localName: the element's local name.
function JSElement (doc, localName) {
  this.localName = localName;
  this.ownerDocument = doc;
  // Children are kept in a plain array.
  this.childNodes = [];
} // JSElement
299
// Appends |e| as the last child of this node and returns |e|.  Shared by
// documents and elements.
//
// Appending a script element additionally performs the "running a script"
// steps: an element already flagged as executed is left alone; a |defer|
// script without |async| is queued on the parser's
// scriptsExecutedAfterParsing list; an inline script that is neither async
// nor external is executed immediately.  The remaining cases are not
// implemented yet.
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push (e);
  e.parentNode = this;

  // Only script elements need further processing.
  if (e.localName != 'script') {
    return e;
  }

  log ('Running a script: start');

  // |this| is either an element (which has an owner document) or the
  // document itself.
  var doc = this.ownerDocument || this;
  var p = doc._parser;

  // 1. Script type
  // 2.1. - 2.3. (scripting disabled, XML innerHTML, unsupported language):
  // not handled.

  // 2.4. / 2.5. If the "already executed" flag is set, abort these steps.
  if (e.manakaiAlreadyExecuted) {
    log ('Running a script: aborted');
    return e;
  }

  // 3. Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // 4. If the element has a src attribute, then a load for ...
  // TODO: load an external resource

  // 5. The first of the following options:
  var isExternal = e.src != null;
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    // 5.1. Run after parsing has finished.
    p.scriptsExecutedAfterParsing.push (e);
    log ('Running a script: aborted (defer)');
  } else if (e.async || isExternal) {
    // TODO: async with src
    // TODO: async without src (when the list of scripts that will execute
    //       asynchronously is not empty)
    // TODO: parser-inserted external script
    // TODO: other external script
  } else {
    executeScript (doc, e); // even if other scripts are already executing.
  }

  log ('Running a script: end');
  return e;
}; // appendChild
358
// Executes the script element |e| in the context of document |doc|.
//
// Inline scripts (no |src|) are run by passing the element's text to
// parseAndRunScript ().  Loading external scripts is not implemented, so an
// element with |src| is reported in the log and skipped; there is no source
// text to hand to the script runner in that case.
function executeScript (doc, e) {
  log ('executing a script block: start');

  // If the load resulted in an error, then ... firing an error event ...

  // If the load was successful
  log ('load event fired at the script element');

  // Assumed here: scripting is enabled, Document.designMode is disabled, and
  // the Document is the active document in its browsing context.
  if (e.src != null) {
    // TODO: from external file
    log ('executing a script block: external script "' + e.src +
         '" is not supported; skipped');
  } else {
    parseAndRunScript (doc, e.text);
  }

  log ('executing a script block: end');
} // executeScript
383
// Minimal script interpreter.  The only statement understood is
//
//   document.write (STRING, STRING, ...);
//
// where each STRING is a single- or double-quoted literal without escapes.
// Statements are executed in order by calling doc.write () with the literal
// contents as arguments.
//
// doc: object whose write () method receives the arguments.
// s:   script source text.  A missing (null / undefined) or whitespace-only
//      source is an empty program and runs nothing.
//
// Anything that is not a recognized statement stops execution and is
// reported through log () as a script parse error.
function parseAndRunScript (doc, s) {
  var rest = s == null ? '' : String (s);
  var statement = /^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/;

  while (true) {
    // Nothing but white space left: the program is finished.
    if (/^\s*$/.test (rest)) break;

    var matched = false;
    rest = rest.replace (statement, function (whole, t) {
      matched = true;
      var args = [];
      // Collect each literal with its quotes stripped.
      t.replace (/('[^']*'|"[^"]*")/g, function (x, v) {
        args.push (v.substring (1, v.length - 1));
        return '';
      });
      doc.write.apply (doc, args);
      return '';
    });

    if (!matched) {
      log ('Script parse error: "' + rest + '"');
      break;
    }
  }
} // parseAndRunScript
404
// Text node of the demo tree.
//
// data: the node's character data, stored in |data|.
function JSText (data) {
  this.data = data;
} // JSText
408
// Appends the string |s| as character data at the end of this node.  When the
// last child is already a text node its data is extended; otherwise a new
// JSText child is added.  Shared by documents and elements.
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var children = this.childNodes;
  // For an empty child list this is undefined, which is not a JSText.
  var last = children[children.length - 1];

  if (last instanceof JSText) {
    last.data += s;
    return;
  }
  children.push (new JSText (s));
}; // manakaiAppendText
419
// document.open ([type [, replace]])
//
// Discards the current contents of the document and attaches a fresh,
// script-created parser with an empty input stream whose insertion point is
// at the end of that stream.  Returns the document.
//
// The call is ignored (the document is returned unchanged) while a parser
// that was not created by document.open () has a defined insertion point,
// i.e. while a script is running during parsing.  An insertion point of
// undefined, null or NaN counts as "not defined", the same way
// document.write () and setInsertionPoint () treat it.
JSDocument.prototype.open = function () {
  // Two or fewer arguments

  // Step 1.
  var type = arguments[0] || 'text/html';

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  // The insertion point is a property of the parser itself, not of its
  // input stream.
  if (this._parser && !this._parser.scriptCreated) {
    var ip = this._parser.insertionPoint;
    if (ip != undefined && !isNaN (ip)) {
      log ('document.open () in parsing mode is ignored');
      return this;
    }
  }

  // Step 4.
  log ('onbeforeunload event fired');
  log ('onunload event fired');

  // Step 5.
  if (this._parser) {
    // Discard the parser.
  }

  // Step 6.
  log ('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser (new InputStream (''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  this._parser.setInsertionPoint (this._parser.in.s.length);

  // Step 12.
  return this;
}; // document.open
471
// document.write (text, ...)
//
// Concatenates all arguments, inserts the result into the parser's input
// stream just before the insertion point, advances the insertion point past
// the inserted text and lets the parser process it.  When the insertion point
// is undefined (or NaN) the document is opened first via this.open ().
// Returns nothing.
JSDocument.prototype.write = function () {
  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
    this.open ();
    // open () may have replaced the parser.
    p = this._parser;
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // |arguments| is not a real array, so borrow the standard join method.
  var s = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + s + '"' +
       ' before "' + p.in.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
  p.in.s = p.in.s.substring (0, p.insertionPoint) + s
      + p.in.s.substring (p.insertionPoint, p.in.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a script that will execute as soon as the parser resumes
  // TODO

  // 4. Process the characters that were inserted, ...
  p.parse ();

  // 5. Return
  log ('document.write: return');
  return;
}; // document.write
499
// |text| accessor of elements: the concatenated data of the element's direct
// JSText children, in order.  Child elements and their contents are not
// included.  Defined with the standard Object.defineProperty () using the
// same flags (enumerable, configurable) that the legacy __defineGetter__ ()
// produces.
Object.defineProperty (JSElement.prototype, 'text', {
  get: function () {
    var r = '';
    for (var i = 0; i < this.childNodes.length; i++) {
      if (this.childNodes[i] instanceof JSText) {
        r += this.childNodes[i].data;
      }
    }
    return r;
  },
  enumerable: true,
  configurable: true
});
509
// Serializes the children of node |n| (recursively) into the line-oriented
// tree dump shown in the log.
//
// n:      node whose |childNodes| are dumped (the node itself is not).
// indent: string placed between the "| " prefix and each entry; one space is
//         added per nesting level.
//
// Elements are written as their local name followed by their truthy
// async / defer / src properties and their children; text nodes as their
// quoted data; anything else via string conversion.  Returns the dump as a
// string with one "\n"-terminated line per entry.
function dumpTree (n, indent) {
  var prefix = '| ' + indent;
  var pieces = [];
  var children = n.childNodes;

  for (var k = 0; k < children.length; k++) {
    var child = children[k];

    if (child instanceof JSElement) {
      pieces.push (prefix + child.localName + '\n');
      if (child.async) pieces.push (prefix + ' async=""\n');
      if (child.defer) pieces.push (prefix + ' defer=""\n');
      if (child.src) pieces.push (prefix + ' src="' + child.src + '"\n');
      pieces.push (dumpTree (child, indent + ' '));
      continue;
    }

    if (child instanceof JSText) {
      pieces.push (prefix + '"' + child.data + '"\n');
      continue;
    }

    pieces.push (prefix + child + '\n');
  }

  return pieces.join ('');
} // dumpTree
528 </script>
529 </head>
530 <body onload="
531 document.sourceElement = document.getElementsByTagName ('textarea')[0];
532 document.logElement = document.getElementsByTagName ('output')[0];
533 update ();
534 ">
535
536 <textarea onchange=" update () ">&lt;html>
537 &lt;head>&lt;/head>&lt;body>
538 &lt;p>
539 &lt;script>
540 document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
541 &lt;/script>
542 &lt;p>
543 </textarea>
544
545 <output></output>
546
547 </body>
548 </html>

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24