/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory | Revision Log


Revision 1.7 - (show annotations) (download) (as text)
Fri Apr 25 23:03:35 2008 UTC (17 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.6: +40 -9 lines
File MIME type: text/html
UI improvement, round 1

1 <!DOCTYPE HTML>
2 <html lang=en>
3 <head>
4 <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>
5 <style>
6 h1, h2 {
7 margin: 0;
8 font-size: 100%;
9 }
10 p, pre {
11 margin: 0;
12 }
13 textarea {
14 width: 100%;
15 -width: 99%;
16 height: 10em;
17 }
18 output {
19 display: block;
20 font-family: monospace;
21 white-space: -moz-pre-wrap;
22 white-space: pre-wrap;
23 }
24 </style>
25 <script>
// Handle of the pending debounce timer; 0 while no update is scheduled.
var delayedUpdater = 0;

// Debounced entry point used by the textarea event handlers: cancels any
// update that is still pending and schedules |update2| to run 100 ms later.
function update () {
  var pending = delayedUpdater;
  delayedUpdater = 0;
  if (pending) {
    clearTimeout (pending);
  }
  delayedUpdater = setTimeout (update2, 100);
} // update
35
// Re-parses the markup currently in the source textarea, writes the
// resulting log and tree dump into the log element, and refreshes the
// permalink so that it carries the current markup in its |s| parameter.
function update2 () {
  document.logElement.textContent = '';
  var v = document.sourceElement.value;
  var p = new Parser (new InputStream (v));
  var doc = p.doc;
  p.parse ();
  log (dumpTree (doc, ''));

  // Drop any existing query string or fragment first; otherwise a page
  // that was itself opened through a permalink (or with a #fragment)
  // would produce "...?s=a?s=b" or bury the query inside the fragment.
  var base = location.href.replace (/[?#][\s\S]*$/, '');
  document.links['permalink'].href
      = base + '?s=' + encodeURIComponent (v);
} // update2
47
// Current nesting depth of the log; each level indents a line by one space.
var logIndentLevel = 0;

// Appends |s|, indented by the current log depth and terminated by a
// newline, to the log element as a new text node.
function log (s) {
  var pad = '';
  for (var depth = logIndentLevel; depth > 0; depth--) {
    pad += ' ';
  }
  var line = document.createTextNode (pad + s + "\n");
  document.logElement.appendChild (line);
} // log
55
// Mutable input stream for the parser.  |s| is the text that has not been
// tokenised yet; the tokeniser removes consumed text from its front and
// document.write splices new text into it.
function InputStream (s) {
  this.s = s;
} // InputStream
59
// Creates a parser that reads from the input stream |i|.
//
// i   - The InputStream to tokenise.
// doc - Optional document to build into.  When omitted, a fresh
//       JSDocument bound to this parser is created and flagged as HTML.
function Parser (i, doc) {
  this.parseMode = 'pcdata';

  var target = doc;
  if (!target) {
    target = new JSDocument (this);
    target.manakaiIsHTML = true;
  }

  this.in = i;
  this.doc = target;
  // The stack of open elements starts with the document itself.
  this.openElements = [target];
  this.scriptsExecutedAfterParsing = [];
} // Parser
71
// Removes the next token from the front of the input stream (|this.in.s|)
// and returns it.
//
// Returns an object whose |type| is one of:
//   'char'      - |value| is a run of character data;
//   'start-tag' - |value| is the lowercased tag name, |attrs| maps the
//                 lowercased attribute name to its value;
//                 (only a single attribute is recognised)
//   'end-tag'   - |value| is the lowercased tag name;
//   'abort'     - nothing was consumed because the next token would
//                 cross the insertion point;
//   'eof'       - the input stream is empty.
//
// |this.insertionPoint| is decremented by the length of whatever was
// consumed.  While it is undefined (or NaN) every comparison against it
// is false, so the tokeniser runs without any limit.
Parser.prototype.getNextToken = function () {
  var p = this;
  var i = this.in;
  var token;

  // True if the tag text |s| must not be consumed yet: either part of it
  // lies beyond the insertion point, or it ends exactly at the insertion
  // point but is still missing its closing ">".
  function blockedByInsertionPoint (s) {
    return p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length &&
         s.charAt (s.length - 1) != '>');
  }

  if (this.parseMode == 'script') {
    if (p.insertionPoint <= 0) {
      return {type: 'abort'};
    }

    // Character data up to the next "<", cut at the insertion point.
    i.s = i.s.replace (/^([^<]+)/, function (s, t) {
      if (0 < p.insertionPoint && p.insertionPoint < t.length) {
        token = {type: 'char', value: t.substring (0, p.insertionPoint)};
        var ip = p.insertionPoint;
        p.insertionPoint = 0;
        return t.substring (ip, t.length);
      }
      token = {type: 'char', value: t};
      p.insertionPoint -= t.length;
      return '';
    });
    if (token) return token;

    // A complete </script> end tag.
    i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {
      if (p.insertionPoint < s.length) {
        token = {type: 'abort'};
        return s;
      }
      token = {type: 'end-tag', value: 'script'};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    // A partial "</script" that is cut by the insertion point: wait for
    // more input rather than emitting "<" as character data.
    // NOTE(review): |v| takes |insertionPoint| characters of the name while
    // the expected prefix takes |insertionPoint - 2|; this looks like an
    // off-by-two but is kept as in the original - confirm the intent.
    var m;
    if ((p.insertionPoint < '</script'.length) &&
        (m = i.s.match (/^<\/([SCRIPTscript]+)/))) {
      var v = m[1].substring (0, p.insertionPoint).toLowerCase ();
      if (v == 'script'.substring (0, p.insertionPoint - '</'.length)) {
        return {type: 'abort'};
      }
    }

    // Any other "<" is plain character data inside a script.
    i.s = i.s.replace (/^</, function (s) {
      token = {type: 'char', value: s};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;
    return {type: 'eof'};
  }

  // End tag.
  i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
    if (blockedByInsertionPoint (s)) {
      token = {type: 'abort'};
      return s;
    }
    token = {type: 'end-tag', value: e.toLowerCase ()};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Start tag.
  i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
    if (blockedByInsertionPoint (s)) {
      token = {type: 'abort'};
      return s;
    }
    var tagName;
    var attrs = {};
    e = e.replace (/^[\S]+/, function (v) {
      tagName = v.toLowerCase ();
      return '';
    });
    e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"']+)))?/,
      function (x, attrName, attrValue1, attrValue2, attrValue3) {
        // Unmatched groups are undefined, and an empty quoted value is
        // '', so fall back to '' for valueless or empty attributes.
        var value = attrValue1 || attrValue2 || attrValue3 || '';
        value = value.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
            .replace (/&amp;/g, '&');
        attrs[attrName.toLowerCase ()] = value;
        return '';
      });
    if (e.length) {
      log ('Broken start tag: "' + e + '"');
    }
    token = {type: 'start-tag', value: tagName, attrs: attrs};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  if (p.insertionPoint <= 0) {
    return {type: 'abort'};
  }

  // Character data up to the next "<", cut at the insertion point.
  i.s = i.s.replace (/^[^<]+/, function (s) {
    if (p.insertionPoint < s.length) {
      token = {type: 'char', value: s.substring (0, p.insertionPoint)};
      var ip = p.insertionPoint;
      p.insertionPoint = 0;
      return s.substring (ip, s.length);
    }
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Anything left (a lone "<") is emitted one character at a time.
  i.s = i.s.replace (/^[\s\S]/, function (s) {
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  return {type: 'eof'};
}; // getNextToken
186
// Runs the tree construction loop: pulls tokens from |getNextToken| and
// builds the tree under |this.doc| until the input is exhausted ('eof')
// or the tokeniser has to stop at the insertion point ('abort').
//
// <script> start tags get the special processing described by the numbered
// comments below (the numbers follow the HTML5 draft this demo tracks).
// After 'eof' the deferred scripts are executed and the DOMContentLoaded /
// load events are logged.  The method is re-entered through
// document.write; an aborted or re-entrant run returns early without
// performing the end-of-document steps.
Parser.prototype.parse = function () {
  logIndentLevel++;
  log ('parse: start');

  while (true) {
    var token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);
        if (token.attrs.async != null) el.async = true;
        if (token.attrs.defer != null) el.defer = true;
        if (token.attrs.src != null) el.src = token.attrs.src;

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'script';

        // 4.1. Collect all the character tokens.
        while (true) {
          token = this.getNextToken ();
          log ('token: ' + token.type + ' "' + token.value + '"');

          if (token.type == 'char') {
            // 5. Append a single Text node to the script element node.
            el.manakaiAppendText (token.value);

          // 4.2. Until it returns a token that is not a character token, or
          // until it stops tokenising.
          } else if (token.type == 'eof' ||
                     (token.type == 'end-tag' && token.value == 'script') ||
                     token.type == 'abort') {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            // 7.1. If the next token is not an end tag token with ...
            if (token.type != 'end-tag') {
              // 7.2. This is a parse error.
              log ('Parse error: no </' + 'script>');

              // 7.3. Mark the script element as "already executed".
              el.manakaiAlreadyExecuted = true;
            } else {
              // 7.4. Ignore it.
              //
            }
            break;
          }
        }

        // 8.1. If the parser were originally created for the ...
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and ...
          // (The flag must be the same |manakaiAlreadyExecuted| property
          // that appendChild checks.)
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the ...
        var oldInsertionPoint = this.insertionPoint;
        // 9.2. Let the insertion point be just before the next input ...
        this.setInsertionPoint (0);

        // 10. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 11. Let the insertion point have the value of the old ...
        // (shifted by whatever the script left before the insertion point)
        oldInsertionPoint += this.insertionPoint;
        this.setInsertionPoint (oldInsertionPoint);

        // 12. If there is a script that will execute as soon as ...
        while (this.scriptExecutedWhenParserResumes) {
          // 12.1. If the tree construction stage is being called reentrantly
          if (this.reentrant) {
            log ('parse: abort (reentrance)');
            logIndentLevel--;
            return;

          // 12.2. Otherwise
          } else {
            // 1.
            var script = this.scriptExecutedWhenParserResumes;
            this.scriptExecutedWhenParserResumes = null;

            // 2. Pause until the script has completed loading.
            //

            // 3. Let the insertion point to just before the next input char.
            this.setInsertionPoint (0);

            // 4. Execute the script.
            executeScript (this.doc, script);

            // 5. Let the insertion point be undefined again.
            this.setInsertionPoint (undefined);

            // 6. If there is once again a script that will execute ...
            //
          }
        }
      } else {
        var el = new JSElement (this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild (el);
        this.openElements.push (el);
      }
    } else if (token.type == 'end-tag') {
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'char') {
      this.openElements[this.openElements.length - 1].manakaiAppendText
          (token.value);
    } else if (token.type == 'eof') {
      break;
    } else if (token.type == 'abort') {
      log ('parse: abort');
      logIndentLevel--;
      return;
    }
  }

  log ('stop parsing');

  // readyState = 'interactive'

  // "When a script completes loading" rules start applying.

  // TODO: Handles "list of scripts that will execute as soon as possible"
  // and "list of scripts that will execute asynchronously"

  // Handle "list of scripts that will execute when the document has finished
  // parsing".
  var list = this.scriptsExecutedAfterParsing;
  while (list.length > 0) {
    // TODO: break unless completed loading

    // Step 1.
    //

    // Step 2. and Step 3.
    log ('Executing a |defer|red script...');
    executeScript (this.doc, list.shift ());

    // Step 4.
  }

  log ('DOMContentLoaded event fired');

  // Everything that "delays the load event" has completed:
  // readyState = 'complete'
  log ('load event fired');

  logIndentLevel--;
}; // parse
348
// Sets the parser's insertion point and logs the new position.
//
// ip - Offset into the remaining input, counted from its start.
//      undefined, null and NaN all mean "no insertion point" and are
//      stored as undefined.
Parser.prototype.setInsertionPoint = function (ip) {
  var cleared = (ip == undefined || ip == null || isNaN (ip));
  if (cleared) {
    log ('insertion point: set to undefined');
    this.insertionPoint = undefined;
    return;
  }

  var remaining = this.in.s;
  if (ip == remaining.length) {
    log ('insertion point: end of file');
  } else {
    log ('insertion point: set to ' + ip +
         ' (before "' + remaining.substring (0, 10) + '")');
  }
  this.insertionPoint = ip;
}; // setInsertionPoint
362
// Minimal document node for the demo tree.
//
// p - The parser that builds this document; kept in |_parser| so that
//     document.open/document.write can reach (and replace) it.
function JSDocument (p) {
  this._parser = p;
  this.childNodes = [];
} // JSDocument
367
// Minimal element node for the demo tree.
//
// doc       - The owner document.
// localName - The element's (lowercased) tag name.
function JSElement (doc, localName) {
  this.ownerDocument = doc;
  this.localName = localName;
  this.childNodes = [];
} // JSElement
373
// Appends |e| as the last child of this node and returns it.
//
// Inserting a <script> element additionally runs the "running a script"
// steps: depending on its defer/async/src state the script is queued on
// the parser, remembered as the script to execute when the parser
// resumes, or executed immediately.
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push (e);
  e.parentNode = this;

  // Only script elements need any further processing.
  if (e.localName != 'script') {
    return e;
  }

  logIndentLevel++;
  log ('Running a script: start');

  var doc = this.ownerDocument || this;
  var p = doc._parser;

  // 1. Script type
  //

  // 2.1. If scripting is disabled
  //
  // 2.2. If the script element was created by an XML ... innerHTML ...
  //
  // 2.3. If the user agent does not support the scripting language ...
  //
  // 2.4. If the script element has its "already executed" flag set
  if (e.manakaiAlreadyExecuted) {
    // 2.5. Abort these steps at this point.
    log ('Running a script: aborted');
    logIndentLevel--;
    return e;
  }

  // 3. Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // 4. If the element has a src attribute, then a load for ...
  // TODO: load an external resource

  // 5. The first of the following options:
  var hasSrc = e.src != null;
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    // 5.1. Deferred: run once parsing has finished.
    p.scriptsExecutedAfterParsing.push (e);
    log ('Running a script: aborted (defer)');
  } else if (e.async) {
    // TODO: async scripts, with or without src (the src-less case also
    // depends on the list of scripts that will execute asynchronously).
  } else if (hasSrc) {
    if (e.manakaiParserInserted) {
      if (p.scriptExecutedWhenParserResumes) {
        log ('Error: There is a script that will execute as soon as the parser resumes.');
      }
      p.scriptExecutedWhenParserResumes = e;
      log ('Running a script: aborted (src)');
    } else {
      // TODO
    }
  } else {
    executeScript (doc, e); // even if other scripts are already executing.
  }

  log ('Running a script: end');
  logIndentLevel--;
  return e;
}; // appendChild
439
// Executes the script element |e| in the context of document |doc|.
//
// The script text comes from |getExternalScript| when the element has a
// src attribute and from the element's own text otherwise.  A failed load
// is logged as an error event and nothing is run.
function executeScript (doc, e) {
  log ('executing a script block: start');

  var isExternal = e.src != null;
  var s = isExternal ? getExternalScript (e.src) : e.text;

  if (isExternal) {
    // If the load resulted in an error, then ... firing an error event ...
    if (s == null) {
      log ('error event fired at the script element');
      return;
    }
    log ('External script loaded: "' + s + '"');
  }

  // If the load was successful
  log ('load event fired at the script element');

  // Scripting is enabled, Document.designMode is disabled,
  // Document is the active document in its browsing context
  parseAndRunScript (doc, s);

  log ('executing a script block: end');
} // executeScript
470
// "Loads" the external script identified by |uri|.
//
// Only javascript: URIs whose body is a single quoted string literal are
// supported; the content of that literal is the script text.
//
// Returns the script text (possibly the empty string), or null when the
// URI is not supported, which callers treat as a load error.
function getExternalScript (uri) {
  if (!uri.match (/^javascript:/i)) {
    log ('URI scheme not supported: <' + uri + '>');
    return null;
  }

  // Both quote styles accept an empty literal.
  var m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]*)")\s*$/i);
  if (!m) {
    log ('Complex javascript: URI is not supported: <' + uri + '>');
    return null;
  }

  // Exactly one of the two groups matched; the other is undefined (or ''
  // in some old engines).  An empty literal is an empty script, not a
  // load error, so fall back to '' rather than null.
  return m[1] || m[2] || '';
} // getExternalScript
491
// Interprets the script text |s| against document |doc|.
//
// The only statement understood is
//   document.write ('...', "...", ...);
// with one or more quoted string arguments.  Statements are consumed from
// the front of |s| one by one; anything else is logged as a script parse
// error and stops the run.
function parseAndRunScript (doc, s) {
  for (;;) {
    var statement = s.match (/^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/);

    if (statement) {
      // Strip the surrounding quotes from every argument literal.
      var literals = statement[1].match (/('[^']*'|"[^"]*")/g) || [];
      var args = [];
      for (var k = 0; k < literals.length; k++) {
        args.push (literals[k].substring (1, literals[k].length - 1));
      }
      doc.write.apply (doc, args);
      s = s.substring (statement[0].length);
    }

    if (s == '') break;
    if (!statement) {
      log ('Script parse error: "' + s + '"');
      break;
    }
  }
} // parseAndRunScript
512
// Minimal text node for the demo tree; |data| holds its character data.
function JSText (data) {
  this.data = data;
} // JSText
516
// Appends the character data |s| to this node: it is merged into the last
// child when that child is a text node, otherwise a new JSText child is
// added.
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var children = this.childNodes;
  var last = children[children.length - 1];
  if (last instanceof JSText) {
    last.data += s;
    return;
  }
  children.push (new JSText (s));
}; // manakaiAppendText
527
// document.open ([type [, replace]])
//
// Discards the current document content and attaches a fresh,
// script-created parser with an empty input stream whose insertion point
// is at its end.  The call is ignored while a parser that was not created
// by script has a defined insertion point (i.e. while the document is
// being parsed).
//
// Returns this document.
JSDocument.prototype.open = function () {
  // Two or fewer arguments

  // Step 1.
  var type = arguments[0] || 'text/html';

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  // The insertion point is a property of the parser itself, not of its
  // input stream.  NaN is what the tokeniser leaves behind after
  // subtracting from an undefined insertion point, so it counts as
  // undefined here (as it does in setInsertionPoint and write).
  var current = this._parser;
  if (current &&
      !current.scriptCreated &&
      current.insertionPoint != undefined &&
      !isNaN (current.insertionPoint)) {
    log ('document.open () in parsing mode is ignored');
    return this;
  }

  // Step 4.
  log ('onbeforeunload event fired');
  log ('onunload event fired');

  // Step 5.
  if (this._parser) {
    // Discard the parser.
  }

  // Step 6.
  log ('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser (new InputStream (''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  this._parser.setInsertionPoint (this._parser.in.s.length);

  // Step 12.
  return this;
}; // document.open
579
// document.write (string...)
//
// Concatenates the arguments, inserts the result into the parser's input
// stream just before the insertion point and, unless an external script
// is waiting for the parser to resume, re-enters the parser to process
// the inserted text.  When there is no insertion point the document is
// opened first.
JSDocument.prototype.write = function () {
  logIndentLevel++;

  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
    this.open ();
    p = this._parser;
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // (|arguments| is not a real array, so borrow Array.prototype.join.)
  var s = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + s + '"' +
       ' before "' + p.in.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
  p.in.s = p.in.s.substring (0, p.insertionPoint) + s
      + p.in.s.substring (p.insertionPoint, p.in.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a script that will execute as soon as the parser resumes
  if (p.scriptExecutedWhenParserResumes) {
    log ('document.write: processed later (there is an unprocessed <script src>)');
    logIndentLevel--;
    return;
  }

  // 4. Process the characters that were inserted, ...
  var originalReentrant = p.reentrant;
  p.reentrant = true;
  p.parse ();
  p.reentrant = originalReentrant;
  // TODO: "Abort the processing of any nested invocations of the tokeniser,
  // yielding control back to the caller." (<script> parsing).  Do we need
  // to do something here?

  // 5. Return
  log ('document.write: return');

  logIndentLevel--;
  return;
}; // document.write
621
// |text| getter: the concatenated data of the element's direct JSText
// children (text inside child elements is not included).
// Defined with the standard Object.defineProperty; the flags match what
// the deprecated __defineGetter__ would have produced.
Object.defineProperty (JSElement.prototype, 'text', {
  get: function () {
    var r = '';
    for (var i = 0; i < this.childNodes.length; i++) {
      if (this.childNodes[i] instanceof JSText) {
        r += this.childNodes[i].data;
      }
    }
    return r;
  },
  enumerable: true,
  configurable: true
});
631
// Serialises the subtree below |n| into the "| "-prefixed, one node per
// line format shown in the log.
//
// n      - Node whose children are dumped (|n| itself is not printed).
// indent - Indentation string to put in front of each child.
//
// Returns the dump as a string; every line ends with a newline.
function dumpTree (n, indent) {
  var r = '';
  for (var i = 0; i < n.childNodes.length; i++) {
    var node = n.childNodes[i];
    if (node instanceof JSElement) {
      r += '| ' + indent + node.localName + '\n';
      if (node.async) r += '| ' + indent + ' async=""\n';
      if (node.defer) r += '| ' + indent + ' defer=""\n';
      // An empty src is still a src attribute (the rest of the code
      // tests |src != null|), so it must show up in the dump as well.
      if (node.src != null) r += '| ' + indent + ' src="' + node.src + '"\n';
      r += dumpTree (node, indent + ' ');
    } else if (node instanceof JSText) {
      r += '| ' + indent + '"' + node.data + '"\n';
    } else {
      r += '| ' + indent + node + '\n';
    }
  }
  return r;
} // dumpTree
650 </script>
651 </head>
652 <body onload="
653 document.sourceElement = document.getElementsByTagName ('textarea')[0];
654 document.logElement = document.getElementsByTagName ('output')[0];
655 update ();
656 ">
657 <h1>Live Scripting Parser</h1>
658
659 <h2>Markup to test
660 (<a href=data:, id=permalink rel=bookmark>permalink</a>)</h2>
661 <p>
662 <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
663 &lt;head>&lt;/head>&lt;body>
664 &lt;p>
665 &lt;script>
666 document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
667 &lt;/script>
668 &lt;p>
669 </textarea>
670
671 <h2>Log</h2>
672 <p><output></output>
673
674 <!-- TODO: short description -->
675
676 <!-- TODO: permalink query -> textarea -->
677
678 <!-- TODO: multiple attributes are not supported yet -->
679
680 </body>
681 </html>

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24