/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory | Revision Log


Revision 1.6 - (show annotations) (download) (as text)
Fri Apr 25 13:42:51 2008 UTC (17 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.5: +103 -14 lines
File MIME type: text/html
<script src> in parsing algorithm is now supported

1 <!DOCTYPE HTML>
2 <html lang=en>
3 <head>
4 <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>
5 <style>
6 textarea {
7 display: block;
8 width: 80%;
9 margin-left: auto;
10 margin-right: auto;
11 min-height: 20em;
12 }
13 output {
14 display: block;
15 font-family: monospace;
16 white-space: -moz-pre-wrap;
17 white-space: pre-wrap;
18 }
19 </style>
20 <script>
/**
 * Re-runs the demo: empties the log element, parses the current content of
 * the source textarea with a fresh Parser, and logs a dump of the resulting
 * document tree.
 */
function update() {
  document.logElement.textContent = '';

  var source = document.sourceElement.value;
  var parser = new Parser(new InputStream(source));

  // Keep a handle on the document before parsing starts; it is dumped once
  // parse() has returned.
  var tree = parser.doc;
  parser.parse();

  log(dumpTree(tree, ''));
} // update
28
// Current nesting depth of the log output; log() prefixes one indent unit
// per level.
var logIndentLevel = 0;

/**
 * Appends one line of text to the log element.
 *
 * @param s  Message to log; it is indented by logIndentLevel units and
 *           terminated with a newline.
 */
function log(s) {
  var line = s;
  var remaining = logIndentLevel;
  while (remaining > 0) {
    line = ' ' + line;
    remaining--;
  }
  var textNode = document.createTextNode(line + "\n");
  document.logElement.appendChild(textNode);
} // log
36
/**
 * Minimal input stream: wraps the not-yet-tokenised source text.
 *
 * @param s  The source string; exposed as the `s` property, which the
 *           tokeniser and document.write() rewrite in place.
 */
function InputStream(s) {
  var remainingText = s;
  this.s = remainingText;
} // InputStream
40
/**
 * Creates a parser over an input stream.
 *
 * @param i    InputStream to tokenise (stored as `this.in`).
 * @param doc  Optional document to parse into. When omitted, a new
 *             JSDocument bound to this parser is created and flagged as HTML.
 */
function Parser(i, doc) {
  this.parseMode = 'pcdata';

  var target = doc;
  if (!target) {
    target = new JSDocument(this);
    target.manakaiIsHTML = true;
  }

  this.doc = target;
  // The document itself is the bottom entry of the stack of open elements.
  this.openElements = [target];
  this.in = i;
  this.scriptsExecutedAfterParsing = [];
} // Parser
52
/**
 * Consumes the next token from the front of the input stream (`this.in.s`).
 *
 * Returned token shapes:
 *   {type: 'char', value}              - character data
 *   {type: 'start-tag', value, attrs}  - lowercased tag name and attributes
 *   {type: 'end-tag', value}           - lowercased tag name
 *   {type: 'abort'}                    - the token would cross the insertion
 *                                        point; nothing is consumed
 *   {type: 'eof'}                      - the input is empty
 *
 * `this.insertionPoint` is decremented by the number of characters consumed.
 * When it is undefined or NaN, every comparison against it is false, so the
 * tokeniser never aborts.
 */
Parser.prototype.getNextToken = function () {
  var p = this;
  var i = this.in;
  var token;

  if (this.parseMode == 'script') {
    if (p.insertionPoint <= 0) {
      return {type: 'abort'};
    }

    // Character data up to the next "<", cut at the insertion point.
    i.s = i.s.replace(/^([^<]+)/, function (s, t) {
      if (0 < p.insertionPoint && p.insertionPoint < t.length) {
        token = {type: 'char', value: t.substring(0, p.insertionPoint)};
        var ip = p.insertionPoint;
        p.insertionPoint = 0;
        return t.substring(ip, t.length);
      }
      token = {type: 'char', value: t};
      p.insertionPoint -= t.length;
      return '';
    });
    if (token) return token;

    // A complete script end tag.
    i.s = i.s.replace(/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {
      if (p.insertionPoint < s.length) {
        token = {type: 'abort'};
        return s;
      }
      token = {type: 'end-tag', value: 'script'};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    // A possibly incomplete script end tag: if everything before the
    // insertion point is a prefix of the end tag, more text may still be
    // written, so stop here. Only the characters after "</" that lie before
    // the insertion point are compared.
    var m;
    if ((p.insertionPoint < '</script'.length) &&
        (m = i.s.match(/^<\/([SCRIPTscript]+)/))) {
      var written = p.insertionPoint - '</'.length;
      var v = m[1].substring(0, written).toLowerCase();
      if (v == 'script'.substring(0, written)) {
        return {type: 'abort'};
      }
    }

    // A "<" that does not start the end tag is ordinary script text.
    i.s = i.s.replace(/^</, function (s) {
      token = {type: 'char', value: s};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;
    return {type: 'eof'};
  }

  // True if the matched tag text |s| must not be tokenised yet: it either
  // extends past the insertion point, or it ends exactly there without a
  // closing ">" and so could still be continued by a later write.
  var crossesInsertionPoint = function (s) {
    return p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length && s.charAt(s.length - 1) != '>');
  };

  // End tag.
  i.s = i.s.replace(/^<\/([^>]+)(?:>|$)/, function (s, e) {
    if (crossesInsertionPoint(s)) {
      token = {type: 'abort'};
      return s;
    }
    token = {type: 'end-tag', value: e.toLowerCase()};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Start tag.
  i.s = i.s.replace(/^<([^>]+)(?:>|$)/, function (s, e) {
    if (crossesInsertionPoint(s)) {
      token = {type: 'abort'};
      return s;
    }
    var tagName;
    var attrs = {};
    e = e.replace(/^[\S]+/, function (v) {
      tagName = v.toLowerCase();
      return '';
    });

    // Consume attributes one at a time until none is left. Unquoted values
    // stop at whitespace so that a following attribute is not swallowed.
    var matched = true;
    while (matched) {
      matched = false;
      e = e.replace(/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^\s"']+)))?/,
          function (x, attrName, attrValue1, attrValue2, attrValue3) {
            matched = true;
            // A missing or empty value yields the empty string.
            var value = attrValue1 || attrValue2 || attrValue3 || '';
            value = value.replace(/&quot;/g, '"').replace(/&apos;/g, "'")
                .replace(/&amp;/g, '&');
            attrs[attrName.toLowerCase()] = value;
            return '';
          });
    }
    if (/\S/.test(e)) {
      log('Broken start tag: "' + e + '"');
    }
    token = {type: 'start-tag', value: tagName, attrs: attrs};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  if (p.insertionPoint <= 0) {
    return {type: 'abort'};
  }

  // Character data up to the next "<", cut at the insertion point.
  i.s = i.s.replace(/^[^<]+/, function (s) {
    if (p.insertionPoint < s.length) {
      token = {type: 'char', value: s.substring(0, p.insertionPoint)};
      var ip = p.insertionPoint;
      p.insertionPoint = 0;
      return s.substring(ip, s.length);
    }
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Any single leftover character.
  i.s = i.s.replace(/^[\s\S]/, function (s) {
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  return {type: 'eof'};
}; // getNextToken
167
/**
 * Runs the tree construction loop: pulls tokens from getNextToken() and
 * builds the tree under the current node until the tokeniser reports 'eof'
 * or 'abort'.
 *
 * A script start tag triggers the script-handling steps (numbered comments
 * below follow the parsing algorithm steps this demo implements). On 'eof'
 * the deferred scripts are executed and the DOMContentLoaded and load events
 * are logged. On 'abort', or when re-entered while a script is pending, the
 * method returns early without those end-of-parse steps.
 *
 * Log indentation is increased on entry and restored on every exit path.
 */
Parser.prototype.parse = function () {
  logIndentLevel++;
  log('parse: start');

  while (true) {
    var token = this.getNextToken();
    log('token: ' + token.type + ' "' + token.value + '"');

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement(this.doc, token.value);
        if (token.attrs.async != null) el.async = true;
        if (token.attrs.defer != null) el.defer = true;
        if (token.attrs.src != null) el.src = token.attrs.src;

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'script';

        // 4.1. Collect all the character tokens.
        while (true) {
          var scriptToken = this.getNextToken();
          log('token: ' + scriptToken.type + ' "' + scriptToken.value + '"');

          if (scriptToken.type == 'char') {
            // 5. Append a single Text node to the script element node.
            el.manakaiAppendText(scriptToken.value);

          // 4.2. Until it returns a token that is not a character token, or
          // until it stops tokenising.
          } else if (scriptToken.type == 'eof' ||
                     (scriptToken.type == 'end-tag' &&
                      scriptToken.value == 'script') ||
                     scriptToken.type == 'abort') {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            // 7.1. If the next token is not an end tag token with ...
            if (scriptToken.type != 'end-tag') {
              // 7.2. This is a parse error.
              log('Parse error: no </' + 'script>');

              // 7.3. Mark the script element as "already executed".
              el.manakaiAlreadyExecuted = true;
            } else {
              // 7.4. Ignore it.
              //
            }
            break;
          }
        }

        // 8.1. If the parser were originally created for the ...
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and ...
          // The flag must use the same property name that appendChild()
          // checks, otherwise it has no effect.
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the ...
        var oldInsertionPoint = this.insertionPoint;
        // 9.2. Let the insertion point be just before the next input ...
        this.setInsertionPoint(0);

        // 10. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild(el);

        // 11. Let the insertion point have the value of the old ...
        // Characters written by the script moved the insertion point forward
        // from 0, so the old value is shifted by that amount.
        oldInsertionPoint += this.insertionPoint;
        this.setInsertionPoint(oldInsertionPoint);

        // 12. If there is a script that will execute as soon as ...
        while (this.scriptExecutedWhenParserResumes) {
          // 12.1. If the tree construction stage is being called reentrantly
          if (this.reentrant) {
            log('parse: abort (reentrance)');
            logIndentLevel--;
            return;

          // 12.2. Otherwise
          } else {
            // 1.
            var script = this.scriptExecutedWhenParserResumes;
            this.scriptExecutedWhenParserResumes = null;

            // 2. Pause until the script has completed loading.
            //

            // 3. Let the insertion point to just before the next input char.
            this.setInsertionPoint(0);

            // 4. Execute the script.
            executeScript(this.doc, script);

            // 5. Let the insertion point be undefined again.
            this.setInsertionPoint(undefined);

            // 6. If there is once again a script that will execute ...
            //
          }
        }
      } else {
        var child = new JSElement(this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild(child);
        this.openElements.push(child);
      }
    } else if (token.type == 'end-tag') {
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop();
      } else {
        log('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'char') {
      this.openElements[this.openElements.length - 1].manakaiAppendText
          (token.value);
    } else if (token.type == 'eof') {
      break;
    } else if (token.type == 'abort') {
      log('parse: abort');
      logIndentLevel--;
      return;
    }
  }

  log('stop parsing');

  // readyState = 'interactive'

  // "When a script completes loading" rules start applying.

  // TODO: Handles "list of scripts that will execute as soon as possible"
  // and "list of scripts that will execute asynchronously"

  // Handle "list of scripts that will execute when the document has finished
  // parsing".
  var list = this.scriptsExecutedAfterParsing;
  while (list.length > 0) {
    // TODO: break unless completed loading

    // Step 1.
    //

    // Step 2. and Step 3.
    log('Executing a |defer|red script...');
    executeScript(this.doc, list.shift());

    // Step 4.
  }

  log('DOMContentLoaded event fired');

  // "delays the load event" things have completed:
  // readyState = 'complete'
  log('load event fired');

  logIndentLevel--;
}; // parse
328
/**
 * Sets the parser's insertion point and logs the change.
 *
 * @param ip  Offset into the remaining input (`this.in.s`), or
 *            undefined / null / NaN to make the insertion point undefined.
 */
Parser.prototype.setInsertionPoint = function (ip) {
  if (ip == null || isNaN(ip)) {
    log('insertion point: set to undefined');
    this.insertionPoint = undefined;
  } else if (ip == this.in.s.length) {
    log('insertion point: end of file');
    this.insertionPoint = ip;
  } else {
    // Show the text that actually follows the new insertion point, the same
    // way document.write() reports it.
    log('insertion point: set to ' + ip +
        ' (before "' + this.in.s.substring(ip, ip + 10) + '")');
    this.insertionPoint = ip;
  }
}; // setInsertionPoint
342
/**
 * Document node of the demo tree.
 *
 * @param p  The parser associated with this document (stored as `_parser`).
 */
function JSDocument(p) {
  var children = [];
  this.childNodes = children;
  this._parser = p;
} // JSDocument
347
/**
 * Element node of the demo tree.
 *
 * @param doc        Owner document of the element.
 * @param localName  Tag name of the element.
 */
function JSElement(doc, localName) {
  var children = [];
  this.localName = localName;
  this.ownerDocument = doc;
  this.childNodes = children;
} // JSElement
353
/**
 * Appends |e| as the last child of this node and returns it. Shared by
 * JSDocument and JSElement.
 *
 * Inserting a script element additionally runs the "running a script" steps:
 * depending on its flags the script is skipped, queued on the parser, or
 * executed immediately.
 */
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push(e);
  e.parentNode = this;

  // Only script elements need any further processing.
  if (e.localName != 'script') {
    return e;
  }

  logIndentLevel++;
  log('Running a script: start');

  // A document has no ownerDocument, so it stands in for itself.
  var ownerDoc = this.ownerDocument || this;
  var parser = ownerDoc._parser;

  // 1. Script type
  // 2.1. If scripting is disabled
  // 2.2. If the script element was created by an XML ... innerHTML ...
  // 2.3. If the user agent does not support the scripting language ...
  // (none of the above is modelled by this demo)

  // 2.4. If the script element has its "already executed" flag set
  if (e.manakaiAlreadyExecuted) {
    // 2.5. Abort these steps at this point.
    log('Running a script: aborted');
    logIndentLevel--;
    return e;
  }

  // 3. Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // 4. If the element has a src attribute, then a load for ...
  // TODO: load an external resource

  // 5. The first of the following options:
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    // 5.1. Deferred: run after parsing has finished.
    parser.scriptsExecutedAfterParsing.push(e);
    log('Running a script: aborted (defer)');
  } else if (e.async) {
    // TODO: async scripts, with or without a src attribute
  } else if (e.src != null) {
    if (e.manakaiParserInserted) {
      // Parser-inserted external script: run when the parser resumes.
      if (parser.scriptExecutedWhenParserResumes) {
        log('Error: There is a script that will execute as soon as the parser resumes.');
      }
      parser.scriptExecutedWhenParserResumes = e;
      log('Running a script: aborted (src)');
    } else {
      // TODO
    }
  } else {
    executeScript(ownerDoc, e); // even if other scripts are already executing.
  }

  log('Running a script: end');
  logIndentLevel--;
  return e;
}; // appendChild
419
/**
 * Executes a script block.
 *
 * The script text comes from getExternalScript() when the element has a src
 * attribute, and from the element's `text` otherwise. A failed load logs an
 * error event and stops; otherwise a load event is logged and the text is
 * handed to parseAndRunScript().
 *
 * @param doc  Document the script runs against.
 * @param e    The script element.
 */
function executeScript(doc, e) {
  log('executing a script block: start');

  var code;
  if (e.src == null) {
    code = e.text;
  } else {
    code = getExternalScript(e.src);

    // If the load resulted in an error, then ... firing an error event ...
    if (code == null) {
      log('error event fired at the script element');
      return;
    }

    log('External script loaded: "' + code + '"');
  }

  // If the load was successful
  log('load event fired at the script element');

  // Scripting is enabled, Document.designMode is disabled, and the Document
  // is the active document in its browsing context: always true in this demo.
  parseAndRunScript(doc, code);

  log('executing a script block: end');
} // executeScript
450
/**
 * Resolves the text of an "external" script.
 *
 * Only javascript: URIs whose body is a single quoted string literal are
 * supported; the content of that literal is the script text.
 *
 * @param uri  The value of the script element's src attribute.
 * @return     The script text (possibly the empty string), or null when the
 *             URI is unsupported and the load is to be treated as an error.
 */
function getExternalScript(uri) {
  if (!uri.match(/^javascript:/i)) {
    log('URI scheme not supported: <' + uri + '>');
    return null;
  }

  // Both quote styles accept an empty body.
  var m = uri.match(/^javascript:\s*(?:'([^']*)'|"([^"]*)")\s*$/i);
  if (!m) {
    log('Complex javascript: URI is not supported: <' + uri + '>');
    return null;
  }

  // Compare against null rather than testing truthiness: an empty string
  // literal is a valid (empty) script, not a load error.
  if (m[1] != null) {
    return m[1];
  }
  if (m[2] != null) {
    return m[2];
  }
  return null;
} // getExternalScript
471
/**
 * Toy script interpreter: runs a script made only of
 * document.write (...) statements with quoted string arguments.
 *
 * Statements are consumed from the front of |s| one at a time, and each one
 * calls doc.write() with the contents of its string literals. Anything else
 * is logged as a script parse error and stops execution.
 *
 * @param doc  Document whose write() method is invoked.
 * @param s    Script source text.
 */
function parseAndRunScript(doc, s) {
  var rest = s;

  for (;;) {
    var statement = rest.match(/^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/);

    if (statement) {
      // Collect every quoted argument with its quotes stripped.
      var args = [];
      var literalPattern = /('[^']*'|"[^"]*")/g;
      var literal;
      while ((literal = literalPattern.exec(statement[1])) !== null) {
        args.push(literal[1].substring(1, literal[1].length - 1));
      }
      doc.write.apply(doc, args);
      rest = rest.substring(statement[0].length);
    }

    if (rest == '') {
      break;
    }
    if (!statement) {
      log('Script parse error: "' + rest + '"');
      break;
    }
  }
} // parseAndRunScript
492
/**
 * Text node of the demo tree.
 *
 * @param data  The character data held by the node.
 */
function JSText(data) {
  var characters = data;
  this.data = characters;
} // JSText
496
/**
 * Appends character data to this node. Shared by JSDocument and JSElement.
 *
 * When the last child is already a JSText, the data is merged into it;
 * otherwise a new JSText child is added.
 *
 * @param s  The character data to append.
 */
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var children = this.childNodes;
  var lastChild = children.length > 0 ? children[children.length - 1] : null;

  if (lastChild instanceof JSText) {
    lastChild.data += s;
    return;
  }
  children.push(new JSText(s));
}; // manakaiAppendText
507
/**
 * document.open(): discards the current content and installs a fresh,
 * script-created parser over an empty input stream.
 *
 * The call is ignored while a parser that is not script-created is active
 * with a defined insertion point.
 *
 * Accepts up to two optional arguments: the MIME type (defaults to
 * 'text/html') and the string 'replace'. Neither changes this demo's
 * behaviour.
 *
 * @return  This document.
 */
JSDocument.prototype.open = function () {
  // Two or fewer arguments

  // Step 1.
  var type = arguments[0] || 'text/html';

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  // The insertion point is a property of the parser, not of its input
  // stream. NaN counts as undefined, matching the check in write().
  var parser = this._parser;
  if (parser &&
      !parser.scriptCreated &&
      parser.insertionPoint != undefined &&
      !isNaN(parser.insertionPoint)) {
    log('document.open () in parsing mode is ignored');
    return this;
  }

  // Step 4.
  log('onbeforeunload event fired');
  log('onunload event fired');

  // Step 5.
  if (this._parser) {
    // Discard the parser.
  }

  // Step 6.
  log('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser(new InputStream(''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  this._parser.setInsertionPoint(this._parser.in.s.length);

  // Step 12.
  return this;
}; // document.open
559
/**
 * document.write(): concatenates all arguments and inserts the result into
 * the parser's input stream just before the insertion point.
 *
 * When the insertion point is undefined the document is opened first. The
 * inserted characters are parsed immediately by a reentrant parse() call,
 * unless an external script is waiting for the parser to resume.
 */
JSDocument.prototype.write = function () {
  logIndentLevel++;

  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (p.insertionPoint == undefined || isNaN(p.insertionPoint)) {
    this.open();
    p = this._parser;
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // |arguments| is array-like, so borrow Array.prototype.join.
  var s = Array.prototype.join.call(arguments, '');
  log('document.write: insert "' + s + '"' +
      ' before "' + p.in.s.substring(p.insertionPoint, p.insertionPoint + 10) + '"');
  p.in.s = p.in.s.substring(0, p.insertionPoint) + s
      + p.in.s.substring(p.insertionPoint, p.in.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a script that will execute as soon as the parser resumes
  // (appendChild() records it as |scriptExecutedWhenParserResumes|).
  if (p.scriptExecutedWhenParserResumes) {
    log('document.write: processed later (there is an unprocessed <script src>)');
    logIndentLevel--;
    return;
  }

  // 4. Process the characters that were inserted, ...
  var originalReentrant = p.reentrant;
  p.reentrant = true;
  p.parse();
  p.reentrant = originalReentrant;
  // TODO: "Abort the processing of any nested invokations of the tokeniser,
  // yielding control back to the caller." (<script> parsing). Do we need
  // to do something here?

  // 5. Return
  log('document.write: return');

  logIndentLevel--;
  return;
}; // document.write
601
/**
 * `text` accessor of JSElement: the concatenated data of the element's
 * direct JSText children, in order. Other children are skipped.
 *
 * Defined with the standard Object.defineProperty instead of the deprecated
 * __defineGetter__; enumerable and configurable are kept true to match what
 * __defineGetter__ produced.
 */
Object.defineProperty(JSElement.prototype, 'text', {
  get: function () {
    var r = '';
    for (var i = 0; i < this.childNodes.length; i++) {
      if (this.childNodes[i] instanceof JSText) {
        r += this.childNodes[i].data;
      }
    }
    return r;
  },
  enumerable: true,
  configurable: true
});
611
/**
 * Serialises the children of |n| as an indented, line-oriented tree dump.
 *
 * Each line starts with "| " followed by the current indent. Elements print
 * their name, then any of the async / defer / src properties that are set,
 * then their own children one level deeper. Text nodes print their data in
 * double quotes. Any other child is printed via string conversion.
 *
 * @param n       Node whose childNodes are dumped.
 * @param indent  Indentation prefix for this level.
 * @return        The dump as a single string.
 */
function dumpTree(n, indent) {
  var lines = [];
  var prefix = '| ' + indent;
  var children = n.childNodes;

  for (var index = 0; index < children.length; index++) {
    var child = children[index];

    if (child instanceof JSElement) {
      lines.push(prefix + child.localName + '\n');
      if (child.async) lines.push(prefix + ' async=""\n');
      if (child.defer) lines.push(prefix + ' defer=""\n');
      if (child.src) lines.push(prefix + ' src="' + child.src + '"\n');
      lines.push(dumpTree(child, indent + ' '));
      continue;
    }

    if (child instanceof JSText) {
      lines.push(prefix + '"' + child.data + '"\n');
      continue;
    }

    lines.push(prefix + child + '\n');
  }

  return lines.join('');
} // dumpTree
630 </script>
631 </head>
632 <body onload="
633 document.sourceElement = document.getElementsByTagName ('textarea')[0];
634 document.logElement = document.getElementsByTagName ('output')[0];
635 update ();
636 ">
637
638 <textarea onchange=" update () ">&lt;html>
639 &lt;head>&lt;/head>&lt;body>
640 &lt;p>
641 &lt;script>
642 document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
643 &lt;/script>
644 &lt;p>
645 </textarea>
646
647 <output></output>
648
649 </body>
650 </html>

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24