/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory | Revision Log


Revision 1.5 - (show annotations) (download) (as text)
Fri Apr 25 11:40:56 2008 UTC (17 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.4: +17 -4 lines
File MIME type: text/html
Bug fix

1 <!DOCTYPE HTML>
2 <html lang=en>
3 <head>
4 <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>
5 <style>
6 textarea {
7 display: block;
8 width: 80%;
9 margin-left: auto;
10 margin-right: auto;
11 min-height: 20em;
12 }
13 output {
14 display: block;
15 font-family: monospace;
16 white-space: -moz-pre-wrap;
17 white-space: pre-wrap;
18 }
19 </style>
20 <script>
/**
 * Re-runs the demo: empties the log element, parses the current value of the
 * source element, and appends a dump of the resulting tree to the log.
 *
 * Reads document.sourceElement and writes document.logElement; both are
 * expando properties assigned by the page's onload handler.
 */
function update() {
  document.logElement.textContent = '';

  var source = document.sourceElement.value;
  var parser = new Parser(new InputStream(source));

  // Keep a handle on the document before parsing starts.
  var tree = parser.doc;
  parser.parse();

  log(dumpTree(tree, ''));
} // update
28
/**
 * Appends one line of text to the log element.
 *
 * @param {*} s Value to log; it is concatenated with a trailing newline.
 */
function log(s) {
  var target = document.logElement;
  target.appendChild(document.createTextNode(s + "\n"));
} // log
32
/**
 * Input stream consumed by the parser.
 *
 * @constructor
 * @param {string} s Text not yet tokenised; stored as the `s` property, which
 *     the tokeniser and document.write () rewrite in place.
 */
function InputStream(s) {
  this.s = s;
} // InputStream
36
/**
 * Parser state for one input stream.
 *
 * @constructor
 * @param {InputStream} i   Stream to read from (stored as `this.in`).
 * @param {JSDocument} [doc] Document to populate. When omitted, a new
 *     JSDocument bound to this parser is created and flagged as HTML.
 */
function Parser(i, doc) {
  var target = doc;

  this.parseMode = 'pcdata';

  if (!target) {
    target = new JSDocument(this);
    target.manakaiIsHTML = true;
  }

  this.doc = target;
  // The document itself is the bottom entry of the stack of open elements.
  this.openElements = [target];
  this.in = i;
  this.scriptsExecutedAfterParsing = [];
} // Parser
48
/**
 * Removes the next token from the front of the input stream (`this.in.s`)
 * and returns it.
 *
 * Token shapes:
 *   {type: 'char', value: string}
 *   {type: 'start-tag', value: tagName, attrs: {name: value, ...}}
 *   {type: 'end-tag', value: tagName}
 *   {type: 'abort'}  - the next token would cross the insertion point
 *   {type: 'eof'}    - the stream is empty
 *
 * `this.insertionPoint` is the number of leading stream characters that may
 * be consumed; it is decremented by the length of every consumed token. When
 * it is undefined or NaN all comparisons against it are false, so the whole
 * stream is available.
 *
 * Fixes relative to the previous revision:
 *  - the "is the tag closed by '>'" test used substring (length - 1, 1),
 *    which swaps its arguments and never inspects the last character, so a
 *    complete tag ending exactly at the insertion point was aborted;
 *  - every attribute of a start tag is now read (names lower-cased, first
 *    occurrence wins) and an attribute without a value, or with an empty
 *    value, yields '' rather than undefined;
 *  - the partial "</script" look-ahead compares only the characters that lie
 *    before the insertion point.
 */
Parser.prototype.getNextToken = function () {
  var p = this;
  var i = this.in;
  var token;

  // True when the matched tag text |s| must not be consumed yet: it extends
  // past the insertion point, or it stops exactly there without a closing ">".
  function crossesInsertionPoint(s) {
    return p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length && s.charAt(s.length - 1) != '>');
  }

  if (this.parseMode == 'script') {
    if (p.insertionPoint <= 0) {
      return {type: 'abort'};
    }

    // Script text up to the next "<", clipped at the insertion point.
    i.s = i.s.replace(/^([^<]+)/, function (s, t) {
      if (0 < p.insertionPoint && p.insertionPoint < t.length) {
        token = {type: 'char', value: t.substring(0, p.insertionPoint)};
        var ip = p.insertionPoint;
        p.insertionPoint = 0;
        return t.substring(ip, t.length);
      }
      token = {type: 'char', value: t};
      p.insertionPoint -= t.length;
      return '';
    });
    if (token) return token;

    // A complete "</script>" end tag.
    i.s = i.s.replace(/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {
      if (p.insertionPoint < s.length) {
        token = {type: 'abort'};
        return s;
      }
      token = {type: 'end-tag', value: 'script'};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    // The available input may end in the middle of "</script"; if what lies
    // before the insertion point is a prefix of it, wait for more input.
    var m;
    if ((p.insertionPoint < '</script'.length) &&
        (m = i.s.match(/^<\/([SCRIPTscript]+)/))) {
      var available = Math.max(p.insertionPoint - '</'.length, 0);
      var v = m[1].substring(0, available).toLowerCase();
      if (v == 'script'.substring(0, available)) {
        return {type: 'abort'};
      }
    }

    // Any other "<" is plain script text.
    i.s = i.s.replace(/^</, function (s) {
      token = {type: 'char', value: s};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;
    return {type: 'eof'};
  }

  // End tag.
  i.s = i.s.replace(/^<\/([^>]+)(?:>|$)/, function (s, e) {
    if (crossesInsertionPoint(s)) {
      token = {type: 'abort'};
      return s;
    }
    token = {type: 'end-tag', value: e.toLowerCase()};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Start tag.
  i.s = i.s.replace(/^<([^>]+)(?:>|$)/, function (s, e) {
    if (crossesInsertionPoint(s)) {
      token = {type: 'abort'};
      return s;
    }

    var tagName;
    var rest = e.replace(/^[\S]+/, function (v) {
      tagName = v.toLowerCase();
      return '';
    });

    // name, name=value, name="value" or name='value', repeated.
    var attrPattern =
        /^\s*([^\s=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"']+)))?/;
    var attrs = {};
    while (/\S/.test(rest)) {
      var a = attrPattern.exec(rest);
      if (!a) break; // unparsable remainder (e.g. a stray "="): ignore it
      var attrName = a[1].toLowerCase();
      var attrValue = a[2] != null ? a[2]
          : a[3] != null ? a[3]
          : a[4] != null ? a[4]
          : '';
      if (!Object.prototype.hasOwnProperty.call(attrs, attrName)) {
        attrs[attrName] = attrValue;
      }
      rest = rest.substring(a[0].length);
    }

    token = {type: 'start-tag', value: tagName, attrs: attrs};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  if (p.insertionPoint <= 0) {
    return {type: 'abort'};
  }

  // Character data up to the next "<", clipped at the insertion point.
  i.s = i.s.replace(/^[^<]+/, function (s) {
    if (p.insertionPoint < s.length) {
      token = {type: 'char', value: s.substring(0, p.insertionPoint)};
      var ip = p.insertionPoint;
      p.insertionPoint = 0;
      return s.substring(ip, s.length);
    }
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // A "<" that does not start a tag: emit it as a single character.
  i.s = i.s.replace(/^[\s\S]/, function (s) {
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  return {type: 'eof'};
}; // getNextToken
157
/**
 * Runs the tree-construction loop: pulls tokens from getNextToken () and
 * builds the tree under this.doc until the stream is exhausted ('eof') or
 * the tokeniser reaches the insertion point ('abort').
 *
 * On 'abort' the method returns immediately, leaving the remaining input in
 * the stream. On 'eof' it then runs the scripts queued in
 * this.scriptsExecutedAfterParsing and logs the end-of-document events.
 *
 * This method is re-entered from JSDocument.prototype.write while a script
 * is being run by appendChild ().
 *
 * Fix relative to the previous revision: in fragment parsing mode the
 * element is now flagged through `manakaiAlreadyExecuted`, the property the
 * rest of the file sets and tests, instead of an unrelated
 * `alreadyExecuted` property that nothing reads.
 */
Parser.prototype.parse = function () {
  log('start parsing');

  while (true) {
    var token = this.getNextToken();
    log('token: ' + token.type + ' "' + token.value + '"');

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement(this.doc, token.value);
        if (token.attrs.async != null) el.async = true;
        if (token.attrs.defer != null) el.defer = true;
        if (token.attrs.src != null) el.src = token.attrs.src;

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'script';

        // 4.1. Collect all the character tokens.
        while (true) {
          var scriptToken = this.getNextToken();
          log('token: ' + scriptToken.type + ' "' + scriptToken.value + '"');

          if (scriptToken.type == 'char') {
            // 5. Append a single Text node to the script element node.
            el.manakaiAppendText(scriptToken.value);

          // 4.2. Until it returns a token that is not a character token, or
          // until it stops tokenising.
          } else if (scriptToken.type == 'eof' ||
                     (scriptToken.type == 'end-tag' &&
                      scriptToken.value == 'script') ||
                     scriptToken.type == 'abort') {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            // 7.1. If the next token is not an end tag token with ...
            if (scriptToken.type != 'end-tag') {
              // 7.2. This is a parse error.
              log('Parse error: no </' + 'script>');

              // 7.3. Mark the script element as "already executed".
              el.manakaiAlreadyExecuted = true;
            } else {
              // 7.4. Ignore it.
            }
            break;
          }
        }

        // 8.1. If the parser were originally created for the ...
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and ...
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the ...
        var oldInsertionPoint = this.insertionPoint;
        // 9.2. Let the insertion point be just before the next input ...
        this.setInsertionPoint(0);

        // 10. Append the new element to the current node.
        // (appendChild runs the script, which may call document.write and
        // thereby move the insertion point.)
        this.openElements[this.openElements.length - 1].appendChild(el);

        // 11. Let the insertion point have the value of the old ...
        // If the old value was undefined the sum is NaN, which
        // setInsertionPoint () turns back into undefined.
        oldInsertionPoint += this.insertionPoint;
        this.setInsertionPoint(oldInsertionPoint);

        // 12. If there is a script that will execute as soon as ...

      } else {
        var child = new JSElement(this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild(child);
        this.openElements.push(child);
      }
    } else if (token.type == 'end-tag') {
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop();
      } else {
        log('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'char') {
      this.openElements[this.openElements.length - 1].manakaiAppendText(
          token.value);
    } else if (token.type == 'eof') {
      break;
    } else if (token.type == 'abort') {
      log('parse: abort');
      return;
    }
  }

  log('stop parsing');

  // readyState = 'interactive'

  // "When a script completes loading" rules start applying.

  // TODO: Handles "list of scripts that will execute as soon as possible"
  // and "list of scripts that will execute asynchronously"

  // Handle "list of scripts that will execute when the document has finished
  // parsing".
  var list = this.scriptsExecutedAfterParsing;
  while (list.length > 0) {
    // TODO: break unless completed loading

    // Step 1.
    //

    // Step 2. and Step 3.
    log('Executing a |defer|red script...');
    executeScript(this.doc, list.shift());

    // Step 4.
  }

  log('DOMContentLoaded event fired');

  // "delays the load event" things has completed:
  // readyState = 'complete'
  log('load event fired');
}; // parse
287
/**
 * Sets this.insertionPoint and logs the change.
 *
 * @param {number|undefined|null} ip Number of leading characters of the
 *     input stream that precede the insertion point. undefined, null and NaN
 *     all mean "no insertion point" and are stored as undefined.
 *
 * Fix relative to the previous revision: the logged preview now shows the
 * text that follows the insertion point (as the document.write log does)
 * instead of always showing the first ten characters of the stream.
 */
Parser.prototype.setInsertionPoint = function (ip) {
  if (ip == undefined || ip == null || isNaN(ip)) {
    log('insertion point: set to undefined');
    this.insertionPoint = undefined;
  } else if (ip == this.in.s.length) {
    log('insertion point: end of file');
    this.insertionPoint = ip;
  } else {
    log('insertion point: set to ' + ip +
        ' (before "' + this.in.s.substring(ip, ip + 10) + '")');
    this.insertionPoint = ip;
  }
}; // setInsertionPoint
301
/**
 * Minimal document node used by the demo.
 *
 * @constructor
 * @param {Parser} p Parser associated with this document; stored as
 *     `_parser` and consulted by open (), write () and appendChild ().
 */
function JSDocument(p) {
  this._parser = p;
  this.childNodes = [];
} // JSDocument
306
/**
 * Minimal element node used by the demo.
 *
 * @constructor
 * @param {JSDocument} doc   Owner document.
 * @param {string} localName Element name as produced by the tokeniser.
 */
function JSElement(doc, localName) {
  this.ownerDocument = doc;
  this.localName = localName;
  this.childNodes = [];
} // JSElement
312
/**
 * Appends |e| to this node's children and, when |e| is a script element,
 * performs the "running a script" steps for it.
 *
 * Shared by JSDocument and JSElement.
 *
 * @param {Object} e Node to append.
 * @return {Object} The appended node.
 */
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push(e);
  e.parentNode = this;

  // Only script elements need further processing.
  if (e.localName != 'script') {
    return e;
  }

  log('Running a script: start');

  // A document has no ownerDocument, so it stands in for itself.
  var doc = this.ownerDocument || this;
  var p = doc._parser;

  // 1. Script type
  // 2.1. If scripting is disabled
  // 2.2. If the script element was created by an XML ... innerHTML ...
  // 2.3. If the user agent does not support the scripting language ...
  // (none of the above is modelled)

  // 2.4. If the script element has its "already executed" flag set
  if (e.manakaiAlreadyExecuted) {
    // 2.5. Abort these steps at this point.
    log('Running a script: aborted');
    return e;
  }

  // 3. Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // 4. If the element has a src attribute, then a load for ...
  // TODO: load an external resource

  // 5. The first of the following options:
  var hasSrc = e.src != null;
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    // 5.1. Queue the script until parsing has finished.
    p.scriptsExecutedAfterParsing.push(e);
    log('Running a script: aborted (defer)');
  } else if (e.async && hasSrc) {
    // TODO
  } else if (e.async && !hasSrc
      /* && list of scripts that will execute asynchronously is not empty */) {
    // TODO
  } else if (hasSrc && e.manakaiParserInserted) {
    // TODO
  } else if (hasSrc) {
    // TODO
  } else {
    executeScript(doc, e); // even if other scripts are already executing.
  }

  log('Running a script: end');
  return e;
}; // appendChild
371
/**
 * Executes the script block held by element |e| against document |doc|.
 *
 * Only inline scripts are supported: the source is the element's `text`.
 * Loading from `src` is not implemented, so for an element with a `src` (or
 * with no text at all) no source is available; that case is logged and the
 * script is skipped. The previous revision passed undefined to
 * parseAndRunScript (), which threw a TypeError on `s.replace`.
 *
 * @param {JSDocument} doc Document whose write () the script may call.
 * @param {JSElement} e    The script element.
 */
function executeScript(doc, e) {
  log('executing a script block: start');

  // If the load resulted in an error, then ... firing an error event ...

  // If the load was successful
  log('load event fired at the script element');

  // Scripting is enabled, Document.designMode is disabled,
  // Document is the active document in its browsing context

  var s;
  if (e.src != null) {
    // TODO: from external file
  } else {
    s = e.text;
  }

  if (s == null) {
    log('script source is not available (external scripts are not supported)');
  } else {
    parseAndRunScript(doc, s);
  }

  log('executing a script block: end');
} // executeScript
396
/**
 * Interprets the tiny script language supported by the demo: a sequence of
 * `document.write (...)` statements whose arguments are single- or
 * double-quoted string literals (no escape sequences), each terminated by
 * ";". Every statement is executed by calling doc.write with the literal
 * contents as arguments.
 *
 * Stops when the whole script has been consumed, or logs a parse error and
 * stops at the first text that is not such a statement.
 *
 * @param {Object} doc Object whose write method receives the arguments.
 * @param {string} s   Script source.
 */
function parseAndRunScript(doc, s) {
  var statement = /^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/;
  var remaining = s;

  for (;;) {
    var call = remaining.match(statement);

    if (call) {
      // Strip the surrounding quotes from every literal in the argument list.
      var literals = call[1].match(/('[^']*'|"[^"]*")/g);
      var args = [];
      for (var k = 0; k < literals.length; k++) {
        args.push(literals[k].substring(1, literals[k].length - 1));
      }
      doc.write.apply(doc, args);
      remaining = remaining.substring(call[0].length);
    }

    if (remaining == '') break;
    if (!call) {
      log('Script parse error: "' + remaining + '"');
      break;
    }
  }
} // parseAndRunScript
417
/**
 * Minimal text node used by the demo.
 *
 * @constructor
 * @param {string} data Character data; manakaiAppendText () appends to it.
 */
function JSText(data) {
  this.data = data;
} // JSText
421
/**
 * Appends character data to this node: extends the last child when it is a
 * JSText, otherwise pushes a new JSText child.
 *
 * Shared by JSDocument and JSElement.
 *
 * @param {string} s Text to append.
 */
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var children = this.childNodes;
  var last = children.length > 0 ? children[children.length - 1] : null;

  if (last instanceof JSText) {
    last.data += s;
    return;
  }

  children.push(new JSText(s));
}; // manakaiAppendText
432
/**
 * Models document.open ([type [, replace]]).
 *
 * When the document is being parsed by a parser that was not created by a
 * script and that parser has an insertion point, the call is ignored.
 * Otherwise the document is cleared and given a new, script-created parser
 * over an empty input stream whose insertion point is at the end of that
 * stream.
 *
 * Fix relative to the previous revision: step 3 looked for the insertion
 * point on the input stream (`_parser.in.insertionPoint`), where it is never
 * stored, so the call was never ignored. It now reads
 * `_parser.insertionPoint`, treating NaN like undefined as write () does.
 *
 * @return {JSDocument} This document.
 */
JSDocument.prototype.open = function () {
  // Two or fewer arguments

  // Step 1. (the type is not used further; see Step 9)
  var type = arguments[0] || 'text/html';

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  var parser = this._parser;
  if (parser &&
      !parser.scriptCreated &&
      parser.insertionPoint != undefined &&
      !isNaN(parser.insertionPoint)) {
    log('document.open () in parsing mode is ignored');
    return this;
  }

  // Step 4.
  log('onbeforeunload event fired');
  log('onunload event fired');

  // Step 5.
  if (this._parser) {
    // Discard the parser.
  }

  // Step 6.
  log('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser(new InputStream(''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  this._parser.setInsertionPoint(this._parser.in.s.length);

  // Step 12.
  return this;
}; // document.open
484
/**
 * Models document.write (...): concatenates the arguments, inserts the
 * result into the parser's input stream just before the insertion point,
 * advances the insertion point past the inserted text, and re-enters the
 * parser to process it.
 *
 * When there is no insertion point (undefined or NaN) the document is first
 * reopened through open ().
 *
 * Fix relative to the previous revision: the arguments were joined with the
 * non-standard static generic `Array.join`, which current engines do not
 * provide; `Array.prototype.join.call` is used instead.
 */
JSDocument.prototype.write = function () {
  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (isNaN(p.insertionPoint) || p.insertionPoint == undefined) {
    this.open();
    p = this._parser; // open () may have replaced the parser
  }

  // 2. ... inserted into the input stream just before the insertion point.
  var s = Array.prototype.join.call(arguments, '');
  log('document.write: insert "' + s + '"' +
      ' before "' +
      p.in.s.substring(p.insertionPoint, p.insertionPoint + 10) + '"');
  p.in.s = p.in.s.substring(0, p.insertionPoint) + s +
      p.in.s.substring(p.insertionPoint, p.in.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a script that will execute as soon as the parser resumes
  // TODO

  // 4. Process the characters that were inserted, ...
  p.parse();

  // 5. Return
  log('document.write: return');
  return;
}; // document.write
512
/**
 * `text` accessor on JSElement: the concatenated data of the element's
 * direct JSText children (element children are skipped, not descended into).
 *
 * Defined with Object.defineProperty rather than the deprecated
 * __defineGetter__; the property stays enumerable and configurable, as
 * __defineGetter__ made it.
 */
Object.defineProperty(JSElement.prototype, 'text', {
  get: function () {
    var r = '';
    for (var i = 0; i < this.childNodes.length; i++) {
      if (this.childNodes[i] instanceof JSText) {
        r += this.childNodes[i].data;
      }
    }
    return r;
  },
  enumerable: true,
  configurable: true
});
522
/**
 * Serialises the children of |n| into an indented text dump, one node per
 * line, each line starting with "| ".
 *
 * Elements are printed by local name, followed by their async / defer / src
 * properties when set and then by their own children, indented one more
 * space. Text nodes are printed as quoted data. Anything else is printed
 * through string conversion.
 *
 * @param {Object} n      Node whose childNodes are dumped.
 * @param {string} indent Indentation prefix for this level.
 * @return {string} The dump; every line ends with a newline.
 */
function dumpTree(n, indent) {
  var pieces = [];
  var prefix = '| ' + indent;

  for (var k = 0; k < n.childNodes.length; k++) {
    var child = n.childNodes[k];

    if (child instanceof JSElement) {
      pieces.push(prefix + child.localName + '\n');
      if (child.async) pieces.push(prefix + ' async=""\n');
      if (child.defer) pieces.push(prefix + ' defer=""\n');
      if (child.src) pieces.push(prefix + ' src="' + child.src + '"\n');
      pieces.push(dumpTree(child, indent + ' '));
    } else if (child instanceof JSText) {
      pieces.push(prefix + '"' + child.data + '"\n');
    } else {
      pieces.push(prefix + child + '\n');
    }
  }

  return pieces.join('');
} // dumpTree
541 </script>
542 </head>
543 <body onload="
544 document.sourceElement = document.getElementsByTagName ('textarea')[0];
545 document.logElement = document.getElementsByTagName ('output')[0];
546 update ();
547 ">
548
549 <textarea onchange=" update () ">&lt;html>
550 &lt;head>&lt;/head>&lt;body>
551 &lt;p>
552 &lt;script>
553 document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
554 &lt;/script>
555 &lt;p>
556 </textarea>
557
558 <output></output>
559
560 </body>
561 </html>

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24