/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.16 - (show annotations) (download) (as text)
Tue Apr 29 04:07:18 2008 UTC (17 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.15: +33 -3 lines
File MIME type: text/html
Allow CDATA end tag on or after an insertion point since the spec can be interpreted such that this is allowed

1 <!DOCTYPE HTML>
2 <html lang=en>
3 <head>
4 <title>Live Scripting HTML Parser</title>
5 <link rel=author href="http://suika.fam.cx/~wakaba/who?">
6 <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
7 title="GNU GPL2 or later">
8 <style>
9 h1 {
10 margin: 0;
11 font-size: 150%;
12 }
13 h2 {
14 margin: 0;
15 font-size: 100%;
16 }
17 p {
18 margin: 0 1em;
19 }
20 textarea {
21 width: 100%;
22 -width: 99%;
23 height: 10em;
24 }
25 output {
26 display: block;
27 font-family: monospace;
28 white-space: -moz-pre-wrap;
29 white-space: pre-wrap;
30 }
31 </style>
32 <script>
// Handle of the pending re-parse timer; 0 while nothing is scheduled by
// |update|.
var delayedUpdater = 0;

/**
 * Schedules |update2| to run 100 ms from now, cancelling any run that
 * was scheduled by an earlier call and has not been cancelled since.
 * Used as the textarea's key/change/input handler so that a burst of
 * edits triggers a single re-parse.
 */
function update () {
  if (delayedUpdater) {
    clearTimeout (delayedUpdater);
    delayedUpdater = 0;
  }
  delayedUpdater = setTimeout (update2, 100);
} // update
42
/**
 * Re-parses the markup currently in the source textarea, if it differs
 * from the text handled by the previous call.
 *
 * Side effects: remembers the text in |document.previousSourceText|,
 * refreshes the "permalink" and "Live DOM Viewer" link targets, clears
 * the log element, runs a fresh |Parser| over the text and logs the
 * resulting tree dump.
 */
function update2 () {
  var v = document.sourceElement.value;
  if (v == document.previousSourceText) {
    // Nothing changed since the last run; keep the existing log.
    return;
  }
  document.previousSourceText = v;

  document.links['permalink'].href
      = location.pathname + '?s=' + encodeURIComponent (v);
  document.links['ldvlink'].href
      = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?'
      + encodeURIComponent (v);

  document.logElement.textContent = '';
  var p = new Parser (new InputStream (v));
  // Keep a reference to the document the parser started with; this is
  // the tree that gets dumped after parsing.
  var doc = p.doc;
  p.parse ();

  log (dumpTree (doc, ''));

  if (p.hasAsyncScript) {
    log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
  }
} // update2
65
// Current nesting depth of the log output.  Code that logs a nested
// operation increments this on entry and decrements it on exit.
var logIndentLevel = 0;

/**
 * Appends a message to the log element as a text node.
 *
 * Every line of the message is prefixed with one indent unit per
 * |logIndentLevel|, and a trailing newline is added.
 *
 * @param s  Message text; may contain embedded newlines.
 */
function log (s) {
  var indent = '';
  for (var i = 0; i < logIndentLevel; i++) {
    indent += ' ';
  }
  s = indent + s.replace (/\n/g, "\n" + indent);
  document.logElement.appendChild (document.createTextNode (s + "\n"));
} // log
75
/**
 * Mutable input buffer shared between the tokeniser and document.write.
 *
 * @param s  Initial text; stored in |this.s|, which the tokeniser
 *           consumes from the front and document.write inserts into.
 */
function InputStream (s) {
  this.s = s;
} // InputStream
79
/**
 * Creates a parser over an input stream.
 *
 * @param i    The |InputStream| to tokenise.
 * @param doc  Optional document to build into.  When omitted, a new
 *             |JSDocument| bound to this parser is created and flagged
 *             as an HTML document.
 *
 * Note: |this.insertionPoint| is deliberately left unset here, which
 * the rest of the code treats as "insertion point undefined".
 */
function Parser (i, doc) {
  this.parseMode = 'pcdata';
  if (!doc) {
    doc = new JSDocument (this);
    doc.manakaiIsHTML = true;
  }
  // Tokens pushed back by the tree builder; replayed before new input.
  this.nextToken = [];
  this.doc = doc;
  // Stack of open elements; the document itself is the bottom entry.
  this.openElements = [doc];
  this.input = i;
  // Pending script lists, filled in when script elements are inserted.
  this.scriptsExecutedAfterParsing = [];
  this.scriptsExecutedSoon = [];
  this.scriptsExecutedAsynchronously = [];
} // Parser
94
/**
 * Returns the next token, consuming it from the front of the input.
 *
 * Tokens pushed back onto |this.nextToken| are returned first.
 * Otherwise the input is tokenised according to |this.parseMode|
 * ('cdata' or anything else, treated as PCDATA).  Token shapes:
 *
 *   {type: 'char', value}              - a run of text
 *   {type: 'start-tag', value, attrs}  - lowercased tag name, attribute map
 *   {type: 'end-tag', value}           - tag name
 *   {type: 'abort'}                    - the insertion point was reached
 *   {type: 'eof'}                      - nothing left to tokenise
 *
 * |this.insertionPoint| is decremented by the number of characters
 * consumed.  When it is undefined or NaN every comparison against it is
 * false, so tokenising is then not limited by an insertion point.
 */
Parser.prototype.getNextToken = function () {
  if (this.nextToken.length) {
    return this.nextToken.shift ();
  }

  var p = this;
  var i = this.input;
  var token;

  if (this.parseMode == 'cdata') {
    var tagName = this.endTagName;
    if (p.insertionPoint <= 0) {
      return {type: 'abort'};
    }

    // A run of text, cut at the insertion point if it falls inside.
    i.s = i.s.replace (/^([^<]+)/, function (s, t) {
      if (0 < p.insertionPoint && p.insertionPoint < t.length) {
        token = {type: 'char', value: t.substring (0, p.insertionPoint)};
        var ip = p.insertionPoint;
        p.insertionPoint = 0;
        return t.substring (ip, t.length);
      }
      token = {type: 'char', value: t};
      p.insertionPoint -= t.length;
      return '';
    });
    if (token) return token;

    // The complete end tag of the current CDATA element.
    var pattern = new RegExp ('^</' + tagName + '>', 'i');
    i.s = i.s.replace (pattern, function (s) {
      if (p.insertionPoint < s.length) {
        // The end tag straddles the insertion point; leave it in place.
        token = {type: 'abort'};
        return s;
      }
      token = {type: 'end-tag', value: tagName};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    // A possibly incomplete end tag that stops before the insertion
    // point: wait for more input instead of emitting the "<" as text.
    // NOTE(review): the left side is cut at |insertionPoint| characters
    // while the right side is cut at |insertionPoint - 2|, so the two
    // are equal only when the letters end exactly at the insertion
    // point.  Confirm whether that is intended; behaviour is unchanged.
    var m;
    if ((p.insertionPoint < ('</' + tagName).length) &&
        (m = i.s.match (/^<\/([A-Za-z]+)/))) {
      var v = m[1].substring (0, p.insertionPoint).toLowerCase ();
      if (v == tagName.substring (0, p.insertionPoint - '</'.length)) {
        return {type: 'abort'};
      }
    }

    // Any other "<" is ordinary text inside a CDATA element.
    i.s = i.s.replace (/^</, function (s) {
      token = {type: 'char', value: s};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;
    return {type: 'eof'};
  }

  // PCDATA: end tag.
  i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
    if (p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length &&
         s.substring (s.length - 1, s.length) != '>')) {
      token = {type: 'abort'};
      return s;
    }
    token = {type: 'end-tag', value: e.toLowerCase ()};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // PCDATA: start tag with attributes.
  i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
    if (p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length &&
         s.substring (s.length - 1, s.length) != '>')) {
      token = {type: 'abort'};
      return s;
    }
    var tagName;
    var attrs = {};
    e = e.replace (/^[\S]+/, function (v) {
      tagName = v.toLowerCase ();
      return '';
    });
    while (true) {
      var m = false;
      e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
      function (x, attrName, attrValue1, attrValue2, attrValue3) {
        // Capture groups that did not take part in the match are
        // undefined, so an attribute without a value (or with an empty
        // quoted value) must fall back to the empty string explicitly.
        var value = '';
        if (attrValue1 != null) {
          value = attrValue1;
        } else if (attrValue2 != null) {
          value = attrValue2;
        } else if (attrValue3 != null) {
          value = attrValue3;
        }
        value = value.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
            .replace (/&amp;/g, '&');
        attrs[attrName.toLowerCase ()] = value;
        m = true;
        return '';
      });
      if (!m) break;
    }
    if (e.length) {
      log ('Broken start tag: "' + e + '"');
    }
    token = {type: 'start-tag', value: tagName, attrs: attrs};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  if (p.insertionPoint <= 0) {
    return {type: 'abort'};
  }

  // PCDATA: a run of text, cut at the insertion point if it falls inside.
  i.s = i.s.replace (/^[^<]+/, function (s) {
    if (p.insertionPoint < s.length) {
      token = {type: 'char', value: s.substring (0, p.insertionPoint)};
      var ip = p.insertionPoint;
      p.insertionPoint = 0;
      return s.substring (ip, s.length);
    }
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Anything left (a "<" that starts no tag) is emitted one character
  // at a time.
  i.s = i.s.replace (/^[\s\S]/, function (s) {
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  return {type: 'eof'};
}; // getNextToken
220
/**
 * Runs the tree-construction loop over tokens from |getNextToken|.
 *
 * Start tags create |JSElement|s under the current node; "script" and
 * the generic CDATA elements ("style", "noscript", "xmp") switch the
 * tokeniser into CDATA mode and collect their text.  Inserting a script
 * element may run it (see |appendChild|), and a running script may call
 * document.write, which re-enters this method with |this.reentrant|
 * set.
 *
 * The method returns early, without the end-of-parsing steps, when an
 * 'abort' token is seen outside CDATA collection (the insertion point
 * was reached) or when a pending external script is found during a
 * reentrant call.  On 'eof' it runs the remaining script lists and logs
 * the DOMContentLoaded and load events.
 *
 * |logIndentLevel| is raised for the duration of the call.
 */
Parser.prototype.parse = function () {
  logIndentLevel++;
  log ('parse: start');

  var token;
  var el;

  while (true) {
    token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (this.cdataEndTagRequired) {
      // Generic CDATA parsing algorithm, resumed after an earlier abort
      // inside a CDATA element.

      if (token.type != 'abort') {
        // 7.
        if (token.type == 'end-tag' && token.value == this.endTagName) {
          // 7.1. Ignores it.
          //
        } else {
          // 7.2. Parse error.
          log ('Parse error: no </' + this.endTagName + '>');
          this.nextToken.unshift (token);
        }
        this.cdataEndTagRequired = false;
        continue;
      }
    }

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        el = new JSElement (this.doc, token.value);
        if (token.attrs.async != null) el.async = true;
        if (token.attrs.defer != null) el.defer = true;
        if (token.attrs.src != null) el.src = token.attrs.src;

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'cdata';
        this.endTagName = 'script';

        // 4.1. Collect all the character tokens.
        while (true) {
          token = this.getNextToken ();
          log ('token: ' + token.type + ' "' + token.value + '"');

          if (token.type == 'char') {
            // 5. Append a single Text node to the script element node.
            el.manakaiAppendText (token.value);

          // 4.2. Until it returns a token that is not a character token, or
          // until it stops tokenising.
          } else if (token.type == 'eof' ||
                     token.type == 'end-tag' ||
                     token.type == 'abort') {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            // 7.1. If the next token is not an end tag token with ...
            if (!(token.type == 'end-tag' && token.value == 'script')) {
              // 7.2. This is a parse error.
              log ('Parse error: no </' + 'script>');
              this.nextToken.unshift (token);

              // 7.3. Mark the script element as "already executed".
              el.manakaiAlreadyExecuted = true;
            } else {
              // 7.4. Ignore it.
              //
            }
            break;
          }
        }

        // 8.1. If the parser were originally created for the ...
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and ...
          // The flag must be the one |appendChild| checks.
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the ...
        var oldInsertionPoint = this.insertionPoint;
        // 9.2. Let the insertion point be just before the next input ...
        this.setInsertionPoint (0);

        // 10. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 11. Let the insertion point have the value of the old ...
        // The offset gained while the script ran is added on.  If the
        // old value was undefined or NaN the sum is NaN, which
        // |setInsertionPoint| turns back into undefined.
        oldInsertionPoint += this.insertionPoint;
        this.setInsertionPoint (oldInsertionPoint);

        // 12. If there is a script that will execute as soon as ...
        while (this.scriptExecutedWhenParserResumes) {
          // 12.1. If the tree construction stage is being called reentrantly
          if (this.reentrant) {
            log ('parse: abort (reentrance)');
            logIndentLevel--;
            return;

          // 12.2. Otherwise
          } else {
            // 1.
            var script = this.scriptExecutedWhenParserResumes;
            this.scriptExecutedWhenParserResumes = null;

            // 2. Pause until the script has completed loading.
            //

            // 3. Let the insertion point to just before the next input char.
            this.setInsertionPoint (0);

            // 4. Execute the script.
            executeScript (this.doc, script);

            // 5. Let the insertion point be undefined again.
            this.setInsertionPoint (undefined);

            // 6. If there is once again a script that will execute ...
            //
          }
        }
      } else if (token.value == 'style' ||
                 token.value == 'noscript' ||
                 token.value == 'xmp') {
        // 1. Create an element for the token in the HTML namespace.
        el = new JSElement (this.doc, token.value);

        // 2. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'cdata';
        this.endTagName = token.value;

        // 4.1. Collect all the character tokens.
        while (true) {
          token = this.getNextToken ();
          log ('token: ' + token.type + ' "' + token.value + '"');

          if (token.type == 'char') {
            // 5. Append a single Text node to the new element node.
            el.manakaiAppendText (token.value);

          // 4.2. Until it returns a token that is not a character token, or
          // until it stops tokenising.
          } else if (token.type == 'eof' ||
                     token.type == 'end-tag' ||
                     token.type == 'abort') {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            if (token.type == 'abort') {
              // The end tag is checked when more input arrives (see the
              // |cdataEndTagRequired| branch at the top of the loop).
              this.cdataEndTagRequired = true;
              break;
            }

            // 7.1. If the next token is not an end tag token with ...
            if (!(token.type == 'end-tag' &&
                  token.value == this.endTagName)) {
              // 7.2. This is a parse error.
              log ('Parse error: no </' + this.endTagName + '>');
              this.nextToken.unshift (token);

              // 7.3. Mark the element as "already executed".
              el.manakaiAlreadyExecuted = true;
            } else {
              // 7.4. Ignore it.
              //
            }
            break;
          }
        }
      } else {
        el = new JSElement (this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild (el);
        this.openElements.push (el);
      }
    } else if (token.type == 'end-tag') {
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'char') {
      this.openElements[this.openElements.length - 1].manakaiAppendText
          (token.value);
    } else if (token.type == 'eof') {
      break;
    } else if (token.type == 'abort') {
      log ('parse: abort');
      logIndentLevel--;
      return;
    }
  }

  log ('stop parsing');

  // readyState = 'interactive'

  // "When a script completes loading" rules start applying.

  while (this.scriptsExecutedSoon.length > 0 ||
         this.scriptsExecutedAsynchronously.length > 0) {
    // Handle "list of scripts that will execute as soon as possible".
    while (this.scriptsExecutedSoon.length > 0) {
      var soon = this.scriptsExecutedSoon.shift ();

      // If it has completed loading
      log ('Execute an external script not inserted by parser...');
      executeScript (this.doc, soon);

      // NOTE: It MAY be executed before the end of the parsing, according
      // to the spec.
      this.hasAsyncScript = true;
    }

    // Handle "list of scripts that will execute asynchronously".
    while (this.scriptsExecutedAsynchronously.length > 0) {
      var asyncScript = this.scriptsExecutedAsynchronously.shift ();

      // Step 1.
      // We assume that all scripts have been loaded at this time.

      // Step 2.
      log ('Execute an asynchronous script...');
      executeScript (this.doc, asyncScript);

      // Step 3.
      //

      // Step 4.
      //

      this.hasAsyncScript = true;
    }
  }

  // Handle "list of scripts that will execute when the document has finished
  // parsing".
  var list = this.scriptsExecutedAfterParsing;
  while (list.length > 0) {
    // TODO: break unless completed loading

    // Step 1.
    //

    // Step 2. and Step 3.
    log ('Executing a |defer|red script...');
    executeScript (this.doc, list.shift ());

    // Step 4.
  }

  log ('DOMContentLoaded event fired');

  // "delays the load event" things have completed:
  // readyState = 'complete'
  log ('load event fired');

  logIndentLevel--;
}; // parse
486
/**
 * Sets |this.insertionPoint| and logs the change.
 *
 * @param ip  Offset into the remaining input, counted from its start.
 *            undefined, null or NaN all store undefined, meaning "no
 *            insertion point".
 */
Parser.prototype.setInsertionPoint = function (ip) {
  // |ip == null| is also true for undefined.
  if (ip == null || isNaN (ip)) {
    log ('insertion point: set to undefined');
    this.insertionPoint = undefined;
  } else if (ip == this.input.s.length) {
    log ('insertion point: end of file');
    this.insertionPoint = ip;
  } else {
    // The quoted text is the start of the remaining input, as a hint
    // for reading the log.
    log ('insertion point: set to ' + ip +
         ' (before "' + this.input.s.substring (0, 10) + '")');
    this.insertionPoint = ip;
  }
}; // setInsertionPoint
500
/**
 * Minimal document node for the simulated DOM.
 *
 * @param p  The |Parser| building this document; stored in
 *           |this._parser| so scripts can reach it through the document.
 */
function JSDocument (p) {
  this.childNodes = [];
  this._parser = p;
} // JSDocument
505
/**
 * Minimal element node for the simulated DOM.
 *
 * @param doc        The owning |JSDocument|.
 * @param localName  The element's (lowercased) tag name.
 */
function JSElement (doc, localName) {
  this.localName = localName;
  this.ownerDocument = doc;
  this.childNodes = [];
} // JSElement
511
/**
 * Appends a node to this document or element and returns it.
 *
 * Inserting a "script" element additionally runs the "running a
 * script" steps: unless the element is already flagged as executed, it
 * is flagged and then either queued on one of the parser's pending
 * lists (deferred, asynchronous, parser-blocking external, or other
 * external) or executed immediately.
 *
 * @param e  The node to append; its |parentNode| is set to this node.
 * @return   |e|.
 */
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push (e);
  e.parentNode = this;

  if (e.localName == 'script') {
    logIndentLevel++;
    log ('Running a script: start');

    // A document has no |ownerDocument|; it is its own document.
    var doc = this.ownerDocument || this;
    var p = doc._parser;

    // 1. Script type
    //

    // 2.1. If scripting is disabled
    //
    // 2.2. If the script element was created by an XML ... innerHTML ...
    //
    // 2.3. If the user agent does not support the scripting language ...
    //
    // 2.4. If the script element has its "already executed" flag set
    if (e.manakaiAlreadyExecuted) {
      // 2.5. Abort these steps at this point.
      log ('Running a script: aborted (already executed)');
      logIndentLevel--;
      return e;
    }

    // 3. Set the element's "already executed" flag.
    e.manakaiAlreadyExecuted = true;

    // 4. If the element has a src attribute, then a load for ...
    // TODO: load an external resource

    // 5. The first of the following options:

    // 5.1.
    if (/* TODO: If the document is still being parsed && */
        e.defer && !e.async) {
      p.scriptsExecutedAfterParsing.push (e);
      log ('Running a script: aborted (defer)');
    } else if (e.async && e.src != null) {
      p.scriptsExecutedAsynchronously.push (e);
      log ('Running a script: aborted (async src)');
    } else if (e.async && e.src == null &&
               p.scriptsExecutedAsynchronously.length > 0) {
      p.scriptsExecutedAsynchronously.push (e);
      log ('Running a script: aborted (async)');
      // ISSUE: What is the difference with the case above?
    } else if (e.src != null && e.manakaiParserInserted) {
      if (p.scriptExecutedWhenParserResumes) {
        log ('Error: There is a script that will execute as soon as the parser resumes.');
      }
      p.scriptExecutedWhenParserResumes = e;
      log ('Running a script: aborted (src parser-inserted)');
    } else if (e.src != null) {
      p.scriptsExecutedSoon.push (e);
      log ('Running a script: aborted (src)');
    } else {
      executeScript (doc, e); // even if other scripts are already executing.
    }

    log ('Running a script: end');
    logIndentLevel--;
  }

  return e;
}; // appendChild
581
/**
 * Executes one script element.
 *
 * The script text comes from |getExternalScript| when the element has a
 * |src|, otherwise from the element's |text|.  If the external script
 * cannot be obtained, an error event is logged and nothing is run.
 *
 * @param doc  The document the script runs against.
 * @param e    The script element.
 */
function executeScript (doc, e) {
  log ('executing a script block: start');

  var s;
  if (e.src != null) {
    s = getExternalScript (e.src);

    // If the load resulted in an error, then ... firing an error event ...
    if (s == null) {
      log ('error event fired at the script element');
      return;
    }

    log ('External script loaded: "' + s + '"');
  } else {
    s = e.text;
  }

  // If the load was successful
  log ('load event fired at the script element');

  // Scripting is enabled, Document.designMode is disabled, and the
  // Document is the active document in its browsing context: these are
  // always assumed to hold here, so the script is always run.
  parseAndRunScript (doc, s);

  log ('executing a script block: end');
} // executeScript
612
/**
 * Resolves a script |src| URI to script text.
 *
 * Only URIs of the form javascript:'...' or javascript:"..." (a single
 * string literal, optionally surrounded by white space) are supported;
 * the literal's content, with \uHHHH escapes decoded, is the script
 * text.  An empty literal yields an empty script.
 *
 * @param uri  The |src| attribute value.
 * @return     The script text, or null (after logging the reason) when
 *             the URI is not supported.
 */
function getExternalScript (uri) {
  if (!uri.match (/^javascript:/i)) {
    log ('URI scheme not supported: <' + uri + '>');
    return null;
  }

  // Both quote styles accept an empty body.
  var m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]*)")\s*$/i);
  if (!m) {
    log ('Complex javascript: URI is not supported: <' + uri + '>');
    return null;
  }

  // Exactly one of the two groups took part in the match; test against
  // null rather than truthiness so an empty literal is not rejected.
  return unescapeJSLiteral (m[1] != null ? m[1] : m[2]);
} // getExternalScript
633
/**
 * Interprets the tiny script language supported by this tool.
 *
 * The script is consumed from the front, one statement at a time.
 * Three statement forms are recognised:
 *   - document.write with one or more string literal arguments;
 *   - the three-statement sequence that creates a script element, sets
 *     its |src| and appends it to |document.documentElement|;
 *   - w (document.documentElement.innerHTML), which logs a tree dump.
 * Anything else is logged as a script parse error and stops the run.
 *
 * @param doc  The document the script acts on.
 * @param s    The script text.
 */
function parseAndRunScript (doc, s) {
  while (true) {
    var matched = false;

    s = s.replace (/^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/, function (all, argList) {
      matched = true;
      var args = [];
      // |replace| is used only to visit each literal in turn.
      argList.replace (/('[^']*'|"[^"]*")/g, function (lit, v) {
        args.push (unescapeJSLiteral (v.substring (1, v.length - 1)));
        return '';
      });
      doc.write.apply (doc, args);
      return '';
    });

    var noDocumentElement = false;
    s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'([^']*)'|"([^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
    function (all, single, dbl) {
      matched = true;
      // Only one of the two groups took part in the match and the other
      // is undefined; an empty literal must stay an empty string.
      var uri = single != null ? single : (dbl != null ? dbl : '');
      var args = [unescapeJSLiteral (uri)];
      noDocumentElement = !doc._insertExternalScript.apply (doc, args);
      return '';
    });
    if (noDocumentElement) {
      log ('Script error: documentElement is null');
      break;
    }

    s = s.replace (/^\s*w\s*\(\s*document\.documentElement\.innerHTML\s*\)\s*;\s*/,
    function () {
      matched = true;
      log (dumpTree (doc, ''));
      return '';
    });

    if (s == '') break;
    if (!matched) {
      log ('Script parse error: "' + s + '"');
      break;
    }
  }
} // parseAndRunScript
672
/**
 * Decodes the escapes supported in string literals of the simulated
 * script language.  Only \uHHHH (exactly four hexadecimal digits) is
 * recognised; every other character is returned unchanged.
 *
 * @param s  The literal's content, without the surrounding quotes.
 * @return   The decoded string.
 */
function unescapeJSLiteral (s) {
  return s.replace (/\\u([0-9A-Fa-f]{4})/g, function (t, v) {
    return String.fromCharCode (parseInt (v, 16));
  });
} // unescapeJSLiteral
678
/**
 * Minimal text node for the simulated DOM.
 *
 * @param data  The node's character data.
 */
function JSText (data) {
  this.data = data;
} // JSText
682
/**
 * Appends character data to this node.  If the last child is a
 * |JSText| the data is added to it; otherwise a new |JSText| child is
 * created.
 *
 * @param s  The text to append.
 */
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var last = this.childNodes.length > 0
      ? this.childNodes[this.childNodes.length - 1]
      : null;
  if (last instanceof JSText) {
    last.data += s;
  } else {
    this.childNodes.push (new JSText (s));
  }
}; // manakaiAppendText
693
/**
 * document.open (): discards the document's content and attaches a
 * fresh, script-created parser with an empty input stream.
 *
 * The call is ignored when the current parser was not created by a
 * script and has a defined insertion point, that is, while the document
 * is still being parsed from its original source.
 *
 * Optional arguments: the content type (default 'text/html') and the
 * string 'replace'.  Neither changes the behaviour of this simplified
 * implementation.
 *
 * @return  This document.
 */
JSDocument.prototype.open = function () {
  // Two or fewer arguments

  // Step 1.
  var type = arguments[0] || 'text/html';

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  // The insertion point is kept on the parser, not on its input
  // stream.  NaN counts as undefined: the tokeniser turns an undefined
  // insertion point into NaN when it subtracts consumed lengths.
  var parser = this._parser;
  if (parser &&
      !parser.scriptCreated &&
      !(parser.insertionPoint == undefined ||
        isNaN (parser.insertionPoint))) {
    log ('document.open () in parsing mode is ignored');
    return this;
  }

  // Step 4.
  log ('onbeforeunload event fired');
  log ('onunload event fired');

  // Step 5.
  if (this._parser) {
    // Discard the parser.
  }

  // Step 6.
  log ('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser (new InputStream (''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  this._parser.setInsertionPoint (this._parser.input.s.length);

  // Step 12.
  return this;
}; // document.open
745
/**
 * document.write (): inserts the concatenation of all arguments into
 * the parser's input stream at the insertion point and, unless a
 * parser-blocking script is pending, parses the inserted text
 * immediately by re-entering the parser.
 *
 * If the insertion point is undefined (or NaN), |open| is called first
 * and the write then goes to whatever parser the document has
 * afterwards.
 */
JSDocument.prototype.write = function () {
  log ('document.write: start');
  logIndentLevel++;

  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
    this.open ();
    p = this._parser;
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // |arguments| is not an array, so borrow the standard join method.
  var s = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + s + '"' +
       ' before "' +
       p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
  p.input.s = p.input.s.substring (0, p.insertionPoint) + s
      + p.input.s.substring (p.insertionPoint, p.input.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a script that will execute as soon as the parser resumes
  // (the property name is the one |appendChild| and |parse| use).
  if (p.scriptExecutedWhenParserResumes) {
    log ('document.write: processed later (there is an unprocessed <script src>)');
    logIndentLevel--;
    log ('document.write: return');
    return;
  }

  // 4. Process the characters that were inserted, ...
  var originalReentrant = p.reentrant;
  p.reentrant = true;
  p.parse ();
  p.reentrant = originalReentrant;
  // TODO: "Abort the processing of any nested invokations of the tokeniser,
  // yielding control back to the caller." (script element parsing).  Do
  // we need to do something here?

  // 5. Return
  logIndentLevel--;
  log ('document.write: return');

  return;
}; // document.write
790
/**
 * Creates a script element with the given |src| and appends it to the
 * document element.  Backs the createElement / src / appendChild
 * statement sequence of the simulated script language.
 *
 * @param uri  The value for the new element's |src|.
 * @return     true if the element was appended, false if the document
 *             has no document element to append to.
 */
JSDocument.prototype._insertExternalScript = function (uri) {
  var s = new JSElement (this, 'script');
  s.src = uri;
  var root = this.documentElement;
  if (!root) {
    return false;
  }
  root.appendChild (s);
  return true;
}; // _insertExternalScript
801
// |documentElement|: the first child of the document that is a
// |JSElement|, or null if there is none.  Defined with the standard
// Object.defineProperty; the attributes match those of a getter created
// with the legacy __defineGetter__ method.
Object.defineProperty (JSDocument.prototype, 'documentElement', {
  get: function () {
    var cn = this.childNodes;
    for (var i = 0; i < cn.length; i++) {
      if (cn[i] instanceof JSElement) {
        return cn[i];
      }
    }
    return null;
  },
  enumerable: true,
  configurable: true
});
811
// |text|: the concatenated data of the element's direct |JSText|
// children, in order; other children are skipped.  |executeScript|
// reads this as the source of an inline script.  Defined with the
// standard Object.defineProperty; the attributes match those of a
// getter created with the legacy __defineGetter__ method.
Object.defineProperty (JSElement.prototype, 'text', {
  get: function () {
    var r = '';
    for (var i = 0; i < this.childNodes.length; i++) {
      if (this.childNodes[i] instanceof JSText) {
        r += this.childNodes[i].data;
      }
    }
    return r;
  },
  enumerable: true,
  configurable: true
});
821
/**
 * Serialises the children of a node as a tree dump, one line per node,
 * each line starting with "| " followed by the current indent.
 *
 * Elements are written as their local name, followed by lines for the
 * |async|, |defer| and |src| values when set, and then their own
 * children at a deeper indent.  Text nodes are written as their data in
 * double quotes.  Any other child is written using its string
 * conversion.
 *
 * @param n       The node whose children are dumped.
 * @param indent  The indent string for this level ('' at the top).
 * @return        The dump; empty if |n| has no children.
 */
function dumpTree (n, indent) {
  var r = '';
  for (var i = 0; i < n.childNodes.length; i++) {
    var node = n.childNodes[i];
    var prefix = '| ' + indent;
    if (node instanceof JSElement) {
      r += prefix + node.localName + '\n';
      if (node.async) r += prefix + ' async=""\n';
      if (node.defer) r += prefix + ' defer=""\n';
      if (node.src != null) {
        r += prefix + ' src="' + node.src + '"\n';
      }
      r += dumpTree (node, indent + ' ');
    } else if (node instanceof JSText) {
      r += prefix + '"' + node.data + '"\n';
    } else {
      r += prefix + node + '\n';
    }
  }
  return r;
} // dumpTree
842 </script>
843 </head>
<body onload="
  /* Page start-up: locate the source textarea and the log output
     element, pre-fill the textarea from the |s| query parameter (as
     written by the permalink), then run the first parse. */
  document.sourceElement = document.getElementsByTagName ('textarea')[0];

  var q = location.search;
  if (q != null) {
    q = q.substring (1).split (/;/);
    for (var i = 0; i < q.length; i++) {
      var v = q[i].split (/=/, 2);
      try {
        v[0] = decodeURIComponent (v[0]);
        v[1] = decodeURIComponent (v[1] || '');
      } catch (e) {
        /* Malformed percent-encoding: skip this parameter instead of
           aborting the whole start-up sequence. */
        continue;
      }
      if (v[0] == 's') {
        document.sourceElement.value = v[1];
      }
    }
  }

  document.logElement = document.getElementsByTagName ('output')[0];
  update ();
">
863 <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
864 Parser</h1>
865
866 <h2>Markup to test
867 (<a href=data:, id=permalink rel=bookmark>permalink</a>,
868 <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
869 id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
870 Viewer</a>)</h2>
871 <p>
872 <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
873 &lt;head>&lt;/head>&lt;body>
874 &lt;p>
875 &lt;script>
876 document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
877 &lt;/script>
878 &lt;p>
879 </textarea>
880
881 <h2 id=log>Log</h2>
882 <p><output></output>
883
884 <h2 id=notes>Notes</h2>
885
886 <p>This is a <em>simplified</em> implementation of
887 <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
888 Parsing Algorithm</a>. It implements only the script-related part of the
889 algorithm. In particular, this parser:
890 <ul>
891 <li>Does not support <code>DOCTYPE</code> and comment tokens.
892 <li>Does not support entities except for <code>&amp;quot;</code>,
893 <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
894 <code>src</code> attribute value.
895 <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
896 algorithm, and so on.
897 <li>Does not raise parse errors for invalid attribute specifications in start
898 or end tags.
899 <li>Does not support PCDATA elements (<code>title</code> and
900 <code>textarea</code>).
901 <li>Does not strip the first newline in <code>pre</code> elements.
902 <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
903 in <code>script</code> element.
904 <li>Does not support foreign (SVG or MathML) elements.
905 <li>Only supports <code>script</code> <code>type</code>
906 <code>text/javascript</code>. <code>type</code> and <code>language</code>
907 attributes are ignored.
908 <li>Only supports a limited set of statements. A script must consist of
909 zero or more statements similar to the following ones, possibly
910 preceded, followed, or separated by white space characters:
911 <ul>
912 <li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>.
913 <li><code>var s = document.createElement ("script");
914 s.src = "<var>string</var>";
915 document.documentElement.appendChild (s);</code>
916 <li><code>w (document.documentElement.innerHTML);</code> (This statement
917 can be used to dump the document, even when the document has no
918 document element. The output format is the tree dump format used
919 in html5lib test data, not <abbr>HTML</abbr>.)
920 </ul>
921 Note that strings may be delimited by <code>'</code>s instead of
922 <code>"</code>s.
923 <li>Only supports <code>javascript:</code>
924 <abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the
925 <code>src</code> attribute of the <code>script</code> element. In addition,
926 the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to
927 the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
928 <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
929 string literals.
930 <li>Does not handle the <i>stop parsing</i> phase correctly if the document is
931 replaced by a <code>document.open ()</code> call. In other words, delayed
932 (deferred or asynchronous) script executions and event firings might be
933 treated in a wrong way if a <code>document.open ()</code> invocation
934 is implicitly done by <code>document.write ()</code> in a delayed script.
935 </ul>
936
937 <p>For some reason, this parser does not work in browsers that do
938 not support JavaScript 1.5.
939
940 <!-- TODO: |src| attribute value should refer to the value at the time
941 when it is inserted into the document, not the value when the script is
942 executed. Currently it does not matter, since we don't allow dynamic
943 modification to the |src| content/DOM attribute value yet. -->
944
945 </body>
946 </html>
947 <!-- $Date: 2008/04/29 03:29:41 $ -->
948 <!--
949
950 Copyright 2008 Wakaba <w@suika.fam.cx>
951
952 This program is free software; you can redistribute it and/or
953 modify it under the terms of the GNU General Public License
954 as published by the Free Software Foundation; either version 2
955 of the License, or (at your option) any later version.
956
957 This program is distributed in the hope that it will be useful,
958 but WITHOUT ANY WARRANTY; without even the implied warranty of
959 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
960 GNU General Public License for more details.
961
962 You should have received a copy of the GNU General Public License
963 along with this program; if not, write to the Free Software
964 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
965
966 -->

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24