/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.21 - (show annotations) (download) (as text)
Sun Jun 20 03:39:12 2010 UTC (14 years, 4 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.20: +6 -5 lines
File MIME type: text/html
was not committed

1 <!DOCTYPE HTML>
2 <html lang=en>
3 <head>
4 <title>Live Scripting HTML Parser</title>
5 <link rel=author href="http://suika.fam.cx/~wakaba/who?">
6 <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
7 title="GNU GPL2 or later">
8 <style>
9 h1 {
10 margin: 0;
11 font-size: 150%;
12 }
13 h2 {
14 margin: 0;
15 font-size: 100%;
16 }
17 p {
18 margin: 0 1em;
19 }
20 textarea {
21 width: 100%;
22 -width: 99%;
23 height: 10em;
24 }
25 output {
26 display: block;
27 font-family: monospace;
28 white-space: -moz-pre-wrap;
29 white-space: pre-wrap;
30 }
31 </style>
32 <script>
// Handle of the refresh timer that is currently waiting to fire, or 0 when
// no refresh has been scheduled yet.
var delayedUpdater = 0;

/**
 * Requests a refresh of the parser output.
 *
 * The refresh itself (update2) runs 100 ms later; a request that arrives
 * while an earlier one is still waiting replaces it, so a burst of calls
 * results in a single refresh.
 */
function update () {
  // Cancel the refresh that is still waiting, if any.
  if (delayedUpdater) {
    clearTimeout (delayedUpdater);
  }
  delayedUpdater = setTimeout (update2, 100);
} // update
42
/**
 * Re-parses the markup in the source textarea and rewrites the log.
 *
 * Does nothing when the text equals the text handled by the previous run.
 * Otherwise it refreshes the permalink and Live DOM Viewer links, clears the
 * log element, parses the text with a fresh Parser and logs the resulting
 * tree dump.
 */
function update2 () {
  var source = document.sourceElement.value;
  if (source == document.previousSourceText) {
    return;
  }
  document.previousSourceText = source;

  // Both links carry the current markup in their query part.
  var encoded = encodeURIComponent (source);
  document.links['permalink'].href = location.pathname + '?s=' + encoded;
  document.links['ldvlink'].href =
      'http://software.hixie.ch/utilities/js/live-dom-viewer/?' + encoded;

  document.logElement.textContent = '';
  var parser = new Parser (new InputStream (source));
  // Keep a reference to the document the parser was created with.
  var tree = parser.doc;
  parser.parse ();

  log (dumpTree (tree, ''));

  if (parser.hasAsyncScript) {
    log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
  }
} // update2
65
// Current nesting depth of the log output; callers increment it when they
// enter a nested operation and decrement it when they leave.
var logIndentLevel = 0;

/**
 * Appends a message to the log element as a text node.
 *
 * Every line of the message is prefixed with one space per nesting level and
 * a newline is appended to the message.
 *
 * @param {string} s  Message; may contain several lines.
 */
function log (s) {
  var indent = '';
  var remaining = logIndentLevel;
  while (remaining-- > 0) {
    indent += ' ';
  }

  var lines = s.split ('\n');
  for (var k = 0; k < lines.length; k++) {
    lines[k] = indent + lines[k];
  }

  var text = document.createTextNode (lines.join ('\n') + '\n');
  document.logElement.appendChild (text);
} // log
75
/**
 * Holder for the markup that has not been tokenised yet.
 *
 * @param {string} s  Initial contents of the stream.
 */
function InputStream (s) {
  // The tokeniser removes consumed text from the front of this string and
  // document.write () splices new text into it.
  this.s = s;
} // InputStream
79
/**
 * Creates a parser that reads from an input stream and builds a document.
 *
 * @param {InputStream} i  Stream to read from.
 * @param {JSDocument} [doc]  Document to populate.  When omitted, a new
 *     JSDocument bound to this parser is created and flagged as HTML.
 */
function Parser (i, doc) {
  if (!doc) {
    doc = new JSDocument (this);
    doc.manakaiIsHTML = true;
  }

  this.input = i;
  this.doc = doc;

  // Tokeniser state: content model and pushed-back tokens.
  this.parseMode = 'pcdata';
  this.nextToken = [];

  // Stack of open nodes; the document itself is the bottom entry.
  this.openElements = [doc];

  // Script elements whose execution has been postponed.
  this.scriptsExecutedAfterParsing = [];
  this.scriptsExecutedSoon = [];
  this.scriptsExecutedAsynchronously = [];
} // Parser
94
/**
 * Returns the next token.
 *
 * Tokens pushed back onto this.nextToken are returned first.  Otherwise the
 * token is cut from the front of this.input.s, and this.insertionPoint is
 * decreased by the number of characters consumed.
 *
 * Token shapes:
 *   {type: 'char', value}              character data
 *   {type: 'start-tag', value, attrs}  lower-cased tag name and attribute map
 *   {type: 'end-tag', value}           tag name
 *   {type: 'abort'}                    the insertion point was reached before
 *                                      a complete token could be produced
 *   {type: 'eof'}                      nothing is left in the input
 *
 * In 'cdata' parse mode only character data and the end tag named by
 * this.endTagName are recognised.
 *
 * Comparisons against an undefined or NaN insertion point are always false,
 * so such an insertion point never causes an abort.
 *
 * @return {Object} The token.
 */
Parser.prototype.getNextToken = function () {
  if (this.nextToken.length) {
    return this.nextToken.shift ();
  }

  var p = this;
  var i = this.input;
  var token;
  if (this.parseMode == 'cdata') {
    var tagName = this.endTagName;
    if (p.insertionPoint <= 0) {
      return {type: 'abort'};
    }
    // A run of characters up to the next "<", cut short at the insertion
    // point when it falls inside the run.
    i.s = i.s.replace (/^([^<]+)/, function (s, t) {
      if (0 < p.insertionPoint && p.insertionPoint < t.length) {
        token = {type: 'char', value: t.substring (0, p.insertionPoint)};
        var ip = p.insertionPoint;
        p.insertionPoint = 0;
        return t.substring (ip, t.length);
      }
      token = {type: 'char', value: t};
      p.insertionPoint -= t.length;
      return '';
    });
    if (token) return token;
    // The matching end tag; left in the input if the insertion point lies
    // inside it.
    var pattern = new RegExp ('^</' + tagName + '>', 'i');
    i.s = i.s.replace (pattern, function (s) {
      if (p.insertionPoint < s.length) {
        token = {type: 'abort'};
        return s;
      }
      token = {type: 'end-tag', value: tagName};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;
    // The text before the insertion point might be the beginning of the
    // matching end tag; wait for more input in that case.
    var m;
    if ((p.insertionPoint < ('</' + tagName).length) &&
        (m = i.s.match (/^<\/([A-Za-z]+)/))) {
      var v = m[1].substring (0, p.insertionPoint).toLowerCase ();
      if (v == tagName.substring (0, p.insertionPoint - '</'.length)) {
        return {type: 'abort'};
      }
    }
    // Any other "<" is character data.
    i.s = i.s.replace (/^</, function (s) {
      token = {type: 'char', value: s};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;
    return {type: 'eof'};
  }

  // End tag.
  i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
    if (p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length &&
         s.substring (s.length - 1, s.length) != '>')) {
      token = {type: 'abort'};
      return s;
    }
    token = {type: 'end-tag', value: e.toLowerCase ()};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  // Start tag.
  i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
    if (p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length &&
         s.substring (s.length - 1, s.length) != '>')) {
      token = {type: 'abort'};
      return s;
    }
    var tagName;
    var attrs = {};
    e = e.replace (/^[\S]+/, function (v) {
      tagName = v.toLowerCase ();
      return '';
    });
    while (true) {
      var m = false;
      e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
          function (x, attrName, attrValue1, attrValue2, attrValue3) {
            // Capture groups that did not take part in the match are
            // undefined (or empty), so an attribute without a value, or
            // with an empty value, gets the empty string.
            var v = attrValue1 || attrValue2 || attrValue3 || '';
            v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
                .replace (/&amp;/g, '&');
            attrs[attrName.toLowerCase ()] = v;
            m = true;
            return '';
          });
      if (!m) break;
    }
    if (e.length) {
      log ('Broken start tag: "' + e + '"');
    }
    token = {type: 'start-tag', value: tagName, attrs: attrs};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  if (p.insertionPoint <= 0) {
    return {type: 'abort'};
  }
  // A run of characters up to the next "<", cut short at the insertion point.
  i.s = i.s.replace (/^[^<]+/, function (s) {
    if (p.insertionPoint < s.length) {
      token = {type: 'char', value: s.substring (0, p.insertionPoint)};
      var ip = p.insertionPoint;
      p.insertionPoint = 0;
      return s.substring (ip, s.length);
    }
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  // Any single remaining character (a "<" that starts no tag).
  i.s = i.s.replace (/^[\s\S]/, function (s) {
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  return {type: 'eof'};
}; // getNextToken
220
// Runs the tree construction loop: tokens are pulled from getNextToken ()
// and turned into JSElement / text children of the current node (the last
// entry of this.openElements) until an 'eof' or 'abort' token arrives.
//
// The method returns early, without running the "stop parsing" steps below
// the loop, when an 'abort' token is seen or when it is running reentrantly
// (this.reentrant) while an external script is pending.
//
// After 'eof' the postponed scripts are executed (scriptsExecutedSoon,
// scriptsExecutedAsynchronously, then scriptsExecutedAfterParsing) and the
// DOMContentLoaded / load events are logged.
//
// logIndentLevel is incremented on entry and decremented on every exit path.
221 Parser.prototype.parse = function () {
222 logIndentLevel++;
223 log ('parse: start');
224
225 while (true) {
226 var token = this.getNextToken ();
227 log ('token: ' + token.type + ' "' + token.value + '"');
228
// Set by the style/noscript/xmp branch below when the collection of
// character tokens was interrupted by an 'abort' token.
229 if (this.cdataEndTagRequired) {
230 // Generic CDATA parsing algorithm
231
232 if (token.type != 'abort') {
233 // 7.
234 if (token.type == 'end-tag' && token.value == this.endTagName) {
235 // 7.1. Ignores it.
236 //
237 } else {
238 // 7.2. Parse error.
239 log ('Parse error: no </' + this.endTagName + '>');
240 this.nextToken.unshift (token);
241 }
242 this.cdataEndTagRequired = false;
243 continue;
244 }
245 }
246
247 if (token.type == 'start-tag') {
248 if (token.value == 'script') {
249 // 1. Create an element for the token in the HTML namespace.
250 var el = new JSElement (this.doc, token.value);
251 if (token.attrs.async != null) el.async = true;
252 if (token.attrs.defer != null) el.defer = true;
253 if (token.attrs.src != null) el.src = token.attrs.src;
254
255 // 2. Mark the element as being "parser-inserted".
256 el.manakaiParserInserted = true;
257
258 // 3. Switch the tokeniser's content model flag to the CDATA state.
259 this.parseMode = 'cdata';
260 this.endTagName = 'script';
261
262 // 4.1. Collect all the character tokens.
263 while (true) {
264 var token = this.getNextToken ();
265 log ('token: ' + token.type + ' "' + token.value + '"');
266
267 if (token.type == 'char') {
268 // 5. Append a single Text node to the script element node.
269 el.manakaiAppendText (token.value);
270
271 // 4.2. Until it returns a token that is not a character token, or
272 // until it stops tokenising.
273 } else if (token.type == 'eof' ||
274 token.type == 'end-tag' ||
275 token.type == 'abort') {
276 // 6. Switched back to the PCDATA state.
277 this.parseMode = 'pcdata';
278
279 // 7.1. If the next token is not an end tag token with ...
280 if (!(token.type == 'end-tag' && token.value == 'script')) {
281 // 7.2. This is a parse error.
282 log ('Parse error: no </' + 'script>');
283 this.nextToken.unshift (token);
284
285 // 7.3. Mark the script element as "already executed".
286 el.manakaiAlreadyExecuted = true;
287 } else {
288 // 7.4. Ignore it.
289 //
290 }
291 break;
292 }
293 }
294
295 // 8.1. If the parser were originally created for the ...
296 if (this.fragmentParsingMode) {
297 // 8.2. Mark the script element as "already executed" and ...
298 el.manakaiAlreadyExecuted = true;
299 continue;
300 }
301
302 // 9.1. Let the old insertion point have the same value as the ...
303 var oldInsertionPoint = this.insertionPoint;
304 // 9.2. Let the insertion point be just before the next input ...
305 this.setInsertionPoint (0);
306
307 // 10. Append the new element to the current node.
// NOTE: appendChild () is where the "running a script" steps are carried
// out for script elements.
308 this.openElements[this.openElements.length - 1].appendChild (el);
309
310 // 11. Let the insertion point have the value of the old ...
311
// The insertion point was reset to 0 in step 9.2, so its current value is
// the net change made while the script element was being appended.
312 oldInsertionPoint += this.insertionPoint;
313 this.setInsertionPoint (oldInsertionPoint);
314
315 // 12. If there is a pending external script
316 while (this.pendingExternalScript) {
317 // 12.1. If the tree construction stage is being called reentrantly
318 if (this.reentrant) {
319 log ('parse: abort (reentrance)');
320 logIndentLevel--;
321 return;
322
323 // 12.2. Otherwise
324 } else {
325 // 1.
326 var script = this.pendingExternalScript;
327 this.pendingExternalScript = null;
328
329 // 2. Pause until the script has completed loading.
330 //
331
332 // 3. Let the insertion point to just before the next input char.
333 this.setInsertionPoint (0);
334
335 // 4. Execute the script.
336 executeScript (this.doc, script);
337
338 // 5. Let the insertion point be undefined again.
339 this.setInsertionPoint (undefined);
340
341 // 6. If there is once again a script that will execute ...
342 //
343 }
344 }
345 } else if (token.value == 'style' ||
346 token.value == 'noscript' ||
347 token.value == 'xmp') {
348 // 1. Create an element for the token in the HTML namespace.
349 var el = new JSElement (this.doc, token.value);
350
351 // 2. Append the new element to the current node.
352 this.openElements[this.openElements.length - 1].appendChild (el);
353
354 // 3. Switch the tokeniser's content model flag to the CDATA state.
355 this.parseMode = 'cdata';
356 this.endTagName = token.value;
357
358 // 4.1. Collect all the character tokens.
359 while (true) {
360 var token = this.getNextToken ();
361 log ('token: ' + token.type + ' "' + token.value + '"');
362
363 if (token.type == 'char') {
364 // 5. Append a single Text node to the script element node.
365 el.manakaiAppendText (token.value);
366
367 // 4.2. Until it returns a token that is not a character token, or
368 // until it stops tokenising.
369 } else if (token.type == 'eof' ||
370 token.type == 'end-tag' ||
371 token.type == 'abort') {
372 // 6. Switched back to the PCDATA state.
373 this.parseMode = 'pcdata';
374
375 if (token.type == 'abort') {
376 this.cdataEndTagRequired = true;
377 break;
378 }
379
380 // 7.1. If the next token is not an end tag token with ...
381 if (!(token.type == 'end-tag' &&
382 token.value == this.endTagName)) {
383 // 7.2. This is a parse error.
384 log ('Parse error: no </' + this.endTagName + '>');
385 this.nextToken.unshift (token);
386
387 // 7.3. Mark the script element as "already executed".
388 el.manakaiAlreadyExecuted = true;
389 } else {
390 // 7.4. Ignore it.
391 //
392 }
393 break;
394 }
395 }
396 } else {
397 var el = new JSElement (this.doc, token.value);
398 this.openElements[this.openElements.length - 1].appendChild (el);
399 this.openElements.push (el);
400 }
401 } else if (token.type == 'end-tag') {
// Only an end tag that names the current node closes it; anything else is
// logged and otherwise ignored.
402 if (this.openElements[this.openElements.length - 1].localName ==
403 token.value) {
404 this.openElements.pop ();
405 } else {
406 log ('parse error: unmatched end tag: ' + token.value);
407 }
408 } else if (token.type == 'char') {
409 this.openElements[this.openElements.length - 1].manakaiAppendText
410 (token.value);
411 } else if (token.type == 'eof') {
412 break;
413 } else if (token.type == 'abort') {
414 log ('parse: abort');
415 logIndentLevel--;
416 return;
417 }
418 }
419
420 log ('stop parsing');
421
422 // readyState = 'interactive'
423
424 // "When a script completes loading" rules start applying.
425
// The outer loop repeats until both lists are empty at the same time.
426 while (this.scriptsExecutedSoon.length > 0 ||
427 this.scriptsExecutedAsynchronously.length > 0) {
428 // Handle "list of scripts that will execute as soon as possible".
429 while (this.scriptsExecutedSoon.length > 0) {
430 var e = this.scriptsExecutedSoon.shift ();
431
432 // If it has completed loading
433 log ('Execute an external script not inserted by parser...');
434 executeScript (this.doc, e);
435
436 // NOTE: It MAY be executed before the end of the parsing, according
437 // to the spec.
438 this.hasAsyncScript = true;
439 }
440
441 // Handle "list of scripts that will execute asynchronously".
442 while (this.scriptsExecutedAsynchronously.length > 0) {
443 var e = this.scriptsExecutedAsynchronously.shift ();
444
445 // Step 1.
446 // We assume that all scripts have been loaded at this time.
447
448 // Step 2.
449 log ('Execute an asynchronous script...');
450 executeScript (this.doc, e);
451
452 // Step 3.
453 //
454
455 // Step 4.
456 //
457
458 this.hasAsyncScript = true;
459 }
460 }
461
462 // Handle "list of scripts that will execute when the document has finished
463 // parsing".
464 var list = this.scriptsExecutedAfterParsing;
465 while (list.length > 0) {
466 // TODO: break unless completed loading
467
468 // Step 1.
469 //
470
471 // Step 2. and Step 3.
472 log ('Executing a |defer|red script...');
473 executeScript (this.doc, list.shift ());
474
475 // Step 4.
476 }
477
478 log ('DOMContentLoaded event fired');
479
480 // "delays the load event" things has completed:
481 // readyState = 'complete'
482 log ('load event fired');
483
484 logIndentLevel--;
485 } // parse
486
/**
 * Moves the insertion point and logs the new position.
 *
 * @param {number} [ip]  Offset from the start of the remaining input.
 *     undefined, null or NaN make the insertion point undefined.
 */
Parser.prototype.setInsertionPoint = function (ip) {
  // |== null| is true for both null and undefined.
  if (ip == null || isNaN (ip)) {
    log ('insertion point: set to undefined');
    this.insertionPoint = undefined;
    return;
  }

  var rest = this.input.s;
  if (ip == rest.length) {
    log ('insertion point: end of file');
  } else {
    log ('insertion point: set to ' + ip +
         ' (before "' + rest.substring (0, 10) + '")');
  }
  this.insertionPoint = ip;
}; // setInsertionPoint
500
/**
 * Minimal document node.
 *
 * @param {Parser} p  Parser that populates this document.
 */
function JSDocument (p) {
  this._parser = p;
  // Children in document order.
  this.childNodes = [];
} // JSDocument
505
/**
 * Minimal element node.
 *
 * @param {JSDocument} doc  Document that owns the element.
 * @param {string} localName  Tag name of the element.
 */
function JSElement (doc, localName) {
  this.ownerDocument = doc;
  this.localName = localName;
  // Children in document order.
  this.childNodes = [];
} // JSElement
511
/**
 * Appends a node as the last child of this node.
 *
 * When the appended node is a script element, the "running a script" steps
 * are carried out: depending on its async, defer and src properties the
 * script is queued on one of the parser's lists, registered as the pending
 * external script, or executed immediately.
 *
 * @param {Object} e  Node to append.
 * @return {Object} The appended node.
 */
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push (e);
  e.parentNode = this;

  // Only script elements need further processing.
  if (e.localName != 'script') {
    return e;
  }

  logIndentLevel++;
  log ('Running a script: start');

  var doc = this.ownerDocument || this;
  var p = doc._parser;

  // 1. - 3. The script's type, the script's character encoding and the
  // conditions under which a script is not run at all are not modelled.

  // 4. Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // 5. If the element has a src attribute, then a load for ...
  // TODO: load an external resource

  // 6. The first of the following options:
  var hasSrc = e.src != null;
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    // 6.1.
    p.scriptsExecutedAfterParsing.push (e);
    log ('Running a script: aborted (defer)');
  } else if (e.async && hasSrc) {
    // 6.2.
    p.scriptsExecutedAsynchronously.push (e);
    log ('Running a script: aborted (async src)');
  } else if (e.async && p.scriptsExecutedAsynchronously.length > 0) {
    // 6.3.  (The element has no src here; see the previous branch.)
    p.scriptsExecutedAsynchronously.push (e);
    log ('Running a script: aborted (async)');
  } else if (hasSrc) {
    if (e.manakaiParserInserted) {
      // 6.4.
      if (p.pendingExternalScript) {
        log ('Error: There is a pending external script.');
      }
      p.pendingExternalScript = e;
      log ('Running a script: aborted (src parser-inserted)');
    } else {
      // 6.5.
      p.scriptsExecutedSoon.push (e);
      log ('Running a script: aborted (src)');
    }
  } else {
    // 6.6.
    executeScript (doc, e); // even if other scripts are already executing.
  }

  log ('Running a script: end');
  logIndentLevel--;

  return e;
}; // appendChild
587
/**
 * Executes a script element.
 *
 * The code is taken from the resource named by the src property when that
 * property is set, and from the text content of the element otherwise.  If
 * the external resource cannot be obtained, an error event is logged and
 * nothing is run.
 *
 * @param {JSDocument} doc  Document the script runs against.
 * @param {JSElement} e  The script element.
 */
function executeScript (doc, e) {
  log ('executing a script block: start');

  var code;
  if (e.src == null) {
    code = e.text;
  } else {
    code = getExternalScript (e.src);

    // If the load resulted in an error, then ... firing an error event ...
    if (code == null) {
      log ('error event fired at the script element');
      return;
    }

    log ('External script loaded: "' + code + '"');
  }

  // The load was successful.  Scripting is always treated as enabled, design
  // mode as disabled and the document as the active document.
  parseAndRunScript (doc, code);

  log ('load event fired at the script element');

  log ('executing a script block: end');
} // executeScript
619
/**
 * Returns the script code an external script URI stands for.
 *
 * Only javascript: URIs that consist of a single string literal are
 * supported, that is, URIs matching
 * ^javascript:\s*(?:"[^"]*"|'[^']*')\s*$ (case-insensitively).  The
 * literal may be empty, in which case the empty string is returned.
 *
 * @param {string} uri  Value of the src attribute.
 * @return {?string} The unescaped content of the string literal, or null
 *     (after logging the reason) when the URI is not supported.
 */
function getExternalScript (uri) {
  if (!/^javascript:/i.test (uri)) {
    log ('URI scheme not supported: <' + uri + '>');
    return null;
  }

  var m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]*)")\s*$/i);
  if (!m) {
    log ('Complex javascript: URI is not supported: <' + uri + '>');
    return null;
  }

  // Exactly one alternative matched; the other group is undefined (or
  // empty).  An empty literal is a valid, empty script.
  return unescapeJSLiteral (m[1] || m[2] || '');
} // getExternalScript
640
641 function parseAndRunScript (doc, s) {
642 while (true) {
643 var matched = false;
644 s = s.replace (/^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/, function (s, t) {
645 matched = true;
646 var args = [];
647 t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {
648 args.push (unescapeJSLiteral (v.substring (1, v.length - 1)));
649 return '';
650 });
651 doc.write.apply (doc, args);
652 return '';
653 });
654 var noDocumentElement = false;
655 s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'([^']*)'|"([^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
656 function (s, t, u) {
657 matched = true;
658 var args = [unescapeJSLiteral (t ? t : u)];
659 noDocumentElement = !doc._insertExternalScript.apply (doc, args);
660 return '';
661 });
662 if (noDocumentElement) {
663 log ('Script error: documentElement is null');
664 break;
665 }
666 s = s.replace (/^\s*w\s*\(\s*document\.documentElement\.innerHTML\s*\)\s*;\s*/,
667 function (s, t) {
668 matched = true;
669 log (dumpTree (doc, ''));
670 return '';
671 });
672 if (s == '') break;
673 if (!matched) {
674 log ('Script parse error: "' + s + '"');
675 break;
676 }
677 }
678 } // parseAndRunScript
679
/**
 * Resolves the escape sequences in the body of a JavaScript string literal.
 *
 * Only \uHHHH escapes (exactly four hexadecimal digits) are replaced; any
 * other text is returned unchanged.
 *
 * @param {string} s  Literal body without the delimiting quotes.
 * @return {string} The unescaped text.
 */
function unescapeJSLiteral (s) {
  return s.replace (/\\u([0-9A-Fa-f]{4})/g, function (escape, hex) {
    var codeUnit = parseInt (hex, 16);
    return String.fromCharCode (codeUnit);
  });
} // unescapeJSLiteral
685
/**
 * Minimal text node.
 *
 * @param {string} data  Character data of the node.
 */
function JSText (data) {
  // manakaiAppendText () appends later character data to this property.
  this.data = data;
} // JSText
689
/**
 * Appends character data to this node.
 *
 * The data is added to the last child when that child is a text node;
 * otherwise a new text node is appended.
 *
 * @param {string} s  Character data to append.
 */
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  // undefined (no children at all) is not an instance of JSText.
  var last = this.childNodes[this.childNodes.length - 1];
  if (last instanceof JSText) {
    last.data += s;
    return;
  }
  this.childNodes.push (new JSText (s));
}; // manakaiAppendText
700
/**
 * Implements document.open ([type [, replace]]).
 *
 * The call is ignored while a parser that was not created by a script has a
 * defined insertion point.  Otherwise the document is emptied and bound to a
 * new, script-created parser with an empty input stream whose insertion
 * point is at the end of that stream.
 *
 * The insertion point is a property of the parser, not of its input stream.
 * Like everywhere else in this file, NaN counts as an undefined insertion
 * point.  document.write () only calls this method when the insertion point
 * is undefined or NaN, so that caller is never ignored.
 *
 * @return {JSDocument} This document.
 */
JSDocument.prototype.open = function () {
  // Two or fewer arguments

  // Step 1.  (The type is not used any further; see step 9.)
  var type = arguments[0] || 'text/html';

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  var current = this._parser;
  if (current &&
      !current.scriptCreated &&
      current.insertionPoint != null &&
      !isNaN (current.insertionPoint)) {
    log ('document.open () in parsing mode is ignored');
    return this;
  }

  // Step 4.
  log ('onbeforeunload event fired');
  log ('onunload event fired');

  // Step 5.
  if (this._parser) {
    // Discard the parser.
  }

  // Step 6.
  log ('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser (new InputStream (''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  this._parser.setInsertionPoint (this._parser.input.s.length);

  // Step 12.
  return this;
}; // document.open
752
/**
 * Implements document.write (string, ...).
 *
 * The arguments are concatenated and inserted into the parser's input stream
 * just before the insertion point, which is then moved past the inserted
 * text.  Unless an external script is pending, the parser is run reentrantly
 * to process the inserted text.  If the insertion point is undefined (or
 * NaN), document.open () is called first.
 */
JSDocument.prototype.write = function () {
  log ('document.write: start');
  logIndentLevel++;

  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
    this.open ();
    p = this._parser;
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // |arguments| is not an array, so borrow join () from Array.prototype
  // (the static Array.join () is a removed non-standard extension).
  var s = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + s + '"' +
       ' before "' +
       p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
  p.input.s = p.input.s.substring (0, p.insertionPoint) + s
      + p.input.s.substring (p.insertionPoint, p.input.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a pending external script
  if (p.pendingExternalScript) {
    log ('document.write: processed later (there is an unprocessed <script src>)');
    logIndentLevel--;
    log ('document.write: return');
    return;
  }

  // 4. Process the characters that were inserted, ...
  var originalReentrant = p.reentrant;
  p.reentrant = true;
  p.parse ();
  p.reentrant = originalReentrant;
  // TODO: "Abort the processing of any nested invocations of the tokeniser,
  // yielding control back to the caller." (<script> parsing).  Do we need
  // to do something here?

  // 5. Return
  logIndentLevel--;
  log ('document.write: return');

  return;
}; // document.write
797
/**
 * Creates a script element with the given src and appends it to the
 * document element.
 *
 * @param {string} uri  Value for the src property of the new element.
 * @return {boolean} false when the document has no document element (the
 *     new element is then dropped), true otherwise.
 */
JSDocument.prototype._insertExternalScript = function (uri) {
  var script = new JSElement (this, 'script');
  script.src = uri;

  if (!this.documentElement) {
    return false;
  }
  this.documentElement.appendChild (script);
  return true;
}; // _insertExternalScript
808
/**
 * document.documentElement: the first element child of the document, or
 * null when the document has no element child.
 *
 * Defined with Object.defineProperty () instead of the deprecated
 * __defineGetter__ (); the attributes match what __defineGetter__ () creates.
 */
Object.defineProperty (JSDocument.prototype, 'documentElement', {
  get: function () {
    var cn = this.childNodes;
    for (var i = 0; i < cn.length; i++) {
      if (cn[i] instanceof JSElement) {
        return cn[i];
      }
    }
    return null;
  },
  enumerable: true,
  configurable: true
});
818
/**
 * element.text: the concatenated data of the text node children of the
 * element (descendants of child elements are not included).
 *
 * Defined with Object.defineProperty () instead of the deprecated
 * __defineGetter__ (); the attributes match what __defineGetter__ () creates.
 */
Object.defineProperty (JSElement.prototype, 'text', {
  get: function () {
    var r = '';
    for (var i = 0; i < this.childNodes.length; i++) {
      if (this.childNodes[i] instanceof JSText) {
        r += this.childNodes[i].data;
      }
    }
    return r;
  },
  enumerable: true,
  configurable: true
});
828
/**
 * Serialises the children of a node as a tree dump, one line per node.
 *
 * Element lines are followed by lines for the async, defer and src
 * properties (when set) and by the dump of the element's own children, one
 * level deeper.  Text nodes are written in double quotes.  Any other child
 * is written using its string conversion.
 *
 * @param {Object} n  Node whose children are dumped.
 * @param {string} indent  Indentation of the current level.
 * @return {string} The dump; empty when the node has no children.
 */
function dumpTree (n, indent) {
  var parts = [];
  var prefix = '| ' + indent;

  for (var k = 0; k < n.childNodes.length; k++) {
    var child = n.childNodes[k];

    if (child instanceof JSElement) {
      parts.push (prefix + child.localName + '\n');
      if (child.async) parts.push (prefix + ' async=""\n');
      if (child.defer) parts.push (prefix + ' defer=""\n');
      if (child.src != null) {
        parts.push (prefix + ' src="' + child.src + '"\n');
      }
      parts.push (dumpTree (child, indent + ' '));
      continue;
    }

    if (child instanceof JSText) {
      parts.push (prefix + '"' + child.data + '"\n');
    } else {
      parts.push (prefix + child + '\n');
    }
  }

  return parts.join ('');
} // dumpTree
849 </script>
850 </head>
851 <body onload="
// Page initialisation.  This code lives inside a double-quoted HTML
// attribute, so it must not contain double-quote characters.

// The first textarea holds the markup to parse.
document.sourceElement = document.getElementsByTagName ('textarea')[0];

// Pre-fill the textarea from the |s| parameter of the query string.
// Parameters are separated by semicolons.
var query = location.search;
var params = query != null ? query.substring (1).split (/;/) : [];
for (var k = 0; k < params.length; k++) {
  var pair = params[k].split (/=/, 2);
  var name = decodeURIComponent (pair[0]);
  var value = decodeURIComponent (pair[1] || '');
  if (name == 's') {
    document.sourceElement.value = value;
  }
}

// The first output element receives the log.
document.logElement = document.getElementsByTagName ('output')[0];
update ();
869 ">
870 <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
871 Parser</h1>
872
873 <h2>Markup to test
874 (<a href=data:, id=permalink rel=bookmark>permalink</a>,
875 <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
876 id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
877 Viewer</a>)</h2>
878 <p>
879 <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
880 &lt;head>&lt;/head>&lt;body>
881 &lt;p>
882 &lt;script>
883 document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
884 &lt;/script>
885 &lt;p>
886 </textarea>
887
888 <h2 id=log>Log</h2>
889 <p><output></output>
890
891 <h2 id=notes>Notes</h2>
892
893 <p>This is a <em>simplified</em> implementation of <a
894 href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
895 Parsing Algorithm</a> (revision 2138). It only implements
896 scripting-related parts of the algorithm. Especially, this parser:
897
898 <ul>
899 <li>Does not support <code>DOCTYPE</code> and comment tokens.
900 <li>Does not support entities except for <code>&amp;quot;</code>,
901 <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
902 <code>src</code> attribute value.
903 <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
904 algorithm, and so on.
905 <li>Does not raise parse errors for invalid attribute specifications in start
906 or end tags.
907 <li>Does not support RCDATA elements (<code>title</code> and
908 <code>textarea</code>).
909 <li>Does not strip the first newline in <code>pre</code>,
910 <code>listing</code>, and <code>textarea</code> elements.
911 <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
912 in CDATA/RCDATA elements.
913 <li>Does not support foreign (SVG or MathML) elements.
914 <li>Only supports <code>script</code> <code>type</code>
915 <code>text/javascript</code>. <code>type</code> and <code>language</code>
916 attributes are ignored.
917 <li>Only supports limited statements. It must consist of zero or more
918 of statements looking similar to the following statements, possibly
919 introduced, followed, or separated by white space characters:
920 <ul>
921 <li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>.
922 <li><code>var s = document.createElement ("script");
923 s.src = "<var>string</var>";
924 document.documentElement.appendChild (s);</code>
925 <li><code>w (document.documentElement.innerHTML);</code> (This statement
926 can be used to dump the document, even when the document has no
927 document element. The output format is the tree dump format used
928 in html5lib test data, not <abbr>HTML</abbr>.)
929 </ul>
930 Note that strings may be delimited by <code>'</code>s instead of
931 <code>"</code>s.
932 <li>Only supports <code>javascript:</code>
933 <abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the
934 <code>src</code> attribute of the <code>script</code> element. In addition,
935 the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to
936 the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
937 <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
938 string literals.
939 <li>Does not handle <i>stop parsing</i> phase correctly if the document is
940 replaced by a <code>document.open ()</code> call. In other words, delayed
941 (deferred or asynchronous) script executions and event firings might be
942 treated in a wrong way if a <code>document.open ()</code> invocation
943 is implicitly done by <code>document.write ()</code> in a delayed script.
944 </ul>
945
946 <p>For some reason, this parser does not work in browsers that do
947 not support JavaScript 1.5.
948
949 <!-- TODO: |src| attribute value should refer to the value at the time
950 when it is inserted into the document, not the value when the script is
951 executed. Currently it does not matter, since we don't allow dynamic
952 modification to the |src| content/DOM attribute value yet. -->
953
954 <p>See also
955 <a href="http://suika.fam.cx/gate/2005/sw/Live%20Scripting%20HTML%20Parser">SuikaWiki:
956 Live Scripting HTML Parser</a>.
957
958 </body>
959 </html>
960 <!-- $Date: 2008/09/20 13:32:45 $ -->
961 <!--
962
963 Copyright 2008 Wakaba <w@suika.fam.cx>
964
965 This program is free software; you can redistribute it and/or
966 modify it under the terms of the GNU General Public License
967 as published by the Free Software Foundation; either version 2
968 of the License, or (at your option) any later version.
969
970 This program is distributed in the hope that it will be useful,
971 but WITHOUT ANY WARRANTY; without even the implied warranty of
972 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
973 GNU General Public License for more details.
974
975 You should have received a copy of the GNU General Public License
976 along with this program; if not, write to the Free Software
977 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
978
979 -->

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24