/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.18 - (show annotations) (download) (as text)
Sun Aug 31 09:46:14 2008 UTC (16 years, 9 months ago) by wakaba
Branch: MAIN
Changes since 1.17: +10 -10 lines
File MIME type: text/html
'the script that will execute as soon as the parser resumes' -> 'the pending external script' (HTML5 revision 1830)

1 <!DOCTYPE HTML>
2 <html lang=en>
3 <head>
4 <title>Live Scripting HTML Parser</title>
5 <link rel=author href="http://suika.fam.cx/~wakaba/who?">
6 <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
7 title="GNU GPL2 or later">
8 <style>
9 h1 {
10 margin: 0;
11 font-size: 150%;
12 }
13 h2 {
14 margin: 0;
15 font-size: 100%;
16 }
17 p {
18 margin: 0 1em;
19 }
20 textarea {
21 width: 100%;
22 -width: 99%;
23 height: 10em;
24 }
25 output {
26 display: block;
27 font-family: monospace;
28 white-space: -moz-pre-wrap;
29 white-space: pre-wrap;
30 }
31 </style>
32 <script>
/** Handle of the pending refresh timer, or 0 when no refresh is scheduled. */
var delayedUpdater = 0;

/**
 * Schedules update2 to run 100 ms from now.  A refresh that is already
 * pending is cancelled first, so a burst of calls ends in a single run.
 */
function update() {
  var pending = delayedUpdater;
  delayedUpdater = 0;
  if (pending) {
    clearTimeout(pending);
  }
  delayedUpdater = setTimeout(update2, 100);
}
42
/**
 * Re-parses the text of document.sourceElement when it differs from the text
 * handled by the previous run: refreshes the permalink and Live DOM Viewer
 * links, clears the log element, parses the text and logs the resulting tree.
 */
function update2() {
  var source = document.sourceElement.value;
  if (source == document.previousSourceText) {
    // Nothing changed since the last run.
    return;
  }
  document.previousSourceText = source;

  var encoded = encodeURIComponent(source);
  document.links['permalink'].href = location.pathname + '?s=' + encoded;
  document.links['ldvlink'].href =
      'http://software.hixie.ch/utilities/js/live-dom-viewer/?' + encoded;

  document.logElement.textContent = '';
  var parser = new Parser(new InputStream(source));
  // Keep the initial document: this is the object that gets dumped below.
  var tree = parser.doc;
  parser.parse();

  log(dumpTree(tree, ''));

  if (parser.hasAsyncScript) {
    log('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
  }
}
65
/** Current nesting depth of the log; one indentation unit per level. */
var logIndentLevel = 0;

/**
 * Appends the string s, followed by a newline, to document.logElement as a
 * new text node.  Every line of s is prefixed with the current indentation.
 */
function log(s) {
  var prefix = '';
  for (var remaining = logIndentLevel; remaining > 0; remaining--) {
    prefix += ' ';
  }
  var text = prefix + s.split('\n').join('\n' + prefix);
  var node = document.createTextNode(text + '\n');
  document.logElement.appendChild(node);
}
75
/**
 * Input stream of a parser.  The only state is the property s, which holds
 * the text that has not been consumed by the tokeniser yet.
 */
function InputStream(s) {
  var remainingText = s;
  this.s = remainingText;
}
79
/**
 * Creates a parser reading from the input stream i.
 *
 * When doc is omitted a new JSDocument bound to this parser is created and
 * flagged as an HTML document.  The document is the first (and initially
 * only) entry of the stack of open elements.
 */
function Parser(i, doc) {
  var targetDocument = doc;
  if (!targetDocument) {
    targetDocument = new JSDocument(this);
    targetDocument.manakaiIsHTML = true;
  }

  this.input = i;
  this.doc = targetDocument;
  this.openElements = [targetDocument];

  // Tokeniser state: content model and tokens pushed back by the parser.
  this.parseMode = 'pcdata';
  this.nextToken = [];

  // Script lists processed once parsing has stopped.
  this.scriptsExecutedAfterParsing = [];
  this.scriptsExecutedSoon = [];
  this.scriptsExecutedAsynchronously = [];
}
94
/**
 * Returns the next token, consuming it from the front of this.input.s.
 *
 * Tokens pushed back onto this.nextToken are returned first.  Otherwise the
 * result is one of:
 *   {type: 'char', value}             - character data
 *   {type: 'start-tag', value, attrs} - lowercased tag name and attributes
 *   {type: 'end-tag', value}          - lowercased tag name
 *   {type: 'abort'}                   - the token would cross the insertion
 *                                       point; nothing was consumed
 *   {type: 'eof'}                     - the input is empty
 *
 * this.insertionPoint is decreased by the number of consumed characters.
 * When it is undefined it turns into NaN here; every comparison against it
 * is then false, so the tokeniser never aborts.
 */
Parser.prototype.getNextToken = function () {
  if (this.nextToken.length) {
    return this.nextToken.shift();
  }

  var p = this;
  var i = this.input;
  var token;

  if (this.parseMode == 'cdata') {
    var tagName = this.endTagName;
    if (p.insertionPoint <= 0) {
      return {type: 'abort'};
    }

    // Character data up to the next "<" or up to the insertion point.
    i.s = i.s.replace(/^([^<]+)/, function (s, t) {
      if (0 < p.insertionPoint && p.insertionPoint < t.length) {
        token = {type: 'char', value: t.substring(0, p.insertionPoint)};
        var ip = p.insertionPoint;
        p.insertionPoint = 0;
        return t.substring(ip, t.length);
      }
      token = {type: 'char', value: t};
      p.insertionPoint -= t.length;
      return '';
    });
    if (token) return token;

    // The complete end tag of the current CDATA element.
    var pattern = new RegExp('^</' + tagName + '>', 'i');
    i.s = i.s.replace(pattern, function (s) {
      if (p.insertionPoint < s.length) {
        token = {type: 'abort'};
        return s;
      }
      token = {type: 'end-tag', value: tagName};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    // A possible end tag cut in two by the insertion point.  Only the
    // characters before the insertion point are known, so only those
    // (minus the two characters of the "less-than, slash" prefix) are
    // compared with the expected tag name.  The original code sliced the
    // name by the whole insertion point, and therefore missed the abort
    // whenever letters followed the insertion point.
    var m;
    if ((p.insertionPoint < ('</' + tagName).length) &&
        (m = i.s.match(/^<\/([A-Za-z]+)/))) {
      var knownLength = p.insertionPoint - '</'.length;
      var known = m[1].substring(0, knownLength).toLowerCase();
      if (known == tagName.substring(0, knownLength)) {
        return {type: 'abort'};
      }
    }

    // A "<" that does not start the end tag is plain character data.
    i.s = i.s.replace(/^</, function (s) {
      token = {type: 'char', value: s};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;
    return {type: 'eof'};
  }

  // PCDATA: end tag.
  i.s = i.s.replace(/^<\/([^>]+)(?:>|$)/, function (s, e) {
    if (p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length &&
         s.substring(s.length - 1, s.length) != '>')) {
      token = {type: 'abort'};
      return s;
    }
    token = {type: 'end-tag', value: e.toLowerCase()};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // PCDATA: start tag with attributes.
  i.s = i.s.replace(/^<([^>]+)(?:>|$)/, function (s, e) {
    if (p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length &&
         s.substring(s.length - 1, s.length) != '>')) {
      token = {type: 'abort'};
      return s;
    }
    var tagName;
    var attrs = {};
    e = e.replace(/^[\S]+/, function (name) {
      tagName = name.toLowerCase();
      return '';
    });
    while (true) {
      var m = false;
      e = e.replace(/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
          function (x, attrName, attrValue1, attrValue2, attrValue3) {
            // Groups that did not take part in the match are undefined in
            // current engines, so an attribute without a value (or with an
            // empty one) must fall back to the empty string explicitly.
            var value = attrValue1 || attrValue2 || attrValue3 || '';
            value = value.replace(/&quot;/g, '"').replace(/&apos;/g, "'")
                .replace(/&amp;/g, '&');
            attrs[attrName.toLowerCase()] = value;
            m = true;
            return '';
          });
      if (!m) break;
    }
    if (e.length) {
      log('Broken start tag: "' + e + '"');
    }
    token = {type: 'start-tag', value: tagName, attrs: attrs};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  if (p.insertionPoint <= 0) {
    return {type: 'abort'};
  }

  // PCDATA: character data up to the next "<" or the insertion point.
  i.s = i.s.replace(/^[^<]+/, function (s) {
    if (p.insertionPoint < s.length) {
      token = {type: 'char', value: s.substring(0, p.insertionPoint)};
      var ip = p.insertionPoint;
      p.insertionPoint = 0;
      return s.substring(ip, s.length);
    }
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Any other single character (in practice a "<" that is not a tag).
  i.s = i.s.replace(/^[\s\S]/, function (s) {
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  return {type: 'eof'};
}; // getNextToken
220
/**
 * Tree construction stage.  Reads tokens from getNextToken until the end of
 * the input (or until the tokeniser aborts at the insertion point) and
 * builds the tree below this.doc.
 *
 * script, style, noscript and xmp start tags switch the tokeniser to CDATA
 * mode and collect the element content here.  A script element is executed
 * by appending it to the current node.  When the end of the input is
 * reached, the delayed script lists are processed and the final events are
 * logged.  Returns nothing; an aborted run returns early without executing
 * the end-of-parsing steps.
 */
Parser.prototype.parse = function () {
  logIndentLevel++;
  log('parse: start');

  while (true) {
    var token = this.getNextToken();
    log('token: ' + token.type + ' "' + token.value + '"');

    if (this.cdataEndTagRequired && token.type != 'abort') {
      // Generic CDATA parsing algorithm, resumed after an abort.
      // 7.1. A matching end tag is ignored.
      if (!(token.type == 'end-tag' && token.value == this.endTagName)) {
        // 7.2. Parse error.
        log('Parse error: no </' + this.endTagName + '>');
        this.nextToken.unshift(token);
      }
      this.cdataEndTagRequired = false;
      continue;
    }

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement(this.doc, token.value);
        if (token.attrs.async != null) el.async = true;
        if (token.attrs.defer != null) el.defer = true;
        if (token.attrs.src != null) el.src = token.attrs.src;

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'cdata';
        this.endTagName = 'script';

        // 4.1. Collect all the character tokens.
        while (true) {
          token = this.getNextToken();
          log('token: ' + token.type + ' "' + token.value + '"');

          if (token.type == 'char') {
            // 5. Append a single Text node to the script element node.
            el.manakaiAppendText(token.value);

          // 4.2. Until it returns a token that is not a character token, or
          // until it stops tokenising.
          } else if (token.type == 'eof' ||
                     token.type == 'end-tag' ||
                     token.type == 'abort') {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            // 7.1. If the next token is not an end tag token with ...
            if (!(token.type == 'end-tag' && token.value == 'script')) {
              // 7.2. This is a parse error.  (The end tag is spelled in two
              // pieces because this code is embedded in an HTML script
              // element.)
              log('Parse error: no </' + 'script>');
              this.nextToken.unshift(token);

              // 7.3. Mark the script element as "already executed".
              el.manakaiAlreadyExecuted = true;
            }
            // 7.4. Otherwise the end tag is ignored.
            break;
          }
        }

        // 8.1. If the parser were originally created for the ...
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and ...
          // This must be the same flag that appendChild tests; the former
          // |alreadyExecuted| property was never read anywhere.
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the ...
        var oldInsertionPoint = this.insertionPoint;
        // 9.2. Let the insertion point be just before the next input ...
        this.setInsertionPoint(0);

        // 10. Append the new element to the current node.  (This runs the
        // script; see appendChild.)
        this.openElements[this.openElements.length - 1].appendChild(el);

        // 11. Let the insertion point have the value of the old ...
        // The text inserted while the script ran is added to the old value.
        oldInsertionPoint += this.insertionPoint;
        this.setInsertionPoint(oldInsertionPoint);

        // 12. If there is a pending external script
        while (this.pendingExternalScript) {
          // 12.1. If the tree construction stage is being called reentrantly
          if (this.reentrant) {
            log('parse: abort (reentrance)');
            logIndentLevel--;
            return;

          // 12.2. Otherwise
          } else {
            // 1.
            var script = this.pendingExternalScript;
            this.pendingExternalScript = null;

            // 2. Pause until the script has completed loading.
            //

            // 3. Let the insertion point to just before the next input char.
            this.setInsertionPoint(0);

            // 4. Execute the script.
            executeScript(this.doc, script);

            // 5. Let the insertion point be undefined again.
            this.setInsertionPoint(undefined);

            // 6. If there is once again a pending external script, the
            // loop condition repeats these steps.
          }
        }
      } else if (token.value == 'style' ||
                 token.value == 'noscript' ||
                 token.value == 'xmp') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement(this.doc, token.value);

        // 2. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild(el);

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'cdata';
        this.endTagName = token.value;

        // 4.1. Collect all the character tokens.
        while (true) {
          token = this.getNextToken();
          log('token: ' + token.type + ' "' + token.value + '"');

          if (token.type == 'char') {
            // 5. Append a single Text node to the element node.
            el.manakaiAppendText(token.value);

          // 4.2. Until it returns a token that is not a character token, or
          // until it stops tokenising.
          } else if (token.type == 'eof' ||
                     token.type == 'end-tag' ||
                     token.type == 'abort') {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            if (token.type == 'abort') {
              // The end tag check is done when the next token is read.
              this.cdataEndTagRequired = true;
              break;
            }

            // 7.1. If the next token is not an end tag token with ...
            if (!(token.type == 'end-tag' &&
                  token.value == this.endTagName)) {
              // 7.2. This is a parse error.
              log('Parse error: no </' + this.endTagName + '>');
              this.nextToken.unshift(token);

              // 7.3. Same flag as in the script case above; it is set on
              // the element unchanged from the original code.
              el.manakaiAlreadyExecuted = true;
            }
            // 7.4. Otherwise the end tag is ignored.
            break;
          }
        }
      } else {
        var el = new JSElement(this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild(el);
        this.openElements.push(el);
      }
    } else if (token.type == 'end-tag') {
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop();
      } else {
        log('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'char') {
      this.openElements[this.openElements.length - 1].manakaiAppendText
          (token.value);
    } else if (token.type == 'eof') {
      break;
    } else if (token.type == 'abort') {
      log('parse: abort');
      logIndentLevel--;
      return;
    }
  }

  log('stop parsing');

  // readyState = 'interactive'

  // "When a script completes loading" rules start applying.

  while (this.scriptsExecutedSoon.length > 0 ||
         this.scriptsExecutedAsynchronously.length > 0) {
    // Handle "list of scripts that will execute as soon as possible".
    while (this.scriptsExecutedSoon.length > 0) {
      var e = this.scriptsExecutedSoon.shift();

      // If it has completed loading
      log('Execute an external script not inserted by parser...');
      executeScript(this.doc, e);

      // NOTE: It MAY be executed before the end of the parsing, according
      // to the spec.
      this.hasAsyncScript = true;
    }

    // Handle "list of scripts that will execute asynchronously".
    while (this.scriptsExecutedAsynchronously.length > 0) {
      var e = this.scriptsExecutedAsynchronously.shift();

      // Step 1.
      // We assume that all scripts have been loaded at this time.

      // Step 2.
      log('Execute an asynchronous script...');
      executeScript(this.doc, e);

      // Step 3. and Step 4. need no action here.

      this.hasAsyncScript = true;
    }
  }

  // Handle "list of scripts that will execute when the document has finished
  // parsing".
  var list = this.scriptsExecutedAfterParsing;
  while (list.length > 0) {
    // TODO: break unless completed loading

    // Step 1. needs no action here.

    // Step 2. and Step 3.
    log('Executing a |defer|red script...');
    executeScript(this.doc, list.shift());

    // Step 4.
  }

  log('DOMContentLoaded event fired');

  // Everything that "delays the load event" has completed:
  // readyState = 'complete'
  log('load event fired');

  logIndentLevel--;
}; // parse
486
/**
 * Sets this.insertionPoint and logs the new position.  undefined, null and
 * NaN all mean "no insertion point" and are stored as undefined.
 */
Parser.prototype.setInsertionPoint = function (ip) {
  if (ip == null || isNaN(ip)) {
    log('insertion point: set to undefined');
    this.insertionPoint = undefined;
    return;
  }

  var remaining = this.input.s;
  var message = (ip == remaining.length)
      ? 'insertion point: end of file'
      : 'insertion point: set to ' + ip +
        ' (before "' + remaining.substring(0, 10) + '")';
  log(message);
  this.insertionPoint = ip;
}; // setInsertionPoint
500
/**
 * Minimal document node: an initially empty child list plus a reference to
 * the parser p that feeds the document.
 */
function JSDocument(p) {
  this._parser = p;
  this.childNodes = [];
}
505
/**
 * Minimal element node named localName, owned by the document doc, with an
 * initially empty child list.
 */
function JSElement(doc, localName) {
  this.ownerDocument = doc;
  this.localName = localName;
  this.childNodes = [];
}
511
/**
 * Appends the node e to this node and returns e.  Shared by documents and
 * elements.
 *
 * Appending a script element also runs the "running a script" steps: unless
 * the element is already flagged as executed it is flagged now, and is then
 * either queued on one of the parser's script lists, stored as the pending
 * external script, or executed at once.
 */
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push(e);
  e.parentNode = this;

  if (e.localName != 'script') {
    return e;
  }

  logIndentLevel++;
  log('Running a script: start');

  var doc = this.ownerDocument || this;
  var p = doc._parser;

  // Steps 1. to 2.3. (script type, scripting disabled, XML/innerHTML
  // created, unsupported language) are not implemented.

  // 2.4. and 2.5. Abort if the "already executed" flag is set.
  if (e.manakaiAlreadyExecuted) {
    log('Running a script: aborted (already executed)');
    logIndentLevel--;
    return e;
  }

  // 3. Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // 4. If the element has a src attribute, then a load for ...
  // TODO: load an external resource

  // 5. The first of the following options:
  var hasSrc = e.src != null;
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    p.scriptsExecutedAfterParsing.push(e);
    log('Running a script: aborted (defer)');
  } else if (e.async && hasSrc) {
    p.scriptsExecutedAsynchronously.push(e);
    log('Running a script: aborted (async src)');
  } else if (e.async && !hasSrc &&
             p.scriptsExecutedAsynchronously.length > 0) {
    p.scriptsExecutedAsynchronously.push(e);
    log('Running a script: aborted (async)');
    // ISSUE: What is the difference with the case above?
  } else if (hasSrc && e.manakaiParserInserted) {
    if (p.pendingExternalScript) {
      log('Error: There is a pending external script.');
    }
    p.pendingExternalScript = e;
    log('Running a script: aborted (src parser-inserted)');
  } else if (hasSrc) {
    p.scriptsExecutedSoon.push(e);
    log('Running a script: aborted (src)');
  } else {
    // Executed even if other scripts are already executing.
    executeScript(doc, e);
  }

  log('Running a script: end');
  logIndentLevel--;
  return e;
}; // appendChild
581
/**
 * Executes the script element e in the document doc.
 *
 * The script text is e.text for an inline script, or the result of
 * getExternalScript for an element with a src.  A failed load (null) is
 * logged as an error event and nothing is run.
 */
function executeScript(doc, e) {
  log('executing a script block: start');

  var source;
  if (e.src == null) {
    source = e.text;
  } else {
    source = getExternalScript(e.src);

    // If the load resulted in an error, then ... firing an error event ...
    if (source == null) {
      log('error event fired at the script element');
      return;
    }

    log('External script loaded: "' + source + '"');
  }

  // The load was successful.  Scripting is always treated as enabled,
  // Document.designMode as disabled, and the document as the active
  // document of its browsing context.
  parseAndRunScript(doc, source);

  log('load event fired at the script element');
  log('executing a script block: end');
}
613
/**
 * "Loads" the external script identified by uri and returns its source
 * text, or null when the load fails.
 *
 * Only javascript: URIs consisting of a single string literal are
 * supported (see the Notes section of this page); the literal's value is
 * the script text.  An empty literal, in either quoting style, is a
 * successful load of an empty script.  The original code returned null for
 * an empty single-quoted literal and rejected an empty double-quoted one.
 */
function getExternalScript(uri) {
  if (!uri.match(/^javascript:/i)) {
    log('URI scheme not supported: <' + uri + '>');
    return null;
  }

  var m = uri.match(/^javascript:\s*(?:'([^']*)'|"([^"]*)")\s*$/i);
  if (!m) {
    log('Complex javascript: URI is not supported: <' + uri + '>');
    return null;
  }

  // Exactly one of the two groups took part in the match.  The other one
  // is undefined (or empty in some old engines), so pick whichever is
  // non-empty and fall back to the empty string.
  return unescapeJSLiteral(m[1] || m[2] || '');
} // getExternalScript
634
/**
 * Interprets the script text s against the document doc.
 *
 * Only three statement forms are recognised, each matched at the start of
 * the remaining text and removed once executed:
 *   - document.write ("...", ...);
 *   - var s = document.createElement ("script"); s.src = "...";
 *     document.documentElement.appendChild (s);
 *   - w (document.documentElement.innerHTML);   (logs a tree dump)
 * Processing stops when the text is exhausted, when no form matches (logged
 * as a script parse error), or when a script element cannot be inserted
 * because the document has no document element.
 */
function parseAndRunScript(doc, s) {
  while (true) {
    var matched = false;

    s = s.replace(/^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/, function (statement, argList) {
      matched = true;
      var args = [];
      // Collect every string literal of the argument list, without its
      // surrounding quotes.
      var literals = argList.match(/'[^']*'|"[^"]*"/g);
      for (var n = 0; n < literals.length; n++) {
        var literal = literals[n];
        args.push(unescapeJSLiteral(literal.substring(1, literal.length - 1)));
      }
      doc.write.apply(doc, args);
      return '';
    });

    var noDocumentElement = false;
    s = s.replace(/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'([^']*)'|"([^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
        function (statement, single, double) {
          matched = true;
          // The group of the unused quoting style is undefined, and an
          // empty literal is falsy, so fall back to the empty string.
          // The original code passed undefined on for |s.src = '';|,
          // which made unescapeJSLiteral throw.
          var uri = unescapeJSLiteral(single || double || '');
          noDocumentElement = !doc._insertExternalScript(uri);
          return '';
        });
    if (noDocumentElement) {
      log('Script error: documentElement is null');
      break;
    }

    s = s.replace(/^\s*w\s*\(\s*document\.documentElement\.innerHTML\s*\)\s*;\s*/,
        function () {
          matched = true;
          log(dumpTree(doc, ''));
          return '';
        });

    if (s == '') break;
    if (!matched) {
      log('Script parse error: "' + s + '"');
      break;
    }
  }
} // parseAndRunScript
673
/**
 * Decodes the \uHHHH escapes in the body of a JavaScript string literal.
 * No other escape sequence is interpreted.
 */
function unescapeJSLiteral(s) {
  var unicodeEscape = /\\u([0-9A-Fa-f]{4})/g;
  return s.replace(unicodeEscape, function (escape, hexDigits) {
    var codeUnit = parseInt(hexDigits, 16);
    return String.fromCharCode(codeUnit);
  });
}
679
/** Minimal text node; its character data is kept in the property data. */
function JSText(data) {
  var characterData = data;
  this.data = characterData;
}
683
/**
 * Appends the character data s to this node.  The data is merged into the
 * last child when that child is a text node; otherwise a new text node is
 * added.  Shared by documents and elements.
 */
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var children = this.childNodes;
  var lastChild = children[children.length - 1];
  if (lastChild instanceof JSText) {
    lastChild.data += s;
    return;
  }
  children.push(new JSText(s));
}; // manakaiAppendText
694
/**
 * document.open ([type [, replace]]): discards the current content and
 * attaches a new, script-created parser with an empty input stream.
 *
 * The call is ignored while a parser that is not script-created still has
 * a defined insertion point.  Returns this document in either case.  Only
 * the second argument is inspected; the type argument (default text/html)
 * has no effect in this implementation.
 */
JSDocument.prototype.open = function () {
  // Two or fewer arguments

  // Step 1. The type argument would default to 'text/html'; it is unused.

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  // The insertion point is a property of the parser (see
  // setInsertionPoint), not of its input stream; the original test read
  // |input.insertionPoint|, which is always undefined, so this step never
  // applied.  NaN counts as undefined here as well, because the tokeniser
  // turns an undefined insertion point into NaN while consuming input.
  var parser = this._parser;
  if (parser &&
      !parser.scriptCreated &&
      parser.insertionPoint != undefined &&
      !isNaN(parser.insertionPoint)) {
    log('document.open () in parsing mode is ignored');
    return this;
  }

  // Step 4.
  log('onbeforeunload event fired');
  log('onunload event fired');

  // Step 5.
  // Discarding the old parser needs no action; it is replaced in step 7.

  // Step 6.
  log('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser(new InputStream(''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  this._parser.setInsertionPoint(this._parser.input.s.length);

  // Step 12.
  return this;
}; // document.open
746
/**
 * document.write (string, ...): concatenates the arguments, inserts the
 * result into the parser's input stream at the insertion point and, unless
 * a pending external script blocks the parser, parses the inserted text
 * reentrantly.  The document is opened first when the parser has no
 * insertion point.  Returns nothing.
 */
JSDocument.prototype.write = function () {
  log('document.write: start');
  logIndentLevel++;

  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (isNaN(p.insertionPoint) || p.insertionPoint == undefined) {
    this.open();
    p = this._parser;
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // |arguments| is not an array, and the static |Array.join| generic used
  // originally was a Firefox-only extension that has since been removed,
  // so the standard method is borrowed instead.
  var s = Array.prototype.join.call(arguments, '');
  log('document.write: insert "' + s + '"' +
      ' before "' +
      p.input.s.substring(p.insertionPoint, p.insertionPoint + 10) + '"');
  p.input.s = p.input.s.substring(0, p.insertionPoint) + s
      + p.input.s.substring(p.insertionPoint, p.input.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a pending external script
  if (p.pendingExternalScript) {
    log('document.write: processed later (there is an unprocessed <script src>)');
    logIndentLevel--;
    log('document.write: return');
    return;
  }

  // 4. Process the characters that were inserted, ...
  var originalReentrant = p.reentrant;
  p.reentrant = true;
  p.parse();
  p.reentrant = originalReentrant;
  // TODO: "Abort the processing of any nested invokations of the tokeniser,
  // yielding control back to the caller." (<script> parsing).  Do we need
  // to do something here?

  // 5. Return
  logIndentLevel--;
  log('document.write: return');

  return;
}; // document.write
791
/**
 * Creates a script element whose src is uri and appends it to the document
 * element.  Returns true on success, or false (without inserting anything)
 * when the document has no document element.
 */
JSDocument.prototype._insertExternalScript = function (uri) {
  var script = new JSElement(this, 'script');
  script.src = uri;

  var root = this.documentElement;
  if (!root) {
    return false;
  }
  root.appendChild(script);
  return true;
}; // _insertExternalScript
802
/**
 * documentElement: the first child of the document that is an element, or
 * null when the document has no element child.
 */
JSDocument.prototype.__defineGetter__('documentElement', function () {
  var children = this.childNodes;
  var found = null;
  for (var index = 0; found === null && index < children.length; index++) {
    var candidate = children[index];
    if (candidate instanceof JSElement) {
      found = candidate;
    }
  }
  return found;
});
812
/**
 * text: the concatenated data of the element's text node children, in
 * document order.  Text inside child elements is not included.
 */
JSElement.prototype.__defineGetter__('text', function () {
  var children = this.childNodes;
  var result = '';
  var index = 0;
  while (index < children.length) {
    var child = children[index];
    if (child instanceof JSText) {
      result = result + child.data;
    }
    index++;
  }
  return result;
});
822
/**
 * Serialises the children of the node n, recursively, one node per line.
 * Every line starts with "| " followed by indent.  Elements are followed by
 * their async, defer and src properties (when set) and then by their
 * children; text nodes are shown in double quotes.
 */
function dumpTree(n, indent) {
  var prefix = '| ' + indent;
  var pieces = [];
  var children = n.childNodes;

  for (var index = 0; index < children.length; index++) {
    var child = children[index];

    if (child instanceof JSText) {
      pieces.push(prefix + '"' + child.data + '"\n');
      continue;
    }

    if (!(child instanceof JSElement)) {
      pieces.push(prefix + child + '\n');
      continue;
    }

    pieces.push(prefix + child.localName + '\n');
    if (child.async) pieces.push(prefix + ' async=""\n');
    if (child.defer) pieces.push(prefix + ' defer=""\n');
    if (child.src != null) {
      pieces.push(prefix + ' src="' + child.src + '"\n');
    }
    pieces.push(dumpTree(child, indent + ' '));
  }

  return pieces.join('');
}
843 </script>
844 </head>
<body onload="
  /* Page start-up: remember the source textarea and the log output element
     as properties of |document|, preload the textarea from the |s| query
     parameter (the format generated for the permalink), and run the first
     parse. */
  document.sourceElement = document.getElementsByTagName ('textarea')[0];

  var q = location.search;
  if (q != null) {
    q = q.substring (1).split (/;/);
    for (var i = 0; i < q.length; i++) {
      var v = q[i].split (/=/, 2);
      try {
        v[0] = decodeURIComponent (v[0]);
        v[1] = decodeURIComponent (v[1] || '');
      } catch (e) {
        /* A malformed percent-escape makes decodeURIComponent throw.  Skip
           the parameter; otherwise the exception would end this handler
           before the log element is set and the first parse is run. */
        continue;
      }
      if (v[0] == 's') {
        document.sourceElement.value = v[1];
      }
    }
  }

  document.logElement = document.getElementsByTagName ('output')[0];
  update ();
">
864 <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
865 Parser</h1>
866
867 <h2>Markup to test
868 (<a href=data:, id=permalink rel=bookmark>permalink</a>,
869 <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
870 id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
871 Viewer</a>)</h2>
872 <p>
873 <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
874 &lt;head>&lt;/head>&lt;body>
875 &lt;p>
876 &lt;script>
877 document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
878 &lt;/script>
879 &lt;p>
880 </textarea>
881
882 <h2 id=log>Log</h2>
883 <p><output></output>
884
885 <h2 id=notes>Notes</h2>
886
887 <p>This is a <em>simplified</em> implementation of
888 <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
889 Parsing Algorithm</a>. It only implements script-related part of the
890 algorithm. Especially, this parser:
891 <ul>
892 <li>Does not support <code>DOCTYPE</code> and comment tokens.
893 <li>Does not support entities except for <code>&amp;quot;</code>,
894 <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
895 <code>src</code> attribute value.
896 <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
897 algorithm, and so on.
898 <li>Does not raise parse errors for invalid attribute specifications in start
899 or end tags.
900 <li>Does not support RCDATA elements (<code>title</code> and
901 <code>textarea</code>).
902 <li>Does not strip the first newline in <code>pre</code>,
903 <code>listing</code>, and <code>textarea</code> elements.
904 <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
905 in CDATA/RCDATA elements.
906 <li>Does not support foreign (SVG or MathML) elements.
907 <li>Only supports <code>script</code> <code>type</code>
908 <code>text/javascript</code>. <code>type</code> and <code>language</code>
909 attributes are ignored.
910 <li>Only supports limited statements. It must consist of zero or more
911 of statements looking similar to the following statements, possibly
912 introduced, followed, or separated by white space characters:
913 <ul>
914 <li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>.
915 <li><code>var s = document.createElement ("script");
916 s.src = "<var>string</var>";
917 document.documentElement.appendChild (s);</code>
918 <li><code>w (document.documentElement.innerHTML);</code> (This statement
919 can be used to dump the document, even when the document has no
920 document element. The output format is the tree dump format used
921 in html5lib test data, not <abbr>HTML</abbr>.)
922 </ul>
923 Note that strings may be delimited by <code>'</code>s instead of
924 <code>"</code>s.
925 <li>Only supports <code>javascript:</code>
926 <abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the
927 <code>src</code> attribute of the <code>script</code> element. In addition,
928 the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to
929 the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
930 <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
931 string literals.
932 <li>Does not handle <i>stop parsing</i> phase correctly if the document is
933 replaced by a <code>document.open ()</code> call. In other words, delayed
934 (deferred or asynchronous) script executions and event firings might be
935 treated in a wrong way if a <code>document.open ()</code> invocation
936 is implicitly done by <code>document.write ()</code> in a delayed script.
937 </ul>
938
939 <p>For some reason, this parser does not work in browsers that do
940 not support JavaScript 1.5.
941
942 <!-- TODO: |src| attribute value should refer the value at the time
943 when it is inserted into the document, not the value when the script is
944 executed. Currently it does not matter, since we don't allow dynamic
945 modification to the |src| content/DOM attribute value yet. -->
946
947 </body>
948 </html>
949 <!-- $Date: 2008/05/16 10:29:25 $ -->
950 <!--
951
952 Copyright 2008 Wakaba <w@suika.fam.cx>
953
954 This program is free software; you can redistribute it and/or
955 modify it under the terms of the GNU General Public License
956 as published by the Free Software Foundation; either version 2
957 of the License, or (at your option) any later version.
958
959 This program is distributed in the hope that it will be useful,
960 but WITHOUT ANY WARRANTY; without even the implied warranty of
961 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
962 GNU General Public License for more details.
963
964 You should have received a copy of the GNU General Public License
965 along with this program; if not, write to the Free Software
966 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
967
968 -->

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24