/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.15 - (show annotations) (download) (as text)
Tue Apr 29 03:29:41 2008 UTC (17 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.14: +31 -7 lines
File MIME type: text/html
Support for w(innerHTML) dumping

1 <!DOCTYPE HTML>
2 <html lang=en>
3 <head>
4 <title>Live Scripting HTML Parser</title>
5 <link rel=author href="http://suika.fam.cx/~wakaba/who?">
6 <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
7 title="GNU GPL2 or later">
8 <style>
9 h1 {
10 margin: 0;
11 font-size: 150%;
12 }
13 h2 {
14 margin: 0;
15 font-size: 100%;
16 }
17 p {
18 margin: 0 1em;
19 }
20 textarea {
21 width: 100%;
22 -width: 99%;
23 height: 10em;
24 }
25 output {
26 display: block;
27 font-family: monospace;
28 white-space: -moz-pre-wrap;
29 white-space: pre-wrap;
30 }
31 </style>
32 <script>
// Handle of the pending debounce timer; 0 while no re-parse is scheduled.
var delayedUpdater = 0;

/**
 * Schedules a re-parse of the source text.
 *
 * Any timer that is still pending is cancelled first, so update2 only runs
 * once the calls to this function have stopped for 100 milliseconds.
 */
function update () {
  var pending = delayedUpdater;
  delayedUpdater = 0;
  if (pending) clearTimeout (pending);
  delayedUpdater = setTimeout (update2, 100);
} // update
42
/**
 * Re-parses the source text if it changed since the previous run.
 *
 * Reads the text from document.sourceElement, refreshes the two links that
 * carry the text in their query string, clears the log element, runs a new
 * Parser over the text and logs the resulting tree.  Does nothing when the
 * text equals document.previousSourceText.
 */
function update2 () {
  var source = document.sourceElement.value;
  if (source == document.previousSourceText) return;
  document.previousSourceText = source;

  // Both links embed the percent-encoded source text.
  var encoded = encodeURIComponent (source);
  document.links['permalink'].href = location.pathname + '?s=' + encoded;
  document.links['ldvlink'].href =
      'http://software.hixie.ch/utilities/js/live-dom-viewer/?' + encoded;

  document.logElement.textContent = '';
  var parser = new Parser (new InputStream (source));
  // Keep the document created by the constructor before parse () runs.
  var doc = parser.doc;
  parser.parse ();

  log (dumpTree (doc, ''));

  if (parser.hasAsyncScript) {
    log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
  }
} // update2
65
// Current nesting depth of log output; one space of indent per level.
var logIndentLevel = 0;

/**
 * Appends a message to the log element (document.logElement).
 *
 * Every line of the message is prefixed with the current indentation and
 * the message is terminated by a newline.
 *
 * @param s  Message text; may contain several lines.
 */
function log (s) {
  var indent = '';
  var remaining = logIndentLevel;
  while (remaining-- > 0) {
    indent += ' ';
  }

  var lines = s.split ("\n");
  var text = '';
  for (var n = 0; n < lines.length; n++) {
    if (n > 0) text += "\n";
    text += indent + lines[n];
  }
  document.logElement.appendChild (document.createTextNode (text + "\n"));
} // log
75
/**
 * Input stream of the parser.
 *
 * @param s  Text to tokenise.  It is kept in the |s| property, which the
 *           tokeniser and document.write rewrite as input is consumed or
 *           inserted.
 */
function InputStream (s) {
  this.s = s;
} // InputStream
79
/**
 * Creates a parser that reads from an input stream.
 *
 * @param i    InputStream to read tokens from.
 * @param doc  Optional document to populate.  When omitted, a new
 *             JSDocument bound to this parser is created and flagged as an
 *             HTML document.
 */
function Parser (i, doc) {
  this.parseMode = 'pcdata';

  var target = doc;
  if (!target) {
    target = new JSDocument (this);
    target.manakaiIsHTML = true;
  }

  this.doc = target;
  // The document itself is the bottom entry of the stack of open elements.
  this.openElements = [target];
  this.input = i;

  // Queues of scripts whose execution is postponed.
  this.scriptsExecutedAfterParsing = [];
  this.scriptsExecutedSoon = [];
  this.scriptsExecutedAsynchronously = [];
} // Parser
93
/**
 * Removes the next token from the front of the input stream and returns it.
 *
 * The returned object has a |type| of 'char', 'start-tag', 'end-tag',
 * 'abort' or 'eof'.  'char' and 'end-tag' tokens carry |value|; 'start-tag'
 * tokens carry |value| (lowercased tag name) and |attrs| (lowercased
 * attribute name -> value).
 *
 * When an insertion point is set, only the characters before it may be
 * consumed; 'abort' is returned when the next token cannot be completed
 * with those characters.  The insertion point is decreased by the length
 * of whatever is consumed.  With an undefined (or NaN) insertion point all
 * comparisons against it are false, so the whole input is available.
 *
 * In 'cdata' parse mode only text and the end tag named by
 * this.endTagName are recognised.
 */
Parser.prototype.getNextToken = function () {
  var p = this;
  var i = this.input;
  var token;

  // True when the tag text |s| reaches past the insertion point, or ends
  // exactly at it without a closing ">" (more of the tag may still be
  // written).
  function mustWaitForTag (s) {
    return p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length && s.charAt (s.length - 1) != '>');
  }

  if (this.parseMode == 'cdata') {
    var tagName = this.endTagName;
    if (p.insertionPoint <= 0) {
      return {type: 'abort'};
    }

    // Text up to the next "<", cut at the insertion point if necessary.
    i.s = i.s.replace (/^([^<]+)/, function (s, t) {
      if (0 < p.insertionPoint && p.insertionPoint < t.length) {
        token = {type: 'char', value: t.substring (0, p.insertionPoint)};
        var ip = p.insertionPoint;
        p.insertionPoint = 0;
        return t.substring (ip, t.length);
      }
      token = {type: 'char', value: t};
      p.insertionPoint -= t.length;
      return '';
    });
    if (token) return token;

    // The complete end tag of the current element.
    var pattern = new RegExp ('^</' + tagName + '>', 'i');
    i.s = i.s.replace (pattern, function (s) {
      if (p.insertionPoint < s.length) {
        token = {type: 'abort'};
        return s;
      }
      token = {type: 'end-tag', value: tagName};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    // The insertion point falls inside what could still become the end
    // tag: wait if the characters available so far are a prefix of it.
    // Only the characters before the insertion point are compared.
    var m;
    if ((p.insertionPoint < ('</' + tagName).length) &&
        (m = i.s.match (/^<\/([A-Za-z]+)/))) {
      var available = p.insertionPoint - '</'.length;
      var v = m[1].substring (0, available).toLowerCase ();
      if (v == tagName.substring (0, available)) {
        return {type: 'abort'};
      }
    }

    // A "<" that does not start the end tag is ordinary text.
    i.s = i.s.replace (/^</, function (s) {
      token = {type: 'char', value: s};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;
    return {type: 'eof'};
  }

  // End tag.
  i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
    if (mustWaitForTag (s)) {
      token = {type: 'abort'};
      return s;
    }
    token = {type: 'end-tag', value: e.toLowerCase ()};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Start tag with attributes.
  i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
    if (mustWaitForTag (s)) {
      token = {type: 'abort'};
      return s;
    }
    var tagName;
    var attrs = {};
    e = e.replace (/^[\S]+/, function (v) {
      tagName = v.toLowerCase ();
      return '';
    });
    while (true) {
      var m = false;
      e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
      function (x, attrName, attrValue1, attrValue2, attrValue3) {
        // An attribute without a value (e.g. |async|) matches none of the
        // value groups; treat it as the empty string.
        var v = attrValue1 || attrValue2 || attrValue3 || '';
        v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
            .replace (/&amp;/g, '&');
        attrs[attrName.toLowerCase ()] = v;
        m = true;
        return '';
      });
      if (!m) break;
    }
    if (e.length) {
      log ('Broken start tag: "' + e + '"');
    }
    token = {type: 'start-tag', value: tagName, attrs: attrs};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  if (p.insertionPoint <= 0) {
    return {type: 'abort'};
  }

  // Text up to the next "<", cut at the insertion point if necessary.
  i.s = i.s.replace (/^[^<]+/, function (s) {
    if (p.insertionPoint < s.length) {
      token = {type: 'char', value: s.substring (0, p.insertionPoint)};
      var ip = p.insertionPoint;
      p.insertionPoint = 0;
      return s.substring (ip, s.length);
    }
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Any single remaining character (a "<" that did not form a tag).
  i.s = i.s.replace (/^[\s\S]/, function (s) {
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  return {type: 'eof'};
}; // getNextToken
215
/**
 * Collects the text content of a CDATA element (private helper of parse).
 *
 * Reads tokens until something other than a character token is returned,
 * appending the characters to |el|.  Afterwards the parse mode is switched
 * back to 'pcdata'.  If the terminating token is not the end tag named by
 * this.endTagName, a parse error is logged and |el| is marked as
 * "already executed".
 *
 * @param el  Element that receives the text.
 */
Parser.prototype._collectCdataText = function (el) {
  while (true) {
    // 4.1. Collect all the character tokens.
    var token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (token.type == 'char') {
      // 5. Append a single Text node to the element node.
      el.manakaiAppendText (token.value);

    // 4.2. Until it returns a token that is not a character token, or
    // until it stops tokenising.
    } else if (token.type == 'eof' ||
               token.type == 'end-tag' ||
               token.type == 'abort') {
      // 6. Switched back to the PCDATA state.
      this.parseMode = 'pcdata';

      // 7.1. If the next token is not an end tag token with ...
      if (!(token.type == 'end-tag' && token.value == this.endTagName)) {
        // 7.2. This is a parse error.
        log ('Parse error: no </' + this.endTagName + '>');

        // 7.3. Mark the element as "already executed".
        el.manakaiAlreadyExecuted = true;
      }
      // 7.4. Otherwise the end tag is simply ignored.
      return;
    }
  }
}; // _collectCdataText

/**
 * Runs the tree construction loop over the input stream.
 *
 * Tokens are consumed until the end of input ('eof') or until the
 * tokeniser cannot continue before the insertion point ('abort').  On
 * 'abort', and when a pending script is found during a reentrant call,
 * the method returns immediately.  On 'eof' the "stop parsing" steps run:
 * queued scripts are executed and the DOMContentLoaded and load events are
 * logged.
 */
Parser.prototype.parse = function () {
  logIndentLevel++;
  log ('parse: start');

  while (true) {
    var token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);
        if (token.attrs.async != null) el.async = true;
        if (token.attrs.defer != null) el.defer = true;
        if (token.attrs.src != null) el.src = token.attrs.src;

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'cdata';
        this.endTagName = 'script';

        // 4. - 7. Collect the script text up to the end tag.
        this._collectCdataText (el);

        // 8.1. If the parser were originally created for the ...
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and ...
          // (the flag checked by appendChild is |manakaiAlreadyExecuted|).
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the ...
        var oldInsertionPoint = this.insertionPoint;
        // 9.2. Let the insertion point be just before the next input ...
        this.setInsertionPoint (0);

        // 10. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 11. Let the insertion point have the value of the old ...
        // Whatever the script inserted moved the insertion point forward;
        // shift the old value by the same amount.
        oldInsertionPoint += this.insertionPoint;
        this.setInsertionPoint (oldInsertionPoint);

        // 12. If there is a script that will execute as soon as ...
        while (this.scriptExecutedWhenParserResumes) {
          // 12.1. If the tree construction stage is being called reentrantly
          if (this.reentrant) {
            log ('parse: abort (reentrance)');
            logIndentLevel--;
            return;

          // 12.2. Otherwise
          } else {
            // 1.
            var script = this.scriptExecutedWhenParserResumes;
            this.scriptExecutedWhenParserResumes = null;

            // 2. Pause until the script has completed loading.
            //

            // 3. Let the insertion point to just before the next input char.
            this.setInsertionPoint (0);

            // 4. Execute the script.
            executeScript (this.doc, script);

            // 5. Let the insertion point be undefined again.
            this.setInsertionPoint (undefined);

            // 6. If there is once again a script that will execute ...
            //
          }
        }
      } else if (token.value == 'style' ||
                 token.value == 'noscript' ||
                 token.value == 'xmp') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);

        // 2. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'cdata';
        this.endTagName = token.value;

        // 4. - 7. Collect the element text up to the end tag.
        this._collectCdataText (el);
      } else {
        var el = new JSElement (this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild (el);
        this.openElements.push (el);
      }
    } else if (token.type == 'end-tag') {
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'char') {
      this.openElements[this.openElements.length - 1].manakaiAppendText
          (token.value);
    } else if (token.type == 'eof') {
      break;
    } else if (token.type == 'abort') {
      log ('parse: abort');
      logIndentLevel--;
      return;
    }
  }

  log ('stop parsing');

  // readyState = 'interactive'

  // "When a script completes loading" rules start applying.

  while (this.scriptsExecutedSoon.length > 0 ||
         this.scriptsExecutedAsynchronously.length > 0) {
    // Handle "list of scripts that will execute as soon as possible".
    while (this.scriptsExecutedSoon.length > 0) {
      var e = this.scriptsExecutedSoon.shift ();

      // If it has completed loading
      log ('Execute an external script not inserted by parser...');
      executeScript (this.doc, e);

      // NOTE: It MAY be executed before the end of the parsing, according
      // to the spec.
      this.hasAsyncScript = true;
    }

    // Handle "list of scripts that will execute asynchronously".
    while (this.scriptsExecutedAsynchronously.length > 0) {
      var e = this.scriptsExecutedAsynchronously.shift ();

      // Step 1.
      // We assume that all scripts have been loaded at this time.

      // Step 2.
      log ('Execute an asynchronous script...');
      executeScript (this.doc, e);

      // Step 3.
      //

      // Step 4.
      //

      this.hasAsyncScript = true;
    }
  }

  // Handle "list of scripts that will execute when the document has finished
  // parsing".
  var list = this.scriptsExecutedAfterParsing;
  while (list.length > 0) {
    // TODO: break unless completed loading

    // Step 1.
    //

    // Step 2. and Step 3.
    log ('Executing a |defer|red script...');
    executeScript (this.doc, list.shift ());

    // Step 4.
  }

  log ('DOMContentLoaded event fired');

  // "delays the load event" things have completed:
  // readyState = 'complete'
  log ('load event fired');

  logIndentLevel--;
}; // parse
456
/**
 * Sets the insertion point of the parser and logs the change.
 *
 * @param ip  New insertion point, counted in characters from the start of
 *            the remaining input.  undefined, null and values that are not
 *            numbers all reset the insertion point to undefined.
 */
Parser.prototype.setInsertionPoint = function (ip) {
  if (ip == null || isNaN (ip)) {
    log ('insertion point: set to undefined');
    this.insertionPoint = undefined;
    return;
  }

  var message = (ip == this.input.s.length)
      ? 'insertion point: end of file'
      : 'insertion point: set to ' + ip +
        ' (before "' + this.input.s.substring (0, 10) + '")';
  log (message);
  this.insertionPoint = ip;
}; // setInsertionPoint
470
/**
 * Minimal document node.
 *
 * @param p  Parser that populates this document; stored as |_parser|.
 */
function JSDocument (p) {
  this._parser = p;
  this.childNodes = [];
} // JSDocument
475
/**
 * Minimal element node.
 *
 * @param doc        Document that owns the element.
 * @param localName  Tag name of the element.
 */
function JSElement (doc, localName) {
  this.ownerDocument = doc;
  this.localName = localName;
  this.childNodes = [];
} // JSElement
481
/**
 * Appends a node to this document or element and returns the node.
 *
 * When the node is a script element, the "running a script" steps are
 * performed: depending on its defer / async / src properties and on
 * whether it was inserted by the parser, the script is queued on one of
 * the parser's lists, remembered as the script that runs when the parser
 * resumes, or executed immediately.  A script already flagged as
 * "already executed" is appended but not run.
 *
 * @param e  Node to append.
 */
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push (e);
  e.parentNode = this;

  // Only script elements need further processing.
  if (e.localName != 'script') return e;

  logIndentLevel++;
  log ('Running a script: start');

  // |this| is either the document itself or an element owned by it.
  var doc = this.ownerDocument || this;
  var p = doc._parser;

  // 1. Script type
  //

  // 2.1. - 2.3. Scripting disabled / innerHTML / unsupported language
  //

  // 2.4. If the script element has its "already executed" flag set
  if (e.manakaiAlreadyExecuted) {
    // 2.5. Abort these steps at this point.
    log ('Running a script: aborted (already executed)');
    logIndentLevel--;
    return e;
  }

  // 3. Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // 4. If the element has a src attribute, then a load for ...
  // TODO: load an external resource

  // 5. The first of the following options:
  var hasSrc = e.src != null;
  var disposition;
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    disposition = 'defer';
  } else if (e.async && hasSrc) {
    disposition = 'async src';
  } else if (e.async && p.scriptsExecutedAsynchronously.length > 0) {
    // ISSUE: What is the difference with the case above?
    disposition = 'async';
  } else if (hasSrc && e.manakaiParserInserted) {
    disposition = 'src parser-inserted';
  } else if (hasSrc) {
    disposition = 'src';
  } else {
    disposition = 'immediate';
  }

  switch (disposition) {
  case 'defer':
    p.scriptsExecutedAfterParsing.push (e);
    log ('Running a script: aborted (defer)');
    break;
  case 'async src':
    p.scriptsExecutedAsynchronously.push (e);
    log ('Running a script: aborted (async src)');
    break;
  case 'async':
    p.scriptsExecutedAsynchronously.push (e);
    log ('Running a script: aborted (async)');
    break;
  case 'src parser-inserted':
    if (p.scriptExecutedWhenParserResumes) {
      log ('Error: There is a script that will execute as soon as the parser resumes.');
    }
    p.scriptExecutedWhenParserResumes = e;
    log ('Running a script: aborted (src parser-inserted)');
    break;
  case 'src':
    p.scriptsExecutedSoon.push (e);
    log ('Running a script: aborted (src)');
    break;
  default:
    executeScript (doc, e); // even if other scripts are already executing.
  }

  log ('Running a script: end');
  logIndentLevel--;

  return e;
}; // appendChild
551
/**
 * Executes a script element.
 *
 * The code is taken from the element's |src| (through getExternalScript)
 * when that property is set, and from its text content otherwise.  If the
 * external code cannot be obtained an error event is logged and nothing is
 * run.
 *
 * @param doc  Document the script operates on.
 * @param e    Script element to execute.
 */
function executeScript (doc, e) {
  log ('executing a script block: start');

  var isExternal = e.src != null;
  var s = isExternal ? getExternalScript (e.src) : e.text;

  if (isExternal) {
    // If the load resulted in an error, then ... firing an error event ...
    if (s == null) {
      log ('error event fired at the script element');
      return;
    }
    log ('External script loaded: "' + s + '"');
  }

  // If the load was successful
  log ('load event fired at the script element');

  // Scripting is enabled, Document.designMode is disabled,
  // Document is the active document in its browsing context
  parseAndRunScript (doc, s);

  log ('executing a script block: end');
} // executeScript
582
/**
 * Returns the source code of an "external" script.
 *
 * Only javascript: URIs whose body is a single string literal are
 * supported, i.e. URIs matching
 * ^javascript:\s*(?:"[^"]*"|'[^']*')\s*$ (case-insensitive scheme).
 * The content of the literal, with escapes resolved by unescapeJSLiteral,
 * is the script code.  Both quote styles are handled alike, and an empty
 * literal yields an empty script rather than a load error.
 *
 * @param uri  Value of the script's |src|.
 * @return     The script code, or null (after logging the reason) when the
 *             URI is not supported.
 */
function getExternalScript (uri) {
  if (!uri.match (/^javascript:/i)) {
    log ('URI scheme not supported: <' + uri + '>');
    return null;
  }

  var m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]*)")\s*$/i);
  if (!m) {
    log ('Complex javascript: URI is not supported: <' + uri + '>');
    return null;
  }

  // Exactly one of the two groups took part in the match; the other one is
  // undefined (or empty in some engines), as is an empty literal.
  return unescapeJSLiteral (m[1] || m[2] || '');
} // getExternalScript
603
/**
 * Interprets the tiny script language supported by this tool.
 *
 * The code must consist of zero or more of the following statements,
 * possibly surrounded or separated by white space:
 *
 *   document.write ("string", ...);
 *   var s = document.createElement ("script"); s.src = "string";
 *       document.documentElement.appendChild (s);
 *   w (document.documentElement.innerHTML);
 *
 * Statements are executed in order.  Anything else is reported as a script
 * parse error and stops the script.  Code that is empty or consists of
 * white space only is valid and does nothing.
 *
 * @param doc  Document the script operates on.
 * @param s    Script code.
 */
function parseAndRunScript (doc, s) {
  while (true) {
    var matched = false;

    // document.write (...): the string literals become the arguments.
    s = s.replace (/^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/, function (s, t) {
      matched = true;
      var args = [];
      // replace () is only used to visit every literal in turn.
      t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {
        args.push (unescapeJSLiteral (v.substring (1, v.length - 1)));
        return '';
      });
      doc.write.apply (doc, args);
      return '';
    });

    // Insertion of an external script into the document element.
    var noDocumentElement = false;
    s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'([^']*)'|"([^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
    function (s, t, u) {
      matched = true;
      var args = [unescapeJSLiteral (t ? t : u)];
      noDocumentElement = !doc._insertExternalScript.apply (doc, args);
      return '';
    });
    if (noDocumentElement) {
      log ('Script error: documentElement is null');
      break;
    }

    // Tree dump.
    s = s.replace (/^\s*w\s*\(\s*document\.documentElement\.innerHTML\s*\)\s*;\s*/,
    function (s, t) {
      matched = true;
      log (dumpTree (doc, ''));
      return '';
    });

    // Nothing but white space left: the script has been run completely.
    if (/^\s*$/.test (s)) break;
    if (!matched) {
      log ('Script parse error: "' + s + '"');
      break;
    }
  }
} // parseAndRunScript
642
/**
 * Resolves the escape sequences of a JavaScript string literal body.
 *
 * Only \uHHHH escapes (exactly four hexadecimal digits) are supported;
 * everything else is returned unchanged.
 *
 * @param s  Content of the literal, without the surrounding quotes.
 * @return   The string with every \uHHHH replaced by its character.
 */
function unescapeJSLiteral (s) {
  var escapePattern = /\\u([0-9A-Fa-f]{4})/g;
  return s.replace (escapePattern, function (whole, hex) {
    var codeUnit = parseInt (hex, 16);
    return String.fromCharCode (codeUnit);
  });
} // unescapeJSLiteral
648
/**
 * Minimal text node.
 *
 * @param data  Character data of the node; stored as |data|.
 */
function JSText (data) {
  this.data = data;
} // JSText
652
/**
 * Appends character data to this document or element.
 *
 * The text is merged into the last child when that child is a text node;
 * otherwise a new JSText node is appended.
 *
 * @param s  Text to append.
 */
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var nodes = this.childNodes;
  var last = nodes.length > 0 ? nodes[nodes.length - 1] : null;
  if (last instanceof JSText) {
    last.data += s;
    return;
  }
  nodes.push (new JSText (s));
}; // manakaiAppendText
663
/**
 * Implements document.open ().
 *
 * The call is ignored while a parser that was not created by a script is
 * still parsing, i.e. while that parser has a defined insertion point.
 * Otherwise the document is emptied, a new script-created parser with an
 * empty input stream replaces the current one and its insertion point is
 * placed at the end of that stream.
 *
 * Accepts up to two optional arguments: the MIME type (defaults to
 * 'text/html') and the string 'replace'.  Neither has an effect yet.
 *
 * @return  This document.
 */
JSDocument.prototype.open = function () {
  // Two or fewer arguments

  // Step 1.
  var type = arguments[0] || 'text/html';

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  // The insertion point is a property of the parser, not of its input
  // stream.  The tokeniser turns an undefined insertion point into NaN
  // when it subtracts consumed lengths, so NaN also means "undefined".
  var current = this._parser;
  if (current &&
      !current.scriptCreated &&
      current.insertionPoint != undefined &&
      !isNaN (current.insertionPoint)) {
    log ('document.open () in parsing mode is ignored');
    return this;
  }

  // Step 4.
  log ('onbeforeunload event fired');
  log ('onunload event fired');

  // Step 5.
  if (this._parser) {
    // Discard the parser.
  }

  // Step 6.
  log ('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser (new InputStream (''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  this._parser.setInsertionPoint (this._parser.input.s.length);

  // Step 12.
  return this;
}; // document.open
715
/**
 * Implements document.write ().
 *
 * All arguments are concatenated and inserted into the parser's input
 * stream just before the insertion point, which is then moved past the
 * inserted text.  Unless a script is waiting to be executed when the parser
 * resumes, the parser is run reentrantly so that the inserted characters
 * are processed before this method returns.
 *
 * If the insertion point is undefined, document.open () is called first.
 */
JSDocument.prototype.write = function () {
  log ('document.write: start');
  logIndentLevel++;

  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
    this.open ();
    // open () may have replaced the parser.
    p = this._parser;
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // |arguments| is not a real array, so borrow Array.prototype.join.
  var s = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + s + '"' +
       ' before "' +
       p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
  p.input.s = p.input.s.substring (0, p.insertionPoint) + s
      + p.input.s.substring (p.insertionPoint, p.input.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a script that will execute as soon as the parser resumes
  if (p.scriptExecutedWhenParserResumes) {
    log ('document.write: processed later (there is an unprocessed <script src>)');
    logIndentLevel--;
    log ('document.write: return');
    return;
  }

  // 4. Process the characters that were inserted, ...
  var originalReentrant = p.reentrant;
  p.reentrant = true;
  p.parse ();
  p.reentrant = originalReentrant;
  // TODO: "Abort the processing of any nested invocations of the tokeniser,
  // yielding control back to the caller." (<script> parsing).  Do we need
  // to do something here?

  // 5. Return
  logIndentLevel--;
  log ('document.write: return');

  return;
}; // document.write
760
/**
 * Creates a script element with the given |src| and appends it to the
 * document element.
 *
 * @param uri  Value for the |src| property of the new script element.
 * @return     true if the element was appended, false if the document has
 *             no document element.
 */
JSDocument.prototype._insertExternalScript = function (uri) {
  var script = new JSElement (this, 'script');
  script.src = uri;

  var root = this.documentElement;
  if (!root) return false;

  root.appendChild (script);
  return true;
}; // _insertExternalScript
771
/**
 * documentElement: the first child of the document that is an element, or
 * null if the document has no element child.
 */
JSDocument.prototype.__defineGetter__ ('documentElement', function () {
  var children = this.childNodes;
  var found = null;
  for (var n = 0; found == null && n < children.length; n++) {
    if (children[n] instanceof JSElement) {
      found = children[n];
    }
  }
  return found;
});
781
/**
 * text: the concatenated data of all text nodes that are direct children
 * of the element.  Child elements are skipped.
 */
JSElement.prototype.__defineGetter__ ('text', function () {
  var children = this.childNodes;
  var collected = '';
  var n = 0;
  while (n < children.length) {
    var child = children[n++];
    if (child instanceof JSText) {
      collected += child.data;
    }
  }
  return collected;
});
791
/**
 * Serialises the children of a node, recursively, into the line-based
 * tree dump format: one line per node, each starting with "| " followed by
 * the indentation.
 *
 * Elements are written as their local name, followed by lines for the
 * async, defer and src properties when set, followed by their children at
 * a deeper indentation.  Text nodes are written as their quoted data.
 *
 * @param n       Node whose children are dumped.
 * @param indent  Indentation used for the direct children.
 * @return        The dump, every line terminated by a newline.
 */
function dumpTree (n, indent) {
  var prefix = '| ' + indent;
  var lines = [];
  var children = n.childNodes;

  for (var k = 0; k < children.length; k++) {
    var node = children[k];

    if (node instanceof JSText) {
      lines.push (prefix + '"' + node.data + '"\n');
      continue;
    }
    if (!(node instanceof JSElement)) {
      lines.push (prefix + node + '\n');
      continue;
    }

    lines.push (prefix + node.localName + '\n');
    if (node.async) lines.push (prefix + ' async=""\n');
    if (node.defer) lines.push (prefix + ' defer=""\n');
    if (node.src != null) {
      lines.push (prefix + ' src="' + node.src + '"\n');
    }
    lines.push (dumpTree (node, indent + ' '));
  }

  return lines.join ('');
} // dumpTree
812 </script>
813 </head>
<body onload="
// Remember the textarea that holds the markup to parse.
document.sourceElement = document.getElementsByTagName ('textarea')[0];

// Pre-fill the textarea from the s parameter of the query string, if any.
// Parameters are separated by semicolons; names and values are
// percent-decoded before use.
var query = location.search;
if (query != null) {
  var params = query.substring (1).split (/;/);
  for (var n = 0; n < params.length; n++) {
    var pair = params[n].split (/=/, 2);
    var key = decodeURIComponent (pair[0]);
    var value = decodeURIComponent (pair[1] || '');
    if (key == 's') {
      document.sourceElement.value = value;
    }
  }
}

// Remember the element that receives the log, then run the first parse.
document.logElement = document.getElementsByTagName ('output')[0];
update ();
">
833 <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
834 Parser</h1>
835
836 <h2>Markup to test
837 (<a href=data:, id=permalink rel=bookmark>permalink</a>,
838 <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
839 id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
840 Viewer</a>)</h2>
841 <p>
842 <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
843 &lt;head>&lt;/head>&lt;body>
844 &lt;p>
845 &lt;script>
846 document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
847 &lt;/script>
848 &lt;p>
849 </textarea>
850
851 <h2 id=log>Log</h2>
852 <p><output></output>
853
854 <h2 id=notes>Notes</h2>
855
856 <p>This is a <em>simplified</em> implementation of
857 <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
858 Parsing Algorithm</a>. It only implements script-related part of the
859 algorithm. Especially, this parser:
860 <ul>
861 <li>Does not support <code>DOCTYPE</code> and comment tokens.
862 <li>Does not support entities except for <code>&amp;quot;</code>,
863 <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
864 <code>src</code> attribute value.
865 <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
866 algorithm, and so on.
867 <li>Does not raise parse errors for invalid attribute specifications in start
868 or end tags.
869 <li>Does not support PCDATA elements (<code>title</code> and
870 <code>textarea</code>).
871 <li>Does not strip the first newline in <code>pre</code> elements.
872 <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
873 in <code>script</code> element.
874 <li>Does not support foreign (SVG or MathML) elements.
875 <li>Only supports <code>script</code> <code>type</code>
876 <code>text/javascript</code>. <code>type</code> and <code>language</code>
877 attributes are ignored.
878 <li>Only supports limited statements. It must consist of zero or more
879 of statements looking similar to the following statements, possibly
880 introduced, followed, or separated by white space characters:
881 <ul>
882 <li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>.
883 <li><code>var s = document.createElement ("script");
884 s.src = "<var>string</var>";
885 document.documentElement.appendChild (s);</code>
886 <li><code>w (document.documentElement.innerHTML);</code> (This statement
887 can be used to dump the document, even when the document has no
888 document element. The output format is the tree dump format used
889 in html5lib test data, not <abbr>HTML</abbr>.)
890 </ul>
891 Note that strings may be delimited by <code>'</code>s instead of
892 <code>"</code>s.
893 <li>Only supports <code>javascript:</code>
894 <abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the
895 <code>src</code> attribute of the <code>script</code> element. In addition,
896 the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to
897 the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
898 <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
899 string literals.
900 <li>Does not handle <i>stop parsing</i> phase correctly if the document is
901 replaced by <code>document.open ()</code> call. In other words, delayed
902 (deferred or asynchronous) script executions and event firings might be
903 treated in a wrong way if a <code>document.open ()</code> invocation
904 is implicitly done by <code>document.write ()</code> in a delayed script.
905 </ul>
906
907 <p>For some reason, this parser does not work in browsers that do
908 not support JavaScript 1.5.
909
910 <!-- TODO: |src| attribute value should refer the value at the time
911 when it is inserted into the document, not the value when the script is
912 executed. Currently it does not matter, since we don't allow dynamic
913 modification to the |src| content/DOM attribute value yet. -->
914
915 </body>
916 </html>
917 <!-- $Date: 2008/04/29 02:50:00 $ -->
918 <!--
919
920 Copyright 2008 Wakaba <w@suika.fam.cx>
921
922 This program is free software; you can redistribute it and/or
923 modify it under the terms of the GNU General Public License
924 as published by the Free Software Foundation; either version 2
925 of the License, or (at your option) any later version.
926
927 This program is distributed in the hope that it will be useful,
928 but WITHOUT ANY WARRANTY; without even the implied warranty of
929 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
930 GNU General Public License for more details.
931
932 You should have received a copy of the GNU General Public License
933 along with this program; if not, write to the Free Software
934 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
935
936 -->

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24