/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.14 - (show annotations) (download) (as text)
Tue Apr 29 02:50:00 2008 UTC (17 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.13: +61 -11 lines
File MIME type: text/html
Support the other CDATA elements

1 <!DOCTYPE HTML>
2 <html lang=en>
3 <head>
4 <title>Live Scripting HTML Parser</title>
5 <link rel=author href="http://suika.fam.cx/~wakaba/who?">
6 <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
7 title="GNU GPL2 or later">
8 <style>
9 h1 {
10 margin: 0;
11 font-size: 150%;
12 }
13 h2 {
14 margin: 0;
15 font-size: 100%;
16 }
17 p {
18 margin: 0 1em;
19 }
20 textarea {
21 width: 100%;
22 -width: 99%;
23 height: 10em;
24 }
25 output {
26 display: block;
27 font-family: monospace;
28 white-space: -moz-pre-wrap;
29 white-space: pre-wrap;
30 }
31 </style>
32 <script>
// Handle of the pending debounce timer; 0 while no re-parse is scheduled.
var delayedUpdater = 0;

/**
 * Schedules a re-parse of the source text.
 *
 * Every call restarts a 100 ms timer, so a burst of key/input events
 * results in a single call to update2 once the burst is over.
 */
function update () {
  var pending = delayedUpdater;
  delayedUpdater = 0;
  if (pending) {
    clearTimeout (pending);
  }
  delayedUpdater = setTimeout (update2, 100);
} // update
42
/**
 * Re-parses the textarea content if it changed since the last run.
 *
 * Refreshes the permalink and Live DOM Viewer links, clears the log,
 * runs a fresh Parser over the source and logs the resulting tree.
 */
function update2 () {
  var source = document.sourceElement.value;
  if (source == document.previousSourceText) {
    return; // nothing changed; keep the current log
  }
  document.previousSourceText = source;

  var encoded = encodeURIComponent (source);
  document.links['permalink'].href = location.pathname + '?s=' + encoded;
  document.links['ldvlink'].href
      = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?' + encoded;

  document.logElement.textContent = '';
  var parser = new Parser (new InputStream (source));
  // Keep the document created by this parser: that is the tree we dump.
  var tree = parser.doc;
  parser.parse ();

  log (dumpTree (tree, ''));

  if (parser.hasAsyncScript) {
    log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
  }
} // update2
65
// Current nesting depth of log output; callers increment/decrement it
// around nested operations (parse, document.write, script execution).
var logIndentLevel = 0;

/**
 * Appends one line to the log element, prefixed with one space per
 * current indent level.
 *
 * @param s  Message to log (a trailing newline is added).
 */
function log (s) {
  var pad = '';
  for (var depth = logIndentLevel; depth > 0; depth--) {
    pad += ' ';
  }
  var line = document.createTextNode (pad + s + "\n");
  document.logElement.appendChild (line);
} // log
73
/**
 * Mutable holder for the not-yet-tokenised input.
 *
 * The tokeniser removes consumed text from the front of |s| and
 * document.write inserts new text into it.
 *
 * @param s  Initial source text.
 */
function InputStream (s) {
  this.s = s;
} // InputStream
77
/**
 * Creates a parser over an input stream.
 *
 * @param i    InputStream to tokenise.
 * @param doc  Optional document to populate; when omitted a new
 *             JSDocument bound to this parser is created and flagged
 *             as an HTML document.
 */
function Parser (i, doc) {
  this.parseMode = 'pcdata';

  var target = doc;
  if (!target) {
    target = new JSDocument (this);
    target.manakaiIsHTML = true;
  }

  this.doc = target;
  // The document itself is the bottom of the stack of open elements.
  this.openElements = [target];
  this.input = i;

  // Script queues drained at the end of parse ().
  this.scriptsExecutedAfterParsing = [];
  this.scriptsExecutedSoon = [];
  this.scriptsExecutedAsynchronously = [];
} // Parser
91
/**
 * Tokeniser: removes the next token from the front of this.input.s and
 * returns it.
 *
 * Returned token shapes:
 *   {type: 'char', value: text}
 *   {type: 'start-tag', value: lowercased tag name, attrs: {name: value}}
 *   {type: 'end-tag', value: lowercased tag name}
 *   {type: 'eof'}    - the input is empty
 *   {type: 'abort'}  - the next token would cross this.insertionPoint;
 *                      nothing is consumed
 *
 * this.insertionPoint is an offset into the remaining input (or
 * undefined/NaN when there is no insertion point, in which case every
 * comparison below is false and the tokeniser never aborts). It is
 * decreased by the length of whatever is consumed.
 *
 * In 'cdata' parse mode only text and the end tag named by
 * this.endTagName are recognised.
 */
Parser.prototype.getNextToken = function () {
  var p = this;
  var i = this.input;
  var token;

  // Replace callback: consume the whole match as a character token.
  var consumeAll = function (s) {
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  };

  // Replace callback: consume a text run, but only up to the insertion
  // point when the run crosses it (the rest stays in the input).
  var consumeText = function (s) {
    var ip = p.insertionPoint;
    if (ip < s.length) {
      token = {type: 'char', value: s.substring (0, ip)};
      p.insertionPoint = 0;
      return s.substring (ip);
    }
    return consumeAll (s);
  };

  // True if the matched tag text crosses the insertion point, or ends
  // exactly at it without a closing '>' (i.e. it was matched only
  // because the input ended and more text may still be written).
  var tagIsCutOff = function (s) {
    return p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length && s.charAt (s.length - 1) != '>');
  };

  if (this.parseMode == 'cdata') {
    var tagName = this.endTagName;
    if (p.insertionPoint <= 0) {
      return {type: 'abort'};
    }

    i.s = i.s.replace (/^[^<]+/, consumeText);
    if (token) return token;

    var pattern = new RegExp ('^</' + tagName + '>', 'i');
    i.s = i.s.replace (pattern, function (s) {
      if (p.insertionPoint < s.length) {
        token = {type: 'abort'};
        return s;
      }
      token = {type: 'end-tag', value: tagName};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    // The insertion point lies inside what may still become the end
    // tag: wait if the text before the insertion point is a prefix of
    // the end tag. Both sides are cut to the number of tag name
    // characters that precede the insertion point.
    var m;
    if ((p.insertionPoint < ('</' + tagName).length) &&
        (m = i.s.match (/^<\/([A-Za-z]+)/))) {
      var seen = p.insertionPoint - '</'.length;
      if (m[1].substring (0, seen).toLowerCase () ==
          tagName.substring (0, seen)) {
        return {type: 'abort'};
      }
    }

    // A '<' that does not start the end tag is ordinary text.
    i.s = i.s.replace (/^</, consumeAll);
    if (token) return token;
    return {type: 'eof'};
  }

  i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
    if (tagIsCutOff (s)) {
      token = {type: 'abort'};
      return s;
    }
    token = {type: 'end-tag', value: e.toLowerCase ()};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
    if (tagIsCutOff (s)) {
      token = {type: 'abort'};
      return s;
    }
    var tagName;
    var attrs = {};
    e = e.replace (/^[\S]+/, function (v) {
      tagName = v.toLowerCase ();
      return '';
    });
    while (true) {
      var m = false;
      e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
      function (x, attrName, attrValue1, attrValue2, attrValue3) {
        // An attribute without a value, or with an empty one, yields no
        // truthy capture; treat it as the empty string.
        var v = attrValue1 || attrValue2 || attrValue3 || '';
        v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
            .replace (/&amp;/g, '&');
        attrs[attrName.toLowerCase ()] = v;
        m = true;
        return '';
      });
      if (!m) break;
    }
    if (e.length) {
      log ('Broken start tag: "' + e + '"');
    }
    token = {type: 'start-tag', value: tagName, attrs: attrs};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  if (p.insertionPoint <= 0) {
    return {type: 'abort'};
  }

  i.s = i.s.replace (/^[^<]+/, consumeText);
  if (token) return token;

  // A lone '<' that did not form a tag.
  i.s = i.s.replace (/^[\s\S]/, consumeAll);
  if (token) return token;
  return {type: 'eof'};
} // getNextToken
213
/**
 * Private helper for parse (): collects the character tokens of a CDATA
 * element (steps 4.-7. of the generic CDATA parsing algorithm).
 *
 * Appends every character token to |el| until a non-character token is
 * returned, then switches the tokeniser back to PCDATA. If that token is
 * not the matching end tag, a parse error is logged and |el| is marked
 * as "already executed".
 *
 * @param el          Element receiving the text.
 * @param endTagName  Lowercase tag name expected to close the element.
 */
Parser.prototype._collectCDATAText = function (el, endTagName) {
  while (true) {
    var token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (token.type == 'char') {
      // 5. Append a single Text node to the element node.
      el.manakaiAppendText (token.value);

      // 4.2. Until it returns a token that is not a character token, or
      // until it stops tokenising.
    } else if (token.type == 'eof' ||
               token.type == 'end-tag' ||
               token.type == 'abort') {
      // 6. Switched back to the PCDATA state.
      this.parseMode = 'pcdata';

      // 7.1. If the next token is not an end tag token with the same
      // tag name ...
      if (!(token.type == 'end-tag' && token.value == endTagName)) {
        // 7.2. This is a parse error.
        log ('Parse error: no </' + endTagName + '>');

        // 7.3. Mark the element as "already executed".
        el.manakaiAlreadyExecuted = true;
      }
      // 7.4. Otherwise, ignore the end tag.
      return;
    }
  }
}; // _collectCDATAText

/**
 * Simplified tree construction loop.
 *
 * Pulls tokens from getNextToken () and builds the tree under this.doc,
 * running script elements as they are inserted. Returns early, without
 * running the "stop parsing" steps, when the tokeniser reports 'abort'
 * or when a reentrant invocation meets a script that has to wait for
 * the parser to resume. On end of input it drains the script queues and
 * logs the DOMContentLoaded and load events.
 */
Parser.prototype.parse = function () {
  logIndentLevel++;
  log ('parse: start');

  while (true) {
    var token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);
        if (token.attrs.async != null) el.async = true;
        if (token.attrs.defer != null) el.defer = true;
        if (token.attrs.src != null) el.src = token.attrs.src;

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'cdata';
        this.endTagName = 'script';

        // 4.-7. Collect the script text up to the end tag.
        this._collectCDATAText (el, 'script');

        // 8.1. If the parser were originally created for the fragment
        // parsing algorithm ...
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and skip
          // the rest. This must be the same flag that appendChild checks.
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the
        // current insertion point.
        var oldInsertionPoint = this.insertionPoint;
        // 9.2. Let the insertion point be just before the next input
        // character.
        this.setInsertionPoint (0);

        // 10. Append the new element to the current node. This may run
        // the script, which may call document.write.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 11. Let the insertion point have the value of the old
        // insertion point, moved by whatever was written meanwhile.
        // (undefined + number is NaN, which setInsertionPoint treats as
        // undefined.)
        oldInsertionPoint += this.insertionPoint;
        this.setInsertionPoint (oldInsertionPoint);

        // 12. If there is a script that will execute as soon as the
        // parser resumes ...
        while (this.scriptExecutedWhenParserResumes) {
          // 12.1. If the tree construction stage is being called reentrantly
          if (this.reentrant) {
            log ('parse: abort (reentrance)');
            logIndentLevel--;
            return;

          // 12.2. Otherwise
          } else {
            // 1.
            var script = this.scriptExecutedWhenParserResumes;
            this.scriptExecutedWhenParserResumes = null;

            // 2. Pause until the script has completed loading.
            //

            // 3. Let the insertion point to just before the next input char.
            this.setInsertionPoint (0);

            // 4. Execute the script.
            executeScript (this.doc, script);

            // 5. Let the insertion point be undefined again.
            this.setInsertionPoint (undefined);

            // 6. If there is once again a script that will execute as
            // soon as the parser resumes, the loop repeats.
          }
        }
      } else if (token.value == 'style' ||
                 token.value == 'noscript' ||
                 token.value == 'xmp') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);

        // 2. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'cdata';
        this.endTagName = token.value;

        // 4.-7. Collect the element text up to the end tag.
        this._collectCDATAText (el, this.endTagName);
      } else {
        var el = new JSElement (this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild (el);
        this.openElements.push (el);
      }
    } else if (token.type == 'end-tag') {
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'char') {
      this.openElements[this.openElements.length - 1].manakaiAppendText
          (token.value);
    } else if (token.type == 'eof') {
      break;
    } else if (token.type == 'abort') {
      log ('parse: abort');
      logIndentLevel--;
      return;
    }
  }

  log ('stop parsing');

  // readyState = 'interactive'

  // "When a script completes loading" rules start applying.

  // Executing a queued script may queue further scripts, so repeat until
  // both lists are empty.
  while (this.scriptsExecutedSoon.length > 0 ||
         this.scriptsExecutedAsynchronously.length > 0) {
    // Handle "list of scripts that will execute as soon as possible".
    while (this.scriptsExecutedSoon.length > 0) {
      var e = this.scriptsExecutedSoon.shift ();

      // If it has completed loading
      log ('Execute an external script not inserted by parser...');
      executeScript (this.doc, e);

      // NOTE: It MAY be executed before the end of the parsing, according
      // to the spec.
      this.hasAsyncScript = true;
    }

    // Handle "list of scripts that will execute asynchronously".
    while (this.scriptsExecutedAsynchronously.length > 0) {
      var e = this.scriptsExecutedAsynchronously.shift ();

      // Step 1.
      // We assume that all scripts have been loaded at this time.

      // Step 2.
      log ('Execute an asynchronous script...');
      executeScript (this.doc, e);

      // Step 3.
      //

      // Step 4.
      //

      this.hasAsyncScript = true;
    }
  }

  // Handle "list of scripts that will execute when the document has finished
  // parsing".
  var list = this.scriptsExecutedAfterParsing;
  while (list.length > 0) {
    // TODO: break unless completed loading

    // Step 1.
    //

    // Step 2. and Step 3.
    log ('Executing a |defer|red script...');
    executeScript (this.doc, list.shift ());

    // Step 4.
  }

  log ('DOMContentLoaded event fired');

  // Everything that "delays the load event" has completed:
  // readyState = 'complete'
  log ('load event fired');

  logIndentLevel--;
} // parse
454
/**
 * Sets the insertion point and logs the change.
 *
 * @param ip  Offset into the remaining input (this.input.s). undefined,
 *            null and NaN all mean "no insertion point" and are stored
 *            as undefined.
 */
Parser.prototype.setInsertionPoint = function (ip) {
  if (ip == null || isNaN (ip)) {
    log ('insertion point: set to undefined');
    this.insertionPoint = undefined;
  } else if (ip == this.input.s.length) {
    log ('insertion point: end of file');
    this.insertionPoint = ip;
  } else {
    // Preview the text that follows the new insertion point, as the
    // document.write log does.
    log ('insertion point: set to ' + ip +
         ' (before "' + this.input.s.substring (ip, ip + 10) + '")');
    this.insertionPoint = ip;
  }
}; // setInsertionPoint
468
/**
 * Minimal document node of the simulated DOM.
 *
 * @param p  The Parser that populates this document; document.write
 *           reaches the input stream through it.
 */
function JSDocument (p) {
  this._parser = p;
  this.childNodes = [];
} // JSDocument
473
/**
 * Minimal element node of the simulated DOM.
 *
 * @param doc        Owner document.
 * @param localName  Tag name; the parser passes it in lowercase.
 */
function JSElement (doc, localName) {
  this.ownerDocument = doc;
  this.localName = localName;
  this.childNodes = [];
} // JSElement
479
/**
 * Appends |e| as the last child of this node and returns it.
 *
 * When |e| is a script element, the "running a script" steps are applied:
 * depending on its async/defer/src state and on whether the parser
 * inserted it, the script is queued on the parser of the owner document
 * or executed right away.
 */
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push (e);
  e.parentNode = this;

  if (e.localName != 'script') {
    return e;
  }

  logIndentLevel++;
  log ('Running a script: start');

  var doc = this.ownerDocument || this;
  var p = doc._parser;

  // 1. Script type
  // 2.1. - 2.3. Scripting disabled, innerHTML, unsupported language:
  // not simulated.

  // 2.4. If the script element has its "already executed" flag set
  if (e.manakaiAlreadyExecuted) {
    // 2.5. Abort these steps at this point.
    log ('Running a script: aborted');
    logIndentLevel--;
    return e;
  }

  // 3. Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // 4. If the element has a src attribute, then a load for ...
  // TODO: load an external resource

  // 5. The first of the following options that applies.
  var hasSrc = e.src != null;
  var queuedNote = null;
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    p.scriptsExecutedAfterParsing.push (e);
    queuedNote = 'Running a script: aborted (defer)';
  } else if (e.async && hasSrc) {
    p.scriptsExecutedAsynchronously.push (e);
    queuedNote = 'Running a script: aborted (async src)';
  } else if (e.async && p.scriptsExecutedAsynchronously.length > 0) {
    // Async script without src while other async scripts are pending.
    // ISSUE: What is the difference with the case above?
    p.scriptsExecutedAsynchronously.push (e);
    queuedNote = 'Running a script: aborted (async)';
  } else if (hasSrc && e.manakaiParserInserted) {
    if (p.scriptExecutedWhenParserResumes) {
      log ('Error: There is a script that will execute as soon as the parser resumes.');
    }
    p.scriptExecutedWhenParserResumes = e;
    queuedNote = 'Running a script: aborted (src parser-inserted)';
  } else if (hasSrc) {
    p.scriptsExecutedSoon.push (e);
    queuedNote = 'Running a script: aborted (src)';
  } else {
    executeScript (doc, e); // even if other scripts are already executing.
  }
  if (queuedNote != null) {
    log (queuedNote);
  }

  log ('Running a script: end');
  logIndentLevel--;
  return e;
}; // appendChild
549
/**
 * Executes a script element.
 *
 * The code comes from the |src| URI when one is set, otherwise from the
 * element's text. A failed load logs an error event and stops;
 * otherwise a load event is logged and the code is handed to
 * parseAndRunScript.
 *
 * @param doc  Document the script runs against.
 * @param e    The script element.
 */
function executeScript (doc, e) {
  log ('executing a script block: start');

  var code;
  if (e.src == null) {
    code = e.text;
  } else {
    code = getExternalScript (e.src);

    // If the load resulted in an error, then ... firing an error event ...
    if (code == null) {
      log ('error event fired at the script element');
      return;
    }

    log ('External script loaded: "' + code + '"');
  }

  // If the load was successful
  log ('load event fired at the script element');

  // Scripting is always enabled in this simulation, designMode is off
  // and the document is the active document of its browsing context.
  parseAndRunScript (doc, code);

  log ('executing a script block: end');
} // executeScript
580
/**
 * "Loads" an external script.
 *
 * Only javascript: URIs whose body is a single string literal are
 * supported, i.e. URIs matching
 *   ^javascript:\s*(?:"[^"]*"|'[^']*')\s*$
 * (case-insensitively); the unescaped content of the literal is the
 * script source. The literal may be empty, in which case the script
 * source is the empty string.
 *
 * @param uri  Value of the |src| attribute.
 * @return     The script source, or null if the URI is not supported
 *             (a message is logged in that case).
 */
function getExternalScript (uri) {
  if (!uri.match (/^javascript:/i)) {
    log ('URI scheme not supported: <' + uri + '>');
    return null;
  }

  var m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]*)")\s*$/i);
  if (!m) {
    log ('Complex javascript: URI is not supported: <' + uri + '>');
    return null;
  }

  // Exactly one alternative took part in the match; an empty literal
  // is a valid (empty) script, not a load error.
  return unescapeJSLiteral (m[1] || m[2] || '');
} // getExternalScript
601
/**
 * Interprets the tiny script language supported by this simulator.
 *
 * The source must be a sequence of the following statements:
 *   - document.write (string literal, ...);
 *   - var s = document.createElement ("script"); s.src = "javascript:...";
 *     document.documentElement.appendChild (s);
 * Statements are consumed from the front of |s| one by one; anything
 * else stops interpretation with a logged "Script parse error".
 *
 * @param doc  Document receiving write () / _insertExternalScript ().
 * @param s    Script source text.
 */
function parseAndRunScript (doc, s) {
  var writeStatement = /^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/;
  var insertStatement = /^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'(javascript:[^']*)'|"(javascript:[^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/;

  for (;;) {
    var progressed = false;

    var w = s.match (writeStatement);
    if (w) {
      progressed = true;
      // Split the argument list into its quoted literals and unquote them.
      var literals = w[1].match (/('[^']*'|"[^"]*")/g);
      var args = [];
      for (var k = 0; k < literals.length; k++) {
        var quoted = literals[k];
        args.push (unescapeJSLiteral (quoted.substring (1, quoted.length - 1)));
      }
      doc.write.apply (doc, args);
      s = s.substring (w[0].length);
    }

    var c = s.match (insertStatement);
    if (c) {
      progressed = true;
      doc._insertExternalScript.apply
          (doc, [unescapeJSLiteral (c[1] ? c[1] : c[2])]);
      s = s.substring (c[0].length);
    }

    if (s == '') break;
    if (!progressed) {
      log ('Script parse error: "' + s + '"');
      break;
    }
  }
} // parseAndRunScript
629
/**
 * Decodes the \uHHHH escapes of a JavaScript string literal body.
 * No other escape sequence is supported; other text is kept as is.
 *
 * @param s  Literal content without the surrounding quotes.
 * @return   The decoded string.
 */
function unescapeJSLiteral (s) {
  return s.replace (/\\u([0-9A-Fa-f]{4})/g, function (escape, hex) {
    var codeUnit = parseInt (hex, 16);
    return String.fromCharCode (codeUnit);
  });
} // unescapeJSLiteral
635
/**
 * Minimal text node of the simulated DOM.
 *
 * @param data  Character data of the node.
 */
function JSText (data) {
  this.data = data;
} // JSText
639
/**
 * Appends character data to this node: the text is merged into the last
 * child when that child is a JSText, otherwise a new JSText child is
 * added.
 *
 * @param s  Text to append.
 */
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var children = this.childNodes;
  var last = children.length > 0 ? children[children.length - 1] : null;
  if (last instanceof JSText) {
    last.data += s;
  } else {
    children.push (new JSText (s));
  }
}; // manakaiAppendText
650
/**
 * Simulates document.open ([type [, replace]]).
 *
 * While the document is being parsed by a parser that was not itself
 * created by open () and that has a defined insertion point, the call
 * is ignored. Otherwise the document is emptied and a new, script-created
 * parser with an empty input stream replaces the old one; its insertion
 * point is placed at the end of that input.
 *
 * @return  This document.
 */
JSDocument.prototype.open = function () {
  // Two or fewer arguments

  // Step 1. (Only text/html is simulated; the value is not used further.)
  var type = arguments[0] || 'text/html';

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  // The insertion point is a property of the parser, not of its input
  // stream. NaN counts as undefined, as in write () and
  // setInsertionPoint ().
  var parser = this._parser;
  if (parser &&
      !parser.scriptCreated &&
      parser.insertionPoint != undefined &&
      !isNaN (parser.insertionPoint)) {
    log ('document.open () in parsing mode is ignored');
    return this;
  }

  // Step 4.
  log ('onbeforeunload event fired');
  log ('onunload event fired');

  // Step 5.
  if (this._parser) {
    // Discard the parser.
  }

  // Step 6.
  log ('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser (new InputStream (''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  this._parser.setInsertionPoint (this._parser.input.s.length);

  // Step 12.
  return this;
}; // document.open
702
/**
 * Simulates document.write (string, ...).
 *
 * The concatenated arguments are inserted into the parser's input just
 * before the insertion point, which then moves past them. Unless a
 * script is waiting for the parser to resume, the inserted characters
 * are processed immediately by a reentrant call of the parser.
 */
JSDocument.prototype.write = function () {
  logIndentLevel++;

  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
    this.open ();
    p = this._parser;
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // |arguments| is not a real array, so borrow Array.prototype.join.
  var s = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + s + '"' +
       ' before "' +
       p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
  p.input.s = p.input.s.substring (0, p.insertionPoint) + s
      + p.input.s.substring (p.insertionPoint, p.input.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a script that will execute as soon as the parser resumes
  // (the property is set by appendChild and cleared by parse).
  if (p.scriptExecutedWhenParserResumes) {
    log ('document.write: processed later (there is an unprocessed <script src>)');
    logIndentLevel--;
    return;
  }

  // 4. Process the characters that were inserted, ...
  var originalReentrant = p.reentrant;
  p.reentrant = true;
  p.parse ();
  p.reentrant = originalReentrant;
  // TODO: "Abort the processing of any nested invokations of the tokeniser,
  // yielding control back to the caller." (<script> parsing). Do we need
  // to do something here?

  // 5. Return
  log ('document.write: return');

  logIndentLevel--;
  return;
}; // document.write
745
/**
 * Creates a script element with the given |src| and appends it to the
 * document element. The element is not flagged as parser-inserted.
 *
 * @param uri  Value for the |src| attribute.
 */
JSDocument.prototype._insertExternalScript = function (uri) {
  var script = new JSElement (this, 'script');
  script.src = uri;
  var root = this.documentElement;
  root.appendChild (script);
}; // _insertExternalScript
751
/**
 * document.documentElement: the first child of the document that is a
 * JSElement, or null when there is none.
 */
JSDocument.prototype.__defineGetter__ ('documentElement', function () {
  var children = this.childNodes;
  var index = 0;
  while (index < children.length) {
    var child = children[index];
    if (child instanceof JSElement) {
      return child;
    }
    index++;
  }
  return null;
});
761
/**
 * element.text: concatenation of the data of all JSText children, in
 * document order. Element children and their content are skipped.
 */
JSElement.prototype.__defineGetter__ ('text', function () {
  var children = this.childNodes;
  var collected = '';
  var index = 0;
  while (index < children.length) {
    var child = children[index++];
    if (child instanceof JSText) {
      collected += child.data;
    }
  }
  return collected;
});
771
/**
 * Serialises the children of |n| as a text tree dump, one node per line.
 *
 * Every line starts with '| ' followed by |indent|. Elements print
 * their name, their async/defer/src state and then their subtree with a
 * deeper indent; text nodes print their data in double quotes; anything
 * else is converted to a string.
 *
 * @param n       Node whose children are dumped.
 * @param indent  Indentation string for the children of |n|.
 * @return        The dump; every line ends with a newline.
 */
function dumpTree (n, indent) {
  var lines = [];
  var prefix = '| ' + indent;
  for (var i = 0; i < n.childNodes.length; i++) {
    var node = n.childNodes[i];
    if (node instanceof JSElement) {
      lines.push (prefix + node.localName + '\n');
      if (node.async) lines.push (prefix + ' async=""\n');
      if (node.defer) lines.push (prefix + ' defer=""\n');
      if (node.src != null) {
        lines.push (prefix + ' src="' + node.src + '"\n');
      }
      lines.push (dumpTree (node, indent + ' '));
    } else if (node instanceof JSText) {
      lines.push (prefix + '"' + node.data + '"\n');
    } else {
      lines.push (prefix + node + '\n');
    }
  }
  return lines.join ('');
} // dumpTree
792 </script>
793 </head>
<body onload="
  // Page initialisation: locate the source and log elements, load the
  // source text from the |s| query parameter (parameters are separated
  // by ';') and run the first parse.
  document.sourceElement = document.getElementsByTagName ('textarea')[0];
  document.logElement = document.getElementsByTagName ('output')[0];

  var q = (location.search || '').substring (1).split (/;/);
  for (var i = 0; i < q.length; i++) {
    var v = q[i].split (/=/, 2);
    try {
      if (decodeURIComponent (v[0]) == 's') {
        document.sourceElement.value = decodeURIComponent (v[1] || '');
      }
    } catch (e) {
      // A malformed percent-escape makes decodeURIComponent throw; skip
      // that parameter so that the page still initialises.
    }
  }

  update ();
">
813 <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
814 Parser</h1>
815
816 <h2>Markup to test
817 (<a href=data:, id=permalink rel=bookmark>permalink</a>,
818 <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
819 id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
820 Viewer</a>)</h2>
821 <p>
822 <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
823 &lt;head>&lt;/head>&lt;body>
824 &lt;p>
825 &lt;script>
826 document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
827 &lt;/script>
828 &lt;p>
829 </textarea>
830
831 <h2 id=log>Log</h2>
832 <p><output></output>
833
834 <h2 id=notes>Notes</h2>
835
836 <p>This is a <em>simplified</em> implementation of
837 <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
838 Parsing Algorithm</a>. It only implements script-related part of the
839 algorithm. Especially, this parser:
840 <ul>
841 <li>Does not support <code>DOCTYPE</code> and comment tokens.
842 <li>Does not support entities except for <code>&amp;quot;</code>,
843 <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
844 <code>src</code> attribute value.
845 <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
846 algorithm, and so on.
847 <li>Does not raise parse errors for invalid attribute specifications in start
848 or end tags.
849 <li>Does not support PCDATA elements (<code>title</code> and
850 <code>textarea</code>).
851 <li>Does not strip the first newline in <code>pre</code> elements.
852 <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
853 in <code>script</code> element.
854 <li>Does not support foreign (SVG or MathML) elements.
855 <li>Only supports <code>script</code> <code>type</code>
856 <code>text/javascript</code>. <code>type</code> and <code>language</code>
857 attributes are ignored.
858 <li>Only supports limited statements. It must consist of zero or more
859 of statements looking similar to the following statements, possibly
860 introduced, followed, or separated by white space characters:
861 <ul>
862 <li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>.
863 <li><code>var s = document.createElement ("script");
864 s.src = "<var>string</var>";
865 document.documentElement.appendChild (s);</code>
866 </ul>
867 Note that strings may be delimited by <code>'</code>s instead of
868 <code>"</code>s.
869 <li>Only supports <code>javascript:</code>
870 <abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the
871 <code>src</code> attribute of the <code>script</code> element. In addition,
872 the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to
873 the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
874 <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
875 string literals.
876 <li>Does not handle <i>stop parsing</i> phase correctly if the document is
877 replaced by <code>document.open ()</code> call. In other words, delayed
878 (deferred or asynchronous) script executions and event firings might be
879 treated in a wrong way if a <code>document.open ()</code> invocation
880 is implicitly done by <code>document.write ()</code> in a delayed script.
881 </ul>
882
883 <p>For some reason, this parser does not work in browsers that do
884 not support JavaScript 1.5.
885
886 <!-- TODO: |src| attribute value should refer to the value at the time
887 when it is inserted into the document, not the value when the script is
888 executed. Currently it does not matter, since we don't allow dynamic
889 modification to the |src| content/DOM attribute value yet. -->
890
891 </body>
892 </html>
893 <!-- $Date: 2008/01/19 06:47:07 $ -->
894 <!--
895
896 Copyright 2008 Wakaba <w@suika.fam.cx>
897
898 This program is free software; you can redistribute it and/or
899 modify it under the terms of the GNU General Public License
900 as published by the Free Software Foundation; either version 2
901 of the License, or (at your option) any later version.
902
903 This program is distributed in the hope that it will be useful,
904 but WITHOUT ANY WARRANTY; without even the implied warranty of
905 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
906 GNU General Public License for more details.
907
908 You should have received a copy of the GNU General Public License
909 along with this program; if not, write to the Free Software
910 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
911
912 -->

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24