/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.9 - (hide annotations) (download) (as text)
Sun Apr 27 09:16:11 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.8: +16 -11 lines
File MIME type: text/html
Attribute parsing bug fixed

1 wakaba 1.1 <!DOCTYPE HTML>
2     <html lang=en>
3     <head>
4 wakaba 1.8 <title>Live Scripting HTML Parser</title>
5 wakaba 1.1 <style>
6 wakaba 1.7 h1, h2 {
7     margin: 0;
8     font-size: 100%;
9     }
10     p, pre {
11     margin: 0;
12     }
13 wakaba 1.1 textarea {
14 wakaba 1.7 width: 100%;
15     -width: 99%;
16     height: 10em;
17 wakaba 1.1 }
18     output {
19     display: block;
20     font-family: monospace;
21 wakaba 1.4 white-space: -moz-pre-wrap;
22     white-space: pre-wrap;
23 wakaba 1.1 }
24     </style>
25     <script>
// Timer handle of the pending re-parse, or 0 when none is scheduled.
var delayedUpdater = 0;

/**
 * Requests a re-parse of the textarea content.
 *
 * Calls are debounced: any re-parse that is still pending is cancelled and
 * a new one is scheduled 100 ms from now, so that a burst of key events
 * results in a single call to update2.
 */
function update () {
  var pending = delayedUpdater;
  delayedUpdater = 0;
  if (pending) clearTimeout (pending);
  delayedUpdater = setTimeout (update2, 100);
} // update
35    
/**
 * Re-parses the markup in the source textarea if it changed since the
 * previous run.
 *
 * Refreshes the permalink and Live DOM Viewer links, clears the log,
 * parses the markup and finally logs a dump of the resulting tree.
 */
function update2 () {
  var source = document.sourceElement.value;
  if (source == document.previousSourceText) return;
  document.previousSourceText = source;

  var encoded = encodeURIComponent (source);
  document.links['permalink'].href = location.pathname + '?s=' + encoded;
  document.links['ldvlink'].href
      = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?' + encoded;

  document.logElement.textContent = '';
  var parser = new Parser (new InputStream (source));
  var tree = parser.doc;
  parser.parse ();
  log (dumpTree (tree, ''));
} // update2
53 wakaba 1.1
// Current nesting depth of log output; each level adds one leading space.
var logIndentLevel = 0;

/**
 * Appends one line to the log element, indented by logIndentLevel spaces.
 *
 * @param s  The message to log.
 */
function log (s) {
  var padding = '';
  for (var depth = logIndentLevel; depth > 0; depth--) {
    padding += ' ';
  }
  var line = document.createTextNode (padding + s + "\n");
  document.logElement.appendChild (line);
} // log
61    
/**
 * A mutable input stream for the parser.
 *
 * @param s  The initial content; stored in the |s| property, which the
 *           tokeniser consumes from the front and document.write splices
 *           into.
 */
function InputStream (s) {
  this.s = s;
} // InputStream
65    
/**
 * Creates a parser that reads from an input stream.
 *
 * @param i    The InputStream to tokenise.
 * @param doc  Optional document to build into.  When omitted, a new
 *             JSDocument bound to this parser is created and flagged as
 *             an HTML document.
 */
function Parser (i, doc) {
  var target = doc;
  if (!target) {
    target = new JSDocument (this);
    target.manakaiIsHTML = true;
  }

  this.parseMode = 'pcdata';
  this.doc = target;
  // The stack of open elements starts with the document itself.
  this.openElements = [target];
  this.input = i;
  this.scriptsExecutedAfterParsing = [];
} // Parser
77    
/**
 * Consumes and returns the next token from the front of the input stream.
 *
 * Returned tokens are plain objects with a |type| of:
 *   'char'      - |value| is a run of character data;
 *   'start-tag' - |value| is the lowercased tag name, |attrs| maps
 *                 lowercased attribute names to their values;
 *   'end-tag'   - |value| is the lowercased tag name;
 *   'abort'     - the insertion point was reached; nothing was consumed;
 *   'eof'       - the input stream is empty.
 *
 * When this.insertionPoint is a number, it is the count of characters at
 * the front of the input that may be consumed; it is decremented by the
 * length of whatever is consumed.  When it is undefined, the comparisons
 * against it are all false and nothing limits consumption.
 */
Parser.prototype.getNextToken = function () {
  var p = this;
  var i = this.input;
  var token;

  // True if the tag text |s| crosses the insertion point, or ends exactly
  // at it without a closing ">" (more of the tag may still be written).
  function tagIsIncomplete (s) {
    // Note: charAt, not substring (s.length - 1, 1) -- substring swaps
    // its arguments when start > end and would not return the last char.
    return p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length && s.charAt (s.length - 1) != '>');
  }

  if (this.parseMode == 'script') {
    if (p.insertionPoint <= 0) {
      return {type: 'abort'};
    }

    // Character data up to the next "<".
    i.s = i.s.replace (/^([^<]+)/, function (s, t) {
      if (0 < p.insertionPoint && p.insertionPoint < t.length) {
        // Only the part before the insertion point is available.
        token = {type: 'char', value: t.substring (0, p.insertionPoint)};
        var ip = p.insertionPoint;
        p.insertionPoint = 0;
        return t.substring (ip, t.length);
      }
      token = {type: 'char', value: t};
      p.insertionPoint -= t.length;
      return '';
    });
    if (token) return token;

    // A complete script end tag.
    i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {
      if (p.insertionPoint < s.length) {
        token = {type: 'abort'};
        return s;
      }
      token = {type: 'end-tag', value: 'script'};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    // A possible script end tag that is cut by the insertion point: if the
    // visible part of the tag name is still a prefix of "script", wait.
    var m;
    if ((p.insertionPoint < '</script'.length) &&
        (m = i.s.match (/^<\/([SCRIPTscript]+)/))) {
      // Number of tag-name characters before the insertion point.  Both
      // sides of the comparison must be cut to this same length.
      var visible = p.insertionPoint - '</'.length;
      var v = m[1].substring (0, visible).toLowerCase ();
      if (v == 'script'.substring (0, visible)) {
        return {type: 'abort'};
      }
    }

    // A "<" that does not start a script end tag is character data.
    i.s = i.s.replace (/^</, function (s) {
      token = {type: 'char', value: s};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;
    return {type: 'eof'};
  }

  // End tag.
  i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
    if (tagIsIncomplete (s)) {
      token = {type: 'abort'};
      return s;
    }
    token = {type: 'end-tag', value: e.toLowerCase ()};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Start tag.
  i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
    if (tagIsIncomplete (s)) {
      token = {type: 'abort'};
      return s;
    }
    var tagName;
    var attrs = {};
    e = e.replace (/^[\S]+/, function (v) {
      tagName = v.toLowerCase ();
      return '';
    });
    // Strip attribute specifications off the front one by one.
    while (true) {
      var matched = false;
      e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
      function (x, attrName, attrValue1, attrValue2, attrValue3) {
        // Captures that did not participate are undefined; an attribute
        // without a value (e.g. |defer|) gets the empty string.
        var value = attrValue1 || attrValue2 || attrValue3 || '';
        value = value.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
            .replace (/&amp;/g, '&');
        attrs[attrName.toLowerCase ()] = value;
        matched = true;
        return '';
      });
      if (!matched) break;
    }
    if (e.length) {
      log ('Broken start tag: "' + e + '"');
    }
    token = {type: 'start-tag', value: tagName, attrs: attrs};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  if (p.insertionPoint <= 0) {
    return {type: 'abort'};
  }

  // Character data up to the next "<".
  i.s = i.s.replace (/^[^<]+/, function (s) {
    if (p.insertionPoint < s.length) {
      token = {type: 'char', value: s.substring (0, p.insertionPoint)};
      var ip = p.insertionPoint;
      p.insertionPoint = 0;
      return s.substring (ip, s.length);
    }
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Any other single character (i.e. a "<" that starts no tag).
  i.s = i.s.replace (/^[\s\S]/, function (s) {
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  return {type: 'eof'};
}; // getNextToken
197    
/**
 * Runs the (simplified) tree construction loop over the input stream.
 *
 * Tokens are pulled from getNextToken until 'eof' or 'abort'.  |script|
 * start tags get the special script processing (numbered comments follow
 * the steps of the HTML5 parsing algorithm); other start tags are pushed
 * onto the stack of open elements, end tags pop it when they match the
 * current node, and character tokens are appended as text.
 *
 * On 'abort', or when a pending script is found during a reentrant call,
 * the method returns early without running the end-of-parsing steps.
 * On 'eof' it runs the deferred scripts and logs the DOMContentLoaded and
 * load events.
 */
Parser.prototype.parse = function () {
  logIndentLevel++;
  log ('parse: start');

  while (true) {
    var token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);
        if (token.attrs.async != null) el.async = true;
        if (token.attrs.defer != null) el.defer = true;
        if (token.attrs.src != null) el.src = token.attrs.src;

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'script';

        // 4.1. Collect all the character tokens.
        while (true) {
          var token = this.getNextToken ();
          log ('token: ' + token.type + ' "' + token.value + '"');

          if (token.type == 'char') {
            // 5. Append a single Text node to the script element node.
            el.manakaiAppendText (token.value);

          // 4.2. Until it returns a token that is not a character token, or
          // until it stops tokenising.
          } else if (token.type == 'eof' ||
                     (token.type == 'end-tag' && token.value == 'script') ||
                     token.type == 'abort') {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            // 7.1. If the next token is not an end tag token with ...
            if (token.type != 'end-tag') {
              // 7.2. This is a parse error.
              log ('Parse error: no </' + 'script>');

              // 7.3. Mark the script element as "already executed".
              el.manakaiAlreadyExecuted = true;
            } else {
              // 7.4. Ignore it.
              //
            }
            break;
          }
        }

        // 8.1. If the parser were originally created for the ...
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and ...
          // (Same flag name as step 7.3 and as checked by appendChild.)
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the ...
        var oldInsertionPoint = this.insertionPoint;
        // 9.2. Let the insertion point be just before the next input ...
        this.setInsertionPoint (0);

        // 10. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 11. Let the insertion point have the value of the old ...
        // (shifted by whatever the script left before the insertion point).
        oldInsertionPoint += this.insertionPoint;
        this.setInsertionPoint (oldInsertionPoint);

        // 12. If there is a script that will execute as soon as ...
        while (this.scriptExecutedWhenParserResumes) {
          // 12.1. If the tree construction stage is being called reentrantly
          if (this.reentrant) {
            log ('parse: abort (reentrance)');
            logIndentLevel--;
            return;

          // 12.2. Otherwise
          } else {
            // 1.
            var script = this.scriptExecutedWhenParserResumes;
            this.scriptExecutedWhenParserResumes = null;

            // 2. Pause until the script has completed loading.
            //

            // 3. Let the insertion point to just before the next input char.
            this.setInsertionPoint (0);

            // 4. Execute the script.
            executeScript (this.doc, script);

            // 5. Let the insertion point be undefined again.
            this.setInsertionPoint (undefined);

            // 6. If there is once again a script that will execute ...
            //
          }
        }
      } else {
        var el = new JSElement (this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild (el);
        this.openElements.push (el);
      }
    } else if (token.type == 'end-tag') {
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'char') {
      this.openElements[this.openElements.length - 1].manakaiAppendText
          (token.value);
    } else if (token.type == 'eof') {
      break;
    } else if (token.type == 'abort') {
      log ('parse: abort');
      logIndentLevel--;
      return;
    }
  }

  log ('stop parsing');

  // readyState = 'interactive'

  // "When a script completes loading" rules start applying.

  // TODO: Handles "list of scripts that will execute as soon as possible"
  // and "list of scripts that will execute asynchronously"

  // Handle "list of scripts that will execute when the document has finished
  // parsing".
  var list = this.scriptsExecutedAfterParsing;
  while (list.length > 0) {
    // TODO: break unless completed loading

    // Step 1.
    //

    // Step 2. and Step 3.
    log ('Executing a |defer|red script...');
    executeScript (this.doc, list.shift ());

    // Step 4.
  }

  log ('DOMContentLoaded event fired');

  // "delays the load event" things has completed:
  // readyState = 'complete'
  log ('load event fired');

  logIndentLevel--;
}; // parse
359    
/**
 * Sets the parser's insertion point and logs the change.
 *
 * @param ip  Number of input characters before the insertion point.
 *            undefined, null and NaN all mean "undefined insertion point".
 */
Parser.prototype.setInsertionPoint = function (ip) {
  if (ip == null || isNaN (ip)) {
    log ('insertion point: set to undefined');
    this.insertionPoint = undefined;
    return;
  }

  var remaining = this.input.s;
  if (ip == remaining.length) {
    log ('insertion point: end of file');
  } else {
    log ('insertion point: set to ' + ip +
         ' (before "' + remaining.substring (0, 10) + '")');
  }
  this.insertionPoint = ip;
}; // setInsertionPoint
373    
/**
 * A minimal document node.
 *
 * @param p  The Parser that builds this document; kept in |_parser| so
 *           that appendChild, open and write can reach it.
 */
function JSDocument (p) {
  this._parser = p;
  this.childNodes = [];
} // JSDocument
378    
/**
 * A minimal element node.
 *
 * @param doc        The owner document.
 * @param localName  The element's (lowercased) tag name.
 */
function JSElement (doc, localName) {
  this.ownerDocument = doc;
  this.localName = localName;
  this.childNodes = [];
} // JSElement
384    
/**
 * Appends a child node and, if the child is a |script| element, runs the
 * "running a script" steps for it (numbered comments follow the HTML5
 * algorithm).
 *
 * @param e  The node to append.
 * @return   The appended node.
 */
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push (e);
  e.parentNode = this;

  // Only script elements need further processing.
  if (e.localName != 'script') return e;

  logIndentLevel++;
  log ('Running a script: start');

  // |this| is either the document itself or an element owned by it.
  var doc = this.ownerDocument || this;
  var p = doc._parser;

  // 1. Script type: not checked.
  // 2.1. - 2.3. Scripting disabled, XML innerHTML, unsupported language:
  // not checked.

  // 2.4. If the script element has its "already executed" flag set,
  // 2.5. abort these steps at this point.
  if (e.manakaiAlreadyExecuted) {
    log ('Running a script: aborted');
    logIndentLevel--;
    return e;
  }

  // 3. Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // 4. If the element has a src attribute, then a load for ...
  // TODO: load an external resource

  // 5. The first of the following options:
  var hasSrc = e.src != null;
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    // 5.1. Deferred: run after parsing has finished.
    p.scriptsExecutedAfterParsing.push (e);
    log ('Running a script: aborted (defer)');
  } else if (e.async) {
    // TODO: asynchronous scripts, both with a src attribute and (when the
    // list of scripts that will execute asynchronously is not empty)
    // without one.
  } else if (hasSrc && e.manakaiParserInserted) {
    // External parser-inserted script: run when the parser resumes.
    if (p.scriptExecutedWhenParserResumes) {
      log ('Error: There is a script that will execute as soon as the parser resumes.');
    }
    p.scriptExecutedWhenParserResumes = e;
    log ('Running a script: aborted (src)');
  } else if (hasSrc) {
    // TODO
  } else {
    executeScript (doc, e); // even if other scripts are already executing.
  }

  log ('Running a script: end');
  logIndentLevel--;
  return e;
}; // appendChild
450    
/**
 * Executes a script element: obtains its source (inline text, or the
 * resource named by |src|) and hands it to parseAndRunScript.
 *
 * @param doc  The document the script runs against.
 * @param e    The script element.
 */
function executeScript (doc, e) {
  log ('executing a script block: start');

  var code;
  if (e.src == null) {
    // Inline script.
    code = e.text;
  } else {
    code = getExternalScript (e.src);

    // If the load resulted in an error, then ... firing an error event ...
    if (code == null) {
      log ('error event fired at the script element');
      return;
    }

    log ('External script loaded: "' + code + '"');
  }

  // If the load was successful
  log ('load event fired at the script element');

  // Scripting is enabled, Document.designMode is disabled and the
  // Document is the active document in its browsing context: always run.
  parseAndRunScript (doc, code);

  log ('executing a script block: end');
} // executeScript
481    
/**
 * Returns the script source designated by a |src| attribute value.
 *
 * Only javascript: URIs consisting of a single string literal are
 * supported, i.e. those matching
 * ^javascript:\s*(?:"[^"]*"|'[^']*')\s*$ (case-insensitively).
 *
 * @param uri  The attribute value.
 * @return     The content of the string literal (possibly the empty
 *             string), or null if the URI is not supported.
 */
function getExternalScript (uri) {
  if (!uri.match (/^javascript:/i)) {
    log ('URI scheme not supported: <' + uri + '>');
    return null;
  }

  // Both quoting styles accept an empty literal.
  var m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]*)")\s*$/i);
  if (!m) {
    log ('Complex javascript: URI is not supported: <' + uri + '>');
    return null;
  }

  // The group that did not participate is undefined (or empty in some
  // engines); an empty literal is a valid, empty script, not an error.
  return m[1] || m[2] || '';
} // getExternalScript
502    
/**
 * "Runs" a script consisting solely of document.write calls whose
 * arguments are string literals.
 *
 * Statements are consumed from the front of |s| one at a time; each one
 * is executed by calling doc.write with the literal contents.  If what
 * remains is not such a statement, a script parse error is logged and
 * execution stops.
 *
 * @param doc  The document whose write method is called.
 * @param s    The script source.
 */
function parseAndRunScript (doc, s) {
  var statement = /^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/;
  var literal = /('[^']*'|"[^"]*")/g;

  while (true) {
    var m = statement.exec (s);
    if (m) {
      // Collect the literals, stripping the surrounding quotes.
      var quoted = m[1].match (literal);
      var args = [];
      for (var k = 0; k < quoted.length; k++) {
        args.push (quoted[k].substring (1, quoted[k].length - 1));
      }
      doc.write.apply (doc, args);
      // The pattern is anchored, so the statement is a prefix of |s|.
      s = s.substring (m[0].length);
    }
    if (s == '') break;
    if (!m) {
      log ('Script parse error: "' + s + '"');
      break;
    }
  }
} // parseAndRunScript
523    
/**
 * A minimal text node.
 *
 * @param data  The character data of the node.
 */
function JSText (data) {
  this.data = data;
} // JSText
527    
/**
 * Appends character data to this node: the data is merged into the last
 * child if that child is a text node, otherwise a new text node is added.
 *
 * @param s  The character data to append.
 */
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var kids = this.childNodes;
  var last = kids.length > 0 ? kids[kids.length - 1] : null;
  if (last instanceof JSText) {
    last.data += s;
  } else {
    kids.push (new JSText (s));
  }
}; // manakaiAppendText
538 wakaba 1.2
/**
 * Simplified document.open (): discards the document's content and
 * attaches a new, script-created parser with an empty input stream.
 *
 * The call is ignored when the document is being parsed by a parser that
 * is not script-created and whose insertion point is defined.
 *
 * Accepts up to two optional arguments: the MIME type (default
 * 'text/html') and the string 'replace'.  Neither currently affects the
 * result.
 *
 * @return  This document.
 */
JSDocument.prototype.open = function () {
  // Two or fewer arguments

  // Step 1.
  var type = arguments[0] || 'text/html';

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  // The insertion point is a property of the parser, not of its input
  // stream.  NaN is treated like undefined, as setInsertionPoint and
  // write do.
  var p = this._parser;
  if (p &&
      !p.scriptCreated &&
      p.insertionPoint != undefined && !isNaN (p.insertionPoint)) {
    log ('document.open () in parsing mode is ignored');
    return this;
  }

  // Step 4.
  log ('onbeforeunload event fired');
  log ('onunload event fired');

  // Step 5.
  if (this._parser) {
    // Discard the parser.
  }

  // Step 6.
  log ('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser (new InputStream (''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  this._parser.setInsertionPoint (this._parser.input.s.length);

  // Step 12.
  return this;
}; // document.open
590    
/**
 * Simplified document.write (): concatenates the arguments, inserts the
 * result into the parser's input stream at the insertion point and, unless
 * a script is waiting for the parser to resume, parses it reentrantly.
 *
 * If the insertion point is undefined, open () is called first.
 */
JSDocument.prototype.write = function () {
  logIndentLevel++;

  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
    this.open ();
    p = this._parser;
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // (|arguments| is not an Array, hence the borrowed join.)
  var s = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + s + '"' +
       ' before "' +
       p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
  p.input.s = p.input.s.substring (0, p.insertionPoint) + s
      + p.input.s.substring (p.insertionPoint, p.input.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a script that will execute as soon as the parser resumes
  // (the property set by appendChild and consumed by parse).
  if (p.scriptExecutedWhenParserResumes) {
    log ('document.write: processed later (there is an unprocessed <script src>)');
    logIndentLevel--;
    return;
  }

  // 4. Process the characters that were inserted, ...
  var originalReentrant = p.reentrant;
  p.reentrant = true;
  p.parse ();
  p.reentrant = originalReentrant;
  // TODO: "Abort the processing of any nested invokations of the tokeniser,
  // yielding control back to the caller." (<script> parsing). Do we need
  // to do something here?

  // 5. Return
  log ('document.write: return');

  logIndentLevel--;
  return;
}; // document.write
633    
/**
 * The |text| attribute: the concatenated data of the element's child
 * text nodes (descendants of child elements are not included).
 */
JSElement.prototype.__defineGetter__ ('text', function () {
  var kids = this.childNodes;
  var result = '';
  var k = 0;
  while (k < kids.length) {
    var kid = kids[k++];
    if (!(kid instanceof JSText)) continue;
    result += kid.data;
  }
  return result;
});
643 wakaba 1.1
/**
 * Serialises the subtree below a node, one "| "-prefixed line per node.
 *
 * Elements are shown by name, followed by their async/defer/src
 * properties and their children at a deeper indent; text nodes are shown
 * in double quotes.
 *
 * @param n       The node whose children are dumped.
 * @param indent  The indentation string for the children of |n|.
 * @return        The dump as a string.
 */
function dumpTree (n, indent) {
  var lines = [];
  var prefix = '| ' + indent;
  var kids = n.childNodes;
  for (var k = 0; k < kids.length; k++) {
    var kid = kids[k];
    if (kid instanceof JSElement) {
      lines.push (prefix + kid.localName + '\n');
      if (kid.async) lines.push (prefix + ' async=""\n');
      if (kid.defer) lines.push (prefix + ' defer=""\n');
      if (kid.src != null) {
        lines.push (prefix + ' src="' + kid.src + '"\n');
      }
      lines.push (dumpTree (kid, indent + ' '));
    } else if (kid instanceof JSText) {
      lines.push (prefix + '"' + kid.data + '"\n');
    } else {
      lines.push (prefix + kid + '\n');
    }
  }
  return lines.join ('');
} // dumpTree
664     </script>
665     </head>
<body onload="
  /* Remember the elements the script works with. */
  document.sourceElement = document.getElementsByTagName ('textarea')[0];
  document.logElement = document.getElementsByTagName ('output')[0];

  /* Load the markup from the |s| parameter of the query, if any.
     Parameters are separated by semicolons. */
  var q = location.search;
  if (q != null) {
    q = q.substring (1).split (/;/);
    for (var i = 0; i < q.length; i++) {
      var v = q[i].split (/=/, 2);
      try {
        v[0] = decodeURIComponent (v[0]);
        v[1] = decodeURIComponent (v[1] || '');
      } catch (e) {
        /* Malformed percent escape: skip this parameter instead of
           aborting the initialisation of the page. */
        continue;
      }
      if (v[0] == 's') {
        document.sourceElement.value = v[1];
      }
    }
  }

  update ();
">
685 wakaba 1.8 <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
686     Parser</h1>
687 wakaba 1.1
688 wakaba 1.7 <h2>Markup to test
689 wakaba 1.8 (<a href=data:, id=permalink rel=bookmark>permalink</a>,
690     <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
691     id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
692     Viewer</a>)</h2>
693 wakaba 1.7 <p>
694     <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
695 wakaba 1.1 &lt;head>&lt;/head>&lt;body>
696     &lt;p>
697     &lt;script>
698 wakaba 1.3 document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
699 wakaba 1.1 &lt;/script>
700     &lt;p>
701     </textarea>
702    
703 wakaba 1.7 <h2>Log</h2>
704     <p><output></output>
705    
706 wakaba 1.8 <h2>Note</h2>
707    
708     <p>This is a <em>simplified</em> implementation of
709     <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
710     Parsing Algorithm</a>. It only implements script-related part of the
711     algorithm. Especially, this parser:
712     <ul>
713     <li>Does not support <code>DOCTYPE</code> and comment tokens.
714     <li>Does not support entities except for <code>&amp;quot;</code>,
715     <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
716     <code>src</code> attribute value.
717     <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
718     algorithm, and so on.
719     <li>Does not raise parse errors for invalid attribute specifications in start
720     or end tags.
721     <li>Does not support CDATA/PCDATA element other than <code>script</code>.
722     <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
723     in <code>script</code> element.
724     <li>Does not support foreign (SVG or MathML) elements.
725     <li>Only supports <code>script</code> <code>type</code>
726     <code>text/javascript</code>. <code>type</code> and <code>language</code>
727     attributes are ignored.
728     <li>Only supports <code>document.write</code>.
729     The script code must match the regular expression
730     <code>^\s*(?:document\.write\s*\(<var>v</var>\s*(?:,\s*<var>v</var>\s*)*\)\s*;\s*)*$</code>
731     where <var>v</var> is <code>"[^"]*"|'[^']*'</code>.
732     <li>Only supports <code>javascript:</code>
733     <abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the
734     <code>src</code> attribute of the <code>script</code> element. In addition,
735     the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to
736     the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
737     </ul>
738 wakaba 1.7
739 wakaba 1.8 <p>For some reason, this parser does not work in browsers that do
740     not support JavaScript 1.5.
741 wakaba 1.1
742     </body>
743     </html>

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24