/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (hide annotations) (download) (as text)
Sun Apr 27 08:56:34 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.7: +79 -22 lines
File MIME type: text/html
Does not reparse if the text is not changed; support for query strings; note on restrictions is added; s/.in/.input/g for ES3 compliance

1 wakaba 1.1 <!DOCTYPE HTML>
2     <html lang=en>
3     <head>
4 wakaba 1.8 <title>Live Scripting HTML Parser</title>
5 wakaba 1.1 <style>
6 wakaba 1.7 h1, h2 {
7     margin: 0;
8     font-size: 100%;
9     }
10     p, pre {
11     margin: 0;
12     }
13 wakaba 1.1 textarea {
14 wakaba 1.7 width: 100%;
15     -width: 99%;
16     height: 10em;
17 wakaba 1.1 }
18     output {
19     display: block;
20     font-family: monospace;
21 wakaba 1.4 white-space: -moz-pre-wrap;
22     white-space: pre-wrap;
23 wakaba 1.1 }
24     </style>
25     <script>
// Timer handle of the pending re-parse; 0 while nothing is scheduled.
var delayedUpdater = 0;

/**
 * Schedules update2 to run 100 ms from now, cancelling any run that is
 * still pending, so that a burst of input events triggers a single re-parse.
 */
function update () {
  if (delayedUpdater) clearTimeout (delayedUpdater);
  delayedUpdater = setTimeout (update2, 100);
} // update
35    
/**
 * Re-parses the textarea content and rewrites the log, unless the text is
 * the same as the last time this function ran.
 *
 * Also refreshes the permalink and the Live DOM Viewer link so that both
 * carry the current source text.
 */
function update2 () {
  var source = document.sourceElement.value;

  // Nothing to do when the text has not changed since the previous run.
  if (source == document.previousSourceText) return;
  document.previousSourceText = source;

  var encoded = encodeURIComponent (source);
  document.links['permalink'].href = location.pathname + '?s=' + encoded;
  document.links['ldvlink'].href
      = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?' + encoded;

  // Start from an empty log, parse, then append the resulting tree dump.
  document.logElement.textContent = '';
  var parser = new Parser (new InputStream (source));
  var tree = parser.doc;
  parser.parse ();
  log (dumpTree (tree, ''));
} // update2
53 wakaba 1.1
// Current nesting depth of log output; incremented and decremented by the
// parser, document.write and the script-running code.
var logIndentLevel = 0;

/**
 * Appends one line to the log element, prefixed with one space per
 * indentation level.
 *
 * @param s  The message to append (a newline is added).
 */
function log (s) {
  var line = s;
  for (var depth = logIndentLevel; depth > 0; depth--) {
    line = ' ' + line;
  }
  var textNode = document.createTextNode (line + "\n");
  document.logElement.appendChild (textNode);
} // log
61    
/**
 * Mutable holder for the not-yet-tokenized input text.
 *
 * @param s  The initial input string; stored in the |s| property, which the
 *           tokenizer and document.write rewrite as they consume or insert
 *           characters.
 */
function InputStream (s) {
  var stream = this;
  stream.s = s;
} // InputStream
65    
/**
 * Creates a parser that reads from an input stream.
 *
 * @param i    The InputStream to tokenize.
 * @param doc  Optional document to populate.  When omitted, a new JSDocument
 *             bound to this parser is created and flagged as HTML.
 */
function Parser (i, doc) {
  this.parseMode = 'pcdata';

  var target = doc;
  if (!target) {
    target = new JSDocument (this);
    target.manakaiIsHTML = true;
  }
  this.doc = target;

  // The stack of open elements starts with the document itself.
  this.openElements = [target];
  this.input = i;
  this.scriptsExecutedAfterParsing = [];
} // Parser
77    
/**
 * Removes the next token from the front of the input stream and returns it.
 *
 * Returned tokens are plain objects with a |type| of:
 *   'char'      - |value| is the character data;
 *   'start-tag' - |value| is the lowercased tag name, |attrs| maps the
 *                 lowercased attribute name to its value;
 *   'end-tag'   - |value| is the lowercased tag name;
 *   'abort'     - the insertion point was reached, nothing was consumed;
 *   'eof'       - no input is left.
 *
 * Whenever characters are consumed, the parser's insertion point is
 * decreased by the same amount so that it keeps pointing at the same place
 * in the remaining input.
 *
 * Fixes relative to the previous revision:
 *   - the "does the tag end with >" test used substring (length - 1, 1),
 *     which never looks at the last character; it now uses charAt, so a
 *     complete tag that ends exactly at the insertion point is tokenized
 *     instead of aborted;
 *   - an attribute without a value (or with an empty value) no longer throws
 *     a TypeError; its value is the empty string.
 */
Parser.prototype.getNextToken = function () {
  var p = this;
  var i = this.input;
  var token;

  // True when the matched tag text |s| must not be consumed yet: either the
  // insertion point lies inside it, or it ends exactly at the insertion
  // point without a closing ">" (more characters may still be written).
  function tagMustWait (s) {
    if (p.insertionPoint < s.length) return true;
    return p.insertionPoint <= s.length && s.charAt (s.length - 1) != '>';
  }

  if (this.parseMode == 'script') {
    if (p.insertionPoint <= 0) {
      return {type: 'abort'};
    }

    // Character data up to the next "<", but never past the insertion point.
    i.s = i.s.replace (/^([^<]+)/, function (s, t) {
      if (0 < p.insertionPoint && p.insertionPoint < t.length) {
        token = {type: 'char', value: t.substring (0, p.insertionPoint)};
        var ip = p.insertionPoint;
        p.insertionPoint = 0;
        return t.substring (ip, t.length);
      }
      token = {type: 'char', value: t};
      p.insertionPoint -= t.length;
      return '';
    });
    if (token) return token;

    // A complete script end tag.
    i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {
      if (p.insertionPoint < s.length) {
        token = {type: 'abort'};
        return s;
      }
      token = {type: 'end-tag', value: 'script'};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    // A possible prefix of a script end tag that is cut by the insertion
    // point: wait for more input rather than emitting "<" as text.
    // NOTE(review): |seen| is cut at insertionPoint characters while the
    // comparison string is cut at insertionPoint - 2; kept as in the
    // previous revision - confirm whether both should use the same length.
    var m;
    if ((p.insertionPoint < '</script'.length) &&
        (m = i.s.match (/^<\/([SCRIPTscript]+)/))) {
      var seen = m[1].substring (0, p.insertionPoint).toLowerCase ();
      if (seen == 'script'.substring (0, p.insertionPoint - '</'.length)) {
        return {type: 'abort'};
      }
    }

    // Any other "<" is plain character data inside a script element.
    i.s = i.s.replace (/^</, function (s) {
      token = {type: 'char', value: s};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;
    return {type: 'eof'};
  }

  // End tag.
  i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
    if (tagMustWait (s)) {
      token = {type: 'abort'};
      return s;
    }
    token = {type: 'end-tag', value: e.toLowerCase ()};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Start tag, with at most one attribute.
  i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
    if (tagMustWait (s)) {
      token = {type: 'abort'};
      return s;
    }
    var tagName;
    var attrs = {};
    e = e.replace (/^[\S]+/, function (name) {
      tagName = name.toLowerCase ();
      return '';
    });
    e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"']+)))?/,
    function (x, attrName, attrValue1, attrValue2, attrValue3) {
      // A missing or empty value yields '' (previously this threw).
      var value = attrValue1 || attrValue2 || attrValue3 || '';
      value = value.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
          .replace (/&amp;/g, '&');
      attrs[attrName.toLowerCase ()] = value;
      return '';
    });
    // Whatever is left could not be parsed as the tag name or the attribute.
    if (e.length) {
      log ('Broken start tag: "' + e + '"');
    }
    token = {type: 'start-tag', value: tagName, attrs: attrs};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  if (p.insertionPoint <= 0) {
    return {type: 'abort'};
  }

  // Character data up to the next "<", but never past the insertion point.
  i.s = i.s.replace (/^[^<]+/, function (s) {
    if (p.insertionPoint < s.length) {
      token = {type: 'char', value: s.substring (0, p.insertionPoint)};
      var ip = p.insertionPoint;
      p.insertionPoint = 0;
      return s.substring (ip, s.length);
    }
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Anything else (a "<" that did not start a tag) is a single character.
  i.s = i.s.replace (/^[\s\S]/, function (s) {
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  return {type: 'eof'};
} // getNextToken
192    
/**
 * Runs the (simplified) tree construction loop: pulls tokens from
 * getNextToken and builds the tree under this.doc until end of input or
 * until the tokenizer aborts at the insertion point.
 *
 * For a script start tag the numbered steps below are followed; appending
 * the element to the tree is what may run the script.  When the end of
 * input is reached, deferred scripts are executed and the
 * DOMContentLoaded / load events are logged.
 *
 * Returns nothing.  On 'abort' (or when a pending external script is found
 * during a reentrant call) it returns early without logging the events.
 *
 * Fix relative to the previous revision: the fragment-parsing branch set
 * |alreadyExecuted|, a property nothing reads; it now sets
 * |manakaiAlreadyExecuted| like every other site.
 */
Parser.prototype.parse = function () {
  logIndentLevel++;
  log ('parse: start');

  while (true) {
    var token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);
        if (token.attrs.async != null) el.async = true;
        if (token.attrs.defer != null) el.defer = true;
        if (token.attrs.src != null) el.src = token.attrs.src;

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'script';

        // 4.1. Collect all the character tokens.
        while (true) {
          var token = this.getNextToken ();
          log ('token: ' + token.type + ' "' + token.value + '"');

          if (token.type == 'char') {
            // 5. Append a single Text node to the script element node.
            el.manakaiAppendText (token.value);

          // 4.2. Until it returns a token that is not a character token, or
          // until it stops tokenising.
          } else if (token.type == 'eof' ||
                     (token.type == 'end-tag' && token.value == 'script') ||
                     token.type == 'abort') {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            // 7.1. If the next token is not an end tag token with ...
            if (token.type != 'end-tag') {
              // 7.2. This is a parse error.
              log ('Parse error: no </' + 'script>');

              // 7.3. Mark the script element as "already executed".
              el.manakaiAlreadyExecuted = true;
            } else {
              // 7.4. Ignore it.
              //
            }
            break;
          }
        }

        // 8.1. If the parser were originally created for the ...
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and ...
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the ...
        var oldInsertionPoint = this.insertionPoint;
        // 9.2. Let the insertion point be just before the next input ...
        this.setInsertionPoint (0);

        // 10. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 11. Let the insertion point have the value of the old ...
        // The old value is shifted by whatever remains of the characters
        // written while the script ran; an undefined old value becomes NaN
        // here, which setInsertionPoint turns back into undefined.
        oldInsertionPoint += this.insertionPoint;
        this.setInsertionPoint (oldInsertionPoint);

        // 12. If there is a script that will execute as soon as ...
        while (this.scriptExecutedWhenParserResumes) {
          // 12.1. If the tree construction stage is being called reentrantly
          if (this.reentrant) {
            log ('parse: abort (reentrance)');
            logIndentLevel--;
            return;

          // 12.2. Otherwise
          } else {
            // 1.
            var script = this.scriptExecutedWhenParserResumes;
            this.scriptExecutedWhenParserResumes = null;

            // 2. Pause until the script has completed loading.
            //

            // 3. Let the insertion point to just before the next input char.
            this.setInsertionPoint (0);

            // 4. Execute the script.
            executeScript (this.doc, script);

            // 5. Let the insertion point be undefined again.
            this.setInsertionPoint (undefined);

            // 6. If there is once again a script that will execute ...
            //
          }
        }
      } else {
        // Any other start tag: append the element and make it current.
        var el = new JSElement (this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild (el);
        this.openElements.push (el);
      }
    } else if (token.type == 'end-tag') {
      // Only an end tag that matches the current node closes anything.
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'char') {
      this.openElements[this.openElements.length - 1].manakaiAppendText
          (token.value);
    } else if (token.type == 'eof') {
      break;
    } else if (token.type == 'abort') {
      log ('parse: abort');
      logIndentLevel--;
      return;
    }
  }

  log ('stop parsing');

  // readyState = 'interactive'

  // "When a script completes loading" rules start applying.

  // TODO: Handles "list of scripts that will execute as soon as possible"
  // and "list of scripts that will execute asynchronously"

  // Handle "list of scripts that will execute when the document has finished
  // parsing".
  var list = this.scriptsExecutedAfterParsing;
  while (list.length > 0) {
    // TODO: break unless completed loading

    // Step 1.
    //

    // Step 2. and Step 3.
    log ('Executing a |defer|red script...');
    executeScript (this.doc, list.shift ());

    // Step 4.
  }

  log ('DOMContentLoaded event fired');

  // "delays the load event" things has completed:
  // readyState = 'complete'
  log ('load event fired');

  logIndentLevel--;
} // parse
354    
/**
 * Sets the parser's insertion point and logs the change.
 *
 * @param ip  Offset into the remaining input.  undefined, null and NaN all
 *            mean "no insertion point" and are stored as undefined.
 */
Parser.prototype.setInsertionPoint = function (ip) {
  var unset = ip == null || isNaN (ip);
  if (unset) {
    log ('insertion point: set to undefined');
    this.insertionPoint = undefined;
    return;
  }

  var remaining = this.input.s;
  if (ip == remaining.length) {
    log ('insertion point: end of file');
  } else {
    log ('insertion point: set to ' + ip +
         ' (before "' + remaining.substring (0, 10) + '")');
  }
  this.insertionPoint = ip;
}; // setInsertionPoint
368    
/**
 * Minimal document node used as the root of the parsed tree.
 *
 * @param p  The parser that populates this document; kept in |_parser| so
 *           that write () and open () can reach it.
 */
function JSDocument (p) {
  var self = this;
  self.childNodes = [];
  self._parser = p;
} // JSDocument
373    
/**
 * Minimal element node.
 *
 * @param doc        The document the element belongs to.
 * @param localName  The element's (lowercased) tag name.
 */
function JSElement (doc, localName) {
  var self = this;
  self.localName = localName;
  self.ownerDocument = doc;
  self.childNodes = [];
} // JSElement
379    
/**
 * Appends |e| as the last child of this node and returns |e|.
 *
 * Appending a |script| element additionally runs the "running a script"
 * steps: depending on its defer / async / src state the script is queued
 * for after parsing, remembered as the script to execute when the parser
 * resumes, or executed immediately.
 */
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push (e);
  e.parentNode = this;

  // Only script elements need any further processing.
  if (e.localName != 'script') return e;

  logIndentLevel++;
  log ('Running a script: start');

  // |this| is either an element (which has an owner) or the document.
  var doc = this.ownerDocument || this;
  var p = doc._parser;

  // 1. Script type
  // 2.1 - 2.3. Scripting disabled / XML innerHTML / unsupported language:
  //            not modelled.

  // 2.4. If the script element has its "already executed" flag set
  if (e.manakaiAlreadyExecuted) {
    // 2.5. Abort these steps at this point.
    log ('Running a script: aborted');
    logIndentLevel--;
    return e;
  }

  // 3. Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // 4. If the element has a src attribute, then a load for ...
  // TODO: load an external resource

  // 5. The first of the following options:
  var hasSrc = e.src != null;
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    // 5.1. Deferred: run after parsing has finished.
    p.scriptsExecutedAfterParsing.push (e);
    log ('Running a script: aborted (defer)');
  } else if (e.async) {
    // TODO: async scripts, with or without src, are not handled yet.
  } else if (hasSrc) {
    if (e.manakaiParserInserted) {
      // Parser-inserted external script: the parser runs it on resuming.
      if (p.scriptExecutedWhenParserResumes) {
        log ('Error: There is a script that will execute as soon as the parser resumes.');
      }
      p.scriptExecutedWhenParserResumes = e;
      log ('Running a script: aborted (src)');
    }
    // TODO: external scripts that are not parser-inserted.
  } else {
    executeScript (doc, e); // even if other scripts are already executing.
  }

  log ('Running a script: end');
  logIndentLevel--;
  return e;
}; // appendChild
445    
/**
 * Executes a script element: obtains its source text (from |src| via
 * getExternalScript, or from the element's text), logs the load event and
 * hands the text to parseAndRunScript.
 *
 * @param doc  The document on whose behalf the script runs.
 * @param e    The script element.
 *
 * When the external script cannot be obtained, an error event is logged and
 * nothing is run.
 */
function executeScript (doc, e) {
  log ('executing a script block: start');

  var code;
  if (e.src == null) {
    // Inline script.
    code = e.text;
  } else {
    code = getExternalScript (e.src);

    // If the load resulted in an error, then ... firing an error event ...
    if (code == null) {
      log ('error event fired at the script element');
      return;
    }

    log ('External script loaded: "' + code + '"');
  }

  // If the load was successful
  log ('load event fired at the script element');

  // Scripting is assumed enabled, Document.designMode disabled, and the
  // Document the active document in its browsing context.
  parseAndRunScript (doc, code);

  log ('executing a script block: end');
} // executeScript
476    
/**
 * Resolves the |src| of a script element to script source text.
 *
 * Only URIs of the form  javascript:'...'  or  javascript:"..."  (optional
 * white space around the quoted string) are supported; the quoted content
 * is returned as the script text.
 *
 * @param uri  The value of the src attribute.
 * @return     The script text (possibly the empty string), or null when the
 *             URI is not supported; a message is logged in that case.
 *
 * Fix relative to the previous revision: an empty quoted string, in either
 * quote style, is now a valid empty script.  Before, the double-quoted form
 * did not match at all and the single-quoted form returned null.
 */
function getExternalScript (uri) {
  if (!uri.match (/^javascript:/i)) {
    log ('URI scheme not supported: <' + uri + '>');
    return null;
  }

  var m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]*)")\s*$/i);
  if (!m) {
    log ('Complex javascript: URI is not supported: <' + uri + '>');
    return null;
  }

  // Exactly one of the two groups took part in the match; the other one is
  // undefined (or '' in some old engines), so fall through to ''.
  return m[1] || m[2] || '';
} // getExternalScript
497    
/**
 * "Runs" a script consisting solely of document.write calls.
 *
 * The script text must be a sequence of statements of the form
 *   document.write (v, v, ...);
 * where each v is a single- or double-quoted string without escapes.  Each
 * statement is executed by calling doc.write with the unquoted strings.
 *
 * @param doc  Object whose write method receives the arguments.
 * @param s    The script source text.
 *
 * Processing stops, with a logged 'Script parse error', at the first text
 * that is not such a statement.
 *
 * Fix relative to the previous revision: white space is now accepted after
 * every argument, not only after the first one, so a call such as
 * document.write ('a', 'b' ); is no longer rejected.
 */
function parseAndRunScript (doc, s) {
  var statement = /^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*")\s*)*)\)\s*;\s*/;

  while (true) {
    var matched = false;
    s = s.replace (statement, function (whole, argList) {
      matched = true;

      // Collect every quoted literal, stripping the surrounding quotes.
      var args = [];
      var literal = /'[^']*'|"[^"]*"/g;
      var found;
      while ((found = literal.exec (argList)) != null) {
        args.push (found[0].substring (1, found[0].length - 1));
      }

      doc.write.apply (doc, args);
      return '';
    });
    if (s == '') break;
    if (!matched) {
      log ('Script parse error: "' + s + '"');
      break;
    }
  }
} // parseAndRunScript
518    
/**
 * Minimal text node.
 *
 * @param data  The node's character data.
 */
function JSText (data) {
  var self = this;
  self.data = data;
} // JSText
522    
/**
 * Appends character data to this node: the text is merged into the last
 * child when that child is a JSText, otherwise a new JSText child is added.
 *
 * @param s  The text to append.
 */
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var children = this.childNodes;
  // Undefined when there are no children, which is not a JSText either.
  var last = children[children.length - 1];
  if (last instanceof JSText) {
    last.data += s;
  } else {
    children.push (new JSText (s));
  }
}; // manakaiAppendText
533 wakaba 1.2
/**
 * Simplified document.open ().
 *
 * Unless the document is currently being parsed by a parser that was not
 * itself created by open (), the document is cleared and a fresh
 * script-created parser with an empty input stream is installed, with its
 * insertion point at the end of that (empty) input.
 *
 * Accepts up to two optional arguments (type, 'replace'); both are read but
 * have no effect in this implementation.
 *
 * @return  This document.
 *
 * Fix relative to the previous revision: the "is a parse in progress" test
 * read |insertionPoint| from the input stream, which never has that
 * property, so the test was always false.  It now reads the parser's
 * insertion point.  NaN is treated like undefined because the tokenizer's
 * arithmetic on an undefined insertion point leaves NaN behind.
 */
JSDocument.prototype.open = function () {
  // Two or fewer arguments

  // Step 1.
  var type = arguments[0] || 'text/html';

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  if (this._parser && !this._parser.scriptCreated) {
    var ip = this._parser.insertionPoint;
    if (ip != undefined && !isNaN (ip)) {
      log ('document.open () in parsing mode is ignored');
      return this;
    }
  }

  // Step 4.
  log ('onbeforeunload event fired');
  log ('onunload event fired');

  // Step 5.
  if (this._parser) {
    // Discard the parser.
  }

  // Step 6.
  log ('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser (new InputStream (''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  this._parser.setInsertionPoint (this._parser.input.s.length);

  // Step 12.
  return this;
}; // document.open
585    
/**
 * Simplified document.write ().
 *
 * All arguments are concatenated and inserted into the parser's input
 * stream just before the insertion point, which is then moved past the
 * inserted text.  Unless a script is waiting for the parser to resume, the
 * parser is then run reentrantly to process the inserted characters.
 *
 * Returns nothing.
 *
 * Fixes relative to the previous revision:
 *   - Array.join (arguments, ''), a non-standard generic that current
 *     engines no longer provide, is replaced by
 *     Array.prototype.join.call;
 *   - step 3 tested |scriptExecutedAfterParserResumes|, a property nothing
 *     sets; it now tests |scriptExecutedWhenParserResumes|, the property
 *     set by appendChild and consumed by parse.
 */
JSDocument.prototype.write = function () {
  logIndentLevel++;

  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
    this.open ();
    p = this._parser;
  }

  // 2. ... inserted into the input stream just before the insertion point.
  var s = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + s + '"' +
       ' before "' +
       p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
  p.input.s = p.input.s.substring (0, p.insertionPoint) + s
      + p.input.s.substring (p.insertionPoint, p.input.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a script that will execute as soon as the parser resumes
  if (p.scriptExecutedWhenParserResumes) {
    log ('document.write: processed later (there is an unprocessed <script src>)');
    logIndentLevel--;
    return;
  }

  // 4. Process the characters that were inserted, ...
  // The flag tells parse () that it is running inside a script, so that it
  // must not execute a pending external script itself.
  var originalReentrant = p.reentrant;
  p.reentrant = true;
  p.parse ();
  p.reentrant = originalReentrant;
  // TODO: "Abort the processing of any nested invokations of the tokeniser,
  // yielding control back to the caller." (script element parsing).  Do we
  // need to do something here?

  // 5. Return
  log ('document.write: return');

  logIndentLevel--;
  return;
}; // document.write
628    
/**
 * |text| accessor of an element: the concatenated data of its JSText
 * children, in order (text of descendant elements is not included).
 *
 * Defined with Object.defineProperty instead of the deprecated
 * __defineGetter__; enumerable and configurable are set to true so that
 * the resulting property has the same attributes as before.
 */
Object.defineProperty (JSElement.prototype, 'text', {
  get: function () {
    var r = '';
    for (var i = 0; i < this.childNodes.length; i++) {
      if (this.childNodes[i] instanceof JSText) {
        r += this.childNodes[i].data;
      }
    }
    return r;
  },
  enumerable: true,
  configurable: true
});
638 wakaba 1.1
/**
 * Serializes the children of |n| as an indented, line-oriented tree dump.
 *
 * Each line starts with "| " followed by |indent|.  An element is shown by
 * its local name, followed by its async / defer / src state and, one level
 * deeper, its own children; a text node is shown as its quoted data; any
 * other child is shown via string conversion.
 *
 * @param n       Node whose childNodes are dumped.
 * @param indent  Indentation string for this level.
 * @return        The dump as a string ('' when there are no children).
 */
function dumpTree (n, indent) {
  var prefix = '| ' + indent;
  var pieces = [];
  var children = n.childNodes;

  for (var k = 0; k < children.length; k++) {
    var node = children[k];
    if (node instanceof JSElement) {
      pieces.push (prefix + node.localName + '\n');
      if (node.async) pieces.push (prefix + ' async=""\n');
      if (node.defer) pieces.push (prefix + ' defer=""\n');
      if (node.src) pieces.push (prefix + ' src="' + node.src + '"\n');
      pieces.push (dumpTree (node, indent + ' '));
    } else if (node instanceof JSText) {
      pieces.push (prefix + '"' + node.data + '"\n');
    } else {
      pieces.push (prefix + node + '\n');
    }
  }

  return pieces.join ('');
} // dumpTree
657     </script>
658     </head>
<body onload="
  // Page start-up: locate the source textarea and the log element, apply
  // the |s| query parameter (if any) and run the first parse.
  document.sourceElement = document.getElementsByTagName ('textarea')[0];

  // The query string is a list of name=value pairs separated by semicolons;
  // the |s| parameter replaces the initial content of the textarea.
  var q = location.search;
  if (q != null) {
    q = q.substring (1).split (/;/);
    for (var i = 0; i < q.length; i++) {
      var v = q[i].split (/=/, 2);
      try {
        v[0] = decodeURIComponent (v[0]);
        v[1] = decodeURIComponent (v[1] || '');
      } catch (e) {
        // A malformed percent-escape makes decodeURIComponent throw; skip
        // that parameter instead of aborting the whole start-up.
        continue;
      }
      if (v[0] == 's') {
        document.sourceElement.value = v[1];
      }
    }
  }

  document.logElement = document.getElementsByTagName ('output')[0];
  update ();
">
678 wakaba 1.8 <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
679     Parser</h1>
680 wakaba 1.1
681 wakaba 1.7 <h2>Markup to test
682 wakaba 1.8 (<a href=data:, id=permalink rel=bookmark>permalink</a>,
683     <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
684     id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
685     Viewer</a>)</h2>
686 wakaba 1.7 <p>
687     <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
688 wakaba 1.1 &lt;head>&lt;/head>&lt;body>
689     &lt;p>
690     &lt;script>
691 wakaba 1.3 document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
692 wakaba 1.1 &lt;/script>
693     &lt;p>
694     </textarea>
695    
696 wakaba 1.7 <h2>Log</h2>
697     <p><output></output>
698    
699 wakaba 1.8 <h2>Note</h2>
700    
701     <p>This is a <em>simplified</em> implementation of
702     <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
703     Parsing Algorithm</a>. It implements only the script-related part of the
704     algorithm. In particular, this parser:
705     <ul>
706     <li>Does not support <code>DOCTYPE</code> and comment tokens.
707     <li>Does not support entities except for <code>&amp;quot;</code>,
708     <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
709     <code>src</code> attribute value.
710     <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
711     algorithm, and so on.
712     <li>Does not raise parse errors for invalid attribute specifications in start
713     or end tags.
714     <li>Does not support CDATA/PCDATA element other than <code>script</code>.
715     <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
716     in <code>script</code> element.
717     <li>Does not support foreign (SVG or MathML) elements.
718     <li>Only supports <code>script</code> <code>type</code>
719     <code>text/javascript</code>. <code>type</code> and <code>language</code>
720     attributes are ignored.
721     <li>Only supports <code>document.write</code>.
722     The script code must match the regular expression
723     <code>^\s*(?:document\.write\s*\(<var>v</var>\s*(?:,\s*<var>v</var>\s*)*\)\s*;\s*)*$</code>
724     where <var>v</var> is <code>"[^"]*"|'[^']*'</code>.
725     <li>Only supports <code>javascript:</code>
726     <abbr title="Uniform Resource Identifier">URI</abbr> scheme in the
727     <code>src</code> attribute of the <code>script</code> element. In addition,
728     the <abbr title="Uniform Resource Identifier">URI</abbr> must conform to
729     the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
730     </ul>
731 wakaba 1.7
732 wakaba 1.8 <p>For some reason, this parser does not work in browsers that do
733     not support JavaScript 1.5.
734 wakaba 1.7
735     <!-- TODO: multiple attributes are not supported yet -->
736 wakaba 1.1
737     </body>
738     </html>

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24