/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.15 - (hide annotations) (download) (as text)
Tue Apr 29 03:29:41 2008 UTC (16 years, 6 months ago) by wakaba
Branch: MAIN
Changes since 1.14: +31 -7 lines
File MIME type: text/html
Support for w(innerHTML) dumpping

1 wakaba 1.1 <!DOCTYPE HTML>
2     <html lang=en>
3     <head>
4 wakaba 1.8 <title>Live Scripting HTML Parser</title>
5 wakaba 1.13 <link rel=author href="http://suika.fam.cx/~wakaba/who?">
6     <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
7     title="GNU GPL2 or later">
8 wakaba 1.1 <style>
9 wakaba 1.13 h1 {
10     margin: 0;
11     font-size: 150%;
12     }
13     h2 {
14 wakaba 1.7 margin: 0;
15     font-size: 100%;
16     }
17 wakaba 1.13 p {
18     margin: 0 1em;
19 wakaba 1.7 }
20 wakaba 1.1 textarea {
21 wakaba 1.7 width: 100%;
22     -width: 99%;
23     height: 10em;
24 wakaba 1.1 }
25     output {
26     display: block;
27     font-family: monospace;
28 wakaba 1.4 white-space: -moz-pre-wrap;
29     white-space: pre-wrap;
30 wakaba 1.1 }
31     </style>
32     <script>
// Identifier of the timer armed by update (), or 0 before the first
// request has been made.
var delayedUpdater = 0;

// Requests a re-parse of the markup in the source textarea.  The work
// itself is done by update2 (), 100 milliseconds after the latest request:
// a request made while an earlier one is still pending replaces it.
function update () {
  if (delayedUpdater) {
    // Drop the previously armed timer before arming a new one.
    clearTimeout (delayedUpdater);
    delayedUpdater = 0;
  }

  delayedUpdater = setTimeout (update2, 100);
} // update
42    
// Re-parses the content of the source textarea if it differs from the text
// handled by the previous run: refreshes the permalink and the Live DOM
// Viewer link, clears the log, runs the parser, and dumps the resulting
// tree into the log.
function update2 () {
  var source = document.sourceElement.value;
  if (source == document.previousSourceText) {
    // Nothing has changed since the last run.
    return;
  }
  document.previousSourceText = source;

  var encoded = encodeURIComponent (source);
  document.links['permalink'].href = location.pathname + '?s=' + encoded;
  document.links['ldvlink'].href
      = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?' + encoded;

  document.logElement.textContent = '';

  var parser = new Parser (new InputStream (source));
  // Keep the document created by this parser; it is the one dumped below.
  var doc = parser.doc;
  parser.parse ();

  log (dumpTree (doc, ''));

  if (parser.hasAsyncScript) {
    log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
  }
} // update2
65 wakaba 1.1
// Current nesting depth of the log; log () indents every line it writes by
// one character per level.
var logIndentLevel = 0;

// Appends the message |s| (which may span several lines) to the log
// element as a text node, indenting each of its lines according to
// |logIndentLevel| and terminating it with a newline.
function log (s) {
  var pad = '';
  for (var remaining = logIndentLevel; remaining > 0; remaining--) {
    pad += ' ';
  }

  var lines = s.split ('\n');
  var text = pad + lines.join ('\n' + pad) + '\n';
  document.logElement.appendChild (document.createTextNode (text));
} // log
75    
// An input stream is a mutable holder of the characters that have not been
// tokenised yet.  The tokeniser removes characters from the head of |s|
// and document.write () inserts characters into it.
function InputStream (s) {
  this.s = s;
} // InputStream
79    
// Creates a parser reading from the input stream |i|.
//
// If |doc| is given, the parser builds its tree in that document;
// otherwise a new JSDocument bound to this parser is created and flagged
// as an HTML document.  The document is the initial current node.
function Parser (i, doc) {
  this.input = i;
  this.parseMode = 'pcdata';

  // Scripts whose execution is postponed, grouped by the moment at which
  // they are going to be executed.
  this.scriptsExecutedAfterParsing = [];
  this.scriptsExecutedSoon = [];
  this.scriptsExecutedAsynchronously = [];

  if (!doc) {
    doc = new JSDocument (this);
    doc.manakaiIsHTML = true;
  }
  this.doc = doc;
  this.openElements = [doc];
} // Parser
93    
// Removes the next token from the head of the input stream and returns it.
//
// The returned object has a |type| of:
//   'char'      - character data; |value| is the text.
//   'start-tag' - |value| is the lowercased tag name, |attrs| maps
//                 lowercased attribute names to their values.
//   'end-tag'   - |value| is the lowercased tag name.
//   'abort'     - the tokeniser reached the insertion point and cannot
//                 go further; nothing has been consumed.
//   'eof'       - the input stream is empty.
//
// The insertion point, when it is a number, is an offset from the head of
// the remaining input, so it is decreased by the length of every consumed
// token.  Characters at or after the insertion point are never consumed.
Parser.prototype.getNextToken = function () {
  var p = this;
  var i = this.input;
  var token;

  // True if the tag text |s| cannot be tokenised yet: either it crosses
  // the insertion point, or it ends exactly at the insertion point without
  // its closing ">" (more characters might still be written there).
  function isIncompleteTag (s) {
    return p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length && s.charAt (s.length - 1) != '>');
  }

  if (this.parseMode == 'cdata') {
    var tagName = this.endTagName;

    if (p.insertionPoint <= 0) {
      return {type: 'abort'};
    }

    // A run of characters other than "<", cut at the insertion point.
    i.s = i.s.replace (/^([^<]+)/, function (s, t) {
      if (0 < p.insertionPoint && p.insertionPoint < t.length) {
        token = {type: 'char', value: t.substring (0, p.insertionPoint)};
        var ip = p.insertionPoint;
        p.insertionPoint = 0;
        return t.substring (ip, t.length);
      }
      token = {type: 'char', value: t};
      p.insertionPoint -= t.length;
      return '';
    });
    if (token) return token;

    // The end tag that closes the current CDATA element.
    var pattern = new RegExp ('^</' + tagName + '>', 'i');
    i.s = i.s.replace (pattern, function (s) {
      if (p.insertionPoint < s.length) {
        token = {type: 'abort'};
        return s;
      }
      token = {type: 'end-tag', value: tagName};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    // The characters before the insertion point might be the beginning of
    // the end tag.  Compare only the part of the tag name that lies before
    // the insertion point with the same number of characters of the
    // expected name.
    var m;
    if ((p.insertionPoint < ('</' + tagName).length) &&
        (m = i.s.match (/^<\/([A-Za-z]+)/))) {
      var nameLength = p.insertionPoint - '</'.length;
      if (m[1].substring (0, nameLength).toLowerCase () ==
          tagName.substring (0, nameLength)) {
        return {type: 'abort'};
      }
    }

    // A "<" that does not start the end tag is character data.
    i.s = i.s.replace (/^</, function (s) {
      token = {type: 'char', value: s};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    return {type: 'eof'};
  }

  // End tag
  i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
    if (isIncompleteTag (s)) {
      token = {type: 'abort'};
      return s;
    }
    token = {type: 'end-tag', value: e.toLowerCase ()};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Start tag
  i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
    if (isIncompleteTag (s)) {
      token = {type: 'abort'};
      return s;
    }

    var tagName;
    var attrs = {};
    e = e.replace (/^[\S]+/, function (v) {
      tagName = v.toLowerCase ();
      return '';
    });

    // Consume attribute specifications one by one.
    while (true) {
      var m = false;
      e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
        function (x, attrName, attrValue1, attrValue2, attrValue3) {
          // An attribute without a value, or with an empty value, matches
          // none of the value groups; its value is the empty string.
          var v = attrValue1 || attrValue2 || attrValue3 || '';
          v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
              .replace (/&amp;/g, '&');
          attrs[attrName.toLowerCase ()] = v;
          m = true;
          return '';
        });
      if (!m) break;
    }
    if (e.length) {
      log ('Broken start tag: "' + e + '"');
    }

    token = {type: 'start-tag', value: tagName, attrs: attrs};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  if (p.insertionPoint <= 0) {
    return {type: 'abort'};
  }

  // A run of characters other than "<", cut at the insertion point.
  i.s = i.s.replace (/^[^<]+/, function (s) {
    if (p.insertionPoint < s.length) {
      token = {type: 'char', value: s.substring (0, p.insertionPoint)};
      var ip = p.insertionPoint;
      p.insertionPoint = 0;
      return s.substring (ip, s.length);
    }
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Any other single character (a "<" that does not start a tag).
  i.s = i.s.replace (/^[\s\S]/, function (s) {
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  return {type: 'eof'};
}; // getNextToken
215    
// Runs the simplified tree construction stage on the tokens of the input
// stream until the tokeniser reports 'eof' or 'abort'.
//
// On 'abort', and when a script that will execute as soon as the parser
// resumes is found while |this.reentrant| is set, the method returns at
// once.  On 'eof' the "stop parsing" phase follows: postponed scripts are
// executed and the firing of the final events is logged.
Parser.prototype.parse = function () {
  // Steps 4 to 7 of the processing of an element whose content is CDATA:
  // appends the character tokens to |el| until another kind of token is
  // returned, switches the tokeniser back to the PCDATA state, and, unless
  // that token is the end tag named |parser.endTagName|, reports a parse
  // error and marks |el| as "already executed".
  function collectCDATAText (parser, el) {
    while (true) {
      var t = parser.getNextToken ();
      log ('token: ' + t.type + ' "' + t.value + '"');

      if (t.type == 'char') {
        // 5. Append a single Text node to the element node.
        el.manakaiAppendText (t.value);

        // 4.2. Until it returns a token that is not a character token, or
        // until it stops tokenising.
      } else if (t.type == 'eof' ||
                 t.type == 'end-tag' ||
                 t.type == 'abort') {
        // 6. Switched back to the PCDATA state.
        parser.parseMode = 'pcdata';

        // 7.1. If the next token is not an end tag token with ...
        if (!(t.type == 'end-tag' && t.value == parser.endTagName)) {
          // 7.2. This is a parse error.
          log ('Parse error: no </' + parser.endTagName + '>');

          // 7.3. Mark the element as "already executed".
          el.manakaiAlreadyExecuted = true;
        }
        // 7.4. Otherwise, ignore the end tag.
        break;
      }
    }
  } // collectCDATAText

  logIndentLevel++;
  log ('parse: start');

  while (true) {
    var token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);
        if (token.attrs.async != null) el.async = true;
        if (token.attrs.defer != null) el.defer = true;
        if (token.attrs.src != null) el.src = token.attrs.src;

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'cdata';
        this.endTagName = 'script';

        // 4. - 7. Collect the content of the element.
        collectCDATAText (this, el);

        // 8.1. If the parser were originally created for the ...
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and ...
          // (The flag is the one tested by appendChild ().)
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the ...
        var oldInsertionPoint = this.insertionPoint;
        // 9.2. Let the insertion point be just before the next input ...
        this.setInsertionPoint (0);

        // 10. Append the new element to the current node.  (This might
        // run the script.)
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 11. Let the insertion point have the value of the old ...
        // Characters written by the script and not consumed yet are still
        // before the old insertion point.  If the old insertion point was
        // undefined, the sum is NaN, which setInsertionPoint () treats as
        // undefined.
        oldInsertionPoint += this.insertionPoint;
        this.setInsertionPoint (oldInsertionPoint);

        // 12. If there is a script that will execute as soon as ...
        while (this.scriptExecutedWhenParserResumes) {
          // 12.1. If the tree construction stage is being called reentrantly
          if (this.reentrant) {
            log ('parse: abort (reentrance)');
            logIndentLevel--;
            return;

          // 12.2. Otherwise
          } else {
            // 1.
            var script = this.scriptExecutedWhenParserResumes;
            this.scriptExecutedWhenParserResumes = null;

            // 2. Pause until the script has completed loading.
            //

            // 3. Let the insertion point to just before the next input char.
            this.setInsertionPoint (0);

            // 4. Execute the script.
            executeScript (this.doc, script);

            // 5. Let the insertion point be undefined again.
            this.setInsertionPoint (undefined);

            // 6. If there is once again a script that will execute ...
            //
          }
        }
      } else if (token.value == 'style' ||
                 token.value == 'noscript' ||
                 token.value == 'xmp') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);

        // 2. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'cdata';
        this.endTagName = token.value;

        // 4. - 7. Collect the content of the element.
        collectCDATAText (this, el);
      } else {
        // Any other element: append it and make it the current node.
        var el = new JSElement (this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild (el);
        this.openElements.push (el);
      }
    } else if (token.type == 'end-tag') {
      // Only an end tag matching the current node closes an element.
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'char') {
      this.openElements[this.openElements.length - 1].manakaiAppendText
          (token.value);
    } else if (token.type == 'eof') {
      break;
    } else if (token.type == 'abort') {
      log ('parse: abort');
      logIndentLevel--;
      return;
    }
  }

  log ('stop parsing');

  // readyState = 'interactive'

  // "When a script completes loading" rules start applying.

  // Executing a script might queue more scripts, so repeat until both
  // lists are empty.
  while (this.scriptsExecutedSoon.length > 0 ||
         this.scriptsExecutedAsynchronously.length > 0) {
    // Handle "list of scripts that will execute as soon as possible".
    while (this.scriptsExecutedSoon.length > 0) {
      var e = this.scriptsExecutedSoon.shift ();

      // If it has completed loading
      log ('Execute an external script not inserted by parser...');
      executeScript (this.doc, e);

      // NOTE: It MAY be executed before the end of the parsing, according
      // to the spec.
      this.hasAsyncScript = true;
    }

    // Handle "list of scripts that will execute asynchronously".
    while (this.scriptsExecutedAsynchronously.length > 0) {
      var e = this.scriptsExecutedAsynchronously.shift ();

      // Step 1.
      // We assume that all scripts have been loaded at this time.

      // Step 2.
      log ('Execute an asynchronous script...');
      executeScript (this.doc, e);

      // Step 3.
      //

      // Step 4.
      //

      this.hasAsyncScript = true;
    }
  }

  // Handle "list of scripts that will execute when the document has finished
  // parsing".
  var list = this.scriptsExecutedAfterParsing;
  while (list.length > 0) {
    // TODO: break unless completed loading

    // Step 1.
    //

    // Step 2. and Step 3.
    log ('Executing a |defer|red script...');
    executeScript (this.doc, list.shift ());

    // Step 4.
  }

  log ('DOMContentLoaded event fired');

  // Everything that "delays the load event" has completed:
  // readyState = 'complete'
  log ('load event fired');

  logIndentLevel--;
}; // parse
456    
// Sets the insertion point of the parser to |ip| and logs the change.
//
// |ip| is an offset from the head of the remaining input.  |undefined|,
// |null|, and NaN all make the insertion point undefined.
Parser.prototype.setInsertionPoint = function (ip) {
  if (ip == null || isNaN (ip)) {
    log ('insertion point: set to undefined');
    this.insertionPoint = undefined;
    return;
  }

  var rest = this.input.s;
  if (ip == rest.length) {
    log ('insertion point: end of file');
  } else {
    log ('insertion point: set to ' + ip +
         ' (before "' + rest.substring (0, 10) + '")');
  }
  this.insertionPoint = ip;
}; // setInsertionPoint
470    
// A minimal document node: an initially empty list of children and a
// reference to the parser |p| associated with the document.
function JSDocument (p) {
  this._parser = p;
  this.childNodes = [];
} // JSDocument
475    
// A minimal element node named |localName| and owned by the document
// |doc|.  It has no children initially.
function JSElement (doc, localName) {
  this.ownerDocument = doc;
  this.localName = localName;
  this.childNodes = [];
} // JSElement
481    
// Appends the node |e| to this node and returns |e|.
//
// Appending a |script| element additionally runs the "running a script"
// steps: unless the element is flagged as "already executed", it is
// flagged so and then either queued in one of the lists of the parser of
// the document or executed at once.
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push (e);
  e.parentNode = this;

  if (e.localName != 'script') {
    return e;
  }

  logIndentLevel++;
  log ('Running a script: start');

  // |this| is either an element or the document itself.
  var ownerDoc = this.ownerDocument || this;
  var parser = ownerDoc._parser;

  // 1. Script type
  // 2.1. If scripting is disabled
  // 2.2. If the script element was created by an XML ... innerHTML ...
  // 2.3. If the user agent does not support the scripting language ...
  //   (None of the above is implemented.)

  // 2.4. If the script element has its "already executed" flag set
  if (e.manakaiAlreadyExecuted) {
    // 2.5. Abort these steps at this point.
    log ('Running a script: aborted (already executed)');
    logIndentLevel--;
    return e;
  }

  // 3. Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // 4. If the element has a src attribute, then a load for ...
  // TODO: load an external resource

  // 5. The first of the following options:
  var hasSrc = e.src != null;
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    // 5.1.
    parser.scriptsExecutedAfterParsing.push (e);
    log ('Running a script: aborted (defer)');
  } else if (e.async && hasSrc) {
    parser.scriptsExecutedAsynchronously.push (e);
    log ('Running a script: aborted (async src)');
  } else if (e.async && !hasSrc &&
             parser.scriptsExecutedAsynchronously.length > 0) {
    parser.scriptsExecutedAsynchronously.push (e);
    log ('Running a script: aborted (async)');
    // ISSUE: What is the difference with the case above?
  } else if (hasSrc && e.manakaiParserInserted) {
    if (parser.scriptExecutedWhenParserResumes) {
      log ('Error: There is a script that will execute as soon as the parser resumes.');
    }
    parser.scriptExecutedWhenParserResumes = e;
    log ('Running a script: aborted (src parser-inserted)');
  } else if (hasSrc) {
    parser.scriptsExecutedSoon.push (e);
    log ('Running a script: aborted (src)');
  } else {
    // Executed even if other scripts are already executing.
    executeScript (ownerDoc, e);
  }

  log ('Running a script: end');
  logIndentLevel--;

  return e;
}; // appendChild
551    
// Executes the script element |e| in the document |doc|.
//
// The code is obtained from the |src| of the element if it has one, from
// its text content otherwise.  If the external code cannot be obtained,
// the firing of an error event is logged and nothing is executed.
function executeScript (doc, e) {
  log ('executing a script block: start');

  var code;
  if (e.src == null) {
    code = e.text;
  } else {
    code = getExternalScript (e.src);

    // If the load resulted in an error, then ... firing an error event ...
    if (code == null) {
      log ('error event fired at the script element');
      return;
    }

    log ('External script loaded: "' + code + '"');
  }

  // If the load was successful
  log ('load event fired at the script element');

  // Scripting is always enabled, Document.designMode is always disabled,
  // and the document is always the active document in its browsing context.
  parseAndRunScript (doc, code);

  log ('executing a script block: end');
} // executeScript
582    
// Returns the script code designated by |uri|, or |null| (after logging
// the reason) if the URI is not supported.
//
// Only |javascript:| URIs consisting of a single string literal, delimited
// by |'|s or |"|s, are supported; the code is the unescaped content of the
// literal, which may be empty.
function getExternalScript (uri) {
  if (!uri.match (/^javascript:/i)) {
    log ('URI scheme not supported: <' + uri + '>');
    return null;
  }

  var m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]*)")\s*$/i);
  if (!m) {
    log ('Complex javascript: URI is not supported: <' + uri + '>');
    return null;
  }

  // Exactly one of the two groups has participated in the match.  An empty
  // literal leaves both falsy, in which case the code is the empty string.
  return unescapeJSLiteral (m[1] || m[2] || '');
} // getExternalScript
603    
// Interprets the script code |s| in the document |doc|.
//
// The code has to be a sequence of statements of the three supported
// forms: a |document.write| call with string literal arguments, the
// creation and insertion of a |script| element with a |src|, and the
// |w (document.documentElement.innerHTML)| dump request.  Statements are
// removed from the head of the code as they are executed; anything else
// is reported as a script parse error and stops the execution.
function parseAndRunScript (doc, s) {
  while (true) {
    var matched = false;

    // document.write ("string", "string", ...);
    s = s.replace (/^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/, function (all, argList) {
      matched = true;
      var literals = argList.match (/('[^']*'|"[^"]*")/g);
      var args = [];
      for (var n = 0; n < literals.length; n++) {
        // Strip the delimiters of the literal.
        var literal = literals[n];
        args.push (unescapeJSLiteral
            (literal.substring (1, literal.length - 1)));
      }
      doc.write.apply (doc, args);
      return '';
    });

    // var s = document.createElement ("script"); s.src = "string";
    // document.documentElement.appendChild (s);
    var noDocumentElement = false;
    s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'([^']*)'|"([^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
      function (all, t, u) {
        matched = true;
        // Both groups are falsy when the literal is empty.
        var uri = unescapeJSLiteral (t || u || '');
        noDocumentElement = !doc._insertExternalScript (uri);
        return '';
      });
    if (noDocumentElement) {
      log ('Script error: documentElement is null');
      break;
    }

    // w (document.documentElement.innerHTML);
    s = s.replace (/^\s*w\s*\(\s*document\.documentElement\.innerHTML\s*\)\s*;\s*/,
      function (all) {
        matched = true;
        log (dumpTree (doc, ''));
        return '';
      });

    if (s == '') break;
    if (!matched) {
      log ('Script parse error: "' + s + '"');
      break;
    }
  }
} // parseAndRunScript
642    
// Returns the string |s| with every |\uHHHH| escape (a backslash, a "u",
// and four hexadecimal digits) replaced by the character with that code.
// No other escape is interpreted.
function unescapeJSLiteral (s) {
  var escapePattern = /\\u([0-9A-Fa-f]{4})/g;
  return s.replace (escapePattern, function (escape, hex) {
    var code = parseInt (hex, 16);
    return String.fromCharCode (code);
  });
} // unescapeJSLiteral
648    
// A minimal text node whose content is |data|.
function JSText (data) {
  this.data = data;
} // JSText
652    
// Appends the characters |s| to this node: they are added to the last
// child if it is a text node; otherwise a new text node is appended.
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var children = this.childNodes;
  var last = children.length > 0 ? children[children.length - 1] : null;

  if (last instanceof JSText) {
    last.data += s;
  } else {
    children.push (new JSText (s));
  }
}; // manakaiAppendText
663 wakaba 1.2
// Implements |document.open ([type [, replace]])| and returns the document.
//
// The call is ignored if the document is being parsed by a parser that is
// not script-created and whose insertion point is defined.  Otherwise the
// content of the document is discarded and a new script-created parser,
// with an empty input stream and its insertion point at the end of that
// stream, is associated with the document.
JSDocument.prototype.open = function () {
  // Two or fewer arguments

  // Step 1.  (The type is not used yet; see step 9.)
  var type = arguments[0] || 'text/html';

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  // The insertion point is a property of the parser, not of its input
  // stream.  It is NaN, rather than undefined, once the tokeniser has
  // consumed input while it was undefined, so both mean "undefined" here.
  var parser = this._parser;
  if (parser &&
      !parser.scriptCreated &&
      parser.insertionPoint != undefined &&
      !isNaN (parser.insertionPoint)) {
    log ('document.open () in parsing mode is ignored');
    return this;
  }

  // Step 4.
  log ('onbeforeunload event fired');
  log ('onunload event fired');

  // Step 5.
  if (this._parser) {
    // Discard the parser.
  }

  // Step 6.
  log ('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser (new InputStream (''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  this._parser.setInsertionPoint (this._parser.input.s.length);

  // Step 12.
  return this;
}; // document.open
715    
// Implements |document.write (string, ...)|.
//
// The concatenation of the arguments is inserted into the input stream of
// the parser of the document, just before the insertion point, and the
// insertion point is moved after the inserted characters.  Unless there is
// a script that will execute as soon as the parser resumes, the parser is
// then run reentrantly to process the inserted characters.  If the
// insertion point is undefined, |open ()| is invoked first.
JSDocument.prototype.write = function () {
  log ('document.write: start');
  logIndentLevel++;

  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
    this.open ();
    // open () might have replaced the parser.
    p = this._parser;
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // |arguments| is not an array, so borrow the method of arrays.
  var s = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + s + '"' +
       ' before "' +
       p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
  p.input.s = p.input.s.substring (0, p.insertionPoint) + s
      + p.input.s.substring (p.insertionPoint, p.input.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a script that will execute as soon as the parser resumes
  // (the property set by appendChild () and consumed by parse ()), the
  // inserted characters are left in the input stream for later processing.
  if (p.scriptExecutedWhenParserResumes) {
    log ('document.write: processed later (there is an unprocessed <script src>)');
    logIndentLevel--;
    log ('document.write: return');
    return;
  }

  // 4. Process the characters that were inserted, ...
  var originalReentrant = p.reentrant;
  p.reentrant = true;
  p.parse ();
  p.reentrant = originalReentrant;
  // TODO: "Abort the processing of any nested invokations of the tokeniser,
  // yielding control back to the caller." (<script> parsing). Do we need
  // to do something here?

  // 5. Return
  logIndentLevel--;
  log ('document.write: return');

  return;
}; // document.write
760    
// Creates a |script| element whose |src| is |uri| and appends it to the
// document element.  Returns |true| if the element has been appended, or
// |false| if the document has no document element.
JSDocument.prototype._insertExternalScript = function (uri) {
  var script = new JSElement (this, 'script');
  script.src = uri;

  var root = this.documentElement;
  if (!root) {
    return false;
  }
  root.appendChild (script);
  return true;
}; // _insertExternalScript
771    
// |documentElement|: the first child of the document that is an element,
// or |null| if there is no such child.
JSDocument.prototype.__defineGetter__ ('documentElement', function () {
  var children = this.childNodes;
  var found = null;
  for (var n = 0; n < children.length && found == null; n++) {
    if (children[n] instanceof JSElement) {
      found = children[n];
    }
  }
  return found;
});
781    
// |text|: the concatenation of the data of the text node children of the
// element, in order.  Children of other kinds are skipped.
JSElement.prototype.__defineGetter__ ('text', function () {
  var pieces = [];
  var children = this.childNodes;
  for (var n = 0; n < children.length; n++) {
    var child = children[n];
    if (child instanceof JSText) {
      pieces.push (child.data);
    }
  }
  return pieces.join ('');
});
791 wakaba 1.1
// Returns the dump of the descendants of the node |n|, one line per node
// or attribute.  Every line begins with "| " followed by |indent|; the
// lines of the children of an element are indented one level deeper.  Of
// the attributes, only |async|, |defer|, and |src| are dumped.
function dumpTree (n, indent) {
  var prefix = '| ' + indent;
  var lines = [];

  for (var k = 0; k < n.childNodes.length; k++) {
    var child = n.childNodes[k];

    if (child instanceof JSElement) {
      lines.push (prefix + child.localName + '\n');
      if (child.async) lines.push (prefix + ' async=""\n');
      if (child.defer) lines.push (prefix + ' defer=""\n');
      if (child.src != null) {
        lines.push (prefix + ' src="' + child.src + '"\n');
      }
      lines.push (dumpTree (child, indent + ' '));
    } else if (child instanceof JSText) {
      lines.push (prefix + '"' + child.data + '"\n');
    } else {
      // A node of an unknown kind is dumped as its string representation.
      lines.push (prefix + child + '\n');
    }
  }

  return lines.join ('');
} // dumpTree
812     </script>
813     </head>
<body onload="
  /* The textarea that holds the markup to parse. */
  document.sourceElement = document.getElementsByTagName ('textarea')[0];

  /* A permalink carries the markup in the |s| parameter of the query.
     Parameters are separated by |;|; a later |s| overrides an earlier one. */
  var query = location.search;
  if (query != null) {
    var params = query.substring (1).split (/;/);
    for (var n = 0; n < params.length; n++) {
      var pair = params[n].split (/=/, 2);
      var key = decodeURIComponent (pair[0]);
      var val = decodeURIComponent (pair[1] || '');
      if (key == 's') {
        document.sourceElement.value = val;
      }
    }
  }

  /* The element that receives the log, then the first run. */
  document.logElement = document.getElementsByTagName ('output')[0];
  update ();
">
833 wakaba 1.8 <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
834     Parser</h1>
835 wakaba 1.1
836 wakaba 1.7 <h2>Markup to test
837 wakaba 1.8 (<a href=data:, id=permalink rel=bookmark>permalink</a>,
838     <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
839     id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
840     Viewer</a>)</h2>
841 wakaba 1.7 <p>
842     <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
843 wakaba 1.1 &lt;head>&lt;/head>&lt;body>
844     &lt;p>
845     &lt;script>
846 wakaba 1.3 document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
847 wakaba 1.1 &lt;/script>
848     &lt;p>
849     </textarea>
850    
851 wakaba 1.10 <h2 id=log>Log</h2>
852 wakaba 1.7 <p><output></output>
853    
854 wakaba 1.10 <h2 id=notes>Notes</h2>
855 wakaba 1.8
856     <p>This is a <em>simplified</em> implementation of
857     <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
858     Parsing Algorithm</a>. It only implements script-related part of the
859     algorithm. Especially, this parser:
860     <ul>
861     <li>Does not support <code>DOCTYPE</code> and comment tokens.
862     <li>Does not support entities except for <code>&amp;quot;</code>,
863     <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
864     <code>src</code> attribute value.
865     <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
866     algorithm, and so on.
867     <li>Does not raise parse errors for invalid attribute specifications in start
868     or end tags.
869 wakaba 1.14 <li>Does not support PCDATA elements (<code>title</code> and
870     <code>textarea</code>).
871     <li>Does not strip the first newline in <code>pre</code> elements.
872 wakaba 1.8 <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
873     in <code>script</code> element.
874     <li>Does not support foreign (SVG or MathML) elements.
875     <li>Only supports <code>script</code> <code>type</code>
876     <code>text/javascript</code>. <code>type</code> and <code>language</code>
877     attributes are ignored.
878 wakaba 1.10 <li>Only supports limited statements. It must consist of zero or more
879     of statements looking similar to the following statements, possibly
880     introduced, followed, or separated by white space characters:
881     <ul>
882     <li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>.
883     <li><code>var s = document.createElement ("script");
884     s.src = "<var>string</var>";
885     document.documentElement.appendChild (s);</code>
886 wakaba 1.15 <li><code>w (document.documentElement.innerHTML);</code> (This statement
887     can be used to dump the document, even when the document has no
888     document element. The output format is the tree dump format used
889     in html5lib test data, not <abbr>HTML</abbr>.)
890 wakaba 1.10 </ul>
891     Note that strings may be delimited by <code>'</code>s instead of
892     <code>"</code>s.
893 wakaba 1.8 <li>Only supports <code>javascript:</code>
894     <abbr title="Uniform Resourace Identifiers">URI</abbr> scheme in the
895     <code>src</code> attribute of the <code>script</code> element. In addition,
896     the <abbr title="Uniform Resource Identifiers">URI</abbr> must be conform to
897     the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
898 wakaba 1.11 <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
899     string literals.
900 wakaba 1.12 <li>Does not handle <i>stop parsing</i> phase correctly if the document is
901     replaced by <code>document.open ()</code> call. In other word, delayed
902     (deferred or asynchronous) script executions and event firings might be
903     treated in a wrong way if a <code>document.open ()</code> invocation
904     is implicitly done by <code>document.write ()</code> in a delayed script.
905 wakaba 1.8 </ul>
906 wakaba 1.7
907 wakaba 1.8 <p>For some reason, this parser does not work in browsers that do
908     not support JavaScript 1.5.
909 wakaba 1.12
910     <!-- TODO: |src| attribute value should refer the value at the time
911     when it is inserted into the document, not the value when the script is
912     executed. Currently it does not matter, since we don't allow dynamic
913     modification to the |src| content/DOM attribute value yet. -->
914 wakaba 1.10
915 wakaba 1.13 </body>
916     </html>
917 wakaba 1.15 <!-- $Date: 2008/04/29 02:50:00 $ -->
918 wakaba 1.13 <!--
919    
920     Copyright 2008 Wakaba <w@suika.fam.cx>
921    
922     This program is free software; you can redistribute it and/or
923     modify it under the terms of the GNU General Public License
924     as published by the Free Software Foundation; either version 2
925     of the License, or (at your option) any later version.
926    
927     This program is distributed in the hope that it will be useful,
928     but WITHOUT ANY WARRANTY; without even the implied warranty of
929     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
930     GNU General Public License for more details.
931    
932     You should have received a copy of the GNU General Public License
933     along with this program; if not, write to the Free Software
934     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
935 wakaba 1.1
936 wakaba 1.13 -->

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24