/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.13 - (hide annotations) (download) (as text)
Sun Apr 27 11:27:04 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.12: +31 -6 lines
File MIME type: text/html
license and styling refinement

1 wakaba 1.1 <!DOCTYPE HTML>
2     <html lang=en>
3     <head>
4 wakaba 1.8 <title>Live Scripting HTML Parser</title>
5 wakaba 1.13 <link rel=author href="http://suika.fam.cx/~wakaba/who?">
6     <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
7     title="GNU GPL2 or later">
8 wakaba 1.1 <style>
9 wakaba 1.13 h1 {
10     margin: 0;
11     font-size: 150%;
12     }
13     h2 {
14 wakaba 1.7 margin: 0;
15     font-size: 100%;
16     }
17 wakaba 1.13 p {
18     margin: 0 1em;
19 wakaba 1.7 }
20 wakaba 1.1 textarea {
21 wakaba 1.7 width: 100%;
22     -width: 99%;
23     height: 10em;
24 wakaba 1.1 }
25     output {
26     display: block;
27     font-family: monospace;
28 wakaba 1.4 white-space: -moz-pre-wrap;
29     white-space: pre-wrap;
30 wakaba 1.1 }
31     </style>
32     <script>
// Timer handle of the pending (debounced) re-parse; 0 when none is pending.
var delayedUpdater = 0;

/**
 * Requests a re-parse of the textarea content.
 *
 * Any re-parse that is still pending is cancelled first, so a burst of
 * input events results in a single call to |update2| 100ms after the
 * last event.
 */
function update () {
  // The handle is overwritten right below, so cancelling is all that is
  // needed for a pending timer.
  if (delayedUpdater) clearTimeout (delayedUpdater);
  delayedUpdater = setTimeout (update2, 100);
} // update
42    
/**
 * Re-parses the textarea content and rewrites the log, but only if the
 * text differs from the text processed by the previous run.
 *
 * Also refreshes the "permalink" and "Live DOM Viewer" links so that they
 * carry the current source text.
 */
function update2 () {
  var source = document.sourceElement.value;
  if (source == document.previousSourceText) return;
  document.previousSourceText = source;

  var encoded = encodeURIComponent (source);
  document.links['permalink'].href = location.pathname + '?s=' + encoded;
  document.links['ldvlink'].href
      = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?' + encoded;

  // Start from an empty log.
  document.logElement.textContent = '';

  var parser = new Parser (new InputStream (source));
  // Keep the initial document: |parser.doc| is read before parsing starts.
  var tree = parser.doc;
  parser.parse ();

  log (dumpTree (tree, ''));

  if (parser.hasAsyncScript) {
    log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
  }
} // update2
65 wakaba 1.1
// Current nesting depth of the log output; one leading space per level.
var logIndentLevel = 0;

/**
 * Appends one line to the log element, indented by |logIndentLevel|.
 *
 * @param s  The message; a newline is appended to it.
 */
function log (s) {
  var indent = '';
  for (var depth = logIndentLevel; depth > 0; depth--) {
    indent += ' ';
  }
  var line = document.createTextNode (indent + s + "\n");
  document.logElement.appendChild (line);
} // log
73    
/**
 * Input stream of the parser: a mutable wrapper around the text that has
 * not been tokenized yet.
 *
 * @param s  The initial unparsed text, stored as the |s| property.
 */
function InputStream (s) {
  this.s = s;
} // InputStream
77    
/**
 * Parser state.
 *
 * @param i    The InputStream to read from.
 * @param doc  Optional document to populate.  When omitted, a new
 *             JSDocument bound to this parser is created and flagged as
 *             an HTML document.
 */
function Parser (i, doc) {
  if (!doc) {
    doc = new JSDocument (this);
    doc.manakaiIsHTML = true;
  }

  this.input = i;
  this.doc = doc;

  // Tokenizer content model: 'pcdata' or 'script'.
  this.parseMode = 'pcdata';

  // Stack of open elements; the document itself is the bottom entry.
  this.openElements = [doc];

  // Script queues that are drained by |parse| after the end of the input.
  this.scriptsExecutedAfterParsing = [];
  this.scriptsExecutedSoon = [];
  this.scriptsExecutedAsynchronously = [];
} // Parser
91    
/**
 * Removes the next token from the head of the input stream and returns it.
 *
 * The returned object has a |type| property:
 *   - 'char'      : character data, in |value|.
 *   - 'start-tag' : lowercased tag name in |value|, attributes in |attrs|.
 *   - 'end-tag'   : lowercased tag name in |value|.
 *   - 'abort'     : the tokenizer reached the insertion point and must
 *                   wait for more input; nothing is consumed.
 *   - 'eof'       : the input stream is empty.
 *
 * Consumed text is removed from |this.input.s| and |this.insertionPoint|
 * is decreased by the number of consumed characters.  When the insertion
 * point is undefined, every comparison against it is false, so the
 * tokenizer never aborts.
 */
Parser.prototype.getNextToken = function () {
  var p = this;
  var i = this.input;
  var token;

  if (this.parseMode == 'script') {
    if (p.insertionPoint <= 0) {
      return {type: 'abort'};
    }

    // Character data up to the next "<", clipped at the insertion point.
    i.s = i.s.replace (/^([^<]+)/, function (s, t) {
      if (0 < p.insertionPoint && p.insertionPoint < t.length) {
        token = {type: 'char', value: t.substring (0, p.insertionPoint)};
        var ip = p.insertionPoint;
        p.insertionPoint = 0;
        return t.substring (ip, t.length);
      }
      token = {type: 'char', value: t};
      p.insertionPoint -= t.length;
      return '';
    });
    if (token) return token;

    // A complete </script> end tag.
    i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {
      if (p.insertionPoint < s.length) {
        token = {type: 'abort'};
        return s;
      }
      token = {type: 'end-tag', value: 'script'};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    // The available text might be the beginning of a </script> end tag
    // whose remaining part has not been written yet.
    var m;
    if ((p.insertionPoint < '</script'.length) &&
        (m = i.s.match (/^<\/([SCRIPTscript]+)/))) {
      var v = m[1].substring (0, p.insertionPoint).toLowerCase ();
      if (v == 'script'.substring (0, p.insertionPoint - '</'.length)) {
        return {type: 'abort'};
      }
    }

    // A "<" that does not start </script> is character data.
    i.s = i.s.replace (/^</, function (s) {
      token = {type: 'char', value: s};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    return {type: 'eof'};
  }

  // True if the tag text |s| cannot be tokenized yet: either the insertion
  // point lies inside the tag, or the tag ends exactly at the insertion
  // point without a closing ">" (more of it might still be written).
  // NOTE: The last character is read with charAt; substring (len - 1, 1)
  // swaps its arguments and does not return the last character.
  var mustWaitForMoreInput = function (s) {
    return p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length && s.charAt (s.length - 1) != '>');
  };

  // End tag.
  i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
    if (mustWaitForMoreInput (s)) {
      token = {type: 'abort'};
      return s;
    }
    token = {type: 'end-tag', value: e.toLowerCase ()};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Start tag.
  i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
    if (mustWaitForMoreInput (s)) {
      token = {type: 'abort'};
      return s;
    }
    var tagName;
    var attrs = {};
    e = e.replace (/^[\S]+/, function (v) {
      tagName = v.toLowerCase ();
      return '';
    });
    // Strip attribute specifications one by one from the head of |e|.
    while (true) {
      var m = false;
      e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
      function (x, attrName, attrValue1, attrValue2, attrValue3) {
        // An attribute without a value (e.g. |async|) or with an empty
        // value leaves all of the three captures falsy; use the empty
        // string in that case.
        var v = attrValue1 || attrValue2 || attrValue3 || '';
        v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
            .replace (/&amp;/g, '&');
        attrs[attrName.toLowerCase ()] = v;
        m = true;
        return '';
      });
      if (!m) break;
    }
    if (e.length) {
      log ('Broken start tag: "' + e + '"');
    }
    token = {type: 'start-tag', value: tagName, attrs: attrs};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  if (p.insertionPoint <= 0) {
    return {type: 'abort'};
  }

  // Character data up to the next "<", clipped at the insertion point.
  i.s = i.s.replace (/^[^<]+/, function (s) {
    if (p.insertionPoint < s.length) {
      token = {type: 'char', value: s.substring (0, p.insertionPoint)};
      var ip = p.insertionPoint;
      p.insertionPoint = 0;
      return s.substring (ip, s.length);
    }
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Anything else (a "<" that did not form a tag) is a single character.
  i.s = i.s.replace (/^[\s\S]/, function (s) {
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  return {type: 'eof'};
}; // getNextToken
211    
/**
 * Runs the tree construction loop over the tokens of the input stream.
 *
 * Start tags other than |script| are pushed on the stack of open
 * elements, matching end tags pop it, and character tokens are appended
 * to the current node.  A |script| start tag triggers the script
 * processing steps (the numbered comments follow the steps of the HTML5
 * parsing algorithm).
 *
 * The method returns early, without running the "stop parsing" steps,
 * when the tokenizer aborts at the insertion point or when a pending
 * external script is found during a reentrant invocation (one made from
 * |document.write|).  Otherwise, after the end of the input it executes
 * the queued scripts and logs the DOMContentLoaded and load events.
 */
Parser.prototype.parse = function () {
  logIndentLevel++;
  log ('parse: start');

  while (true) {
    var token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);
        if (token.attrs.async != null) el.async = true;
        if (token.attrs.defer != null) el.defer = true;
        if (token.attrs.src != null) el.src = token.attrs.src;

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'script';

        // 4.1. Collect all the character tokens.
        while (true) {
          var token = this.getNextToken ();
          log ('token: ' + token.type + ' "' + token.value + '"');

          if (token.type == 'char') {
            // 5. Append a single Text node to the script element node.
            el.manakaiAppendText (token.value);

          // 4.2. Until it returns a token that is not a character token, or
          // until it stops tokenising.
          } else if (token.type == 'eof' ||
                     (token.type == 'end-tag' && token.value == 'script') ||
                     token.type == 'abort') {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            // 7.1. If the next token is not an end tag token with ...
            if (token.type != 'end-tag') {
              // 7.2. This is a parse error.
              log ('Parse error: no </' + 'script>');

              // 7.3. Mark the script element as "already executed".
              el.manakaiAlreadyExecuted = true;
            } else {
              // 7.4. Ignore it.
              //
            }
            break;
          }
        }

        // 8.1. If the parser were originally created for the ...
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and ...
          // NOTE: This must be the same flag as the one checked by
          // |appendChild| (|manakaiAlreadyExecuted|).
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the ...
        var oldInsertionPoint = this.insertionPoint;
        // 9.2. Let the insertion point be just before the next input ...
        this.setInsertionPoint (0);

        // 10. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 11. Let the insertion point have the value of the old ...
        // The insertion point now holds the number of characters written
        // by the script; the old insertion point is shifted by it.  If the
        // old value was undefined the sum is NaN, which
        // |setInsertionPoint| treats as undefined.
        oldInsertionPoint += this.insertionPoint;
        this.setInsertionPoint (oldInsertionPoint);

        // 12. If there is a script that will execute as soon as ...
        while (this.scriptExecutedWhenParserResumes) {
          // 12.1. If the tree construction stage is being called reentrantly
          if (this.reentrant) {
            log ('parse: abort (reentrance)');
            logIndentLevel--;
            return;

          // 12.2. Otherwise
          } else {
            // 1.
            var script = this.scriptExecutedWhenParserResumes;
            this.scriptExecutedWhenParserResumes = null;

            // 2. Pause until the script has completed loading.
            //

            // 3. Let the insertion point to just before the next input char.
            this.setInsertionPoint (0);

            // 4. Execute the script.
            executeScript (this.doc, script);

            // 5. Let the insertion point be undefined again.
            this.setInsertionPoint (undefined);

            // 6. If there is once again a script that will execute ...
            //
          }
        }
      } else {
        var el = new JSElement (this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild (el);
        this.openElements.push (el);
      }
    } else if (token.type == 'end-tag') {
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'char') {
      this.openElements[this.openElements.length - 1].manakaiAppendText
          (token.value);
    } else if (token.type == 'eof') {
      break;
    } else if (token.type == 'abort') {
      log ('parse: abort');
      logIndentLevel--;
      return;
    }
  }

  log ('stop parsing');

  // readyState = 'interactive'

  // "When a script completes loading" rules start applying.

  // Executing a script can queue further scripts, so repeat until both
  // of the lists are empty.
  while (this.scriptsExecutedSoon.length > 0 ||
         this.scriptsExecutedAsynchronously.length > 0) {
    // Handle "list of scripts that will execute as soon as possible".
    while (this.scriptsExecutedSoon.length > 0) {
      var e = this.scriptsExecutedSoon.shift ();

      // If it has completed loading
      log ('Execute an external script not inserted by parser...');
      executeScript (this.doc, e);

      // NOTE: It MAY be executed before the end of the parsing, according
      // to the spec.
      this.hasAsyncScript = true;
    }

    // Handle "list of scripts that will execute asynchronously".
    while (this.scriptsExecutedAsynchronously.length > 0) {
      var e = this.scriptsExecutedAsynchronously.shift ();

      // Step 1.
      // We assume that all scripts have been loaded at this time.

      // Step 2.
      log ('Execute an asynchronous script...');
      executeScript (this.doc, e);

      // Step 3.
      //

      // Step 4.
      //

      this.hasAsyncScript = true;
    }
  }

  // Handle "list of scripts that will execute when the document has finished
  // parsing".
  var list = this.scriptsExecutedAfterParsing;
  while (list.length > 0) {
    // TODO: break unless completed loading

    // Step 1.
    //

    // Step 2. and Step 3.
    log ('Executing a |defer|red script...');
    executeScript (this.doc, list.shift ());

    // Step 4.
  }

  log ('DOMContentLoaded event fired');

  // "delays the load event" things have completed:
  // readyState = 'complete'
  log ('load event fired');

  logIndentLevel--;
}; // parse
406    
/**
 * Sets the insertion point and logs the change.
 *
 * @param ip  Offset into the remaining input.  |undefined|, |null|, and
 *            NaN all mean "undefined insertion point".
 */
Parser.prototype.setInsertionPoint = function (ip) {
  var message;
  if (ip == undefined || ip == null || isNaN (ip)) {
    message = 'insertion point: set to undefined';
    ip = undefined;
  } else if (ip == this.input.s.length) {
    message = 'insertion point: end of file';
  } else {
    message = 'insertion point: set to ' + ip +
        ' (before "' + this.input.s.substring (0, 10) + '")';
  }
  log (message);
  this.insertionPoint = ip;
}; // setInsertionPoint
420    
/**
 * Minimal document node.
 *
 * @param p  The Parser associated with the document, kept as |_parser|.
 */
function JSDocument (p) {
  this._parser = p;
  this.childNodes = [];
} // JSDocument
425    
/**
 * Minimal element node.
 *
 * @param doc        The owner document, kept as |ownerDocument|.
 * @param localName  The element name, kept as |localName|.
 */
function JSElement (doc, localName) {
  this.ownerDocument = doc;
  this.localName = localName;
  this.childNodes = [];
} // JSElement
431    
/**
 * Appends the node |e| to this node and returns |e|.
 *
 * Inserting a |script| element additionally runs the "running a script"
 * steps: depending on its |defer|, |async|, and |src| properties the
 * script is queued on one of the parser's lists, remembered as the script
 * to execute when the parser resumes, or executed immediately.
 */
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push (e);
  e.parentNode = this;

  // Only script elements need further processing.
  if (e.localName != 'script') return e;

  logIndentLevel++;
  log ('Running a script: start');

  // |this| is the document itself when it has no |ownerDocument|.
  var doc = this.ownerDocument || this;
  var p = doc._parser;

  // 1. Script type
  // 2.1. - 2.3. Scripting disabled, XML innerHTML, unsupported language:
  // not applicable here.

  // 2.4. / 2.5. Abort if the "already executed" flag is set.
  if (e.manakaiAlreadyExecuted) {
    log ('Running a script: aborted');
    logIndentLevel--;
    return e;
  }

  // 3. Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // 4. If the element has a src attribute, then a load for ...
  // TODO: load an external resource

  // 5. The first of the following options:
  var hasSrc = e.src != null;
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    p.scriptsExecutedAfterParsing.push (e);
    log ('Running a script: aborted (defer)');
  } else if (e.async &&
             (hasSrc || p.scriptsExecutedAsynchronously.length > 0)) {
    // An inline async script is queued only if the list is not empty.
    // ISSUE: What is the difference between the two cases?
    p.scriptsExecutedAsynchronously.push (e);
    log (hasSrc ? 'Running a script: aborted (async src)'
                : 'Running a script: aborted (async)');
  } else if (hasSrc && e.manakaiParserInserted) {
    if (p.scriptExecutedWhenParserResumes) {
      log ('Error: There is a script that will execute as soon as the parser resumes.');
    }
    p.scriptExecutedWhenParserResumes = e;
    log ('Running a script: aborted (src parser-inserted)');
  } else if (hasSrc) {
    p.scriptsExecutedSoon.push (e);
    log ('Running a script: aborted (src)');
  } else {
    executeScript (doc, e); // even if other scripts are already executing.
  }

  log ('Running a script: end');
  logIndentLevel--;
  return e;
}; // appendChild
501    
/**
 * Executes the script element |e| in the context of the document |doc|.
 *
 * The script text is taken from the |src| URI if the element has one and
 * from its text content otherwise.  If the external script cannot be
 * obtained, an error event is logged and nothing is executed.
 */
function executeScript (doc, e) {
  log ('executing a script block: start');

  var code;
  if (e.src == null) {
    code = e.text;
  } else {
    code = getExternalScript (e.src);

    // If the load resulted in an error, then ... firing an error event ...
    if (code == null) {
      log ('error event fired at the script element');
      return;
    }

    log ('External script loaded: "' + code + '"');
  }

  // If the load was successful
  log ('load event fired at the script element');

  // Scripting is always enabled here, Document.designMode is disabled, and
  // the Document is the active document in its browsing context.
  parseAndRunScript (doc, code);

  log ('executing a script block: end');
} // executeScript
532    
/**
 * Returns the script text denoted by the |src| URI |uri|, or |null| if
 * the URI is not supported (in which case the reason is logged).
 *
 * Only |javascript:| URIs consisting of a single string literal, delimited
 * by |'| or |"|, are supported.  The literal may be empty, in which case
 * the script text is the empty string.
 */
function getExternalScript (uri) {
  if (!uri.match (/^javascript:/i)) {
    log ('URI scheme not supported: <' + uri + '>');
    return null;
  }

  // Both quoting styles accept an empty literal.
  var m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]*)")\s*$/i);
  if (!m) {
    log ('Complex javascript: URI is not supported: <' + uri + '>');
    return null;
  }

  // Exactly one of the captures participates in the match; the other one
  // is undefined (or empty in some old implementations).
  return unescapeJSLiteral (m[1] || m[2] || '');
} // getExternalScript
553    
/**
 * Interprets the script text |s| against the document |doc|.
 *
 * Two kinds of statements are recognized at the head of the text and are
 * consumed repeatedly: a |document.write| call with string literal
 * arguments, and the three-statement idiom that creates a |script|
 * element with a |javascript:| |src| and appends it to the document
 * element.  If text remains that matches neither, a script parse error is
 * logged and the interpretation stops.
 */
function parseAndRunScript (doc, s) {
  for (;;) {
    var progressed = false;

    // document.write ('...', "...", ...);
    s = s.replace (/^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/, function (statement, argumentList) {
      progressed = true;
      var literals = argumentList.match (/('[^']*'|"[^"]*")/g) || [];
      var args = [];
      for (var k = 0; k < literals.length; k++) {
        var literal = literals[k];
        // Drop the delimiting quotes.
        args.push (unescapeJSLiteral (literal.substring (1, literal.length - 1)));
      }
      doc.write.apply (doc, args);
      return '';
    });

    // var s = document.createElement ('script'); s.src = '...';
    // document.documentElement.appendChild (s);
    s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'(javascript:[^']*)'|"(javascript:[^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
    function (statement, singleQuoted, doubleQuoted) {
      progressed = true;
      var uri = unescapeJSLiteral (singleQuoted ? singleQuoted : doubleQuoted);
      doc._insertExternalScript.apply (doc, [uri]);
      return '';
    });

    if (s == '') return;
    if (!progressed) {
      log ('Script parse error: "' + s + '"');
      return;
    }
  }
} // parseAndRunScript
581    
/**
 * Decodes the |\uHHHH| escapes in the content of a JavaScript string
 * literal.  No other escape sequence is interpreted.
 *
 * @param s  The literal content, without the delimiting quotes.
 * @return   The string with every |\uHHHH| replaced by its code unit.
 */
function unescapeJSLiteral (s) {
  var escapePattern = /\\u([0-9A-Fa-f]{4})/g;
  return s.replace (escapePattern, function (whole, hex) {
    var codeUnit = parseInt (hex, 16);
    return String.fromCharCode (codeUnit);
  });
} // unescapeJSLiteral
587    
/**
 * Minimal text node.
 *
 * @param data  The character data of the node, kept as |data|.
 */
function JSText (data) {
  this.data = data;
} // JSText
591    
/**
 * Appends the character data |s| to this node: the data is added to the
 * last child if that is a text node, and a new text node is appended
 * otherwise.
 */
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var children = this.childNodes;
  // |lastChild| is undefined for a node without children.
  var lastChild = children[children.length - 1];
  if (lastChild instanceof JSText) {
    lastChild.data += s;
  } else {
    children.push (new JSText (s));
  }
}; // manakaiAppendText
602 wakaba 1.2
/**
 * Simplified |document.open ()|.
 *
 * Accepts up to two optional arguments: the MIME type (default
 * 'text/html') and the string 'replace'.  Neither affects the result
 * currently.
 *
 * The call is ignored while the document is being parsed by a parser that
 * was not created by a script (that is, while the insertion point of that
 * parser is defined).  Otherwise the document is emptied and a new
 * script-created parser with an empty input stream is attached to it.
 *
 * @return  This document.
 */
JSDocument.prototype.open = function () {
  // Two or fewer arguments

  // Step 1.
  var type = arguments[0] || 'text/html';

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  // NOTE: The insertion point is a property of the parser, not of its
  // input stream.
  if (this._parser &&
      !this._parser.scriptCreated &&
      this._parser.insertionPoint != undefined) {
    log ('document.open () in parsing mode is ignored');
    return this;
  }

  // Step 4.
  log ('onbeforeunload event fired');
  log ('onunload event fired');

  // Step 5.
  if (this._parser) {
    // Discard the parser.
  }

  // Step 6.
  log ('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser (new InputStream (''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  // The input stream is empty, so this is the end of the input.
  this._parser.setInsertionPoint (this._parser.input.s.length);

  // Step 12.
  return this;
}; // document.open
654    
/**
 * Simplified |document.write ()|.
 *
 * The arguments are concatenated and inserted into the parser's input
 * stream just before the insertion point.  If the insertion point is
 * undefined, |open ()| is invoked first.  Unless there is a script that
 * will execute as soon as the parser resumes, the inserted text is then
 * processed by a reentrant invocation of the parser.
 */
JSDocument.prototype.write = function () {
  logIndentLevel++;

  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
    this.open ();
    // |open| might have replaced the parser.
    p = this._parser;
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // NOTE: |arguments| is not an Array, and the |Array.join| generic is
  // not a standard function, so the prototype method is borrowed.
  var s = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + s + '"' +
       ' before "' +
       p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
  p.input.s = p.input.s.substring (0, p.insertionPoint) + s
      + p.input.s.substring (p.insertionPoint, p.input.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a script that will execute as soon as the parser resumes
  if (p.scriptExecutedWhenParserResumes) {
    log ('document.write: processed later (there is an unprocessed <script src>)');
    logIndentLevel--;
    return;
  }

  // 4. Process the characters that were inserted, ...
  var originalReentrant = p.reentrant;
  p.reentrant = true;
  p.parse ();
  p.reentrant = originalReentrant;
  // TODO: "Abort the processing of any nested invokations of the tokeniser,
  // yielding control back to the caller." (<script> parsing). Do we need
  // to do something here?

  // 5. Return
  log ('document.write: return');

  logIndentLevel--;
  return;
}; // document.write
697    
/**
 * Creates a |script| element whose |src| is |uri| and appends it to the
 * document element.  The element is not marked as parser-inserted.
 */
JSDocument.prototype._insertExternalScript = function (uri) {
  var scriptElement = new JSElement (this, 'script');
  scriptElement.src = uri;

  var root = this.documentElement;
  root.appendChild (scriptElement);
}; // _insertExternalScript
703    
// |documentElement|: the first element child of the document, or |null|
// if the document has no element child.
JSDocument.prototype.__defineGetter__ ('documentElement', function () {
  var children = this.childNodes;
  var k = 0;
  while (k < children.length) {
    var child = children[k];
    if (child instanceof JSElement) return child;
    k++;
  }
  return null;
});
713    
// |text|: the concatenation of the data of the text node children of the
// element, in tree order.  Descendants of child elements are not included.
JSElement.prototype.__defineGetter__ ('text', function () {
  var children = this.childNodes;
  var result = '';
  for (var k = 0, n = children.length; k < n; k++) {
    var child = children[k];
    if (!(child instanceof JSText)) continue;
    result += child.data;
  }
  return result;
});
723 wakaba 1.1
/**
 * Serializes the descendants of the node |n| into the line-oriented tree
 * dump format: one "| "-prefixed line per element, attribute, or text
 * node, with children indented further than their parent.
 *
 * @param n       The node whose children are dumped.
 * @param indent  The indentation string for the children of |n|.
 * @return        The dump; the empty string if |n| has no children.
 */
function dumpTree (n, indent) {
  var lines = [];
  var prefix = '| ' + indent;
  for (var k = 0; k < n.childNodes.length; k++) {
    var child = n.childNodes[k];
    if (child instanceof JSElement) {
      lines.push (prefix + child.localName + '\n');
      if (child.async) lines.push (prefix + ' async=""\n');
      if (child.defer) lines.push (prefix + ' defer=""\n');
      if (child.src != null) {
        lines.push (prefix + ' src="' + child.src + '"\n');
      }
      lines.push (dumpTree (child, indent + ' '));
    } else if (child instanceof JSText) {
      lines.push (prefix + '"' + child.data + '"\n');
    } else {
      lines.push (prefix + child + '\n');
    }
  }
  return lines.join ('');
} // dumpTree
744     </script>
745     </head>
<body onload="
  /* Remember the controls used by the script in the head. */
  document.sourceElement = document.getElementsByTagName ('textarea')[0];
  document.logElement = document.getElementsByTagName ('output')[0];

  /* Restore the source text from a permalink (?s=...).  The parameters
     are separated by semicolons. */
  var q = location.search.substring (1).split (/;/);
  for (var i = 0; i < q.length; i++) {
    var v = q[i].split (/=/, 2);
    var paramName;
    var paramValue;
    try {
      paramName = decodeURIComponent (v[0]);
      paramValue = decodeURIComponent (v[1] || '');
    } catch (e) {
      /* A malformed percent-escape must not prevent the page from being
         initialized; such a parameter is ignored. */
      continue;
    }
    if (paramName == 's') {
      document.sourceElement.value = paramValue;
    }
  }

  update ();
">
765 wakaba 1.8 <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
766     Parser</h1>
767 wakaba 1.1
768 wakaba 1.7 <h2>Markup to test
769 wakaba 1.8 (<a href=data:, id=permalink rel=bookmark>permalink</a>,
770     <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
771     id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
772     Viewer</a>)</h2>
773 wakaba 1.7 <p>
774     <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
775 wakaba 1.1 &lt;head>&lt;/head>&lt;body>
776     &lt;p>
777     &lt;script>
778 wakaba 1.3 document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
779 wakaba 1.1 &lt;/script>
780     &lt;p>
781     </textarea>
782    
783 wakaba 1.10 <h2 id=log>Log</h2>
784 wakaba 1.7 <p><output></output>
785    
786 wakaba 1.10 <h2 id=notes>Notes</h2>
787 wakaba 1.8
788     <p>This is a <em>simplified</em> implementation of
789     <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
790     Parsing Algorithm</a>. It only implements script-related part of the
791     algorithm. Especially, this parser:
792     <ul>
793     <li>Does not support <code>DOCTYPE</code> and comment tokens.
794     <li>Does not support entities except for <code>&amp;quot;</code>,
795     <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
796     <code>src</code> attribute value.
797     <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
798     algorithm, and so on.
799     <li>Does not raise parse errors for invalid attribute specifications in start
800     or end tags.
801     <li>Does not support CDATA/PCDATA element other than <code>script</code>.
802     <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
803     in <code>script</code> element.
804     <li>Does not support foreign (SVG or MathML) elements.
805     <li>Only supports <code>script</code> <code>type</code>
806     <code>text/javascript</code>. <code>type</code> and <code>language</code>
807     attributes are ignored.
808 wakaba 1.10 <li>Only supports limited statements. It must consist of zero or more
809     of statements looking similar to the following statements, possibly
810     introduced, followed, or separated by white space characters:
811     <ul>
812     <li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>.
813     <li><code>var s = document.createElement ("script");
814     s.src = "<var>string</var>";
815     document.documentElement.appendChild (s);</code>
816     </ul>
817     Note that strings may be delimited by <code>'</code>s instead of
818     <code>"</code>s.
819 wakaba 1.8 <li>Only supports <code>javascript:</code>
820     <abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the
821     <code>src</code> attribute of the <code>script</code> element. In addition,
822     the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to
823     the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
824 wakaba 1.11 <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
825     string literals.
826 wakaba 1.12 <li>Does not handle <i>stop parsing</i> phase correctly if the document is
827     replaced by <code>document.open ()</code> call. In other words, delayed
828     (deferred or asynchronous) script executions and event firings might be
829     treated in a wrong way if a <code>document.open ()</code> invocation
830     is implicitly done by <code>document.write ()</code> in a delayed script.
831 wakaba 1.8 </ul>
832 wakaba 1.7
833 wakaba 1.8 <p>For some reason, this parser does not work in browsers that do
834     not support JavaScript 1.5.
835 wakaba 1.12
836     <!-- TODO: |src| attribute value should refer to the value at the time
837     when it is inserted into the document, not the value when the script is
838     executed. Currently it does not matter, since we don't allow dynamic
839     modification to the |src| content/DOM attribute value yet. -->
840 wakaba 1.10
841 wakaba 1.13 </body>
842     </html>
843     <!-- $Date: 2008/01/19 06:47:07 $ -->
844     <!--
845    
846     Copyright 2008 Wakaba <w@suika.fam.cx>
847    
848     This program is free software; you can redistribute it and/or
849     modify it under the terms of the GNU General Public License
850     as published by the Free Software Foundation; either version 2
851     of the License, or (at your option) any later version.
852    
853     This program is distributed in the hope that it will be useful,
854     but WITHOUT ANY WARRANTY; without even the implied warranty of
855     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
856     GNU General Public License for more details.
857    
858     You should have received a copy of the GNU General Public License
859     along with this program; if not, write to the Free Software
860     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
861 wakaba 1.1
862 wakaba 1.13 -->

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24