/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.11 - (hide annotations) (download) (as text)
Sun Apr 27 10:44:36 2008 UTC (17 years, 6 months ago) by wakaba
Branch: MAIN
Changes since 1.10: +12 -10 lines
File MIME type: text/html
JS string literal escape everywhere

1 wakaba 1.1 <!DOCTYPE HTML>
2     <html lang=en>
3     <head>
4 wakaba 1.8 <title>Live Scripting HTML Parser</title>
5 wakaba 1.1 <style>
6 wakaba 1.7 h1, h2 {
7     margin: 0;
8     font-size: 100%;
9     }
10     p, pre {
11     margin: 0;
12     }
13 wakaba 1.1 textarea {
14 wakaba 1.7 width: 100%;
15     -width: 99%;
16     height: 10em;
17 wakaba 1.1 }
18     output {
19     display: block;
20     font-family: monospace;
21 wakaba 1.4 white-space: -moz-pre-wrap;
22     white-space: pre-wrap;
23 wakaba 1.1 }
24     </style>
25     <script>
26 wakaba 1.7 var delayedUpdater = 0;
27    
/**
 * Schedules a re-parse of the textarea contents.
 *
 * A pending run is cancelled first, so a burst of input events results
 * in a single call to update2 () 100 ms after the last event.
 */
function update () {
  if (delayedUpdater) clearTimeout (delayedUpdater);
  delayedUpdater = setTimeout (update2, 100);
} // update
35    
/**
 * Re-parses the source text and rewrites the log, but only when the text
 * differs from the one handled by the previous run.
 */
function update2 () {
  var source = document.sourceElement.value;
  if (source == document.previousSourceText) return;
  document.previousSourceText = source;

  // Keep the permalink and the Live DOM Viewer link in sync with the text.
  document.links['permalink'].href
      = location.pathname + '?s=' + encodeURIComponent (source);
  document.links['ldvlink'].href
      = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?'
      + encodeURIComponent (source);

  // Start from an empty log, then parse and dump the resulting tree.
  document.logElement.textContent = '';
  var parser = new Parser (new InputStream (source));
  var tree = parser.doc;
  parser.parse ();

  log (dumpTree (tree, ''));

  if (parser.hasAsyncScript) {
    log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
  }
} // update2
58 wakaba 1.1
59 wakaba 1.6 var logIndentLevel = 0;
/**
 * Appends one line to the log output element (document.logElement),
 * prefixed with one space per current indentation level (logIndentLevel).
 */
function log (s) {
  var indent = '';
  for (var n = logIndentLevel; n > 0; n--) {
    indent += ' ';
  }
  var line = document.createTextNode (indent + s + "\n");
  document.logElement.appendChild (line);
} // log
66    
/**
 * Input stream consumed by the tokeniser.
 *
 * The text that has not been tokenised yet is kept in the |s| property;
 * the tokeniser and document.write () replace it as they go.
 */
function InputStream (s) {
  this.s = s;
} // InputStream
70    
/**
 * Creates a parser reading from the input stream |i|.
 *
 * When |doc| is omitted a fresh JSDocument bound to this parser is created
 * and flagged as an HTML document; otherwise the given document is reused
 * as is.  The document is the initial (bottom) entry of the stack of open
 * elements.
 */
function Parser (i, doc) {
  var target = doc;
  if (!target) {
    target = new JSDocument (this);
    target.manakaiIsHTML = true;
  }

  this.parseMode = 'pcdata';
  this.doc = target;
  this.openElements = [target];
  this.input = i;
  this.scriptsExecutedAfterParsing = [];
  this.scriptsExecutedSoon = [];
} // Parser
83    
/**
 * Removes the next token from the head of the input stream and returns it.
 *
 * Returned objects have a |type| of:
 *   'char'      - character data in |value|
 *   'start-tag' - lowercased tag name in |value|, attributes in |attrs|
 *   'end-tag'   - lowercased tag name in |value|
 *   'abort'     - the insertion point has been reached (or a tag straddles
 *                 it); nothing is consumed
 *   'eof'       - the input is exhausted
 *
 * While an insertion point is set (document.write () is in progress) it is
 * decreased by the number of characters consumed, so that it keeps
 * pointing at the same place in the remaining input.
 */
Parser.prototype.getNextToken = function () {
  var p = this;
  var i = this.input;
  var token;

  if (this.parseMode == 'script') {
    if (p.insertionPoint <= 0) {
      return {type: 'abort'};
    }

    // Script text up to the next "<", cut at the insertion point if any.
    i.s = i.s.replace (/^([^<]+)/, function (s, t) {
      if (0 < p.insertionPoint && p.insertionPoint < t.length) {
        token = {type: 'char', value: t.substring (0, p.insertionPoint)};
        var ip = p.insertionPoint;
        p.insertionPoint = 0;
        return t.substring (ip, t.length);
      }
      token = {type: 'char', value: t};
      p.insertionPoint -= t.length;
      return '';
    });
    if (token) return token;

    // A complete script end tag.
    i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {
      if (p.insertionPoint < s.length) {
        token = {type: 'abort'};
        return s;
      }
      token = {type: 'end-tag', value: 'script'};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    // A possibly incomplete script end tag just before the insertion
    // point: wait for further document.write () calls to complete it.
    var m;
    if ((p.insertionPoint < '</script'.length) &&
        (m = i.s.match (/^<\/([SCRIPTscript]+)/))) {
      // Only the part of the tag name located before the insertion point
      // counts, i.e. the insertion point minus the two characters "</".
      // (The original compared |insertionPoint| characters of the name
      // against |insertionPoint - 2| characters of "script".)
      var nameLength = p.insertionPoint - '</'.length;
      var seen = m[1].substring (0, nameLength).toLowerCase ();
      if (seen == 'script'.substring (0, nameLength)) {
        return {type: 'abort'};
      }
    }

    // A "<" that does not start a script end tag is plain script text.
    i.s = i.s.replace (/^</, function (s) {
      token = {type: 'char', value: s};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;
    return {type: 'eof'};
  }

  // True if the tag text |s| cannot be tokenised yet: either it extends
  // beyond the insertion point, or it ends exactly at the insertion point
  // without a closing ">" (more of it might still be written).
  // NOTE: The original used |s.substring (s.length - 1, 1)|, which swaps
  // its arguments and never returns the last character.
  function mustWait (s) {
    return p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length && s.charAt (s.length - 1) != '>');
  }

  // End tag.
  i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
    if (mustWait (s)) {
      token = {type: 'abort'};
      return s;
    }
    token = {type: 'end-tag', value: e.toLowerCase ()};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Start tag.
  i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
    if (mustWait (s)) {
      token = {type: 'abort'};
      return s;
    }
    var tagName;
    var attrs = {};
    e = e.replace (/^[\S]+/, function (v) {
      tagName = v.toLowerCase ();
      return '';
    });
    while (true) {
      var m = false;
      e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
        function (x, attrName, attrValue1, attrValue2, attrValue3) {
          // A valueless attribute (e.g. |defer|) leaves all three groups
          // unset; treat its value as the empty string.
          var v = attrValue1 || attrValue2 || attrValue3 || '';
          v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
              .replace (/&amp;/g, '&');
          attrs[attrName.toLowerCase ()] = v;
          m = true;
          return '';
        });
      if (!m) break;
    }
    if (e.length) {
      log ('Broken start tag: "' + e + '"');
    }
    token = {type: 'start-tag', value: tagName, attrs: attrs};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  if (p.insertionPoint <= 0) {
    return {type: 'abort'};
  }

  // Character data up to the next "<", cut at the insertion point if any.
  i.s = i.s.replace (/^[^<]+/, function (s) {
    if (p.insertionPoint < s.length) {
      token = {type: 'char', value: s.substring (0, p.insertionPoint)};
      var ip = p.insertionPoint;
      p.insertionPoint = 0;
      return s.substring (ip, s.length);
    }
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Anything else (a "<" that does not start a tag) is a single character.
  i.s = i.s.replace (/^[\s\S]/, function (s) {
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  return {type: 'eof'};
}; // getNextToken
203    
/**
 * Runs the (simplified) tree construction stage until the input is
 * exhausted or the tokeniser aborts at the insertion point.
 *
 * The method is reentrant: document.write () calls it again while a script
 * is being executed.  Only a run that reaches the end of the input goes on
 * to execute the remaining external and deferred scripts and to log the
 * DOMContentLoaded and load events.
 */
Parser.prototype.parse = function () {
  logIndentLevel++;
  log ('parse: start');

  var el;
  while (true) {
    var token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        el = new JSElement (this.doc, token.value);
        if (token.attrs.async != null) el.async = true;
        if (token.attrs.defer != null) el.defer = true;
        if (token.attrs.src != null) el.src = token.attrs.src;

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'script';

        // 4.1. Collect all the character tokens.
        while (true) {
          token = this.getNextToken ();
          log ('token: ' + token.type + ' "' + token.value + '"');

          if (token.type == 'char') {
            // 5. Append a single Text node to the script element node.
            el.manakaiAppendText (token.value);

          // 4.2. Until it returns a token that is not a character token, or
          // until it stops tokenising.
          } else if (token.type == 'eof' ||
                     (token.type == 'end-tag' && token.value == 'script') ||
                     token.type == 'abort') {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            // 7.1. If the next token is not an end tag token with ...
            if (token.type != 'end-tag') {
              // 7.2. This is a parse error.
              log ('Parse error: no </' + 'script>');

              // 7.3. Mark the script element as "already executed".
              el.manakaiAlreadyExecuted = true;
            } else {
              // 7.4. Ignore it.
              //
            }
            break;
          }
        }

        // 8.1. If the parser were originally created for the ...
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and ...
          // Use the same flag name as everywhere else in this file
          // (this branch used to set an |alreadyExecuted| property that
          // nothing reads).
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the ...
        var oldInsertionPoint = this.insertionPoint;
        // 9.2. Let the insertion point be just before the next input ...
        this.setInsertionPoint (0);

        // 10. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 11. Let the insertion point have the value of the old ...
        // Characters written by the script moved the current insertion
        // point forward; shift the old one by the same amount.  If the old
        // one was undefined the sum is NaN, which setInsertionPoint ()
        // turns back into undefined.
        oldInsertionPoint += this.insertionPoint;
        this.setInsertionPoint (oldInsertionPoint);

        // 12. If there is a script that will execute as soon as ...
        while (this.scriptExecutedWhenParserResumes) {
          // 12.1. If the tree construction stage is being called reentrantly
          if (this.reentrant) {
            log ('parse: abort (reentrance)');
            logIndentLevel--;
            return;

          // 12.2. Otherwise
          } else {
            // 1.
            var script = this.scriptExecutedWhenParserResumes;
            this.scriptExecutedWhenParserResumes = null;

            // 2. Pause until the script has completed loading.
            //

            // 3. Let the insertion point to just before the next input char.
            this.setInsertionPoint (0);

            // 4. Execute the script.
            executeScript (this.doc, script);

            // 5. Let the insertion point be undefined again.
            this.setInsertionPoint (undefined);

            // 6. If there is once again a script that will execute ...
            //
          }
        }
      } else {
        el = new JSElement (this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild (el);
        this.openElements.push (el);
      }
    } else if (token.type == 'end-tag') {
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'char') {
      this.openElements[this.openElements.length - 1].manakaiAppendText
          (token.value);
    } else if (token.type == 'eof') {
      break;
    } else if (token.type == 'abort') {
      log ('parse: abort');
      logIndentLevel--;
      return;
    }
  }

  log ('stop parsing');

  // readyState = 'interactive'

  // "When a script completes loading" rules start applying.

  // List of scripts that will execute as soon as possible
  for (var i = 0; i < this.scriptsExecutedSoon.length; i++) {
    var e = this.scriptsExecutedSoon[i];

    // If it has completed loading
    log ('Execute an external script not inserted by parser...');
    executeScript (this.doc, e);

    // NOTE: It MAY be executed before the end of the parsing, according
    // to the spec.
    this.hasAsyncScript = true;
  }

  // TODO: Handles
  // "list of scripts that will execute asynchronously"

  // Handle "list of scripts that will execute when the document has finished
  // parsing".
  var list = this.scriptsExecutedAfterParsing;
  while (list.length > 0) {
    // TODO: break unless completed loading

    // Step 1.
    //

    // Step 2. and Step 3.
    log ('Executing a |defer|red script...');
    executeScript (this.doc, list.shift ());

    // Step 4.
  }

  log ('DOMContentLoaded event fired');

  // "delays the load event" things have completed:
  // readyState = 'complete'
  log ('load event fired');

  logIndentLevel--;
}; // parse
378    
/**
 * Sets the insertion point and logs the change.
 *
 * |ip| is an offset into the remaining input (this.input.s).  undefined,
 * null and NaN all reset the insertion point to undefined.
 */
Parser.prototype.setInsertionPoint = function (ip) {
  if (ip == undefined || ip == null || isNaN (ip)) {
    log ('insertion point: set to undefined');
    this.insertionPoint = undefined;
    return;
  }

  if (ip == this.input.s.length) {
    log ('insertion point: end of file');
  } else {
    // Quote the text that actually follows the insertion point.  (The
    // original always quoted the first ten characters of the input, which
    // is only right when |ip| is 0.)
    log ('insertion point: set to ' + ip +
         ' (before "' + this.input.s.substring (ip, ip + 10) + '")');
  }
  this.insertionPoint = ip;
}; // setInsertionPoint
392    
/**
 * Minimal document node.
 *
 * |p| is the parser building this document; it is kept in |_parser| so
 * that document.write () can feed text back into it.
 */
function JSDocument (p) {
  this._parser = p;
  this.childNodes = [];
} // JSDocument
397    
/**
 * Minimal element node named |localName|, owned by the document |doc|
 * and created without children.
 */
function JSElement (doc, localName) {
  this.ownerDocument = doc;
  this.localName = localName;
  this.childNodes = [];
} // JSElement
403    
/**
 * Appends the node |e| as the last child of this node and returns it.
 *
 * Inserting a |script| element additionally runs the "running a script"
 * steps: depending on its |defer|, |async| and |src| properties the script
 * is queued on the parser or executed immediately.
 */
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push (e);
  e.parentNode = this;

  // Only script elements need further processing.
  if (e.localName != 'script') return e;

  logIndentLevel++;
  log ('Running a script: start');

  var doc = this.ownerDocument || this;
  var p = doc._parser;

  // 1. Script type
  // 2.1. - 2.3. Scripting disabled / XML innerHTML / unsupported language
  // (none of them is implemented)

  // 2.4. If the script element has its "already executed" flag set
  if (e.manakaiAlreadyExecuted) {
    // 2.5. Abort these steps at this point.
    log ('Running a script: aborted');
    logIndentLevel--;
    return e;
  }

  // 3. Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // 4. If the element has a src attribute, then a load for ...
  // TODO: load an external resource

  // 5. The first of the following options:
  var external = e.src != null;
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    // 5.1. Deferred: executed once parsing has finished.
    p.scriptsExecutedAfterParsing.push (e);
    log ('Running a script: aborted (defer)');
  } else if (e.async) {
    // TODO: |async| scripts, with or without |src|
    // (without |src|: only if the list of scripts that will execute
    // asynchronously is not empty)
  } else if (external && e.manakaiParserInserted) {
    if (p.scriptExecutedWhenParserResumes) {
      log ('Error: There is a script that will execute as soon as the parser resumes.');
    }
    p.scriptExecutedWhenParserResumes = e;
    log ('Running a script: aborted (src parser-inserted)');
  } else if (external) {
    p.scriptsExecutedSoon.push (e);
    log ('Running a script: aborted (src)');
  } else {
    executeScript (doc, e); // even if other scripts are already executing.
  }

  log ('Running a script: end');
  logIndentLevel--;
  return e;
}; // appendChild
470    
/**
 * Executes the script element |e| in the context of the document |doc|.
 *
 * The script text is taken from the |src| URI if the element has one,
 * from the element's text content otherwise.  If the external script
 * cannot be obtained, an error event is logged and nothing is run.
 */
function executeScript (doc, e) {
  log ('executing a script block: start');

  var code;
  if (e.src == null) {
    code = e.text;
  } else {
    code = getExternalScript (e.src);

    // If the load resulted in an error, then ... firing an error event ...
    if (code == null) {
      log ('error event fired at the script element');
      return;
    }

    log ('External script loaded: "' + code + '"');
  }

  // If the load was successful
  log ('load event fired at the script element');

  // Scripting is assumed to be enabled, Document.designMode to be disabled
  // and the Document to be the active document in its browsing context.
  parseAndRunScript (doc, code);

  log ('executing a script block: end');
} // executeScript
501    
/**
 * Returns the script text denoted by |uri|, or null if it cannot be
 * obtained (in which case the reason is logged).
 *
 * Only |javascript:| URIs consisting of a single string literal are
 * supported; the (unescaped) content of the literal is the script text.
 * An empty literal yields an empty script rather than a load error.
 */
function getExternalScript (uri) {
  if (!uri.match (/^javascript:/i)) {
    log ('URI scheme not supported: <' + uri + '>');
    return null;
  }

  // Both quoting styles accept an empty body.
  var m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]*)")\s*$/i);
  if (!m) {
    log ('Complex javascript: URI is not supported: <' + uri + '>');
    return null;
  }

  // Exactly one of the two groups took part in the match; the other one
  // is undefined (or empty in some old engines).
  return unescapeJSLiteral (m[1] || m[2] || '');
} // getExternalScript
522    
/**
 * "Executes" the script text |s| against the document |doc|.
 *
 * Only two kinds of statement are recognised, in any number and order:
 *   - document.write (string, ...);  -> doc.write (...)
 *   - the three-statement idiom that creates a script element with a
 *     |javascript:| |src| and appends it to the root element
 *                                    -> doc._insertExternalScript (uri)
 * Anything else is logged as a script parse error and ends the execution.
 * A script that is empty or consists of white space only is a no-op.
 */
function parseAndRunScript (doc, s) {
  while (true) {
    var matched = false;

    // document.write ('...', "...", ...);
    s = s.replace (/^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/, function (statement, argList) {
      matched = true;
      var args = [];
      // replace () is used here only to visit every string literal.
      argList.replace (/('[^']*'|"[^"]*")/g, function (x, literal) {
        args.push (unescapeJSLiteral
            (literal.substring (1, literal.length - 1)));
        return '';
      });
      doc.write.apply (doc, args);
      return '';
    });

    // var s = document.createElement ('script'); s.src = '...'; ...
    s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'(javascript:[^']*)'|"(javascript:[^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
      function (statement, t, u) {
        matched = true;
        var args = [unescapeJSLiteral (t ? t : u)];
        doc._insertExternalScript.apply (doc, args);
        return '';
      });

    // Nothing but white space left: the script has been fully processed.
    // (Leading white space is otherwise consumed only as part of a
    // statement, so a white-space-only script would be reported as an
    // error.)
    if (/^\s*$/.test (s)) break;
    if (!matched) {
      log ('Script parse error: "' + s + '"');
      break;
    }
  }
} // parseAndRunScript
550    
/**
 * Decodes the escape sequences of a JavaScript string literal body.
 *
 * Only the \uHHHH form (exactly four hexadecimal digits) is supported;
 * everything else is returned unchanged.
 */
function unescapeJSLiteral (s) {
  var escapePattern = /\\u([0-9A-Fa-f]{4})/g;
  return s.replace (escapePattern, function (whole, hex) {
    var codeUnit = parseInt (hex, 16);
    return String.fromCharCode (codeUnit);
  });
} // unescapeJSLiteral
556    
/**
 * Minimal text node; its character data is kept in the |data| property.
 */
function JSText (data) {
  this.data = data;
} // JSText
560    
/**
 * Appends the character data |s| to this node: the text is added to the
 * last child if that is a text node, otherwise a new text node is created.
 */
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var children = this.childNodes;
  var last = children.length > 0 ? children[children.length - 1] : null;
  if (last instanceof JSText) {
    last.data += s;
  } else {
    children.push (new JSText (s));
  }
}; // manakaiAppendText
571 wakaba 1.2
/**
 * document.open ([type [, replace]])
 *
 * Unless the call has to be ignored (see step 3), clears the document and
 * attaches a fresh, script-created parser whose insertion point is at the
 * end of its (empty) input.  Returns the document.
 */
JSDocument.prototype.open = function () {
  // Two or fewer arguments

  // Step 1.
  // NOTE: |type| is currently unused, since step 9 is not implemented.
  var type = arguments[0] || 'text/html';

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  // The insertion point is a property of the parser itself, not of its
  // input stream.  (The original tested |_parser.input.insertionPoint|,
  // which is never set, so this check could never succeed.)
  if (this._parser &&
      !this._parser.scriptCreated &&
      this._parser.insertionPoint != undefined) {
    log ('document.open () in parsing mode is ignored');
    return this;
  }

  // Step 4.
  log ('onbeforeunload event fired');
  log ('onunload event fired');

  // Step 5.
  if (this._parser) {
    // Discard the parser.
  }

  // Step 6.
  log ('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser (new InputStream (''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  this._parser.setInsertionPoint (this._parser.input.s.length);

  // Step 12.
  return this;
}; // document.open
623    
/**
 * document.write (string, ...)
 *
 * Concatenates the arguments, inserts the result into the parser's input
 * just before the insertion point and, unless a parser-blocking external
 * script is pending, parses the inserted text immediately (reentrantly).
 */
JSDocument.prototype.write = function () {
  logIndentLevel++;

  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
    this.open ();
    p = this._parser;
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // |arguments| is not a real array; borrow join () from Array.prototype.
  // (The |Array.join| generic used originally is a non-standard Mozilla
  // extension that current browsers no longer provide.)
  var s = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + s + '"' +
       ' before "' +
       p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
  p.input.s = p.input.s.substring (0, p.insertionPoint) + s
      + p.input.s.substring (p.insertionPoint, p.input.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a script that will execute as soon as the parser resumes
  // This is the property set by appendChild () and consumed by parse ().
  // (The original tested a misspelt |scriptExecutedAfterParserResumes|.)
  if (p.scriptExecutedWhenParserResumes) {
    log ('document.write: processed later (there is an unprocessed <script src>)');
    logIndentLevel--;
    return;
  }

  // 4. Process the characters that were inserted, ...
  var originalReentrant = p.reentrant;
  p.reentrant = true;
  p.parse ();
  p.reentrant = originalReentrant;
  // TODO: "Abort the processing of any nested invokations of the tokeniser,
  // yielding control back to the caller." (<script> parsing). Do we need
  // to do something here?

  // 5. Return
  log ('document.write: return');

  logIndentLevel--;
  return;
}; // document.write
666    
/**
 * Emulates a script that creates a |script| element whose |src| is |uri|
 * and appends it to the root element.  The element is not marked as
 * parser-inserted.
 */
JSDocument.prototype._insertExternalScript = function (uri) {
  var scriptElement = new JSElement (this, 'script');
  scriptElement.src = uri;
  var root = this.documentElement;
  root.appendChild (scriptElement);
}; // _insertExternalScript
672    
// document.documentElement: the first element child of the document, or
// null if the document has no element child.
JSDocument.prototype.__defineGetter__ ('documentElement', function () {
  var children = this.childNodes;
  var index = 0;
  while (index < children.length) {
    var child = children[index];
    if (child instanceof JSElement) return child;
    index++;
  }
  return null;
});
682    
// element.text: the concatenation of the data of all text node children
// (descendants of child elements are not included).
JSElement.prototype.__defineGetter__ ('text', function () {
  var children = this.childNodes;
  var result = '';
  for (var n = 0; n < children.length; n++) {
    var child = children[n];
    if (!(child instanceof JSText)) continue;
    result += child.data;
  }
  return result;
});
692 wakaba 1.1
/**
 * Serialises the children of the node |n| as an indented tree, one
 * "| "-prefixed line per node, and returns the result.
 *
 * Elements are followed by their |async|, |defer| and |src| properties
 * (if set) and by their own children, both indented one level deeper than
 * |indent|.  Text nodes are shown quoted.
 */
function dumpTree (n, indent) {
  var prefix = '| ' + indent;
  var lines = [];
  for (var k = 0; k < n.childNodes.length; k++) {
    var child = n.childNodes[k];
    if (child instanceof JSElement) {
      lines.push (prefix + child.localName + '\n');
      if (child.async) lines.push (prefix + ' async=""\n');
      if (child.defer) lines.push (prefix + ' defer=""\n');
      if (child.src != null) {
        lines.push (prefix + ' src="' + child.src + '"\n');
      }
      lines.push (dumpTree (child, indent + ' '));
    } else if (child instanceof JSText) {
      lines.push (prefix + '"' + child.data + '"\n');
    } else {
      lines.push (prefix + child + '\n');
    }
  }
  return lines.join ('');
} // dumpTree
713     </script>
714     </head>
715     <body onload="
716     document.sourceElement = document.getElementsByTagName ('textarea')[0];
717 wakaba 1.8
718     var q = location.search;
719     if (q != null) {
720     q = q.substring (1).split (/;/);
721     for (var i = 0; i < q.length; i++) {
722     var v = q[i].split (/=/, 2);
723     v[0] = decodeURIComponent (v[0]);
724     v[1] = decodeURIComponent (v[1] || '');
725     if (v[0] == 's') {
726     document.sourceElement.value = v[1];
727     }
728     }
729     }
730    
731 wakaba 1.1 document.logElement = document.getElementsByTagName ('output')[0];
732     update ();
733     ">
734 wakaba 1.8 <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
735     Parser</h1>
736 wakaba 1.1
737 wakaba 1.7 <h2>Markup to test
738 wakaba 1.8 (<a href=data:, id=permalink rel=bookmark>permalink</a>,
739     <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
740     id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
741     Viewer</a>)</h2>
742 wakaba 1.7 <p>
743     <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
744 wakaba 1.1 &lt;head>&lt;/head>&lt;body>
745     &lt;p>
746     &lt;script>
747 wakaba 1.3 document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
748 wakaba 1.1 &lt;/script>
749     &lt;p>
750     </textarea>
751    
752 wakaba 1.10 <h2 id=log>Log</h2>
753 wakaba 1.7 <p><output></output>
754    
755 wakaba 1.10 <h2 id=notes>Notes</h2>
756 wakaba 1.8
757     <p>This is a <em>simplified</em> implementation of
758     <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
759     Parsing Algorithm</a>. It only implements the script-related part of the
760     algorithm. In particular, this parser:
761     <ul>
762     <li>Does not support <code>DOCTYPE</code> and comment tokens.
763     <li>Does not support entities except for <code>&amp;quot;</code>,
764     <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
765     <code>src</code> attribute value.
766     <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
767     algorithm, and so on.
768     <li>Does not raise parse errors for invalid attribute specifications in start
769     or end tags.
770     <li>Does not support CDATA/PCDATA element other than <code>script</code>.
771     <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
772     in <code>script</code> element.
773     <li>Does not support foreign (SVG or MathML) elements.
774     <li>Only supports <code>script</code> <code>type</code>
775     <code>text/javascript</code>. <code>type</code> and <code>language</code>
776     attributes are ignored.
777 wakaba 1.10 <li>Only supports limited statements. It must consist of zero or more
778     of statements looking similar to the following statements, possibly
779     introduced, followed, or separated by white space characters:
780     <ul>
781     <li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>.
782     <li><code>var s = document.createElement ("script");
783     s.src = "<var>string</var>";
784     document.documentElement.appendChild (s);</code>
785     </ul>
786     Note that strings may be delimited by <code>'</code>s instead of
787     <code>"</code>s.
788 wakaba 1.8 <li>Only supports <code>javascript:</code>
789     <abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the
790     <code>src</code> attribute of the <code>script</code> element. In addition,
791     the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to
792     the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
793 wakaba 1.11 <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
794     string literals.
795 wakaba 1.8 </ul>
796 wakaba 1.7
797 wakaba 1.8 <p>For some reason, this parser does not work in browsers that do
798     not support JavaScript 1.5.
799 wakaba 1.10
800     <!-- TODO: license -->
801 wakaba 1.1
802     </body>
803     </html>

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24