/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.16 - (hide annotations) (download) (as text)
Tue Apr 29 04:07:18 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.15: +33 -3 lines
File MIME type: text/html
Allow CDATA end tag on or after an insertion point since the spec can be interpreted such that this is allowed

1 wakaba 1.1 <!DOCTYPE HTML>
2     <html lang=en>
3     <head>
4 wakaba 1.8 <title>Live Scripting HTML Parser</title>
5 wakaba 1.13 <link rel=author href="http://suika.fam.cx/~wakaba/who?">
6     <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
7     title="GNU GPL2 or later">
8 wakaba 1.1 <style>
9 wakaba 1.13 h1 {
10     margin: 0;
11     font-size: 150%;
12     }
13     h2 {
14 wakaba 1.7 margin: 0;
15     font-size: 100%;
16     }
17 wakaba 1.13 p {
18     margin: 0 1em;
19 wakaba 1.7 }
20 wakaba 1.1 textarea {
21 wakaba 1.7 width: 100%;
22     -width: 99%;
23     height: 10em;
24 wakaba 1.1 }
25     output {
26     display: block;
27     font-family: monospace;
28 wakaba 1.4 white-space: -moz-pre-wrap;
29     white-space: pre-wrap;
30 wakaba 1.1 }
31     </style>
32     <script>
// Timer handle of the pending re-parse, or 0 when nothing is scheduled.
var delayedUpdater = 0;

// Debounces input events: cancels any pending re-parse and schedules a new
// one 100ms from now.
function update () {
  if (delayedUpdater) clearTimeout (delayedUpdater);
  delayedUpdater = setTimeout (update2, 100);
} // update

// Re-parses the textarea content if it changed since the last run, refreshes
// the permalink / Live DOM Viewer links, and writes the resulting tree dump
// to the log element.
function update2 () {
  var source = document.sourceElement.value;
  if (source == document.previousSourceText) return;
  document.previousSourceText = source;

  var encoded = encodeURIComponent (source);
  document.links['permalink'].href = location.pathname + '?s=' + encoded;
  document.links['ldvlink'].href
      = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?' + encoded;

  // Start from an empty log for every run.
  document.logElement.textContent = '';
  var parser = new Parser (new InputStream (source));
  var parsedDocument = parser.doc;
  parser.parse ();

  log (dumpTree (parsedDocument, ''));

  if (parser.hasAsyncScript) {
    log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
  }
} // update2
65 wakaba 1.1
// Current nesting depth of log output; incremented/decremented by callers.
var logIndentLevel = 0;

// Appends |s| (plus a trailing newline) to the log element as a text node,
// prefixing every line of |s| with one space per indentation level.
function log (s) {
  var prefix = '';
  var remaining = logIndentLevel;
  while (remaining > 0) {
    prefix += ' ';
    remaining--;
  }
  var text = prefix + s.split ('\n').join ('\n' + prefix);
  document.logElement.appendChild (document.createTextNode (text + "\n"));
} // log
75    
// Mutable holder for the not-yet-tokenized input text.  The tokenizer
// consumes from, and document.write inserts into, the |s| property.
function InputStream (s) {
  this.s = s;
} // InputStream
79    
// Creates a parser reading from the input stream |i|.  When |doc| is not
// supplied, a fresh JSDocument (flagged as HTML) bound to this parser is
// created.  The document itself is the bottom entry of the open element
// stack.
function Parser (i, doc) {
  if (!doc) {
    doc = new JSDocument (this);
    doc.manakaiIsHTML = true;
  }

  // Tokenizer state.
  this.parseMode = 'pcdata';
  this.input = i;
  this.nextToken = []; // tokens pushed back by the tree construction stage

  // Tree construction state.
  this.doc = doc;
  this.openElements = [doc];

  // Pending script lists.
  this.scriptsExecutedAfterParsing = [];
  this.scriptsExecutedSoon = [];
  this.scriptsExecutedAsynchronously = [];
} // Parser
94    
// Returns the next token from the input stream.
//
// Token shapes: {type: 'char', value}, {type: 'start-tag', value, attrs},
// {type: 'end-tag', value}, {type: 'eof'}, and {type: 'abort'} (returned
// when the tokenizer reaches the insertion point and must stop).
//
// Tokens previously pushed back onto |this.nextToken| are returned first.
// Consumed text is removed from the head of |this.input.s| and the insertion
// point is decreased by the number of consumed characters.
Parser.prototype.getNextToken = function () {
  if (this.nextToken.length) {
    return this.nextToken.shift ();
  }

  var p = this;
  var i = this.input;
  var token;

  if (this.parseMode == 'cdata') {
    var tagName = this.endTagName;
    if (p.insertionPoint <= 0) {
      return {type: 'abort'};
    }

    // Character data up to the next "<" (or up to the insertion point).
    i.s = i.s.replace (/^([^<]+)/,
      function (s, t) {
        if (0 < p.insertionPoint && p.insertionPoint < t.length) {
          token = {type: 'char', value: t.substring (0, p.insertionPoint)};
          var ip = p.insertionPoint;
          p.insertionPoint = 0;
          return t.substring (ip, t.length);
        }
        token = {type: 'char', value: t};
        p.insertionPoint -= t.length;
        return '';
      });
    if (token) return token;

    // The matching end tag, only if it lies entirely before the insertion
    // point.
    var pattern = new RegExp ('^</' + tagName + '>', 'i');
    i.s = i.s.replace (pattern, function (s) {
      if (p.insertionPoint < s.length) {
        token = {type: 'abort'};
        return s;
      }
      token = {type: 'end-tag', value: tagName};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    // A possibly incomplete end tag cut by the insertion point: stop and
    // wait if what is available so far is a prefix of the expected name.
    var m;
    if ((p.insertionPoint < ('</' + tagName).length) &&
        (m = i.s.match (/^<\/([A-Za-z]+)/))) {
      var v = m[1].substring (0, p.insertionPoint).toLowerCase ();
      if (v == tagName.substring (0, p.insertionPoint - '</'.length)) {
        return {type: 'abort'};
      }
    }

    // A "<" that does not start the end tag is plain character data.
    i.s = i.s.replace (/^</,
      function (s) {
        token = {type: 'char', value: s};
        p.insertionPoint -= s.length;
        return '';
      });
    if (token) return token;
    return {type: 'eof'};
  }

  // PCDATA: end tag.
  i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
    if (p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length &&
         s.substring (s.length - 1, s.length) != '>')) {
      token = {type: 'abort'};
      return s;
    }
    token = {type: 'end-tag', value: e.toLowerCase ()};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // PCDATA: start tag with attributes.
  i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
    if (p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length &&
         s.substring (s.length - 1, s.length) != '>')) {
      token = {type: 'abort'};
      return s;
    }
    var tagName;
    var attrs = {};
    e = e.replace (/^[\S]+/, function (v) {
      tagName = v.toLowerCase ();
      return '';
    });
    while (true) {
      var m = false;
      e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
        function (x, attrName, attrValue1, attrValue2, attrValue3) {
          // Capture groups that did not participate in the match are
          // |undefined| in conforming engines (e.g. for a valueless
          // attribute such as |defer|); fall back to the empty string so
          // that the attribute is recorded instead of throwing.
          var value = attrValue1 || attrValue2 || attrValue3 || '';
          value = value.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
              .replace (/&amp;/g, '&');
          attrs[attrName.toLowerCase ()] = value;
          m = true;
          return '';
        });
      if (!m) break;
    }
    if (e.length) {
      log ('Broken start tag: "' + e + '"');
    }
    token = {type: 'start-tag', value: tagName, attrs: attrs};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  if (p.insertionPoint <= 0) {
    return {type: 'abort'};
  }

  // PCDATA: character data up to the next "<" (or the insertion point).
  i.s = i.s.replace (/^[^<]+/, function (s) {
    if (p.insertionPoint < s.length) {
      token = {type: 'char', value: s.substring (0, p.insertionPoint)};
      var ip = p.insertionPoint;
      p.insertionPoint = 0;
      return s.substring (ip, s.length);
    }
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Anything else (e.g. a lone "<"): a single character.
  i.s = i.s.replace (/^[\s\S]/, function (s) {
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  return {type: 'eof'};
}; // getNextToken
220    
// Runs the simplified tree construction loop: pulls tokens from
// getNextToken () and builds the tree under |this.doc| until the tokenizer
// reports 'eof' or 'abort'.
//
// On 'abort' (or a reentrant call that must yield to a pending script) the
// method returns early without running the end-of-parsing steps.  On 'eof'
// it executes the queued "as soon as possible", asynchronous and deferred
// scripts, in that order, and logs the DOMContentLoaded / load events.
//
// Step numbers in the comments refer to the HTML5 draft algorithms this code
// was written against.
Parser.prototype.parse = function () {
  logIndentLevel++;
  log ('parse: start');

  while (true) {
    var token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (this.cdataEndTagRequired) {
      // Generic CDATA parsing algorithm, resumed after an abort at the
      // insertion point.

      if (token.type != 'abort') {
        // 7.
        if (token.type == 'end-tag' && token.value == this.endTagName) {
          // 7.1. Ignores it.
          //
        } else {
          // 7.2. Parse error.
          log ('Parse error: no </' + this.endTagName + '>');
          this.nextToken.unshift (token);
        }
        this.cdataEndTagRequired = false;
        continue;
      }
    }

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);
        if (token.attrs.async != null) el.async = true;
        if (token.attrs.defer != null) el.defer = true;
        if (token.attrs.src != null) el.src = token.attrs.src;

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'cdata';
        this.endTagName = 'script';

        // 4.1. Collect all the character tokens.
        while (true) {
          var token = this.getNextToken ();
          log ('token: ' + token.type + ' "' + token.value + '"');

          if (token.type == 'char') {
            // 5. Append a single Text node to the script element node.
            el.manakaiAppendText (token.value);

          // 4.2. Until it returns a token that is not a character token, or
          // until it stops tokenising.
          } else if (token.type == 'eof' ||
                     token.type == 'end-tag' ||
                     token.type == 'abort') {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            // 7.1. If the next token is not an end tag token with ...
            if (!(token.type == 'end-tag' && token.value == 'script')) {
              // 7.2. This is a parse error.
              log ('Parse error: no </' + 'script>');
              this.nextToken.unshift (token);

              // 7.3. Mark the script element as "already executed".
              el.manakaiAlreadyExecuted = true;
            } else {
              // 7.4. Ignore it.
              //
            }
            break;
          }
        }

        // 8.1. If the parser were originally created for the ...
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and ...
          // (Uses the same flag name that appendChild checks.)
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the ...
        var oldInsertionPoint = this.insertionPoint;
        // 9.2. Let the insertion point be just before the next input ...
        this.setInsertionPoint (0);

        // 10. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 11. Let the insertion point have the value of the old ...
        // The old value is shifted by whatever the script inserted.
        oldInsertionPoint += this.insertionPoint;
        this.setInsertionPoint (oldInsertionPoint);

        // 12. If there is a script that will execute as soon as ...
        while (this.scriptExecutedWhenParserResumes) {
          // 12.1. If the tree construction stage is being called reentrantly
          if (this.reentrant) {
            log ('parse: abort (reentrance)');
            logIndentLevel--;
            return;

          // 12.2. Otherwise
          } else {
            // 1.
            var script = this.scriptExecutedWhenParserResumes;
            this.scriptExecutedWhenParserResumes = null;

            // 2. Pause until the script has completed loading.
            //

            // 3. Let the insertion point to just before the next input char.
            this.setInsertionPoint (0);

            // 4. Execute the script.
            executeScript (this.doc, script);

            // 5. Let the insertion point be undefined again.
            this.setInsertionPoint (undefined);

            // 6. If there is once again a script that will execute ...
            //
          }
        }
      } else if (token.value == 'style' ||
                 token.value == 'noscript' ||
                 token.value == 'xmp') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);

        // 2. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'cdata';
        this.endTagName = token.value;

        // 4.1. Collect all the character tokens.
        while (true) {
          var token = this.getNextToken ();
          log ('token: ' + token.type + ' "' + token.value + '"');

          if (token.type == 'char') {
            // 5. Append a single Text node to the element node.
            el.manakaiAppendText (token.value);

          // 4.2. Until it returns a token that is not a character token, or
          // until it stops tokenising.
          } else if (token.type == 'eof' ||
                     token.type == 'end-tag' ||
                     token.type == 'abort') {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            if (token.type == 'abort') {
              // The end tag check is postponed to the next token seen by
              // the main loop (see the |cdataEndTagRequired| branch above).
              this.cdataEndTagRequired = true;
              break;
            }

            // 7.1. If the next token is not an end tag token with ...
            if (!(token.type == 'end-tag' &&
                  token.value == this.endTagName)) {
              // 7.2. This is a parse error.
              log ('Parse error: no </' + this.endTagName + '>');
              this.nextToken.unshift (token);

              // 7.3. Mark the script element as "already executed".
              el.manakaiAlreadyExecuted = true;
            } else {
              // 7.4. Ignore it.
              //
            }
            break;
          }
        }
      } else {
        var el = new JSElement (this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild (el);
        this.openElements.push (el);
      }
    } else if (token.type == 'end-tag') {
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'char') {
      this.openElements[this.openElements.length - 1].manakaiAppendText
          (token.value);
    } else if (token.type == 'eof') {
      break;
    } else if (token.type == 'abort') {
      log ('parse: abort');
      logIndentLevel--;
      return;
    }
  }

  log ('stop parsing');

  // readyState = 'interactive'

  // "When a script completes loading" rules start applying.

  while (this.scriptsExecutedSoon.length > 0 ||
         this.scriptsExecutedAsynchronously.length > 0) {
    // Handle "list of scripts that will execute as soon as possible".
    while (this.scriptsExecutedSoon.length > 0) {
      var e = this.scriptsExecutedSoon.shift ();

      // If it has completed loading
      log ('Execute an external script not inserted by parser...');
      executeScript (this.doc, e);

      // NOTE: It MAY be executed before the end of the parsing, according
      // to the spec.
      this.hasAsyncScript = true;
    }

    // Handle "list of scripts that will execute asynchronously".
    while (this.scriptsExecutedAsynchronously.length > 0) {
      var e = this.scriptsExecutedAsynchronously.shift ();

      // Step 1.
      // We assume that all scripts have been loaded at this time.

      // Step 2.
      log ('Execute an asynchronous script...');
      executeScript (this.doc, e);

      // Step 3.
      //

      // Step 4.
      //

      this.hasAsyncScript = true;
    }
  }

  // Handle "list of scripts that will execute when the document has finished
  // parsing".
  var list = this.scriptsExecutedAfterParsing;
  while (list.length > 0) {
    // TODO: break unless completed loading

    // Step 1.
    //

    // Step 2. and Step 3.
    log ('Executing a |defer|red script...');
    executeScript (this.doc, list.shift ());

    // Step 4.
  }

  log ('DOMContentLoaded event fired');

  // "delays the load event" things have completed:
  // readyState = 'complete'
  log ('load event fired');

  logIndentLevel--;
}; // parse
486    
// Sets the insertion point to |ip|, an offset into the remaining input, and
// logs the change.  |undefined|, |null| and NaN all mean "no insertion
// point" and are stored as |undefined|.
Parser.prototype.setInsertionPoint = function (ip) {
  // |ip == null| is true for both null and undefined.
  if (ip == null || isNaN (ip)) {
    log ('insertion point: set to undefined');
    this.insertionPoint = undefined;
    return;
  }

  var remaining = this.input.s;
  if (ip == remaining.length) {
    log ('insertion point: end of file');
  } else {
    log ('insertion point: set to ' + ip +
         ' (before "' + remaining.substring (0, 10) + '")');
  }
  this.insertionPoint = ip;
}; // setInsertionPoint
500    
// Minimal document node: remembers the parser |p| that populates it and
// starts with no children.
function JSDocument (p) {
  this._parser = p;
  this.childNodes = [];
} // JSDocument
505    
// Minimal element node named |localName| that belongs to |doc| and starts
// with no children.
function JSElement (doc, localName) {
  this.ownerDocument = doc;
  this.localName = localName;
  this.childNodes = [];
} // JSElement
511    
// Appends |e| as the last child of this node and returns it.  Appending a
// |script| element additionally runs the "running a script" steps: the
// script is either queued on one of the parser's pending lists or executed
// immediately.
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push (e);
  e.parentNode = this;

  // Only script elements need any further processing.
  if (e.localName != 'script') return e;

  logIndentLevel++;
  log ('Running a script: start');

  var ownerDoc = this.ownerDocument || this;
  var parser = ownerDoc._parser;

  // 1. Script type
  //

  // 2.1. If scripting is disabled
  //
  // 2.2. If the script element was created by an XML ... innerHTML ...
  //
  // 2.3. If the user agent does not support the scripting language ...
  //
  // 2.4. If the script element has its "already executed" flag set
  if (e.manakaiAlreadyExecuted) {
    // 2.5. Abort these steps at this point.
    log ('Running a script: aborted (already executed)');
    logIndentLevel--;
    return e;
  }

  // 3. Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // 4. If the element has a src attribute, then a load for ...
  // TODO: load an external resource

  // 5. The first of the following options:
  var hasSrc = e.src != null;

  // 5.1.
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    parser.scriptsExecutedAfterParsing.push (e);
    log ('Running a script: aborted (defer)');
  } else if (e.async && hasSrc) {
    parser.scriptsExecutedAsynchronously.push (e);
    log ('Running a script: aborted (async src)');
  } else if (e.async && !hasSrc &&
             parser.scriptsExecutedAsynchronously.length > 0) {
    parser.scriptsExecutedAsynchronously.push (e);
    log ('Running a script: aborted (async)');
    // ISSUE: What is the difference with the case above?
  } else if (hasSrc && e.manakaiParserInserted) {
    if (parser.scriptExecutedWhenParserResumes) {
      log ('Error: There is a script that will execute as soon as the parser resumes.');
    }
    parser.scriptExecutedWhenParserResumes = e;
    log ('Running a script: aborted (src parser-inserted)');
  } else if (hasSrc) {
    parser.scriptsExecutedSoon.push (e);
    log ('Running a script: aborted (src)');
  } else {
    // Executed even if other scripts are already executing.
    executeScript (ownerDoc, e);
  }

  log ('Running a script: end');
  logIndentLevel--;

  return e;
}; // appendChild
581    
// Executes the script element |e| against |doc|.  The script text comes
// from getExternalScript () when the element has a |src|, otherwise from
// the element's own text content.  A failed load logs an error event and
// skips execution.
function executeScript (doc, e) {
  log ('executing a script block: start');

  var code;
  if (e.src == null) {
    code = e.text;
  } else {
    code = getExternalScript (e.src);

    // If the load resulted in an error, then ... firing an error event ...
    if (code == null) {
      log ('error event fired at the script element');
      return;
    }

    log ('External script loaded: "' + code + '"');
  }

  // If the load was successful
  log ('load event fired at the script element');

  // Scripting is always treated as enabled here: Document.designMode is
  // disabled and the Document is the active document in its browsing
  // context.
  parseAndRunScript (doc, code);

  log ('executing a script block: end');
} // executeScript
612    
// Resolves the |src| URI of a script element to script source text.
//
// Only |javascript:| URIs consisting of a single string literal (single- or
// double-quoted, possibly empty) are supported; the literal's content, with
// \uHHHH escapes resolved, is returned as the script text.  Any other URI is
// logged and yields |null|, which callers treat as a failed load.
function getExternalScript (uri) {
  if (!uri.match (/^javascript:/i)) {
    log ('URI scheme not supported: <' + uri + '>');
    return null;
  }

  // Both quote styles accept an empty literal, so that |javascript:''| and
  // |javascript:""| load an empty script rather than failing.
  var m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]*)")\s*$/i);
  if (!m) {
    log ('Complex javascript: URI is not supported: <' + uri + '>');
    return null;
  }

  // Exactly one of the two groups participated in the match; the other is
  // undefined (or empty in some legacy engines).
  return unescapeJSLiteral (m[1] || m[2] || '');
} // getExternalScript
633    
// Interprets the script text |s| against |doc|.  Only three statement forms
// are recognized, each consumed from the head of the text in turn:
//   document.write ('...', '...');
//   var s = document.createElement ('script'); s.src = '...';
//       document.documentElement.appendChild (s);
//   w (document.documentElement.innerHTML);
// Processing stops when the text is exhausted, when no form matches (logged
// as a script parse error), or when an external script cannot be inserted
// because there is no document element.
function parseAndRunScript (doc, s) {
  var writePattern = /^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/;
  var insertPattern = /^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'([^']*)'|"([^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/;
  var dumpPattern = /^\s*w\s*\(\s*document\.documentElement\.innerHTML\s*\)\s*;\s*/;

  for (;;) {
    var progressed = false;

    // document.write (...)
    s = s.replace (writePattern, function (whole, argList) {
      progressed = true;
      var literals = [];
      // |replace| is used only to visit every string literal in turn.
      argList.replace (/('[^']*'|"[^"]*")/g, function (all, quoted) {
        literals.push
            (unescapeJSLiteral (quoted.substring (1, quoted.length - 1)));
        return '';
      });
      doc.write.apply (doc, literals);
      return '';
    });

    // createElement ('script') + src + appendChild
    var missingRoot = false;
    s = s.replace (insertPattern, function (whole, single, dbl) {
      progressed = true;
      var uri = unescapeJSLiteral (single ? single : dbl);
      missingRoot = !doc._insertExternalScript.apply (doc, [uri]);
      return '';
    });
    if (missingRoot) {
      log ('Script error: documentElement is null');
      break;
    }

    // w (document.documentElement.innerHTML)
    s = s.replace (dumpPattern, function () {
      progressed = true;
      log (dumpTree (doc, ''));
      return '';
    });

    if (s == '') break;
    if (!progressed) {
      log ('Script parse error: "' + s + '"');
      break;
    }
  }
} // parseAndRunScript
672    
// Replaces every \uHHHH escape (exactly four hexadecimal digits) in |s| with
// the UTF-16 code unit it denotes.  No other escape sequence is interpreted.
function unescapeJSLiteral (s) {
  var escapePattern = /\\u([0-9A-Fa-f]{4})/g;
  return s.replace (escapePattern, function (whole, hexDigits) {
    var codeUnit = parseInt (hexDigits, 16);
    return String.fromCharCode (codeUnit);
  });
} // unescapeJSLiteral
678    
// Minimal text node holding its character data in |data|.
function JSText (data) {
  this.data = data;
} // JSText
682    
// Appends the text |s| to this node: extends the last child when it is
// already a text node, otherwise adds a new JSText child.
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var children = this.childNodes;
  // |undefined| when there are no children; fails the instanceof test.
  var lastChild = children[children.length - 1];
  if (lastChild instanceof JSText) {
    lastChild.data += s;
    return;
  }
  children.push (new JSText (s));
}; // manakaiAppendText
693 wakaba 1.2
// Simplified document.open ([type [, replace]]).
//
// Ignored (returns the document unchanged) while a parser that was not
// created by a script has a defined insertion point.  Otherwise the
// document's children are discarded and a new script-created parser with an
// empty input stream replaces the old one, with the insertion point at the
// end of that stream.  Always returns |this|.
JSDocument.prototype.open = function () {
  // Two or fewer arguments

  // Step 1.
  var type = arguments[0] || 'text/html';

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  // The insertion point is tracked on the parser itself (see
  // Parser.prototype.setInsertionPoint), not on its input stream.
  if (this._parser &&
      !this._parser.scriptCreated &&
      this._parser.insertionPoint != undefined) {
    log ('document.open () in parsing mode is ignored');
    return this;
  }

  // Step 4.
  log ('onbeforeunload event fired');
  log ('onunload event fired');

  // Step 5.
  if (this._parser) {
    // Discard the parser.
  }

  // Step 6.
  log ('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser (new InputStream (''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  this._parser.setInsertionPoint (this._parser.input.s.length);

  // Step 12.
  return this;
}; // document.open
745    
// Simplified document.write (...): concatenates all arguments, inserts the
// result into the parser's input stream at the insertion point, and then
// re-enters the parser to process the inserted text.
//
// If the insertion point is undefined, open () is called first.  If a script
// is waiting to run when the parser resumes, the text is inserted but not
// processed yet.
JSDocument.prototype.write = function () {
  log ('document.write: start');
  logIndentLevel++;

  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
    this.open ();
    p = this._parser;
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // |arguments| is not a real array, so borrow Array.prototype.join.
  var s = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + s + '"' +
       ' before "' +
       p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
  p.input.s = p.input.s.substring (0, p.insertionPoint) + s
      + p.input.s.substring (p.insertionPoint, p.input.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a script that will execute as soon as the parser resumes
  // (the same property that appendChild sets and parse () consumes).
  if (p.scriptExecutedWhenParserResumes) {
    log ('document.write: processed later (there is an unprocessed <script src>)');
    logIndentLevel--;
    log ('document.write: return');
    return;
  }

  // 4. Process the characters that were inserted, ...
  var originalReentrant = p.reentrant;
  p.reentrant = true;
  p.parse ();
  p.reentrant = originalReentrant;
  // TODO: "Abort the processing of any nested invocations of the tokeniser,
  // yielding control back to the caller." (<script> parsing).  Do we need
  // to do something here?

  // 5. Return
  logIndentLevel--;
  log ('document.write: return');

  return;
}; // document.write
790    
// Creates a |script| element whose |src| is |uri| and appends it to the
// document element.  Returns true on success, or false (without appending
// anything) when the document has no document element.
JSDocument.prototype._insertExternalScript = function (uri) {
  var script = new JSElement (this, 'script');
  script.src = uri;

  var root = this.documentElement;
  if (!root) return false;

  root.appendChild (script);
  return true;
}; // _insertExternalScript
801    
// |documentElement|: the first child of the document that is an element, or
// null when there is none.
JSDocument.prototype.__defineGetter__ ('documentElement', function () {
  var children = this.childNodes;
  var index = 0;
  while (index < children.length) {
    var child = children[index];
    if (child instanceof JSElement) return child;
    index++;
  }
  return null;
});
811    
// |text|: the concatenated data of all text node children (direct children
// only; element children are skipped).
JSElement.prototype.__defineGetter__ ('text', function () {
  var children = this.childNodes;
  var result = '';
  for (var k = 0; k < children.length; k++) {
    var child = children[k];
    if (!(child instanceof JSText)) continue;
    result += child.data;
  }
  return result;
});
821 wakaba 1.1
// Serializes the children of |n| recursively, one node per line, each line
// starting with "| " followed by |indent|.  Elements print their local name
// and then their async / defer / src values; text nodes print their data in
// double quotes; any other child is printed via string conversion.
function dumpTree (n, indent) {
  var linePrefix = '| ' + indent;
  var out = '';
  var children = n.childNodes;
  for (var k = 0; k < children.length; k++) {
    var child = children[k];
    if (child instanceof JSElement) {
      out += linePrefix + child.localName + '\n';
      if (child.async) out += linePrefix + ' async=""\n';
      if (child.defer) out += linePrefix + ' defer=""\n';
      if (child.src != null) {
        out += linePrefix + ' src="' + child.src + '"\n';
      }
      out += dumpTree (child, indent + ' ');
    } else if (child instanceof JSText) {
      out += linePrefix + '"' + child.data + '"\n';
    } else {
      out += linePrefix + child + '\n';
    }
  }
  return out;
} // dumpTree
842     </script>
843     </head>
844     <body onload="
845     document.sourceElement = document.getElementsByTagName ('textarea')[0];
846 wakaba 1.8
847     var q = location.search;
848     if (q != null) {
849     q = q.substring (1).split (/;/);
850     for (var i = 0; i < q.length; i++) {
851     var v = q[i].split (/=/, 2);
852     v[0] = decodeURIComponent (v[0]);
853     v[1] = decodeURIComponent (v[1] || '');
854     if (v[0] == 's') {
855     document.sourceElement.value = v[1];
856     }
857     }
858     }
859    
860 wakaba 1.1 document.logElement = document.getElementsByTagName ('output')[0];
861     update ();
862     ">
863 wakaba 1.8 <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
864     Parser</h1>
865 wakaba 1.1
866 wakaba 1.7 <h2>Markup to test
867 wakaba 1.8 (<a href=data:, id=permalink rel=bookmark>permalink</a>,
868     <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
869     id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
870     Viewer</a>)</h2>
871 wakaba 1.7 <p>
872     <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
873 wakaba 1.1 &lt;head>&lt;/head>&lt;body>
874     &lt;p>
875     &lt;script>
876 wakaba 1.3 document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
877 wakaba 1.1 &lt;/script>
878     &lt;p>
879     </textarea>
880    
881 wakaba 1.10 <h2 id=log>Log</h2>
882 wakaba 1.7 <p><output></output>
883    
884 wakaba 1.10 <h2 id=notes>Notes</h2>
885 wakaba 1.8
886     <p>This is a <em>simplified</em> implementation of
887     <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
888     Parsing Algorithm</a>. It only implements script-related part of the
889     algorithm. Especially, this parser:
890     <ul>
891     <li>Does not support <code>DOCTYPE</code> and comment tokens.
892     <li>Does not support entities except for <code>&amp;quot;</code>,
893     <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
894     <code>src</code> attribute value.
895     <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
896     algorithm, and so on.
897     <li>Does not raise parse errors for invalid attribute specifications in start
898     or end tags.
899 wakaba 1.14 <li>Does not support PCDATA elements (<code>title</code> and
900     <code>textarea</code>).
901     <li>Does not strip the first newline in <code>pre</code> elements.
902 wakaba 1.8 <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
903     in <code>script</code> element.
904     <li>Does not support foreign (SVG or MathML) elements.
905     <li>Only supports <code>script</code> <code>type</code>
906     <code>text/javascript</code>. <code>type</code> and <code>language</code>
907     attributes are ignored.
908 wakaba 1.10 <li>Only supports limited statements. It must consist of zero or more
909     statements looking similar to the following statements, possibly
910     introduced, followed, or separated by white space characters:
911     <ul>
912     <li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>.
913     <li><code>var s = document.createElement ("script");
914     s.src = "<var>string</var>";
915     document.documentElement.appendChild (s);</code>
916 wakaba 1.15 <li><code>w (document.documentElement.innerHTML);</code> (This statement
917     can be used to dump the document, even when the document has no
918     document element. The output format is the tree dump format used
919     in html5lib test data, not <abbr>HTML</abbr>.)
920 wakaba 1.10 </ul>
921     Note that strings may be delimited by <code>'</code>s instead of
922     <code>"</code>s.
923 wakaba 1.8 <li>Only supports <code>javascript:</code>
924     <abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the
925     <code>src</code> attribute of the <code>script</code> element. In addition,
926     the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to
927     the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
928 wakaba 1.11 <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
929     string literals.
930 wakaba 1.12 <li>Does not handle <i>stop parsing</i> phase correctly if the document is
931     replaced by a <code>document.open ()</code> call. In other words, delayed
932     (deferred or asynchronous) script executions and event firings might be
933     treated in a wrong way if a <code>document.open ()</code> invocation
934     is implicitly done by <code>document.write ()</code> in a delayed script.
935 wakaba 1.8 </ul>
936 wakaba 1.7
937 wakaba 1.8 <p>For some reason, this parser does not work in browsers that do
938     not support JavaScript 1.5.
939 wakaba 1.12
940     <!-- TODO: |src| attribute value should refer to the value at the time
941     when it is inserted into the document, not the value when the script is
942     executed. Currently it does not matter, since we don't allow dynamic
943     modification to the |src| content/DOM attribute value yet. -->
944 wakaba 1.10
945 wakaba 1.13 </body>
946     </html>
947 wakaba 1.16 <!-- $Date: 2008/04/29 03:29:41 $ -->
948 wakaba 1.13 <!--
949    
950     Copyright 2008 Wakaba <w@suika.fam.cx>
951    
952     This program is free software; you can redistribute it and/or
953     modify it under the terms of the GNU General Public License
954     as published by the Free Software Foundation; either version 2
955     of the License, or (at your option) any later version.
956    
957     This program is distributed in the hope that it will be useful,
958     but WITHOUT ANY WARRANTY; without even the implied warranty of
959     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
960     GNU General Public License for more details.
961    
962     You should have received a copy of the GNU General Public License
963     along with this program; if not, write to the Free Software
964     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
965 wakaba 1.1
966 wakaba 1.13 -->

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24