/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.18 - (hide annotations) (download) (as text)
Sun Aug 31 09:46:14 2008 UTC (16 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.17: +10 -10 lines
File MIME type: text/html
'the script that will execute as soon as the parser resumes' -> 'the pending external script' (HTML5 revision 1830)

1 wakaba 1.1 <!DOCTYPE HTML>
2     <html lang=en>
3     <head>
4 wakaba 1.8 <title>Live Scripting HTML Parser</title>
5 wakaba 1.13 <link rel=author href="http://suika.fam.cx/~wakaba/who?">
6     <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
7     title="GNU GPL2 or later">
8 wakaba 1.1 <style>
9 wakaba 1.13 h1 {
10     margin: 0;
11     font-size: 150%;
12     }
13     h2 {
14 wakaba 1.7 margin: 0;
15     font-size: 100%;
16     }
17 wakaba 1.13 p {
18     margin: 0 1em;
19 wakaba 1.7 }
20 wakaba 1.1 textarea {
21 wakaba 1.7 width: 100%;
22     -width: 99%;
23     height: 10em;
24 wakaba 1.1 }
25     output {
26     display: block;
27     font-family: monospace;
28 wakaba 1.4 white-space: -moz-pre-wrap;
29     white-space: pre-wrap;
30 wakaba 1.1 }
31     </style>
32     <script>
// Handle of the pending debounce timer; 0 when no re-parse is scheduled.
var delayedUpdater = 0;

// Schedules a re-parse of the textarea content 100ms from now.  Any
// re-parse that is still pending is cancelled first, so a burst of
// input events results in a single call to update2.
function update () {
  var pendingTimer = delayedUpdater;
  if (pendingTimer) {
    delayedUpdater = 0;
    clearTimeout (pendingTimer);
  }
  delayedUpdater = setTimeout (update2, 100);
} // update
42    
// Re-parses the source text if it changed since the previous run:
// refreshes the permalink and Live DOM Viewer links, clears the log,
// parses the text and logs the resulting tree dump.
function update2 () {
  var source = document.sourceElement.value;
  if (source == document.previousSourceText) {
    // Nothing changed; keep the current log as is.
    return;
  }
  document.previousSourceText = source;

  var encodedSource = encodeURIComponent (source);
  document.links['permalink'].href
      = location.pathname + '?s=' + encodedSource;
  document.links['ldvlink'].href
      = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?'
      + encodedSource;

  document.logElement.textContent = '';
  var parser = new Parser (new InputStream (source));
  // Keep the document created for this parser; it is what gets dumped
  // after parsing.
  var parsedDocument = parser.doc;
  parser.parse ();

  log (dumpTree (parsedDocument, ''));

  if (parser.hasAsyncScript) {
    log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
  }
} // update2
65 wakaba 1.1
// Current nesting depth of log output; one indent unit per level.
var logIndentLevel = 0;

// Appends |s| plus a trailing newline to the log element, prefixing every
// line of |s| with the indentation for the current logIndentLevel.
function log (s) {
  var pad = '';
  for (var remaining = logIndentLevel; remaining > 0; remaining--) {
    pad += ' ';
  }
  var text = pad + s.split ('\n').join ('\n' + pad) + '\n';
  document.logElement.appendChild (document.createTextNode (text));
} // log
75    
// Mutable wrapper around the not-yet-tokenised source text.  The
// tokeniser consumes characters from the front of |s|, and
// document.write splices new text into it.
function InputStream (s) {
  this.s = s;
} // InputStream
79    
// Creates a parser that reads from the InputStream |i|.
//
// i   - InputStream holding the text to parse.
// doc - Optional JSDocument to populate.  When omitted, a new JSDocument
//       bound to this parser is created and flagged as an HTML document.
function Parser (i, doc) {
  if (!doc) {
    doc = new JSDocument (this);
    doc.manakaiIsHTML = true;
  }

  this.input = i;
  this.doc = doc;
  // Stack of open elements; the document itself is the bottom entry.
  this.openElements = [doc];

  this.parseMode = 'pcdata';
  // Tokens pushed back by parse () to be returned before reading input.
  this.nextToken = [];

  // Script queues drained at the end of parse ().
  this.scriptsExecutedAfterParsing = [];
  this.scriptsExecutedSoon = [];
  this.scriptsExecutedAsynchronously = [];
} // Parser
94    
// Returns the next token, consuming the matching text from the front of
// this.input.s.
//
// Token shapes:
//   {type: 'char', value}              - character data
//   {type: 'start-tag', value, attrs}  - value is the lowercased tag name
//   {type: 'end-tag', value}           - value is the lowercased tag name
//   {type: 'abort'}                    - the insertion point was reached;
//                                        nothing was consumed
//   {type: 'eof'}                      - the input is exhausted
//
// Tokens queued in this.nextToken are returned first.  Every consumed
// character is subtracted from this.insertionPoint.  When the insertion
// point is undefined the arithmetic yields NaN and all "reached the
// insertion point" comparisons are false, so nothing aborts.
Parser.prototype.getNextToken = function () {
  if (this.nextToken.length) {
    return this.nextToken.shift ();
  }

  var p = this;
  var i = this.input;
  var token;

  if (this.parseMode == 'cdata') {
    var tagName = this.endTagName;
    if (p.insertionPoint <= 0) {
      return {type: 'abort'};
    }

    // Character data up to the next "<", cut at the insertion point.
    i.s = i.s.replace (/^([^<]+)/, function (s, t) {
      if (0 < p.insertionPoint && p.insertionPoint < t.length) {
        token = {type: 'char', value: t.substring (0, p.insertionPoint)};
        var ip = p.insertionPoint;
        p.insertionPoint = 0;
        return t.substring (ip, t.length);
      }
      token = {type: 'char', value: t};
      p.insertionPoint -= t.length;
      return '';
    });
    if (token) return token;

    // The end tag that closes the current CDATA element.
    var pattern = new RegExp ('^</' + tagName + '>', 'i');
    i.s = i.s.replace (pattern, function (s) {
      if (p.insertionPoint < s.length) {
        // The end tag straddles the insertion point; leave it in place.
        token = {type: 'abort'};
        return s;
      }
      token = {type: 'end-tag', value: tagName};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    // A partially available end tag whose visible prefix still matches
    // the expected name: wait for more input.
    var m;
    if ((p.insertionPoint < ('</' + tagName).length) &&
        (m = i.s.match (/^<\/([A-Za-z]+)/))) {
      var typedPrefix = m[1].substring (0, p.insertionPoint).toLowerCase ();
      if (typedPrefix ==
          tagName.substring (0, p.insertionPoint - '</'.length)) {
        return {type: 'abort'};
      }
    }

    // A "<" that does not start the expected end tag is plain text.
    i.s = i.s.replace (/^</, function (s) {
      token = {type: 'char', value: s};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;
    return {type: 'eof'};
  }

  // End tag.
  i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
    if (p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length &&
         s.substring (s.length - 1, s.length) != '>')) {
      token = {type: 'abort'};
      return s;
    }
    token = {type: 'end-tag', value: e.toLowerCase ()};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Start tag, with attributes.
  i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
    if (p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length &&
         s.substring (s.length - 1, s.length) != '>')) {
      token = {type: 'abort'};
      return s;
    }
    var tagName;
    var attrs = {};
    e = e.replace (/^[\S]+/, function (v) {
      tagName = v.toLowerCase ();
      return '';
    });
    while (true) {
      var m = false;
      e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
      function (x, attrName, attrValue1, attrValue2, attrValue3) {
        // Capture groups that did not take part in the match are
        // undefined, so a valueless or empty attribute (e.g. "async")
        // must default to the empty string before it is unescaped.
        var value = attrValue1 || attrValue2 || attrValue3 || '';
        value = value.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
            .replace (/&amp;/g, '&');
        attrs[attrName.toLowerCase ()] = value;
        m = true;
        return '';
      });
      if (!m) break;
    }
    if (e.length) {
      log ('Broken start tag: "' + e + '"');
    }
    token = {type: 'start-tag', value: tagName, attrs: attrs};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  if (p.insertionPoint <= 0) {
    return {type: 'abort'};
  }

  // Character data up to the next "<", cut at the insertion point.
  i.s = i.s.replace (/^[^<]+/, function (s) {
    if (p.insertionPoint < s.length) {
      token = {type: 'char', value: s.substring (0, p.insertionPoint)};
      var ip = p.insertionPoint;
      p.insertionPoint = 0;
      return s.substring (ip, s.length);
    }
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Anything else (a "<" that starts no tag): one character of text.
  i.s = i.s.replace (/^[\s\S]/, function (s) {
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  return {type: 'eof'};
}; // getNextToken
220    
// Runs the simplified tree construction loop: pulls tokens from
// getNextToken () and builds the JSDocument / JSElement / JSText tree,
// running scripts as their elements are inserted.
//
// Returns early (undefined) when the tokeniser reports 'abort', or when a
// pending external script is found during a reentrant call.  On 'eof' the
// loop ends, the queued scripts are executed and the DOMContentLoaded and
// load events are logged.  Numbered comments refer to the steps of the
// algorithms this code follows.
Parser.prototype.parse = function () {
  logIndentLevel++;
  log ('parse: start');

  while (true) {
    var token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (this.cdataEndTagRequired) {
      // Generic CDATA parsing algorithm, resumed after an earlier abort.

      if (token.type != 'abort') {
        // 7.
        if (token.type == 'end-tag' && token.value == this.endTagName) {
          // 7.1. Ignores it.
          //
        } else {
          // 7.2. Parse error.
          log ('Parse error: no </' + this.endTagName + '>');
          this.nextToken.unshift (token);
        }
        this.cdataEndTagRequired = false;
        continue;
      }
    }

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);
        if (token.attrs.async != null) el.async = true;
        if (token.attrs.defer != null) el.defer = true;
        if (token.attrs.src != null) el.src = token.attrs.src;

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'cdata';
        this.endTagName = 'script';

        // 4.1. Collect all the character tokens.
        while (true) {
          token = this.getNextToken ();
          log ('token: ' + token.type + ' "' + token.value + '"');

          if (token.type == 'char') {
            // 5. Append a single Text node to the script element node.
            el.manakaiAppendText (token.value);

          // 4.2. Until it returns a token that is not a character token, or
          // until it stops tokenising.
          } else if (token.type == 'eof' ||
                     token.type == 'end-tag' ||
                     token.type == 'abort') {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            // 7.1. If the next token is not an end tag token with ...
            if (!(token.type == 'end-tag' && token.value == 'script')) {
              // 7.2. This is a parse error.
              // (The literal is split so that this inline script never
              // contains the end tag of its own element.)
              log ('Parse error: no </' + 'script>');
              this.nextToken.unshift (token);

              // 7.3. Mark the script element as "already executed".
              el.manakaiAlreadyExecuted = true;
            } else {
              // 7.4. Ignore it.
              //
            }
            break;
          }
        }

        // 8.1. If the parser were originally created for the ...
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and ...
          // The flag must use the same property name that appendChild ()
          // checks, otherwise it is never honoured.
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the ...
        var oldInsertionPoint = this.insertionPoint;
        // 9.2. Let the insertion point be just before the next input ...
        this.setInsertionPoint (0);

        // 10. Append the new element to the current node.
        // (appendChild () runs the script, which may call document.write.)
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 11. Let the insertion point have the value of the old ...
        // Whatever the script left at the insertion point is added to it.
        oldInsertionPoint += this.insertionPoint;
        this.setInsertionPoint (oldInsertionPoint);

        // 12. If there is a pending external script
        while (this.pendingExternalScript) {
          // 12.1. If the tree construction stage is being called reentrantly
          if (this.reentrant) {
            log ('parse: abort (reentrance)');
            logIndentLevel--;
            return;

          // 12.2. Otherwise
          } else {
            // 1.
            var script = this.pendingExternalScript;
            this.pendingExternalScript = null;

            // 2. Pause until the script has completed loading.
            //

            // 3. Let the insertion point to just before the next input char.
            this.setInsertionPoint (0);

            // 4. Execute the script.
            executeScript (this.doc, script);

            // 5. Let the insertion point be undefined again.
            this.setInsertionPoint (undefined);

            // 6. If there is once again a script that will execute ...
            //
          }
        }
      } else if (token.value == 'style' ||
                 token.value == 'noscript' ||
                 token.value == 'xmp') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);

        // 2. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'cdata';
        this.endTagName = token.value;

        // 4.1. Collect all the character tokens.
        while (true) {
          token = this.getNextToken ();
          log ('token: ' + token.type + ' "' + token.value + '"');

          if (token.type == 'char') {
            // 5. Append a single Text node to the element node.
            el.manakaiAppendText (token.value);

          // 4.2. Until it returns a token that is not a character token, or
          // until it stops tokenising.
          } else if (token.type == 'eof' ||
                     token.type == 'end-tag' ||
                     token.type == 'abort') {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            if (token.type == 'abort') {
              // The end tag check is deferred to the next call; see the
              // cdataEndTagRequired handling at the top of the loop.
              this.cdataEndTagRequired = true;
              break;
            }

            // 7.1. If the next token is not an end tag token with ...
            if (!(token.type == 'end-tag' &&
                  token.value == this.endTagName)) {
              // 7.2. This is a parse error.
              log ('Parse error: no </' + this.endTagName + '>');
              this.nextToken.unshift (token);

              // 7.3. Carried over from the script algorithm; the flag is
              // only ever read for script elements.
              el.manakaiAlreadyExecuted = true;
            } else {
              // 7.4. Ignore it.
              //
            }
            break;
          }
        }
      } else {
        var el = new JSElement (this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild (el);
        this.openElements.push (el);
      }
    } else if (token.type == 'end-tag') {
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'char') {
      this.openElements[this.openElements.length - 1].manakaiAppendText
          (token.value);
    } else if (token.type == 'eof') {
      break;
    } else if (token.type == 'abort') {
      log ('parse: abort');
      logIndentLevel--;
      return;
    }
  }

  log ('stop parsing');

  // readyState = 'interactive'

  // "When a script completes loading" rules start applying.

  // Executing one list may add entries to the other, hence the outer loop.
  while (this.scriptsExecutedSoon.length > 0 ||
         this.scriptsExecutedAsynchronously.length > 0) {
    // Handle "list of scripts that will execute as soon as possible".
    while (this.scriptsExecutedSoon.length > 0) {
      var e = this.scriptsExecutedSoon.shift ();

      // If it has completed loading
      log ('Execute an external script not inserted by parser...');
      executeScript (this.doc, e);

      // NOTE: It MAY be executed before the end of the parsing, according
      // to the spec.
      this.hasAsyncScript = true;
    }

    // Handle "list of scripts that will execute asynchronously".
    while (this.scriptsExecutedAsynchronously.length > 0) {
      var e = this.scriptsExecutedAsynchronously.shift ();

      // Step 1.
      // We assume that all scripts have been loaded at this time.

      // Step 2.
      log ('Execute an asynchronous script...');
      executeScript (this.doc, e);

      // Step 3.
      //

      // Step 4.
      //

      this.hasAsyncScript = true;
    }
  }

  // Handle "list of scripts that will execute when the document has finished
  // parsing".
  var list = this.scriptsExecutedAfterParsing;
  while (list.length > 0) {
    // TODO: break unless completed loading

    // Step 1.
    //

    // Step 2. and Step 3.
    log ('Executing a |defer|red script...');
    executeScript (this.doc, list.shift ());

    // Step 4.
  }

  log ('DOMContentLoaded event fired');

  // "delays the load event" things has completed:
  // readyState = 'complete'
  log ('load event fired');

  logIndentLevel--;
}; // parse
486    
// Sets this.insertionPoint to |ip| and logs the change.  undefined, null
// and NaN all mean "no insertion point" and are stored as undefined.
Parser.prototype.setInsertionPoint = function (ip) {
  if (ip == null || isNaN (ip)) {
    log ('insertion point: set to undefined');
    this.insertionPoint = undefined;
    return;
  }

  var remaining = this.input.s;
  if (ip == remaining.length) {
    log ('insertion point: end of file');
  } else {
    log ('insertion point: set to ' + ip +
         ' (before "' + remaining.substring (0, 10) + '")');
  }
  this.insertionPoint = ip;
}; // setInsertionPoint
500    
// Minimal document node.
//
// p - The Parser that populates this document; kept in |_parser| so that
//     document.write can reach the parser's input stream.
function JSDocument (p) {
  this._parser = p;
  this.childNodes = [];
} // JSDocument
505    
// Minimal element node.
//
// doc       - The owner JSDocument.
// localName - The element's local name.
function JSElement (doc, localName) {
  this.ownerDocument = doc;
  this.localName = localName;
  this.childNodes = [];
} // JSElement
511    
// Appends the node |e| to this node's children and returns |e|.
//
// Inserting a script element additionally runs the "running a script"
// steps: depending on its async / defer / src state the script is queued
// on one of the parser's lists, made the pending external script, or
// executed immediately.
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push (e);
  e.parentNode = this;

  // Only script elements need any further processing.
  if (e.localName != 'script') {
    return e;
  }

  logIndentLevel++;
  log ('Running a script: start');

  var doc = this.ownerDocument || this;
  var p = doc._parser;

  // 1. Script type
  //

  // 2.1. - 2.3. (scripting disabled, XML innerHTML, unsupported language)
  //
  // 2.4. If the script element has its "already executed" flag set
  if (e.manakaiAlreadyExecuted) {
    // 2.5. Abort these steps at this point.
    log ('Running a script: aborted (already executed)');
    logIndentLevel--;
    return e;
  }

  // 3. Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // 4. If the element has a src attribute, then a load for ...
  // TODO: load an external resource

  // 5. The first of the following options:
  var hasSrc = e.src != null;
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    p.scriptsExecutedAfterParsing.push (e);
    log ('Running a script: aborted (defer)');
  } else if (e.async &&
             (hasSrc || p.scriptsExecutedAsynchronously.length > 0)) {
    p.scriptsExecutedAsynchronously.push (e);
    if (hasSrc) {
      log ('Running a script: aborted (async src)');
    } else {
      log ('Running a script: aborted (async)');
      // ISSUE: What is the difference with the case above?
    }
  } else if (hasSrc) {
    if (e.manakaiParserInserted) {
      if (p.pendingExternalScript) {
        log ('Error: There is a pending external script.');
      }
      p.pendingExternalScript = e;
      log ('Running a script: aborted (src parser-inserted)');
    } else {
      p.scriptsExecutedSoon.push (e);
      log ('Running a script: aborted (src)');
    }
  } else {
    executeScript (doc, e); // even if other scripts are already executing.
  }

  log ('Running a script: end');
  logIndentLevel--;

  return e;
}; // appendChild
581    
// Executes the script element |e| against the document |doc|.
//
// The code comes from the element's src (resolved by getExternalScript)
// when a src is present, otherwise from its text content.  A failed load
// is logged as an error event and nothing is run.
function executeScript (doc, e) {
  log ('executing a script block: start');

  var code;
  if (e.src == null) {
    code = e.text;
  } else {
    code = getExternalScript (e.src);

    // If the load resulted in an error, then ... firing an error event ...
    if (code == null) {
      log ('error event fired at the script element');
      return;
    }

    log ('External script loaded: "' + code + '"');
  }

  // The load was successful.  Scripting is assumed to be enabled,
  // Document.designMode disabled, and the Document the active document
  // in its browsing context.
  parseAndRunScript (doc, code);

  log ('load event fired at the script element');

  log ('executing a script block: end');
} // executeScript
613    
// Resolves a script |src| URI to the script text it yields.
//
// Only javascript: URIs consisting of a single string literal are
// supported, i.e. those matching
//   ^javascript:\s*(?:"[^"]*"|'[^']*')\s*$
// (case-insensitive scheme).  Returns the unescaped content of the
// literal, which may be the empty string, or null (after logging the
// reason) for any other URI.
function getExternalScript (uri) {
  if (!uri.match (/^javascript:/i)) {
    log ('URI scheme not supported: <' + uri + '>');
    return null;
  }

  // Both quote styles accept an empty body.
  var m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]*)")\s*$/i);
  if (!m) {
    log ('Complex javascript: URI is not supported: <' + uri + '>');
    return null;
  }

  // Exactly one of the two groups took part in the match.  An empty
  // literal must yield an empty script rather than a load error, so the
  // groups are not tested for truthiness individually.
  return unescapeJSLiteral (m[1] || m[2] || '');
} // getExternalScript
634    
// Interprets the script text |s| against the document |doc|.
//
// The text must be a sequence of the three supported statement forms:
//   - document.write ("...", ...);
//   - var s = document.createElement ("script"); s.src = "...";
//     document.documentElement.appendChild (s);
//   - w (document.documentElement.innerHTML);
// Statements are consumed from the front of |s| until it is empty.
// Anything unrecognised is logged as a script parse error and stops the
// run; a missing document element stops it with a script error.
function parseAndRunScript (doc, s) {
  while (true) {
    var matched = false;

    // document.write (...)
    s = s.replace (/^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/, function (s, t) {
      matched = true;
      var args = [];
      // replace () is used only to visit each string literal in turn.
      t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {
        args.push (unescapeJSLiteral (v.substring (1, v.length - 1)));
        return '';
      });
      doc.write.apply (doc, args);
      return '';
    });

    // Insertion of an external script element.
    var noDocumentElement = false;
    s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'([^']*)'|"([^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
    function (s, t, u) {
      matched = true;
      // Only one of t / u took part in the match and the other may be
      // undefined; an empty literal must fall back to the empty string
      // instead of passing undefined to unescapeJSLiteral.
      var uri = unescapeJSLiteral (t || u || '');
      noDocumentElement = !doc._insertExternalScript (uri);
      return '';
    });
    if (noDocumentElement) {
      log ('Script error: documentElement is null');
      break;
    }

    // Tree dump.
    s = s.replace (/^\s*w\s*\(\s*document\.documentElement\.innerHTML\s*\)\s*;\s*/,
    function (s, t) {
      matched = true;
      log (dumpTree (doc, ''));
      return '';
    });

    if (s == '') break;
    if (!matched) {
      log ('Script parse error: "' + s + '"');
      break;
    }
  }
} // parseAndRunScript
673    
// Replaces every \uHHHH escape (exactly four hexadecimal digits) in |s|
// with the character of that code unit.  No other escape is interpreted.
function unescapeJSLiteral (s) {
  return s.replace (/\\u([0-9A-Fa-f]{4})/g, function (escape, hex) {
    var codeUnit = parseInt (hex, 16);
    return String.fromCharCode (codeUnit);
  });
} // unescapeJSLiteral
679    
// Minimal text node holding its character data in |data|.
function JSText (data) {
  this.data = data;
} // JSText
683    
// Appends the text |s| to this node.  The text is merged into the last
// child when that child is a JSText; otherwise a new JSText is added.
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var children = this.childNodes;
  var lastChild = children.length > 0 ? children[children.length - 1] : null;
  if (lastChild instanceof JSText) {
    lastChild.data += s;
    return;
  }
  children.push (new JSText (s));
}; // manakaiAppendText
694 wakaba 1.2
// document.open (): discards the current document content and attaches a
// fresh script-created parser with an empty input stream.
//
// Takes up to two optional arguments (type, replace).  The call is
// ignored while a parser that was not created by a script has a defined
// insertion point.  Returns this document.
JSDocument.prototype.open = function () {
  // Two or fewer arguments

  // Step 1.
  var type = arguments[0] || 'text/html';

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  // The insertion point is kept on the parser itself, not on its input
  // stream.  NaN is treated as "undefined", as write () and
  // setInsertionPoint () do.
  var currentParser = this._parser;
  if (currentParser && !currentParser.scriptCreated) {
    var ip = currentParser.insertionPoint;
    if (ip != undefined && !isNaN (ip)) {
      log ('document.open () in parsing mode is ignored');
      return this;
    }
  }

  // Step 4.
  log ('onbeforeunload event fired');
  log ('onunload event fired');

  // Step 5.
  if (this._parser) {
    // Discard the parser.
  }

  // Step 6.
  log ('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser (new InputStream (''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  this._parser.setInsertionPoint (this._parser.input.s.length);

  // Step 12.
  return this;
}; // document.open
746    
// document.write (): concatenates all arguments and inserts the result
// into the parser's input stream at the insertion point, then lets the
// parser process it (reentrantly) unless an external script is pending.
//
// When the insertion point is undefined (or NaN) the document is first
// re-opened with open ().  Returns undefined.
JSDocument.prototype.write = function () {
  log ('document.write: start');
  logIndentLevel++;

  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
    this.open ();
    // open () may have replaced the parser.
    p = this._parser;
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // |arguments| is not an Array, so the standard Array.prototype.join is
  // borrowed for it (the static Array.join generic is non-standard).
  var s = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + s + '"' +
       ' before "' +
       p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
  p.input.s = p.input.s.substring (0, p.insertionPoint) + s
      + p.input.s.substring (p.insertionPoint, p.input.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a pending external script
  if (p.pendingExternalScript) {
    log ('document.write: processed later (there is an unprocessed <script src>)');
    logIndentLevel--;
    log ('document.write: return');
    return;
  }

  // 4. Process the characters that were inserted, ...
  var originalReentrant = p.reentrant;
  p.reentrant = true;
  p.parse ();
  p.reentrant = originalReentrant;
  // TODO: "Abort the processing of any nested invokations of the tokeniser,
  // yielding control back to the caller." (<script> parsing).  Do we need
  // to do something here?

  // 5. Return
  logIndentLevel--;
  log ('document.write: return');

  return;
}; // document.write
791    
// Creates a script element whose src is |uri| and appends it to the
// document element.  Returns true on success, or false when the document
// has no document element (the new element is then dropped).
JSDocument.prototype._insertExternalScript = function (uri) {
  var script = new JSElement (this, 'script');
  script.src = uri;

  var root = this.documentElement;
  if (!root) {
    return false;
  }
  root.appendChild (script);
  return true;
}; // _insertExternalScript
802    
// documentElement: the first child of the document that is a JSElement,
// or null when there is none.
JSDocument.prototype.__defineGetter__ ('documentElement', function () {
  var children = this.childNodes;
  var index = 0;
  while (index < children.length) {
    var child = children[index];
    if (child instanceof JSElement) {
      return child;
    }
    index++;
  }
  return null;
});
812    
// text: the concatenated data of the JSText nodes that are direct
// children of this element, in order.  Other children are skipped.
JSElement.prototype.__defineGetter__ ('text', function () {
  var pieces = [];
  var children = this.childNodes;
  for (var index = 0; index < children.length; index++) {
    var child = children[index];
    if (child instanceof JSText) {
      pieces.push (child.data);
    }
  }
  return pieces.join ('');
});
822 wakaba 1.1
// Serialises the children of the node |n|, recursively, one "| "-prefixed
// line per node.
//
// n      - Node whose childNodes are dumped (|n| itself is not printed).
// indent - Indentation string placed after the "| " prefix; one more
//          indent unit is added for each nesting level.
//
// Elements print their local name followed by their async / defer / src
// attributes; text nodes print their data in double quotes; anything
// else is printed via string conversion.  Returns the dump as a string.
function dumpTree (n, indent) {
  var lines = [];
  var prefix = '| ' + indent;
  for (var index = 0; index < n.childNodes.length; index++) {
    var child = n.childNodes[index];
    if (child instanceof JSText) {
      lines.push (prefix + '"' + child.data + '"\n');
    } else if (child instanceof JSElement) {
      lines.push (prefix + child.localName + '\n');
      if (child.async) lines.push (prefix + ' async=""\n');
      if (child.defer) lines.push (prefix + ' defer=""\n');
      if (child.src != null) {
        lines.push (prefix + ' src="' + child.src + '"\n');
      }
      lines.push (dumpTree (child, indent + ' '));
    } else {
      lines.push (prefix + child + '\n');
    }
  }
  return lines.join ('');
} // dumpTree
843     </script>
844     </head>
<body onload="
  /* Look up the source and log elements first, so that the page stays
     usable even when the query string below cannot be processed. */
  document.sourceElement = document.getElementsByTagName ('textarea')[0];
  document.logElement = document.getElementsByTagName ('output')[0];

  /* Prefill the textarea from the |s| parameter of the query string
     (parameters are separated by semicolons). */
  var q = location.search;
  if (q != null) {
    q = q.substring (1).split (/;/);
    for (var i = 0; i < q.length; i++) {
      var v = q[i].split (/=/, 2);
      try {
        v[0] = decodeURIComponent (v[0]);
        v[1] = decodeURIComponent (v[1] || '');
      } catch (e) {
        /* decodeURIComponent throws on a malformed percent-escape;
           skip the broken parameter instead of aborting the handler. */
        continue;
      }
      if (v[0] == 's') {
        document.sourceElement.value = v[1];
      }
    }
  }

  update ();
">
864 wakaba 1.8 <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
865     Parser</h1>
866 wakaba 1.1
867 wakaba 1.7 <h2>Markup to test
868 wakaba 1.8 (<a href=data:, id=permalink rel=bookmark>permalink</a>,
869     <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
870     id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
871     Viewer</a>)</h2>
872 wakaba 1.7 <p>
873     <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
874 wakaba 1.1 &lt;head>&lt;/head>&lt;body>
875     &lt;p>
876     &lt;script>
877 wakaba 1.3 document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
878 wakaba 1.1 &lt;/script>
879     &lt;p>
880     </textarea>
881    
882 wakaba 1.10 <h2 id=log>Log</h2>
883 wakaba 1.7 <p><output></output>
884    
885 wakaba 1.10 <h2 id=notes>Notes</h2>
886 wakaba 1.8
887     <p>This is a <em>simplified</em> implementation of
888     <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
889     Parsing Algorithm</a>. It only implements script-related part of the
890     algorithm. Especially, this parser:
891     <ul>
892     <li>Does not support <code>DOCTYPE</code> and comment tokens.
893     <li>Does not support entities except for <code>&amp;quot;</code>,
894     <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
895     <code>src</code> attribute value.
896     <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
897     algorithm, and so on.
898     <li>Does not raise parse errors for invalid attribute specifications in start
899     or end tags.
900 wakaba 1.17 <li>Does not support RCDATA elements (<code>title</code> and
901 wakaba 1.14 <code>textarea</code>).
902 wakaba 1.17 <li>Does not strip the first newline in <code>pre</code>,
903     <code>listing</code>, and <code>textarea</code> elements.
904 wakaba 1.8 <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
905 wakaba 1.17 in CDATA/RCDATA elements.
906 wakaba 1.8 <li>Does not support foreign (SVG or MathML) elements.
907     <li>Only supports <code>script</code> <code>type</code>
908     <code>text/javascript</code>. <code>type</code> and <code>language</code>
909     attributes are ignored.
910 wakaba 1.10 <li>Only supports limited statements. It must consist of zero or more
911     statements looking similar to the following statements, possibly
912     introduced, followed, or separated by white space characters:
913     <ul>
914     <li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>.
915     <li><code>var s = document.createElement ("script");
916     s.src = "<var>string</var>";
917     document.documentElement.appendChild (s);</code>
918 wakaba 1.15 <li><code>w (document.documentElement.innerHTML);</code> (This statement
919     can be used to dump the document, even when the document has no
920     document element. The output format is the tree dump format used
921     in html5lib test data, not <abbr>HTML</abbr>.)
922 wakaba 1.10 </ul>
923     Note that strings may be delimited by <code>'</code>s instead of
924     <code>"</code>s.
925 wakaba 1.8 <li>Only supports <code>javascript:</code>
926     <abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the
927     <code>src</code> attribute of the <code>script</code> element. In addition,
928     the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to
929     the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
930 wakaba 1.11 <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
931     string literals.
932 wakaba 1.12 <li>Does not handle the <i>stop parsing</i> phase correctly if the document is
933     replaced by a <code>document.open ()</code> call. In other words, delayed
934     (deferred or asynchronous) script executions and event firings might be
935     treated in a wrong way if a <code>document.open ()</code> invocation
936     is implicitly done by <code>document.write ()</code> in a delayed script.
937 wakaba 1.8 </ul>
938 wakaba 1.7
939 wakaba 1.8 <p>For some reason, this parser does not work in browsers that do
940     not support JavaScript 1.5.
941 wakaba 1.12
942     <!-- TODO: |src| attribute value should refer to the value at the time
943     when it is inserted into the document, not the value when the script is
944     executed. Currently it does not matter, since we don't allow dynamic
945     modification to the |src| content/DOM attribute value yet. -->
946 wakaba 1.10
947 wakaba 1.13 </body>
948     </html>
949 wakaba 1.18 <!-- $Date: 2008/05/16 10:29:25 $ -->
950 wakaba 1.13 <!--
951    
952     Copyright 2008 Wakaba <w@suika.fam.cx>
953    
954     This program is free software; you can redistribute it and/or
955     modify it under the terms of the GNU General Public License
956     as published by the Free Software Foundation; either version 2
957     of the License, or (at your option) any later version.
958    
959     This program is distributed in the hope that it will be useful,
960     but WITHOUT ANY WARRANTY; without even the implied warranty of
961     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
962     GNU General Public License for more details.
963    
964     You should have received a copy of the GNU General Public License
965     along with this program; if not, write to the Free Software
966     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
967 wakaba 1.1
968 wakaba 1.13 -->

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24