/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.21 - (hide annotations) (download) (as text)
Sun Jun 20 03:39:12 2010 UTC (14 years, 5 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.20: +6 -5 lines
File MIME type: text/html
was not committed

1 wakaba 1.1 <!DOCTYPE HTML>
2     <html lang=en>
3     <head>
4 wakaba 1.8 <title>Live Scripting HTML Parser</title>
5 wakaba 1.13 <link rel=author href="http://suika.fam.cx/~wakaba/who?">
6     <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
7     title="GNU GPL2 or later">
8 wakaba 1.1 <style>
9 wakaba 1.13 h1 {
10     margin: 0;
11     font-size: 150%;
12     }
13     h2 {
14 wakaba 1.7 margin: 0;
15     font-size: 100%;
16     }
17 wakaba 1.13 p {
18     margin: 0 1em;
19 wakaba 1.7 }
20 wakaba 1.1 textarea {
21 wakaba 1.7 width: 100%;
22     -width: 99%;
23     height: 10em;
24 wakaba 1.1 }
25     output {
26     display: block;
27     font-family: monospace;
28 wakaba 1.4 white-space: -moz-pre-wrap;
29     white-space: pre-wrap;
30 wakaba 1.1 }
31     </style>
32     <script>
// Timer handle of the pending re-parse; 0 while nothing is scheduled.
var delayedUpdater = 0;

/**
 * Debounces re-parsing: cancels any pending timer and schedules update2 ()
 * to run 100 ms from now.
 */
function update () {
  if (delayedUpdater) clearTimeout (delayedUpdater);
  delayedUpdater = setTimeout (update2, 100);
} // update
42    
/**
 * Re-parses the source text when it differs from the text parsed last time:
 * refreshes the permalink and Live DOM Viewer links, clears the log, runs the
 * parser and dumps the resulting tree into the log.
 */
function update2 () {
  var source = document.sourceElement.value;
  if (source == document.previousSourceText) return;
  document.previousSourceText = source;

  // Both links carry the same percent-encoded copy of the source.
  var encoded = encodeURIComponent (source);
  document.links['permalink'].href = location.pathname + '?s=' + encoded;
  document.links['ldvlink'].href
      = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?' + encoded;

  document.logElement.textContent = '';
  var parser = new Parser (new InputStream (source));
  var tree = parser.doc;
  parser.parse ();

  log (dumpTree (tree, ''));

  if (parser.hasAsyncScript) {
    log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
  }
} // update2
65 wakaba 1.1
// Current nesting depth of the log output; callers increment and decrement it.
var logIndentLevel = 0;

/**
 * Appends |s| plus a trailing newline to the log element as a text node.
 * Every line of |s| is prefixed with one space per indent level.
 */
function log (s) {
  var pad = '';
  var remaining = logIndentLevel;
  while (remaining-- > 0) {
    pad += ' ';
  }
  var text = pad + s.split ('\n').join ('\n' + pad);
  document.logElement.appendChild (document.createTextNode (text + "\n"));
} // log
75    
/**
 * Mutable holder for the markup that has not been tokenised yet.
 *
 * @param s The initial source text; stored in the |s| property.
 */
function InputStream (s) {
  this.s = s;
} // InputStream
79    
/**
 * Creates a parser reading from the input stream |i|.
 *
 * @param i   The InputStream to tokenise.
 * @param doc Optional document to build into. When omitted, a new JSDocument
 *            bound to this parser is created and flagged as HTML.
 */
function Parser (i, doc) {
  if (!doc) {
    doc = new JSDocument (this);
    doc.manakaiIsHTML = true;
  }

  this.input = i;
  this.doc = doc;
  this.openElements = [doc];      // stack of open elements; the document is the root
  this.parseMode = 'pcdata';
  this.nextToken = [];            // tokens pushed back for getNextToken ()

  // Script queues filled by appendChild () and drained by parse ().
  this.scriptsExecutedAfterParsing = [];
  this.scriptsExecutedSoon = [];
  this.scriptsExecutedAsynchronously = [];
} // Parser
94    
/**
 * Consumes and returns the next token from the input stream.
 *
 * Returned objects have a |type| of 'char', 'start-tag', 'end-tag', 'eof' or
 * 'abort'.  'char', 'start-tag' and 'end-tag' tokens carry a |value|;
 * 'start-tag' tokens also carry an |attrs| map keyed by lowercased attribute
 * name.  'abort' is returned when the token would reach or cross the
 * insertion point; in that case nothing is consumed.
 *
 * Tokens queued in this.nextToken are returned first.  When the insertion
 * point is not a number, every insertion-point comparison below is false and
 * the input is tokenised without restriction.
 */
Parser.prototype.getNextToken = function () {
  if (this.nextToken.length) {
    return this.nextToken.shift ();
  }

  var p = this;
  var i = this.input;
  var token;

  // True when the tag text |s| is not entirely before the insertion point,
  // or ends exactly at it without its closing ">".
  var tagIsIncomplete = function (s) {
    return p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length && s.charAt (s.length - 1) != '>');
  };

  if (this.parseMode == 'cdata') {
    var tagName = this.endTagName;
    if (p.insertionPoint <= 0) {
      return {type: 'abort'};
    }

    // A run of text up to the next "<", cut at the insertion point.
    i.s = i.s.replace (/^([^<]+)/, function (s, t) {
      if (0 < p.insertionPoint && p.insertionPoint < t.length) {
        var ip = p.insertionPoint;
        token = {type: 'char', value: t.substring (0, ip)};
        p.insertionPoint = 0;
        return t.substring (ip, t.length);
      }
      token = {type: 'char', value: t};
      p.insertionPoint -= t.length;
      return '';
    });
    if (token) return token;

    // The matching end tag, which is the only tag recognised in CDATA mode.
    var pattern = new RegExp ('^</' + tagName + '>', 'i');
    i.s = i.s.replace (pattern, function (s) {
      if (p.insertionPoint < s.length) {
        token = {type: 'abort'};
        return s;
      }
      token = {type: 'end-tag', value: tagName};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    // A partial end tag before the insertion point might still be completed
    // by a later document.write (), so wait for it.  Only the letters that
    // lie before the insertion point are compared; the original compared
    // prefixes of different lengths and missed this case whenever other
    // letters followed the insertion point.
    var m;
    if ((p.insertionPoint < ('</' + tagName).length) &&
        (m = i.s.match (/^<\/([A-Za-z]+)/))) {
      var writtenLength = p.insertionPoint - '</'.length;
      var written = m[1].substring (0, writtenLength).toLowerCase ();
      if (written == tagName.substring (0, writtenLength)) {
        return {type: 'abort'};
      }
    }

    // Any other "<" is ordinary character data.
    i.s = i.s.replace (/^</, function (s) {
      token = {type: 'char', value: s};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;
    return {type: 'eof'};
  }

  // End tag.
  i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
    if (tagIsIncomplete (s)) {
      token = {type: 'abort'};
      return s;
    }
    token = {type: 'end-tag', value: e.toLowerCase ()};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Start tag, with its attributes.
  i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
    if (tagIsIncomplete (s)) {
      token = {type: 'abort'};
      return s;
    }
    var tagName;
    var attrs = {};
    e = e.replace (/^[\S]+/, function (v) {
      tagName = v.toLowerCase ();
      return '';
    });
    while (true) {
      var m = false;
      e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
          function (x, attrName, attrValue1, attrValue2, attrValue3) {
        // A valueless attribute (e.g. <script defer>) leaves all three
        // captures unset; fall back to '' so that .replace () cannot throw
        // and the attribute is still recorded.
        var v = attrValue1 || attrValue2 || attrValue3 || '';
        v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
            .replace (/&amp;/g, '&');
        attrs[attrName.toLowerCase ()] = v;
        m = true;
        return '';
      });
      if (!m) break;
    }
    if (e.length) {
      log ('Broken start tag: "' + e + '"');
    }
    token = {type: 'start-tag', value: tagName, attrs: attrs};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  if (p.insertionPoint <= 0) {
    return {type: 'abort'};
  }

  // A run of text up to the next "<", cut at the insertion point.
  i.s = i.s.replace (/^[^<]+/, function (s) {
    if (p.insertionPoint < s.length) {
      var ip = p.insertionPoint;
      token = {type: 'char', value: s.substring (0, ip)};
      p.insertionPoint = 0;
      return s.substring (ip, s.length);
    }
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Anything left at this point is emitted one character at a time.
  i.s = i.s.replace (/^[\s\S]/, function (s) {
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  return {type: 'eof'};
}; // getNextToken
220    
221 wakaba 1.2 Parser.prototype.parse = function () {
222 wakaba 1.6 logIndentLevel++;
223     log ('parse: start');
224 wakaba 1.1
225     while (true) {
226 wakaba 1.2 var token = this.getNextToken ();
227 wakaba 1.1 log ('token: ' + token.type + ' "' + token.value + '"');
228    
229 wakaba 1.16 if (this.cdataEndTagRequired) {
230     // Generic CDATA parsing algorithm
231    
232     if (token.type != 'abort') {
233     // 7.
234     if (token.type == 'end-tag' && token.value == this.endTagName) {
235     // 7.1. Ignores it.
236     //
237     } else {
238     // 7.2. Parse error.
239     log ('Parse error: no </' + this.endTagName + '>');
240     this.nextToken.unshift (token);
241     }
242     this.cdataEndTagRequired = false;
243     continue;
244     }
245     }
246    
247 wakaba 1.1 if (token.type == 'start-tag') {
248     if (token.value == 'script') {
249 wakaba 1.2 // 1. Create an element for the token in the HTML namespace.
250     var el = new JSElement (this.doc, token.value);
251 wakaba 1.4 if (token.attrs.async != null) el.async = true;
252     if (token.attrs.defer != null) el.defer = true;
253     if (token.attrs.src != null) el.src = token.attrs.src;
254 wakaba 1.2
255     // 2. Mark the element as being "parser-inserted".
256     el.manakaiParserInserted = true;
257    
258     // 3. Switch the tokeniser's content model flag to the CDATA state.
259 wakaba 1.14 this.parseMode = 'cdata';
260     this.endTagName = 'script';
261 wakaba 1.1
262 wakaba 1.2 // 4.1. Collect all the character tokens.
263 wakaba 1.1 while (true) {
264 wakaba 1.2 var token = this.getNextToken ();
265 wakaba 1.1 log ('token: ' + token.type + ' "' + token.value + '"');
266    
267     if (token.type == 'char') {
268 wakaba 1.2 // 5. Append a single Text node to the script element node.
269 wakaba 1.1 el.manakaiAppendText (token.value);
270 wakaba 1.2
271     // 4.2. Until it returns a token that is not a character token, or
272 wakaba 1.3 // until it stops tokenising.
273 wakaba 1.1 } else if (token.type == 'eof' ||
274 wakaba 1.14 token.type == 'end-tag' ||
275 wakaba 1.3 token.type == 'abort') {
276 wakaba 1.2 // 6. Switched back to the PCDATA state.
277 wakaba 1.1 this.parseMode = 'pcdata';
278 wakaba 1.2
279     // 7.1. If the next token is not an end tag token with ...
280 wakaba 1.14 if (!(token.type == 'end-tag' && token.value == 'script')) {
281 wakaba 1.2 // 7.2. This is a parse error.
282     log ('Parse error: no </' + 'script>');
283 wakaba 1.16 this.nextToken.unshift (token);
284 wakaba 1.2
285     // 7.3. Mark the script element as "already executed".
286     el.manakaiAlreadyExecuted = true;
287     } else {
288     // 7.4. Ignore it.
289     //
290     }
291 wakaba 1.1 break;
292     }
293     }
294    
295 wakaba 1.2 // 8.1. If the parser were originally created for the ...
296     if (this.fragmentParsingMode) {
297     // 8.2. Mark the script element as "already executed" and ...
298 wakaba 1.20 el.manakaiAlreadyExecuted = true;
299 wakaba 1.2 continue;
300     }
301    
302     // 9.1. Let the old insertion point have the same value as the ...
303 wakaba 1.3 var oldInsertionPoint = this.insertionPoint;
304 wakaba 1.2 // 9.2. Let the insertion point be just before the next input ...
305 wakaba 1.3 this.setInsertionPoint (0);
306 wakaba 1.2
307     // 10. Append the new element to the current node.
308 wakaba 1.1 this.openElements[this.openElements.length - 1].appendChild (el);
309 wakaba 1.2
310     // 11. Let the insertion point have the value of the old ...
311 wakaba 1.7
312 wakaba 1.5 oldInsertionPoint += this.insertionPoint;
313 wakaba 1.3 this.setInsertionPoint (oldInsertionPoint);
314 wakaba 1.2
315 wakaba 1.18 // 12. If there is a pending external script
316     while (this.pendingExternalScript) {
317 wakaba 1.6 // 12.1. If the tree construction stage is being called reentrantly
318     if (this.reentrant) {
319     log ('parse: abort (reentrance)');
320     logIndentLevel--;
321     return;
322    
323     // 12.2. Otherwise
324     } else {
325     // 1.
326 wakaba 1.18 var script = this.pendingExternalScript;
327     this.pendingExternalScript = null;
328 wakaba 1.6
329     // 2. Pause until the script has completed loading.
330     //
331    
332     // 3. Let the insertion point to just before the next input char.
333     this.setInsertionPoint (0);
334    
335     // 4. Execute the script.
336     executeScript (this.doc, script);
337    
338     // 5. Let the insertion point be undefined again.
339     this.setInsertionPoint (undefined);
340 wakaba 1.2
341 wakaba 1.6 // 6. If there is once again a script that will execute ...
342     //
343     }
344     }
345 wakaba 1.14 } else if (token.value == 'style' ||
346     token.value == 'noscript' ||
347     token.value == 'xmp') {
348     // 1. Create an element for the token in the HTML namespace.
349     var el = new JSElement (this.doc, token.value);
350    
351     // 2. Append the new element to the current node.
352     this.openElements[this.openElements.length - 1].appendChild (el);
353    
354     // 3. Switch the tokeniser's content model flag to the CDATA state.
355     this.parseMode = 'cdata';
356     this.endTagName = token.value;
357    
358     // 4.1. Collect all the character tokens.
359     while (true) {
360     var token = this.getNextToken ();
361     log ('token: ' + token.type + ' "' + token.value + '"');
362    
363     if (token.type == 'char') {
364     // 5. Append a single Text node to the script element node.
365     el.manakaiAppendText (token.value);
366    
367     // 4.2. Until it returns a token that is not a character token, or
368     // until it stops tokenising.
369     } else if (token.type == 'eof' ||
370     token.type == 'end-tag' ||
371     token.type == 'abort') {
372     // 6. Switched back to the PCDATA state.
373     this.parseMode = 'pcdata';
374    
375 wakaba 1.16 if (token.type == 'abort') {
376     this.cdataEndTagRequired = true;
377     break;
378     }
379    
380 wakaba 1.14 // 7.1. If the next token is not an end tag token with ...
381     if (!(token.type == 'end-tag' &&
382     token.value == this.endTagName)) {
383     // 7.2. This is a parse error.
384     log ('Parse error: no </' + this.endTagName + '>');
385 wakaba 1.16 this.nextToken.unshift (token);
386 wakaba 1.14
387     // 7.3. Mark the script element as "already executed".
388     el.manakaiAlreadyExecuted = true;
389     } else {
390     // 7.4. Ignore it.
391     //
392     }
393     break;
394     }
395     }
396 wakaba 1.1 } else {
397 wakaba 1.2 var el = new JSElement (this.doc, token.value);
398 wakaba 1.1 this.openElements[this.openElements.length - 1].appendChild (el);
399     this.openElements.push (el);
400     }
401     } else if (token.type == 'end-tag') {
402     if (this.openElements[this.openElements.length - 1].localName ==
403     token.value) {
404     this.openElements.pop ();
405     } else {
406     log ('parse error: unmatched end tag: ' + token.value);
407     }
408 wakaba 1.3 } else if (token.type == 'char') {
409     this.openElements[this.openElements.length - 1].manakaiAppendText
410     (token.value);
411 wakaba 1.1 } else if (token.type == 'eof') {
412     break;
413 wakaba 1.3 } else if (token.type == 'abort') {
414     log ('parse: abort');
415 wakaba 1.6 logIndentLevel--;
416 wakaba 1.3 return;
417 wakaba 1.1 }
418     }
419    
420     log ('stop parsing');
421 wakaba 1.4
422     // readyState = 'interactive'
423    
424     // "When a script completes loading" rules start applying.
425    
426 wakaba 1.12 while (this.scriptsExecutedSoon.length > 0 ||
427     this.scriptsExecutedAsynchronously.length > 0) {
428     // Handle "list of scripts that will execute as soon as possible".
429     while (this.scriptsExecutedSoon.length > 0) {
430     var e = this.scriptsExecutedSoon.shift ();
431    
432     // If it has completed loading
433     log ('Execute an external script not inserted by parser...');
434     executeScript (this.doc, e);
435    
436     // NOTE: It MAY be executed before the end of the parsing, according
437     // to the spec.
438     this.hasAsyncScript = true;
439     }
440    
441     // Handle "list of scripts that will execute asynchronously".
442     while (this.scriptsExecutedAsynchronously.length > 0) {
443     var e = this.scriptsExecutedAsynchronously.shift ();
444    
445     // Step 1.
446     // We assume that all scripts have been loaded at this time.
447    
448     // Step 2.
449     log ('Execute an asynchronous script...');
450     executeScript (this.doc, e);
451    
452     // Step 3.
453     //
454    
455     // Step 4.
456     //
457 wakaba 1.10
458 wakaba 1.12 this.hasAsyncScript = true;
459     }
460 wakaba 1.10 }
461    
462 wakaba 1.4 // Handle "list of scripts that will execute when the document has finished
463     // parsing".
464     var list = this.scriptsExecutedAfterParsing;
465     while (list.length > 0) {
466     // TODO: break unless completed loading
467    
468     // Step 1.
469     //
470    
471     // Step 2. and Step 3.
472     log ('Executing a |defer|red script...');
473     executeScript (this.doc, list.shift ());
474    
475     // Step 4.
476     }
477    
478     log ('DOMContentLoaded event fired');
479    
480 wakaba 1.14 // "delays the load event" things has completed:
481 wakaba 1.4 // readyState = 'complete'
482     log ('load event fired');
483 wakaba 1.6
484     logIndentLevel--;
485 wakaba 1.1 } // parse
486    
/**
 * Sets the insertion point and logs the change.
 *
 * @param ip Offset into the remaining input.  undefined, null and NaN all
 *           store undefined.
 */
Parser.prototype.setInsertionPoint = function (ip) {
  if (ip == null || isNaN (ip)) {
    log ('insertion point: set to undefined');
    this.insertionPoint = undefined;
    return;
  }

  var remaining = this.input.s;
  if (ip == remaining.length) {
    log ('insertion point: end of file');
  } else {
    log ('insertion point: set to ' + ip +
         ' (before "' + remaining.substring (0, 10) + '")');
  }
  this.insertionPoint = ip;
}; // setInsertionPoint
500    
/**
 * Minimal document node: an empty child list plus a reference to the parser
 * that builds it.
 *
 * @param p The parser associated with this document; stored as |_parser|.
 */
function JSDocument (p) {
  this._parser = p;
  this.childNodes = [];
} // JSDocument
505    
/**
 * Minimal element node.
 *
 * @param doc       The owner document; stored as |ownerDocument|.
 * @param localName The element's local name.
 */
function JSElement (doc, localName) {
  this.ownerDocument = doc;
  this.localName = localName;
  this.childNodes = [];
} // JSElement
511    
/**
 * Appends |e| as the last child of this node and returns it.
 *
 * Appending a script element additionally sets its "already executed" flag
 * and then either queues it on one of the parser's script lists, records it
 * as the pending external script, or executes it immediately.
 */
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push (e);
  e.parentNode = this;

  // Only script elements need further processing.
  if (e.localName != 'script') return e;

  logIndentLevel++;
  log ('Running a script: start');

  var doc = this.ownerDocument || this;
  var p = doc._parser;

  // The script's type and character encoding, and the "without script"
  // checks, are not implemented; every script is treated as runnable.

  // Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // TODO: load an external resource when the element has a src attribute.

  // Choose how the script is run.  The first matching case wins.
  var hasSrc = e.src != null;
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    // defer without async: run after parsing has finished.
    p.scriptsExecutedAfterParsing.push (e);
    log ('Running a script: aborted (defer)');
  } else if (e.async && hasSrc) {
    // async external script.
    p.scriptsExecutedAsynchronously.push (e);
    log ('Running a script: aborted (async src)');
  } else if (e.async && p.scriptsExecutedAsynchronously.length > 0) {
    // async inline script queued behind already-queued async scripts.
    p.scriptsExecutedAsynchronously.push (e);
    log ('Running a script: aborted (async)');
  } else if (hasSrc && e.manakaiParserInserted) {
    // Parser-inserted external script: becomes the pending external script.
    if (p.pendingExternalScript) {
      log ('Error: There is a pending external script.');
    }
    p.pendingExternalScript = e;
    log ('Running a script: aborted (src parser-inserted)');
  } else if (hasSrc) {
    // External script not inserted by the parser.
    p.scriptsExecutedSoon.push (e);
    log ('Running a script: aborted (src)');
  } else {
    // Inline script: run now, even if other scripts are already executing.
    executeScript (doc, e);
  }

  log ('Running a script: end');
  logIndentLevel--;

  return e;
}; // appendChild
587    
/**
 * Runs the script element |e| against the document |doc|.
 *
 * The code comes from getExternalScript () when the element has a src, and
 * from the element's text otherwise.  A failed external load is logged as an
 * error event and ends the function early; in that case the closing
 * "executing a script block: end" line is not logged.
 */
function executeScript (doc, e) {
  log ('executing a script block: start');

  var code;
  if (e.src == null) {
    code = e.text;
  } else {
    code = getExternalScript (e.src);

    // A failed load fires an error event and nothing is run.
    if (code == null) {
      log ('error event fired at the script element');
      return;
    }

    log ('External script loaded: "' + code + '"');
  }

  // Scripting is assumed to be enabled, designMode disabled, and the
  // document active in its browsing context.
  parseAndRunScript (doc, code);

  log ('load event fired at the script element');

  log ('executing a script block: end');
} // executeScript
619    
/**
 * Resolves the src URI of an external script to its source text.
 *
 * Only javascript: URIs whose body is a single quoted string literal are
 * supported; the unescaped content of the literal is the script text.
 *
 * @param uri The value of the script element's src attribute.
 * @return The script text (possibly the empty string), or null when the URI
 *         is not supported.  The reason is logged in that case.
 */
function getExternalScript (uri) {
  if (!uri.match (/^javascript:/i)) {
    log ('URI scheme not supported: <' + uri + '>');
    return null;
  }

  // Both quoting styles accept an empty literal.  The original required a
  // non-empty double-quoted literal and reported an empty single-quoted one
  // as a load failure.
  var m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]*)")\s*$/i);
  if (!m) {
    log ('Complex javascript: URI is not supported: <' + uri + '>');
    return null;
  }

  // Exactly one of the two captures took part in the match; the other is
  // unset, and an empty literal yields ''.
  return unescapeJSLiteral (m[1] || m[2] || '');
} // getExternalScript
640    
/**
 * Interprets the script text |s| against the document |doc|.
 *
 * Three statement forms are recognised, each matched at the start of the
 * remaining text and removed once executed:
 *   - document.write (with one or more quoted string arguments);
 *   - creating a script element, setting its src, and appending it to the
 *     document element;
 *   - w (document.documentElement.innerHTML), which logs a tree dump.
 * Interpretation stops when the text is exhausted, when no form matches
 * (logged as a script parse error), or when a script is to be appended while
 * the document has no document element.
 */
function parseAndRunScript (doc, s) {
  var matched;
  var noDocumentElement;

  var runDocumentWrite = function (statement, argList) {
    matched = true;
    var literals = argList.match (/('[^']*'|"[^"]*")/g);
    var args = [];
    for (var k = 0; k < literals.length; k++) {
      // Strip the surrounding quotes before unescaping.
      var literal = literals[k];
      args.push (unescapeJSLiteral (literal.substring (1, literal.length - 1)));
    }
    doc.write.apply (doc, args);
    return '';
  };

  var runInsertScript = function (statement, singleQuoted, doubleQuoted) {
    matched = true;
    // Only one capture takes part in the match.  The trailing '' keeps an
    // empty single-quoted URI from reaching unescapeJSLiteral () as
    // undefined, which made it throw.
    var uri = unescapeJSLiteral (singleQuoted || doubleQuoted || '');
    noDocumentElement = !doc._insertExternalScript.apply (doc, [uri]);
    return '';
  };

  var runDump = function () {
    matched = true;
    log (dumpTree (doc, ''));
    return '';
  };

  while (true) {
    matched = false;
    s = s.replace (/^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/, runDocumentWrite);

    noDocumentElement = false;
    s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'([^']*)'|"([^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/,
                   runInsertScript);
    if (noDocumentElement) {
      log ('Script error: documentElement is null');
      break;
    }

    s = s.replace (/^\s*w\s*\(\s*document\.documentElement\.innerHTML\s*\)\s*;\s*/,
                   runDump);

    if (s == '') break;
    if (!matched) {
      log ('Script parse error: "' + s + '"');
      break;
    }
  }
} // parseAndRunScript
679    
/**
 * Expands \uXXXX escapes (exactly four hexadecimal digits) in |s| into the
 * corresponding characters.  No other escape sequence is interpreted.
 */
function unescapeJSLiteral (s) {
  return s.replace (/\\u([0-9A-Fa-f]{4})/g, function (escape, hex) {
    return String.fromCharCode (parseInt (hex, 16));
  });
} // unescapeJSLiteral
685    
/**
 * Minimal text node.
 *
 * @param data The character data; stored in the |data| property.
 */
function JSText (data) {
  this.data = data;
} // JSText
689    
/**
 * Appends the string |s| to this node: merged into the last child when that
 * child is a JSText, otherwise added as a new JSText child.
 */
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  // undefined when there are no children, which is not a JSText either.
  var last = this.childNodes[this.childNodes.length - 1];
  if (last instanceof JSText) {
    last.data += s;
  } else {
    this.childNodes.push (new JSText (s));
  }
}; // manakaiAppendText
700 wakaba 1.2
/**
 * document.open (): discards the current content and attaches a fresh,
 * script-created parser with an empty input stream.
 *
 * Takes up to two arguments.  The first (media type) is ignored; the second
 * is compared with 'replace', but history handling is not implemented.
 *
 * The call is ignored when the document's current parser was not created by
 * a script and has a defined insertion point.
 *
 * @return This document.
 */
JSDocument.prototype.open = function () {
  // Step 1. The type argument would default to 'text/html'; it is not used.

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  // The insertion point is a property of the parser, not of its input
  // stream, which is where the original looked for it (so this guard could
  // never fire).  NaN counts as undefined, matching document.write ():
  // the tokeniser turns an undefined insertion point into NaN.
  if (this._parser && !this._parser.scriptCreated) {
    var ip = this._parser.insertionPoint;
    if (ip != undefined && !isNaN (ip)) {
      log ('document.open () in parsing mode is ignored');
      return this;
    }
  }

  // Step 4.
  log ('onbeforeunload event fired');
  log ('onunload event fired');

  // Step 5.
  // The old parser is dropped when it is replaced in step 7.

  // Step 6.
  log ('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser (new InputStream (''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  this._parser.setInsertionPoint (this._parser.input.s.length);

  // Step 12.
  return this;
}; // document.open
752    
/**
 * document.write (): concatenates all arguments, inserts the result into the
 * parser's input stream at the insertion point, and, unless an external
 * script is pending, parses the inserted text reentrantly.
 *
 * When the insertion point is undefined (or NaN) the document is first
 * reopened with open ().
 */
JSDocument.prototype.write = function () {
  log ('document.write: start');
  logIndentLevel++;

  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
    this.open ();
    p = this._parser;
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // |arguments| is not a real array, so borrow Array.prototype.join.  The
  // original used Array.join (), a non-standard Firefox-only array generic.
  var s = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + s + '"' +
       ' before "' +
       p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
  p.input.s = p.input.s.substring (0, p.insertionPoint) + s
      + p.input.s.substring (p.insertionPoint, p.input.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a pending external script
  if (p.pendingExternalScript) {
    log ('document.write: processed later (there is an unprocessed <script src>)');
    logIndentLevel--;
    log ('document.write: return');
    return;
  }

  // 4. Process the characters that were inserted, ...
  var originalReentrant = p.reentrant;
  p.reentrant = true;
  p.parse ();
  p.reentrant = originalReentrant;
  // TODO: "Abort the processing of any nested invokations of the tokeniser,
  // yielding control back to the caller." (<script> parsing).  Do we need
  // to do something here?

  // 5. Return
  logIndentLevel--;
  log ('document.write: return');

  return;
}; // document.write
797    
/**
 * Creates a script element whose src is |uri| and appends it to the document
 * element.
 *
 * @return true when the element was appended, false when the document has no
 *         document element.
 */
JSDocument.prototype._insertExternalScript = function (uri) {
  var script = new JSElement (this, 'script');
  script.src = uri;

  var root = this.documentElement;
  if (!root) return false;

  root.appendChild (script);
  return true;
}; // _insertExternalScript
808    
// documentElement: the first JSElement child of the document, or null when
// there is none.
JSDocument.prototype.__defineGetter__ ('documentElement', function () {
  var children = this.childNodes;
  var index = 0;
  while (index < children.length) {
    var child = children[index++];
    if (child instanceof JSElement) return child;
  }
  return null;
});
818    
// text: the concatenated data of the element's direct JSText children, in
// document order.  Element children are skipped.
JSElement.prototype.__defineGetter__ ('text', function () {
  var pieces = [];
  var children = this.childNodes;
  for (var k = 0; k < children.length; k++) {
    var child = children[k];
    if (child instanceof JSText) pieces.push (child.data);
  }
  return pieces.join ('');
});
828 wakaba 1.1
/**
 * Serialises the children of |n| as "| "-prefixed lines, one node per line.
 *
 * Elements are followed by their async, defer and src attributes (when set)
 * and then by their own children, each level adding one space to the indent.
 * Text nodes are written in double quotes; anything else is stringified.
 *
 * @param n      The node whose children are dumped.
 * @param indent The indentation string for this level.
 * @return The dump; every line ends with a newline.
 */
function dumpTree (n, indent) {
  var prefix = '| ' + indent;
  var out = [];
  var children = n.childNodes;
  for (var k = 0; k < children.length; k++) {
    var child = children[k];
    if (child instanceof JSElement) {
      out.push (prefix + child.localName + '\n');
      if (child.async) out.push (prefix + ' async=""\n');
      if (child.defer) out.push (prefix + ' defer=""\n');
      if (child.src != null) {
        out.push (prefix + ' src="' + child.src + '"\n');
      }
      out.push (dumpTree (child, indent + ' '));
    } else if (child instanceof JSText) {
      out.push (prefix + '"' + child.data + '"\n');
    } else {
      out.push (prefix + child + '\n');
    }
  }
  return out.join ('');
} // dumpTree
849     </script>
850     </head>
851     <body onload="
852     document.sourceElement = document.getElementsByTagName ('textarea')[0];
853 wakaba 1.8
854     var q = location.search;
855     if (q != null) {
856     q = q.substring (1).split (/;/);
857     for (var i = 0; i < q.length; i++) {
858     var v = q[i].split (/=/, 2);
859     v[0] = decodeURIComponent (v[0]);
860     v[1] = decodeURIComponent (v[1] || '');
861     if (v[0] == 's') {
862     document.sourceElement.value = v[1];
863     }
864     }
865     }
866    
867 wakaba 1.1 document.logElement = document.getElementsByTagName ('output')[0];
868     update ();
869     ">
870 wakaba 1.8 <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
871     Parser</h1>
872 wakaba 1.1
873 wakaba 1.7 <h2>Markup to test
874 wakaba 1.8 (<a href=data:, id=permalink rel=bookmark>permalink</a>,
875     <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
876     id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
877     Viewer</a>)</h2>
878 wakaba 1.7 <p>
879     <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
880 wakaba 1.1 &lt;head>&lt;/head>&lt;body>
881     &lt;p>
882     &lt;script>
883 wakaba 1.3 document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
884 wakaba 1.1 &lt;/script>
885     &lt;p>
886     </textarea>
887    
888 wakaba 1.10 <h2 id=log>Log</h2>
889 wakaba 1.7 <p><output></output>
890    
891 wakaba 1.10 <h2 id=notes>Notes</h2>
892 wakaba 1.8
893 wakaba 1.21 <p>This is a <em>simplified</em> implementation of the <a
894     href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
895     Parsing Algorithm</a> (revision 2138). It only implements
896     scripting-related parts of the algorithm. Especially, this parser:
897    
898 wakaba 1.8 <ul>
899     <li>Does not support <code>DOCTYPE</code> and comment tokens.
900     <li>Does not support entities except for <code>&amp;quot;</code>,
901     <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
902     <code>src</code> attribute value.
903     <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
904     algorithm, and so on.
905     <li>Does not raise parse errors for invalid attribute specifications in start
906     or end tags.
907 wakaba 1.17 <li>Does not support RCDATA elements (<code>title</code> and
908 wakaba 1.14 <code>textarea</code>).
909 wakaba 1.17 <li>Does not strip the first newline in <code>pre</code>,
910     <code>listing</code>, and <code>textarea</code> elements.
911 wakaba 1.8 <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
912 wakaba 1.17 in CDATA/RCDATA elements.
913 wakaba 1.8 <li>Does not support foreign (SVG or MathML) elements.
914     <li>Only supports <code>script</code> <code>type</code>
915     <code>text/javascript</code>. <code>type</code> and <code>language</code>
916     attributes are ignored.
917 wakaba 1.10 <li>Only supports limited statements. It must consist of zero or more
918     statements looking similar to the following statements, possibly
919     introduced, followed, or separated by white space characters:
920     <ul>
921     <li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>.
922     <li><code>var s = document.createElement ("script");
923     s.src = "<var>string</var>";
924     document.documentElement.appendChild (s);</code>
925 wakaba 1.15 <li><code>w (document.documentElement.innerHTML);</code> (This statement
926     can be used to dump the document, even when the document has no
927     document element. The output format is the tree dump format used
928     in html5lib test data, not <abbr>HTML</abbr>.)
929 wakaba 1.10 </ul>
930     Note that strings may be delimited by <code>'</code>s instead of
931     <code>"</code>s.
932 wakaba 1.8 <li>Only supports <code>javascript:</code>
933     <abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the
934     <code>src</code> attribute of the <code>script</code> element. In addition,
935     the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to
936     the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
937 wakaba 1.11 <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
938     string literals.
939 wakaba 1.12 <li>Does not handle <i>stop parsing</i> phase correctly if the document is
940     replaced by a <code>document.open ()</code> call. In other words, delayed
941     (deferred or asynchronous) script executions and event firings might be
942     treated in a wrong way if a <code>document.open ()</code> invocation
943     is implicitly done by <code>document.write ()</code> in a delayed script.
944 wakaba 1.8 </ul>
945 wakaba 1.7
946 wakaba 1.8 <p>For some reason, this parser does not work in browsers that do
947     not support JavaScript 1.5.
948 wakaba 1.12
949     <!-- TODO: |src| attribute value should refer to the value at the time
950     when it is inserted into the document, not the value when the script is
951     executed. Currently it does not matter, since we don't allow dynamic
952     modification to the |src| content/DOM attribute value yet. -->
953 wakaba 1.10
954 wakaba 1.19 <p>See also
955     <a href="http://suika.fam.cx/gate/2005/sw/Live%20Scripting%20HTML%20Parser">SuikaWiki:
956     Live Scripting HTML Parser</a>.
957    
958 wakaba 1.13 </body>
959     </html>
960 wakaba 1.21 <!-- $Date: 2008/09/20 13:32:45 $ -->
961 wakaba 1.13 <!--
962    
963     Copyright 2008 Wakaba <w@suika.fam.cx>
964    
965     This program is free software; you can redistribute it and/or
966     modify it under the terms of the GNU General Public License
967     as published by the Free Software Foundation; either version 2
968     of the License, or (at your option) any later version.
969    
970     This program is distributed in the hope that it will be useful,
971     but WITHOUT ANY WARRANTY; without even the implied warranty of
972     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
973     GNU General Public License for more details.
974    
975     You should have received a copy of the GNU General Public License
976     along with this program; if not, write to the Free Software
977     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
978 wakaba 1.1
979 wakaba 1.13 -->

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24