/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.14 - (hide annotations) (download) (as text)
Tue Apr 29 02:50:00 2008 UTC (16 years, 6 months ago) by wakaba
Branch: MAIN
Changes since 1.13: +61 -11 lines
File MIME type: text/html
Support the other CDATA elements

1 wakaba 1.1 <!DOCTYPE HTML>
2     <html lang=en>
3     <head>
4 wakaba 1.8 <title>Live Scripting HTML Parser</title>
5 wakaba 1.13 <link rel=author href="http://suika.fam.cx/~wakaba/who?">
6     <link rel=license href="http://suika.fam.cx/c/gnu/gpl"
7     title="GNU GPL2 or later">
<style>
/* Page title: no outer margin, moderately enlarged. */
h1 {
  margin: 0;
  font-size: 150%;
}
/* Section headings: no outer margin, same size as body text. */
h2 {
  margin: 0;
  font-size: 100%;
}
/* Paragraphs: horizontal indent only. */
p {
  margin: 0 1em;
}
/* Source input area. */
textarea {
  width: 100%;
  /* NOTE(review): "-width" is not a standard property name; presumably a
     parser hack aimed at a legacy browser -- confirm before removing. */
  -width: 99%;
  height: 10em;
}
/* Log/dump area: monospace block that keeps line breaks but wraps. */
output {
  display: block;
  font-family: monospace;
  /* Vendor-prefixed value first; the standard value below overrides it
     wherever it is understood. */
  white-space: -moz-pre-wrap;
  white-space: pre-wrap;
}
</style>
32     <script>
// Timer id of the most recently scheduled refresh (0 before the first call).
var delayedUpdater = 0;

/**
 * Schedules a refresh of the parse result.
 *
 * Any refresh that is still pending is cancelled first, so a burst of
 * calls results in a single |update2| invocation 100ms after the last one.
 */
function update () {
  if (delayedUpdater) clearTimeout (delayedUpdater);
  delayedUpdater = setTimeout (update2, 100);
} // update
42    
/**
 * Re-parses the source text and rewrites the log, but only when the text
 * differs from the one handled by the previous call.
 *
 * Also refreshes the permalink and the Live DOM Viewer link so that both
 * carry the current source text.
 */
function update2 () {
  var source = document.sourceElement.value;
  if (source == document.previousSourceText) return;
  document.previousSourceText = source;

  var encoded = encodeURIComponent (source);
  document.links['permalink'].href = location.pathname + '?s=' + encoded;
  document.links['ldvlink'].href
      = 'http://software.hixie.ch/utilities/js/live-dom-viewer/?' + encoded;

  // Start from an empty log, then parse and dump the resulting tree.
  document.logElement.textContent = '';
  var parser = new Parser (new InputStream (source));
  var tree = parser.doc;
  parser.parse ();

  log (dumpTree (tree, ''));

  if (parser.hasAsyncScript) {
    log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors');
  }
} // update2
65 wakaba 1.1
// Current nesting depth of log output; one space is prepended per level.
var logIndentLevel = 0;

/**
 * Appends |s|, indented by the current log level and followed by a
 * newline, to the log element as a text node.
 */
function log (s) {
  var indent = '';
  while (indent.length < logIndentLevel) {
    indent += ' ';
  }
  var line = document.createTextNode (indent + s + "\n");
  document.logElement.appendChild (line);
} // log
73    
/**
 * Mutable wrapper around the not-yet-consumed source text.
 *
 * @param s  Initial content of the stream; exposed as the |s| property.
 */
function InputStream (s) {
  // Held in a property so that other code can replace the remaining text.
  this.s = s;
} // InputStream
77    
/**
 * Parser state for one input stream.
 *
 * @param i    The input stream object to read from.
 * @param doc  Optional document to build into.  When omitted, a new
 *             JSDocument bound to this parser is created and flagged as
 *             an HTML document.
 */
function Parser (i, doc) {
  this.parseMode = 'pcdata';

  var target = doc;
  if (!target) {
    target = new JSDocument (this);
    target.manakaiIsHTML = true;
  }
  this.doc = target;

  // The document itself is the bottom of the stack of open elements.
  this.openElements = [target];
  this.input = i;

  // Queues of scripts whose execution is postponed.
  this.scriptsExecutedAfterParsing = [];
  this.scriptsExecutedSoon = [];
  this.scriptsExecutedAsynchronously = [];
} // Parser
91    
/**
 * Removes the next token from the front of the input stream and returns it.
 *
 * The returned object has a |type| of 'char', 'start-tag', 'end-tag',
 * 'eof' or 'abort'.  'char' and 'end-tag' tokens carry a |value|;
 * 'start-tag' tokens carry a |value| (lowercased tag name) and |attrs|
 * (lowercased attribute name to value map).  'abort' means that the
 * token would cross the insertion point, so the tokeniser has to stop
 * until more input becomes available; nothing is consumed in that case.
 *
 * Whenever input is consumed, the insertion point is moved back by the
 * number of consumed characters.
 */
Parser.prototype.getNextToken = function () {
  var p = this;
  var i = this.input;
  var token;

  // Whether the matched tag text |s| must not be consumed yet: either it
  // extends beyond the insertion point, or it ends exactly at the insertion
  // point without a closing ">" (more of it may still be written).
  function isIncompleteTag (s) {
    return p.insertionPoint < s.length ||
        (p.insertionPoint <= s.length &&
         s.charAt (s.length - 1) != '>');
  }

  if (this.parseMode == 'cdata') {
    var tagName = this.endTagName;
    if (p.insertionPoint <= 0) {
      return {type: 'abort'};
    }

    // A run of text up to the next "<", cut at the insertion point.
    i.s = i.s.replace (/^([^<]+)/, function (s, t) {
      if (0 < p.insertionPoint && p.insertionPoint < t.length) {
        token = {type: 'char', value: t.substring (0, p.insertionPoint)};
        var ip = p.insertionPoint;
        p.insertionPoint = 0;
        return t.substring (ip, t.length);
      }
      token = {type: 'char', value: t};
      p.insertionPoint -= t.length;
      return '';
    });
    if (token) return token;

    // The end tag that terminates the current CDATA element.
    var pattern = new RegExp ('^</' + tagName + '>', 'i');
    i.s = i.s.replace (pattern, function (s) {
      if (p.insertionPoint < s.length) {
        token = {type: 'abort'};
        return s;
      }
      token = {type: 'end-tag', value: tagName};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;

    // The text before the insertion point might be the beginning of the
    // end tag; wait for more input rather than emitting "<" as text.
    var m;
    if ((p.insertionPoint < ('</' + tagName).length) &&
        (m = i.s.match (/^<\/([A-Za-z]+)/))) {
      var v = m[1].substring (0, p.insertionPoint).toLowerCase ();
      if (v == tagName.substring (0, p.insertionPoint - '</'.length)) {
        return {type: 'abort'};
      }
    }

    // A "<" that does not start the end tag is ordinary text.
    i.s = i.s.replace (/^</, function (s) {
      token = {type: 'char', value: s};
      p.insertionPoint -= s.length;
      return '';
    });
    if (token) return token;
    return {type: 'eof'};
  }

  // End tag.
  i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) {
    if (isIncompleteTag (s)) {
      token = {type: 'abort'};
      return s;
    }
    token = {type: 'end-tag', value: e.toLowerCase ()};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Start tag.
  i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) {
    if (isIncompleteTag (s)) {
      token = {type: 'abort'};
      return s;
    }
    var tagName;
    var attrs = {};
    e = e.replace (/^[\S]+/, function (v) {
      tagName = v.toLowerCase ();
      return '';
    });
    // Strip attribute specifications one by one from the tag content.
    while (true) {
      var m = false;
      e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/,
      function (x, attrName, attrValue1, attrValue2, attrValue3) {
        // An attribute without a value (or with an empty one) yields no
        // truthy capture; treat it as the empty string.
        var v = attrValue1 || attrValue2 || attrValue3 || '';
        v = v.replace (/&quot;/g, '"').replace (/&apos;/g, "'")
            .replace (/&amp;/g, '&');
        attrs[attrName.toLowerCase ()] = v;
        m = true;
        return '';
      });
      if (!m) break;
    }
    if (e.length) {
      log ('Broken start tag: "' + e + '"');
    }
    token = {type: 'start-tag', value: tagName, attrs: attrs};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  if (p.insertionPoint <= 0) {
    return {type: 'abort'};
  }

  // A run of text up to the next "<", cut at the insertion point.
  i.s = i.s.replace (/^[^<]+/, function (s) {
    if (p.insertionPoint < s.length) {
      token = {type: 'char', value: s.substring (0, p.insertionPoint)};
      var ip = p.insertionPoint;
      p.insertionPoint = 0;
      return s.substring (ip, s.length);
    }
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;

  // Any other single character (i.e. a "<" that did not form a tag).
  i.s = i.s.replace (/^[\s\S]/, function (s) {
    token = {type: 'char', value: s};
    p.insertionPoint -= s.length;
    return '';
  });
  if (token) return token;
  return {type: 'eof'};
} // getNextToken
213    
/**
 * Private helper of |parse|: collects the text content of a CDATA element.
 *
 * Character tokens are appended to |el| until the tokeniser returns a
 * non-character token.  The parse mode is then switched back to PCDATA.
 * If that token is not the end tag named by |this.endTagName|, a parse
 * error is logged and |el| is marked as "already executed".
 */
Parser.prototype._collectCDATAText = function (el) {
  while (true) {
    var token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (token.type == 'char') {
      // Append the text to the element (merged into a single Text node).
      el.manakaiAppendText (token.value);
    } else if (token.type == 'eof' ||
               token.type == 'end-tag' ||
               token.type == 'abort') {
      // Switch back to the PCDATA state.
      this.parseMode = 'pcdata';

      if (!(token.type == 'end-tag' &&
            token.value == this.endTagName)) {
        // The element was not closed by its own end tag: parse error.
        log ('Parse error: no </' + this.endTagName + '>');
        el.manakaiAlreadyExecuted = true;
      }
      // Otherwise the end tag is simply ignored.
      break;
    }
  }
}; // _collectCDATAText

/**
 * Runs the tree construction loop over the input stream.
 *
 * Returns early (after logging) when the tokeniser aborts at the insertion
 * point, or when a script is waiting for the parser to resume while the
 * parser is running reentrantly.  When the end of the input is reached,
 * the "stop parsing" steps are performed: pending scripts are executed and
 * the DOMContentLoaded and load events are logged.
 */
Parser.prototype.parse = function () {
  logIndentLevel++;
  log ('parse: start');

  while (true) {
    var token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);
        if (token.attrs.async != null) el.async = true;
        if (token.attrs.defer != null) el.defer = true;
        if (token.attrs.src != null) el.src = token.attrs.src;

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'cdata';
        this.endTagName = 'script';

        // 4.-7. Collect the text content and handle the end tag.
        this._collectCDATAText (el);

        // 8.1. If the parser were originally created for the ...
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and ...
          // (Same flag name as the one tested by |appendChild|.)
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the ...
        var oldInsertionPoint = this.insertionPoint;
        // 9.2. Let the insertion point be just before the next input ...
        this.setInsertionPoint (0);

        // 10. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 11. Let the insertion point have the value of the old ...
        // Characters written while the script ran have moved the insertion
        // point forward; keep that offset.
        oldInsertionPoint += this.insertionPoint;
        this.setInsertionPoint (oldInsertionPoint);

        // 12. If there is a script that will execute as soon as ...
        while (this.scriptExecutedWhenParserResumes) {
          // 12.1. If the tree construction stage is being called reentrantly
          if (this.reentrant) {
            log ('parse: abort (reentrance)');
            logIndentLevel--;
            return;

          // 12.2. Otherwise
          } else {
            // 1.
            var script = this.scriptExecutedWhenParserResumes;
            this.scriptExecutedWhenParserResumes = null;

            // 2. Pause until the script has completed loading.
            //

            // 3. Let the insertion point to just before the next input char.
            this.setInsertionPoint (0);

            // 4. Execute the script.
            executeScript (this.doc, script);

            // 5. Let the insertion point be undefined again.
            this.setInsertionPoint (undefined);

            // 6. If there is once again a script that will execute ...
            //
          }
        }
      } else if (token.value == 'style' ||
                 token.value == 'noscript' ||
                 token.value == 'xmp') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);

        // 2. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'cdata';
        this.endTagName = token.value;

        // 4.-7. Collect the text content and handle the end tag.
        this._collectCDATAText (el);
      } else {
        var el = new JSElement (this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild (el);
        this.openElements.push (el);
      }
    } else if (token.type == 'end-tag') {
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'char') {
      this.openElements[this.openElements.length - 1].manakaiAppendText
          (token.value);
    } else if (token.type == 'eof') {
      break;
    } else if (token.type == 'abort') {
      log ('parse: abort');
      logIndentLevel--;
      return;
    }
  }

  log ('stop parsing');

  // readyState = 'interactive'

  // "When a script completes loading" rules start applying.

  while (this.scriptsExecutedSoon.length > 0 ||
         this.scriptsExecutedAsynchronously.length > 0) {
    // Handle "list of scripts that will execute as soon as possible".
    while (this.scriptsExecutedSoon.length > 0) {
      var e = this.scriptsExecutedSoon.shift ();

      // If it has completed loading
      log ('Execute an external script not inserted by parser...');
      executeScript (this.doc, e);

      // NOTE: It MAY be executed before the end of the parsing, according
      // to the spec.
      this.hasAsyncScript = true;
    }

    // Handle "list of scripts that will execute asynchronously".
    while (this.scriptsExecutedAsynchronously.length > 0) {
      var e = this.scriptsExecutedAsynchronously.shift ();

      // Step 1.
      // We assume that all scripts have been loaded at this time.

      // Step 2.
      log ('Execute an asynchronous script...');
      executeScript (this.doc, e);

      // Step 3.
      //

      // Step 4.
      //

      this.hasAsyncScript = true;
    }
  }

  // Handle "list of scripts that will execute when the document has finished
  // parsing".
  var list = this.scriptsExecutedAfterParsing;
  while (list.length > 0) {
    // TODO: break unless completed loading

    // Step 1.
    //

    // Step 2. and Step 3.
    log ('Executing a |defer|red script...');
    executeScript (this.doc, list.shift ());

    // Step 4.
  }

  log ('DOMContentLoaded event fired');

  // Everything that "delays the load event" has completed:
  // readyState = 'complete'
  log ('load event fired');

  logIndentLevel--;
} // parse
454    
/**
 * Sets the insertion point and logs the change.
 *
 * @param ip  Offset from the beginning of the remaining input.  A null,
 *            undefined or NaN value makes the insertion point undefined.
 */
Parser.prototype.setInsertionPoint = function (ip) {
  if (ip == null || isNaN (ip)) {
    log ('insertion point: set to undefined');
    this.insertionPoint = undefined;
    return;
  }

  var rest = this.input.s;
  if (ip == rest.length) {
    log ('insertion point: end of file');
  } else {
    // Show the beginning of the remaining input for orientation.
    log ('insertion point: set to ' + ip +
         ' (before "' + rest.substring (0, 10) + '")');
  }
  this.insertionPoint = ip;
}; // setInsertionPoint
468    
/**
 * Minimal document node.
 *
 * @param p  The parser that builds this document; kept as |_parser|.
 */
function JSDocument (p) {
  this._parser = p;
  this.childNodes = [];
} // JSDocument
473    
/**
 * Minimal element node.
 *
 * @param doc        The document that owns the element.
 * @param localName  The local name (tag name) of the element.
 */
function JSElement (doc, localName) {
  this.ownerDocument = doc;
  this.localName = localName;
  this.childNodes = [];
} // JSElement
479    
/**
 * Appends |e| as the last child of this node and returns |e|.
 *
 * When |e| is a script element, the "running a script" steps are applied:
 * depending on its |defer|, |async| and |src| properties and on whether it
 * was inserted by the parser, the script is queued on the owning parser or
 * executed immediately.
 */
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push (e);
  e.parentNode = this;

  // Only script elements need any further processing.
  if (e.localName != 'script') {
    return e;
  }

  logIndentLevel++;
  log ('Running a script: start');

  // A document has no |ownerDocument|; it is its own owner here.
  var doc = this.ownerDocument || this;
  var p = doc._parser;

  // 1. Script type
  //

  // 2.1. If scripting is disabled
  //
  // 2.2. If the script element was created by an XML ... innerHTML ...
  //
  // 2.3. If the user agent does not support the scripting language ...
  //
  // 2.4. If the script element has its "already executed" flag set
  if (e.manakaiAlreadyExecuted) {
    // 2.5. Abort these steps at this point.
    log ('Running a script: aborted');
    logIndentLevel--;
    return e;
  }

  // 3. Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // 4. If the element has a src attribute, then a load for ...
  // TODO: load an external resource

  // 5. The first of the following options:
  var hasSrc = e.src != null;

  // 5.1.
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    p.scriptsExecutedAfterParsing.push (e);
    log ('Running a script: aborted (defer)');
  } else if (e.async && hasSrc) {
    p.scriptsExecutedAsynchronously.push (e);
    log ('Running a script: aborted (async src)');
  } else if (e.async && !hasSrc &&
             p.scriptsExecutedAsynchronously.length > 0) {
    p.scriptsExecutedAsynchronously.push (e);
    log ('Running a script: aborted (async)');
    // ISSUE: What is the difference with the case above?
  } else if (hasSrc && e.manakaiParserInserted) {
    if (p.scriptExecutedWhenParserResumes) {
      log ('Error: There is a script that will execute as soon as the parser resumes.');
    }
    p.scriptExecutedWhenParserResumes = e;
    log ('Running a script: aborted (src parser-inserted)');
  } else if (hasSrc) {
    p.scriptsExecutedSoon.push (e);
    log ('Running a script: aborted (src)');
  } else {
    executeScript (doc, e); // even if other scripts are already executing.
  }

  log ('Running a script: end');
  logIndentLevel--;

  return e;
}; // appendChild
549    
/**
 * Executes the script element |e| in the context of the document |doc|.
 *
 * The script text is taken from the resource named by |e.src| when that
 * property is set, and from the text content of the element otherwise.
 * If the external resource cannot be obtained, an error event is logged
 * and nothing is executed.
 */
function executeScript (doc, e) {
  log ('executing a script block: start');

  var code;
  if (e.src == null) {
    code = e.text;
  } else {
    code = getExternalScript (e.src);

    // If the load resulted in an error, then ... firing an error event ...
    if (code == null) {
      log ('error event fired at the script element');
      return;
    }

    log ('External script loaded: "' + code + '"');
  }

  // If the load was successful
  log ('load event fired at the script element');

  // Scripting is enabled, Document.designMode is disabled,
  // Document is the active document in its browsing context
  // (all of these are assumed to hold unconditionally).
  parseAndRunScript (doc, code);

  log ('executing a script block: end');
} // executeScript
580    
/**
 * Returns the script text designated by |uri|, or null when it cannot be
 * obtained.
 *
 * Only |javascript:| URIs consisting of a single string literal, delimited
 * by either kind of quotation mark and possibly empty, are supported; the
 * unescaped content of the literal is the script text.  Any other URI is
 * logged as unsupported and yields null.
 */
function getExternalScript (uri) {
  if (!uri.match (/^javascript:/i)) {
    log ('URI scheme not supported: <' + uri + '>');
    return null;
  }

  var m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]*)")\s*$/i);
  if (!m) {
    log ('Complex javascript: URI is not supported: <' + uri + '>');
    return null;
  }

  // Exactly one of the two captures took part in the match.  An empty
  // literal is a valid (empty) script, not a load error.
  return unescapeJSLiteral (m[1] || m[2] || '');
} // getExternalScript
601    
/**
 * Interprets the script text |s| against the document |doc|.
 *
 * Two statement forms are recognized at the beginning of the remaining
 * text: a |document.write| call whose arguments are string literals, and
 * the three-statement sequence that creates a script element, sets its
 * |src| to a |javascript:| URI and appends it to the document element.
 * Recognized statements are executed and removed until the text is empty;
 * anything else is logged as a script parse error.
 */
function parseAndRunScript (doc, s) {
  var writeStatement = /^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/;
  var insertScriptStatement = /^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'(javascript:[^']*)'|"(javascript:[^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/;

  for (;;) {
    var recognized = false;

    s = s.replace (writeStatement, function (whole, argList) {
      recognized = true;
      // Every literal in the argument list, still with its quotation marks.
      var literals = argList.match (/('[^']*'|"[^"]*")/g);
      var args = [];
      for (var n = 0; n < literals.length; n++) {
        var quoted = literals[n];
        args.push (unescapeJSLiteral (quoted.substring (1, quoted.length - 1)));
      }
      doc.write.apply (doc, args);
      return '';
    });

    s = s.replace (insertScriptStatement, function (whole, single, dbl) {
      recognized = true;
      doc._insertExternalScript.apply
          (doc, [unescapeJSLiteral (single ? single : dbl)]);
      return '';
    });

    if (s == '') break;
    if (!recognized) {
      log ('Script parse error: "' + s + '"');
      break;
    }
  }
} // parseAndRunScript
629    
/**
 * Replaces every backslash-u escape followed by exactly four hexadecimal
 * digits in |s| with the character of that code; other text is left as is.
 */
function unescapeJSLiteral (s) {
  var unicodeEscape = /\\u([0-9A-Fa-f]{4})/g;
  return s.replace (unicodeEscape, function (escape, hex) {
    return String.fromCharCode (parseInt (hex, 16));
  });
} // unescapeJSLiteral
635    
/**
 * Minimal text node.
 *
 * @param data  The character data of the node; exposed as |data|.
 */
function JSText (data) {
  // Mutable: adjacent text is merged into an existing node's |data|.
  this.data = data;
} // JSText
639    
/**
 * Appends the text |s| to this node.  If the last child is already a text
 * node, the text is added to it; otherwise a new text node is appended.
 */
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var children = this.childNodes;
  var last = children[children.length - 1];
  if (last instanceof JSText) {
    last.data += s;
    return;
  }
  children.push (new JSText (s));
}; // manakaiAppendText
650 wakaba 1.2
/**
 * Simplified |document.open ()|.
 *
 * Accepts up to two arguments (a MIME type and the keyword "replace");
 * neither currently affects the result.  The call is ignored while the
 * document is being parsed by a parser that was not itself created by
 * |document.open ()|.  Otherwise the document is emptied and a new,
 * script-created parser with an empty input stream takes over.
 *
 * @return This document.
 */
JSDocument.prototype.open = function () {
  // Two or fewer arguments

  // Step 1.
  var type = arguments[0] || 'text/html';

  // Step 2.
  var replace = arguments[1] == 'replace';

  // Step 3.
  // The insertion point is a property of the parser, not of its input
  // stream; it is defined only while the parser is running.
  if (this._parser &&
      !this._parser.scriptCreated &&
      this._parser.insertionPoint != undefined) {
    log ('document.open () in parsing mode is ignored');
    return this;
  }

  // Step 4.
  log ('onbeforeunload event fired');
  log ('onunload event fired');

  // Step 5.
  if (this._parser) {
    // Discard the parser.
  }

  // Step 6.
  log ('document cleared by document.open ()');
  this.childNodes = [];

  // Step 7.
  this._parser = new Parser (new InputStream (''), this);
  this._parser.scriptCreated = true;

  // Step 8.
  this.manakaiIsHTML = true;

  // Step 9.
  // If not text/html, ...

  // Step 10.
  if (!replace) {
    // History
  }

  // Step 11.
  this._parser.setInsertionPoint (this._parser.input.s.length);

  // Step 12.
  return this;
}; // document.open
702    
/**
 * Simplified |document.write ()|.
 *
 * The arguments are concatenated and inserted into the parser's input
 * stream at the insertion point.  If the insertion point is undefined,
 * |open ()| is invoked first.  Unless a script is waiting for the parser
 * to resume, the parser is then run reentrantly to process the inserted
 * text.
 */
JSDocument.prototype.write = function () {
  logIndentLevel++;

  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  if (isNaN (p.insertionPoint) || p.insertionPoint == undefined) {
    this.open ();
    p = this._parser;
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // |arguments| is not a real array; borrow the standard |join| method.
  var s = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + s + '"' +
       ' before "' +
       p.input.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
  p.input.s = p.input.s.substring (0, p.insertionPoint) + s
      + p.input.s.substring (p.insertionPoint, p.input.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a script that will execute as soon as the parser resumes
  // (the property set by |appendChild| for parser-inserted external scripts).
  if (p.scriptExecutedWhenParserResumes) {
    log ('document.write: processed later (there is an unprocessed <script src>)');
    logIndentLevel--;
    return;
  }

  // 4. Process the characters that were inserted, ...
  var originalReentrant = p.reentrant;
  p.reentrant = true;
  p.parse ();
  p.reentrant = originalReentrant;
  // TODO: "Abort the processing of any nested invokations of the tokeniser,
  // yielding control back to the caller." (script element parsing).  Do we
  // need to do something here?

  // 5. Return
  log ('document.write: return');

  logIndentLevel--;
  return;
}; // document.write
745    
/**
 * Creates a script element whose |src| is |uri| and appends it to the
 * document element of this document.
 */
JSDocument.prototype._insertExternalScript = function (uri) {
  var script = new JSElement (this, 'script');
  script.src = uri;
  var root = this.documentElement;
  root.appendChild (script);
}; // _insertExternalScript
751    
// |documentElement|: the first element child of the document, or null
// when the document has no element child.
JSDocument.prototype.__defineGetter__ ('documentElement', function () {
  var children = this.childNodes;
  var index = 0;
  while (index < children.length) {
    var child = children[index];
    if (child instanceof JSElement) {
      return child;
    }
    index++;
  }
  return null;
});
761    
// |text|: concatenation of the data of all text node children, in order.
// Non-text children are skipped.
JSElement.prototype.__defineGetter__ ('text', function () {
  var children = this.childNodes;
  var content = '';
  for (var n = 0; n < children.length; n++) {
    var child = children[n];
    if (!(child instanceof JSText)) continue;
    content += child.data;
  }
  return content;
});
771 wakaba 1.1
/**
 * Serializes the descendants of |n| as text, one node per line.
 *
 * Every line starts with "| " followed by |indent|.  Elements are shown by
 * local name, followed by lines for their |async|, |defer| and |src|
 * properties (when set) and by their own descendants with a deeper indent.
 * Text nodes are shown as their data in quotation marks.
 *
 * @param n       The node whose children are dumped.
 * @param indent  The indentation string for the children of |n|.
 * @return The dump; an empty string when |n| has no children.
 */
function dumpTree (n, indent) {
  var lead = '| ' + indent;
  var parts = [];
  for (var k = 0; k < n.childNodes.length; k++) {
    var child = n.childNodes[k];
    if (child instanceof JSElement) {
      parts.push (lead + child.localName + '\n');
      if (child.async) parts.push (lead + ' async=""\n');
      if (child.defer) parts.push (lead + ' defer=""\n');
      if (child.src != null) {
        parts.push (lead + ' src="' + child.src + '"\n');
      }
      parts.push (dumpTree (child, indent + ' '));
    } else if (child instanceof JSText) {
      parts.push (lead + '"' + child.data + '"\n');
    } else {
      parts.push (lead + child + '\n');
    }
  }
  return parts.join ('');
} // dumpTree
792     </script>
793     </head>
<body onload="
  // Remember the source input area on the document object.
  document.sourceElement = document.getElementsByTagName ('textarea')[0];

  // Take the initial source text from the |s| query parameter, if any.
  // Parameters are separated by semicolons.
  var query = location.search;
  if (query != null) {
    var params = query.substring (1).split (/;/);
    for (var n = 0; n < params.length; n++) {
      var pair = params[n].split (/=/, 2);
      var name = decodeURIComponent (pair[0]);
      var value = decodeURIComponent (pair[1] || '');
      if (name == 's') {
        document.sourceElement.value = value;
      }
    }
  }

  // Remember the log area and show the initial parse result.
  document.logElement = document.getElementsByTagName ('output')[0];
  update ();
">
813 wakaba 1.8 <h1>Live Scripting <abbr title="Hypertext Markup Language">HTML</abbr>
814     Parser</h1>
815 wakaba 1.1
816 wakaba 1.7 <h2>Markup to test
817 wakaba 1.8 (<a href=data:, id=permalink rel=bookmark>permalink</a>,
818     <a href="http://software.hixie.ch/utilities/js/live-dom-viewer/"
819     id=ldvlink>Live <abbr title="Document Object Model">DOM</abbr>
820     Viewer</a>)</h2>
821 wakaba 1.7 <p>
822     <textarea onkeydown=" update () " onchange=" update () " oninput=" update () ">&lt;html>
823 wakaba 1.1 &lt;head>&lt;/head>&lt;body>
824     &lt;p>
825     &lt;script>
826 wakaba 1.3 document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
827 wakaba 1.1 &lt;/script>
828     &lt;p>
829     </textarea>
830    
831 wakaba 1.10 <h2 id=log>Log</h2>
832 wakaba 1.7 <p><output></output>
833    
834 wakaba 1.10 <h2 id=notes>Notes</h2>
835 wakaba 1.8
836     <p>This is a <em>simplified</em> implementation of
837     <a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5
838     Parsing Algorithm</a>. It only implements script-related part of the
839     algorithm. Especially, this parser:
840     <ul>
841     <li>Does not support <code>DOCTYPE</code> and comment tokens.
842     <li>Does not support entities except for <code>&amp;quot;</code>,
843     <code>&amp;apos;</code>, and <code>&amp;amp;</code> in <code>script</code>
844     <code>src</code> attribute value.
845     <li>Does not support omissions of start or end tags, the <abbr>AAA</abbr>
846     algorithm, and so on.
847     <li>Does not raise parse errors for invalid attribute specifications in start
848     or end tags.
849 wakaba 1.14 <li>Does not support RCDATA elements (<code>title</code> and
850     <code>textarea</code>).
851     <li>Does not strip the first newline in <code>pre</code> elements.
852 wakaba 1.8 <li>Does not support <code>&lt;!--</code>..<code>--></code> parsing rule
853     in <code>script</code> element.
854     <li>Does not support foreign (SVG or MathML) elements.
855     <li>Only supports <code>script</code> <code>type</code>
856     <code>text/javascript</code>. <code>type</code> and <code>language</code>
857     attributes are ignored.
858 wakaba 1.10 <li>Only supports limited statements. It must consist of zero or more
859     of statements looking similar to the following statements, possibly
860     introduced, followed, or separated by white space characters:
861     <ul>
862     <li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>.
863     <li><code>var s = document.createElement ("script");
864     s.src = "<var>string</var>";
865     document.documentElement.appendChild (s);</code>
866     </ul>
867     Note that strings may be delimited by <code>'</code>s instead of
868     <code>"</code>s.
869 wakaba 1.8 <li>Only supports <code>javascript:</code>
870     <abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the
871     <code>src</code> attribute of the <code>script</code> element. In addition,
872     the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to
873     the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>.
874 wakaba 1.11 <li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript
875     string literals.
876 wakaba 1.12 <li>Does not handle <i>stop parsing</i> phase correctly if the document is
877     replaced by <code>document.open ()</code> call. In other words, delayed
878     (deferred or asynchronous) script executions and event firings might be
879     treated in a wrong way if a <code>document.open ()</code> invocation
880     is implicitly done by <code>document.write ()</code> in a delayed script.
881 wakaba 1.8 </ul>
882 wakaba 1.7
883 wakaba 1.8 <p>For some reason, this parser does not work in browsers that do
884     not support JavaScript 1.5.
885 wakaba 1.12
886     <!-- TODO: |src| attribute value should refer the value at the time
887     when it is inserted into the document, not the value when the script is
888     executed. Currently it does not matter, since we don't allow dynamic
889     modification to the |src| content/DOM attribute value yet. -->
890 wakaba 1.10
891 wakaba 1.13 </body>
892     </html>
893     <!-- $Date: 2008/01/19 06:47:07 $ -->
894     <!--
895    
896     Copyright 2008 Wakaba <w@suika.fam.cx>
897    
898     This program is free software; you can redistribute it and/or
899     modify it under the terms of the GNU General Public License
900     as published by the Free Software Foundation; either version 2
901     of the License, or (at your option) any later version.
902    
903     This program is distributed in the hope that it will be useful,
904     but WITHOUT ANY WARRANTY; without even the implied warranty of
905     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
906     GNU General Public License for more details.
907    
908     You should have received a copy of the GNU General Public License
909     along with this program; if not, write to the Free Software
910     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
911 wakaba 1.1
912 wakaba 1.13 -->

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24