/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory | Revision Log


Revision 1.3 - (hide annotations) (download) (as text)
Sun Apr 20 10:02:43 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.2: +76 -8 lines
File MIME type: text/html
document.write support, first version (document.write in inline <script>)

1 wakaba 1.1 <!DOCTYPE HTML>
2     <html lang=en>
3     <head>
4     <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>
5     <style>
6     textarea {
7     display: block;
8     width: 80%;
9     margin-left: auto;
10     margin-right: auto;
11     min-height: 20em;
12     }
13     output {
14     display: block;
15     font-family: monospace;
16     white-space: pre;
17     }
18     </style>
19     <script>
20     function update () {
21     document.logElement.textContent = '';
22 wakaba 1.2 var p = new Parser (new InputStream (document.sourceElement.value));
23     p.parse ();
24 wakaba 1.1 log (dumpTree (p.doc, ''));
25     } // update
26    
27     function log (s) {
28     document.logElement.appendChild (document.createTextNode (s + "\n"));
29     } // log
30    
31     function InputStream (s) {
32     this.s = s;
33     } // InputStream
34    
35 wakaba 1.2 function Parser (i) {
36 wakaba 1.1 this.parseMode = 'pcdata';
37 wakaba 1.2 this.doc = new JSDocument (this);
38 wakaba 1.1 this.openElements = [this.doc];
39 wakaba 1.2 this.in = i;
40 wakaba 1.1 } // Parser
41    
42 wakaba 1.2 Parser.prototype.getNextToken = function () {
43 wakaba 1.3 var p = this;
44 wakaba 1.2 var i = this.in;
45 wakaba 1.1 if (this.parseMode == 'script') {
46     var token;
47 wakaba 1.3 if (p.insertionPoint <= 0) {
48     return {type: 'abort'};
49     }
50 wakaba 1.1 i.s = i.s.replace (/^([\s\S]+?)<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/,
51     function (s, t) {
52 wakaba 1.3 if (0 < p.insertionPoint && p.insertionPoint < t.length) {
53     token = {type: 'char', value: t.substring (0, p.insertionPoint)};
54     var ip = p.insertionPoint;
55     p.insertionPoint = 0;
56     return t.substring (ip, t.length) +
57     s.substring (s.length - 9, s.length);
58     }
59 wakaba 1.1 token = {type: 'char', value: t};
60 wakaba 1.3 p.insertionPoint -= s.length;
61 wakaba 1.1 return '<' + '/script>';
62     });
63     if (token) return token;
64 wakaba 1.3 i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {
65     if (s.length < p.insertionPoint) {
66     token = {type: 'abort'};
67     return s;
68     }
69 wakaba 1.1 token = {type: 'end-tag', value: 'script'};
70 wakaba 1.3 p.insertionPoint -= s.length;
71 wakaba 1.1 return '';
72     });
73     if (token) return token;
74     return {type: 'eof'};
75     }
76    
77     var token;
78     i.s = i.s.replace (/^<\/([^>]+)>/, function (s, e) {
79 wakaba 1.3 if (p.insertionPoint < s.length) {
80     token = {type: 'abort'};
81     return s;
82     }
83 wakaba 1.1 token = {type: 'end-tag', value: e.toLowerCase ()};
84 wakaba 1.3 p.insertionPoint -= s.length;
85 wakaba 1.1 return '';
86     });
87     if (token) return token;
88     i.s = i.s.replace (/^<([^>]+)>/, function (s, e) {
89 wakaba 1.3 if (p.insertionPoint < s.length) {
90     token = {type: 'abort'};
91     return s;
92     }
93 wakaba 1.1 token = {type: 'start-tag', value: e.toLowerCase ()};
94 wakaba 1.3 p.insertionPoint -= s.length;
95 wakaba 1.1 return '';
96     });
97     if (token) return token;
98 wakaba 1.3 if (p.insertionPoint <= 0) {
99     return {type: 'abort'};
100     }
101 wakaba 1.1 i.s = i.s.replace (/^[^<]+/, function (s) {
102 wakaba 1.3 if (p.insertionPoint < s.length) {
103     token = {type: 'char', value: s.substring (0, p.insertionPoint)};
104     var ip = p.insertionPoint;
105     p.insertionPoint = 0;
106     return s.substring (ip, s.length);
107     }
108 wakaba 1.1 token = {type: 'char', value: s};
109 wakaba 1.3 p.insertionPoint -= s.length;
110 wakaba 1.1 return '';
111     });
112     if (token) return token;
113     i.s = i.s.replace (/^[\s\S]/, function (s) {
114     token = {type: 'char', value: s};
115 wakaba 1.3 p.insertionPoint -= s.length;
116 wakaba 1.1 return '';
117     });
118     if (token) return token;
119     return {type: 'eof'};
120     } // getNextToken
121    
122 wakaba 1.2 Parser.prototype.parse = function () {
123 wakaba 1.1 log ('start parsing');
124    
125     while (true) {
126 wakaba 1.2 var token = this.getNextToken ();
127 wakaba 1.1 log ('token: ' + token.type + ' "' + token.value + '"');
128    
129     if (token.type == 'start-tag') {
130     if (token.value == 'script') {
131 wakaba 1.2 // 1. Create an element for the token in the HTML namespace.
132     var el = new JSElement (this.doc, token.value);
133    
134     // 2. Mark the element as being "parser-inserted".
135     el.manakaiParserInserted = true;
136    
137     // 3. Switch the tokeniser's content model flag to the CDATA state.
138 wakaba 1.1 this.parseMode = 'script';
139    
140 wakaba 1.2 // 4.1. Collect all the character tokens.
141 wakaba 1.1 while (true) {
142 wakaba 1.2 var token = this.getNextToken ();
143 wakaba 1.1 log ('token: ' + token.type + ' "' + token.value + '"');
144    
145     if (token.type == 'char') {
146 wakaba 1.2 // 5. Append a single Text node to the script element node.
147 wakaba 1.1 el.manakaiAppendText (token.value);
148 wakaba 1.2
149     // 4.2. Until it returns a token that is not a character token, or
150 wakaba 1.3 // until it stops tokenising.
151 wakaba 1.1 } else if (token.type == 'eof' ||
152 wakaba 1.3 (token.type == 'end-tag' && token.value == 'script') ||
153     token.type == 'abort') {
154 wakaba 1.2 // 6. Switched back to the PCDATA state.
155 wakaba 1.1 this.parseMode = 'pcdata';
156 wakaba 1.2
157     // 7.1. If the next token is not an end tag token with ...
158     if (token.type != 'end-tag') {
159     // 7.2. This is a parse error.
160     log ('Parse error: no </' + 'script>');
161    
162     // 7.3. Mark the script element as "already executed".
163     el.manakaiAlreadyExecuted = true;
164     } else {
165     // 7.4. Ignore it.
166     //
167     }
168 wakaba 1.1 break;
169     }
170     }
171    
172 wakaba 1.2 // 8.1. If the parser were originally created for the ...
173     if (this.fragmentParsingMode) {
174     // 8.2. Mark the script element as "already executed" and ...
175     el.alreadyExecuted = true;
176     continue;
177     }
178    
179     // 9.1. Let the old insertion point have the same value as the ...
180 wakaba 1.3 var oldInsertionPoint = this.insertionPoint;
181 wakaba 1.2 // 9.2. Let the insertion point be just before the next input ...
182 wakaba 1.3 this.setInsertionPoint (0);
183 wakaba 1.2
184     // 10. Append the new element to the current node.
185 wakaba 1.1 this.openElements[this.openElements.length - 1].appendChild (el);
186 wakaba 1.2
187     // 11. Let the insertion point have the value of the old ...
188 wakaba 1.3 this.setInsertionPoint (oldInsertionPoint);
189 wakaba 1.2
190     // 12. If there is a script that will execute as soon as ...
191    
192    
193 wakaba 1.1 } else {
194 wakaba 1.2 var el = new JSElement (this.doc, token.value);
195 wakaba 1.1 this.openElements[this.openElements.length - 1].appendChild (el);
196     this.openElements.push (el);
197     }
198     } else if (token.type == 'end-tag') {
199     if (this.openElements[this.openElements.length - 1].localName ==
200     token.value) {
201     this.openElements.pop ();
202     } else {
203     log ('parse error: unmatched end tag: ' + token.value);
204     }
205 wakaba 1.3 } else if (token.type == 'char') {
206     this.openElements[this.openElements.length - 1].manakaiAppendText
207     (token.value);
208 wakaba 1.1 } else if (token.type == 'eof') {
209     break;
210 wakaba 1.3 } else if (token.type == 'abort') {
211     log ('parse: abort');
212     return;
213 wakaba 1.1 }
214     }
215    
216     log ('stop parsing');
217     } // parse
218    
219 wakaba 1.3 Parser.prototype.setInsertionPoint = function (ip) {
220     if (ip == undefined || ip == null || isNaN (ip)) {
221     log ('insertion point: set to undefined');
222     this.insertionPoint = undefined;
223     } else {
224     log ('insertion point: set to ' + ip +
225     ' (before "' + this.in.s.substring (0, 10) + '")');
226     this.insertionPoint = ip;
227     }
228     }; // setInsertionPoint
229    
230 wakaba 1.2 function JSDocument (p) {
231 wakaba 1.1 this.childNodes = [];
232 wakaba 1.2 this._parser = p;
233 wakaba 1.1 } // JSDocument
234    
235 wakaba 1.2 function JSElement (doc, localName) {
236 wakaba 1.1 this.localName = localName;
237 wakaba 1.2 this.ownerDocument = doc;
238 wakaba 1.1 this.childNodes = [];
239     } // JSElement
240    
241     JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
242     function (e) {
243     this.childNodes.push (e);
244     e.parentNode = this;
245 wakaba 1.2
246     if (e.localName == 'script') {
247     log ('start running a script');
248    
249 wakaba 1.3 var doc = this.ownerDocument || this;
250 wakaba 1.2 var p = doc._parser;
251    
252     // 1. Script type
253     //
254    
255     // 2.1. If scripting is disabled
256     //
257     // 2.2. If the script element was created by an XML ... innerHTML ...
258     //
259     // 2.3. If the user agent does not support the scripting language ...
260     //
261     // 2.4. If the script element has its "already executed" flag set
262     if (e.manakaiAlreadyExecuted) {
263     // 2.5. Abort these steps at this point.
264     log ('running a script: aborted');
265     return e;
266     }
267    
268     // 3. Set the element's "already executed" flag.
269     e.manakaiAlreadyExecuted = true;
270    
271     // 4. If the element has a src attribute, then a load for ...
272     // TODO: load an external resource
273    
274     // 5. The first of the following options:
275    
276     // 5.1.
277     if (/* TODO: If the document is still being parsed && */
278     e.defer && !e.async) {
279     // TODO
280     } else if (e.async && e.src != null) {
281     // TODO
282     } else if (e.async && e.src == null
283     /* && list of scripts that will execute asynchronously is not empty */) {
284     // TODO
285     } else if (e.src != null && e.manakaiParserInserted) {
286     // TODO
287     } else if (e.src != null) {
288     // TODO
289     } else {
290     executeScript (doc, e); // even if other scripts are already executing.
291     }
292    
293     log ('end running a script');
294     }
295    
296 wakaba 1.1 return e;
297     }; // appendChild
298    
299 wakaba 1.2 function executeScript (doc, e) {
300     log ('executing a script block: start');
301    
302     // If the load resulted in an error, then ... firing an error event ...
303    
304     // If the load was successful
305     log ('load event fired at the script element');
306    
307     if (true) {
308     // Scripting is enabled, Document.designMode is disabled,
309     // Document is the active document in its browsing context
310    
311     var s;
312     if (e.src != null) {
313     // TODO: from external file
314     } else {
315     s = e.text;
316     }
317    
318     parseAndRunScript (doc, s);
319     }
320    
321     log ('executing a script block: end');
322     } // executeScript
323    
324     function parseAndRunScript (doc, s) {
325     while (true) {
326     var matched = false;
327     s = s.replace (/^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/, function (s, t) {
328     matched = true;
329     var args = [];
330     t.replace (/('[^']*'|"[^"]*")/g, function (s, v) {
331     args.push (v.substring (1, v.length - 1));
332     return '';
333     });
334     doc.write.apply (doc, args);
335     return '';
336     });
337     if (s == '') break;
338     if (!matched) {
339     log ('Script parse error: "' + s + '"');
340     break;
341     }
342     }
343     } // parseAndRunScript
344    
345 wakaba 1.1 function JSText (data) {
346     this.data = data;
347     } // JSText
348    
349     JSDocument.prototype.manakaiAppendText =
350     JSElement.prototype.manakaiAppendText =
351     function (s) {
352     if (this.childNodes.length > 0 &&
353     this.childNodes[this.childNodes.length - 1] instanceof JSText) {
354     this.childNodes[this.childNodes.length - 1].data += s;
355     } else {
356     this.childNodes.push (new JSText (s));
357     }
358     }; // manakaiAppendText
359 wakaba 1.2
360     JSDocument.prototype.write = function () {
361 wakaba 1.3 var p = this._parser;
362    
363 wakaba 1.2 // 1. If the insertion point is undefined, the open() method must be ...
364 wakaba 1.3 if (p.insertionPoint == NaN || p.insertionPoint == undefined) {
365     // TODO: open ()
366     }
367 wakaba 1.2
368     // 2. ... inserted into the input stream just before the insertion point.
369 wakaba 1.3 var s = Array.join (arguments, '');
370     log ('document.write: insert "' + s + '"' +
371     ' before "' + p.in.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
372     p.in.s = p.in.s.substring (0, p.insertionPoint) + s
373     + p.in.s.substring (p.insertionPoint, p.in.s.length);
374     p.insertionPoint += s.length;
375 wakaba 1.2
376     // 3. If there is a script that will execute as soon as the parser resumes
377     // TODO
378    
379     // 4. Process the characters that were inserted, ...
380 wakaba 1.3 p.parse ();
381 wakaba 1.2
382     // 5. Return
383     log ('document.write: return');
384     return;
385     }; // document.write
386    
387     JSElement.prototype.__defineGetter__ ('text', function () {
388     var r = '';
389     for (var i = 0; i < this.childNodes.length; i++) {
390     if (this.childNodes[i] instanceof JSText) {
391     r += this.childNodes[i].data;
392     }
393     }
394     return r;
395     });
396 wakaba 1.1
397     function dumpTree (n, indent) {
398     var r = '';
399     for (var i = 0; i < n.childNodes.length; i++) {
400     var node = n.childNodes[i];
401     if (node instanceof JSElement) {
402     r += '| ' + indent + node.localName + '\n';
403     r += dumpTree (node, indent + ' ');
404     } else if (node instanceof JSText) {
405     r += '| ' + indent + '"' + node.data + '"\n';
406     } else {
407     r += '| ' + indent + node + '\n';
408     }
409     }
410     return r;
411     } // dumpTree
412     </script>
413     </head>
414     <body onload="
415     document.sourceElement = document.getElementsByTagName ('textarea')[0];
416     document.logElement = document.getElementsByTagName ('output')[0];
417     update ();
418     ">
419    
420     <textarea onchange=" update () ">&lt;html>
421     &lt;head>&lt;/head>&lt;body>
422     &lt;p>
423     &lt;script>
424 wakaba 1.3 document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
425 wakaba 1.1 &lt;/script>
426     &lt;p>
427     </textarea>
428    
429     <output></output>
430    
431     </body>
432     </html>

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24