/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory | Revision Log


Revision 1.2 - (hide annotations) (download) (as text)
Sun Apr 20 07:48:00 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.1: +182 -11 lines
File MIME type: text/html
Scripting support, first version (only a placeholder for document.write is implemented)

1 wakaba 1.1 <!DOCTYPE HTML>
2     <html lang=en>
3     <head>
4     <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>
5     <style>
6     textarea {
7     display: block;
8     width: 80%;
9     margin-left: auto;
10     margin-right: auto;
11     min-height: 20em;
12     }
13     output {
14     display: block;
15     font-family: monospace;
16     white-space: pre;
17     }
18     </style>
19     <script>
/**
 * Re-runs the demo: empties the log element, parses the current value of
 * the source element and logs a dump of the resulting tree.
 *
 * Reads document.sourceElement and document.logElement, which must have
 * been assigned before this is called.
 */
function update() {
  var output = document.logElement;
  output.textContent = '';

  var source = document.sourceElement.value;
  var parser = new Parser(new InputStream(source));
  parser.parse();

  var dump = dumpTree(parser.doc, '');
  log(dump);
}
26    
/**
 * Appends one line of text to the log element.
 *
 * @param s  Value to log; it is string-concatenated with a trailing newline
 *           and added to document.logElement as a new text node.
 */
function log(s) {
  var line = document.createTextNode(s + "\n");
  document.logElement.appendChild(line);
}
30    
/**
 * Input stream for the parser.
 *
 * @constructor
 * @param s  The text still to be tokenised; stored as the |s| property,
 *           which the tokeniser shortens as it consumes input.
 */
function InputStream(s) {
  this.s = s;
}
34    
/**
 * Parser state for one run over an input stream.
 *
 * @constructor
 * @param i  The InputStream to read from (kept as the |in| property).
 *
 * Initial state: tokeniser mode 'pcdata', a fresh JSDocument that points
 * back at this parser, and a stack of open elements holding only that
 * document.
 */
function Parser(i) {
  this.in = i;
  this.parseMode = 'pcdata';

  var doc = new JSDocument(this);
  this.doc = doc;
  this.openElements = [doc];
}
41    
/**
 * Removes the next token from the front of the input stream and returns it.
 *
 * Returned objects have a |type| of 'start-tag', 'end-tag', 'char' or
 * 'eof'; all but 'eof' also carry a |value| (the lowercased text between
 * the angle brackets for tags, the literal text for 'char').
 *
 * In 'script' mode only three things can be returned: the script end tag,
 * one 'char' token with all text up to the next script end tag (or up to
 * the end of input if there is none), or 'eof'.
 *
 * Changes from the earlier version:
 * - The script end tag is tested before the text. Previously an empty
 *   script element made the lazy text pattern run on to a later script
 *   end tag, swallowing the markup in between as script text.
 * - Script text that is never terminated is now returned as a 'char'
 *   token. Previously 'eof' was reported while the text stayed in the
 *   stream and was later re-tokenised as ordinary markup.
 */
Parser.prototype.getNextToken = function () {
  var i = this.in;

  // Strips the text matched by |re| from the front of the stream and
  // returns the match array, or null (stream untouched) if no match.
  // Every pattern passed in is anchored with ^.
  function consume (re) {
    var found = re.exec (i.s);
    if (found) i.s = i.s.substring (found[0].length);
    return found;
  }

  var m;

  if (this.parseMode == 'script') {
    if (consume (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/)) {
      return {type: 'end-tag', value: 'script'};
    }
    if (i.s.length == 0) return {type: 'eof'};

    // The end tag is not at position 0 here, so |end| is either -1 or
    // positive and the text token is never empty.
    var end = i.s.search (/<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/);
    if (end == -1) end = i.s.length;
    var text = i.s.substring (0, end);
    i.s = i.s.substring (end);
    return {type: 'char', value: text};
  }

  if ((m = consume (/^<\/([^>]+)>/))) {
    return {type: 'end-tag', value: m[1].toLowerCase ()};
  }
  if ((m = consume (/^<([^>]+)>/))) {
    return {type: 'start-tag', value: m[1].toLowerCase ()};
  }
  if ((m = consume (/^[^<]+/))) {
    return {type: 'char', value: m[0]};
  }
  // A "<" that does not start a tag is passed through as text.
  if ((m = consume (/^[\s\S]/))) {
    return {type: 'char', value: m[0]};
  }
  return {type: 'eof'};
}; // getNextToken
83    
/**
 * Runs the tree construction loop until the tokeniser reports 'eof'.
 *
 * Start tags create a JSElement, append it to the current node (the last
 * entry of this.openElements) and push it on the stack; end tags pop the
 * stack when they match the current node and log a parse error otherwise.
 * A script start tag is handled specially: its text is collected, the
 * element is appended (which triggers the script-running steps in
 * appendChild) and it is never pushed on the stack. Character tokens
 * outside a script element are logged but not added to the tree.
 *
 * The numbered comments refer to the steps of the HTML5 draft's handling
 * of a script start tag.
 *
 * Change from the earlier version: the fragment-parsing branch now sets
 * |manakaiAlreadyExecuted|, the flag every other part of the file reads
 * and writes, instead of an otherwise unused |alreadyExecuted| property.
 */
Parser.prototype.parse = function () {
  log ('start parsing');

  while (true) {
    var token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'script';

        // 4.1. Collect all the character tokens.
        while (true) {
          var scriptToken = this.getNextToken ();
          log ('token: ' + scriptToken.type + ' "' + scriptToken.value + '"');

          if (scriptToken.type == 'char') {
            // 5. Append a single Text node to the script element node.
            el.manakaiAppendText (scriptToken.value);

            // 4.2. Until it returns a token that is not a character token, or
            // TODO: 4.3. Until it stops tokenising.
          } else if (scriptToken.type == 'eof' ||
                     (scriptToken.type == 'end-tag' &&
                      scriptToken.value == 'script')) {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            // 7.1. If the next token is not an end tag token with ...
            if (scriptToken.type != 'end-tag') {
              // 7.2. This is a parse error.
              // (The string is split so that this source never contains
              // the end tag literally.)
              log ('Parse error: no </' + 'script>');

              // 7.3. Mark the script element as "already executed".
              el.manakaiAlreadyExecuted = true;
            } else {
              // 7.4. Ignore it.
            }
            break;
          }
        }

        // 8.1. If the parser were originally created for the ...
        // NOTE(review): nothing in this file sets fragmentParsingMode.
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and ...
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the ...

        // 9.2. Let the insertion point be just before the next input ...

        // 10. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 11. Let the insertion point have the value of the old ...

        // 12. If there is a script that will execute as soon as ...

      } else {
        var child = new JSElement (this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild (child);
        this.openElements.push (child);
      }
    } else if (token.type == 'end-tag') {
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'eof') {
      break;
    }
  }

  log ('stop parsing');
}; // parse
171    
/**
 * Root node of the tree built by the parser.
 *
 * @constructor
 * @param p  The Parser that owns this document (kept as |_parser|).
 */
function JSDocument(p) {
  this._parser = p;
  this.childNodes = [];
}
176    
/**
 * Element node of the tree built by the parser.
 *
 * @constructor
 * @param doc        The document the element belongs to (|ownerDocument|).
 * @param localName  The element name; stored as given.
 */
function JSElement(doc, localName) {
  this.ownerDocument = doc;
  this.localName = localName;
  this.childNodes = [];
}
182    
/**
 * Appends |e| as the last child of this node and sets its parentNode.
 * Shared by JSDocument and JSElement.
 *
 * When |e| is a script element the "running a script" steps are performed
 * (most are still TODO): an element whose manakaiAlreadyExecuted flag is
 * set is skipped; otherwise the flag is set and, for an inline script
 * without defer/async/src, executeScript is called immediately.
 *
 * Changes from the earlier version:
 * - The document is resolved as |this.ownerDocument || this|. A JSDocument
 *   has no ownerDocument, so appending a script element directly to the
 *   document used to throw a TypeError.
 * - The unused local holding the parser has been removed.
 *
 * @param e  The node to append.
 * @return   |e|.
 */
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push (e);
  e.parentNode = this;

  if (e.localName == 'script') {
    log ('start running a script');

    // |this| is either an element (which knows its document) or the
    // document itself.
    var doc = this.ownerDocument || this;

    // 1. Script type
    //

    // 2.1. If scripting is disabled
    //
    // 2.2. If the script element was created by an XML ... innerHTML ...
    //
    // 2.3. If the user agent does not support the scripting language ...
    //
    // 2.4. If the script element has its "already executed" flag set
    if (e.manakaiAlreadyExecuted) {
      // 2.5. Abort these steps at this point.
      log ('running a script: aborted');
      return e;
    }

    // 3. Set the element's "already executed" flag.
    e.manakaiAlreadyExecuted = true;

    // 4. If the element has a src attribute, then a load for ...
    // TODO: load an external resource

    // 5. The first of the following options:

    // 5.1.
    if (/* TODO: If the document is still being parsed && */
        e.defer && !e.async) {
      // TODO
    } else if (e.async && e.src != null) {
      // TODO
    } else if (e.async && e.src == null
               /* && list of scripts that will execute asynchronously is not empty */) {
      // TODO
    } else if (e.src != null && e.manakaiParserInserted) {
      // TODO
    } else if (e.src != null) {
      // TODO
    } else {
      executeScript (doc, e); // even if other scripts are already executing.
    }

    log ('end running a script');
  }

  return e;
}; // appendChild
240    
/**
 * Executes a script block: logs the start, logs the (simulated) load
 * event, hands the script source to parseAndRunScript and logs the end.
 *
 * Scripting is treated as always enabled. Only inline scripts are
 * supported: when |e.src| is set nothing is loaded and the source passed
 * on is undefined.
 *
 * @param doc  Document whose write() the script may call.
 * @param e    The script element.
 */
function executeScript(doc, e) {
  log('executing a script block: start');

  // If the load resulted in an error, then ... firing an error event ...

  // If the load was successful
  log('load event fired at the script element');

  // Scripting is enabled, Document.designMode is disabled,
  // Document is the active document in its browsing context

  var source;
  if (e.src == null) {
    source = e.text;
  } // else TODO: from external file

  parseAndRunScript(doc, source);

  log('executing a script block: end');
}
265    
/**
 * Placeholder script engine: runs a sequence of statements of the form
 *   document.write ('...', "...", ...);
 * and nothing else.
 *
 * Statements are taken from the front of |s| one at a time. For each one
 * the quoted arguments, with their quotes removed and no escape
 * processing, are passed to doc.write. Processing stops when the source
 * is used up; anything unrecognised is logged as a script parse error and
 * abandoned.
 *
 * @param doc  Object whose write method receives the arguments.
 * @param s    Script source text.
 */
function parseAndRunScript(doc, s) {
  var statement = /^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/;
  var literal = /('[^']*'|"[^"]*")/g;

  for (;;) {
    var found = s.match(statement);

    if (found) {
      var quoted = found[1].match(literal);
      var args = [];
      for (var k = 0; k < quoted.length; k++) {
        args.push(quoted[k].substring(1, quoted[k].length - 1));
      }
      doc.write.apply(doc, args);

      // Drop the statement that has just been run.
      s = s.substring(found[0].length);
    }

    if (s == '') break;
    if (!found) {
      log('Script parse error: "' + s + '"');
      break;
    }
  }
}
286    
/**
 * Text node of the tree built by the parser.
 *
 * @constructor
 * @param data  The node's character data.
 */
function JSText(data) {
  this.data = data;
}
290    
/**
 * Appends character data to this node. Shared by JSDocument and JSElement.
 *
 * If the last child is already a JSText node the string is added to its
 * data; otherwise a new JSText child is pushed.
 *
 * @param s  The text to append.
 */
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var kids = this.childNodes;
  var last = kids.length > 0 ? kids[kids.length - 1] : null;

  if (last instanceof JSText) {
    last.data += s;
  } else {
    kids.push(new JSText(s));
  }
};
301 wakaba 1.2
/**
 * Placeholder for document.write(): logs the text that would be inserted
 * into the input stream, then logs the return. The arguments are
 * concatenated without a separator. Nothing is actually inserted yet.
 *
 * Change from the earlier version: the arguments are joined with
 * Array.prototype.join.call. The Mozilla-only static Array.join generic
 * used before does not exist in current browsers, so the call threw a
 * TypeError.
 */
JSDocument.prototype.write = function () {
  // 1. If the insertion point is undefined, the open() method must be ...
  //

  // 2. ... inserted into the input stream just before the insertion point.
  var text = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + text + '"');

  // 3. If there is a script that will execute as soon as the parser resumes
  // TODO

  // 4. Process the characters that were inserted, ...

  // 5. Return
  log ('document.write: return');
  return;
}; // document.write
318    
/**
 * |text| getter for elements: the data of every JSText child, in order,
 * joined into one string. Children of other kinds are skipped and
 * descendants further down are not visited.
 */
JSElement.prototype.__defineGetter__('text', function () {
  var kids = this.childNodes;
  var collected = '';
  var k = 0;

  while (k < kids.length) {
    var kid = kids[k];
    if (kid instanceof JSText) {
      collected += kid.data;
    }
    k++;
  }

  return collected;
});
328 wakaba 1.1
/**
 * Serialises the children of |n| as an indented outline, one node per
 * line, each line starting with "| " followed by |indent|.
 *
 * Elements print their localName and are followed by their own children
 * at one more space of indent; text nodes print their data in double
 * quotes; anything else is printed through string conversion.
 *
 * @param n       Node whose childNodes are dumped (n itself is not).
 * @param indent  Prefix placed after the "| " marker on every line.
 * @return        The dump; the empty string when n has no children.
 */
function dumpTree(n, indent) {
  var out = '';
  var kids = n.childNodes;

  for (var k = 0; k < kids.length; k++) {
    var child = kids[k];
    var prefix = '| ' + indent;

    if (child instanceof JSElement) {
      out += prefix + child.localName + '\n';
      out += dumpTree(child, indent + ' ');
      continue;
    }

    if (child instanceof JSText) {
      out += prefix + '"' + child.data + '"\n';
    } else {
      out += prefix + child + '\n';
    }
  }

  return out;
}
344     </script>
345     </head>
<body>

<!-- The sample below is escaped markup; it is what the demo parser reads. -->
<label for="source">HTML source to parse</label>
<textarea id="source">&lt;html>
&lt;head>&lt;/head>&lt;body>
&lt;p>
&lt;script>
document.write ('aaaaaaa&lt;/p>\n&lt;script>\ndocument.write("cccccc")\n&lt;/', 'script>\nbbbbbb');
&lt;/script>
&lt;p>
</textarea>

<output for="source"></output>

<script>
  // Event wiring lives here rather than in on* attributes.
  (function () {
    var source = document.getElementById('source');

    // update() and log() locate their elements through these two
    // document properties.
    document.sourceElement = source;
    document.logElement = document.getElementsByTagName('output')[0];

    // Re-parse whenever the source has been edited.
    source.addEventListener('change', function () {
      update();
    }, false);

    // Initial run once the page has finished loading.
    window.addEventListener('load', function () {
      update();
    }, false);
  })();
</script>

</body>
364     </html>

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24