/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory | Revision Log


Revision 1.2 - (show annotations) (download) (as text)
Sun Apr 20 07:48:00 2008 UTC (17 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.1: +182 -11 lines
File MIME type: text/html
Scripting support, first version (only placeholder for document.write is implemented)

1 <!DOCTYPE HTML>
2 <html lang=en>
3 <head>
4 <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>
5 <style>
6 textarea {
7 display: block;
8 width: 80%;
9 margin-left: auto;
10 margin-right: auto;
11 min-height: 20em;
12 }
13 output {
14 display: block;
15 font-family: monospace;
16 white-space: pre;
17 }
18 </style>
19 <script>
// Re-runs the whole demo: empties the log output, parses the text currently
// held by the source element, and logs a dump of the resulting tree.
function update () {
  var output = document.logElement;
  output.textContent = '';

  var input = new InputStream (document.sourceElement.value);
  var parser = new Parser (input);
  parser.parse ();

  var treeDump = dumpTree (parser.doc, '');
  log (treeDump);
} // update
26
// Appends |s| followed by a newline, as a new text node, to the log element.
function log (s) {
  var target = document.logElement;
  var line = document.createTextNode (s + "\n");
  target.appendChild (line);
} // log
30
// Input stream wrapper.  The not-yet-tokenised text is kept in the |s|
// property; the tokeniser removes consumed characters from its front.
function InputStream (s) {
  // Remaining (unconsumed) input.
  this.s = s;
} // InputStream
34
// Parser state for one input stream |i|.
//
//   parseMode    - tokeniser mode, 'pcdata' or 'script'.
//   doc          - the JSDocument that receives the parsed tree.
//   openElements - stack of open nodes; its bottom entry is the document.
//   in           - the input stream being consumed.
function Parser (i) {
  this.parseMode = 'pcdata';

  var doc = new JSDocument (this);
  this.doc = doc;
  this.openElements = [doc];

  this.in = i;
} // Parser
41
// Removes the next token from the front of the input stream and returns it.
//
// Returned tokens are plain objects:
//   {type: 'start-tag', value: lowercased text between the angle brackets}
//   {type: 'end-tag',   value: lowercased text after the slash}
//   {type: 'char',      value: run of character data}
//   {type: 'eof'}       when the input is exhausted
//
// In 'script' mode only the script end tag (matched case-insensitively) is
// recognised as markup; everything before it is returned as one 'char'
// token.
Parser.prototype.getNextToken = function () {
  var i = this.in;
  var m;

  if (this.parseMode == 'script') {
    if (i.s.length == 0) return {type: 'eof'};

    m = /<\/script>/i.exec (i.s);

    if (!m) {
      // Unterminated script: the rest of the input is script text.  It has
      // to be consumed here, otherwise it would be tokenised again as
      // markup once the parser switches back to the pcdata mode.
      var rest = i.s;
      i.s = '';
      return {type: 'char', value: rest};
    }

    if (m.index > 0) {
      // Text before the end tag.  The end tag itself stays in the stream
      // and is returned by the next call.
      var text = i.s.substring (0, m.index);
      i.s = i.s.substring (m.index);
      return {type: 'char', value: text};
    }

    // The end tag is at the very front of the input.  Checking this before
    // looking for text makes an empty script element end immediately
    // instead of swallowing its own end tag as text.
    i.s = i.s.substring (m[0].length);
    return {type: 'end-tag', value: 'script'};
  }

  // End tag.
  m = /^<\/([^>]+)>/.exec (i.s);
  if (m) {
    i.s = i.s.substring (m[0].length);
    return {type: 'end-tag', value: m[1].toLowerCase ()};
  }

  // Start tag.  The value is everything between the angle brackets.
  m = /^<([^>]+)>/.exec (i.s);
  if (m) {
    i.s = i.s.substring (m[0].length);
    return {type: 'start-tag', value: m[1].toLowerCase ()};
  }

  // Run of characters up to the next "less than" sign.
  m = /^[^<]+/.exec (i.s);
  if (m) {
    i.s = i.s.substring (m[0].length);
    return {type: 'char', value: m[0]};
  }

  // A "less than" sign that does not start a tag: emit it as a character.
  m = /^[\s\S]/.exec (i.s);
  if (m) {
    i.s = i.s.substring (m[0].length);
    return {type: 'char', value: m[0]};
  }

  return {type: 'eof'};
}; // getNextToken
83
// Runs the simplified tree construction loop until the tokeniser reports
// end of input.
//
// - A start tag other than "script" creates an element, appends it to the
//   current node and pushes it onto the stack of open elements.
// - An end tag pops the current node when the names match; otherwise a
//   parse error is logged.
// - A "script" start tag is handled by the numbered steps below (numbers
//   refer to the steps of the HTML5 parsing algorithm this demo follows).
// - Character tokens outside script elements are ignored.
//
// Every token, and every parse error, is reported through log ().
Parser.prototype.parse = function () {
  log ('start parsing');

  while (true) {
    var token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'script';

        // 4.1. Collect all the character tokens.
        while (true) {
          var scriptToken = this.getNextToken ();
          log ('token: ' + scriptToken.type + ' "' + scriptToken.value + '"');

          if (scriptToken.type == 'char') {
            // 5. Append a single Text node to the script element node.
            el.manakaiAppendText (scriptToken.value);

          // 4.2. Until it returns a token that is not a character token, or
          // TODO: 4.3. Until it stops tokenising.
          } else if (scriptToken.type == 'eof' ||
                     (scriptToken.type == 'end-tag' &&
                      scriptToken.value == 'script')) {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            // 7.1. If the next token is not an end tag token with ...
            if (scriptToken.type != 'end-tag') {
              // 7.2. This is a parse error.
              log ('Parse error: no </' + 'script>');

              // 7.3. Mark the script element as "already executed".
              el.manakaiAlreadyExecuted = true;
            } else {
              // 7.4. Ignore it.
              //
            }
            break;
          }
        }

        // 8.1. If the parser were originally created for the ...
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and ...
          // The flag must use the same property name that appendChild
          // checks (manakaiAlreadyExecuted), otherwise it has no effect.
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the ...

        // 9.2. Let the insertion point be just before the next input ...

        // 10. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 11. Let the insertion point have the value of the old ...

        // 12. If there is a script that will execute as soon as ...

      } else {
        var child = new JSElement (this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild (child);
        this.openElements.push (child);
      }
    } else if (token.type == 'end-tag') {
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'eof') {
      break;
    }
  }

  log ('stop parsing');
}; // parse
171
// Document node of the demo tree.  |p| is the parser building the document.
function JSDocument (p) {
  // Children in document order.
  this.childNodes = [];
  // Parser this document belongs to.
  this._parser = p;
} // JSDocument
176
// Element node of the demo tree, named |localName| and owned by |doc|.
function JSElement (doc, localName) {
  // Tag name as produced by the tokeniser.
  this.localName = localName;
  // Document that created the element.
  this.ownerDocument = doc;
  // Children in document order.
  this.childNodes = [];
} // JSElement
182
// Appends node |e| as the last child of this document or element, sets
// e.parentNode, and returns |e|.
//
// Appending an element whose localName is "script" additionally runs the
// "running a script" steps below (numbers refer to the steps of the HTML5
// algorithm this demo follows).  Only the inline, non-deferred,
// non-async case is implemented; it calls executeScript ().
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push (e);
  e.parentNode = this;

  if (e.localName != 'script') return e;

  log ('start running a script');

  // This method is shared by documents and elements.  A JSDocument has no
  // ownerDocument property, so when the script is appended directly to
  // the document the receiver itself is the document.
  var doc = this.ownerDocument || this;

  // 1. Script type
  //

  // 2.1. If scripting is disabled
  //
  // 2.2. If the script element was created by an XML ... innerHTML ...
  //
  // 2.3. If the user agent does not support the scripting language ...
  //
  // 2.4. If the script element has its "already executed" flag set
  if (e.manakaiAlreadyExecuted) {
    // 2.5. Abort these steps at this point.
    log ('running a script: aborted');
    return e;
  }

  // 3. Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // 4. If the element has a src attribute, then a load for ...
  // TODO: load an external resource

  // 5. The first of the following options:

  // 5.1.
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    // TODO
  } else if (e.async && e.src != null) {
    // TODO
  } else if (e.async && e.src == null
             /* && list of scripts that will execute asynchronously is not empty */) {
    // TODO
  } else if (e.src != null && e.manakaiParserInserted) {
    // TODO
  } else if (e.src != null) {
    // TODO
  } else {
    executeScript (doc, e); // even if other scripts are already executing.
  }

  log ('end running a script');

  return e;
}; // appendChild
240
// Executes the script block held by script element |e| on behalf of
// document |doc|, logging each stage.  Only inline scripts are supported:
// the source text is taken from e.text and passed to parseAndRunScript ().
function executeScript (doc, e) {
  log ('executing a script block: start');

  // If the load resulted in an error, then ... firing an error event ...
  // (not implemented; the load is always treated as successful)
  log ('load event fired at the script element');

  // The conditions "scripting is enabled", "Document.designMode is
  // disabled" and "Document is the active document in its browsing
  // context" are all assumed to hold.
  var source;
  if (e.src == null) {
    source = e.text;
  } else {
    // TODO: from external file
  }

  parseAndRunScript (doc, source);

  log ('executing a script block: end');
} // executeScript
265
// Placeholder script engine.  Runs source text |s| against document |doc|.
//
// The only statement understood is
//   document.write (STRING, STRING, ...);
// where every STRING is a single- or double-quoted literal without escape
// processing and the trailing semicolon is required.  For each statement
// doc.write is called with the literal contents as arguments.
//
// At the first text that is not such a statement a "Script parse error" is
// logged and execution stops.  Empty or whitespace-only source (and a
// missing source, null or undefined) runs nothing and is not an error.
function parseAndRunScript (doc, s) {
  var statement = /^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/;
  var rest = s == null ? '' : s;

  while (true) {
    // Nothing (or only white space) left: finished without error.
    if (/^\s*$/.test (rest)) break;

    var m = statement.exec (rest);
    if (!m) {
      log ('Script parse error: "' + rest + '"');
      break;
    }

    // Strip the surrounding quotes from every string literal.
    var literals = m[1].match (/'[^']*'|"[^"]*"/g);
    var args = [];
    for (var k = 0; k < literals.length; k++) {
      args.push (literals[k].substring (1, literals[k].length - 1));
    }

    doc.write.apply (doc, args);
    rest = rest.substring (m[0].length);
  }
} // parseAndRunScript
286
// Text node of the demo tree; |data| is its character data.
function JSText (data) {
  // Character data of the node.
  this.data = data;
} // JSText
290
// Appends the string |s| to this node as text: when the last child is a
// JSText node the string is added to its data, otherwise a new JSText
// node holding |s| becomes the last child.
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var children = this.childNodes;
  var last = children.length > 0 ? children[children.length - 1] : null;

  if (last instanceof JSText) {
    last.data += s;
  } else {
    children.push (new JSText (s));
  }
}; // manakaiAppendText
301
// Placeholder for document.write ().  The arguments are concatenated and
// the resulting text is only logged; nothing is inserted into the input
// stream yet.  Step numbers refer to the HTML5 definition of
// document.write ().  Returns undefined.
JSDocument.prototype.write = function () {
  // 1. If the insertion point is undefined, the open() method must be ...
  //

  // 2. ... inserted into the input stream just before the insertion point.
  // Array.prototype.join is borrowed for the array-like |arguments|
  // object; the static Array.join generic is a non-standard extension
  // that is missing from current engines.
  var text = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + text + '"');

  // 3. If there is a script that will execute as soon as the parser resumes
  // TODO

  // 4. Process the characters that were inserted, ...

  // 5. Return
  log ('document.write: return');
  return;
}; // document.write
318
// JSElement.prototype.text (read-only): the concatenated data of the
// element's JSText children, in order.  Other children are skipped.
//
// Defined with the standard Object.defineProperty instead of the
// deprecated __defineGetter__; enumerable and configurable are set so the
// property keeps the attributes __defineGetter__ gave it.
Object.defineProperty (JSElement.prototype, 'text', {
  get: function () {
    var r = '';
    for (var i = 0; i < this.childNodes.length; i++) {
      var child = this.childNodes[i];
      if (child instanceof JSText) {
        r += child.data;
      }
    }
    return r;
  },
  enumerable: true,
  configurable: true
});
328
// Returns a text dump of the children of node |n|, one line per node, each
// line starting with '| ' followed by |indent|.  Elements are shown by
// localName and followed by the dump of their own children at a deeper
// indent; text nodes are shown as their data in double quotes; any other
// child is shown by its string conversion.  |n| itself is not printed.
function dumpTree (n, indent) {
  var parts = [];
  var children = n.childNodes;

  for (var k = 0; k < children.length; k++) {
    var child = children[k];
    var prefix = '| ' + indent;

    if (child instanceof JSElement) {
      parts.push (prefix + child.localName + '\n');
      parts.push (dumpTree (child, indent + ' '));
    } else if (child instanceof JSText) {
      parts.push (prefix + '"' + child.data + '"\n');
    } else {
      parts.push (prefix + child + '\n');
    }
  }

  return parts.join ('');
} // dumpTree
344 </script>
345 </head>
346 <body onload="
347 document.sourceElement = document.getElementsByTagName ('textarea')[0];
348 document.logElement = document.getElementsByTagName ('output')[0];
349 update ();
350 ">
351
352 <textarea onchange=" update () ">&lt;html>
353 &lt;head>&lt;/head>&lt;body>
354 &lt;p>
355 &lt;script>
356 document.write ('aaaaaaa&lt;/p>\n&lt;script>\ndocument.write("cccccc")\n&lt;/', 'script>\nbbbbbb');
357 &lt;/script>
358 &lt;p>
359 </textarea>
360
361 <output></output>
362
363 </body>
364 </html>

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24