/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory | Revision Log


Revision 1.3 - (show annotations) (download) (as text)
Sun Apr 20 10:02:43 2008 UTC (17 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.2: +76 -8 lines
File MIME type: text/html
document.write support, first version (document.write in inline <script>)

1 <!DOCTYPE HTML>
2 <html lang=en>
3 <head>
4 <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>
5 <style>
6 textarea {
7 display: block;
8 width: 80%;
9 margin-left: auto;
10 margin-right: auto;
11 min-height: 20em;
12 }
13 output {
14 display: block;
15 font-family: monospace;
16 white-space: pre;
17 }
18 </style>
19 <script>
// Re-runs the demo: empties the log element, parses the current value of
// the source element, and logs a dump of the resulting document tree.
function update () {
  document.logElement.textContent = '';

  var stream = new InputStream (document.sourceElement.value);
  var parser = new Parser (stream);
  parser.parse ();

  var dump = dumpTree (parser.doc, '');
  log (dump);
} // update
26
// Appends |s| followed by a newline, as a new text node, to the log element
// (document.logElement).
function log (s) {
  var line = document.createTextNode (s + "\n");
  document.logElement.appendChild (line);
} // log
30
// Input stream of the parser.  The not-yet-consumed input is kept in the
// |s| property; the tokenizer removes text from its front and
// document.write () inserts text into it.
function InputStream (s) {
  this.s = s;
} // InputStream
34
// Parser state for one input stream |i|.
//
//   parseMode    - 'pcdata' initially; 'script' while the content of a
//                  script element is being collected.
//   doc          - the JSDocument being built (it keeps a reference back
//                  to this parser).
//   openElements - stack of open nodes; the document is its bottom entry.
//   in           - the InputStream.
//
// The |insertionPoint| property is deliberately not initialized here, so
// it starts out undefined.
function Parser (i) {
  var doc = new JSDocument (this);
  this.parseMode = 'pcdata';
  this.doc = doc;
  this.openElements = [doc];
  this.in = i;
} // Parser
41
// Removes the next token from the front of the input string (this.in.s)
// and returns it.
//
// The returned object has a |type| of 'char', 'start-tag', 'end-tag',
// 'eof' or 'abort'; all but 'eof' and 'abort' also carry a |value|.
// 'abort' is returned, and the input left untouched, when the token would
// reach past the insertion point.  this.insertionPoint is used as the
// number of characters left before the insertion point and is decremented
// by the length of consumed text.  While it is undefined (or NaN) every
// comparison below is false, so no token is split or aborted.
42 Parser.prototype.getNextToken = function () {
43 var p = this;
44 var i = this.in;
45 if (this.parseMode == 'script') {
// Script mode: only character data and the script end tag are recognized.
46 var token;
47 if (p.insertionPoint <= 0) {
48 return {type: 'abort'};
49 }
// One or more characters (as few as possible) followed by a script end
// tag in any letter case.  |s| is the whole match, |t| the text before
// the end tag.
50 i.s = i.s.replace (/^([\s\S]+?)<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/,
51 function (s, t) {
52 if (0 < p.insertionPoint && p.insertionPoint < t.length) {
// The insertion point lies inside the text: emit only the part before
// it and leave the rest, plus the matched end tag (the last 9
// characters of the match), in the stream.
53 token = {type: 'char', value: t.substring (0, p.insertionPoint)};
54 var ip = p.insertionPoint;
55 p.insertionPoint = 0;
56 return t.substring (ip, t.length) +
57 s.substring (s.length - 9, s.length);
58 }
59 token = {type: 'char', value: t};
// NOTE(review): this subtracts the length of the whole match, end tag
// included, although an end tag is put back into the stream below and
// its length is subtracted again when it is consumed -- confirm that
// this is intended.
60 p.insertionPoint -= s.length;
// The end tag is put back in lower case; it is written in two pieces
// presumably so that this file can itself live inside an HTML script
// element.
61 return '<' + '/script>';
62 });
63 if (token) return token;
// A script end tag at the very start of the input.
64 i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function (s) {
// NOTE(review): this comparison is the reverse of the
// |p.insertionPoint < s.length| checks used for tags in the non-script
// branch below -- confirm which direction is intended.
65 if (s.length < p.insertionPoint) {
66 token = {type: 'abort'};
67 return s;
68 }
69 token = {type: 'end-tag', value: 'script'};
70 p.insertionPoint -= s.length;
71 return '';
72 });
73 if (token) return token;
// Neither pattern matched (e.g. the input is empty or contains no script
// end tag).
74 return {type: 'eof'};
75 }
76
// Non-script mode.
77 var token;
// End tag: "</" followed by anything up to the next ">".  The value is the
// lowercased text between "</" and ">".
78 i.s = i.s.replace (/^<\/([^>]+)>/, function (s, e) {
79 if (p.insertionPoint < s.length) {
80 token = {type: 'abort'};
81 return s;
82 }
83 token = {type: 'end-tag', value: e.toLowerCase ()};
84 p.insertionPoint -= s.length;
85 return '';
86 });
87 if (token) return token;
// Start tag: "<" followed by anything up to the next ">".  The value is
// everything between the brackets, lowercased; no attribute parsing is
// done.
88 i.s = i.s.replace (/^<([^>]+)>/, function (s, e) {
89 if (p.insertionPoint < s.length) {
90 token = {type: 'abort'};
91 return s;
92 }
93 token = {type: 'start-tag', value: e.toLowerCase ()};
94 p.insertionPoint -= s.length;
95 return '';
96 });
97 if (token) return token;
// Character data may not be consumed once the insertion point is reached.
98 if (p.insertionPoint <= 0) {
99 return {type: 'abort'};
100 }
// A run of characters other than "<"; it is cut at the insertion point if
// the insertion point lies inside the run.
101 i.s = i.s.replace (/^[^<]+/, function (s) {
102 if (p.insertionPoint < s.length) {
103 token = {type: 'char', value: s.substring (0, p.insertionPoint)};
104 var ip = p.insertionPoint;
105 p.insertionPoint = 0;
106 return s.substring (ip, s.length);
107 }
108 token = {type: 'char', value: s};
109 p.insertionPoint -= s.length;
110 return '';
111 });
112 if (token) return token;
// Any single remaining character; at this point that is a "<" which did
// not match either tag pattern above.
113 i.s = i.s.replace (/^[\s\S]/, function (s) {
114 token = {type: 'char', value: s};
115 p.insertionPoint -= s.length;
116 return '';
117 });
118 if (token) return token;
// Nothing left in the input.
119 return {type: 'eof'};
120 } // getNextToken
121
// Tree construction loop.  Pulls tokens from getNextToken () until 'eof'
// (normal end) or 'abort' (the insertion point was reached; returns
// immediately without logging 'stop parsing').
//
//   - A 'script' start tag creates a parser-inserted script element,
//     collects its character data in 'script' parse mode and then appends
//     the element to the current node, which is what triggers script
//     execution (see appendChild).  The script element is never pushed
//     onto the stack of open elements.
//   - Any other start tag creates an element, appends it to the current
//     node and pushes it onto the stack of open elements.
//   - An end tag pops the current node if its name matches; otherwise a
//     parse error is logged and the token is ignored.
//   - Character data is appended to the current node.
//
// This method is re-entered from JSDocument.prototype.write ().
Parser.prototype.parse = function () {
  log ('start parsing');

  while (true) {
    var token = this.getNextToken ();
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (token.type == 'start-tag') {
      if (token.value == 'script') {
        // 1. Create an element for the token in the HTML namespace.
        var el = new JSElement (this.doc, token.value);

        // 2. Mark the element as being "parser-inserted".
        el.manakaiParserInserted = true;

        // 3. Switch the tokeniser's content model flag to the CDATA state.
        this.parseMode = 'script';

        // 4.1. Collect all the character tokens.
        while (true) {
          token = this.getNextToken ();
          log ('token: ' + token.type + ' "' + token.value + '"');

          if (token.type == 'char') {
            // 5. Append a single Text node to the script element node.
            el.manakaiAppendText (token.value);

          // 4.2. Until it returns a token that is not a character token, or
          // until it stops tokenising.
          } else if (token.type == 'eof' ||
                     (token.type == 'end-tag' && token.value == 'script') ||
                     token.type == 'abort') {
            // 6. Switched back to the PCDATA state.
            this.parseMode = 'pcdata';

            // 7.1. If the next token is not an end tag token with ...
            if (token.type != 'end-tag') {
              // 7.2. This is a parse error.
              log ('Parse error: no </' + 'script>');

              // 7.3. Mark the script element as "already executed".
              el.manakaiAlreadyExecuted = true;
            } else {
              // 7.4. Ignore it.
              //
            }
            break;
          }
        }

        // 8.1. If the parser were originally created for the ...
        if (this.fragmentParsingMode) {
          // 8.2. Mark the script element as "already executed" and ...
          // The flag must use the same property name that step 7.3 above
          // and appendChild use, otherwise it is never seen.
          el.manakaiAlreadyExecuted = true;
          continue;
        }

        // 9.1. Let the old insertion point have the same value as the ...
        var oldInsertionPoint = this.insertionPoint;
        // 9.2. Let the insertion point be just before the next input ...
        this.setInsertionPoint (0);

        // 10. Append the new element to the current node.
        this.openElements[this.openElements.length - 1].appendChild (el);

        // 11. Let the insertion point have the value of the old ...
        this.setInsertionPoint (oldInsertionPoint);

        // 12. If there is a script that will execute as soon as ...


      } else {
        var el = new JSElement (this.doc, token.value);
        this.openElements[this.openElements.length - 1].appendChild (el);
        this.openElements.push (el);
      }
    } else if (token.type == 'end-tag') {
      if (this.openElements[this.openElements.length - 1].localName ==
          token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'char') {
      this.openElements[this.openElements.length - 1].manakaiAppendText
          (token.value);
    } else if (token.type == 'eof') {
      break;
    } else if (token.type == 'abort') {
      log ('parse: abort');
      return;
    }
  }

  log ('stop parsing');
}; // parse
218
// Sets this.insertionPoint and logs the change.  A null, undefined or
// not-a-number |ip| makes the insertion point undefined; any other value
// is stored as is, and the log line shows the first 10 characters of the
// remaining input.
Parser.prototype.setInsertionPoint = function (ip) {
  if (ip == null || isNaN (ip)) {
    log ('insertion point: set to undefined');
    this.insertionPoint = undefined;
    return;
  }

  var upcoming = this.in.s.substring (0, 10);
  log ('insertion point: set to ' + ip + ' (before "' + upcoming + '")');
  this.insertionPoint = ip;
}; // setInsertionPoint
229
// Document node of the demo's tree.  |p| is the parser that builds the
// document; it is kept in |_parser| so that appendChild and write () can
// reach it.  Starts out with no children.
function JSDocument (p) {
  this.childNodes = [];
  this._parser = p;
} // JSDocument
234
// Element node of the demo's tree, with the given local name and owner
// document.  Starts out with no children.
function JSElement (doc, localName) {
  this.localName = localName;
  this.ownerDocument = doc;
  this.childNodes = [];
} // JSElement
240
// Appends |e| as the last child of this node, sets its parentNode and
// returns it.  (|e| is not removed from any previous parent.)
//
// If |e| is a script element, the "running a script" steps are performed
// as part of the insertion: an element whose "already executed" flag is
// set is skipped; otherwise the flag is set and, as the external-resource,
// defer and async cases are still TODO, the script is executed at once.
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  e.parentNode = this;
  this.childNodes.push (e);

  if (e.localName != 'script') {
    return e;
  }

  log ('start running a script');

  // A document has no ownerDocument; it is its own document.
  var doc = this.ownerDocument || this;

  // 1. Script type
  //

  // 2.1. If scripting is disabled
  //
  // 2.2. If the script element was created by an XML ... innerHTML ...
  //
  // 2.3. If the user agent does not support the scripting language ...
  //
  // 2.4. If the script element has its "already executed" flag set
  if (e.manakaiAlreadyExecuted) {
    // 2.5. Abort these steps at this point.
    log ('running a script: aborted');
    return e;
  }

  // 3. Set the element's "already executed" flag.
  e.manakaiAlreadyExecuted = true;

  // 4. If the element has a src attribute, then a load for ...
  // TODO: load an external resource

  // 5. The first of the following options:

  // 5.1.
  if (/* TODO: If the document is still being parsed && */
      e.defer && !e.async) {
    // TODO
  } else if (e.async && e.src != null) {
    // TODO
  } else if (e.async && e.src == null
             /* && list of scripts that will execute asynchronously is not empty */) {
    // TODO
  } else if (e.src != null && e.manakaiParserInserted) {
    // TODO
  } else if (e.src != null) {
    // TODO
  } else {
    executeScript (doc, e); // even if other scripts are already executing.
  }

  log ('end running a script');
  return e;
}; // appendChild
298
// Executes the script block of script element |e| in document |doc|.
//
// Loading always counts as successful here (the error case is not
// implemented), and the conditions "scripting is enabled",
// "Document.designMode is disabled" and "the Document is the active
// document in its browsing context" are taken to be true.  An inline
// script's source is the element's |text|; external scripts are still
// TODO, so for them the source passed on is undefined.
function executeScript (doc, e) {
  log ('executing a script block: start');

  // If the load resulted in an error, then ... firing an error event ...

  // If the load was successful
  log ('load event fired at the script element');

  var source;
  if (e.src == null) {
    source = e.text;
  } else {
    // TODO: from external file
  }
  parseAndRunScript (doc, source);

  log ('executing a script block: end');
} // executeScript
323
// Minimal "script engine" of the demo.  The only statement it understands
// is
//
//   document.write ('...', "...", ...);
//
// whose arguments are quoted string literals without escape sequences.
// Statements are run one by one from the start of |s| by calling
// doc.write with the unquoted argument values.  Running stops at the end
// of the script, or, after logging a parse error, at the first text that
// is not such a statement.
function parseAndRunScript (doc, s) {
  var statement = /^\s*document\.write\s*\(((?:'[^']*'|"[^"]*")\s*(?:,\s*(?:'[^']*'|"[^"]*"))*)\)\s*;\s*/;

  while (true) {
    var found = statement.exec (s);
    if (found) {
      // found[1] is the argument list; it contains at least one literal.
      var literals = found[1].match (/('[^']*'|"[^"]*")/g);
      var args = [];
      for (var k = 0; k < literals.length; k++) {
        // Strip the surrounding quotes.
        args.push (literals[k].substring (1, literals[k].length - 1));
      }
      doc.write.apply (doc, args);

      // The pattern is anchored, so the statement is a prefix of |s|.
      s = s.substring (found[0].length);
    }

    if (s == '') break;
    if (!found) {
      log ('Script parse error: "' + s + '"');
      break;
    }
  }
} // parseAndRunScript
344
// Text node of the demo's tree; |data| is its character data.
function JSText (data) {
  this.data = data;
} // JSText
348
// Appends the character data |s| to this node.  If the last child is
// already a text node the data is added to that node; otherwise a new
// JSText child is created.
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var children = this.childNodes;
  // For a node without children this is undefined, which is not a JSText.
  var last = children[children.length - 1];
  if (last instanceof JSText) {
    last.data += s;
    return;
  }
  children.push (new JSText (s));
}; // manakaiAppendText
359
// document.write () of the demo.  All arguments are concatenated, the
// result is inserted into the parser's input stream at the insertion
// point, the insertion point is moved past the inserted text, and the
// parser is run re-entrantly to process it.
//
// Calling this while the insertion point is undefined is not supported
// yet (the open () step is still TODO).
JSDocument.prototype.write = function () {
  var p = this._parser;

  // 1. If the insertion point is undefined, the open() method must be ...
  // NaN never compares equal to anything, itself included, so it has to be
  // detected with isNaN ().
  if (p.insertionPoint == undefined || isNaN (p.insertionPoint)) {
    // TODO: open ()
  }

  // 2. ... inserted into the input stream just before the insertion point.
  // |arguments| is not a real array, and the static Array.join () generic
  // is a non-standard extension, so borrow Array.prototype.join instead.
  var s = Array.prototype.join.call (arguments, '');
  log ('document.write: insert "' + s + '"' +
       ' before "' + p.in.s.substring (p.insertionPoint, p.insertionPoint + 10) + '"');
  p.in.s = p.in.s.substring (0, p.insertionPoint) + s
      + p.in.s.substring (p.insertionPoint, p.in.s.length);
  p.insertionPoint += s.length;

  // 3. If there is a script that will execute as soon as the parser resumes
  // TODO

  // 4. Process the characters that were inserted, ...
  p.parse ();

  // 5. Return
  log ('document.write: return');
  return;
}; // document.write
386
// The |text| attribute of an element: the concatenated data of all text
// nodes that are direct children of the element, in tree order.  Text
// inside child elements is not included.
//
// Defined with the standard Object.defineProperty instead of the
// deprecated __defineGetter__; the property stays enumerable and
// configurable, as __defineGetter__ would have made it.
Object.defineProperty (JSElement.prototype, 'text', {
  get: function () {
    var r = '';
    for (var i = 0; i < this.childNodes.length; i++) {
      if (this.childNodes[i] instanceof JSText) {
        r += this.childNodes[i].data;
      }
    }
    return r;
  },
  enumerable: true,
  configurable: true
});
396
// Serializes the children of |n|, one node per line, each line starting
// with '| ' followed by |indent|.  An element is shown by its local name
// and followed by its own children, indented by one more space; a text
// node is shown as its data in double quotes; anything else is shown by
// its string value.
function dumpTree (n, indent) {
  var lines = [];
  var children = n.childNodes;
  for (var k = 0; k < children.length; k++) {
    var child = children[k];
    var prefix = '| ' + indent;
    if (child instanceof JSElement) {
      lines.push (prefix + child.localName + '\n');
      lines.push (dumpTree (child, indent + ' '));
    } else if (child instanceof JSText) {
      lines.push (prefix + '"' + child.data + '"\n');
    } else {
      lines.push (prefix + child + '\n');
    }
  }
  return lines.join ('');
} // dumpTree
412 </script>
413 </head>
414 <body onload="
415 document.sourceElement = document.getElementsByTagName ('textarea')[0];
416 document.logElement = document.getElementsByTagName ('output')[0];
417 update ();
418 ">
419
420 <textarea onchange=" update () ">&lt;html>
421 &lt;head>&lt;/head>&lt;body>
422 &lt;p>
423 &lt;script>
424 document.write ('aaaaaaa&lt;/p>&lt;script>document.write("cccccc");&lt;/', 'script>bbbbbb');
425 &lt;/script>
426 &lt;p>
427 </textarea>
428
429 <output></output>
430
431 </body>
432 </html>

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24