/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory | Revision Log


Revision 1.1 - (show annotations) (download) (as text)
Sun Apr 20 06:07:24 2008 UTC (17 years, 2 months ago) by wakaba
Branch: MAIN
File MIME type: text/html
First version with no support for scripting

1 <!DOCTYPE HTML>
2 <html lang=en>
3 <head>
4 <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>
5 <style>
6 textarea {
7 display: block;
8 width: 80%;
9 margin-left: auto;
10 margin-right: auto;
11 min-height: 20em;
12 }
13 output {
14 display: block;
15 font-family: monospace;
16 white-space: pre;
17 }
18 </style>
19 <script>
/**
 * Re-runs the demo: clears the log element, parses the current contents of
 * the source element and logs a dump of the resulting tree.
 */
function update () {
  document.logElement.textContent = '';

  var parser = new Parser ();
  var source = document.sourceElement.value;
  var input = new InputStream (source);
  parser.parse (input);

  var tree = dumpTree (parser.doc, '');
  log (tree);
} // update
26
/**
 * Appends |s| followed by a newline to the log element as a new text node.
 */
function log (s) {
  var line = document.createTextNode (s + "\n");
  document.logElement.appendChild (line);
} // log
30
/**
 * Input stream consumed by the tokenizer.
 *
 * The not-yet-consumed input is kept in the |s| property; the tokenizer
 * shortens it as tokens are read.
 *
 * @param s  The text to be tokenized.
 */
function InputStream (s) {
  this.s = s;
} // InputStream
34
/**
 * Parser state: the tokenizer mode ('pcdata' initially), the document being
 * built, and the stack of open elements, whose bottom entry is the document.
 */
function Parser () {
  var root = new JSDocument ();

  this.parseMode = 'pcdata';
  this.doc = root;
  this.openElements = [root];
} // Parser
40
/**
 * Removes the next token from the front of the input stream |i| (its |s|
 * property is shortened accordingly) and returns it.
 *
 * Tokens are plain objects:
 *   {type: 'start-tag', value}  - |value| is everything between "<" and ">",
 *                                 lowercased (attributes are not split off).
 *   {type: 'end-tag', value}    - |value| is everything between "</" and ">",
 *                                 lowercased.
 *   {type: 'char', value}       - a run of text.
 *   {type: 'eof'}               - the input is exhausted.
 *
 * While |this.parseMode| is 'script', only a "</script>" end tag (matched
 * case-insensitively) is recognized as markup; everything before it is
 * returned as a single 'char' token.
 */
Parser.prototype.getNextToken = function (i) {
  if (this.parseMode == 'script') {
    var endTag = /<\/script>/i.exec (i.s);

    if (!endTag) {
      if (i.s.length == 0) return {type: 'eof'};

      // Unclosed script: the rest of the input is script text.  It must be
      // consumed here; otherwise the caller, once back in 'pcdata' mode,
      // would tokenize the script source as markup.
      var rest = i.s;
      i.s = '';
      return {type: 'char', value: rest};
    }

    // The end tag has to be checked before any text is taken, so that an
    // empty <script></script> does not swallow the input up to some later
    // </script>.
    if (endTag.index == 0) {
      i.s = i.s.substring (endTag[0].length);
      return {type: 'end-tag', value: 'script'};
    }

    var text = i.s.substring (0, endTag.index);
    i.s = i.s.substring (endTag.index);
    return {type: 'char', value: text};
  }

  var m = /^<\/([^>]+)>/.exec (i.s);
  if (m) {
    i.s = i.s.substring (m[0].length);
    return {type: 'end-tag', value: m[1].toLowerCase ()};
  }

  m = /^<([^>]+)>/.exec (i.s);
  if (m) {
    i.s = i.s.substring (m[0].length);
    return {type: 'start-tag', value: m[1].toLowerCase ()};
  }

  m = /^[^<]+/.exec (i.s);
  if (m) {
    i.s = i.s.substring (m[0].length);
    return {type: 'char', value: m[0]};
  }

  // A "<" that does not start a tag (e.g. "<" at the very end, or "<>") is
  // passed through as a one-character text token.
  if (i.s.length > 0) {
    var ch = i.s.charAt (0);
    i.s = i.s.substring (1);
    return {type: 'char', value: ch};
  }

  return {type: 'eof'};
}; // getNextToken
81
/**
 * Reads tokens from the input stream |i| until 'eof' and builds the tree
 * under |this.doc|.  Every token read is reported through |log|.
 *
 * - 'start-tag': a new JSElement is appended to the current node (the last
 *   entry of |this.openElements|).  For |script| the tokenizer is switched
 *   to 'script' mode and the following 'char' tokens become the element's
 *   text, up to the |script| end tag or 'eof'; the element is not pushed
 *   onto the stack.  Any other element becomes the new current node.
 * - 'char': the text is appended to the current node.
 * - 'end-tag': pops the current node if its name matches; otherwise a
 *   parse error is logged and the token is ignored.
 * - 'eof': parsing stops, including when it is met inside a script.
 */
Parser.prototype.parse = function (i) {
  log ('start parsing');

  var atEof = false;
  while (!atEof) {
    var token = this.getNextToken (i);
    log ('token: ' + token.type + ' "' + token.value + '"');

    var current = this.openElements[this.openElements.length - 1];

    if (token.type == 'start-tag') {
      var el = new JSElement (token.value);
      current.appendChild (el);

      if (token.value == 'script') {
        this.parseMode = 'script';

        while (true) {
          var scriptToken = this.getNextToken (i);
          log ('token: ' + scriptToken.type + ' "' + scriptToken.value + '"');

          if (scriptToken.type == 'char') {
            el.manakaiAppendText (scriptToken.value);
          } else if (scriptToken.type == 'eof') {
            // End of input inside the script: there is nothing left to
            // tokenize, so the outer loop must stop as well.
            atEof = true;
            break;
          } else if (scriptToken.type == 'end-tag' &&
                     scriptToken.value == 'script') {
            break;
          }
        }

        this.parseMode = 'pcdata';
      } else {
        this.openElements.push (el);
      }
    } else if (token.type == 'char') {
      // Text outside of script belongs to the tree, too.
      current.manakaiAppendText (token.value);
    } else if (token.type == 'end-tag') {
      if (current.localName == token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'eof') {
      atEof = true;
    }
  }

  log ('stop parsing');
}; // parse
126
/**
 * Root node of the tree built by the parser.  It starts out with an empty
 * list of child nodes.
 */
function JSDocument () {
  this.childNodes = [];
} // JSDocument
130
/**
 * Element node of the tree built by the parser.
 *
 * @param localName  The name stored in the |localName| property.
 */
function JSElement (localName) {
  this.localName = localName;
  this.childNodes = [];
} // JSElement
135
/**
 * Appends the node |e| to the end of this node's |childNodes|, records this
 * node as |e.parentNode| and returns |e|.  Documents and elements share the
 * same function.
 */
JSElement.prototype.appendChild = function (e) {
  e.parentNode = this;
  this.childNodes.push (e);
  return e;
}; // appendChild
JSDocument.prototype.appendChild = JSElement.prototype.appendChild;
142
/**
 * Text node of the tree built by the parser.
 *
 * @param data  The text stored in the |data| property.
 */
function JSText (data) {
  this.data = data;
} // JSText
146
/**
 * Appends the text |s| to this node: if the last child is a JSText, |s| is
 * added to the end of its |data|; otherwise a new JSText holding |s| is
 * added to |childNodes|.  Documents and elements share the same function.
 */
JSElement.prototype.manakaiAppendText = function (s) {
  var children = this.childNodes;
  var last = children.length > 0 ? children[children.length - 1] : null;

  if (last instanceof JSText) {
    last.data += s;
    return;
  }

  children.push (new JSText (s));
}; // manakaiAppendText
JSDocument.prototype.manakaiAppendText = JSElement.prototype.manakaiAppendText;
157
/**
 * Serializes the children of |n| into one line per node, each starting with
 * "| " followed by |indent|.  Elements are shown by name and followed by
 * their own children at a deeper indent, text nodes are shown in double
 * quotes, and any other node is shown by its string conversion.
 *
 * @param n       Node whose |childNodes| are dumped (|n| itself is not).
 * @param indent  Prefix inserted after "| " on every line at this level.
 * @return The dump; the empty string if |n| has no children.
 */
function dumpTree (n, indent) {
  var pieces = [];
  var prefix = '| ' + indent;

  for (var idx = 0; idx < n.childNodes.length; idx++) {
    var child = n.childNodes[idx];

    if (child instanceof JSElement) {
      pieces.push (prefix + child.localName + '\n');
      pieces.push (dumpTree (child, indent + ' '));
    } else if (child instanceof JSText) {
      pieces.push (prefix + '"' + child.data + '"\n');
    } else {
      pieces.push (prefix + child + '\n');
    }
  }

  return pieces.join ('');
} // dumpTree
173 </script>
174 </head>
175 <body onload="
176 document.sourceElement = document.getElementsByTagName ('textarea')[0];
177 document.logElement = document.getElementsByTagName ('output')[0];
178 update ();
179 ">
180
181 <textarea onchange=" update () ">&lt;html>
182 &lt;head>&lt;/head>&lt;body>
183 &lt;p>
184 &lt;script>
185 document.write ('aaaaaaa&lt;/p>\n&lt;script>\ndocument.write("cccccc")\n&lt;/', 'script>\nbbbbbb');
186 &lt;/script>
187 &lt;p>
188 </textarea>
189
190 <output></output>
191
192 </body>
193 </html>

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24