/[suikacvs]/markup/html/scripting-parser/parser.html
Suika

Contents of /markup/html/scripting-parser/parser.html

Parent Directory | Revision Log


Revision 1.1 - (hide annotations) (download) (as text)
Sun Apr 20 06:07:24 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
File MIME type: text/html
First version with no support for scripting

1 wakaba 1.1 <!DOCTYPE HTML>
2     <html lang=en>
3     <head>
4     <title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title>
5     <style>
6     textarea {
7     display: block;
8     width: 80%;
9     margin-left: auto;
10     margin-right: auto;
11     min-height: 20em;
12     }
13     output {
14     display: block;
15     font-family: monospace;
16     white-space: pre;
17     }
18     </style>
19     <script>
20     function update () {
21     document.logElement.textContent = '';
22     var p = new Parser ();
23     p.parse (new InputStream (document.sourceElement.value));
24     log (dumpTree (p.doc, ''));
25     } // update
26    
27     function log (s) {
28     document.logElement.appendChild (document.createTextNode (s + "\n"));
29     } // log
30    
31     function InputStream (s) {
32     this.s = s;
33     } // InputStream
34    
35     function Parser () {
36     this.parseMode = 'pcdata';
37     this.doc = new JSDocument ();
38     this.openElements = [this.doc];
39     } // Parser
40    
41     Parser.prototype.getNextToken = function (i) {
42     if (this.parseMode == 'script') {
43     var token;
44     i.s = i.s.replace (/^([\s\S]+?)<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/,
45     function (s, t) {
46     token = {type: 'char', value: t};
47     return '<' + '/script>';
48     });
49     if (token) return token;
50     i.s = i.s.replace (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/, function () {
51     token = {type: 'end-tag', value: 'script'};
52     return '';
53     });
54     if (token) return token;
55     return {type: 'eof'};
56     }
57    
58     var token;
59     i.s = i.s.replace (/^<\/([^>]+)>/, function (s, e) {
60     token = {type: 'end-tag', value: e.toLowerCase ()};
61     return '';
62     });
63     if (token) return token;
64     i.s = i.s.replace (/^<([^>]+)>/, function (s, e) {
65     token = {type: 'start-tag', value: e.toLowerCase ()};
66     return '';
67     });
68     if (token) return token;
69     i.s = i.s.replace (/^[^<]+/, function (s) {
70     token = {type: 'char', value: s};
71     return '';
72     });
73     if (token) return token;
74     i.s = i.s.replace (/^[\s\S]/, function (s) {
75     token = {type: 'char', value: s};
76     return '';
77     });
78     if (token) return token;
79     return {type: 'eof'};
80     } // getNextToken
81    
82     Parser.prototype.parse = function (i) {
83     log ('start parsing');
84    
85     while (true) {
86     var token = this.getNextToken (i);
87     log ('token: ' + token.type + ' "' + token.value + '"');
88    
89     if (token.type == 'start-tag') {
90     var el = new JSElement (token.value);
91     if (token.value == 'script') {
92     this.parseMode = 'script';
93    
94     while (true) {
95     var token = this.getNextToken (i);
96     log ('token: ' + token.type + ' "' + token.value + '"');
97    
98     if (token.type == 'char') {
99     el.manakaiAppendText (token.value);
100     } else if (token.type == 'eof' ||
101     (token.type == 'end-tag' && token.value == 'script')) {
102     this.parseMode = 'pcdata';
103     break;
104     }
105     }
106    
107     this.openElements[this.openElements.length - 1].appendChild (el);
108     } else {
109     this.openElements[this.openElements.length - 1].appendChild (el);
110     this.openElements.push (el);
111     }
112     } else if (token.type == 'end-tag') {
113     if (this.openElements[this.openElements.length - 1].localName ==
114     token.value) {
115     this.openElements.pop ();
116     } else {
117     log ('parse error: unmatched end tag: ' + token.value);
118     }
119     } else if (token.type == 'eof') {
120     break;
121     }
122     }
123    
124     log ('stop parsing');
125     } // parse
126    
127     function JSDocument () {
128     this.childNodes = [];
129     } // JSDocument
130    
131     function JSElement (localName) {
132     this.localName = localName;
133     this.childNodes = [];
134     } // JSElement
135    
136     JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
137     function (e) {
138     this.childNodes.push (e);
139     e.parentNode = this;
140     return e;
141     }; // appendChild
142    
143     function JSText (data) {
144     this.data = data;
145     } // JSText
146    
147     JSDocument.prototype.manakaiAppendText =
148     JSElement.prototype.manakaiAppendText =
149     function (s) {
150     if (this.childNodes.length > 0 &&
151     this.childNodes[this.childNodes.length - 1] instanceof JSText) {
152     this.childNodes[this.childNodes.length - 1].data += s;
153     } else {
154     this.childNodes.push (new JSText (s));
155     }
156     }; // manakaiAppendText
157    
158     function dumpTree (n, indent) {
159     var r = '';
160     for (var i = 0; i < n.childNodes.length; i++) {
161     var node = n.childNodes[i];
162     if (node instanceof JSElement) {
163     r += '| ' + indent + node.localName + '\n';
164     r += dumpTree (node, indent + ' ');
165     } else if (node instanceof JSText) {
166     r += '| ' + indent + '"' + node.data + '"\n';
167     } else {
168     r += '| ' + indent + node + '\n';
169     }
170     }
171     return r;
172     } // dumpTree
173     </script>
174     </head>
175     <body onload="
176     document.sourceElement = document.getElementsByTagName ('textarea')[0];
177     document.logElement = document.getElementsByTagName ('output')[0];
178     update ();
179     ">
180    
181     <textarea onchange=" update () ">&lt;html>
182     &lt;head>&lt;/head>&lt;body>
183     &lt;p>
184     &lt;script>
185     document.write ('aaaaaaa&lt;/p>\n&lt;script>\ndocument.write("cccccc")\n&lt;/', 'script>\nbbbbbb');
186     &lt;/script>
187     &lt;p>
188     </textarea>
189    
190     <output></output>
191    
192     </body>
193     </html>

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24