1 |
wakaba |
1.1 |
<!DOCTYPE HTML> |
2 |
|
|
<html lang=en> |
3 |
|
|
<head> |
4 |
|
|
<title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title> |
5 |
|
|
<style> |
6 |
|
|
textarea { |
7 |
|
|
display: block; |
8 |
|
|
width: 80%; |
9 |
|
|
margin-left: auto; |
10 |
|
|
margin-right: auto; |
11 |
|
|
min-height: 20em; |
12 |
|
|
} |
13 |
|
|
output { |
14 |
|
|
display: block; |
15 |
|
|
font-family: monospace; |
16 |
|
|
white-space: pre; |
17 |
|
|
} |
18 |
|
|
</style> |
19 |
|
|
<script> |
20 |
|
|
/**
 * Re-runs the demo: empties the log element, parses the current value of
 * the source element with a fresh Parser, then logs a dump of the
 * resulting tree.
 */
function update () {
  // Start from an empty log; parse () writes its trace into it via log ().
  document.logElement.textContent = '';

  var parser = new Parser ();
  var input = new InputStream (document.sourceElement.value);
  parser.parse (input);

  var dump = dumpTree (parser.doc, '');
  log (dump);
} // update
26 |
|
|
|
27 |
|
|
/**
 * Appends one line of text to the log element.
 *
 * @param {*} s  Value to log; it is concatenated with a trailing newline.
 */
function log (s) {
  var line = document.createTextNode (s + "\n");
  document.logElement.appendChild (line);
} // log
30 |
|
|
|
31 |
|
|
/**
 * Holder for the text that has not been tokenized yet.
 *
 * @constructor
 * @param {string} s  Source text; stored as the `s` property, which the
 *     tokenizer shortens as it consumes tokens.
 */
function InputStream (s) {
  this.s = s;
} // InputStream
34 |
|
|
|
35 |
|
|
/**
 * Parser state: the current tokenizer mode, the document being built and
 * the stack of open elements.
 *
 * @constructor
 */
function Parser () {
  var root = new JSDocument ();

  // 'pcdata' is the normal mode; parse () switches to 'script' while it
  // is reading the content of a script element.
  this.parseMode = 'pcdata';
  this.doc = root;
  // The document is the bottom entry of the stack, so the stack is never
  // empty when parse () looks at its last entry.
  this.openElements = [root];
} // Parser
40 |
|
|
|
41 |
|
|
/**
 * Removes the next token from the front of the input stream and returns it.
 *
 * In 'script' parse mode the only markup recognized is a `</script>` end
 * tag (any letter case).  Text in front of it is returned as one 'char'
 * token and the end tag is left in the stream for the following call.  If
 * the stream holds no end tag at all, the whole remainder is returned as
 * one 'char' token.
 *
 * In any other mode the stream is matched, in this order, against: an end
 * tag `</...>`, a start tag `<...>`, a run of characters containing no
 * '<', and finally a single character (a '<' that opens no tag).  Tag
 * tokens carry everything between the delimiters, lowercased.
 *
 * @param {InputStream} i  Input stream; its `s` property is shortened by
 *     the text of the returned token.
 * @return {Object} Token whose `type` is 'start-tag', 'end-tag', 'char' or
 *     'eof'.  Every type except 'eof' also has a string `value`.
 */
Parser.prototype.getNextToken = function (i) {
  var src = i.s;
  var m;

  if (this.parseMode == 'script') {
    if (src.length === 0) return {type: 'eof'};

    m = /<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/.exec (src);

    // The end tag is tested before the text.  Matching text first would
    // require at least one character and so, for an empty script, would
    // run past the element's own end tag to a later one.
    if (m && m.index === 0) {
      i.s = src.substring (m[0].length);
      return {type: 'end-tag', value: 'script'};
    }

    // Text runs up to the end tag, or to the end of the input when the
    // script is never closed, so unterminated script text is not lost.
    var textEnd = m ? m.index : src.length;
    i.s = src.substring (textEnd);
    return {type: 'char', value: src.substring (0, textEnd)};
  }

  m = /^<\/([^>]+)>/.exec (src);
  if (m) {
    i.s = src.substring (m[0].length);
    return {type: 'end-tag', value: m[1].toLowerCase ()};
  }

  m = /^<([^>]+)>/.exec (src);
  if (m) {
    i.s = src.substring (m[0].length);
    return {type: 'start-tag', value: m[1].toLowerCase ()};
  }

  m = /^[^<]+/.exec (src);
  if (m) {
    i.s = src.substring (m[0].length);
    return {type: 'char', value: m[0]};
  }

  // Only a '<' that does not open a tag can be left here; emit it as text
  // so that the tokenizer always makes progress.
  if (src.length > 0) {
    i.s = src.substring (1);
    return {type: 'char', value: src.charAt (0)};
  }

  return {type: 'eof'};
} // getNextToken
81 |
|
|
|
82 |
|
|
/**
 * Reads tokens from the input stream until 'eof' and builds the tree
 * below this.doc.  Every token, and every unmatched end tag, is reported
 * through log ().
 *
 * - A 'script' start tag switches the tokenizer to 'script' mode, collects
 *   the following 'char' tokens as the element's text until the matching
 *   end tag (or 'eof'), switches back to 'pcdata' and appends the element
 *   to the current node without opening it.
 * - Any other start tag appends a new element to the current node and
 *   makes it the current node.
 * - An end tag closes the current node when the names match; otherwise it
 *   is reported as a parse error and ignored.
 * - A 'char' token is appended as text to the current node.
 *
 * @param {InputStream} i  Input stream; consumed by this call.
 */
Parser.prototype.parse = function (i) {
  log ('start parsing');

  while (true) {
    var token = this.getNextToken (i);
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (token.type === 'eof') break;

    // Last entry of the stack of open elements; the document sits at the
    // bottom of the stack and is never popped, so this is always defined.
    var current = this.openElements[this.openElements.length - 1];

    if (token.type === 'start-tag') {
      var el = new JSElement (token.value);
      if (token.value === 'script') {
        this.parseMode = 'script';

        while (true) {
          var scriptToken = this.getNextToken (i);
          log ('token: ' + scriptToken.type + ' "' + scriptToken.value + '"');

          if (scriptToken.type === 'char') {
            el.manakaiAppendText (scriptToken.value);
          } else if (scriptToken.type === 'eof' ||
                     (scriptToken.type === 'end-tag' &&
                      scriptToken.value === 'script')) {
            this.parseMode = 'pcdata';
            break;
          }
        }

        // The script element is complete here, so it is appended but not
        // pushed onto the stack of open elements.
        current.appendChild (el);
      } else {
        current.appendChild (el);
        this.openElements.push (el);
      }
    } else if (token.type === 'end-tag') {
      // The document has no localName, so it never matches an end tag.
      if (current.localName === token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type === 'char') {
      // Text outside a script belongs to the current node.
      current.manakaiAppendText (token.value);
    }
  }

  log ('stop parsing');
} // parse
126 |
|
|
|
127 |
|
|
/**
 * Root node of the tree built by the parser.
 *
 * @constructor
 */
function JSDocument () {
  // Children in document order.
  this.childNodes = [];
} // JSDocument
130 |
|
|
|
131 |
|
|
/**
 * Element node of the tree built by the parser.
 *
 * @constructor
 * @param {string} localName  Name of the element, stored as given.
 */
function JSElement (localName) {
  this.localName = localName;
  // Children in document order.
  this.childNodes = [];
} // JSElement
135 |
|
|
|
136 |
|
|
/**
 * Adds a node as the last child of this node and records this node as
 * its parent.  Documents and elements share the one function.
 *
 * @param {Object} e  Node to append.
 * @return {Object} The appended node.
 */
JSElement.prototype.appendChild = function (e) {
  var children = this.childNodes;
  children.push (e);
  e.parentNode = this;
  return e;
}; // appendChild
JSDocument.prototype.appendChild = JSElement.prototype.appendChild;
142 |
|
|
|
143 |
|
|
/**
 * Text node of the tree built by the parser.
 *
 * @constructor
 * @param {string} data  Character data of the node.
 */
function JSText (data) {
  this.data = data;
} // JSText
146 |
|
|
|
147 |
|
|
/**
 * Appends character data to this node.  When the last child is already a
 * text node the data is added to it, so consecutive calls produce a
 * single text node; otherwise a new text node is appended.  Documents and
 * elements share the one function.
 *
 * @param {string} s  Character data to append.
 */
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  // Undefined when there are no children, which fails the instanceof test.
  var last = this.childNodes[this.childNodes.length - 1];
  if (last instanceof JSText) {
    last.data += s;
  } else {
    // Go through appendChild so that the text node gets its parentNode
    // link like every other child.
    this.appendChild (new JSText (s));
  }
}; // manakaiAppendText
157 |
|
|
|
158 |
|
|
/**
 * Serializes the descendants of a node, one per line.  Each line starts
 * with '| ' followed by the indent; elements show their name and are
 * followed by their own descendants at a deeper indent, text nodes show
 * their data in double quotes, and any other child is shown through
 * string conversion.
 *
 * @param {Object} n  Node whose childNodes are dumped; n itself is not
 *     printed.
 * @param {string} indent  Indent for the direct children of n.
 * @return {string} The dump; empty when n has no children.
 */
function dumpTree (n, indent) {
  var parts = [];
  var prefix = '| ' + indent;
  var children = n.childNodes;

  for (var k = 0; k < children.length; k++) {
    var child = children[k];
    if (child instanceof JSElement) {
      parts.push (prefix + child.localName + '\n');
      parts.push (dumpTree (child, indent + ' '));
    } else if (child instanceof JSText) {
      parts.push (prefix + '"' + child.data + '"\n');
    } else {
      parts.push (prefix + child + '\n');
    }
  }

  return parts.join ('');
} // dumpTree
173 |
|
|
</script> |
174 |
|
|
</head> |
175 |
|
|
<body onload=" |
176 |
|
|
document.sourceElement = document.getElementsByTagName ('textarea')[0]; |
177 |
|
|
document.logElement = document.getElementsByTagName ('output')[0]; |
178 |
|
|
update (); |
179 |
|
|
"> |
180 |
|
|
|
181 |
|
|
<textarea onchange=" update () "><html> |
182 |
|
|
<head></head><body> |
183 |
|
|
<p> |
184 |
|
|
<script> |
185 |
|
|
document.write ('aaaaaaa</p>\n<script>\ndocument.write("cccccc")\n</', 'script>\nbbbbbb'); |
186 |
|
|
</script> |
187 |
|
|
<p> |
188 |
|
|
</textarea> |
189 |
|
|
|
190 |
|
|
<output></output> |
191 |
|
|
|
192 |
|
|
</body> |
193 |
|
|
</html> |