1 |
<!DOCTYPE HTML> |
2 |
<html lang=en> |
3 |
<head> |
4 |
<title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title> |
5 |
<style> |
6 |
/* Source editor: a centred block taking 80% of the page width. */
textarea {
  display: block;
  min-height: 20em;
  width: 80%;
  margin-right: auto;
  margin-left: auto;
}

/* Log / tree dump area: block-level, monospace, line breaks kept as written. */
output {
  white-space: pre;
  font-family: monospace;
  display: block;
}
18 |
</style> |
19 |
<script> |
20 |
/**
 * Re-run the demo: clear the log area, parse the current contents of the
 * source element with a fresh Parser, then log a dump of the resulting tree.
 *
 * Reads document.sourceElement and document.logElement, which must have been
 * set before this function is called.
 */
function update () {
  var logArea = document.logElement;
  logArea.textContent = '';

  var parser = new Parser ();
  var input = new InputStream (document.sourceElement.value);
  parser.parse (input);

  var treeDump = dumpTree (parser.doc, '');
  log (treeDump);
} // update
26 |
|
27 |
/**
 * Append one line to the log area.
 *
 * @param s  Value to log; it is concatenated with a trailing newline and
 *           added to document.logElement as a new text node.
 */
function log (s) {
  var line = document.createTextNode (s + "\n");
  document.logElement.appendChild (line);
} // log
30 |
|
31 |
/**
 * Mutable holder for the text that is still waiting to be tokenized.
 *
 * @param s  The source text. Parser.prototype.getNextToken reads the `s`
 *           property and overwrites it with the unconsumed remainder.
 */
function InputStream (s) {
  // Remaining, not yet tokenized input.
  this.s = s;
} // InputStream
34 |
|
35 |
/**
 * Toy HTML tokenizer and tree builder used by the demo.
 *
 * State:
 *   parseMode    - 'pcdata' for ordinary markup, 'script' while the contents
 *                  of a script element are being tokenized.
 *   doc          - the JSDocument that receives the parsed tree.
 *   openElements - stack of currently open nodes, innermost last. The
 *                  document sits at the bottom; it has no localName, so no
 *                  end tag ever pops it.
 */
function Parser () {
  this.parseMode = 'pcdata';
  this.doc = new JSDocument ();
  this.openElements = [this.doc];
} // Parser

/**
 * Extract the lowercased tag name from the text found between the angle
 * brackets of a tag: the first run of characters that are neither white
 * space nor a slash. Anything after the name (attributes, a trailing slash)
 * is ignored. If no such run exists the raw text is returned lowercased.
 *
 * @param raw  Text between '<' (or '<' + '/') and '>'.
 * @return     The tag name in lower case.
 */
Parser.tagNameOf = function (raw) {
  var m = /[^\s\/]+/.exec (raw);
  return (m ? m[0] : raw).toLowerCase ();
}; // tagNameOf

/**
 * Tokenize the contents of a script element.
 *
 * Everything up to the first script end tag (matched case-insensitively) is
 * returned as one 'char' token and the end tag itself as an 'end-tag' token.
 * If no end tag is left in the input, all remaining text is returned as a
 * single 'char' token so that it is neither lost nor later re-read as
 * markup; once the input is empty, 'eof' is returned.
 *
 * @param i  Input object whose string property `s` is consumed from the front.
 * @return   {type: 'char', value}, {type: 'end-tag', value: 'script'} or
 *           {type: 'eof'}.
 */
Parser.prototype.getNextScriptToken = function (i) {
  if (i.s.length === 0) {
    return {type: 'eof'};
  }

  var end = /<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/.exec (i.s);
  if (!end) {
    // Unterminated script: the rest of the input is script text.
    var rest = i.s;
    i.s = '';
    return {type: 'char', value: rest};
  }

  if (end.index > 0) {
    // Leave the end tag in the input; the next call returns it.
    var text = i.s.substring (0, end.index);
    i.s = i.s.substring (end.index);
    return {type: 'char', value: text};
  }

  // The end tag is first in the input (this covers an empty script element).
  i.s = i.s.substring (end[0].length);
  return {type: 'end-tag', value: 'script'};
}; // getNextScriptToken

/**
 * Remove the next token from the front of the input and return it.
 *
 * In 'script' mode the work is delegated to getNextScriptToken. Otherwise,
 * in order of preference: an end tag, a start tag, a run of text up to the
 * next '<', or a single stray character (a '<' that does not begin a tag).
 * Tag token values are the lowercased tag name only; attribute text is
 * discarded.
 *
 * @param i  Input object whose string property `s` is consumed from the front.
 * @return   {type: 'start-tag' | 'end-tag' | 'char', value: string} or
 *           {type: 'eof'} when the input is empty.
 */
Parser.prototype.getNextToken = function (i) {
  if (this.parseMode === 'script') {
    return this.getNextScriptToken (i);
  }

  var m = /^<\/([^>]+)>/.exec (i.s);
  if (m) {
    i.s = i.s.substring (m[0].length);
    return {type: 'end-tag', value: Parser.tagNameOf (m[1])};
  }

  m = /^<([^>]+)>/.exec (i.s);
  if (m) {
    i.s = i.s.substring (m[0].length);
    return {type: 'start-tag', value: Parser.tagNameOf (m[1])};
  }

  m = /^[^<]+/.exec (i.s);
  if (m) {
    i.s = i.s.substring (m[0].length);
    return {type: 'char', value: m[0]};
  }

  if (i.s.length > 0) {
    // Only a '<' that starts no complete tag can reach this point.
    var stray = i.s.charAt (0);
    i.s = i.s.substring (1);
    return {type: 'char', value: stray};
  }

  return {type: 'eof'};
}; // getNextToken

/**
 * Read the contents of a script element whose start tag has just been seen.
 *
 * Switches to 'script' mode, appends every 'char' token to `el` as text and
 * logs each token. Stops at the script end tag or at end of input, switches
 * back to 'pcdata' mode and returns the token that ended the loop.
 *
 * @param i   Input object being tokenized.
 * @param el  The script element that receives the text.
 * @return    The terminating token ('end-tag' or 'eof').
 */
Parser.prototype.parseScriptContent = function (i, el) {
  this.parseMode = 'script';

  while (true) {
    var scriptToken = this.getNextToken (i);
    log ('token: ' + scriptToken.type + ' "' + scriptToken.value + '"');

    if (scriptToken.type === 'char') {
      el.manakaiAppendText (scriptToken.value);
    } else if (scriptToken.type === 'eof' ||
               (scriptToken.type === 'end-tag' &&
                scriptToken.value === 'script')) {
      this.parseMode = 'pcdata';
      return scriptToken;
    }
  }
}; // parseScriptContent

/**
 * Parse the whole input into this.doc, logging every token on the way.
 *
 * Start tags create an element under the innermost open node. A script
 * element has its text read immediately and is never pushed on the stack;
 * any other element becomes the new innermost open node. An end tag pops
 * the innermost open node when the names match and is logged as a parse
 * error otherwise. Text outside script elements is logged but not added to
 * the tree.
 *
 * @param i  Input object whose string property `s` holds the source text.
 */
Parser.prototype.parse = function (i) {
  log ('start parsing');

  while (true) {
    var token = this.getNextToken (i);
    log ('token: ' + token.type + ' "' + token.value + '"');

    if (token.type === 'start-tag') {
      var el = new JSElement (token.value);
      var current = this.openElements[this.openElements.length - 1];
      if (token.value === 'script') {
        var last = this.parseScriptContent (i, el);
        current.appendChild (el);
        if (last.type === 'eof') {
          // The input ended inside the script element.
          break;
        }
      } else {
        current.appendChild (el);
        this.openElements.push (el);
      }
    } else if (token.type === 'end-tag') {
      if (this.openElements[this.openElements.length - 1].localName ===
          token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type === 'eof') {
      break;
    }
  }

  log ('stop parsing');
}; // parse
126 |
|
127 |
/**
 * Root node of the demo tree. Holds only an ordered list of child nodes.
 */
function JSDocument () {
  this.childNodes = [];
} // JSDocument

/**
 * Element node of the demo tree.
 *
 * @param localName  The element's name, stored as given.
 */
function JSElement (localName) {
  this.localName = localName;
  this.childNodes = [];
} // JSElement

/**
 * Append `e` as the last child of this node and record this node as its
 * parent. `e` is not detached from any node it was attached to before.
 *
 * @param e  The node to append.
 * @return   The appended node.
 */
JSDocument.prototype.appendChild = JSElement.prototype.appendChild =
function (e) {
  this.childNodes.push (e);
  e.parentNode = this;
  return e;
}; // appendChild

/**
 * Text node of the demo tree.
 *
 * @param data  The node's character data.
 */
function JSText (data) {
  this.data = data;
} // JSText

/**
 * Append text to this node. If the last child is already a JSText the
 * string is added to its data; otherwise a new JSText child is appended
 * through appendChild, so it gets a parentNode like every other child.
 *
 * @param s  The text to append.
 */
JSDocument.prototype.manakaiAppendText =
JSElement.prototype.manakaiAppendText =
function (s) {
  var lastChild = this.childNodes[this.childNodes.length - 1];
  if (lastChild instanceof JSText) {
    lastChild.data += s;
  } else {
    this.appendChild (new JSText (s));
  }
}; // manakaiAppendText
157 |
|
158 |
/**
 * Serialize the children of `n`, recursively, one node per line.
 *
 * Every line starts with '| ' followed by `indent`. Elements print their
 * localName and are followed by their own children at one more space of
 * indentation; text nodes print their data in double quotes; any other
 * child is printed via string concatenation.
 *
 * @param n       Node whose childNodes are dumped (n itself is not printed).
 * @param indent  Indentation string for this level.
 * @return        The dump; '' when n has no children.
 */
function dumpTree (n, indent) {
  var pieces = [];
  var children = n.childNodes;
  var prefix = '| ' + indent;

  for (var idx = 0; idx < children.length; idx++) {
    var child = children[idx];
    if (child instanceof JSElement) {
      pieces.push (prefix + child.localName + '\n');
      pieces.push (dumpTree (child, indent + ' '));
    } else if (child instanceof JSText) {
      pieces.push (prefix + '"' + child.data + '"\n');
    } else {
      pieces.push (prefix + child + '\n');
    }
  }

  return pieces.join ('');
} // dumpTree
173 |
</script> |
174 |
</head> |
175 |
<body onload="
  /* Store the first textarea and the first output element as expando
     properties on document, where update () and log () look them up,
     then render the initial sample. */
  var sourceBoxes = document.getElementsByTagName ('textarea');
  var outputBoxes = document.getElementsByTagName ('output');
  document.sourceElement = sourceBoxes[0];
  document.logElement = outputBoxes[0];
  update ();
">
180 |
|
181 |
<textarea onchange=" update () "><html> |
182 |
<head></head><body> |
183 |
<p> |
184 |
<script> |
185 |
document.write ('aaaaaaa</p>\n<script>\ndocument.write("cccccc")\n</', 'script>\nbbbbbb'); |
186 |
</script> |
187 |
<p> |
188 |
</textarea> |
189 |
|
190 |
<output></output> |
191 |
|
192 |
</body> |
193 |
</html> |