| 1 |
<!DOCTYPE HTML> |
| 2 |
<html lang=en> |
| 3 |
<head> |
| 4 |
<title>Demo of HTML5 Parsing Algorithm with Scripting Enabled</title> |
| 5 |
<style> |
| 6 |
textarea { |
| 7 |
display: block; |
| 8 |
width: 80%; |
| 9 |
margin-left: auto; |
| 10 |
margin-right: auto; |
| 11 |
min-height: 20em; |
| 12 |
} |
| 13 |
output { |
| 14 |
display: block; |
| 15 |
font-family: monospace; |
| 16 |
white-space: pre; |
| 17 |
} |
| 18 |
</style> |
| 19 |
<script> |
| 20 |
/**
 * Re-runs the demo: clears the log element, parses the current contents of
 * the source element, and logs a dump of the resulting tree.
 *
 * Reads document.sourceElement and document.logElement, which the page's
 * onload handler attaches to the document object.
 */
function update () {
  var output = document.logElement;
  output.textContent = '';

  var parser = new Parser ();
  var input = new InputStream (document.sourceElement.value);
  parser.parse (input);

  var treeDump = dumpTree (parser.doc, '');
  log (treeDump);
} // update
| 26 |
|
| 27 |
/**
 * Appends one line of text to the log element.
 *
 * @param {*} s  Value to log; it is string-concatenated with a trailing
 *               newline and inserted as a text node.
 */
function log (s) {
  var target = document.logElement;
  var line = document.createTextNode (s + "\n");
  target.appendChild (line);
} // log
| 30 |
|
| 31 |
/**
 * Holder for the text that is still waiting to be tokenized.
 *
 * @constructor
 * @param {string} s  The source text.
 */
function InputStream (s) {
  // Unread remainder of the input; the tokenizer rewrites this property as
  // it consumes tokens.
  this.s = s;
} // InputStream
| 34 |
|
| 35 |
/**
 * Parser state: the tokenizer mode, the document being built, and the
 * stack of open nodes (the document itself is always the bottom entry).
 *
 * @constructor
 */
function Parser () {
  var root = new JSDocument ();

  // 'pcdata' for ordinary markup, 'script' while inside a script element.
  this.parseMode = 'pcdata';
  this.doc = root;
  this.openElements = [root];
} // Parser
| 40 |
|
| 41 |
/**
 * Removes the next token from the front of the input stream and returns it.
 *
 * Token shapes:
 *   {type: 'start-tag', value: text between "<" and ">", lowercased}
 *   {type: 'end-tag',   value: text between "</" and ">", lowercased}
 *   {type: 'char',      value: a run of text}
 *   {type: 'eof'}       when no input is left
 *
 * Note that a start tag's value is everything between the angle brackets,
 * so any attributes are part of it.
 *
 * In 'script' parse mode only a literal "</script>" (any letter case) is
 * recognized as markup; everything before it is returned as one char
 * token.  If the script is never closed, all remaining input is returned
 * as a single char token so that script source is not later re-tokenized
 * as markup.
 *
 * @param {InputStream} i  Stream whose `s` property holds the unread
 *                         input; the consumed prefix is removed from it.
 * @returns {Object} The next token.
 */
Parser.prototype.getNextToken = function (i) {
  // Matches |pattern| (which must be anchored with ^) against the unread
  // input and, on success, drops the matched prefix from the stream.
  var take = function (pattern) {
    var found = pattern.exec (i.s);
    if (found) i.s = i.s.substring (found[0].length);
    return found;
  };
  var match;

  if (this.parseMode == 'script') {
    // The end tag has to be tested before the text run: otherwise an empty
    // script ("<script></script>") followed by a later "</script>" would
    // have its own end tag swallowed as script text.
    if (take (/^<\/[Ss][Cc][Rr][Ii][Pp][Tt]>/)) {
      return {type: 'end-tag', value: 'script'};
    }

    // Text up to (not including) the next end tag, or - for an
    // unterminated script - everything that is left.
    match = take (/^[\s\S]+?(?=<\/[Ss][Cc][Rr][Ii][Pp][Tt]>)/) ||
        take (/^[\s\S]+/);
    if (match) return {type: 'char', value: match[0]};

    return {type: 'eof'};
  }

  if ((match = take (/^<\/([^>]+)>/))) {
    return {type: 'end-tag', value: match[1].toLowerCase ()};
  }

  if ((match = take (/^<([^>]+)>/))) {
    return {type: 'start-tag', value: match[1].toLowerCase ()};
  }

  // A run of text up to the next "<", or a single character (a "<" that
  // does not open a tag).
  if ((match = take (/^[^<]+/) || take (/^[\s\S]/))) {
    return {type: 'char', value: match[0]};
  }

  return {type: 'eof'};
} // getNextToken
| 81 |
|
| 82 |
/**
 * Tokenizes the whole input stream and builds the tree under this.doc.
 *
 * Every token is logged.  Tokens are handled as follows:
 *   - start tag: a new element is appended to the current node (the top of
 *     the open-element stack) and pushed onto the stack.  A "script" start
 *     tag instead switches the tokenizer to 'script' mode, gathers the
 *     script text into the element, and appends the element without
 *     pushing it;
 *   - end tag: pops the stack when it matches the current node, otherwise
 *     a parse error is logged and the tag is ignored;
 *   - characters: appended as text to the current node;
 *   - eof: stops parsing, including when it is reached inside a script.
 *
 * @param {InputStream} i  The input to consume.
 */
Parser.prototype.parse = function (i) {
  log ('start parsing');

  var done = false;
  while (!done) {
    var token = this.getNextToken (i);
    log ('token: ' + token.type + ' "' + token.value + '"');

    var current = this.openElements[this.openElements.length - 1];

    if (token.type == 'start-tag') {
      var el = new JSElement (token.value);
      if (token.value == 'script') {
        // Script content is raw text: collect it until the matching end
        // tag (or the end of the input).
        this.parseMode = 'script';

        while (true) {
          token = this.getNextToken (i);
          log ('token: ' + token.type + ' "' + token.value + '"');

          if (token.type == 'char') {
            el.manakaiAppendText (token.value);
          } else if (token.type == 'eof') {
            // End of input inside a script ends the whole parse; do not
            // go back to the tokenizer after it reported eof.
            done = true;
            break;
          } else if (token.type == 'end-tag' && token.value == 'script') {
            break;
          }
        }

        this.parseMode = 'pcdata';
        current.appendChild (el);
      } else {
        current.appendChild (el);
        this.openElements.push (el);
      }
    } else if (token.type == 'end-tag') {
      if (current.localName == token.value) {
        this.openElements.pop ();
      } else {
        log ('parse error: unmatched end tag: ' + token.value);
      }
    } else if (token.type == 'char') {
      // Keep text outside scripts in the tree instead of dropping it.
      current.manakaiAppendText (token.value);
    } else if (token.type == 'eof') {
      done = true;
    }
  }

  log ('stop parsing');
} // parse
| 126 |
|
| 127 |
/**
 * Root node of the demo tree; starts out with no children.
 *
 * @constructor
 */
function JSDocument () {
  // Child nodes in document order.
  this.childNodes = [];
} // JSDocument
| 130 |
|
| 131 |
/**
 * Element node of the demo tree; starts out with no children.
 *
 * @constructor
 * @param {string} localName  The element's name, stored as given.
 */
function JSElement (localName) {
  this.localName = localName;
  // Child nodes in document order.
  this.childNodes = [];
} // JSElement
| 135 |
|
| 136 |
/**
 * Adds a node as the last child of this document or element and records
 * this node as its parent.  Documents and elements share one function.
 *
 * @param {Object} e  The node to append.
 * @returns {Object} The appended node.
 */
JSElement.prototype.appendChild = function (e) {
  var children = this.childNodes;
  children.push (e);
  e.parentNode = this;
  return e;
}; // appendChild
JSDocument.prototype.appendChild = JSElement.prototype.appendChild;
| 142 |
|
| 143 |
/**
 * Text node of the demo tree.
 *
 * @constructor
 * @param {string} data  The node's character data.
 */
function JSText (data) {
  // Grows in place when more text is appended next to this node.
  this.data = data;
} // JSText
| 146 |
|
| 147 |
/**
 * Appends text to this document or element.  When the last child is
 * already a text node the string is added to that node's data; otherwise a
 * new JSText child is pushed.  Documents and elements share one function.
 *
 * @param {string} s  The text to append.
 */
JSElement.prototype.manakaiAppendText = function (s) {
  var children = this.childNodes;
  var last = children.length > 0 ? children[children.length - 1] : null;

  if (last instanceof JSText) {
    last.data += s;
    return;
  }

  children.push (new JSText (s));
}; // manakaiAppendText
JSDocument.prototype.manakaiAppendText =
    JSElement.prototype.manakaiAppendText;
| 157 |
|
| 158 |
/**
 * Serializes the children of a node, one per line, recursing into
 * elements.  Each line starts with "| " and the current indent; elements
 * print their localName, text nodes print their data in double quotes, and
 * anything else is printed via string conversion.
 *
 * @param {Object} n       Node whose childNodes are dumped (the node
 *                         itself is not printed).
 * @param {string} indent  Indent for this level; one space is added per
 *                         nesting level.
 * @returns {string} The dump; empty when the node has no children.
 */
function dumpTree (n, indent) {
  var pieces = [];
  var children = n.childNodes;
  var prefix = '| ' + indent;

  for (var k = 0; k < children.length; k++) {
    var child = children[k];
    if (child instanceof JSText) {
      pieces.push (prefix + '"' + child.data + '"\n');
    } else if (child instanceof JSElement) {
      pieces.push (prefix + child.localName + '\n');
      pieces.push (dumpTree (child, indent + ' '));
    } else {
      pieces.push (prefix + child + '\n');
    }
  }

  return pieces.join ('');
} // dumpTree
| 173 |
</script> |
| 174 |
</head> |
| 175 |
<body onload=" |
| 176 |
document.sourceElement = document.getElementsByTagName ('textarea')[0]; |
| 177 |
document.logElement = document.getElementsByTagName ('output')[0]; |
| 178 |
update (); |
| 179 |
"> |
| 180 |
|
| 181 |
<textarea onchange=" update () "><html> |
| 182 |
<head></head><body> |
| 183 |
<p> |
| 184 |
<script> |
| 185 |
document.write ('aaaaaaa</p>\n<script>\ndocument.write("cccccc")\n</', 'script>\nbbbbbb'); |
| 186 |
</script> |
| 187 |
<p> |
| 188 |
</textarea> |
| 189 |
|
| 190 |
<output></output> |
| 191 |
|
| 192 |
</body> |
| 193 |
</html> |