| 83 |
doc = new JSDocument (this); |
doc = new JSDocument (this); |
| 84 |
doc.manakaiIsHTML = true; |
doc.manakaiIsHTML = true; |
| 85 |
} |
} |
| 86 |
|
this.nextToken = []; |
| 87 |
this.doc = doc; |
this.doc = doc; |
| 88 |
this.openElements = [doc]; |
this.openElements = [doc]; |
| 89 |
this.input = i; |
this.input = i; |
| 93 |
} // Parser |
} // Parser |
| 94 |
|
|
| 95 |
Parser.prototype.getNextToken = function () { |
Parser.prototype.getNextToken = function () { |
| 96 |
|
if (this.nextToken.length) { |
| 97 |
|
return this.nextToken.shift (); |
| 98 |
|
} |
| 99 |
|
|
| 100 |
var p = this; |
var p = this; |
| 101 |
var i = this.input; |
var i = this.input; |
| 102 |
if (this.parseMode == 'cdata') { |
if (this.parseMode == 'cdata') { |
| 151 |
i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) { |
i.s = i.s.replace (/^<\/([^>]+)(?:>|$)/, function (s, e) { |
| 152 |
if (p.insertionPoint < s.length || |
if (p.insertionPoint < s.length || |
| 153 |
(p.insertionPoint <= s.length && |
(p.insertionPoint <= s.length && |
| 154 |
s.substring (s.length - 1, 1) != '>')) { |
s.substring (s.length - 1, s.length) != '>')) { |
| 155 |
token = {type: 'abort'}; |
token = {type: 'abort'}; |
| 156 |
return s; |
return s; |
| 157 |
} |
} |
| 163 |
i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) { |
i.s = i.s.replace (/^<([^>]+)(?:>|$)/, function (s, e) { |
| 164 |
if (p.insertionPoint < s.length || |
if (p.insertionPoint < s.length || |
| 165 |
(p.insertionPoint <= s.length && |
(p.insertionPoint <= s.length && |
| 166 |
s.substring (s.length - 1, 1) != '>')) { |
s.substring (s.length - 1, s.length) != '>')) { |
| 167 |
token = {type: 'abort'}; |
token = {type: 'abort'}; |
| 168 |
return s; |
return s; |
| 169 |
} |
} |
| 226 |
var token = this.getNextToken (); |
var token = this.getNextToken (); |
| 227 |
log ('token: ' + token.type + ' "' + token.value + '"'); |
log ('token: ' + token.type + ' "' + token.value + '"'); |
| 228 |
|
|
| 229 |
|
if (this.cdataEndTagRequired) { |
| 230 |
|
// Generic CDATA parsing algorithm |
| 231 |
|
|
| 232 |
|
if (token.type != 'abort') { |
| 233 |
|
// 7. |
| 234 |
|
if (token.type == 'end-tag' && token.value == this.endTagName) { |
| 235 |
|
// 7.1. Ignores it. |
| 236 |
|
// |
| 237 |
|
} else { |
| 238 |
|
// 7.2. Parse error. |
| 239 |
|
log ('Parse error: no </' + this.endTagName + '>'); |
| 240 |
|
this.nextToken.unshift (token); |
| 241 |
|
} |
| 242 |
|
this.cdataEndTagRequired = false; |
| 243 |
|
continue; |
| 244 |
|
} |
| 245 |
|
} |
| 246 |
|
|
| 247 |
if (token.type == 'start-tag') { |
if (token.type == 'start-tag') { |
| 248 |
if (token.value == 'script') { |
if (token.value == 'script') { |
| 249 |
// 1. Create an element for the token in the HTML namespace. |
// 1. Create an element for the token in the HTML namespace. |
| 280 |
if (!(token.type == 'end-tag' && token.value == 'script')) { |
if (!(token.type == 'end-tag' && token.value == 'script')) { |
| 281 |
// 7.2. This is a parse error. |
// 7.2. This is a parse error. |
| 282 |
log ('Parse error: no </' + 'script>'); |
log ('Parse error: no </' + 'script>'); |
| 283 |
|
this.nextToken.unshift (token); |
| 284 |
|
|
| 285 |
// 7.3. Mark the script element as "already executed". |
// 7.3. Mark the script element as "already executed". |
| 286 |
el.manakaiAlreadyExecuted = true; |
el.manakaiAlreadyExecuted = true; |
| 295 |
// 8.1. If the parser were originally created for the ... |
// 8.1. If the parser were originally created for the ... |
| 296 |
if (this.fragmentParsingMode) { |
if (this.fragmentParsingMode) { |
| 297 |
// 8.2. Mark the script element as "already executed" and ... |
// 8.2. Mark the script element as "already executed" and ... |
| 298 |
el.alreadyExecuted = true; |
el.manakaiAlreadyExecuted = true; |
| 299 |
continue; |
continue; |
| 300 |
} |
} |
| 301 |
|
|
| 312 |
oldInsertionPoint += this.insertionPoint; |
oldInsertionPoint += this.insertionPoint; |
| 313 |
this.setInsertionPoint (oldInsertionPoint); |
this.setInsertionPoint (oldInsertionPoint); |
| 314 |
|
|
| 315 |
// 12. If there is a script that will execute as soon as ... |
// 12. If there is a pending external script |
| 316 |
while (this.scriptExecutedWhenParserResumes) { |
while (this.pendingExternalScript) { |
| 317 |
// 12.1. If the tree construction stage is being called reentrantly |
// 12.1. If the tree construction stage is being called reentrantly |
| 318 |
if (this.reentrant) { |
if (this.reentrant) { |
| 319 |
log ('parse: abort (reentrance)'); |
log ('parse: abort (reentrance)'); |
| 323 |
// 12.2. Otherwise |
// 12.2. Otherwise |
| 324 |
} else { |
} else { |
| 325 |
// 1. |
// 1. |
| 326 |
var script = this.scriptExecutedWhenParserResumes; |
var script = this.pendingExternalScript; |
| 327 |
this.scriptExecutedWhenParserResumes = null; |
this.pendingExternalScript = null; |
| 328 |
|
|
| 329 |
// 2. Pause until the script has completed loading. |
// 2. Pause until the script has completed loading. |
| 330 |
// |
// |
| 372 |
// 6. Switched back to the PCDATA state. |
// 6. Switched back to the PCDATA state. |
| 373 |
this.parseMode = 'pcdata'; |
this.parseMode = 'pcdata'; |
| 374 |
|
|
| 375 |
|
if (token.type == 'abort') { |
| 376 |
|
this.cdataEndTagRequired = true; |
| 377 |
|
break; |
| 378 |
|
} |
| 379 |
|
|
| 380 |
// 7.1. If the next token is not an end tag token with ... |
// 7.1. If the next token is not an end tag token with ... |
| 381 |
if (!(token.type == 'end-tag' && |
if (!(token.type == 'end-tag' && |
| 382 |
token.value == this.endTagName)) { |
token.value == this.endTagName)) { |
| 383 |
// 7.2. This is a parse error. |
// 7.2. This is a parse error. |
| 384 |
log ('Parse error: no </' + this.endTagName + '>'); |
log ('Parse error: no </' + this.endTagName + '>'); |
| 385 |
|
this.nextToken.unshift (token); |
| 386 |
|
|
| 387 |
// 7.3. Mark the script element as "already executed". |
// 7.3. Mark the script element as "already executed". |
| 388 |
el.manakaiAlreadyExecuted = true; |
el.manakaiAlreadyExecuted = true; |
| 521 |
var doc = this.ownerDocument || this; |
var doc = this.ownerDocument || this; |
| 522 |
var p = doc._parser; |
var p = doc._parser; |
| 523 |
|
|
| 524 |
// 1. Script type |
// 1. The script's type |
| 525 |
|
// |
| 526 |
|
|
| 527 |
|
// 2. The script's character encoding |
| 528 |
// |
// |
| 529 |
|
|
| 530 |
// 2.1. If scripting is disabled |
// 3.1. If without script |
| 531 |
// |
// |
| 532 |
// 2.2. If the script element was created by an XML ... innerHTML ... |
// 2.2. If the script element was created by an XML ... innerHTML ... |
| 533 |
// |
// |
| 534 |
// 2.3. If the user agent does not support the scripting language ... |
// 2.3. If the user agent does not support the scripting language ... |
| 535 |
// |
// |
| 536 |
// 2.4. If the script element has its "already executed" flag set |
if (false) { |
|
if (e.manakaiAlreadyExecuted) { |
|
| 537 |
// 2.5. Abort these steps at this point. |
// 2.5. Abort these steps at this point. |
| 538 |
log ('Running a script: aborted (already executed)'); |
log ('Running a script: aborted (noscript)'); |
| 539 |
logIndentLevel--; |
logIndentLevel--; |
| 540 |
return e; |
return e; |
| 541 |
} |
} |
| 542 |
|
|
| 543 |
// 3. Set the element's "already executed" flag. |
// 4. Set the element's "already executed" flag. |
| 544 |
e.manakaiAlreadyExecuted = true; |
e.manakaiAlreadyExecuted = true; |
| 545 |
|
|
| 546 |
// 4. If the element has a src attribute, then a load for ... |
// 5. If the element has a src attribute, then a load for ... |
| 547 |
// TODO: load an external resource |
// TODO: load an external resource |
| 548 |
|
|
| 549 |
// 5. The first of the following options: |
// 6. The first of the following options: |
| 550 |
|
|
|
// 5.1. |
|
| 551 |
if (/* TODO: If the document is still being parsed && */ |
if (/* TODO: If the document is still being parsed && */ |
| 552 |
e.defer && !e.async) { |
e.defer && !e.async) { |
| 553 |
|
// 6.1. |
| 554 |
p.scriptsExecutedAfterParsing.push (e); |
p.scriptsExecutedAfterParsing.push (e); |
| 555 |
log ('Running a script: aborted (defer)'); |
log ('Running a script: aborted (defer)'); |
| 556 |
} else if (e.async && e.src != null) { |
} else if (e.async && e.src != null) { |
| 557 |
|
// 6.2. |
| 558 |
p.scriptsExecutedAsynchronously.push (e); |
p.scriptsExecutedAsynchronously.push (e); |
| 559 |
log ('Running a script: aborted (async src)'); |
log ('Running a script: aborted (async src)'); |
| 560 |
} else if (e.async && e.src == null && |
} else if (e.async && e.src == null && |
| 561 |
p.scriptsExecutedAsynchronously.length > 0) { |
p.scriptsExecutedAsynchronously.length > 0) { |
| 562 |
|
// 6.3. |
| 563 |
p.scriptsExecutedAsynchronously.push (e); |
p.scriptsExecutedAsynchronously.push (e); |
| 564 |
log ('Running a script: aborted (async)'); |
log ('Running a script: aborted (async)'); |
|
// ISSUE: What is the difference with the case above? |
|
| 565 |
} else if (e.src != null && e.manakaiParserInserted) { |
} else if (e.src != null && e.manakaiParserInserted) { |
| 566 |
if (p.scriptExecutedWhenParserResumes) { |
// 6.4. |
| 567 |
log ('Error: There is a script that will execute as soon as the parser resumes.'); |
if (p.pendingExternalScript) { |
| 568 |
|
log ('Error: There is a pending external script.'); |
| 569 |
} |
} |
| 570 |
p.scriptExecutedWhenParserResumes = e; |
p.pendingExternalScript = e; |
| 571 |
log ('Running a script: aborted (src parser-inserted)'); |
log ('Running a script: aborted (src parser-inserted)'); |
| 572 |
} else if (e.src != null) { |
} else if (e.src != null) { |
| 573 |
|
// 6.5. |
| 574 |
p.scriptsExecutedSoon.push (e); |
p.scriptsExecutedSoon.push (e); |
| 575 |
log ('Running a script: aborted (src)'); |
log ('Running a script: aborted (src)'); |
| 576 |
} else { |
} else { |
| 577 |
|
// 6.6. |
| 578 |
executeScript (doc, e); // even if other scripts are already executing. |
executeScript (doc, e); // even if other scripts are already executing. |
| 579 |
} |
} |
| 580 |
|
|
| 604 |
} |
} |
| 605 |
|
|
| 606 |
// If the load was successful |
// If the load was successful |
|
log ('load event fired at the script element'); |
|
| 607 |
|
|
| 608 |
if (true) { |
if (true) { |
| 609 |
// Scripting is enabled, Document.designMode is disabled, |
// Scripting is enabled, Document.designMode is disabled, |
| 612 |
parseAndRunScript (doc, s); |
parseAndRunScript (doc, s); |
| 613 |
} |
} |
| 614 |
|
|
| 615 |
|
log ('load event fired at the script element'); |
| 616 |
|
|
| 617 |
log ('executing a script block: end'); |
log ('executing a script block: end'); |
| 618 |
} // executeScript |
} // executeScript |
| 619 |
|
|
| 771 |
+ p.input.s.substring (p.insertionPoint, p.input.s.length); |
+ p.input.s.substring (p.insertionPoint, p.input.s.length); |
| 772 |
p.insertionPoint += s.length; |
p.insertionPoint += s.length; |
| 773 |
|
|
| 774 |
// 3. If there is a script that will execute as soon as the parser resumes |
// 3. If there is a pending external script |
| 775 |
if (p.scriptExecutedAfterParserResumes) { |
if (p.pendingExternalScript) { |
| 776 |
log ('document.write: processed later (there is an unprocessed <script src>)'); |
log ('document.write: processed later (there is an unprocessed <script src>)'); |
| 777 |
logIndentLevel--; |
logIndentLevel--; |
| 778 |
log ('document.write: return'); |
log ('document.write: return'); |
| 890 |
|
|
| 891 |
<h2 id=notes>Notes</h2> |
<h2 id=notes>Notes</h2> |
| 892 |
|
|
| 893 |
<p>This is a <em>simplified</em> implementation of |
<p>This is a <em>simplified</em> implementation of <a |
| 894 |
<a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5 |
href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5 |
| 895 |
Parsing Algorithm</a>. It only implements script-related part of the |
Parsing Algorithm</a> (revision 2138). It only implements |
| 896 |
algorithm. In particular, this parser: |
scripting-related parts of the algorithm. In particular, this parser: |
| 897 |
|
|
| 898 |
<ul> |
<ul> |
| 899 |
<li>Does not support <code>DOCTYPE</code> and comment tokens. |
<li>Does not support <code>DOCTYPE</code> and comment tokens. |
| 900 |
<li>Does not support entities except for <code>&quot;</code>, |
<li>Does not support entities except for <code>&quot;</code>, |
| 904 |
algorithm, and so on. |
algorithm, and so on. |
| 905 |
<li>Does not raise parse errors for invalid attribute specifications in start |
<li>Does not raise parse errors for invalid attribute specifications in start |
| 906 |
or end tags. |
or end tags. |
| 907 |
<li>Does not support PCDATA elements (<code>title</code> and |
<li>Does not support RCDATA elements (<code>title</code> and |
| 908 |
<code>textarea</code>). |
<code>textarea</code>). |
| 909 |
<li>Does not strip the first newline in <code>pre</code> elements. |
<li>Does not strip the first newline in <code>pre</code>, |
| 910 |
|
<code>listing</code>, and <code>textarea</code> elements. |
| 911 |
<li>Does not support <code><!--</code>..<code>--></code> parsing rule |
<li>Does not support <code><!--</code>..<code>--></code> parsing rule |
| 912 |
in <code>script</code> element. |
in CDATA/RCDATA elements. |
| 913 |
<li>Does not support foreign (SVG or MathML) elements. |
<li>Does not support foreign (SVG or MathML) elements. |
| 914 |
<li>Only supports <code>script</code> <code>type</code> |
<li>Only supports <code>script</code> <code>type</code> |
| 915 |
<code>text/javascript</code>. <code>type</code> and <code>language</code> |
<code>text/javascript</code>. <code>type</code> and <code>language</code> |
| 951 |
executed. Currently it does not matter, since we don't allow dynamic |
executed. Currently it does not matter, since we don't allow dynamic |
| 952 |
modification to the |src| content/DOM attribute value yet. --> |
modification to the |src| content/DOM attribute value yet. --> |
| 953 |
|
|
| 954 |
|
<p>See also |
| 955 |
|
<a href="http://suika.fam.cx/gate/2005/sw/Live%20Scripting%20HTML%20Parser">SuikaWiki: |
| 956 |
|
Live Scripting HTML Parser</a>. |
| 957 |
|
|
| 958 |
</body> |
</body> |
| 959 |
</html> |
</html> |
| 960 |
<!-- $Date$ --> |
<!-- $Date$ --> |