47 |
var p = new Parser (new InputStream (v)); |
var p = new Parser (new InputStream (v)); |
48 |
var doc = p.doc; |
var doc = p.doc; |
49 |
p.parse (); |
p.parse (); |
50 |
|
|
51 |
log (dumpTree (doc, '')); |
log (dumpTree (doc, '')); |
52 |
|
|
53 |
|
if (p.hasAsyncScript) { |
54 |
|
log ('Some script codes are executed asynchronously; it means that the document might be rendered in different ways depending on the network condition and other factors'); |
55 |
|
} |
56 |
} |
} |
57 |
} // update2 |
} // update2 |
58 |
|
|
78 |
this.openElements = [doc]; |
this.openElements = [doc]; |
79 |
this.input = i; |
this.input = i; |
80 |
this.scriptsExecutedAfterParsing = []; |
this.scriptsExecutedAfterParsing = []; |
81 |
|
this.scriptsExecutedSoon = []; |
82 |
} // Parser |
} // Parser |
83 |
|
|
84 |
Parser.prototype.getNextToken = function () { |
Parser.prototype.getNextToken = function () { |
156 |
tagName = v.toLowerCase (); |
tagName = v.toLowerCase (); |
157 |
return ''; |
return ''; |
158 |
}); |
}); |
159 |
e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"']+)))?/, |
while (true) { |
160 |
function (x, attrName, attrValue1, attrValue2, attrValue3) { |
var m = false; |
161 |
v = attrValue1 || attrValue2 || attrValue3; |
e = e.replace (/^\s*([^\s=]+)\s*(?:=\s*(?:"([^"]*)"|'([^']*)'|([^"'\s]*)))?/, |
162 |
v = v.replace (/"/g, '"').replace (/'/g, "'") |
function (x, attrName, attrValue1, attrValue2, attrValue3) { |
163 |
.replace (/&/g, '&'); |
v = attrValue1 || attrValue2 || attrValue3; |
164 |
attrs[attrName.toLowerCase ()] = v; |
v = v.replace (/"/g, '"').replace (/'/g, "'") |
165 |
return ''; |
.replace (/&/g, '&'); |
166 |
}); |
attrs[attrName.toLowerCase ()] = v; |
167 |
|
m = true; |
168 |
|
return ''; |
169 |
|
}); |
170 |
|
if (!m) break; |
171 |
|
} |
172 |
if (e.length) { |
if (e.length) { |
173 |
log ('Broken start tag: "' + e + '"'); |
log ('Broken start tag: "' + e + '"'); |
174 |
} |
} |
335 |
|
|
336 |
// "When a script completes loading" rules start applying. |
// "When a script completes loading" rules start applying. |
337 |
|
|
338 |
// TODO: Handles "list of scripts that will execute as soon as possible" |
// List of scripts that will execute as soon as possible |
339 |
// and "list of scripts that will execute asynchronously" |
for (var i = 0; i < this.scriptsExecutedSoon.length; i++) { |
340 |
|
var e = this.scriptsExecutedSoon[i]; |
341 |
|
|
342 |
|
// If it has completed loading |
343 |
|
log ('Execute an external script not inserted by parser...'); |
344 |
|
executeScript (this.doc, e); |
345 |
|
|
346 |
|
// NOTE: It MAY be executed before the end of the parsing, according |
347 |
|
// to the spec. |
348 |
|
this.hasAsyncScript = true; |
349 |
|
} |
350 |
|
|
351 |
|
// TODO: Handles |
352 |
|
// "list of scripts that will execute asynchronously" |
353 |
|
|
354 |
// Handle "list of scripts that will execute when the document has finished |
// Handle "list of scripts that will execute when the document has finished |
355 |
// parsing". |
// parsing". |
453 |
log ('Error: There is a script that will execute as soon as the parser resumes.'); |
log ('Error: There is a script that will execute as soon as the parser resumes.'); |
454 |
} |
} |
455 |
p.scriptExecutedWhenParserResumes = e; |
p.scriptExecutedWhenParserResumes = e; |
456 |
log ('Running a script: aborted (src)'); |
log ('Running a script: aborted (src parser-inserted)'); |
457 |
} else if (e.src != null) { |
} else if (e.src != null) { |
458 |
// TODO |
p.scriptsExecutedSoon.push (e); |
459 |
|
log ('Running a script: aborted (src)'); |
460 |
} else { |
} else { |
461 |
executeScript (doc, e); // even if other scripts are already executing. |
executeScript (doc, e); // even if other scripts are already executing. |
462 |
} |
} |
504 |
var m; |
var m; |
505 |
if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) { |
if (m = uri.match (/^javascript:\s*(?:'([^']*)'|"([^"]+)")\s*$/i)) { |
506 |
if (m[1]) { |
if (m[1]) { |
507 |
return m[1]; |
return unescapeJSLiteral (m[1]); |
508 |
} else if (m[2]) { |
} else if (m[2]) { |
509 |
return m[2]; |
return unescapeJSLiteral (m[2]); |
510 |
} else { |
} else { |
511 |
return null; |
return null; |
512 |
} |
} |
527 |
matched = true; |
matched = true; |
528 |
var args = []; |
var args = []; |
529 |
t.replace (/('[^']*'|"[^"]*")/g, function (s, v) { |
t.replace (/('[^']*'|"[^"]*")/g, function (s, v) { |
530 |
args.push (v.substring (1, v.length - 1)); |
args.push (unescapeJSLiteral (v.substring (1, v.length - 1))); |
531 |
return ''; |
return ''; |
532 |
}); |
}); |
533 |
doc.write.apply (doc, args); |
doc.write.apply (doc, args); |
534 |
return ''; |
return ''; |
535 |
}); |
}); |
536 |
|
s = s.replace (/^\s*var\s+s\s*=\s*document\.createElement\s*\(\s*['"]script['"]\s*\)\s*;\s*s\.src\s*=\s*(?:'(javascript:[^']*)'|"(javascript:[^"]*)")\s*;\s*document\.documentElement\.appendChild\s*\(\s*s\s*\)\s*;\s*/, |
537 |
|
function (s, t, u) { |
538 |
|
matched = true; |
539 |
|
var args = [unescapeJSLiteral (t ? t : u)]; |
540 |
|
doc._insertExternalScript.apply (doc, args); |
541 |
|
return ''; |
542 |
|
}); |
543 |
if (s == '') break; |
if (s == '') break; |
544 |
if (!matched) { |
if (!matched) { |
545 |
log ('Script parse error: "' + s + '"'); |
log ('Script parse error: "' + s + '"'); |
548 |
} |
} |
549 |
} // parseAndRunScript |
} // parseAndRunScript |
550 |
|
|
551 |
|
/**
 * Decodes `\uHHHH` escape sequences found in the contents of a JavaScript
 * string literal.
 *
 * Only escapes of the form backslash, "u", four hexadecimal digits are
 * recognized.  Any other text, including other backslash sequences and
 * truncated `\u` escapes, is returned unchanged.
 *
 * @param {string} s - Literal contents; the visible callers pass the text
 *     between the quotes, with the quotes already removed.
 * @returns {string} `s` with every `\uHHHH` replaced by the UTF-16 code
 *     unit whose value is `HHHH`.
 */
function unescapeJSLiteral (s) {
  return s.replace (/\\u([0-9A-Fa-f]{4})/g, function (t, v) {
    // Parse with an explicit radix rather than relying on a "0x" prefix
    // being auto-detected as hexadecimal.
    return String.fromCharCode (parseInt (v, 16));
  });
} // unescapeJSLiteral
556 |
|
|
557 |
/**
 * Constructor for a text node in the simplified document tree.
 *
 * The argument is stored as-is in the `data` property; `dumpTree`
 * identifies these nodes with `instanceof JSText` and prints `data`.
 *
 * @constructor
 * @param {string} data - Character data of the node (presumably a string;
 *     no conversion or validation is performed here).
 */
function JSText (data) {
  this.data = data;
} // JSText
664 |
return; |
return; |
665 |
}; // document.write |
}; // document.write |
666 |
|
|
667 |
|
/**
 * Creates a new `script` element owned by this document, sets its `src`
 * to `uri`, and appends it to this document's `documentElement`.
 *
 * This is the helper that `parseAndRunScript` invokes for the supported
 * "createElement ('script') / s.src = ... / appendChild (s)" statement
 * sequence.
 *
 * NOTE(review): the `documentElement` getter returns null when the
 * document has no element child; in that case the `appendChild` call
 * below throws.  Confirm callers only reach this once a root element
 * exists.
 *
 * @param {string} uri - Value assigned to the new element's `src`.
 */
JSDocument.prototype._insertExternalScript = function (uri) {
  var s = new JSElement (this, 'script');
  s.src = uri;
  this.documentElement.appendChild (s);
}; // _insertExternalScript
672 |
|
|
673 |
|
/**
 * `documentElement` getter for JSDocument.
 *
 * Scans `this.childNodes` in order and returns the first child that is a
 * JSElement, or null when the document has no element child.
 *
 * Defined with `__defineGetter__` to stay consistent with the other
 * getters in this file (e.g. JSElement's `text`).
 */
JSDocument.prototype.__defineGetter__ ('documentElement', function () {
  var cn = this.childNodes;
  for (var i = 0; i < cn.length; i++) {
    if (cn[i] instanceof JSElement) {
      return cn[i];
    }
  }
  return null;
});
682 |
|
|
683 |
JSElement.prototype.__defineGetter__ ('text', function () { |
JSElement.prototype.__defineGetter__ ('text', function () { |
684 |
var r = ''; |
var r = ''; |
685 |
for (var i = 0; i < this.childNodes.length; i++) { |
for (var i = 0; i < this.childNodes.length; i++) { |
698 |
r += '| ' + indent + node.localName + '\n'; |
r += '| ' + indent + node.localName + '\n'; |
699 |
if (node.async) r += '| ' + indent + ' async=""\n'; |
if (node.async) r += '| ' + indent + ' async=""\n'; |
700 |
if (node.defer) r += '| ' + indent + ' defer=""\n'; |
if (node.defer) r += '| ' + indent + ' defer=""\n'; |
701 |
if (node.src) r += '| ' + indent + ' src="' + node.src + '"\n'; |
if (node.src != null) { |
702 |
|
r += '| ' + indent + ' src="' + node.src + '"\n'; |
703 |
|
} |
704 |
r += dumpTree (node, indent + ' '); |
r += dumpTree (node, indent + ' '); |
705 |
} else if (node instanceof JSText) { |
} else if (node instanceof JSText) { |
706 |
r += '| ' + indent + '"' + node.data + '"\n'; |
r += '| ' + indent + '"' + node.data + '"\n'; |
749 |
<p> |
<p> |
750 |
</textarea> |
</textarea> |
751 |
|
|
752 |
<h2>Log</h2> |
<h2 id=log>Log</h2> |
753 |
<p><output></output> |
<p><output></output> |
754 |
|
|
755 |
<h2>Note</h2> |
<h2 id=notes>Notes</h2> |
756 |
|
|
757 |
<p>This is a <em>simplified</em> implementation of |
<p>This is a <em>simplified</em> implementation of |
758 |
<a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5 |
<a href="http://www.whatwg.org/specs/web-apps/current-work/#parsing">HTML5 |
774 |
<li>Only supports <code>script</code> <code>type</code> |
<li>Only supports <code>script</code> <code>type</code> |
775 |
<code>text/javascript</code>. <code>type</code> and <code>language</code> |
<code>text/javascript</code>. <code>type</code> and <code>language</code> |
776 |
attributes are ignored. |
attributes are ignored. |
777 |
<li>Only supports <code>document.write</code>. |
<li>Only supports limited statements. It must consist of zero or more |
778 |
The script code must be match to the regular expression |
statements similar to the following, possibly |
779 |
<code>^\s*(?:document\.write\s*\(<var>v</var>\s*(?:,\s*<var>v</var>\s*)*\)\s*;\s*)*$</code> |
preceded, followed, or separated by white space characters: |
780 |
where <var>v</var> is <code>"[^"]*"|'[^']*'</code>. |
<ul> |
781 |
|
<li><code>document.write ("<var>string</var>", ["<var>string</var>", ...]);</code>. |
782 |
|
<li><code>var s = document.createElement ("script"); |
783 |
|
s.src = "<var>string</var>"; |
784 |
|
document.documentElement.appendChild (s);</code> |
785 |
|
</ul> |
786 |
|
Note that strings may be delimited by <code>'</code>s instead of |
787 |
|
<code>"</code>s. |
788 |
<li>Only supports <code>javascript:</code> |
<li>Only supports <code>javascript:</code> |
789 |
<abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the |
<abbr title="Uniform Resource Identifiers">URI</abbr> scheme in the |
790 |
<code>src</code> attribute of the <code>script</code> element. In addition, |
<code>src</code> attribute of the <code>script</code> element. In addition, |
791 |
the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to |
the <abbr title="Uniform Resource Identifiers">URI</abbr> must conform to |
792 |
the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>. |
the regular expression <code>^javascript:\s*(?:"[^"]*"|'[^']*')\s*$</code>. |
793 |
|
<li>Only supports <code>\u<var>HHHH</var></code> escapes in JavaScript |
794 |
|
string literals. |
795 |
</ul> |
</ul> |
796 |
|
|
797 |
<p>For some reason, this parser does not work in browsers that do |
<p>For some reason, this parser does not work in browsers that do |
798 |
not support JavaScript 1.5. |
not support JavaScript 1.5. |
799 |
|
|
800 |
<!-- TODO: multiple attributes are not supported yet --> |
<!-- TODO: license --> |
801 |
|
|
802 |
</body> |
</body> |
803 |
</html> |
</html> |