lepu-test-platform-web/node_modules/parse5/lib/parser/stream.js

141 lines
4.6 KiB
JavaScript

'use strict';
var WritableStream = require('stream').Writable,
StringDecoder = require('string_decoder').StringDecoder,
inherits = require('util').inherits,
Parser = require('./index');
/**
* Streaming HTML parser with scripting support.
* A [writable stream]{@link https://nodejs.org/api/stream.html#stream_class_stream_writable}.
* @class ParserStream
* @memberof parse5
* @instance
* @extends stream.Writable
* @param {ParserOptions} options - Parsing options.
* @example
* var parse5 = require('parse5');
* var http = require('http');
*
* // Fetch the google.com content and obtain its <body> node
* http.get('http://google.com', function(res) {
* var parser = new parse5.ParserStream();
*
* parser.on('finish', function() {
* var body = parser.document.childNodes[0].childNodes[1];
* });
*
* res.pipe(parser);
* });
*/
var ParserStream = module.exports = function (options) {
    var stream = this;

    WritableStream.call(stream);

    // Parser instance plus the flags that track the streaming state.
    stream.parser = new Parser(options);
    stream.lastChunkWritten = false;
    stream.writeCallback = null;
    stream.pausedByScript = false;

    /**
     * The resulting document node.
     * @member {ASTNode<document>} document
     * @memberof parse5#ParserStream
     * @instance
     */
    stream.document = stream.parser.treeAdapter.createDocument();

    // HTML chunks queued by `_documentWrite` until `_resume` is called.
    stream.pendingHtmlInsertions = [];

    // These methods are handed out as plain callbacks, so pin them to this instance.
    ['_resume', '_documentWrite', '_scriptHandler'].forEach(function (methodName) {
        stream[methodName] = stream[methodName].bind(stream);
    });

    stream.parser._bootstrap(stream.document, null);
};

inherits(ParserStream, WritableStream);
//WritableStream implementation
/**
 * `stream.Writable` hook: converts the incoming chunk to text, feeds it to the
 * tokenizer together with the "last chunk" flag and runs the parsing loop.
 *
 * Buffers are decoded through a per-stream `StringDecoder`, so a multi-byte UTF-8
 * character that is split across two chunks is reassembled instead of being
 * turned into replacement characters. String chunks are passed through as is.
 *
 * @param {Buffer|String} chunk - Data written to the stream.
 * @param {String} encoding - Chunk encoding reported by the stream machinery (unused).
 * @param {Function} callback - Write-completion callback; stored in `writeCallback`
 * and handed to the parsing loop.
 */
ParserStream.prototype._write = function (chunk, encoding, callback) {
    var html;

    this.writeCallback = callback;

    if (typeof chunk === 'string')
        html = chunk;
    else {
        //NOTE: created lazily; the decoder keeps the bytes of an incomplete
        //character between writes, so it has to live on the stream instance.
        if (!this.chunkDecoder)
            this.chunkDecoder = new StringDecoder('utf8');

        //NOTE: on the last chunk flush whatever the decoder still holds.
        html = this.lastChunkWritten ?
            this.chunkDecoder.end(chunk) :
            this.chunkDecoder.write(chunk);
    }

    this.parser.tokenizer.write(html, this.lastChunkWritten);
    this._runParsingLoop();
};
/**
 * Finishes the stream. Marks the next write as the last one and always performs
 * a final write, using an empty chunk when none is supplied.
 *
 * `_write` is the only place where `lastChunkWritten` is forwarded to the
 * tokenizer, so `end()` / `end(callback)` (which is how `pipe()` ends its
 * destination) would otherwise never deliver the flag.
 *
 * @param {Buffer|String} [chunk] - Optional final data.
 * @param {String} [encoding] - Encoding of `chunk` if it is a string.
 * @param {Function} [callback] - Passed through to `stream.Writable#end`.
 */
ParserStream.prototype.end = function (chunk, encoding, callback) {
    //NOTE: normalize the `end(callback)` and `end(chunk, callback)` overloads
    //so that the empty-chunk fallback below applies to them as well.
    if (typeof chunk === 'function') {
        callback = chunk;
        chunk = void 0;
        encoding = void 0;
    }
    else if (typeof encoding === 'function') {
        callback = encoding;
        encoding = void 0;
    }

    this.lastChunkWritten = true;
    WritableStream.prototype.end.call(this, chunk || '', encoding, callback);
};
//Scriptable parser implementation
/**
 * Delegates to the underlying parser's parsing loop, handing it the callback of
 * the write currently in progress and the bound script handler.
 */
ParserStream.prototype._runParsingLoop = function () {
    var pendingWriteCallback = this.writeCallback,
        scriptHandler = this._scriptHandler;

    this.parser._runParsingLoop(pendingWriteCallback, scriptHandler);
};
/**
 * Resumes parsing after a `script` event. Feeds every queued `documentWrite`
 * chunk to the tokenizer (most recently queued first), clears the pause flag and
 * re-enters the parsing loop if the tokenizer is still active.
 *
 * @throws {Error} If the parser is not currently paused by a script.
 */
ParserStream.prototype._resume = function () {
    if (this.pausedByScript) {
        var tokenizer = this.parser.tokenizer,
            queued = this.pendingHtmlInsertions;

        // Drain the queue from its tail; every chunk goes in at the current position.
        for (var remaining = queued.length; remaining > 0; remaining = queued.length)
            tokenizer.insertHtmlAtCurrentPos(queued.pop());

        this.pausedByScript = false;

        //NOTE: continue right away unless the tokenizer is waiting for more input
        if (tokenizer.active)
            this._runParsingLoop();

        return;
    }

    throw new Error('Parser was already resumed');
};
/**
 * `documentWrite` callback given to `script` listeners. Queues `html` for
 * insertion on the next `_resume` call; ignored once the parser has stopped.
 *
 * @param {String} html - Markup to insert.
 */
ParserStream.prototype._documentWrite = function (html) {
    if (this.parser.stopped)
        return;

    this.pendingHtmlInsertions.push(html);
};
/**
 * Script handler handed to the parser's parsing loop. Without `script` listeners
 * parsing simply continues; otherwise the stream marks itself as paused and emits
 * the `script` event.
 *
 * @param {ASTNode} scriptElement - The script element passed in by the parser.
 */
ParserStream.prototype._scriptHandler = function (scriptElement) {
    var hasScriptListeners = this.listeners('script').length > 0;

    if (!hasScriptListeners) {
        this._runParsingLoop();
        return;
    }

    this.pausedByScript = true;

    /**
     * Raised when the parser encounters a `<script>` element.
     * If this event has listeners, parsing will be suspended once it is emitted.
     * So, if `<script>` has the `src` attribute, you can fetch it, execute and then resume parsing just like browsers do.
     * @event script
     * @memberof parse5#ParserStream
     * @instance
     * @type {Function}
     * @param {ASTNode} scriptElement - The script element that caused the event.
     * @param {Function} documentWrite(html) - Write additional `html` at the current parsing position.
     * Suitable for implementing the DOM `document.write` and `document.writeln` methods.
     * @param {Function} resume - Resumes parsing.
     * @example
     * var parse5 = require('parse5');
     * var http = require('http');
     *
     * var parser = new parse5.ParserStream();
     *
     * parser.on('script', function(scriptElement, documentWrite, resume) {
     *     var src = parse5.treeAdapters.default.getAttrList(scriptElement)[0].value;
     *
     *     http.get(src, function(res) {
     *         // Fetch the script content, execute it with DOM built around `parser.document` and
     *         // `document.write` implemented using `documentWrite`.
     *         ...
     *         // Then resume parsing.
     *         resume();
     *     });
     * });
     *
     * parser.end('<script src="example.com/script.js"></script>');
     */
    this.emit('script', scriptElement, this._documentWrite, this._resume);
};