'use strict';

var WritableStream = require('stream').Writable,
    StringDecoder = require('string_decoder').StringDecoder,
    inherits = require('util').inherits,
    Parser = require('./index');

/**
 * Streaming HTML parser with scripting support.
 * A [writable stream]{@link https://nodejs.org/api/stream.html#stream_class_stream_writable}.
 * @class ParserStream
 * @memberof parse5
 * @instance
 * @extends stream.Writable
 * @param {ParserOptions} options - Parsing options.
 * @example
 * var parse5 = require('parse5');
 * var http = require('http');
 *
 * // Fetch the google.com content and obtain its <body> node
 * http.get('http://google.com', function(res) {
 *  var parser = new parse5.ParserStream();
 *
 *  parser.on('finish', function() {
 *      var body = parser.document.childNodes[0].childNodes[1];
 *  });
 *
 *  res.pipe(parser);
 * });
 */
var ParserStream = module.exports = function (options) {
    WritableStream.call(this);

    this.parser = new Parser(options);

    this.lastChunkWritten = false;
    this.writeCallback = null;
    this.pausedByScript = false;

    //NOTE: a stateful decoder keeps the trailing bytes of a multi-byte UTF-8
    //character that was split across two chunks, instead of decoding every
    //chunk on its own and corrupting the character.
    this.decoder = new StringDecoder('utf8');

    /**
     * The resulting document node.
     * @member {ASTNode} document
     * @memberof parse5#ParserStream
     * @instance
     */
    this.document = this.parser.treeAdapter.createDocument();

    this.pendingHtmlInsertions = [];

    //NOTE: these methods are handed out as bare callbacks (to the parsing loop
    //and to 'script' listeners), so they are bound to the instance once here.
    this._resume = this._resume.bind(this);
    this._documentWrite = this._documentWrite.bind(this);
    this._scriptHandler = this._scriptHandler.bind(this);

    this.parser._bootstrap(this.document, null);
};

inherits(ParserStream, WritableStream);

//WritableStream implementation

/**
 * Feeds a chunk to the tokenizer and runs the parsing loop.
 * The stream callback is stored rather than invoked here; it is passed to the
 * parser's parsing loop on every run.
 * @param {Buffer|String} chunk - Chunk to parse. Buffers are decoded as UTF-8.
 * @param {String} encoding - Unused.
 * @param {Function} callback - Stream callback for this chunk.
 */
ParserStream.prototype._write = function (chunk, encoding, callback) {
    var html = typeof chunk === 'string' ? chunk : this.decoder.write(chunk);

    //NOTE: flush whatever incomplete byte sequence the decoder still holds
    //once the input is known to be over.
    if (this.lastChunkWritten)
        html += this.decoder.end();

    this.writeCallback = callback;
    this.parser.tokenizer.write(html, this.lastChunkWritten);
    this._runParsingLoop();
};

/**
 * Marks the input as finished and ends the stream.
 * Accepts the same overloads as `stream.Writable#end`: `end()`, `end(callback)`,
 * `end(chunk, callback)` and `end(chunk, encoding, callback)`.
 * @param {Buffer|String} [chunk] - Optional final chunk.
 * @param {String} [encoding] - Encoding of `chunk` if it is a string.
 * @param {Function} [callback] - Called when the stream has finished.
 */
ParserStream.prototype.end = function (chunk, encoding, callback) {
    if (typeof chunk === 'function') {
        callback = chunk;
        chunk = null;
        encoding = null;
    }
    else if (typeof encoding === 'function') {
        callback = encoding;
        encoding = null;
    }

    //NOTE(review): chunks that are still buffered by the stream when end() is
    //called will also reach _write with this flag set - confirm the tokenizer
    //tolerates that.
    this.lastChunkWritten = true;

    //NOTE: the last-chunk flag only reaches the tokenizer through _write, and
    //the base end() performs no write when it gets no chunk (e.g. when called
    //by pipe()). Always push a final, possibly empty, chunk so the tokenizer
    //is told that the input is over.
    if (chunk === null || chunk === void 0)
        chunk = '';

    WritableStream.prototype.end.call(this, chunk, encoding, callback);
};

//Scriptable parser implementation

/**
 * Runs the parser's parsing loop with the current stream callback and the
 * script handler of this stream.
 */
ParserStream.prototype._runParsingLoop = function () {
    this.parser._runParsingLoop(this.writeCallback, this._scriptHandler);
};

/**
 * Resumes parsing after a 'script' listener is done. Passed to 'script'
 * listeners as the `resume` argument.
 * @throws {Error} If the parser is not currently paused by a script.
 */
ParserStream.prototype._resume = function () {
    if (!this.pausedByScript)
        throw new Error('Parser was already resumed');

    //NOTE: pending insertions are taken from the end of the list, i.e. the most
    //recently written html is handed to the tokenizer first.
    while (this.pendingHtmlInsertions.length) {
        var html = this.pendingHtmlInsertions.pop();

        this.parser.tokenizer.insertHtmlAtCurrentPos(html);
    }

    this.pausedByScript = false;

    //NOTE: keep parsing if we don't wait for the next input chunk
    if (this.parser.tokenizer.active)
        this._runParsingLoop();
};

/**
 * Queues html to be inserted at the current tokenizer position on resume.
 * Passed to 'script' listeners as the `documentWrite` argument.
 * Does nothing if the parser is stopped.
 * @param {String} html - Markup to insert.
 */
ParserStream.prototype._documentWrite = function (html) {
    if (!this.parser.stopped)
        this.pendingHtmlInsertions.push(html);
};

/**
 * Script handler passed to the parser's parsing loop.
 * If there are 'script' listeners, the stream is marked as paused and the
 * event is emitted; parsing continues once a listener calls `resume`.
 * Otherwise the parsing loop is continued right away.
 * @param {ASTNode} scriptElement - The script element handed over by the parser.
 */
ParserStream.prototype._scriptHandler = function (scriptElement) {
    if (this.listeners('script').length) {
        this.pausedByScript = true;

        /**
         * Raised when the parser encounters a `<script>` element.
         * If this event has listeners, parsing is suspended once it is emitted
         * and continues only after the `resume` function has been called.
         * @event script
         * @memberof parse5#ParserStream
         * @instance
         * @type {Function}
         * @param {ASTNode} scriptElement - The script element that caused the event.
         * @param {Function} documentWrite(html) - Queues `html` for insertion at the
         *  current parsing position; the insertion happens on resume.
         * @param {Function} resume - Resumes parsing.
         */
        this.emit('script', scriptElement, this._documentWrite, this._resume);
    }
    else
        this._runParsingLoop();
};