123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228 |
- 'use strict'
- /**
- * Copyright Brian White. All rights reserved.
- *
- * @see https://github.com/mscdex/streamsearch
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Based heavily on the Streaming Boyer-Moore-Horspool C++ implementation
- * by Hongli Lai at: https://github.com/FooBarWidget/boyer-moore-horspool
- */
- const EventEmitter = require('node:events').EventEmitter
- const inherits = require('node:util').inherits
- function SBMH (needle) {
- if (typeof needle === 'string') {
- needle = Buffer.from(needle)
- }
- if (!Buffer.isBuffer(needle)) {
- throw new TypeError('The needle has to be a String or a Buffer.')
- }
- const needleLength = needle.length
- if (needleLength === 0) {
- throw new Error('The needle cannot be an empty String/Buffer.')
- }
- if (needleLength > 256) {
- throw new Error('The needle cannot have a length bigger than 256.')
- }
- this.maxMatches = Infinity
- this.matches = 0
- this._occ = new Array(256)
- .fill(needleLength) // Initialize occurrence table.
- this._lookbehind_size = 0
- this._needle = needle
- this._bufpos = 0
- this._lookbehind = Buffer.alloc(needleLength)
- // Populate occurrence table with analysis of the needle,
- // ignoring last letter.
- for (var i = 0; i < needleLength - 1; ++i) { // eslint-disable-line no-var
- this._occ[needle[i]] = needleLength - 1 - i
- }
- }
- inherits(SBMH, EventEmitter)
- SBMH.prototype.reset = function () {
- this._lookbehind_size = 0
- this.matches = 0
- this._bufpos = 0
- }
- SBMH.prototype.push = function (chunk, pos) {
- if (!Buffer.isBuffer(chunk)) {
- chunk = Buffer.from(chunk, 'binary')
- }
- const chlen = chunk.length
- this._bufpos = pos || 0
- let r
- while (r !== chlen && this.matches < this.maxMatches) { r = this._sbmh_feed(chunk) }
- return r
- }
- SBMH.prototype._sbmh_feed = function (data) {
- const len = data.length
- const needle = this._needle
- const needleLength = needle.length
- const lastNeedleChar = needle[needleLength - 1]
- // Positive: points to a position in `data`
- // pos == 3 points to data[3]
- // Negative: points to a position in the lookbehind buffer
- // pos == -2 points to lookbehind[lookbehind_size - 2]
- let pos = -this._lookbehind_size
- let ch
- if (pos < 0) {
- // Lookbehind buffer is not empty. Perform Boyer-Moore-Horspool
- // search with character lookup code that considers both the
- // lookbehind buffer and the current round's haystack data.
- //
- // Loop until
- // there is a match.
- // or until
- // we've moved past the position that requires the
- // lookbehind buffer. In this case we switch to the
- // optimized loop.
- // or until
- // the character to look at lies outside the haystack.
- while (pos < 0 && pos <= len - needleLength) {
- ch = this._sbmh_lookup_char(data, pos + needleLength - 1)
- if (
- ch === lastNeedleChar &&
- this._sbmh_memcmp(data, pos, needleLength - 1)
- ) {
- this._lookbehind_size = 0
- ++this.matches
- this.emit('info', true)
- return (this._bufpos = pos + needleLength)
- }
- pos += this._occ[ch]
- }
- // No match.
- if (pos < 0) {
- // There's too few data for Boyer-Moore-Horspool to run,
- // so let's use a different algorithm to skip as much as
- // we can.
- // Forward pos until
- // the trailing part of lookbehind + data
- // looks like the beginning of the needle
- // or until
- // pos == 0
- while (pos < 0 && !this._sbmh_memcmp(data, pos, len - pos)) { ++pos }
- }
- if (pos >= 0) {
- // Discard lookbehind buffer.
- this.emit('info', false, this._lookbehind, 0, this._lookbehind_size)
- this._lookbehind_size = 0
- } else {
- // Cut off part of the lookbehind buffer that has
- // been processed and append the entire haystack
- // into it.
- const bytesToCutOff = this._lookbehind_size + pos
- if (bytesToCutOff > 0) {
- // The cut off data is guaranteed not to contain the needle.
- this.emit('info', false, this._lookbehind, 0, bytesToCutOff)
- }
- this._lookbehind.copy(this._lookbehind, 0, bytesToCutOff,
- this._lookbehind_size - bytesToCutOff)
- this._lookbehind_size -= bytesToCutOff
- data.copy(this._lookbehind, this._lookbehind_size)
- this._lookbehind_size += len
- this._bufpos = len
- return len
- }
- }
- pos += (pos >= 0) * this._bufpos
- // Lookbehind buffer is now empty. We only need to check if the
- // needle is in the haystack.
- if (data.indexOf(needle, pos) !== -1) {
- pos = data.indexOf(needle, pos)
- ++this.matches
- if (pos > 0) { this.emit('info', true, data, this._bufpos, pos) } else { this.emit('info', true) }
- return (this._bufpos = pos + needleLength)
- } else {
- pos = len - needleLength
- }
- // There was no match. If there's trailing haystack data that we cannot
- // match yet using the Boyer-Moore-Horspool algorithm (because the trailing
- // data is less than the needle size) then match using a modified
- // algorithm that starts matching from the beginning instead of the end.
- // Whatever trailing data is left after running this algorithm is added to
- // the lookbehind buffer.
- while (
- pos < len &&
- (
- data[pos] !== needle[0] ||
- (
- (Buffer.compare(
- data.subarray(pos, pos + len - pos),
- needle.subarray(0, len - pos)
- ) !== 0)
- )
- )
- ) {
- ++pos
- }
- if (pos < len) {
- data.copy(this._lookbehind, 0, pos, pos + (len - pos))
- this._lookbehind_size = len - pos
- }
- // Everything until pos is guaranteed not to contain needle data.
- if (pos > 0) { this.emit('info', false, data, this._bufpos, pos < len ? pos : len) }
- this._bufpos = len
- return len
- }
- SBMH.prototype._sbmh_lookup_char = function (data, pos) {
- return (pos < 0)
- ? this._lookbehind[this._lookbehind_size + pos]
- : data[pos]
- }
- SBMH.prototype._sbmh_memcmp = function (data, pos, len) {
- for (var i = 0; i < len; ++i) { // eslint-disable-line no-var
- if (this._sbmh_lookup_char(data, pos + i) !== this._needle[i]) { return false }
- }
- return true
- }
- module.exports = SBMH
|