123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642 |
- import reFactory from './lib/re.mjs'
- //
- // Helpers
- //
// Shallow-merge helper.
//
// Copies the own enumerable string-keyed properties of every source into
// `obj`, left to right (later sources win), and returns `obj` itself.
// Falsy sources (null, undefined, ...) are skipped.
//
function assign (obj, ...sources) {
  for (const source of sources) {
    if (!source) { continue }

    for (const key of Object.keys(source)) {
      obj[key] = source[key]
    }
  }

  return obj
}
// Built-in type tag of a value as produced by Object.prototype.toString,
// e.g. '[object RegExp]'.
function _class (obj) {
  return Object.prototype.toString.call(obj)
}

// True when the type tag of `obj` is '[object <tag>]'.
function hasTag (obj, tag) {
  return _class(obj) === '[object ' + tag + ']'
}

function isString (obj) {
  return hasTag(obj, 'String')
}

function isObject (obj) {
  return hasTag(obj, 'Object')
}

function isRegExp (obj) {
  return hasTag(obj, 'RegExp')
}

function isFunction (obj) {
  return hasTag(obj, 'Function')
}

// Backslash-escape every character that is special inside a RegExp source,
// so that `str` can be embedded in a pattern as a literal.
function escapeRE (str) {
  return str.replace(/[.?*+^$[\]\\(){}|-]/g, '\\$&')
}
- //
// Option defaults; user options are merged over these in the constructor.
const defaultOptions = {
  fuzzyLink: true,
  fuzzyEmail: true,
  fuzzyIP: false
}

// Tells whether `obj` looks like an options object, i.e. has at least one
// own key that is also a key of `defaultOptions`. Falsy input gives `false`.
function isOptionsObj (obj) {
  const candidate = obj || {}

  return Object.keys(candidate).some(function (key) {
    return Object.prototype.hasOwnProperty.call(defaultOptions, key)
  })
}
// Length of the text matched by `re` in `tail`, or 0 when there is no match.
function matchedLength (re, tail) {
  const found = tail.match(re)
  return found === null ? 0 : found[0].length
}

// Built-in schemas. Every validator receives the full text, the offset right
// after the schema prefix and the linkifier instance, and returns the length
// of the valid tail (0 when the tail is not a link).
//
// The regexps are compiled lazily and cached on `self.re`, because
// "host"-containing variables can change on tlds update.
const defaultSchemas = {
  'http:': {
    validate (text, pos, self) {
      if (!self.re.http) {
        self.re.http = new RegExp(
          '^\\/\\/' + self.re.src_auth + self.re.src_host_port_strict + self.re.src_path, 'i'
        )
      }

      return matchedLength(self.re.http, text.slice(pos))
    }
  },

  'https:': 'http:',

  'ftp:': 'http:',

  '//': {
    validate (text, pos, self) {
      if (!self.re.no_http) {
        self.re.no_http = new RegExp(
          '^' +
          self.re.src_auth +
          // Don't allow single-level domains, because of false positives like '//test'
          // with code comments
          '(?:localhost|(?:(?:' + self.re.src_domain + ')\\.)+' + self.re.src_domain_root + ')' +
          self.re.src_port +
          self.re.src_host_terminator +
          self.re.src_path,
          'i'
        )
      }

      const found = text.slice(pos).match(self.re.no_http)
      if (found === null) { return 0 }

      // should not be `://` & `///`, that protects from errors in protocol name
      if (pos >= 3) {
        const before = text[pos - 3]
        if (before === ':' || before === '/') { return 0 }
      }

      return found[0].length
    }
  },

  'mailto:': {
    validate (text, pos, self) {
      if (!self.re.mailto) {
        self.re.mailto = new RegExp(
          '^' + self.re.src_email_name + '@' + self.re.src_host_strict, 'i'
        )
      }

      return matchedLength(self.re.mailto, text.slice(pos))
    }
  }
}
// RegExp source that matches the known 2-character tlds
// (autogenerated by ./support/tlds_2char_gen.js).
/* eslint-disable-next-line max-len */
const tlds_2ch_src_re = 'a[cdefgilmnoqrstuwxz]|b[abdefghijmnorstvwyz]|c[acdfghiklmnoruvwxyz]|d[ejkmoz]|e[cegrstu]|f[ijkmor]|g[abdefghilmnpqrstuwy]|h[kmnrtu]|i[delmnoqrst]|j[emop]|k[eghimnprwyz]|l[abcikrstuvy]|m[acdeghklmnopqrstuvwxyz]|n[acefgilopruz]|om|p[aefghklmnrstwy]|qa|r[eosuw]|s[abcdeghijklmnortuvxyz]|t[cdfghjklmnortvwz]|u[agksyz]|v[aceginu]|w[fs]|y[et]|z[amw]'

// Default list of longer tlds.
// DON'T try to make PRs with changes. Extend TLDs with LinkifyIt.tlds() instead
const tlds_default = [
  'biz', 'com', 'edu', 'gov', 'net', 'org', 'pro', 'web', 'xxx',
  'aero', 'asia', 'coop', 'info', 'museum', 'name', 'shop', 'рф'
]
// Forget the result of the last scan: no match position, no cached text.
function resetScanCache (self) {
  Object.assign(self, {
    __index__: -1,
    __text_cache__: ''
  })
}
// Wrap a RegExp into a validator function.
//
// - re (RegExp): pattern applied to the text that follows the schema prefix.
//
// The returned function takes `(text, pos)` and returns the length of the
// first match of `re` in `text.slice(pos)`, or 0 when there is none.
//
// The pattern is run once per call. Global (`g`) and sticky (`y`) regexps keep
// their scan position in `lastIndex`; it is reset before and after the run, so
// a user-supplied stateful regexp gives the same answer on every call instead
// of failing or throwing depending on what ran before.
function createValidator (re) {
  const stateful = re.global || re.sticky

  return function (text, pos) {
    const tail = text.slice(pos)

    if (stateful) { re.lastIndex = 0 }
    const found = re.exec(tail)
    if (stateful) { re.lastIndex = 0 }

    return found === null ? 0 : found[0].length
  }
}
// Build the default normalizer: it hands the match over to the `normalize`
// method of the linkifier instance it is called with and returns nothing.
function createNormalizer () {
  return (match, self) => {
    self.normalize(match)
  }
}
// Schemas compiler. Build regexps.
//
// Rebuilds `self.re` from the current options, calls the `onCompile` hook,
// assembles the tld-dependent fuzzy patterns, compiles every schema from
// `self.__schemas__` into `self.__compiled__` and finally resets the scan
// cache. Throws an Error for a malformed schema definition.
//
function compile (self) {
  // Load & clone RE patterns.
  const re = self.re = reFactory(self.__opts__)

  // Define dynamic patterns
  const tlds = self.__tlds__.slice()

  self.onCompile()

  if (!self.__tlds_replaced__) {
    tlds.push(tlds_2ch_src_re)
  }
  tlds.push(re.src_xn)

  re.src_tlds = tlds.join('|')

  // A function replacer inserts the tld source verbatim; with a string
  // replacement a `$` in it would be read as a replacement pattern.
  function untpl (tpl) {
    return tpl.replace('%TLDS%', function () { return re.src_tlds })
  }

  re.email_fuzzy = RegExp(untpl(re.tpl_email_fuzzy), 'i')
  re.link_fuzzy = RegExp(untpl(re.tpl_link_fuzzy), 'i')
  re.link_no_ip_fuzzy = RegExp(untpl(re.tpl_link_no_ip_fuzzy), 'i')
  re.host_fuzzy_test = RegExp(untpl(re.tpl_host_fuzzy_test), 'i')

  //
  // Compile each schema
  //

  const aliases = []

  self.__compiled__ = {} // Reset compiled data

  function schemaError (name, val) {
    throw new Error('(LinkifyIt) Invalid schema "' + name + '": ' + val)
  }

  Object.keys(self.__schemas__).forEach(function (name) {
    const val = self.__schemas__[name]

    // skip disabled methods
    if (val === null) { return }

    // Registered right away (aliases included) so that the key order of
    // `__compiled__`, and with it the order of the schema alternation below,
    // follows the order of `__schemas__`.
    const compiled = { validate: null, link: null }

    self.__compiled__[name] = compiled

    if (isObject(val)) {
      if (isRegExp(val.validate)) {
        compiled.validate = createValidator(val.validate)
      } else if (isFunction(val.validate)) {
        compiled.validate = val.validate
      } else {
        schemaError(name, val)
      }

      if (isFunction(val.normalize)) {
        compiled.normalize = val.normalize
      } else if (!val.normalize) {
        compiled.normalize = createNormalizer()
      } else {
        schemaError(name, val)
      }

      return
    }

    if (isString(val)) {
      aliases.push(name)
      return
    }

    schemaError(name, val)
  })

  //
  // Compile postponed aliases
  //

  // Follow an alias through `__schemas__` (an alias may point to another
  // alias) and return the compiled record it ends at. Returns null when the
  // chain ends at a disabled or unknown schema, or loops back on itself.
  function resolveAlias (alias) {
    const visited = new Set()
    let name = alias

    while (isString(self.__schemas__[name]) && !visited.has(name)) {
      visited.add(name)
      name = self.__schemas__[name]
    }

    const known = Object.prototype.hasOwnProperty.call(self.__compiled__, name)
    const target = known ? self.__compiled__[name] : null

    return target && isFunction(target.validate) ? target : null
  }

  aliases.forEach(function (alias) {
    const target = resolveAlias(alias)

    if (!target) {
      // Silently fail on missed schemas to avoid errors on disable, but drop
      // the placeholder record as well: left in place (with `validate: null`)
      // the alias would still be part of the schema regexp and the first text
      // containing its prefix would end in "validate is not a function".
      delete self.__compiled__[alias]
      return
    }

    self.__compiled__[alias].validate = target.validate
    self.__compiled__[alias].normalize = target.normalize
  })

  //
  // Fake record for guessed links
  //
  self.__compiled__[''] = { validate: null, normalize: createNormalizer() }

  //
  // Build schema condition
  //
  const slist = Object.keys(self.__compiled__)
    .filter(function (name) {
      // Filter disabled & fake schemas
      return name.length > 0 && self.__compiled__[name]
    })
    .map(escapeRE)
    .join('|')

  // With every schema disabled the alternation would be empty and match the
  // empty prefix everywhere; `(?!)` (a pattern that never matches) keeps the
  // schema regexps valid and simply matching nothing.
  const schemaAlt = slist || '(?!)'

  // (?!_) cause 1.5x slowdown
  self.re.schema_test = RegExp('(^|(?!_)(?:[><\uff5c]|' + re.src_ZPCc + '))(' + schemaAlt + ')', 'i')
  self.re.schema_search = RegExp('(^|(?!_)(?:[><\uff5c]|' + re.src_ZPCc + '))(' + schemaAlt + ')', 'ig')
  self.re.schema_at_start = RegExp('^' + self.re.schema_search.source, 'i')

  self.re.pretest = RegExp(
    '(' + self.re.schema_test.source + ')|(' + self.re.host_fuzzy_test.source + ')|@',
    'i'
  )

  //
  // Cleanup
  //

  resetScanCache(self)
}
/**
 * class Match
 *
 * Match result. Single element of array, returned by [[LinkifyIt#match]]
 *
 * Built from the scan state of a linkifier (`__index__`, `__last_index__`,
 * `__text_cache__`, `__schema__`); `shift` is added to both positions.
 **/
function Match (self, shift) {
  const from = self.__index__
  const to = self.__last_index__
  const matched = self.__text_cache__.slice(from, to)

  /**
   * Match#schema -> String
   *
   * Prefix (protocol) for matched string, lower-cased.
   **/
  this.schema = self.__schema__.toLowerCase()

  /**
   * Match#index -> Number
   *
   * First position of matched string.
   **/
  this.index = shift + from

  /**
   * Match#lastIndex -> Number
   *
   * Next position after matched string.
   **/
  this.lastIndex = shift + to

  /**
   * Match#raw -> String
   *
   * Matched string.
   **/
  this.raw = matched

  /**
   * Match#text -> String
   *
   * Normalized text of matched string (initially equal to `raw`).
   **/
  this.text = matched

  /**
   * Match#url -> String
   *
   * Normalized url of matched string (initially equal to `raw`).
   **/
  this.url = matched
}
// Build a Match from the current scan state of `self` and run it through the
// normalizer of its schema before handing it out.
function createMatch (self, shift) {
  const result = new Match(self, shift)
  const rule = self.__compiled__[result.schema]

  rule.normalize(result, self)

  return result
}
/**
 * class LinkifyIt
 **/

/**
 * new LinkifyIt(schemas, options)
 * - schemas (Object): Optional. Additional schemas to validate (prefix/validator)
 * - options (Object): { fuzzyLink|fuzzyEmail|fuzzyIP: true|false }
 *
 * Creates new linkifier instance with optional additional schemas.
 * Can be called without `new` keyword for convenience. When only one argument
 * is given and it has any of the option keys, it is taken as `options`.
 *
 * By default understands:
 *
 * - `http(s)://...` , `ftp://...`, `mailto:...` & `//...` links
 * - "fuzzy" links and emails (example.com, foo@bar.com).
 *
 * `schemas` is an object, where each key/value describes protocol/rule:
 *
 * - __key__ - link prefix (usually, protocol name with `:` at the end, `skype:`
 *   for example). `linkify-it` makes sure that prefix is not preceded with
 *   alphanumeric char and symbols. Only whitespaces and punctuation allowed.
 * - __value__ - rule to check tail after link prefix
 *   - _String_ - just alias to existing rule
 *   - _Object_
 *     - _validate_ - validator function (should return matched length on success),
 *       or `RegExp`.
 *     - _normalize_ - optional function to normalize text & url of matched result
 *       (for example, for @twitter mentions).
 *
 * `options`:
 *
 * - __fuzzyLink__ - recognize URL-s without `http(s):` prefix. Default `true`.
 * - __fuzzyIP__ - allow IPs in fuzzy links above. Can conflict with some texts
 *   like version numbers. Default `false`.
 * - __fuzzyEmail__ - recognize emails without `mailto:` prefix. Default `true`.
 *
 **/
function LinkifyIt (schemas, options) {
  if (!(this instanceof LinkifyIt)) {
    return new LinkifyIt(schemas, options)
  }

  // Single-argument form: `new LinkifyIt(options)`.
  let extraSchemas = schemas
  let userOptions = options
  if (!userOptions && isOptionsObj(extraSchemas)) {
    userOptions = extraSchemas
    extraSchemas = {}
  }

  this.__opts__ = assign({}, defaultOptions, userOptions)

  // Cache last tested result. Used to skip repeating steps on next `match` call.
  this.__index__ = -1
  this.__last_index__ = -1 // Next scan position
  this.__schema__ = ''
  this.__text_cache__ = ''

  this.__schemas__ = assign({}, defaultSchemas, extraSchemas)
  this.__compiled__ = {}

  this.__tlds__ = tlds_default
  this.__tlds_replaced__ = false

  this.re = {}

  compile(this)
}
/** chainable
 * LinkifyIt#add(schema, definition) -> this
 * - schema (String): rule name (fixed pattern prefix)
 * - definition (String|Object|null): schema definition; a string is an alias
 *   to another rule, an object carries `validate` (function or RegExp) and an
 *   optional `normalize`, `null` disables the rule.
 *
 * Store the rule definition under `schema` and recompile. See constructor
 * description for details.
 **/
LinkifyIt.prototype.add = function add (schema, definition) {
  const schemas = this.__schemas__

  schemas[schema] = definition
  compile(this)

  return this
}
/** chainable
 * LinkifyIt#set(options) -> this
 * - options (Object): { fuzzyLink|fuzzyEmail|fuzzyIP: true|false }
 *
 * Set recognition options for links without schema. The given keys are merged
 * into the current options; nothing is recompiled here.
 **/
LinkifyIt.prototype.set = function set (options) {
  const merged = assign(this.__opts__, options)

  this.__opts__ = merged

  return this
}
/**
 * LinkifyIt#test(text) -> Boolean
 *
 * Searches linkifiable pattern and returns `true` on success or `false` on fail.
 *
 * On success the position of the earliest link is kept in the scan cache
 * (`__schema__`, `__index__`, `__last_index__`), where [[LinkifyIt#match]]
 * picks it up.
 **/
LinkifyIt.prototype.test = function test (text) {
  // Reset scan cache
  this.__text_cache__ = text
  this.__index__ = -1

  if (!text.length) { return false }

  // try to scan for link with schema - that's the most simple rule
  if (this.re.schema_test.test(text)) {
    const searchRe = this.re.schema_search
    searchRe.lastIndex = 0

    let found
    while ((found = searchRe.exec(text)) !== null) {
      const tailLength = this.testSchemaAt(text, found[2], searchRe.lastIndex)
      if (!tailLength) { continue }

      this.__schema__ = found[2]
      this.__index__ = found.index + found[1].length
      this.__last_index__ = found.index + found[0].length + tailLength
      break
    }
  }

  if (this.__opts__.fuzzyLink && this.__compiled__['http:']) {
    // guess schemaless links
    const tldPos = text.search(this.re.host_fuzzy_test)

    // if tld is located after found link - no need to check fuzzy pattern
    if (tldPos >= 0 && (this.__index__ < 0 || tldPos < this.__index__)) {
      const linkRe = this.__opts__.fuzzyIP ? this.re.link_fuzzy : this.re.link_no_ip_fuzzy
      const link = text.match(linkRe)

      if (link !== null) {
        const start = link.index + link[1].length

        if (this.__index__ < 0 || start < this.__index__) {
          this.__schema__ = ''
          this.__index__ = start
          this.__last_index__ = link.index + link[0].length
        }
      }
    }
  }

  if (this.__opts__.fuzzyEmail && this.__compiled__['mailto:']) {
    // guess schemaless emails
    if (text.indexOf('@') >= 0) {
      // We can't skip this check, because this cases are possible:
      // 192.168.1.1@gmail.com, my.in@example.com
      const email = text.match(this.re.email_fuzzy)

      if (email !== null) {
        const start = email.index + email[1].length
        const end = email.index + email[0].length

        // An email wins when it starts earlier, or starts at the same place
        // and reaches further than what was found so far.
        const startsEarlier = this.__index__ < 0 || start < this.__index__
        if (startsEarlier || (start === this.__index__ && end > this.__last_index__)) {
          this.__schema__ = 'mailto:'
          this.__index__ = start
          this.__last_index__ = end
        }
      }
    }
  }

  return this.__index__ >= 0
}
/**
 * LinkifyIt#pretest(text) -> Boolean
 *
 * Very quick check that can give false positives. Returns `true` if a link
 * MAY exist in `text`. Useful as a speed optimization when you only need to
 * know that there is NO link.
 **/
LinkifyIt.prototype.pretest = function pretest (text) {
  const quickRe = this.re.pretest

  return quickRe.test(text)
}
/**
 * LinkifyIt#testSchemaAt(text, name, position) -> Number
 * - text (String): text to scan
 * - name (String): rule (schema) name
 * - position (Number): text offset to check from
 *
 * Similar to [[LinkifyIt#test]] but checks only specific protocol tail exactly
 * at given position. Returns length of found pattern (0 on fail).
 **/
LinkifyIt.prototype.testSchemaAt = function testSchemaAt (text, schema, pos) {
  // Compiled rules are looked up by lower-cased name
  const rule = this.__compiled__[schema.toLowerCase()]

  // If not supported schema check requested - terminate
  if (!rule) { return 0 }

  return rule.validate(text, pos, this)
}
/**
 * LinkifyIt#match(text) -> Array|null
 *
 * Returns array of found link descriptions or `null` on fail. We strongly
 * recommend to use [[LinkifyIt#test]] first, for best speed.
 *
 * ##### Result match description
 *
 * - __schema__ - link schema, can be empty for fuzzy links, or `//` for
 *   protocol-neutral links.
 * - __index__ - offset of matched text
 * - __lastIndex__ - index of next char after match end
 * - __raw__ - matched text
 * - __text__ - normalized text
 * - __url__ - link, generated from matched text
 **/
LinkifyIt.prototype.match = function match (text) {
  const found = []
  let offset = 0

  // Try to take previous element from cache, if .test() called before
  const cacheHit = this.__index__ >= 0 && this.__text_cache__ === text
  if (cacheHit) {
    found.push(createMatch(this, offset))
    offset = this.__last_index__
  }

  // Cut head if cache was used
  let rest = offset ? text.slice(offset) : text

  // Scan string until end reached; positions reported by .test() are relative
  // to `rest`, `offset` turns them back into positions in `text`.
  while (this.test(rest)) {
    found.push(createMatch(this, offset))

    rest = rest.slice(this.__last_index__)
    offset += this.__last_index__
  }

  return found.length ? found : null
}
/**
 * LinkifyIt#matchAtStart(text) -> Match|null
 *
 * Returns fully-formed (not fuzzy) link if it starts at the beginning
 * of the string, and null otherwise.
 **/
LinkifyIt.prototype.matchAtStart = function matchAtStart (text) {
  // Reset scan cache
  this.__text_cache__ = text
  this.__index__ = -1

  if (!text.length) { return null }

  const head = this.re.schema_at_start.exec(text)
  if (head === null) { return null }

  const schema = head[2]
  const tailLength = this.testSchemaAt(text, schema, head[0].length)
  if (!tailLength) { return null }

  this.__schema__ = schema
  this.__index__ = head.index + head[1].length
  this.__last_index__ = head.index + head[0].length + tailLength

  return createMatch(this, 0)
}
/** chainable
 * LinkifyIt#tlds(list [, keepOld]) -> this
 * - list (Array): list of tlds (a single value is wrapped into an array)
 * - keepOld (Boolean): merge with current list if `true` (`false` by default)
 *
 * Load (or merge) new tlds list. Those are used for fuzzy links (without
 * prefix) to avoid false positives. By default this algorithm is used:
 *
 * - hostname with any 2-letter root zones are ok.
 * - biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф
 *   are ok.
 * - encoded (`xn--...`) root zones are ok.
 *
 * If list is replaced, then exact match for 2-chars root zones will be checked.
 **/
LinkifyIt.prototype.tlds = function tlds (list, keepOld) {
  const incoming = Array.isArray(list) ? list : [list]

  if (keepOld) {
    // Merge: sort, drop adjacent duplicates, keep the result in reverse order
    const sorted = this.__tlds__.concat(incoming).sort()
    const unique = sorted.filter(function (tld, position) {
      return tld !== sorted[position - 1]
    })

    this.__tlds__ = unique.reverse()
  } else {
    // Replace: from now on the built-in 2-char pattern is no longer appended
    this.__tlds__ = incoming.slice()
    this.__tlds_replaced__ = true
  }

  compile(this)
  return this
}
/**
 * LinkifyIt#normalize(match)
 *
 * Default normalizer (if schema does not define its own). Changes `match.url`
 * in place: schemaless links get an `http://` prefix, `mailto:` matches get a
 * `mailto:` prefix unless the url already starts with one.
 **/
LinkifyIt.prototype.normalize = function normalize (match) {
  // Do minimal possible changes by default. Need to collect feedback prior
  // to move forward https://github.com/markdown-it/linkify-it/issues/1

  const schema = match.schema

  if (!schema) {
    match.url = 'http://' + match.url
    return
  }

  if (schema !== 'mailto:') { return }

  if (!/^mailto:/i.test(match.url)) {
    match.url = 'mailto:' + match.url
  }
}
/**
 * LinkifyIt#onCompile()
 *
 * Hook invoked on every recompile, right after the base patterns have been
 * loaded into `this.re`. Does nothing by default; override to modify basic
 * RegExp-s.
 **/
LinkifyIt.prototype.onCompile = function onCompile () {
  // intentionally empty
}
- export default LinkifyIt
|