index.cjs.js 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832
  1. 'use strict';
  2. var uc_micro = require('uc.micro');
  /**
   * Build the table of regular-expression source strings used by the linkifier.
   *
   * - opts (Object): optional. Only the `---` key is read here: when truthy,
   *   a `---` sequence terminates a link instead of being part of it.
   *
   * Returns a fresh object whose `src_*` entries are ready-to-use pattern
   * sources and whose `tpl_*` entries still contain a `%TLDS%` placeholder.
   **/
  function reFactory (opts) {
    const options = opts || {};
    const longDash = options['---'];
    const re = {};

    // Unicode category sources taken from `uc.micro`.
    re.src_Any = uc_micro.Any.source;
    re.src_Cc = uc_micro.Cc.source;
    re.src_Z = uc_micro.Z.source;
    re.src_P = uc_micro.P.source;

    // Z | P | Cc alternation (spaces, punctuation, control chars).
    const ZPCc = [re.src_Z, re.src_P, re.src_Cc].join('|');
    re.src_ZPCc = ZPCc;

    // Z | Cc alternation (spaces, control chars).
    const ZCc = [re.src_Z, re.src_Cc].join('|');
    re.src_ZCc = ZCc;

    // Experimental: characters that are never allowed inside a link because
    // they separate it from the surrounding text.
    const separators = '[><\uff5c]';

    // "Word" character: anything that is not a separator, space, punctuation
    // or control char. Defined by exclusion to keep the pattern short.
    const letter = `(?:(?!${separators}|${ZPCc})${re.src_Any})`;
    re.src_pseudo_letter = letter;

    const octet = '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)';
    re.src_ip4 = `(?:${octet}\\.){3}${octet}`;

    // "@/[]()" are prohibited in user/pass to avoid fetching a wrong domain.
    re.src_auth = `(?:(?:(?!${ZCc}|[@/\\[\\]()]).)+@)?`;

    re.src_port =
      '(?::(?:6(?:[0-4]\\d{3}|5(?:[0-4]\\d{2}|5(?:[0-2]\\d|3[0-5])))|[1-5]?\\d{1,4}))?';

    const terminator =
      `(?=$|${separators}|${ZPCc})` +
      `(?!${longDash ? '-(?!--)|' : '-|'}_|:\\d|\\.-|\\.(?!$|${ZPCc}))`;
    re.src_host_terminator = terminator;

    re.src_path = [
      '(?:',
      '[/?#]',
      '(?:',
      '(?!' + ZCc + '|' + separators + '|[()[\\]{}.,"\'?!\\-;]).|',
      // balanced bracket / quote pairs
      '\\[(?:(?!' + ZCc + '|\\]).)*\\]|',
      '\\((?:(?!' + ZCc + '|[)]).)*\\)|',
      '\\{(?:(?!' + ZCc + '|[}]).)*\\}|',
      '\\"(?:(?!' + ZCc + '|["]).)+\\"|',
      "\\'(?:(?!" + ZCc + "|[']).)+\\'|",
      // a lone apostrophe is allowed when a letter or `-` follows (`I'm_king`)
      "\\'(?=" + letter + '|[-])|',
      // runs of dots are accepted only before ASCII alphanumerics, `%`, `/`
      // or `&` (search-engine links, commit ranges)
      '\\.{2,}[a-zA-Z0-9%/&]|',
      '\\.(?!' + ZCc + '|[.]|$)|',
      // with the `---` option a triple dash is a long dash and ends the link
      longDash ? '\\-(?!--(?:[^-]|$))(?:-*)|' : '\\-+|',
      // `,` and `;` are allowed unless followed by a space-like char or the end
      ',(?!' + ZCc + '|$)|',
      ';(?!' + ZCc + '|$)|',
      // `!!!` is allowed inside a path, but not at its end
      '\\!+(?!' + ZCc + '|[!]|$)|',
      '\\?(?!' + ZCc + '|[?]|$)',
      ')+',
      '|\\/',
      ')?'
    ].join('');

    // A quote (") is forbidden at the first position only, because emails
    // enclosed in quotes are far more common than names starting with one.
    const emailName =
      '[\\-;:&=\\+\\$,\\.a-zA-Z0-9_]' +
      '[\\-;:&=\\+\\$,\\"\\.a-zA-Z0-9_]*';
    re.src_email_name = emailName;

    const xn = 'xn--[a-z0-9\\-]{1,59}';
    re.src_xn = xn;

    // Root zone: punycode or 1..63 word characters (digits allowed: http://test1).
    re.src_domain_root = `(?:${xn}|${letter}{1,63})`;

    // Domain label: punycode, a single word character, or 2..63 characters
    // that start and end with a word character and may contain `-` in between.
    const domain = `(?:${xn}|(?:${letter})|(?:${letter}(?:-|${letter}){0,61}${letter}))`;
    re.src_domain = domain;

    // No separate IP alternative: digits are already valid in domain labels.
    re.src_host = `(?:(?:(?:(?:${domain})\\.)*${domain}))`;

    const hostNoIpFuzzy = `(?:(?:(?:${domain})\\.)+(?:%TLDS%))`;
    re.tpl_host_fuzzy = `(?:${re.src_ip4}|${hostNoIpFuzzy})`;
    re.tpl_host_no_ip_fuzzy = hostNoIpFuzzy;

    re.src_host_strict = re.src_host + terminator;
    re.tpl_host_fuzzy_strict = re.tpl_host_fuzzy + terminator;
    re.src_host_port_strict = re.src_host + re.src_port + terminator;
    re.tpl_host_port_fuzzy_strict = re.tpl_host_fuzzy + re.src_port + terminator;
    re.tpl_host_port_no_ip_fuzzy_strict = re.tpl_host_no_ip_fuzzy + re.src_port + terminator;

    //
    // Main rules
    //

    // Rough host test, used to reject text quickly before the full fuzzy match.
    re.tpl_host_fuzzy_test =
      `localhost|www\\.|\\.\\d{1,3}\\.|(?:\\.(?:%TLDS%)(?:${ZPCc}|>|$))`;

    re.tpl_email_fuzzy =
      `(^|${separators}|"|\\(|${ZCc})` +
      `(${emailName}@${re.tpl_host_fuzzy_strict})`;

    // A fuzzy link can't be preceded by any of `.:/\-_@` or by a
    // non-punctuation char, but it may follow `>` (markdown blockquote).
    const linkLead = '(^|(?![.:/\\-_@])(?:[$+<=>^`|\uff5c]|' + ZPCc + '))';
    const linkGuard = '(?![$+<=>^`|\uff5c])';

    re.tpl_link_fuzzy =
      linkLead + '(' + linkGuard + re.tpl_host_port_fuzzy_strict + re.src_path + ')';
    re.tpl_link_no_ip_fuzzy =
      linkLead + '(' + linkGuard + re.tpl_host_port_no_ip_fuzzy_strict + re.src_path + ')';

    return re;
  }
  137. //
  138. // Helpers
  139. //
  140. // Merge objects
  141. //
  /**
   * Copy the own enumerable string-keyed properties of every source onto `obj`.
   * Falsy sources are skipped; later sources win. Returns `obj` itself.
   **/
  function assign (obj, ...sources) {
    for (const source of sources) {
      if (!source) { continue; }
      for (const key of Object.keys(source)) {
        obj[key] = source[key];
      }
    }
    return obj;
  }
  // Internal `[object Xxx]` tag of a value, as reported by Object.prototype.toString.
  function _class (obj) {
    return Object.prototype.toString.call(obj);
  }

  // Type predicates built on the tag above.
  function isString (obj) {
    const tag = _class(obj);
    return tag === '[object String]';
  }

  function isObject (obj) {
    const tag = _class(obj);
    return tag === '[object Object]';
  }

  function isRegExp (obj) {
    const tag = _class(obj);
    return tag === '[object RegExp]';
  }

  function isFunction (obj) {
    const tag = _class(obj);
    return tag === '[object Function]';
  }
  // Backslash-escape every RegExp metacharacter in `str` so it matches literally.
  function escapeRE (str) {
    const metaChars = /[.?*+^$[\]\\(){}|-]/g;
    return str.replace(metaChars, '\\$&');
  }
  158. //
  // Recognition options used when the caller does not override them.
  const defaultOptions = {
    fuzzyLink: true,
    fuzzyEmail: true,
    fuzzyIP: false
  };

  // True when `obj` has at least one own key that is a known option name.
  // `null` / `undefined` are treated as an empty object.
  function isOptionsObj (obj) {
    return Object.keys(obj || {}).some(function (key) {
      return Object.prototype.hasOwnProperty.call(defaultOptions, key);
    });
  }
  // Built-in schemas. Each validator receives the full text, the offset right
  // after the schema prefix and the linkifier instance; it returns the length
  // of the link tail found at that offset, or 0 when nothing matches.
  const defaultSchemas = {
    'http:': {
      validate (text, pos, self) {
        // Compiled lazily: the host-related sources can change on tlds update.
        if (!self.re.http) {
          self.re.http = new RegExp(
            '^\\/\\/' + self.re.src_auth + self.re.src_host_port_strict + self.re.src_path, 'i'
          );
        }
        const found = text.slice(pos).match(self.re.http);
        return found === null ? 0 : found[0].length;
      }
    },

    // Aliases of the `http:` rule.
    'https:': 'http:',
    'ftp:': 'http:',

    '//': {
      validate (text, pos, self) {
        // Compiled lazily: the host-related sources can change on tlds update.
        if (!self.re.no_http) {
          const pattern = [
            '^',
            self.re.src_auth,
            // Single-level domains are not allowed here, because of false
            // positives like `//test` in code comments.
            '(?:localhost|(?:(?:' + self.re.src_domain + ')\\.)+' + self.re.src_domain_root + ')',
            self.re.src_port,
            self.re.src_host_terminator,
            self.re.src_path
          ].join('');
          self.re.no_http = new RegExp(pattern, 'i');
        }

        const found = text.slice(pos).match(self.re.no_http);
        if (found === null) { return 0; }

        // Reject `://` and `///`: that protects from errors in a protocol name.
        if (pos >= 3) {
          const before = text[pos - 3];
          if (before === ':' || before === '/') { return 0; }
        }
        return found[0].length;
      }
    },

    'mailto:': {
      validate (text, pos, self) {
        if (!self.re.mailto) {
          self.re.mailto = new RegExp(
            '^' + self.re.src_email_name + '@' + self.re.src_host_strict, 'i'
          );
        }
        const found = text.slice(pos).match(self.re.mailto);
        return found === null ? 0 : found[0].length;
      }
    }
  };
  // Pattern source matching every known 2-character TLD
  // (autogenerated by ./support/tlds_2char_gen.js).
  /* eslint-disable-next-line max-len */
  const tlds_2ch_src_re = 'a[cdefgilmnoqrstuwxz]|b[abdefghijmnorstvwyz]|c[acdfghiklmnoruvwxyz]|d[ejkmoz]|e[cegrstu]|f[ijkmor]|g[abdefghilmnpqrstuwy]|h[kmnrtu]|i[delmnoqrst]|j[emop]|k[eghimnprwyz]|l[abcikrstuvy]|m[acdeghklmnopqrstuvwxyz]|n[acefgilopruz]|om|p[aefghklmnrstwy]|qa|r[eosuw]|s[abcdeghijklmnortuvxyz]|t[cdfghjklmnortvwz]|u[agksyz]|v[aceginu]|w[fs]|y[et]|z[amw]';

  // Default list of longer TLDs.
  // DON'T try to make PRs with changes. Extend TLDs with LinkifyIt.tlds() instead.
  const tlds_default = [
    'biz', 'com', 'edu', 'gov', 'net', 'org', 'pro', 'web', 'xxx',
    'aero', 'asia', 'coop', 'info', 'museum', 'name', 'shop', 'рф'
  ];
  // Forget the cached result of the last scan (position and scanned text).
  function resetScanCache (self) {
    Object.assign(self, {
      __index__: -1,
      __text_cache__: ''
    });
  }
  /**
   * Wrap a RegExp into a validator function `(text, pos) -> Number`.
   *
   * The returned function runs `re` against the part of `text` that starts at
   * `pos` and returns the length of the first match, or 0 when there is none.
   *
   * The pattern is executed exactly once per call and `lastIndex` is reset
   * before and after it. Running it twice (a `test()` followed by a separate
   * `match()`) breaks on stateful expressions: with a sticky (`y`) RegExp the
   * first run advances `lastIndex`, the second one then fails and the result
   * lookup throws.
   **/
  function createValidator (re) {
    return function (text, pos) {
      const tail = text.slice(pos);

      // Always search from the beginning of the tail, whatever state a
      // global / sticky expression was left in.
      re.lastIndex = 0;
      const found = re.exec(tail);
      re.lastIndex = 0;

      return found === null ? 0 : found[0].length;
    };
  }
  // Default normalizer factory: the produced function delegates to the
  // `normalize` method of the linkifier instance it is given.
  function createNormalizer () {
    const delegate = (match, self) => {
      self.normalize(match);
    };
    return delegate;
  }
  252. // Schemas compiler. Build regexps.
  253. //
  /**
   * (Re)build everything derived from the instance's options, schemas and tlds:
   * the `self.re` pattern table, the per-schema `self.__compiled__` records and
   * the combined schema lookup expressions. The scan cache is reset at the end.
   *
   * - self (LinkifyIt): instance to compile.
   *
   * Throws an Error for a schema definition that is neither `null`, a String
   * alias, nor an Object with a RegExp/Function `validate` and an optional
   * Function `normalize`.
   **/
  function compile (self) {
    // Load a fresh set of RE patterns for the current options.
    const re = self.re = reFactory(self.__opts__);

    // Define dynamic patterns. Work on a copy so the stored list is not modified.
    const tlds = self.__tlds__.slice();

    self.onCompile();

    if (!self.__tlds_replaced__) {
      tlds.push(tlds_2ch_src_re);
    }
    tlds.push(re.src_xn);

    re.src_tlds = tlds.join('|');

    function untpl (tpl) { return tpl.replace('%TLDS%', re.src_tlds); }

    re.email_fuzzy = RegExp(untpl(re.tpl_email_fuzzy), 'i');
    re.link_fuzzy = RegExp(untpl(re.tpl_link_fuzzy), 'i');
    re.link_no_ip_fuzzy = RegExp(untpl(re.tpl_link_no_ip_fuzzy), 'i');
    re.host_fuzzy_test = RegExp(untpl(re.tpl_host_fuzzy_test), 'i');

    //
    // Compile each schema
    //

    const aliases = [];

    self.__compiled__ = {}; // Reset compiled data

    function schemaError (name, val) {
      throw new Error('(LinkifyIt) Invalid schema "' + name + '": ' + val);
    }

    Object.keys(self.__schemas__).forEach(function (name) {
      const val = self.__schemas__[name];

      // skip disabled methods
      if (val === null) { return; }

      const compiled = { validate: null, link: null };

      self.__compiled__[name] = compiled;

      if (isObject(val)) {
        if (isRegExp(val.validate)) {
          compiled.validate = createValidator(val.validate);
        } else if (isFunction(val.validate)) {
          compiled.validate = val.validate;
        } else {
          schemaError(name, val);
        }

        if (isFunction(val.normalize)) {
          compiled.normalize = val.normalize;
        } else if (!val.normalize) {
          compiled.normalize = createNormalizer();
        } else {
          schemaError(name, val);
        }

        return;
      }

      if (isString(val)) {
        // Resolved below, once every real rule has been compiled.
        aliases.push(name);
        return;
      }

      schemaError(name, val);
    });

    //
    // Compile postponed aliases
    //

    aliases.forEach(function (alias) {
      const target = self.__compiled__[self.__schemas__[alias]];

      if (!target || typeof target.validate !== 'function') {
        // The aliased schema is disabled or missing. Fail silently (no error
        // on disable), but drop the half-built record: keeping it with
        // `validate: null` would leave the prefix in the schema expressions
        // below and make the validator call throw once the prefix is found.
        delete self.__compiled__[alias];
        return;
      }

      self.__compiled__[alias].validate = target.validate;
      self.__compiled__[alias].normalize = target.normalize;
    });

    //
    // Fake record for guessed links
    //
    self.__compiled__[''] = { validate: null, normalize: createNormalizer() };

    //
    // Build schema condition
    //
    const slist = Object.keys(self.__compiled__)
      .filter(function (name) {
        // Filter disabled & fake schemas
        return name.length > 0 && self.__compiled__[name];
      })
      .map(escapeRE)
      .join('|');

    // (?!_) cause 1.5x slowdown
    self.re.schema_test = RegExp('(^|(?!_)(?:[><\uff5c]|' + re.src_ZPCc + '))(' + slist + ')', 'i');
    self.re.schema_search = RegExp('(^|(?!_)(?:[><\uff5c]|' + re.src_ZPCc + '))(' + slist + ')', 'ig');
    self.re.schema_at_start = RegExp('^' + self.re.schema_search.source, 'i');

    self.re.pretest = RegExp(
      '(' + self.re.schema_test.source + ')|(' + self.re.host_fuzzy_test.source + ')|@',
      'i'
    );

    //
    // Cleanup
    //

    resetScanCache(self);
  }
  /**
   * class Match
   *
   * Description of one found link; [[LinkifyIt#match]] returns an array of these.
   * Built from the scan state stored on the linkifier (`self`); `shift` is added
   * to both positions to translate them into offsets of the original text.
   **/
  function Match (self, shift) {
    const from = self.__index__;
    const to = self.__last_index__;
    const matched = self.__text_cache__.slice(from, to);

    /**
     * Match#schema -> String
     *
     * Lower-cased prefix (protocol) of the matched string.
     **/
    this.schema = self.__schema__.toLowerCase();

    /**
     * Match#index -> Number
     *
     * Offset of the first char of the matched string.
     **/
    this.index = from + shift;

    /**
     * Match#lastIndex -> Number
     *
     * Offset of the char right after the matched string.
     **/
    this.lastIndex = to + shift;

    /**
     * Match#raw -> String
     *
     * Matched string, as found.
     **/
    this.raw = matched;

    /**
     * Match#text -> String
     *
     * Normalized text of the matched string.
     **/
    this.text = matched;

    /**
     * Match#url -> String
     *
     * Normalized url of the matched string.
     **/
    this.url = matched;
  }
  // Build a Match from the current scan state and run it through the
  // normalizer registered for its schema before handing it out.
  function createMatch (self, shift) {
    const result = new Match(self, shift);
    const rule = self.__compiled__[result.schema];
    rule.normalize(result, self);
    return result;
  }
  /**
   * class LinkifyIt
   **/

  /**
   * new LinkifyIt(schemas, options)
   * - schemas (Object): Optional. Additional schemas to validate (prefix/validator)
   * - options (Object): { fuzzyLink|fuzzyEmail|fuzzyIP: true|false }
   *
   * Creates a new linkifier instance with optional additional schemas.
   * Can be called without the `new` keyword for convenience. When only one
   * argument is given and it contains a known option name, it is taken as
   * `options`.
   *
   * By default understands:
   *
   * - `http(s)://...` , `ftp://...`, `mailto:...` & `//...` links
   * - "fuzzy" links and emails (example.com, foo@bar.com).
   *
   * `schemas` is an object, where each key/value describes protocol/rule:
   *
   * - __key__ - link prefix (usually, protocol name with `:` at the end, `skype:`
   *   for example). `linkify-it` makes sure that the prefix is not preceded by
   *   an alphanumeric char or symbol. Only whitespace and punctuation are allowed.
   * - __value__ - rule to check the tail after the link prefix
   *   - _String_ - just an alias to an existing rule
   *   - _Object_
   *     - _validate_ - validator function (should return matched length on
   *       success), or `RegExp`.
   *     - _normalize_ - optional function to normalize text & url of the matched
   *       result (for example, for @twitter mentions).
   *
   * `options`:
   *
   * - __fuzzyLink__ - recognize URLs without `http(s):` prefix. Default `true`.
   * - __fuzzyIP__ - allow IPs in fuzzy links above. Can conflict with some texts
   *   like version numbers. Default `false`.
   * - __fuzzyEmail__ - recognize emails without `mailto:` prefix. Default `true`.
   *
   **/
  function LinkifyIt (schemas, options) {
    if (!(this instanceof LinkifyIt)) {
      return new LinkifyIt(schemas, options);
    }

    let userSchemas = schemas;
    let userOptions = options;

    // Single-argument form: `LinkifyIt(options)`.
    if (!userOptions && isOptionsObj(userSchemas)) {
      userOptions = userSchemas;
      userSchemas = {};
    }

    this.__opts__ = assign({}, defaultOptions, userOptions);

    // Result of the last scan; lets `match` reuse the work done by `test`.
    this.__index__ = -1;
    this.__last_index__ = -1; // Next scan position
    this.__schema__ = '';
    this.__text_cache__ = '';

    this.__schemas__ = assign({}, defaultSchemas, userSchemas);
    this.__compiled__ = {};

    this.__tlds__ = tlds_default;
    this.__tlds_replaced__ = false;

    this.re = {};

    compile(this);
  }
  /** chainable
   * LinkifyIt#add(schema, definition)
   * - schema (String): rule name (fixed pattern prefix)
   * - definition (String|RegExp|Object): schema definition
   *
   * Register (or replace) a rule and recompile. See the constructor
   * description for the definition format.
   **/
  LinkifyIt.prototype.add = function add (schema, definition) {
    const registry = this.__schemas__;
    registry[schema] = definition;

    compile(this);

    return this;
  };
  /** chainable
   * LinkifyIt#set(options)
   * - options (Object): { fuzzyLink|fuzzyEmail|fuzzyIP: true|false }
   *
   * Override recognition options for links without schema. Keys missing from
   * `options` keep their current values.
   **/
  LinkifyIt.prototype.set = function set (options) {
    const merged = assign(this.__opts__, options);
    this.__opts__ = merged;
    return this;
  };
  /**
   * LinkifyIt#test(text) -> Boolean
   *
   * Looks for the first linkifiable pattern in `text`. Returns `true` when one
   * is found and `false` otherwise. The position, end and schema of the find
   * are stored on the instance so a following `match` call can reuse them.
   **/
  LinkifyIt.prototype.test = function test (text) {
    // Reset scan cache
    this.__text_cache__ = text;
    this.__index__ = -1;

    if (!text.length) { return false; }

    // Links with an explicit schema - that's the most simple rule.
    if (this.re.schema_test.test(text)) {
      const search = this.re.schema_search;
      search.lastIndex = 0;

      let hit = search.exec(text);
      while (hit !== null) {
        const tailLength = this.testSchemaAt(text, hit[2], search.lastIndex);
        if (tailLength) {
          this.__schema__ = hit[2];
          this.__index__ = hit.index + hit[1].length;
          this.__last_index__ = hit.index + hit[0].length + tailLength;
          break;
        }
        hit = search.exec(text);
      }
    }

    // Guess schemaless links.
    if (this.__opts__.fuzzyLink && this.__compiled__['http:']) {
      const tldPos = text.search(this.re.host_fuzzy_test);

      // If the tld is located after an already found link, there is no need
      // to check the fuzzy pattern.
      if (tldPos >= 0 && (this.__index__ < 0 || tldPos < this.__index__)) {
        const linkRe = this.__opts__.fuzzyIP ? this.re.link_fuzzy : this.re.link_no_ip_fuzzy;
        const link = text.match(linkRe);

        if (link !== null) {
          const linkStart = link.index + link[1].length;

          if (this.__index__ < 0 || linkStart < this.__index__) {
            this.__schema__ = '';
            this.__index__ = linkStart;
            this.__last_index__ = link.index + link[0].length;
          }
        }
      }
    }

    // Guess schemaless emails.
    if (this.__opts__.fuzzyEmail && this.__compiled__['mailto:'] && text.indexOf('@') >= 0) {
      // This check can't be skipped, because such cases are possible:
      // 192.168.1.1@gmail.com, my.in@example.com
      const email = text.match(this.re.email_fuzzy);

      if (email !== null) {
        const emailStart = email.index + email[1].length;
        const emailEnd = email.index + email[0].length;

        // Prefer the email when it starts earlier, or starts at the same
        // place but extends further than what was found so far.
        const startsEarlier = this.__index__ < 0 || emailStart < this.__index__;
        const sameStartButLonger =
          emailStart === this.__index__ && emailEnd > this.__last_index__;

        if (startsEarlier || sameStartButLonger) {
          this.__schema__ = 'mailto:';
          this.__index__ = emailStart;
          this.__last_index__ = emailEnd;
        }
      }
    }

    return this.__index__ >= 0;
  };
  /**
   * LinkifyIt#pretest(text) -> Boolean
   *
   * Very quick check that can give false positives: returns `true` if a link
   * MAY exist in `text`. Useful as a speed optimization when you only need to
   * know that a link does NOT exist.
   **/
  LinkifyIt.prototype.pretest = function pretest (text) {
    const quickCheck = this.re.pretest;
    return quickCheck.test(text);
  };
  /**
   * LinkifyIt#testSchemaAt(text, schema, position) -> Number
   * - text (String): text to scan
   * - schema (String): rule (schema) name, matched case-insensitively
   * - position (Number): text offset to check from
   *
   * Similar to [[LinkifyIt#test]] but checks only the tail of one specific
   * protocol, exactly at the given position. Returns the length of the found
   * pattern (0 on fail).
   **/
  LinkifyIt.prototype.testSchemaAt = function testSchemaAt (text, schema, pos) {
    const rule = this.__compiled__[schema.toLowerCase()];

    // Unknown schema, or a record without a usable validator (for example an
    // alias whose target rule is disabled): nothing can match, so report a
    // failure instead of throwing on the call below.
    if (!rule || typeof rule.validate !== 'function') {
      return 0;
    }

    return rule.validate(text, pos, this);
  };
  /**
   * LinkifyIt#match(text) -> Array|null
   *
   * Returns an array of found link descriptions, or `null` when there are
   * none. We strongly recommend calling [[LinkifyIt#test]] first, for best
   * speed: its result for the same text is reused here.
   *
   * ##### Result match description
   *
   * - __schema__ - link schema, can be empty for fuzzy links, or `//` for
   *   protocol-neutral links.
   * - __index__ - offset of matched text
   * - __lastIndex__ - index of next char after match end
   * - __raw__ - matched text
   * - __text__ - normalized text
   * - __url__ - link, generated from matched text
   **/
  LinkifyIt.prototype.match = function match (text) {
    const found = [];
    let offset = 0;
    let rest = text;

    // Take the first element from the cache when `.test()` was just called
    // on the same text, and continue scanning after it.
    if (this.__index__ >= 0 && this.__text_cache__ === text) {
      found.push(createMatch(this, offset));
      offset = this.__last_index__;
      if (offset) {
        rest = text.slice(offset);
      }
    }

    // Scan the remaining text until nothing more is found.
    while (this.test(rest)) {
      found.push(createMatch(this, offset));

      const consumed = this.__last_index__;
      rest = rest.slice(consumed);
      offset += consumed;
    }

    return found.length ? found : null;
  };
  /**
   * LinkifyIt#matchAtStart(text) -> Match|null
   *
   * Returns a fully-formed (not fuzzy) link if it starts at the beginning
   * of the string, and `null` otherwise.
   **/
  LinkifyIt.prototype.matchAtStart = function matchAtStart (text) {
    // Reset scan cache
    this.__text_cache__ = text;
    this.__index__ = -1;

    if (!text.length) {
      return null;
    }

    const head = this.re.schema_at_start.exec(text);
    if (head === null) {
      return null;
    }

    const prefixEnd = head[0].length;
    const tailLength = this.testSchemaAt(text, head[2], prefixEnd);
    if (!tailLength) {
      return null;
    }

    this.__schema__ = head[2];
    this.__index__ = head.index + head[1].length;
    this.__last_index__ = head.index + prefixEnd + tailLength;

    return createMatch(this, 0);
  };
  /** chainable
   * LinkifyIt#tlds(list [, keepOld]) -> this
   * - list (Array): list of tlds (a single value is wrapped into an array)
   * - keepOld (Boolean): merge with current list if `true` (`false` by default)
   *
   * Load (or merge) a new tlds list. Those are used for fuzzy links (without
   * prefix) to avoid false positives. By default this algorithm is used:
   *
   * - hostname with any 2-letter root zones are ok.
   * - biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф
   *   are ok.
   * - encoded (`xn--...`) root zones are ok.
   *
   * If the list is replaced, then an exact match for 2-char root zones will be
   * checked.
   **/
  LinkifyIt.prototype.tlds = function tlds (list, keepOld) {
    const incoming = Array.isArray(list) ? list : [list];

    if (keepOld) {
      // Merge: sort, drop adjacent duplicates, then store in reverse order.
      const sorted = this.__tlds__.concat(incoming).sort();
      this.__tlds__ = sorted
        .filter(function (tld, position) {
          return tld !== sorted[position - 1];
        })
        .reverse();
    } else {
      // Replace: keep a private copy and remember that defaults are gone.
      this.__tlds__ = incoming.slice();
      this.__tlds_replaced__ = true;
    }

    compile(this);
    return this;
  };
  /**
   * LinkifyIt#normalize(match)
   *
   * Default normalizer (used when a schema does not define its own). Changes
   * `match.url` in place:
   *
   * - schemaless (fuzzy) links get an `http://` prefix;
   * - `mailto:` matches get a `mailto:` prefix unless they already have one.
   **/
  LinkifyIt.prototype.normalize = function normalize (match) {
    // Do minimal possible changes by default. Need to collect feedback prior
    // to move forward https://github.com/markdown-it/linkify-it/issues/1
    if (!match.schema) {
      match.url = 'http://' + match.url;
      return;
    }

    if (match.schema !== 'mailto:') {
      return;
    }

    const hasPrefix = /^mailto:/i.test(match.url);
    if (!hasPrefix) {
      match.url = 'mailto:' + match.url;
    }
  };
  /**
   * LinkifyIt#onCompile()
   *
   * Hook invoked during every recompilation; does nothing by default.
   * Override it to modify the basic RegExp-s.
   **/
  LinkifyIt.prototype.onCompile = function onCompile () {
    // Intentionally empty.
  };
  678. module.exports = LinkifyIt;