php.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625
  1. /*
  2. Language: PHP
  3. Author: Victor Karamzin <Victor.Karamzin@enterra-inc.com>
  4. Contributors: Evgeny Stepanischev <imbolk@gmail.com>, Ivan Sagalaev <maniac@softwaremaniacs.org>
  5. Website: https://www.php.net
  6. Category: common
  7. */
  /**
   * Builds the highlight.js language definition for PHP.
   *
   * The returned object lists the top-level keywords and the modes (open/close
   * tags, variables, strings, numbers, attributes, function and class
   * declarations, `namespace`/`use` statements, ...) used to highlight PHP.
   *
   * @param {HLJSApi} hljs - highlight.js API; this function uses its `regex`
   *   helpers, `inherit`, `COMMENT`, `END_SAME_AS_BEGIN` and the shared modes.
   * @returns {LanguageDetail} the PHP language definition
   */
  function php(hljs) {
    const regex = hljs.regex;

    // Negative look-ahead that tries to avoid matching patterns that are not
    // PHP at all, like $ident$, @ident@, etc.
    const NOT_PERL_ETC = /(?![A-Za-z0-9])(?![$])/;
    const IDENT_RE = regex.concat(
      /[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/,
      NOT_PERL_ETC);
    // Will not detect camelCase classes
    const PASCAL_CASE_CLASS_NAME_RE = regex.concat(
      /(\\?[A-Z][a-z0-9_\x7f-\xff]+|\\?[A-Z]+(?=[A-Z][a-z0-9_\x7f-\xff])){1,}/,
      NOT_PERL_ETC);
    const UPCASE_NAME_RE = regex.concat(
      /[A-Z]+/,
      NOT_PERL_ETC);

    // One or more `$` followed by an identifier (covers `$a` and `$$a`).
    const VARIABLE = {
      scope: 'variable',
      match: '\\$+' + IDENT_RE,
    };

    // PHP open/close tags.
    const PREPROCESSOR = {
      scope: "meta",
      variants: [
        { begin: /<\?php/, relevance: 10 }, // boost for obvious PHP
        { begin: /<\?=/ },
        // less relevant per PSR-1 which says not to use short-tags
        { begin: /<\?/, relevance: 0.1 },
        { begin: /\?>/ } // end php tag
      ]
    };

    // Variable interpolation inside double-quoted strings and heredocs:
    // either a bare `$name` or a `{$ ... }` expression.
    const SUBST = {
      scope: 'subst',
      variants: [
        { begin: /\$\w+/ },
        {
          begin: /\{\$/,
          end: /\}/
        }
      ]
    };

    const SINGLE_QUOTED = hljs.inherit(hljs.APOS_STRING_MODE, { illegal: null, });
    const DOUBLE_QUOTED = hljs.inherit(hljs.QUOTE_STRING_MODE, {
      illegal: null,
      contains: hljs.QUOTE_STRING_MODE.contains.concat(SUBST),
    });

    // Heredoc: `<<<ID` or `<<<"ID"` followed by a newline. The opening
    // identifier is stored on begin; a candidate terminator is ignored unless
    // it is the same identifier.
    const HEREDOC = {
      begin: /<<<[ \t]*(?:(\w+)|"(\w+)")\n/,
      end: /[ \t]*(\w+)\b/,
      contains: hljs.QUOTE_STRING_MODE.contains.concat(SUBST),
      'on:begin': (m, resp) => { resp.data._beginMatch = m[1] || m[2]; },
      'on:end': (m, resp) => { if (resp.data._beginMatch !== m[1]) resp.ignoreMatch(); },
    };

    // Nowdoc: `<<<'ID'` followed by a newline; no interpolation modes inside.
    // NOTE(review): END_SAME_AS_BEGIN presumably ties the closing identifier
    // to the captured opening one - confirm against the hljs helper.
    const NOWDOC = hljs.END_SAME_AS_BEGIN({
      begin: /<<<[ \t]*'(\w+)'\n/,
      end: /[ \t]*(\w+)\b/,
    });

    // Explicit list of valid whitespace because a non-breaking space might be
    // part of an IDENT_RE match.
    const WHITESPACE = '[ \t\n]';

    const STRING = {
      scope: 'string',
      variants: [
        DOUBLE_QUOTED,
        SINGLE_QUOTED,
        HEREDOC,
        NOWDOC
      ]
    };

    const NUMBER = {
      scope: 'number',
      variants: [
        { begin: `\\b0[bB][01]+(?:_[01]+)*\\b` }, // Binary w/ underscore support
        { begin: `\\b0[oO][0-7]+(?:_[0-7]+)*\\b` }, // Octals w/ underscore support
        { begin: `\\b0[xX][\\da-fA-F]+(?:_[\\da-fA-F]+)*\\b` }, // Hex w/ underscore support
        // Decimals w/ underscore support, with optional fragments and scientific exponent (e) suffix.
        { begin: `(?:\\b\\d+(?:_\\d+)*(\\.(?:\\d+(?:_\\d+)*))?|\\B\\.\\d+)(?:[eE][+-]?\\d+)?` }
      ],
      relevance: 0
    };

    const LITERALS = [
      "false",
      "null",
      "true"
    ];

    // Entries may carry a `|<relevance>` suffix (e.g. "match|0"); use
    // normalizeKeywords() to strip it before building a regex from the list.
    const KWS = [
      // Magic constants:
      // <https://www.php.net/manual/en/language.constants.predefined.php>
      "__CLASS__",
      "__DIR__",
      "__FILE__",
      "__FUNCTION__",
      "__COMPILER_HALT_OFFSET__",
      "__LINE__",
      "__METHOD__",
      "__NAMESPACE__",
      "__TRAIT__",
      // Functions that look like language constructs, or language constructs
      // that look like functions.
      // List of keywords that may not require parentheses:
      "die",
      "echo",
      "exit",
      "include",
      "include_once",
      "print",
      "require",
      "require_once",
      // These are not language constructs (they are functions) but operate on
      // the currently-executing function and can access the current symbol
      // table; they are intentionally NOT listed as keywords:
      //   compact extract func_get_arg func_get_args func_num_args
      //   get_called_class get_parent_class
      // Other keywords:
      // <https://www.php.net/manual/en/reserved.php>
      // <https://www.php.net/manual/en/language.types.type-juggling.php>
      "array",
      "abstract",
      "and",
      "as",
      "binary",
      "bool",
      "boolean",
      "break",
      "callable",
      "case",
      "catch",
      "class",
      "clone",
      "const",
      "continue",
      "declare",
      "default",
      "do",
      "double",
      "else",
      "elseif",
      "empty",
      "enddeclare",
      "endfor",
      "endforeach",
      "endif",
      "endswitch",
      "endwhile",
      "enum",
      "eval",
      "extends",
      "final",
      "finally",
      "float",
      "for",
      "foreach",
      "from",
      "global",
      "goto",
      "if",
      "implements",
      "instanceof",
      "insteadof",
      "int",
      "integer",
      "interface",
      "isset",
      "iterable",
      "list",
      "match|0",
      "mixed",
      "new",
      "never",
      "object",
      "or",
      "private",
      "protected",
      "public",
      "readonly",
      "real",
      "return",
      "string",
      "switch",
      "throw",
      "trait",
      "try",
      "unset",
      "use",
      "var",
      "void",
      "while",
      "xor",
      "yield"
    ];

    const BUILT_INS = [
      // Standard PHP library:
      // <https://www.php.net/manual/en/book.spl.php>
      "Error|0",
      "AppendIterator",
      "ArgumentCountError",
      "ArithmeticError",
      "ArrayIterator",
      "ArrayObject",
      "AssertionError",
      "BadFunctionCallException",
      "BadMethodCallException",
      "CachingIterator",
      "CallbackFilterIterator",
      "CompileError",
      "Countable",
      "DirectoryIterator",
      "DivisionByZeroError",
      "DomainException",
      "EmptyIterator",
      "ErrorException",
      "Exception",
      "FilesystemIterator",
      "FilterIterator",
      "GlobIterator",
      "InfiniteIterator",
      "InvalidArgumentException",
      "IteratorIterator",
      "LengthException",
      "LimitIterator",
      "LogicException",
      "MultipleIterator",
      "NoRewindIterator",
      "OutOfBoundsException",
      "OutOfRangeException",
      "OuterIterator",
      "OverflowException",
      "ParentIterator",
      "ParseError",
      "RangeException",
      "RecursiveArrayIterator",
      "RecursiveCachingIterator",
      "RecursiveCallbackFilterIterator",
      "RecursiveDirectoryIterator",
      "RecursiveFilterIterator",
      "RecursiveIterator",
      "RecursiveIteratorIterator",
      "RecursiveRegexIterator",
      "RecursiveTreeIterator",
      "RegexIterator",
      "RuntimeException",
      "SeekableIterator",
      "SplDoublyLinkedList",
      "SplFileInfo",
      "SplFileObject",
      "SplFixedArray",
      "SplHeap",
      "SplMaxHeap",
      "SplMinHeap",
      "SplObjectStorage",
      "SplObserver",
      "SplPriorityQueue",
      "SplQueue",
      "SplStack",
      "SplSubject",
      "SplTempFileObject",
      "TypeError",
      "UnderflowException",
      "UnexpectedValueException",
      "UnhandledMatchError",
      // Reserved interfaces:
      // <https://www.php.net/manual/en/reserved.interfaces.php>
      "ArrayAccess",
      "BackedEnum",
      "Closure",
      "Fiber",
      "Generator",
      "Iterator",
      "IteratorAggregate",
      "Serializable",
      "Stringable",
      "Throwable",
      "Traversable",
      "UnitEnum",
      "WeakReference",
      "WeakMap",
      // Reserved classes:
      // <https://www.php.net/manual/en/reserved.classes.php>
      "Directory",
      "__PHP_Incomplete_Class",
      "parent",
      "php_user_filter",
      "self",
      "static",
      "stdClass"
    ];

    /**
     * Dual-case keywords: every item is followed by its other-case twin
     * (upper-case twin for an all-lower-case item, lower-case twin otherwise).
     *
     * ["then","FILE"] =>
     * ["then", "THEN", "FILE", "file"]
     *
     * @param {string[]} items
     * @returns {string[]} a new array; `items` is not modified
     */
    const dualCase = (items) => items.flatMap((item) => {
      const lower = item.toLowerCase();
      return [ item, lower === item ? item.toUpperCase() : lower ];
    });

    const KEYWORDS = {
      keyword: KWS,
      literal: dualCase(LITERALS),
      built_in: BUILT_INS,
    };

    /**
     * Strips a trailing `|<digits>` relevance suffix from each keyword so the
     * bare words can be embedded in a regular expression.
     *
     * @param {string[]} items
     * @returns {string[]}
     */
    const normalizeKeywords = (items) => items.map((item) => item.replace(/\|\d+$/, ""));

    // `new ClassName` where ClassName is PascalCase and not a built-in.
    const CONSTRUCTOR_CALL = { variants: [
      {
        match: [
          /new/,
          regex.concat(WHITESPACE, "+"),
          // to prevent built ins from being confused as the class constructor call
          regex.concat("(?!", normalizeKeywords(BUILT_INS).join("\\b|"), "\\b)"),
          PASCAL_CASE_CLASS_NAME_RE,
        ],
        scope: {
          1: "keyword",
          4: "title.class",
        },
      }
    ] };

    // An identifier that is not immediately followed by `(`.
    const CONSTANT_REFERENCE = regex.concat(IDENT_RE, "\\b(?!\\()");

    // `::` expressions: `::CONST`, `::class`, `Foo::CONST`, `Foo::` and
    // `Foo::class`.
    const LEFT_AND_RIGHT_SIDE_OF_DOUBLE_COLON = { variants: [
      {
        match: [
          regex.concat(
            /::/,
            regex.lookahead(/(?!class\b)/)
          ),
          CONSTANT_REFERENCE,
        ],
        scope: { 2: "variable.constant", },
      },
      {
        match: [
          /::/,
          /class/,
        ],
        scope: { 2: "variable.language", },
      },
      {
        match: [
          PASCAL_CASE_CLASS_NAME_RE,
          regex.concat(
            /::/,
            regex.lookahead(/(?!class\b)/)
          ),
          CONSTANT_REFERENCE,
        ],
        scope: {
          1: "title.class",
          3: "variable.constant",
        },
      },
      {
        match: [
          PASCAL_CASE_CLASS_NAME_RE,
          regex.concat(
            "::",
            regex.lookahead(/(?!class\b)/)
          ),
        ],
        scope: { 1: "title.class", },
      },
      {
        match: [
          PASCAL_CASE_CLASS_NAME_RE,
          /::/,
          /class/,
        ],
        scope: {
          1: "title.class",
          3: "variable.language",
        },
      }
    ] };

    // `name:` in a call (the identifier must be followed by a single `:`,
    // not by `::`).
    const NAMED_ARGUMENT = {
      scope: 'attr',
      match: regex.concat(IDENT_RE, regex.lookahead(':'), regex.lookahead(/(?!::)/)),
    };

    // Parenthesised argument list of a function invocation.
    const PARAMS_MODE = {
      relevance: 0,
      begin: /\(/,
      end: /\)/,
      keywords: KEYWORDS,
      contains: [
        NAMED_ARGUMENT,
        VARIABLE,
        LEFT_AND_RIGHT_SIDE_OF_DOUBLE_COLON,
        hljs.C_BLOCK_COMMENT_MODE,
        STRING,
        NUMBER,
        CONSTRUCTOR_CALL,
      ],
    };

    // `identifier(`, where identifier is neither a keyword nor a built-in.
    const FUNCTION_INVOKE = {
      relevance: 0,
      match: [
        /\b/,
        // To prevent keywords from being confused as the function title.
        // Every alternative must end in `\b`; the two lists are therefore
        // glued with "\\b|" so the last keyword only rejects the exact word
        // and not every identifier that merely starts with it.
        regex.concat(
          "(?!fn\\b|function\\b|",
          normalizeKeywords(KWS).join("\\b|"),
          "\\b|",
          normalizeKeywords(BUILT_INS).join("\\b|"),
          "\\b)"
        ),
        IDENT_RE,
        regex.concat(WHITESPACE, "*"),
        regex.lookahead(/(?=\()/)
      ],
      scope: { 3: "title.function.invoke", },
      contains: [ PARAMS_MODE ]
    };
    // Arguments may themselves contain invocations. Added after the fact
    // because the two modes reference each other.
    PARAMS_MODE.contains.push(FUNCTION_INVOKE);

    const ATTRIBUTE_CONTAINS = [
      NAMED_ARGUMENT,
      LEFT_AND_RIGHT_SIDE_OF_DOUBLE_COLON,
      hljs.C_BLOCK_COMMENT_MODE,
      STRING,
      NUMBER,
      CONSTRUCTOR_CALL,
    ];

    // `#[Name ...]` attributes; nested `[...]` lists inside are handled by
    // the first child mode, which may contain itself.
    const ATTRIBUTES = {
      begin: regex.concat(/#\[\s*\\?/,
        regex.either(
          PASCAL_CASE_CLASS_NAME_RE,
          UPCASE_NAME_RE
        )
      ),
      beginScope: "meta",
      end: /]/,
      endScope: "meta",
      keywords: {
        literal: LITERALS,
        keyword: [
          'new',
          'array',
        ]
      },
      contains: [
        {
          begin: /\[/,
          end: /]/,
          keywords: {
            literal: LITERALS,
            keyword: [
              'new',
              'array',
            ]
          },
          contains: [
            'self',
            ...ATTRIBUTE_CONTAINS,
          ]
        },
        ...ATTRIBUTE_CONTAINS,
        {
          scope: 'meta',
          variants: [
            { match: PASCAL_CASE_CLASS_NAME_RE },
            { match: UPCASE_NAME_RE }
          ]
        }
      ]
    };

    return {
      case_insensitive: false,
      keywords: KEYWORDS,
      contains: [
        ATTRIBUTES,
        hljs.HASH_COMMENT_MODE,
        hljs.COMMENT('//', '$'),
        hljs.COMMENT(
          '/\\*',
          '\\*/',
          { contains: [
            {
              scope: 'doctag',
              match: '@[A-Za-z]+'
            }
          ] }
        ),
        {
          // Everything after `__halt_compiler();` is scoped as a comment; the
          // only way out is a closing `?>` tag (the `end` never matches).
          match: /__halt_compiler\(\);/,
          keywords: '__halt_compiler',
          starts: {
            scope: "comment",
            end: hljs.MATCH_NOTHING_RE,
            contains: [
              {
                match: /\?>/,
                scope: "meta",
                endsParent: true
              }
            ]
          }
        },
        PREPROCESSOR,
        {
          scope: 'variable.language',
          match: /\$this\b/
        },
        VARIABLE,
        FUNCTION_INVOKE,
        LEFT_AND_RIGHT_SIDE_OF_DOUBLE_COLON,
        {
          // `const NAME`
          match: [
            /const/,
            /\s/,
            IDENT_RE,
          ],
          scope: {
            1: "keyword",
            3: "variable.constant",
          },
        },
        CONSTRUCTOR_CALL,
        {
          // `fn` / `function` declarations up to (not including) `;` or `{`.
          scope: 'function',
          relevance: 0,
          beginKeywords: 'fn function',
          end: /[;{]/,
          excludeEnd: true,
          illegal: '[$%\\[]',
          contains: [
            { beginKeywords: 'use', },
            hljs.UNDERSCORE_TITLE_MODE,
            {
              begin: '=>', // No markup, just a relevance booster
              endsParent: true
            },
            {
              scope: 'params',
              begin: '\\(',
              end: '\\)',
              excludeBegin: true,
              excludeEnd: true,
              keywords: KEYWORDS,
              contains: [
                'self',
                ATTRIBUTES,
                VARIABLE,
                LEFT_AND_RIGHT_SIDE_OF_DOUBLE_COLON,
                hljs.C_BLOCK_COMMENT_MODE,
                STRING,
                NUMBER
              ]
            },
          ]
        },
        {
          // `enum` / `class` / `interface` / `trait` headers up to the `{`.
          scope: 'class',
          variants: [
            {
              beginKeywords: "enum",
              illegal: /[($"]/
            },
            {
              beginKeywords: "class interface trait",
              illegal: /[:($"]/
            }
          ],
          relevance: 0,
          end: /\{/,
          excludeEnd: true,
          contains: [
            { beginKeywords: 'extends implements' },
            hljs.UNDERSCORE_TITLE_MODE
          ]
        },
        // both use and namespace still use "old style" rules (vs multi-match)
        // because the namespace name can include `\` and we still want each
        // element to be treated as its own *individual* title
        {
          beginKeywords: 'namespace',
          relevance: 0,
          end: ';',
          illegal: /[.']/,
          contains: [ hljs.inherit(hljs.UNDERSCORE_TITLE_MODE, { scope: "title.class" }) ]
        },
        {
          beginKeywords: 'use',
          relevance: 0,
          end: ';',
          contains: [
            // TODO: title.function vs title.class
            {
              match: /\b(as|const|function)\b/,
              scope: "keyword"
            },
            // TODO: could be title.class or title.function
            hljs.UNDERSCORE_TITLE_MODE
          ]
        },
        STRING,
        NUMBER,
      ]
    };
  }
  610. module.exports = php;