# _stop_words.py
  1. # This list of English stop words is taken from the "Glasgow Information
  2. # Retrieval Group". The original list can be found at
  3. # http://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words
  4. ENGLISH_STOP_WORDS = frozenset(
  5. [
  6. "a",
  7. "about",
  8. "above",
  9. "across",
  10. "after",
  11. "afterwards",
  12. "again",
  13. "against",
  14. "all",
  15. "almost",
  16. "alone",
  17. "along",
  18. "already",
  19. "also",
  20. "although",
  21. "always",
  22. "am",
  23. "among",
  24. "amongst",
  25. "amoungst",
  26. "amount",
  27. "an",
  28. "and",
  29. "another",
  30. "any",
  31. "anyhow",
  32. "anyone",
  33. "anything",
  34. "anyway",
  35. "anywhere",
  36. "are",
  37. "around",
  38. "as",
  39. "at",
  40. "back",
  41. "be",
  42. "became",
  43. "because",
  44. "become",
  45. "becomes",
  46. "becoming",
  47. "been",
  48. "before",
  49. "beforehand",
  50. "behind",
  51. "being",
  52. "below",
  53. "beside",
  54. "besides",
  55. "between",
  56. "beyond",
  57. "bill",
  58. "both",
  59. "bottom",
  60. "but",
  61. "by",
  62. "call",
  63. "can",
  64. "cannot",
  65. "cant",
  66. "co",
  67. "con",
  68. "could",
  69. "couldnt",
  70. "cry",
  71. "de",
  72. "describe",
  73. "detail",
  74. "do",
  75. "done",
  76. "down",
  77. "due",
  78. "during",
  79. "each",
  80. "eg",
  81. "eight",
  82. "either",
  83. "eleven",
  84. "else",
  85. "elsewhere",
  86. "empty",
  87. "enough",
  88. "etc",
  89. "even",
  90. "ever",
  91. "every",
  92. "everyone",
  93. "everything",
  94. "everywhere",
  95. "except",
  96. "few",
  97. "fifteen",
  98. "fifty",
  99. "fill",
  100. "find",
  101. "fire",
  102. "first",
  103. "five",
  104. "for",
  105. "former",
  106. "formerly",
  107. "forty",
  108. "found",
  109. "four",
  110. "from",
  111. "front",
  112. "full",
  113. "further",
  114. "get",
  115. "give",
  116. "go",
  117. "had",
  118. "has",
  119. "hasnt",
  120. "have",
  121. "he",
  122. "hence",
  123. "her",
  124. "here",
  125. "hereafter",
  126. "hereby",
  127. "herein",
  128. "hereupon",
  129. "hers",
  130. "herself",
  131. "him",
  132. "himself",
  133. "his",
  134. "how",
  135. "however",
  136. "hundred",
  137. "i",
  138. "ie",
  139. "if",
  140. "in",
  141. "inc",
  142. "indeed",
  143. "interest",
  144. "into",
  145. "is",
  146. "it",
  147. "its",
  148. "itself",
  149. "keep",
  150. "last",
  151. "latter",
  152. "latterly",
  153. "least",
  154. "less",
  155. "ltd",
  156. "made",
  157. "many",
  158. "may",
  159. "me",
  160. "meanwhile",
  161. "might",
  162. "mill",
  163. "mine",
  164. "more",
  165. "moreover",
  166. "most",
  167. "mostly",
  168. "move",
  169. "much",
  170. "must",
  171. "my",
  172. "myself",
  173. "name",
  174. "namely",
  175. "neither",
  176. "never",
  177. "nevertheless",
  178. "next",
  179. "nine",
  180. "no",
  181. "nobody",
  182. "none",
  183. "noone",
  184. "nor",
  185. "not",
  186. "nothing",
  187. "now",
  188. "nowhere",
  189. "of",
  190. "off",
  191. "often",
  192. "on",
  193. "once",
  194. "one",
  195. "only",
  196. "onto",
  197. "or",
  198. "other",
  199. "others",
  200. "otherwise",
  201. "our",
  202. "ours",
  203. "ourselves",
  204. "out",
  205. "over",
  206. "own",
  207. "part",
  208. "per",
  209. "perhaps",
  210. "please",
  211. "put",
  212. "rather",
  213. "re",
  214. "same",
  215. "see",
  216. "seem",
  217. "seemed",
  218. "seeming",
  219. "seems",
  220. "serious",
  221. "several",
  222. "she",
  223. "should",
  224. "show",
  225. "side",
  226. "since",
  227. "sincere",
  228. "six",
  229. "sixty",
  230. "so",
  231. "some",
  232. "somehow",
  233. "someone",
  234. "something",
  235. "sometime",
  236. "sometimes",
  237. "somewhere",
  238. "still",
  239. "such",
  240. "system",
  241. "take",
  242. "ten",
  243. "than",
  244. "that",
  245. "the",
  246. "their",
  247. "them",
  248. "themselves",
  249. "then",
  250. "thence",
  251. "there",
  252. "thereafter",
  253. "thereby",
  254. "therefore",
  255. "therein",
  256. "thereupon",
  257. "these",
  258. "they",
  259. "thick",
  260. "thin",
  261. "third",
  262. "this",
  263. "those",
  264. "though",
  265. "three",
  266. "through",
  267. "throughout",
  268. "thru",
  269. "thus",
  270. "to",
  271. "together",
  272. "too",
  273. "top",
  274. "toward",
  275. "towards",
  276. "twelve",
  277. "twenty",
  278. "two",
  279. "un",
  280. "under",
  281. "until",
  282. "up",
  283. "upon",
  284. "us",
  285. "very",
  286. "via",
  287. "was",
  288. "we",
  289. "well",
  290. "were",
  291. "what",
  292. "whatever",
  293. "when",
  294. "whence",
  295. "whenever",
  296. "where",
  297. "whereafter",
  298. "whereas",
  299. "whereby",
  300. "wherein",
  301. "whereupon",
  302. "wherever",
  303. "whether",
  304. "which",
  305. "while",
  306. "whither",
  307. "who",
  308. "whoever",
  309. "whole",
  310. "whom",
  311. "whose",
  312. "why",
  313. "will",
  314. "with",
  315. "within",
  316. "without",
  317. "would",
  318. "yet",
  319. "you",
  320. "your",
  321. "yours",
  322. "yourself",
  323. "yourselves",
  324. ]
  325. )