encoding.js 99 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
7327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301
  1. // This is free and unencumbered software released into the public domain.
  2. // See LICENSE.md for more information.
  3. /**
  4. * @fileoverview Global |this| required for resolving indexes in node.
  5. * @suppress {globalThis}
  6. */
  7. (function(global) {
  8. 'use strict';
  9. //
  10. // Utilities
  11. //
  /**
   * Tests whether a number lies inside a closed interval.
   * @param {number} a The number to test.
   * @param {number} min Lower bound of the interval, inclusive.
   * @param {number} max Upper bound of the interval, inclusive.
   * @return {boolean} True when min <= a and a <= max.
   */
  function inRange(a, min, max) {
    var atOrAboveMin = a >= min;
    return atOrAboveMin && a <= max;
  }
  /**
   * Membership test built on Array.prototype.indexOf, so items are compared
   * with strict equality (a NaN item is therefore never found).
   * @param {!Array.<*>} array The array to search.
   * @param {*} item The value to look for.
   * @return {boolean} True if |item| occurs in |array|.
   */
  function includes(array, item) {
    var position = array.indexOf(item);
    return position >= 0;
  }
  // Local shorthand for Math.floor.
  var floor = Math.floor;
  /**
   * Coerces an optional argument to a dictionary-like object.
   * @param {*} o The value to convert.
   * @return {Object} A fresh empty object when |o| is undefined, otherwise
   *     |o| itself when it is an object (functions included).
   * @throws {TypeError} If |o| is null or any other primitive.
   */
  function ToDictionary(o) {
    if (o === undefined) {
      return {};
    }
    // Object(o) returns o itself only when o already is an object.
    var isObject = Object(o) === o;
    if (!isObject) {
      throw TypeError('Could not convert argument to dictionary');
    }
    return o;
  }
  /**
   * Converts a string of UTF-16 code units into an array of code points,
   * following the WebIDL "obtain Unicode" steps
   * (https://heycam.github.io/webidl/#dfn-obtain-unicode): each well-formed
   * surrogate pair becomes one supplementary code point and every unpaired
   * surrogate becomes U+FFFD REPLACEMENT CHARACTER.
   * @param {string} string Input string of UTF-16 code units.
   * @return {!Array.<number>} Code points.
   */
  function stringToCodePoints(string) {
    var units = String(string);
    var length = units.length;
    var code_points = [];
    for (var pos = 0; pos < length; ++pos) {
      var unit = units.charCodeAt(pos);

      // Not a surrogate: the code unit is the code point.
      if (unit < 0xD800 || unit > 0xDFFF) {
        code_points.push(unit);
        continue;
      }

      // A trail surrogate (0xDC00-0xDFFF) with no lead before it.
      if (unit >= 0xDC00) {
        code_points.push(0xFFFD);
        continue;
      }

      // A lead surrogate (0xD800-0xDBFF): only valid when the next code
      // unit exists and is a trail surrogate.
      var next = pos + 1 < length ? units.charCodeAt(pos + 1) : -1;
      if (next >= 0xDC00 && next <= 0xDFFF) {
        // 2^16 + 2^10 * (lead & 0x3FF) + (trail & 0x3FF)
        code_points.push(0x10000 + ((unit & 0x3FF) << 10) + (next & 0x3FF));
        // The trail surrogate has been consumed as well.
        ++pos;
      } else {
        code_points.push(0xFFFD);
      }
    }
    return code_points;
  }
  /**
   * Converts an array of code points into a string of UTF-16 code units.
   * Values up to 0xFFFF are emitted as a single code unit; anything else is
   * emitted as a lead/trail surrogate pair.
   * @param {!Array.<number>} code_points Array of code points.
   * @return {string} string String of UTF-16 code units.
   */
  function codePointsToString(code_points) {
    var result = '';
    var count = code_points.length;
    var i = 0;
    while (i < count) {
      var cp = code_points[i];
      i += 1;
      if (cp <= 0xFFFF) {
        result += String.fromCharCode(cp);
        continue;
      }
      // Split the offset above the BMP into 10 high bits and 10 low bits.
      var offset = cp - 0x10000;
      var lead = (offset >> 10) + 0xD800;
      var trail = (offset & 0x3FF) + 0xDC00;
      result += String.fromCharCode(lead, trail);
    }
    return result;
  }
  122. //
  123. // Implementation of Encoding specification
  124. // https://encoding.spec.whatwg.org/
  125. //
  126. //
  127. // 4. Terminology
  128. //
  /**
   * An ASCII byte is a byte in the range 0x00 to 0x7F, inclusive.
   * @param {number} a The number to test.
   * @return {boolean} True if 0x00 <= a <= 0x7F.
   */
  function isASCIIByte(a) {
    var nonNegative = a >= 0x00;
    return nonNegative && a <= 0x7F;
  }
  /**
   * An ASCII code point is a code point in the range U+0000 to U+007F,
   * inclusive. The numeric test is the same as for ASCII bytes, so the same
   * function is reused under a second name.
   */
  var isASCIICodePoint = isASCIIByte;
  /**
   * End-of-stream is a special token that signifies no more tokens are in
   * the stream. It is -1, which cannot collide with a byte or a code point
   * because those are never negative.
   * @const
   */
  var end_of_stream = -1;
  /**
   * A stream represents an ordered sequence of tokens.
   *
   * The tokens are copied from the argument and stored in reverse order, so
   * the front of the stream is the END of |this.tokens|; reading and
   * prepending are then plain pop/push operations.
   *
   * @constructor
   * @param {!(Array.<number>|Uint8Array)} tokens Array of tokens that provide
   *     the stream. The argument itself is not modified.
   */
  function Stream(tokens) {
    /** @type {!Array.<number>} */
    this.tokens = [].slice.call(tokens);
    // Reversed as push/pop is more efficient than shift/unshift.
    this.tokens.reverse();
  }
  Stream.prototype = {
    /**
     * @return {boolean} True if end-of-stream has been hit.
     */
    endOfStream: function() {
      return !this.tokens.length;
    },
    /**
     * When a token is read from a stream, the first token in the
     * stream must be returned and subsequently removed, and
     * end-of-stream must be returned otherwise.
     *
     * @return {number} Get the next token from the stream, or
     *     end_of_stream.
     */
    read: function() {
      if (!this.tokens.length)
        return end_of_stream;
      return this.tokens.pop();
    },
    /**
     * When one or more tokens are prepended to a stream, those tokens
     * must be inserted, in given order, before the first token in the
     * stream.
     *
     * The array passed in is only read, never emptied, so callers may keep
     * using it afterwards.
     *
     * @param {(number|!Array.<number>)} token The token(s) to prepend to the
     *     stream.
     */
    prepend: function(token) {
      if (Array.isArray(token)) {
        var tokens = /**@type {!Array.<number>}*/(token);
        // Storage is reversed: pushing the last token first leaves the
        // first token on top, i.e. next to be read.
        for (var i = tokens.length - 1; i >= 0; --i)
          this.tokens.push(tokens[i]);
      } else {
        this.tokens.push(token);
      }
    },
    /**
     * When one or more tokens are pushed to a stream, those tokens
     * must be inserted, in given order, after the last token in the
     * stream.
     *
     * The array passed in is only read, never emptied, so callers may keep
     * using it afterwards.
     *
     * @param {(number|!Array.<number>)} token The tokens(s) to push to the
     *     stream.
     */
    push: function(token) {
      if (Array.isArray(token)) {
        var tokens = /**@type {!Array.<number>}*/(token);
        // Storage is reversed: the back of the stream is index 0, so each
        // token in turn becomes the new last-to-be-read element.
        for (var i = 0; i < tokens.length; ++i)
          this.tokens.unshift(tokens[i]);
      } else {
        this.tokens.unshift(token);
      }
    }
  };
  215. //
  216. // 5. Encodings
  217. //
  218. // 5.1 Encoders and decoders
  /**
   * Sentinel value; the handler documentation in this file lists |finished|
   * as a possible handler return value.
   * @const
   */
  var finished = -1;
  /**
   * Reports a decoding error: throws in fatal mode, otherwise yields the
   * code point to insert in place of the undecodable input.
   * @param {boolean} fatal If true, decoding errors raise an exception.
   * @param {number=} opt_code_point Override the standard fallback code point.
   * @return {number} The code point to insert on a decoding error:
   *     |opt_code_point| when supplied, U+FFFD REPLACEMENT CHARACTER
   *     otherwise.
   * @throws {TypeError} If |fatal| is true.
   */
  function decoderError(fatal, opt_code_point) {
    if (fatal)
      throw TypeError('Decoder error');
    // Fall back only when no override was supplied. A truthiness test
    // would also discard an explicit override of 0 (U+0000).
    if (opt_code_point === undefined || opt_code_point === null)
      return 0xFFFD;
    return opt_code_point;
  }
  /**
   * Reports a code point that the current encoding cannot represent.
   * @param {number} code_point The code point that could not be encoded.
   * @return {number} Always throws, no value is actually returned.
   * @throws {TypeError} Always; the message contains |code_point|.
   */
  function encoderError(code_point) {
    var message = 'The code point ' + code_point + ' could not be encoded.';
    throw TypeError(message);
  }
  /**
   * Interface describing a decoder: an object whose |handler| turns bytes
   * read from a stream into code points. The method body here is an empty
   * placeholder.
   * @interface
   */
  function Decoder() {}
  Decoder.prototype = {
    /**
     * @param {Stream} stream The stream of bytes being decoded.
     * @param {number} bite The next byte read from the stream.
     * @return {?(number|!Array.<number>)} The next code point(s)
     *     decoded, or null if not enough data exists in the input
     *     stream to decode a complete code point, or |finished|.
     */
    handler: function(stream, bite) {}
  };
  /**
   * Interface describing an encoder: an object whose |handler| turns code
   * points read from a stream into bytes. The method body here is an empty
   * placeholder.
   * @interface
   */
  function Encoder() {}
  Encoder.prototype = {
    /**
     * @param {Stream} stream The stream of code points being encoded.
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} Byte(s) to emit, or |finished|.
     */
    handler: function(stream, code_point) {}
  };
  260. // 5.2 Names and labels
  261. // TODO: Define @typedef for Encoding: {name:string,labels:Array.<string>}
  262. // https://github.com/google/closure-compiler/issues/247
  /**
   * Looks up an encoding by label ("get an encoding" in the Encoding
   * Standard).
   *
   * Both normalisation steps are deliberately ASCII-only, as the standard
   * requires. String.prototype.trim() would also strip U+000B, U+00A0,
   * U+FEFF, U+2028 and other Unicode white space, and
   * String.prototype.toLowerCase() maps U+212A KELVIN SIGN to "k"; either
   * would make labels match that the standard treats as failures.
   *
   * @param {string} label The encoding label.
   * @return {?{name:string,labels:Array.<string>}} The matching encoding, or
   *     null when the label is not recognised.
   */
  function getEncoding(label) {
    // ASCII whitespace: U+0009 TAB, U+000A LF, U+000C FF, U+000D CR and
    // U+0020 SPACE.
    var ASCII_WHITESPACE = '\t\n\f\r ';
    var text = String(label);
    // 1. Remove any leading and trailing ASCII whitespace from label.
    var start = 0;
    var end = text.length;
    while (start < end && ASCII_WHITESPACE.indexOf(text.charAt(start)) !== -1)
      start += 1;
    while (end > start && ASCII_WHITESPACE.indexOf(text.charAt(end - 1)) !== -1)
      end -= 1;
    // 2. If label is an ASCII case-insensitive match for any of the
    // labels listed in the table below, return the corresponding
    // encoding, and failure otherwise. Every registered label is lower
    // case, so folding A-Z (and nothing else) is sufficient.
    label = text.slice(start, end).replace(/[A-Z]/g, function(upper) {
      return String.fromCharCode(upper.charCodeAt(0) + 0x20);
    });
    if (Object.prototype.hasOwnProperty.call(label_to_encoding, label)) {
      return label_to_encoding[label];
    }
    return null;
  }
  /**
   * Encodings table: https://encoding.spec.whatwg.org/encodings.json
   *
   * One entry per section heading of the standard; each entry lists the
   * encodings under that heading, and each encoding lists every label that
   * selects it. All labels are lower case.
   * @const
   * @type {!Array.<{
   *   heading: string,
   *   encodings: Array.<{name:string,labels:Array.<string>}>
   * }>}
   */
  var encodings = [
    {
      "encodings": [
        {"labels": ["unicode-1-1-utf-8", "utf-8", "utf8"], "name": "UTF-8"}
      ],
      "heading": "The Encoding"
    },
    {
      "encodings": [
        {"labels": ["866", "cp866", "csibm866", "ibm866"], "name": "IBM866"},
        {
          "labels": ["csisolatin2", "iso-8859-2", "iso-ir-101", "iso8859-2",
                     "iso88592", "iso_8859-2", "iso_8859-2:1987", "l2",
                     "latin2"],
          "name": "ISO-8859-2"
        },
        {
          "labels": ["csisolatin3", "iso-8859-3", "iso-ir-109", "iso8859-3",
                     "iso88593", "iso_8859-3", "iso_8859-3:1988", "l3",
                     "latin3"],
          "name": "ISO-8859-3"
        },
        {
          "labels": ["csisolatin4", "iso-8859-4", "iso-ir-110", "iso8859-4",
                     "iso88594", "iso_8859-4", "iso_8859-4:1988", "l4",
                     "latin4"],
          "name": "ISO-8859-4"
        },
        {
          "labels": ["csisolatincyrillic", "cyrillic", "iso-8859-5",
                     "iso-ir-144", "iso8859-5", "iso88595", "iso_8859-5",
                     "iso_8859-5:1988"],
          "name": "ISO-8859-5"
        },
        {
          "labels": ["arabic", "asmo-708", "csiso88596e", "csiso88596i",
                     "csisolatinarabic", "ecma-114", "iso-8859-6",
                     "iso-8859-6-e", "iso-8859-6-i", "iso-ir-127",
                     "iso8859-6", "iso88596", "iso_8859-6",
                     "iso_8859-6:1987"],
          "name": "ISO-8859-6"
        },
        {
          "labels": ["csisolatingreek", "ecma-118", "elot_928", "greek",
                     "greek8", "iso-8859-7", "iso-ir-126", "iso8859-7",
                     "iso88597", "iso_8859-7", "iso_8859-7:1987",
                     "sun_eu_greek"],
          "name": "ISO-8859-7"
        },
        {
          "labels": ["csiso88598e", "csisolatinhebrew", "hebrew",
                     "iso-8859-8", "iso-8859-8-e", "iso-ir-138", "iso8859-8",
                     "iso88598", "iso_8859-8", "iso_8859-8:1988", "visual"],
          "name": "ISO-8859-8"
        },
        {
          "labels": ["csiso88598i", "iso-8859-8-i", "logical"],
          "name": "ISO-8859-8-I"
        },
        {
          "labels": ["csisolatin6", "iso-8859-10", "iso-ir-157",
                     "iso8859-10", "iso885910", "l6", "latin6"],
          "name": "ISO-8859-10"
        },
        {
          "labels": ["iso-8859-13", "iso8859-13", "iso885913"],
          "name": "ISO-8859-13"
        },
        {
          "labels": ["iso-8859-14", "iso8859-14", "iso885914"],
          "name": "ISO-8859-14"
        },
        {
          "labels": ["csisolatin9", "iso-8859-15", "iso8859-15", "iso885915",
                     "iso_8859-15", "l9"],
          "name": "ISO-8859-15"
        },
        {"labels": ["iso-8859-16"], "name": "ISO-8859-16"},
        {
          "labels": ["cskoi8r", "koi", "koi8", "koi8-r", "koi8_r"],
          "name": "KOI8-R"
        },
        {"labels": ["koi8-ru", "koi8-u"], "name": "KOI8-U"},
        {
          "labels": ["csmacintosh", "mac", "macintosh", "x-mac-roman"],
          "name": "macintosh"
        },
        {
          "labels": ["dos-874", "iso-8859-11", "iso8859-11", "iso885911",
                     "tis-620", "windows-874"],
          "name": "windows-874"
        },
        {
          "labels": ["cp1250", "windows-1250", "x-cp1250"],
          "name": "windows-1250"
        },
        {
          "labels": ["cp1251", "windows-1251", "x-cp1251"],
          "name": "windows-1251"
        },
        {
          "labels": ["ansi_x3.4-1968", "ascii", "cp1252", "cp819",
                     "csisolatin1", "ibm819", "iso-8859-1", "iso-ir-100",
                     "iso8859-1", "iso88591", "iso_8859-1",
                     "iso_8859-1:1987", "l1", "latin1", "us-ascii",
                     "windows-1252", "x-cp1252"],
          "name": "windows-1252"
        },
        {
          "labels": ["cp1253", "windows-1253", "x-cp1253"],
          "name": "windows-1253"
        },
        {
          "labels": ["cp1254", "csisolatin5", "iso-8859-9", "iso-ir-148",
                     "iso8859-9", "iso88599", "iso_8859-9",
                     "iso_8859-9:1989", "l5", "latin5", "windows-1254",
                     "x-cp1254"],
          "name": "windows-1254"
        },
        {
          "labels": ["cp1255", "windows-1255", "x-cp1255"],
          "name": "windows-1255"
        },
        {
          "labels": ["cp1256", "windows-1256", "x-cp1256"],
          "name": "windows-1256"
        },
        {
          "labels": ["cp1257", "windows-1257", "x-cp1257"],
          "name": "windows-1257"
        },
        {
          "labels": ["cp1258", "windows-1258", "x-cp1258"],
          "name": "windows-1258"
        },
        {
          "labels": ["x-mac-cyrillic", "x-mac-ukrainian"],
          "name": "x-mac-cyrillic"
        }
      ],
      "heading": "Legacy single-byte encodings"
    },
    {
      "encodings": [
        {
          "labels": ["chinese", "csgb2312", "csiso58gb231280", "gb2312",
                     "gb_2312", "gb_2312-80", "gbk", "iso-ir-58", "x-gbk"],
          "name": "GBK"
        },
        {"labels": ["gb18030"], "name": "gb18030"}
      ],
      "heading": "Legacy multi-byte Chinese (simplified) encodings"
    },
    {
      "encodings": [
        {
          "labels": ["big5", "big5-hkscs", "cn-big5", "csbig5", "x-x-big5"],
          "name": "Big5"
        }
      ],
      "heading": "Legacy multi-byte Chinese (traditional) encodings"
    },
    {
      "encodings": [
        {
          "labels": ["cseucpkdfmtjapanese", "euc-jp", "x-euc-jp"],
          "name": "EUC-JP"
        },
        {"labels": ["csiso2022jp", "iso-2022-jp"], "name": "ISO-2022-JP"},
        {
          "labels": ["csshiftjis", "ms932", "ms_kanji", "shift-jis",
                     "shift_jis", "sjis", "windows-31j", "x-sjis"],
          "name": "Shift_JIS"
        }
      ],
      "heading": "Legacy multi-byte Japanese encodings"
    },
    {
      "encodings": [
        {
          "labels": ["cseuckr", "csksc56011987", "euc-kr", "iso-ir-149",
                     "korean", "ks_c_5601-1987", "ks_c_5601-1989", "ksc5601",
                     "ksc_5601", "windows-949"],
          "name": "EUC-KR"
        }
      ],
      "heading": "Legacy multi-byte Korean encodings"
    },
    {
      "encodings": [
        {
          "labels": ["csiso2022kr", "hz-gb-2312", "iso-2022-cn",
                     "iso-2022-cn-ext", "iso-2022-kr"],
          "name": "replacement"
        },
        {"labels": ["utf-16be"], "name": "UTF-16BE"},
        {"labels": ["utf-16", "utf-16le"], "name": "UTF-16LE"},
        {"labels": ["x-user-defined"], "name": "x-user-defined"}
      ],
      "heading": "Legacy miscellaneous encodings"
    }
  ];
  // Label to encoding registry: maps every label in the |encodings| table
  // to the encoding object that lists it.
  /** @type {Object.<string,{name:string,labels:Array.<string>}>} */
  var label_to_encoding = {};
  (function() {
    // Indexed loops inside a private scope, so no loop variables are added
    // to the enclosing scope.
    for (var c = 0; c < encodings.length; ++c) {
      var group = encodings[c].encodings;
      for (var e = 0; e < group.length; ++e) {
        var encoding = group[e];
        var labels = encoding.labels;
        for (var l = 0; l < labels.length; ++l) {
          label_to_encoding[labels[l]] = encoding;
        }
      }
    }
  }());
  // Registries of encoder/decoder factories, by encoding name. Both start
  // out empty here.
  /** @type {Object.<string, function({fatal:boolean}): Encoder>} */
  var encoders = {};
  /** @type {Object.<string, function({fatal:boolean}): Decoder>} */
  var decoders = {};
  756. //
  757. // 6. Indexes
  758. //
  /**
   * Looks up the code point stored at |pointer| in |index|.
   * @param {number} pointer The |pointer| to search for.
   * @param {(!Array.<?number>|undefined)} index The |index| to search within.
   * @return {?number} The code point corresponding to |pointer| in |index|,
   *     or null if |pointer| is not in |index| (the index is missing, the
   *     pointer is out of range, or the slot holds null).
   */
  function indexCodePointFor(pointer, index) {
    if (!index) return null;
    var code_point = index[pointer];
    // Only an absent slot means "not in index". A truthiness test would
    // also report a stored code point of 0 (U+0000) as missing.
    if (code_point === undefined || code_point === null) return null;
    return code_point;
  }
  /**
   * Reverse lookup: finds where |code_point| first occurs in |index|.
   * @param {number} code_point The |code point| to search for.
   * @param {!Array.<?number>} index The |index| to search within.
   * @return {?number} The first pointer corresponding to |code point| in
   *     |index|, or null if |code point| is not in |index|.
   */
  function indexPointerFor(code_point, index) {
    var found = index.indexOf(code_point);
    if (found === -1) {
      return null;
    }
    return found;
  }
  /**
   * Fetches a named index from the 'encoding-indexes' table on |global|.
   * @param {string} name Name of the index.
   * @return {(!Array.<number>|!Array.<Array.<number>>)} The value stored
   *     under |name| (undefined if the table has no such entry).
   * @throws {Error} If |global| has no 'encoding-indexes' property.
   */
  function index(name) {
    var table_key = 'encoding-indexes';
    if (table_key in global) {
      return global[table_key][name];
    }
    throw Error("Indexes missing. Did you forget to include encoding-indexes.js first?");
  }
  /**
   * Maps a gb18030 ranges pointer to a code point ("index gb18030 ranges
   * code point" in the Encoding Standard).
   * @param {number} pointer The |pointer| to search for in the gb18030 index.
   * @return {?number} The code point corresponding to |pointer|, or null if
   *     |pointer| falls in a span the gb18030 ranges index does not cover.
   */
  function indexGB18030RangesCodePointFor(pointer) {
    // 1. Pointers strictly between 39419 and 189000, and pointers above
    // 1237575, have no code point.
    var in_gap = pointer > 39419 && pointer < 189000;
    if (in_gap || pointer > 1237575) {
      return null;
    }

    // 2. Pointer 7457 is special-cased to U+E7C7.
    if (pointer === 7457) {
      return 0xE7C7;
    }

    // 3. Walk the [pointer, code point] entries in order, remembering the
    // last one whose pointer is equal to or less than |pointer|; stop at
    // the first entry that lies beyond it.
    var ranges = index('gb18030-ranges');
    var offset = 0;
    var code_point_offset = 0;
    for (var i = 0; i < ranges.length && ranges[i][0] <= pointer; ++i) {
      offset = ranges[i][0];
      code_point_offset = ranges[i][1];
    }

    // 4. The code point lies as far past the entry's code point as
    // |pointer| lies past the entry's pointer.
    return code_point_offset + pointer - offset;
  }
  823. /**
  824. * @param {number} code_point The |code point| to locate in the gb18030 index.
  825. * @return {number} The first pointer corresponding to |code point| in the
  826. * gb18030 index.
  827. */
  828. function indexGB18030RangesPointerFor(code_point) {
  829. // 1. If code point is U+E7C7, return pointer 7457.
  830. if (code_point === 0xE7C7) return 7457;
  831. // 2. Let offset be the last code point in index gb18030 ranges
  832. // that is equal to or less than code point and let pointer offset
  833. // be its corresponding pointer.
  834. var offset = 0;
  835. var pointer_offset = 0;
  836. var idx = index('gb18030-ranges');
  837. var i;
  838. for (i = 0; i < idx.length; ++i) {
  839. /** @type {!Array.<number>} */
  840. var entry = idx[i];
  841. if (entry[1] <= code_point) {
  842. offset = entry[1];
  843. pointer_offset = entry[0];
  844. } else {
  845. break;
  846. }
  847. }
  848. // 3. Return a pointer whose value is pointer offset + code point
  849. // − offset.
  850. return pointer_offset + code_point - offset;
  851. }
  852. /**
  853. * @param {number} code_point The |code_point| to search for in the Shift_JIS
  854. * index.
  855. * @return {?number} The code point corresponding to |pointer| in |index|,
  856. * or null if |code point| is not in the Shift_JIS index.
  857. */
  858. function indexShiftJISPointerFor(code_point) {
  859. // 1. Let index be index jis0208 excluding all entries whose
  860. // pointer is in the range 8272 to 8835, inclusive.
  861. shift_jis_index = shift_jis_index ||
  862. index('jis0208').map(function(code_point, pointer) {
  863. return inRange(pointer, 8272, 8835) ? null : code_point;
  864. });
  865. var index_ = shift_jis_index;
  866. // 2. Return the index pointer for code point in index.
  867. return index_.indexOf(code_point);
  868. }
  869. var shift_jis_index;
  870. /**
  871. * @param {number} code_point The |code_point| to search for in the big5
  872. * index.
  873. * @return {?number} The code point corresponding to |pointer| in |index|,
  874. * or null if |code point| is not in the big5 index.
  875. */
  876. function indexBig5PointerFor(code_point) {
  877. // 1. Let index be index Big5 excluding all entries whose pointer
  878. big5_index_no_hkscs = big5_index_no_hkscs ||
  879. index('big5').map(function(code_point, pointer) {
  880. return (pointer < (0xA1 - 0x81) * 157) ? null : code_point;
  881. });
  882. var index_ = big5_index_no_hkscs;
  883. // 2. If code point is U+2550, U+255E, U+2561, U+256A, U+5341, or
  884. // U+5345, return the last pointer corresponding to code point in
  885. // index.
  886. if (code_point === 0x2550 || code_point === 0x255E ||
  887. code_point === 0x2561 || code_point === 0x256A ||
  888. code_point === 0x5341 || code_point === 0x5345) {
  889. return index_.lastIndexOf(code_point);
  890. }
  891. // 3. Return the index pointer for code point in index.
  892. return indexPointerFor(code_point, index_);
  893. }
  894. var big5_index_no_hkscs;
  895. //
  896. // 8. API
  897. //
/**
 * Label used by the TextDecoder and TextEncoder constructors when the
 * caller does not pass one.
 * @const
 */
var DEFAULT_ENCODING = 'utf-8';
  899. // 8.1 Interface TextDecoder
  900. /**
  901. * @constructor
  902. * @param {string=} label The label of the encoding;
  903. * defaults to 'utf-8'.
  904. * @param {Object=} options
  905. */
  906. function TextDecoder(label, options) {
  907. // Web IDL conventions
  908. if (!(this instanceof TextDecoder))
  909. throw TypeError('Called as a function. Did you forget \'new\'?');
  910. label = label !== undefined ? String(label) : DEFAULT_ENCODING;
  911. options = ToDictionary(options);
  912. // A TextDecoder object has an associated encoding, decoder,
  913. // stream, ignore BOM flag (initially unset), BOM seen flag
  914. // (initially unset), error mode (initially replacement), and do
  915. // not flush flag (initially unset).
  916. /** @private */
  917. this._encoding = null;
  918. /** @private @type {?Decoder} */
  919. this._decoder = null;
  920. /** @private @type {boolean} */
  921. this._ignoreBOM = false;
  922. /** @private @type {boolean} */
  923. this._BOMseen = false;
  924. /** @private @type {string} */
  925. this._error_mode = 'replacement';
  926. /** @private @type {boolean} */
  927. this._do_not_flush = false;
  928. // 1. Let encoding be the result of getting an encoding from
  929. // label.
  930. var encoding = getEncoding(label);
  931. // 2. If encoding is failure or replacement, throw a RangeError.
  932. if (encoding === null || encoding.name === 'replacement')
  933. throw RangeError('Unknown encoding: ' + label);
  934. if (!decoders[encoding.name]) {
  935. throw Error('Decoder not present.' +
  936. ' Did you forget to include encoding-indexes.js first?');
  937. }
  938. // 3. Let dec be a new TextDecoder object.
  939. var dec = this;
  940. // 4. Set dec's encoding to encoding.
  941. dec._encoding = encoding;
  942. // 5. If options's fatal member is true, set dec's error mode to
  943. // fatal.
  944. if (Boolean(options['fatal']))
  945. dec._error_mode = 'fatal';
  946. // 6. If options's ignoreBOM member is true, set dec's ignore BOM
  947. // flag.
  948. if (Boolean(options['ignoreBOM']))
  949. dec._ignoreBOM = true;
  950. // For pre-ES5 runtimes:
  951. if (!Object.defineProperty) {
  952. this.encoding = dec._encoding.name.toLowerCase();
  953. this.fatal = dec._error_mode === 'fatal';
  954. this.ignoreBOM = dec._ignoreBOM;
  955. }
  956. // 7. Return dec.
  957. return dec;
  958. }
  959. if (Object.defineProperty) {
  960. // The encoding attribute's getter must return encoding's name.
  961. Object.defineProperty(TextDecoder.prototype, 'encoding', {
  962. /** @this {TextDecoder} */
  963. get: function() { return this._encoding.name.toLowerCase(); }
  964. });
  965. // The fatal attribute's getter must return true if error mode
  966. // is fatal, and false otherwise.
  967. Object.defineProperty(TextDecoder.prototype, 'fatal', {
  968. /** @this {TextDecoder} */
  969. get: function() { return this._error_mode === 'fatal'; }
  970. });
  971. // The ignoreBOM attribute's getter must return true if ignore
  972. // BOM flag is set, and false otherwise.
  973. Object.defineProperty(TextDecoder.prototype, 'ignoreBOM', {
  974. /** @this {TextDecoder} */
  975. get: function() { return this._ignoreBOM; }
  976. });
  977. }
  978. /**
  979. * @param {BufferSource=} input The buffer of bytes to decode.
  980. * @param {Object=} options
  981. * @return {string} The decoded string.
  982. */
  983. TextDecoder.prototype.decode = function decode(input, options) {
  984. var bytes;
  985. if (typeof input === 'object' && input instanceof ArrayBuffer) {
  986. bytes = new Uint8Array(input);
  987. } else if (typeof input === 'object' && 'buffer' in input &&
  988. input.buffer instanceof ArrayBuffer) {
  989. bytes = new Uint8Array(input.buffer,
  990. input.byteOffset,
  991. input.byteLength);
  992. } else {
  993. bytes = new Uint8Array(0);
  994. }
  995. options = ToDictionary(options);
  996. // 1. If the do not flush flag is unset, set decoder to a new
  997. // encoding's decoder, set stream to a new stream, and unset the
  998. // BOM seen flag.
  999. if (!this._do_not_flush) {
  1000. this._decoder = decoders[this._encoding.name]({
  1001. fatal: this._error_mode === 'fatal'});
  1002. this._BOMseen = false;
  1003. }
  1004. // 2. If options's stream is true, set the do not flush flag, and
  1005. // unset the do not flush flag otherwise.
  1006. this._do_not_flush = Boolean(options['stream']);
  1007. // 3. If input is given, push a copy of input to stream.
  1008. // TODO: Align with spec algorithm - maintain stream on instance.
  1009. var input_stream = new Stream(bytes);
  1010. // 4. Let output be a new stream.
  1011. var output = [];
  1012. /** @type {?(number|!Array.<number>)} */
  1013. var result;
  1014. // 5. While true:
  1015. while (true) {
  1016. // 1. Let token be the result of reading from stream.
  1017. var token = input_stream.read();
  1018. // 2. If token is end-of-stream and the do not flush flag is
  1019. // set, return output, serialized.
  1020. // TODO: Align with spec algorithm.
  1021. if (token === end_of_stream)
  1022. break;
  1023. // 3. Otherwise, run these subsubsteps:
  1024. // 1. Let result be the result of processing token for decoder,
  1025. // stream, output, and error mode.
  1026. result = this._decoder.handler(input_stream, token);
  1027. // 2. If result is finished, return output, serialized.
  1028. if (result === finished)
  1029. break;
  1030. if (result !== null) {
  1031. if (Array.isArray(result))
  1032. output.push.apply(output, /**@type {!Array.<number>}*/(result));
  1033. else
  1034. output.push(result);
  1035. }
  1036. // 3. Otherwise, if result is error, throw a TypeError.
  1037. // (Thrown in handler)
  1038. // 4. Otherwise, do nothing.
  1039. }
  1040. // TODO: Align with spec algorithm.
  1041. if (!this._do_not_flush) {
  1042. do {
  1043. result = this._decoder.handler(input_stream, input_stream.read());
  1044. if (result === finished)
  1045. break;
  1046. if (result === null)
  1047. continue;
  1048. if (Array.isArray(result))
  1049. output.push.apply(output, /**@type {!Array.<number>}*/(result));
  1050. else
  1051. output.push(result);
  1052. } while (!input_stream.endOfStream());
  1053. this._decoder = null;
  1054. }
  1055. // A TextDecoder object also has an associated serialize stream
  1056. // algorithm...
  1057. /**
  1058. * @param {!Array.<number>} stream
  1059. * @return {string}
  1060. * @this {TextDecoder}
  1061. */
  1062. function serializeStream(stream) {
  1063. // 1. Let token be the result of reading from stream.
  1064. // (Done in-place on array, rather than as a stream)
  1065. // 2. If encoding is UTF-8, UTF-16BE, or UTF-16LE, and ignore
  1066. // BOM flag and BOM seen flag are unset, run these subsubsteps:
  1067. if (includes(['UTF-8', 'UTF-16LE', 'UTF-16BE'], this._encoding.name) &&
  1068. !this._ignoreBOM && !this._BOMseen) {
  1069. if (stream.length > 0 && stream[0] === 0xFEFF) {
  1070. // 1. If token is U+FEFF, set BOM seen flag.
  1071. this._BOMseen = true;
  1072. stream.shift();
  1073. } else if (stream.length > 0) {
  1074. // 2. Otherwise, if token is not end-of-stream, set BOM seen
  1075. // flag and append token to stream.
  1076. this._BOMseen = true;
  1077. } else {
  1078. // 3. Otherwise, if token is not end-of-stream, append token
  1079. // to output.
  1080. // (no-op)
  1081. }
  1082. }
  1083. // 4. Otherwise, return output.
  1084. return codePointsToString(stream);
  1085. }
  1086. return serializeStream.call(this, output);
  1087. };
  1088. // 8.2 Interface TextEncoder
  1089. /**
  1090. * @constructor
  1091. * @param {string=} label The label of the encoding. NONSTANDARD.
  1092. * @param {Object=} options NONSTANDARD.
  1093. */
  1094. function TextEncoder(label, options) {
  1095. // Web IDL conventions
  1096. if (!(this instanceof TextEncoder))
  1097. throw TypeError('Called as a function. Did you forget \'new\'?');
  1098. options = ToDictionary(options);
  1099. // A TextEncoder object has an associated encoding and encoder.
  1100. /** @private */
  1101. this._encoding = null;
  1102. /** @private @type {?Encoder} */
  1103. this._encoder = null;
  1104. // Non-standard
  1105. /** @private @type {boolean} */
  1106. this._do_not_flush = false;
  1107. /** @private @type {string} */
  1108. this._fatal = Boolean(options['fatal']) ? 'fatal' : 'replacement';
  1109. // 1. Let enc be a new TextEncoder object.
  1110. var enc = this;
  1111. // 2. Set enc's encoding to UTF-8's encoder.
  1112. if (Boolean(options['NONSTANDARD_allowLegacyEncoding'])) {
  1113. // NONSTANDARD behavior.
  1114. label = label !== undefined ? String(label) : DEFAULT_ENCODING;
  1115. var encoding = getEncoding(label);
  1116. if (encoding === null || encoding.name === 'replacement')
  1117. throw RangeError('Unknown encoding: ' + label);
  1118. if (!encoders[encoding.name]) {
  1119. throw Error('Encoder not present.' +
  1120. ' Did you forget to include encoding-indexes.js first?');
  1121. }
  1122. enc._encoding = encoding;
  1123. } else {
  1124. // Standard behavior.
  1125. enc._encoding = getEncoding('utf-8');
  1126. if (label !== undefined && 'console' in global) {
  1127. console.warn('TextEncoder constructor called with encoding label, '
  1128. + 'which is ignored.');
  1129. }
  1130. }
  1131. // For pre-ES5 runtimes:
  1132. if (!Object.defineProperty)
  1133. this.encoding = enc._encoding.name.toLowerCase();
  1134. // 3. Return enc.
  1135. return enc;
  1136. }
  1137. if (Object.defineProperty) {
  1138. // The encoding attribute's getter must return encoding's name.
  1139. Object.defineProperty(TextEncoder.prototype, 'encoding', {
  1140. /** @this {TextEncoder} */
  1141. get: function() { return this._encoding.name.toLowerCase(); }
  1142. });
  1143. }
  1144. /**
  1145. * @param {string=} opt_string The string to encode.
  1146. * @param {Object=} options
  1147. * @return {!Uint8Array} Encoded bytes, as a Uint8Array.
  1148. */
  1149. TextEncoder.prototype.encode = function encode(opt_string, options) {
  1150. opt_string = opt_string === undefined ? '' : String(opt_string);
  1151. options = ToDictionary(options);
  1152. // NOTE: This option is nonstandard. None of the encodings
  1153. // permitted for encoding (i.e. UTF-8, UTF-16) are stateful when
  1154. // the input is a USVString so streaming is not necessary.
  1155. if (!this._do_not_flush)
  1156. this._encoder = encoders[this._encoding.name]({
  1157. fatal: this._fatal === 'fatal'});
  1158. this._do_not_flush = Boolean(options['stream']);
  1159. // 1. Convert input to a stream.
  1160. var input = new Stream(stringToCodePoints(opt_string));
  1161. // 2. Let output be a new stream
  1162. var output = [];
  1163. /** @type {?(number|!Array.<number>)} */
  1164. var result;
  1165. // 3. While true, run these substeps:
  1166. while (true) {
  1167. // 1. Let token be the result of reading from input.
  1168. var token = input.read();
  1169. if (token === end_of_stream)
  1170. break;
  1171. // 2. Let result be the result of processing token for encoder,
  1172. // input, output.
  1173. result = this._encoder.handler(input, token);
  1174. if (result === finished)
  1175. break;
  1176. if (Array.isArray(result))
  1177. output.push.apply(output, /**@type {!Array.<number>}*/(result));
  1178. else
  1179. output.push(result);
  1180. }
  1181. // TODO: Align with spec algorithm.
  1182. if (!this._do_not_flush) {
  1183. while (true) {
  1184. result = this._encoder.handler(input, input.read());
  1185. if (result === finished)
  1186. break;
  1187. if (Array.isArray(result))
  1188. output.push.apply(output, /**@type {!Array.<number>}*/(result));
  1189. else
  1190. output.push(result);
  1191. }
  1192. this._encoder = null;
  1193. }
  1194. // 3. If result is finished, convert output into a byte sequence,
  1195. // and then return a Uint8Array object wrapping an ArrayBuffer
  1196. // containing output.
  1197. return new Uint8Array(output);
  1198. };
  1199. //
  1200. // 9. The encoding
  1201. //
  1202. // 9.1 utf-8
  1203. // 9.1.1 utf-8 decoder
  1204. /**
  1205. * @constructor
  1206. * @implements {Decoder}
  1207. * @param {{fatal: boolean}} options
  1208. */
  1209. function UTF8Decoder(options) {
  1210. var fatal = options.fatal;
  1211. // utf-8's decoder's has an associated utf-8 code point, utf-8
  1212. // bytes seen, and utf-8 bytes needed (all initially 0), a utf-8
  1213. // lower boundary (initially 0x80), and a utf-8 upper boundary
  1214. // (initially 0xBF).
  1215. var /** @type {number} */ utf8_code_point = 0,
  1216. /** @type {number} */ utf8_bytes_seen = 0,
  1217. /** @type {number} */ utf8_bytes_needed = 0,
  1218. /** @type {number} */ utf8_lower_boundary = 0x80,
  1219. /** @type {number} */ utf8_upper_boundary = 0xBF;
  1220. /**
  1221. * @param {Stream} stream The stream of bytes being decoded.
  1222. * @param {number} bite The next byte read from the stream.
  1223. * @return {?(number|!Array.<number>)} The next code point(s)
  1224. * decoded, or null if not enough data exists in the input
  1225. * stream to decode a complete code point.
  1226. */
  1227. this.handler = function(stream, bite) {
  1228. // 1. If byte is end-of-stream and utf-8 bytes needed is not 0,
  1229. // set utf-8 bytes needed to 0 and return error.
  1230. if (bite === end_of_stream && utf8_bytes_needed !== 0) {
  1231. utf8_bytes_needed = 0;
  1232. return decoderError(fatal);
  1233. }
  1234. // 2. If byte is end-of-stream, return finished.
  1235. if (bite === end_of_stream)
  1236. return finished;
  1237. // 3. If utf-8 bytes needed is 0, based on byte:
  1238. if (utf8_bytes_needed === 0) {
  1239. // 0x00 to 0x7F
  1240. if (inRange(bite, 0x00, 0x7F)) {
  1241. // Return a code point whose value is byte.
  1242. return bite;
  1243. }
  1244. // 0xC2 to 0xDF
  1245. else if (inRange(bite, 0xC2, 0xDF)) {
  1246. // 1. Set utf-8 bytes needed to 1.
  1247. utf8_bytes_needed = 1;
  1248. // 2. Set UTF-8 code point to byte & 0x1F.
  1249. utf8_code_point = bite & 0x1F;
  1250. }
  1251. // 0xE0 to 0xEF
  1252. else if (inRange(bite, 0xE0, 0xEF)) {
  1253. // 1. If byte is 0xE0, set utf-8 lower boundary to 0xA0.
  1254. if (bite === 0xE0)
  1255. utf8_lower_boundary = 0xA0;
  1256. // 2. If byte is 0xED, set utf-8 upper boundary to 0x9F.
  1257. if (bite === 0xED)
  1258. utf8_upper_boundary = 0x9F;
  1259. // 3. Set utf-8 bytes needed to 2.
  1260. utf8_bytes_needed = 2;
  1261. // 4. Set UTF-8 code point to byte & 0xF.
  1262. utf8_code_point = bite & 0xF;
  1263. }
  1264. // 0xF0 to 0xF4
  1265. else if (inRange(bite, 0xF0, 0xF4)) {
  1266. // 1. If byte is 0xF0, set utf-8 lower boundary to 0x90.
  1267. if (bite === 0xF0)
  1268. utf8_lower_boundary = 0x90;
  1269. // 2. If byte is 0xF4, set utf-8 upper boundary to 0x8F.
  1270. if (bite === 0xF4)
  1271. utf8_upper_boundary = 0x8F;
  1272. // 3. Set utf-8 bytes needed to 3.
  1273. utf8_bytes_needed = 3;
  1274. // 4. Set UTF-8 code point to byte & 0x7.
  1275. utf8_code_point = bite & 0x7;
  1276. }
  1277. // Otherwise
  1278. else {
  1279. // Return error.
  1280. return decoderError(fatal);
  1281. }
  1282. // Return continue.
  1283. return null;
  1284. }
  1285. // 4. If byte is not in the range utf-8 lower boundary to utf-8
  1286. // upper boundary, inclusive, run these substeps:
  1287. if (!inRange(bite, utf8_lower_boundary, utf8_upper_boundary)) {
  1288. // 1. Set utf-8 code point, utf-8 bytes needed, and utf-8
  1289. // bytes seen to 0, set utf-8 lower boundary to 0x80, and set
  1290. // utf-8 upper boundary to 0xBF.
  1291. utf8_code_point = utf8_bytes_needed = utf8_bytes_seen = 0;
  1292. utf8_lower_boundary = 0x80;
  1293. utf8_upper_boundary = 0xBF;
  1294. // 2. Prepend byte to stream.
  1295. stream.prepend(bite);
  1296. // 3. Return error.
  1297. return decoderError(fatal);
  1298. }
  1299. // 5. Set utf-8 lower boundary to 0x80 and utf-8 upper boundary
  1300. // to 0xBF.
  1301. utf8_lower_boundary = 0x80;
  1302. utf8_upper_boundary = 0xBF;
  1303. // 6. Set UTF-8 code point to (UTF-8 code point << 6) | (byte &
  1304. // 0x3F)
  1305. utf8_code_point = (utf8_code_point << 6) | (bite & 0x3F);
  1306. // 7. Increase utf-8 bytes seen by one.
  1307. utf8_bytes_seen += 1;
  1308. // 8. If utf-8 bytes seen is not equal to utf-8 bytes needed,
  1309. // continue.
  1310. if (utf8_bytes_seen !== utf8_bytes_needed)
  1311. return null;
  1312. // 9. Let code point be utf-8 code point.
  1313. var code_point = utf8_code_point;
  1314. // 10. Set utf-8 code point, utf-8 bytes needed, and utf-8 bytes
  1315. // seen to 0.
  1316. utf8_code_point = utf8_bytes_needed = utf8_bytes_seen = 0;
  1317. // 11. Return a code point whose value is code point.
  1318. return code_point;
  1319. };
  1320. }
  1321. // 9.1.2 utf-8 encoder
  1322. /**
  1323. * @constructor
  1324. * @implements {Encoder}
  1325. * @param {{fatal: boolean}} options
  1326. */
  1327. function UTF8Encoder(options) {
  1328. var fatal = options.fatal;
  1329. /**
  1330. * @param {Stream} stream Input stream.
  1331. * @param {number} code_point Next code point read from the stream.
  1332. * @return {(number|!Array.<number>)} Byte(s) to emit.
  1333. */
  1334. this.handler = function(stream, code_point) {
  1335. // 1. If code point is end-of-stream, return finished.
  1336. if (code_point === end_of_stream)
  1337. return finished;
  1338. // 2. If code point is an ASCII code point, return a byte whose
  1339. // value is code point.
  1340. if (isASCIICodePoint(code_point))
  1341. return code_point;
  1342. // 3. Set count and offset based on the range code point is in:
  1343. var count, offset;
  1344. // U+0080 to U+07FF, inclusive:
  1345. if (inRange(code_point, 0x0080, 0x07FF)) {
  1346. // 1 and 0xC0
  1347. count = 1;
  1348. offset = 0xC0;
  1349. }
  1350. // U+0800 to U+FFFF, inclusive:
  1351. else if (inRange(code_point, 0x0800, 0xFFFF)) {
  1352. // 2 and 0xE0
  1353. count = 2;
  1354. offset = 0xE0;
  1355. }
  1356. // U+10000 to U+10FFFF, inclusive:
  1357. else if (inRange(code_point, 0x10000, 0x10FFFF)) {
  1358. // 3 and 0xF0
  1359. count = 3;
  1360. offset = 0xF0;
  1361. }
  1362. // 4. Let bytes be a byte sequence whose first byte is (code
  1363. // point >> (6 × count)) + offset.
  1364. var bytes = [(code_point >> (6 * count)) + offset];
  1365. // 5. Run these substeps while count is greater than 0:
  1366. while (count > 0) {
  1367. // 1. Set temp to code point >> (6 × (count − 1)).
  1368. var temp = code_point >> (6 * (count - 1));
  1369. // 2. Append to bytes 0x80 | (temp & 0x3F).
  1370. bytes.push(0x80 | (temp & 0x3F));
  1371. // 3. Decrease count by one.
  1372. count -= 1;
  1373. }
  1374. // 6. Return bytes bytes, in order.
  1375. return bytes;
  1376. };
  1377. }
  1378. /** @param {{fatal: boolean}} options */
  1379. encoders['UTF-8'] = function(options) {
  1380. return new UTF8Encoder(options);
  1381. };
  1382. /** @param {{fatal: boolean}} options */
  1383. decoders['UTF-8'] = function(options) {
  1384. return new UTF8Decoder(options);
  1385. };
  1386. //
  1387. // 10. Legacy single-byte encodings
  1388. //
  1389. // 10.1 single-byte decoder
  1390. /**
  1391. * @constructor
  1392. * @implements {Decoder}
  1393. * @param {!Array.<number>} index The encoding index.
  1394. * @param {{fatal: boolean}} options
  1395. */
  1396. function SingleByteDecoder(index, options) {
  1397. var fatal = options.fatal;
  1398. /**
  1399. * @param {Stream} stream The stream of bytes being decoded.
  1400. * @param {number} bite The next byte read from the stream.
  1401. * @return {?(number|!Array.<number>)} The next code point(s)
  1402. * decoded, or null if not enough data exists in the input
  1403. * stream to decode a complete code point.
  1404. */
  1405. this.handler = function(stream, bite) {
  1406. // 1. If byte is end-of-stream, return finished.
  1407. if (bite === end_of_stream)
  1408. return finished;
  1409. // 2. If byte is an ASCII byte, return a code point whose value
  1410. // is byte.
  1411. if (isASCIIByte(bite))
  1412. return bite;
  1413. // 3. Let code point be the index code point for byte − 0x80 in
  1414. // index single-byte.
  1415. var code_point = index[bite - 0x80];
  1416. // 4. If code point is null, return error.
  1417. if (code_point === null)
  1418. return decoderError(fatal);
  1419. // 5. Return a code point whose value is code point.
  1420. return code_point;
  1421. };
  1422. }
  1423. // 10.2 single-byte encoder
  1424. /**
  1425. * @constructor
  1426. * @implements {Encoder}
  1427. * @param {!Array.<?number>} index The encoding index.
  1428. * @param {{fatal: boolean}} options
  1429. */
  1430. function SingleByteEncoder(index, options) {
  1431. var fatal = options.fatal;
  1432. /**
  1433. * @param {Stream} stream Input stream.
  1434. * @param {number} code_point Next code point read from the stream.
  1435. * @return {(number|!Array.<number>)} Byte(s) to emit.
  1436. */
  1437. this.handler = function(stream, code_point) {
  1438. // 1. If code point is end-of-stream, return finished.
  1439. if (code_point === end_of_stream)
  1440. return finished;
  1441. // 2. If code point is an ASCII code point, return a byte whose
  1442. // value is code point.
  1443. if (isASCIICodePoint(code_point))
  1444. return code_point;
  1445. // 3. Let pointer be the index pointer for code point in index
  1446. // single-byte.
  1447. var pointer = indexPointerFor(code_point, index);
  1448. // 4. If pointer is null, return error with code point.
  1449. if (pointer === null)
  1450. encoderError(code_point);
  1451. // 5. Return a byte whose value is pointer + 0x80.
  1452. return pointer + 0x80;
  1453. };
  1454. }
  1455. (function() {
  1456. if (!('encoding-indexes' in global))
  1457. return;
  1458. encodings.forEach(function(category) {
  1459. if (category.heading !== 'Legacy single-byte encodings')
  1460. return;
  1461. category.encodings.forEach(function(encoding) {
  1462. var name = encoding.name;
  1463. var idx = index(name.toLowerCase());
  1464. /** @param {{fatal: boolean}} options */
  1465. decoders[name] = function(options) {
  1466. return new SingleByteDecoder(idx, options);
  1467. };
  1468. /** @param {{fatal: boolean}} options */
  1469. encoders[name] = function(options) {
  1470. return new SingleByteEncoder(idx, options);
  1471. };
  1472. });
  1473. });
  1474. }());
  1475. //
  1476. // 11. Legacy multi-byte Chinese (simplified) encodings
  1477. //
  1478. // 11.1 gbk
  1479. // 11.1.1 gbk decoder
  1480. // gbk's decoder is gb18030's decoder.
  1481. /** @param {{fatal: boolean}} options */
  1482. decoders['GBK'] = function(options) {
  1483. return new GB18030Decoder(options);
  1484. };
  1485. // 11.1.2 gbk encoder
  1486. // gbk's encoder is gb18030's encoder with its gbk flag set.
  1487. /** @param {{fatal: boolean}} options */
  1488. encoders['GBK'] = function(options) {
  1489. return new GB18030Encoder(options, true);
  1490. };
  1491. // 11.2 gb18030
  1492. // 11.2.1 gb18030 decoder
  1493. /**
  1494. * @constructor
  1495. * @implements {Decoder}
  1496. * @param {{fatal: boolean}} options
  1497. */
  1498. function GB18030Decoder(options) {
  1499. var fatal = options.fatal;
  1500. // gb18030's decoder has an associated gb18030 first, gb18030
  1501. // second, and gb18030 third (all initially 0x00).
  1502. var /** @type {number} */ gb18030_first = 0x00,
  1503. /** @type {number} */ gb18030_second = 0x00,
  1504. /** @type {number} */ gb18030_third = 0x00;
  1505. /**
  1506. * @param {Stream} stream The stream of bytes being decoded.
  1507. * @param {number} bite The next byte read from the stream.
  1508. * @return {?(number|!Array.<number>)} The next code point(s)
  1509. * decoded, or null if not enough data exists in the input
  1510. * stream to decode a complete code point.
  1511. */
  1512. this.handler = function(stream, bite) {
  1513. // 1. If byte is end-of-stream and gb18030 first, gb18030
  1514. // second, and gb18030 third are 0x00, return finished.
  1515. if (bite === end_of_stream && gb18030_first === 0x00 &&
  1516. gb18030_second === 0x00 && gb18030_third === 0x00) {
  1517. return finished;
  1518. }
  1519. // 2. If byte is end-of-stream, and gb18030 first, gb18030
  1520. // second, or gb18030 third is not 0x00, set gb18030 first,
  1521. // gb18030 second, and gb18030 third to 0x00, and return error.
  1522. if (bite === end_of_stream &&
  1523. (gb18030_first !== 0x00 || gb18030_second !== 0x00 ||
  1524. gb18030_third !== 0x00)) {
  1525. gb18030_first = 0x00;
  1526. gb18030_second = 0x00;
  1527. gb18030_third = 0x00;
  1528. decoderError(fatal);
  1529. }
  1530. var code_point;
  1531. // 3. If gb18030 third is not 0x00, run these substeps:
  1532. if (gb18030_third !== 0x00) {
  1533. // 1. Let code point be null.
  1534. code_point = null;
  1535. // 2. If byte is in the range 0x30 to 0x39, inclusive, set
  1536. // code point to the index gb18030 ranges code point for
  1537. // (((gb18030 first − 0x81) × 10 + gb18030 second − 0x30) ×
  1538. // 126 + gb18030 third − 0x81) × 10 + byte − 0x30.
  1539. if (inRange(bite, 0x30, 0x39)) {
  1540. code_point = indexGB18030RangesCodePointFor(
  1541. (((gb18030_first - 0x81) * 10 + gb18030_second - 0x30) * 126 +
  1542. gb18030_third - 0x81) * 10 + bite - 0x30);
  1543. }
  1544. // 3. Let buffer be a byte sequence consisting of gb18030
  1545. // second, gb18030 third, and byte, in order.
  1546. var buffer = [gb18030_second, gb18030_third, bite];
  1547. // 4. Set gb18030 first, gb18030 second, and gb18030 third to
  1548. // 0x00.
  1549. gb18030_first = 0x00;
  1550. gb18030_second = 0x00;
  1551. gb18030_third = 0x00;
  1552. // 5. If code point is null, prepend buffer to stream and
  1553. // return error.
  1554. if (code_point === null) {
  1555. stream.prepend(buffer);
  1556. return decoderError(fatal);
  1557. }
  1558. // 6. Return a code point whose value is code point.
  1559. return code_point;
  1560. }
  1561. // 4. If gb18030 second is not 0x00, run these substeps:
  1562. if (gb18030_second !== 0x00) {
  1563. // 1. If byte is in the range 0x81 to 0xFE, inclusive, set
  1564. // gb18030 third to byte and return continue.
  1565. if (inRange(bite, 0x81, 0xFE)) {
  1566. gb18030_third = bite;
  1567. return null;
  1568. }
  1569. // 2. Prepend gb18030 second followed by byte to stream, set
  1570. // gb18030 first and gb18030 second to 0x00, and return error.
  1571. stream.prepend([gb18030_second, bite]);
  1572. gb18030_first = 0x00;
  1573. gb18030_second = 0x00;
  1574. return decoderError(fatal);
  1575. }
  1576. // 5. If gb18030 first is not 0x00, run these substeps:
  1577. if (gb18030_first !== 0x00) {
  1578. // 1. If byte is in the range 0x30 to 0x39, inclusive, set
  1579. // gb18030 second to byte and return continue.
  1580. if (inRange(bite, 0x30, 0x39)) {
  1581. gb18030_second = bite;
  1582. return null;
  1583. }
  1584. // 2. Let lead be gb18030 first, let pointer be null, and set
  1585. // gb18030 first to 0x00.
  1586. var lead = gb18030_first;
  1587. var pointer = null;
  1588. gb18030_first = 0x00;
  1589. // 3. Let offset be 0x40 if byte is less than 0x7F and 0x41
  1590. // otherwise.
  1591. var offset = bite < 0x7F ? 0x40 : 0x41;
  1592. // 4. If byte is in the range 0x40 to 0x7E, inclusive, or 0x80
  1593. // to 0xFE, inclusive, set pointer to (lead − 0x81) × 190 +
  1594. // (byte − offset).
  1595. if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFE))
  1596. pointer = (lead - 0x81) * 190 + (bite - offset);
  1597. // 5. Let code point be null if pointer is null and the index
  1598. // code point for pointer in index gb18030 otherwise.
  1599. code_point = pointer === null ? null :
  1600. indexCodePointFor(pointer, index('gb18030'));
  1601. // 6. If code point is null and byte is an ASCII byte, prepend
  1602. // byte to stream.
  1603. if (code_point === null && isASCIIByte(bite))
  1604. stream.prepend(bite);
  1605. // 7. If code point is null, return error.
  1606. if (code_point === null)
  1607. return decoderError(fatal);
  1608. // 8. Return a code point whose value is code point.
  1609. return code_point;
  1610. }
  1611. // 6. If byte is an ASCII byte, return a code point whose value
  1612. // is byte.
  1613. if (isASCIIByte(bite))
  1614. return bite;
  1615. // 7. If byte is 0x80, return code point U+20AC.
  1616. if (bite === 0x80)
  1617. return 0x20AC;
  1618. // 8. If byte is in the range 0x81 to 0xFE, inclusive, set
  1619. // gb18030 first to byte and return continue.
  1620. if (inRange(bite, 0x81, 0xFE)) {
  1621. gb18030_first = bite;
  1622. return null;
  1623. }
  1624. // 9. Return error.
  1625. return decoderError(fatal);
  1626. };
  1627. }
  1628. // 11.2.2 gb18030 encoder
  /**
   * Encoder for gb18030. When constructed with a truthy |gbk_flag| the
   * GBK variant is produced instead: U+20AC becomes the single byte
   * 0x80 and code points missing from index gb18030 are reported as
   * errors rather than emitted as four-byte sequences.
   *
   * @constructor
   * @implements {Encoder}
   * @param {{fatal: boolean}} options
   * @param {boolean=} gbk_flag
   */
  function GB18030Encoder(options, gbk_flag) {
    // Read from the options object as the other coders here do; the
    // handler below never consults it.
    var fatal = options.fatal;

    /**
     * Two-byte form for a pointer into index gb18030.
     * @param {number} index_pointer Pointer found in index gb18030.
     * @return {!Array.<number>} Lead byte followed by trail byte.
     */
    function twoBytesFor(index_pointer) {
      var lead_byte = floor(index_pointer / 190) + 0x81;
      var trail = index_pointer % 190;
      // Trail values of 0x3F and above skip over the byte 0x7F.
      var trail_byte = trail + (trail < 0x3F ? 0x40 : 0x41);
      return [lead_byte, trail_byte];
    }

    /**
     * Four-byte form for a pointer into the gb18030 ranges index.
     * @param {number} ranges_pointer Pointer from the ranges index.
     * @return {!Array.<number>} The four bytes to emit.
     */
    function fourBytesFor(ranges_pointer) {
      var remaining = ranges_pointer;
      var first = floor(remaining / 10 / 126 / 10);
      remaining = remaining - first * 10 * 126 * 10;
      var second = floor(remaining / 10 / 126);
      remaining = remaining - second * 10 * 126;
      var third = floor(remaining / 10);
      var fourth = remaining - third * 10;
      return [first + 0x81, second + 0x30, third + 0x81, fourth + 0x30];
    }

    /**
     * @param {Stream} stream Input stream.
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} Byte(s) to emit.
     */
    this.handler = function(stream, code_point) {
      // 1. End-of-stream: return finished.
      if (code_point === end_of_stream) {
        return finished;
      }
      // 2. ASCII code points are emitted as a byte of the same value.
      if (isASCIICodePoint(code_point)) {
        return code_point;
      }
      // 3. U+E5E5 is always an error.
      if (code_point === 0xE5E5) {
        return encoderError(code_point);
      }
      // 4. With the gbk flag set, U+20AC is the single byte 0x80.
      if (gbk_flag && code_point === 0x20AC) {
        return 0x80;
      }
      // 5-6. A pointer in index gb18030 yields the two-byte form.
      var index_pointer = indexPointerFor(code_point, index('gb18030'));
      if (index_pointer !== null) {
        return twoBytesFor(index_pointer);
      }
      // 7. With the gbk flag set there is no four-byte fallback.
      if (gbk_flag) {
        return encoderError(code_point);
      }
      // 8-15. Otherwise derive four bytes from the ranges pointer.
      return fourBytesFor(indexGB18030RangesPointerFor(code_point));
    };
  }
  /**
   * Factory for gb18030 encoders; no gbk flag is passed.
   * @param {{fatal: boolean}} options
   * @return {!GB18030Encoder}
   */
  encoders['gb18030'] = function(options) {
    var encoder = new GB18030Encoder(options);
    return encoder;
  };
  /**
   * Factory for gb18030 decoders.
   * @param {{fatal: boolean}} options
   * @return {!GB18030Decoder}
   */
  decoders['gb18030'] = function(options) {
    var decoder = new GB18030Decoder(options);
    return decoder;
  };
  1706. //
  1707. // 12. Legacy multi-byte Chinese (traditional) encodings
  1708. //
  1709. // 12.1 Big5
  1710. // 12.1.1 Big5 decoder
  /**
   * Decoder for Big5. Bytes arrive one at a time; a lead byte is held
   * between calls until its trail byte (or end-of-stream) is seen.
   *
   * @constructor
   * @implements {Decoder}
   * @param {{fatal: boolean}} options
   */
  function Big5Decoder(options) {
    var fatal = options.fatal;
    // Big5 lead: the pending lead byte, or 0x00 when none is pending.
    var /** @type {number} */ pending_lead = 0x00;

    /**
     * @param {Stream} stream The stream of bytes being decoded.
     * @param {number} bite The next byte read from the stream.
     * @return {?(number|!Array.<number>)} The next code point(s)
     *     decoded, or null if not enough data exists in the input
     *     stream to decode a complete code point.
     */
    this.handler = function(stream, bite) {
      // 1-2. End-of-stream: an unfinished sequence is an error,
      // otherwise decoding is finished.
      if (bite === end_of_stream) {
        if (pending_lead === 0x00) {
          return finished;
        }
        pending_lead = 0x00;
        return decoderError(fatal);
      }

      // 4-6. No lead pending: ASCII passes through, 0x81 to 0xFE
      // starts a two-byte sequence, anything else is an error.
      if (pending_lead === 0x00) {
        if (isASCIIByte(bite)) {
          return bite;
        }
        if (inRange(bite, 0x81, 0xFE)) {
          pending_lead = bite;
          return null;
        }
        return decoderError(fatal);
      }

      // 3. A lead is pending: consume it and treat byte as the trail.
      var lead = pending_lead;
      pending_lead = 0x00;

      // 3.1-3.2. Only trails in 0x40-0x7E or 0xA1-0xFE give a pointer.
      var pointer = null;
      if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0xA1, 0xFE)) {
        var offset = bite < 0x7F ? 0x40 : 0x62;
        pointer = (lead - 0x81) * 157 + (bite - offset);
      }

      // 3.3. Four pointers decode to a base letter plus combining mark:
      //   1133 -> U+00CA U+0304    1135 -> U+00CA U+030C
      //   1164 -> U+00EA U+0304    1166 -> U+00EA U+030C
      if (pointer === 1133 || pointer === 1135) {
        return [0x00CA, pointer === 1133 ? 0x0304 : 0x030C];
      }
      if (pointer === 1164 || pointer === 1166) {
        return [0x00EA, pointer === 1164 ? 0x0304 : 0x030C];
      }

      // 3.4. Look the pointer up in index Big5.
      var code_point = null;
      if (pointer !== null) {
        code_point = indexCodePointFor(pointer, index('big5'));
      }

      // 3.5-3.6. On failure, an ASCII trail byte is pushed back onto
      // the stream before the error is returned.
      if (code_point === null) {
        if (isASCIIByte(bite)) {
          stream.prepend(bite);
        }
        return decoderError(fatal);
      }

      // 3.7. Success.
      return code_point;
    };
  }
  1796. // 12.1.2 Big5 encoder
  /**
   * Encoder for Big5. ASCII is passed through; every other code point
   * is mapped through the Big5 pointer index to a lead/trail pair.
   *
   * @constructor
   * @implements {Encoder}
   * @param {{fatal: boolean}} options
   */
  function Big5Encoder(options) {
    // Read from the options object as the other coders here do; the
    // handler below never consults it.
    var fatal = options.fatal;

    /**
     * @param {Stream} stream Input stream.
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} Byte(s) to emit.
     */
    this.handler = function(stream, code_point) {
      // 1. End-of-stream: return finished.
      if (code_point === end_of_stream) {
        return finished;
      }
      // 2. ASCII code points are emitted as a byte of the same value.
      if (isASCIICodePoint(code_point)) {
        return code_point;
      }
      // 3-4. No pointer for this code point: error.
      var big5_pointer = indexBig5PointerFor(code_point);
      if (big5_pointer === null) {
        return encoderError(code_point);
      }
      // 5-6. Pointers whose lead byte would fall below 0xA1 are
      // rejected as well.
      var lead_byte = floor(big5_pointer / 157) + 0x81;
      if (lead_byte < 0xA1) {
        return encoderError(code_point);
      }
      // 7-8. The trail byte skips the gap between 0x7E and 0xA1.
      var trail = big5_pointer % 157;
      var trail_byte = trail + (trail < 0x3F ? 0x40 : 0x62);
      // 9. Emit lead followed by trail.
      return [lead_byte, trail_byte];
    };
  }
  /**
   * Factory for Big5 encoders.
   * @param {{fatal: boolean}} options
   * @return {!Big5Encoder}
   */
  encoders['Big5'] = function(options) {
    var encoder = new Big5Encoder(options);
    return encoder;
  };
  /**
   * Factory for Big5 decoders.
   * @param {{fatal: boolean}} options
   * @return {!Big5Decoder}
   */
  decoders['Big5'] = function(options) {
    var decoder = new Big5Decoder(options);
    return decoder;
  };
  1844. //
  1845. // 13. Legacy multi-byte Japanese encodings
  1846. //
  1847. // 13.1 euc-jp
  1848. // 13.1.1 euc-jp decoder
  /**
   * Decoder for euc-jp. Handles single-byte ASCII, two-byte half-width
   * katakana (lead 0x8E), two-byte jis0208 and three-byte jis0212
   * (lead 0x8F) sequences, one byte per handler call.
   *
   * @constructor
   * @implements {Decoder}
   * @param {{fatal: boolean}} options
   */
  function EUCJPDecoder(options) {
    var fatal = options.fatal;
    // euc-jp jis0212 flag: set once 0x8F plus a valid second byte has
    // been seen, so the next lookup uses index jis0212.
    var /** @type {boolean} */ jis0212_pending = false;
    // euc-jp lead: the pending lead byte, or 0x00 when none is pending.
    var /** @type {number} */ pending_lead = 0x00;

    /**
     * @param {Stream} stream The stream of bytes being decoded.
     * @param {number} bite The next byte read from the stream.
     * @return {?(number|!Array.<number>)} The next code point(s)
     *     decoded, or null if not enough data exists in the input
     *     stream to decode a complete code point.
     */
    this.handler = function(stream, bite) {
      // 1-2. End-of-stream: an unfinished sequence is an error,
      // otherwise decoding is finished.
      if (bite === end_of_stream) {
        if (pending_lead === 0x00) {
          return finished;
        }
        pending_lead = 0x00;
        return decoderError(fatal);
      }

      // 6-8. No lead pending: ASCII passes through; 0x8E, 0x8F and
      // 0xA1 to 0xFE start a sequence; anything else is an error.
      if (pending_lead === 0x00) {
        if (isASCIIByte(bite)) {
          return bite;
        }
        if (bite === 0x8E || bite === 0x8F || inRange(bite, 0xA1, 0xFE)) {
          pending_lead = bite;
          return null;
        }
        return decoderError(fatal);
      }

      // 3. 0x8E followed by 0xA1 to 0xDF: half-width katakana.
      if (pending_lead === 0x8E && inRange(bite, 0xA1, 0xDF)) {
        pending_lead = 0x00;
        return 0xFF61 - 0xA1 + bite;
      }

      // 4. 0x8F followed by 0xA1 to 0xFE: byte becomes the lead of a
      // jis0212 sequence.
      if (pending_lead === 0x8F && inRange(bite, 0xA1, 0xFE)) {
        jis0212_pending = true;
        pending_lead = bite;
        return null;
      }

      // 5. Any other pending lead is consumed together with byte.
      var lead = pending_lead;
      pending_lead = 0x00;
      var trail_in_range = inRange(bite, 0xA1, 0xFE);

      // 5.1-5.2. Look up only when both lead and byte are 0xA1 to 0xFE.
      var code_point = null;
      if (inRange(lead, 0xA1, 0xFE) && trail_in_range) {
        var table_name = jis0212_pending ? 'jis0212' : 'jis0208';
        code_point = indexCodePointFor(
            (lead - 0xA1) * 94 + (bite - 0xA1),
            index(table_name));
      }

      // 5.3. The jis0212 flag applies to a single lookup.
      jis0212_pending = false;

      // 5.4. A byte outside 0xA1 to 0xFE is pushed back onto the stream.
      if (!trail_in_range) {
        stream.prepend(bite);
      }

      // 5.5-5.6. Error when nothing was found, else the code point.
      return code_point === null ? decoderError(fatal) : code_point;
    };
  }
  1935. // 13.1.2 euc-jp encoder
  /**
   * Encoder for euc-jp. Emits single bytes for ASCII, U+00A5 and
   * U+203E, a 0x8E-prefixed pair for half-width katakana, and a
   * jis0208 lead/trail pair for everything else it can map.
   *
   * @constructor
   * @implements {Encoder}
   * @param {{fatal: boolean}} options
   */
  function EUCJPEncoder(options) {
    // Read from the options object as the other coders here do; the
    // handler below never consults it.
    var fatal = options.fatal;

    /**
     * @param {Stream} stream Input stream.
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} Byte(s) to emit.
     */
    this.handler = function(stream, code_point) {
      // 1. End-of-stream: return finished.
      if (code_point === end_of_stream) {
        return finished;
      }
      // 2. ASCII code points are emitted as a byte of the same value.
      if (isASCIICodePoint(code_point)) {
        return code_point;
      }
      // 3-4. U+00A5 and U+203E map onto the bytes 0x5C and 0x7E.
      switch (code_point) {
        case 0x00A5:
          return 0x5C;
        case 0x203E:
          return 0x7E;
      }
      // 5. U+FF61 to U+FF9F: 0x8E followed by an offset byte.
      if (inRange(code_point, 0xFF61, 0xFF9F)) {
        return [0x8E, code_point - 0xFF61 + 0xA1];
      }
      // 6. U+2212 is looked up (and reported on error) as U+FF0D.
      var lookup = (code_point === 0x2212) ? 0xFF0D : code_point;
      // 7-8. No pointer in index jis0208: error.
      var pointer = indexPointerFor(lookup, index('jis0208'));
      if (pointer === null) {
        return encoderError(lookup);
      }
      // 9-11. Split the pointer into a 94 x 94 lead/trail pair.
      return [floor(pointer / 94) + 0xA1, pointer % 94 + 0xA1];
    };
  }
  /**
   * Factory for EUC-JP encoders.
   * @param {{fatal: boolean}} options
   * @return {!EUCJPEncoder}
   */
  encoders['EUC-JP'] = function(options) {
    var encoder = new EUCJPEncoder(options);
    return encoder;
  };
  /**
   * Factory for EUC-JP decoders.
   * @param {{fatal: boolean}} options
   * @return {!EUCJPDecoder}
   */
  decoders['EUC-JP'] = function(options) {
    var decoder = new EUCJPDecoder(options);
    return decoder;
  };
  1992. // 13.2 iso-2022-jp
  1993. // 13.2.1 iso-2022-jp decoder
  /**
   * Decoder for iso-2022-jp. The handler is a state machine driven by
   * escape sequences: ESC ( B selects ASCII, ESC ( J selects Roman,
   * ESC ( I selects Katakana, and ESC $ @ / ESC $ B select two-byte
   * jis0208 (lead byte / trail byte states).
   *
   * @constructor
   * @implements {Decoder}
   * @param {{fatal: boolean}} options
   */
  function ISO2022JPDecoder(options) {
    var fatal = options.fatal;
    /** @enum */
    var states = {
      ASCII: 0,
      Roman: 1,
      Katakana: 2,
      LeadByte: 3,
      TrailByte: 4,
      EscapeStart: 5,
      Escape: 6
    };
    // iso-2022-jp's decoder has an associated iso-2022-jp decoder
    // state (initially ASCII), iso-2022-jp decoder output state
    // (initially ASCII), iso-2022-jp lead (initially 0x00), and
    // iso-2022-jp output flag (initially unset).
    //
    // The output state remembers the state selected by the last valid
    // escape sequence; the escape start and escape states fall back to
    // it when an escape sequence turns out to be invalid.
    var /** @type {number} */ iso2022jp_decoder_state = states.ASCII,
        /** @type {number} */ iso2022jp_decoder_output_state = states.ASCII,
        /** @type {number} */ iso2022jp_lead = 0x00,
        /** @type {boolean} */ iso2022jp_output_flag = false;
    /**
     * @param {Stream} stream The stream of bytes being decoded.
     * @param {number} bite The next byte read from the stream.
     * @return {?(number|!Array.<number>)} The next code point(s)
     *     decoded, or null if not enough data exists in the input
     *     stream to decode a complete code point.
     */
    this.handler = function(stream, bite) {
      // switching on iso-2022-jp decoder state:
      switch (iso2022jp_decoder_state) {
      default:
      case states.ASCII:
        // ASCII
        // Based on byte:

        // 0x1B
        if (bite === 0x1B) {
          // Set iso-2022-jp decoder state to escape start and return
          // continue.
          iso2022jp_decoder_state = states.EscapeStart;
          return null;
        }

        // 0x00 to 0x7F, excluding 0x0E, 0x0F, and 0x1B
        if (inRange(bite, 0x00, 0x7F) && bite !== 0x0E
            && bite !== 0x0F && bite !== 0x1B) {
          // Unset the iso-2022-jp output flag and return a code point
          // whose value is byte.
          iso2022jp_output_flag = false;
          return bite;
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Return finished.
          return finished;
        }

        // Otherwise
        // Unset the iso-2022-jp output flag and return error.
        iso2022jp_output_flag = false;
        return decoderError(fatal);

      case states.Roman:
        // Roman
        // Based on byte:

        // 0x1B
        if (bite === 0x1B) {
          // Set iso-2022-jp decoder state to escape start and return
          // continue.
          iso2022jp_decoder_state = states.EscapeStart;
          return null;
        }

        // 0x5C
        if (bite === 0x5C) {
          // Unset the iso-2022-jp output flag and return code point
          // U+00A5.
          iso2022jp_output_flag = false;
          return 0x00A5;
        }

        // 0x7E
        if (bite === 0x7E) {
          // Unset the iso-2022-jp output flag and return code point
          // U+203E.
          iso2022jp_output_flag = false;
          return 0x203E;
        }

        // 0x00 to 0x7F, excluding 0x0E, 0x0F, 0x1B, 0x5C, and 0x7E
        if (inRange(bite, 0x00, 0x7F) && bite !== 0x0E && bite !== 0x0F
            && bite !== 0x1B && bite !== 0x5C && bite !== 0x7E) {
          // Unset the iso-2022-jp output flag and return a code point
          // whose value is byte.
          iso2022jp_output_flag = false;
          return bite;
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Return finished.
          return finished;
        }

        // Otherwise
        // Unset the iso-2022-jp output flag and return error.
        iso2022jp_output_flag = false;
        return decoderError(fatal);

      case states.Katakana:
        // Katakana
        // Based on byte:

        // 0x1B
        if (bite === 0x1B) {
          // Set iso-2022-jp decoder state to escape start and return
          // continue.
          iso2022jp_decoder_state = states.EscapeStart;
          return null;
        }

        // 0x21 to 0x5F
        if (inRange(bite, 0x21, 0x5F)) {
          // Unset the iso-2022-jp output flag and return a code point
          // whose value is 0xFF61 − 0x21 + byte.
          iso2022jp_output_flag = false;
          return 0xFF61 - 0x21 + bite;
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Return finished.
          return finished;
        }

        // Otherwise
        // Unset the iso-2022-jp output flag and return error.
        iso2022jp_output_flag = false;
        return decoderError(fatal);

      case states.LeadByte:
        // Lead byte
        // Based on byte:

        // 0x1B
        if (bite === 0x1B) {
          // Set iso-2022-jp decoder state to escape start and return
          // continue.
          iso2022jp_decoder_state = states.EscapeStart;
          return null;
        }

        // 0x21 to 0x7E
        if (inRange(bite, 0x21, 0x7E)) {
          // Unset the iso-2022-jp output flag, set iso-2022-jp lead
          // to byte, iso-2022-jp decoder state to trail byte, and
          // return continue.
          iso2022jp_output_flag = false;
          iso2022jp_lead = bite;
          iso2022jp_decoder_state = states.TrailByte;
          return null;
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Return finished.
          return finished;
        }

        // Otherwise
        // Unset the iso-2022-jp output flag and return error.
        iso2022jp_output_flag = false;
        return decoderError(fatal);

      case states.TrailByte:
        // Trail byte
        // Based on byte:

        // 0x1B
        if (bite === 0x1B) {
          // Set iso-2022-jp decoder state to escape start and return
          // error: an escape here interrupts a two-byte sequence whose
          // lead byte has already been consumed.
          iso2022jp_decoder_state = states.EscapeStart;
          return decoderError(fatal);
        }

        // 0x21 to 0x7E
        if (inRange(bite, 0x21, 0x7E)) {
          // 1. Set the iso-2022-jp decoder state to lead byte.
          iso2022jp_decoder_state = states.LeadByte;

          // 2. Let pointer be (iso-2022-jp lead − 0x21) × 94 + byte − 0x21.
          var pointer = (iso2022jp_lead - 0x21) * 94 + bite - 0x21;

          // 3. Let code point be the index code point for pointer in
          // index jis0208.
          var code_point = indexCodePointFor(pointer, index('jis0208'));

          // 4. If code point is null, return error.
          if (code_point === null)
            return decoderError(fatal);

          // 5. Return a code point whose value is code point.
          return code_point;
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Set the iso-2022-jp decoder state to lead byte, prepend
          // byte to stream, and return error.
          iso2022jp_decoder_state = states.LeadByte;
          stream.prepend(bite);
          return decoderError(fatal);
        }

        // Otherwise
        // Set iso-2022-jp decoder state to lead byte and return
        // error.
        iso2022jp_decoder_state = states.LeadByte;
        return decoderError(fatal);

      case states.EscapeStart:
        // Escape start

        // 1. If byte is either 0x24 or 0x28, set iso-2022-jp lead to
        // byte, iso-2022-jp decoder state to escape, and return
        // continue.
        if (bite === 0x24 || bite === 0x28) {
          iso2022jp_lead = bite;
          iso2022jp_decoder_state = states.Escape;
          return null;
        }

        // 2. Prepend byte to stream.
        stream.prepend(bite);

        // 3. Unset the iso-2022-jp output flag, set iso-2022-jp
        // decoder state to iso-2022-jp decoder output state, and
        // return error.
        iso2022jp_output_flag = false;
        iso2022jp_decoder_state = iso2022jp_decoder_output_state;
        return decoderError(fatal);

      case states.Escape:
        // Escape

        // 1. Let lead be iso-2022-jp lead and set iso-2022-jp lead to
        // 0x00.
        var lead = iso2022jp_lead;
        iso2022jp_lead = 0x00;

        // 2. Let state be null.
        var state = null;

        // 3. If lead is 0x28 and byte is 0x42, set state to ASCII.
        if (lead === 0x28 && bite === 0x42)
          state = states.ASCII;

        // 4. If lead is 0x28 and byte is 0x4A, set state to Roman.
        if (lead === 0x28 && bite === 0x4A)
          state = states.Roman;

        // 5. If lead is 0x28 and byte is 0x49, set state to Katakana.
        if (lead === 0x28 && bite === 0x49)
          state = states.Katakana;

        // 6. If lead is 0x24 and byte is either 0x40 or 0x42, set
        // state to lead byte.
        if (lead === 0x24 && (bite === 0x40 || bite === 0x42))
          state = states.LeadByte;

        // 7. If state is non-null, run these substeps:
        if (state !== null) {
          // 1. Set iso-2022-jp decoder state and iso-2022-jp decoder
          // output state to state. Both must be assigned: the output
          // state is what a later invalid escape sequence reverts to.
          iso2022jp_decoder_state = state;
          iso2022jp_decoder_output_state = state;

          // 2. Let output flag be the iso-2022-jp output flag.
          var output_flag = iso2022jp_output_flag;

          // 3. Set the iso-2022-jp output flag.
          iso2022jp_output_flag = true;

          // 4. Return continue, if output flag is unset, and error
          // otherwise.
          return !output_flag ? null : decoderError(fatal);
        }

        // 8. Prepend lead and byte to stream.
        stream.prepend([lead, bite]);

        // 9. Unset the iso-2022-jp output flag, set iso-2022-jp
        // decoder state to iso-2022-jp decoder output state and
        // return error.
        iso2022jp_output_flag = false;
        iso2022jp_decoder_state = iso2022jp_decoder_output_state;
        return decoderError(fatal);
      }
    };
  }
  2254. // 13.2.2 iso-2022-jp encoder
  /**
   * Encoder for iso-2022-jp. Tracks which character set is currently
   * selected in the output and, when a code point needs a different
   * one, pushes the code point back onto the stream and emits the
   * three-byte escape sequence first.
   *
   * @constructor
   * @implements {Encoder}
   * @param {{fatal: boolean}} options
   */
  function ISO2022JPEncoder(options) {
    // Read from the options object as the other coders here do; the
    // handler below never consults it.
    var fatal = options.fatal;
    // iso-2022-jp's encoder has an associated iso-2022-jp encoder
    // state which is one of ASCII, Roman, and jis0208 (initially
    // ASCII).
    /** @enum */
    var states = {
      ASCII: 0,
      Roman: 1,
      jis0208: 2
    };
    var /** @type {number} */ iso2022jp_state = states.ASCII;
    /**
     * @param {Stream} stream Input stream.
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} Byte(s) to emit.
     */
    this.handler = function(stream, code_point) {
      // 1. If code point is end-of-stream and iso-2022-jp encoder
      // state is not ASCII, prepend code point to stream, set
      // iso-2022-jp encoder state to ASCII, and return three bytes
      // 0x1B 0x28 0x42.
      if (code_point === end_of_stream &&
          iso2022jp_state !== states.ASCII) {
        stream.prepend(code_point);
        iso2022jp_state = states.ASCII;
        return [0x1B, 0x28, 0x42];
      }

      // 2. If code point is end-of-stream and iso-2022-jp encoder
      // state is ASCII, return finished.
      if (code_point === end_of_stream && iso2022jp_state === states.ASCII)
        return finished;

      // 3. If ISO-2022-JP encoder state is ASCII or Roman, and code
      // point is U+000E, U+000F, or U+001B, return error with U+FFFD.
      if ((iso2022jp_state === states.ASCII ||
           iso2022jp_state === states.Roman) &&
          (code_point === 0x000E || code_point === 0x000F ||
           code_point === 0x001B)) {
        return encoderError(0xFFFD);
      }

      // 4. If iso-2022-jp encoder state is ASCII and code point is an
      // ASCII code point, return a byte whose value is code point.
      if (iso2022jp_state === states.ASCII &&
          isASCIICodePoint(code_point))
        return code_point;

      // 5. If iso-2022-jp encoder state is Roman and code point is an
      // ASCII code point, excluding U+005C and U+007E, or is U+00A5
      // or U+203E, run these substeps:
      if (iso2022jp_state === states.Roman) {
        // 1. If code point is an ASCII code point (other than U+005C
        // and U+007E), return a byte whose value is code point.
        if (isASCIICodePoint(code_point) &&
            code_point !== 0x005C && code_point !== 0x007E)
          return code_point;

        // 2. If code point is U+00A5, return byte 0x5C.
        if (code_point === 0x00A5)
          return 0x5C;

        // 3. If code point is U+203E, return byte 0x7E.
        if (code_point === 0x203E)
          return 0x7E;
      }

      // 6. If code point is an ASCII code point, and iso-2022-jp
      // encoder state is not ASCII, prepend code point to stream, set
      // iso-2022-jp encoder state to ASCII, and return three bytes
      // 0x1B 0x28 0x42.
      if (isASCIICodePoint(code_point) &&
          iso2022jp_state !== states.ASCII) {
        stream.prepend(code_point);
        iso2022jp_state = states.ASCII;
        return [0x1B, 0x28, 0x42];
      }

      // 7. If code point is either U+00A5 or U+203E, and iso-2022-jp
      // encoder state is not Roman, prepend code point to stream, set
      // iso-2022-jp encoder state to Roman, and return three bytes
      // 0x1B 0x28 0x4A.
      if ((code_point === 0x00A5 || code_point === 0x203E) &&
          iso2022jp_state !== states.Roman) {
        stream.prepend(code_point);
        iso2022jp_state = states.Roman;
        return [0x1B, 0x28, 0x4A];
      }

      // 8. If code point is U+2212, set it to U+FF0D.
      if (code_point === 0x2212)
        code_point = 0xFF0D;

      // 9. Let pointer be the index pointer for code point in index
      // jis0208.
      var pointer = indexPointerFor(code_point, index('jis0208'));

      // 10. If pointer is null, return error with code point.
      if (pointer === null)
        return encoderError(code_point);

      // 11. If iso-2022-jp encoder state is not jis0208, prepend code
      // point to stream, set iso-2022-jp encoder state to jis0208,
      // and return three bytes 0x1B 0x24 0x42.
      if (iso2022jp_state !== states.jis0208) {
        stream.prepend(code_point);
        iso2022jp_state = states.jis0208;
        return [0x1B, 0x24, 0x42];
      }

      // 12. Let lead be floor(pointer / 94) + 0x21.
      var lead = floor(pointer / 94) + 0x21;

      // 13. Let trail be pointer % 94 + 0x21.
      var trail = pointer % 94 + 0x21;

      // 14. Return two bytes whose values are lead and trail.
      return [lead, trail];
    };
  }
  /**
   * Factory for ISO-2022-JP encoders.
   * @param {{fatal: boolean}} options
   * @return {!ISO2022JPEncoder}
   */
  encoders['ISO-2022-JP'] = function(options) {
    var encoder = new ISO2022JPEncoder(options);
    return encoder;
  };
  /**
   * Factory for ISO-2022-JP decoders.
   * @param {{fatal: boolean}} options
   * @return {!ISO2022JPDecoder}
   */
  decoders['ISO-2022-JP'] = function(options) {
    var decoder = new ISO2022JPDecoder(options);
    return decoder;
  };
  // 13.3 Shift_JIS

  // 13.3.1 Shift_JIS decoder
  /**
   * Byte-at-a-time Shift_JIS decoder. A pending lead byte is kept between
   * calls so that two-byte sequences can be resolved when the trail byte
   * arrives.
   * @constructor
   * @implements {Decoder}
   * @param {{fatal: boolean}} options
   */
  function ShiftJISDecoder(options) {
    var fatal = options.fatal;
    // Lead byte of a two-byte sequence still waiting for its trail byte;
    // 0x00 means no sequence is in progress.
    var /** @type {number} */ Shift_JIS_lead = 0x00;

    /**
     * @param {Stream} stream The stream of bytes being decoded.
     * @param {number} bite The next byte read from the stream.
     * @return {?(number|!Array.<number>)} The next code point(s)
     *     decoded, or null if not enough data exists in the input
     *     stream to decode a complete code point.
     */
    this.handler = function(stream, bite) {
      // End of input: finished when nothing is pending, otherwise the
      // dangling lead byte is dropped and reported as an error.
      if (bite === end_of_stream) {
        if (Shift_JIS_lead === 0x00)
          return finished;
        Shift_JIS_lead = 0x00;
        return decoderError(fatal);
      }

      // A lead byte is pending: `bite` is the candidate trail byte.
      if (Shift_JIS_lead !== 0x00) {
        var lead = Shift_JIS_lead;
        Shift_JIS_lead = 0x00;

        // Only trail bytes in 0x40-0x7E or 0x80-0xFC yield a pointer.
        var pointer = null;
        if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFC)) {
          var trail_base = (bite < 0x7F) ? 0x40 : 0x41;
          var lead_base = (lead < 0xA0) ? 0x81 : 0xC1;
          pointer = (lead - lead_base) * 188 + bite - trail_base;
        }

        // Pointers 8836..10715 are mapped linearly onto code points
        // starting at 0xE000, without consulting the index.
        if (inRange(pointer, 8836, 10715))
          return 0xE000 - 8836 + pointer;

        var code_point = null;
        if (pointer !== null)
          code_point = indexCodePointFor(pointer, index('jis0208'));
        if (code_point !== null)
          return code_point;

        // Failed sequence: an ASCII trail byte is pushed back onto the
        // stream so that it is decoded on its own by a later call.
        if (isASCIIByte(bite))
          stream.prepend(bite);
        return decoderError(fatal);
      }

      // No lead pending: ASCII bytes and 0x80 map to themselves.
      if (isASCIIByte(bite) || bite === 0x80)
        return bite;

      // Single bytes 0xA1..0xDF map onto 0xFF61..0xFF9F.
      if (inRange(bite, 0xA1, 0xDF))
        return bite - 0xA1 + 0xFF61;

      // Lead byte of a two-byte sequence: remember it and wait for more.
      if (inRange(bite, 0x81, 0x9F) || inRange(bite, 0xE0, 0xFC)) {
        Shift_JIS_lead = bite;
        return null;
      }

      // Anything else is not valid here.
      return decoderError(fatal);
    };
  }
  // 13.3.2 Shift_JIS encoder
  /**
   * Stateless Shift_JIS encoder: turns one code point per call into one
   * byte or a [lead, trail] byte pair.
   * @constructor
   * @implements {Encoder}
   * @param {{fatal: boolean}} options
   */
  function ShiftJISEncoder(options) {
    // Read for parity with the other encoders; the handler does not use it.
    var fatal = options.fatal;

    /**
     * @param {Stream} stream Input stream.
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} Byte(s) to emit.
     */
    this.handler = function(stream, code_point) {
      if (code_point === end_of_stream)
        return finished;

      // ASCII code points and U+0080 are emitted as a single identical byte.
      if (isASCIICodePoint(code_point) || code_point === 0x0080)
        return code_point;

      // Two code points with fixed single-byte encodings.
      switch (code_point) {
        case 0x00A5:
          return 0x5C;
        case 0x203E:
          return 0x7E;
      }

      // U+FF61..U+FF9F map onto the single bytes 0xA1..0xDF.
      if (inRange(code_point, 0xFF61, 0xFF9F))
        return code_point - 0xFF61 + 0xA1;

      // U+2212 is looked up (and reported on failure) as U+FF0D.
      var lookup = (code_point === 0x2212) ? 0xFF0D : code_point;

      var pointer = indexShiftJISPointerFor(lookup);
      if (pointer === null)
        return encoderError(lookup);

      // Split the pointer into a row/column pair over 188 columns, then
      // shift each part past the byte values skipped by the encoding.
      var row = floor(pointer / 188);
      var column = pointer % 188;
      var lead_byte = row + ((row < 0x1F) ? 0x81 : 0xC1);
      var trail_byte = column + ((column < 0x3F) ? 0x40 : 0x41);
      return [lead_byte, trail_byte];
    };
  }
  /**
   * Factory stored under the 'Shift_JIS' key of `encoders`; each call
   * builds a new ShiftJISEncoder from the given options.
   * @param {{fatal: boolean}} options
   */
  encoders['Shift_JIS'] = function(options) {
    var encoder = new ShiftJISEncoder(options);
    return encoder;
  };
  /**
   * Factory stored under the 'Shift_JIS' key of `decoders`; each call
   * builds a new ShiftJISDecoder from the given options.
   * @param {{fatal: boolean}} options
   */
  decoders['Shift_JIS'] = function(options) {
    var decoder = new ShiftJISDecoder(options);
    return decoder;
  };
  2523. //
  2524. // 14. Legacy multi-byte Korean encodings
  2525. //
  // 14.1 euc-kr

  // 14.1.1 euc-kr decoder
  /**
   * Byte-at-a-time euc-kr decoder. A pending lead byte is kept between
   * calls so that two-byte sequences can be resolved when the trail byte
   * arrives.
   * @constructor
   * @implements {Decoder}
   * @param {{fatal: boolean}} options
   */
  function EUCKRDecoder(options) {
    var fatal = options.fatal;

    // euc-kr's decoder has an associated euc-kr lead (initially 0x00).
    var /** @type {number} */ euckr_lead = 0x00;

    /**
     * @param {Stream} stream The stream of bytes being decoded.
     * @param {number} bite The next byte read from the stream.
     * @return {?(number|!Array.<number>)} The next code point(s)
     *     decoded, or null if not enough data exists in the input
     *     stream to decode a complete code point.
     */
    this.handler = function(stream, bite) {
      // 1. If byte is end-of-stream and euc-kr lead is not 0x00, set
      // euc-kr lead to 0x00 and return error.
      if (bite === end_of_stream && euckr_lead !== 0x00) {
        euckr_lead = 0x00;
        return decoderError(fatal);
      }

      // 2. If byte is end-of-stream and euc-kr lead is 0x00, return
      // finished.
      if (bite === end_of_stream && euckr_lead === 0x00)
        return finished;

      // 3. If euc-kr lead is not 0x00, let lead be euc-kr lead, let
      // pointer be null, set euc-kr lead to 0x00, and then run these
      // substeps:
      if (euckr_lead !== 0x00) {
        var lead = euckr_lead;
        var pointer = null;
        euckr_lead = 0x00;

        // 1. If byte is in the range 0x41 to 0xFE, inclusive, set
        // pointer to (lead − 0x81) × 190 + (byte − 0x41).
        if (inRange(bite, 0x41, 0xFE))
          pointer = (lead - 0x81) * 190 + (bite - 0x41);

        // 2. Let code point be null, if pointer is null, and the
        // index code point for pointer in index euc-kr otherwise.
        var code_point = (pointer === null)
            ? null : indexCodePointFor(pointer, index('euc-kr'));

        // 3. If code point is null and byte is an ASCII byte, prepend
        // byte to stream.
        // The test must be on code_point, not pointer: an ASCII trail
        // byte in 0x41..0x7F always produces a pointer, and when the
        // index has no entry for it the byte still has to be pushed back
        // so that it is decoded on its own instead of being swallowed.
        if (code_point === null && isASCIIByte(bite))
          stream.prepend(bite);

        // 4. If code point is null, return error.
        if (code_point === null)
          return decoderError(fatal);

        // 5. Return a code point whose value is code point.
        return code_point;
      }

      // 4. If byte is an ASCII byte, return a code point whose value
      // is byte.
      if (isASCIIByte(bite))
        return bite;

      // 5. If byte is in the range 0x81 to 0xFE, inclusive, set
      // euc-kr lead to byte and return continue.
      if (inRange(bite, 0x81, 0xFE)) {
        euckr_lead = bite;
        return null;
      }

      // 6. Return error.
      return decoderError(fatal);
    };
  }
  // 14.1.2 euc-kr encoder
  /**
   * Stateless euc-kr encoder: turns one code point per call into one
   * byte or a [lead, trail] byte pair.
   * @constructor
   * @implements {Encoder}
   * @param {{fatal: boolean}} options
   */
  function EUCKREncoder(options) {
    // Read for parity with the other encoders; the handler does not use it.
    var fatal = options.fatal;

    /**
     * @param {Stream} stream Input stream.
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} Byte(s) to emit.
     */
    this.handler = function(stream, code_point) {
      if (code_point === end_of_stream)
        return finished;

      // ASCII code points are emitted as a single identical byte.
      if (isASCIICodePoint(code_point))
        return code_point;

      // Everything else needs an entry in index euc-kr.
      var pointer = indexPointerFor(code_point, index('euc-kr'));
      if (pointer === null)
        return encoderError(code_point);

      // The pointer addresses a grid 190 columns wide: the row selects
      // the lead byte (from 0x81), the column the trail byte (from 0x41).
      var row = floor(pointer / 190);
      var column = pointer % 190;
      return [row + 0x81, column + 0x41];
    };
  }
  /**
   * Factory stored under the 'EUC-KR' key of `encoders`; each call builds
   * a new EUCKREncoder from the given options.
   * @param {{fatal: boolean}} options
   */
  encoders['EUC-KR'] = function(options) {
    var encoder = new EUCKREncoder(options);
    return encoder;
  };
  /**
   * Factory stored under the 'EUC-KR' key of `decoders`; each call builds
   * a new EUCKRDecoder from the given options.
   * @param {{fatal: boolean}} options
   */
  decoders['EUC-KR'] = function(options) {
    var decoder = new EUCKRDecoder(options);
    return decoder;
  };
  2637. //
  2638. // 15. Legacy miscellaneous encodings
  2639. //
  2640. // 15.1 replacement
  2641. // Not needed - API throws RangeError
  // 15.2 Common infrastructure for utf-16be and utf-16le
  /**
   * Splits a 16-bit code unit into its two bytes, ordered for the
   * requested endianness.
   * @param {number} code_unit
   * @param {boolean} utf16be True for big-endian order (high byte first),
   *     false for little-endian order (low byte first).
   * @return {!Array.<number>} bytes
   */
  function convertCodeUnitToBytes(code_unit, utf16be) {
    var high = code_unit >> 8;
    var low = code_unit & 0x00FF;
    return utf16be ? [high, low] : [low, high];
  }
  // 15.2.1 shared utf-16 decoder
  /**
   * Byte-at-a-time UTF-16 decoder shared by utf-16be and utf-16le. It
   * first pairs bytes into 16-bit code units, then pairs surrogate code
   * units into a single code point.
   * @constructor
   * @implements {Decoder}
   * @param {boolean} utf16_be True if big-endian, false if little-endian.
   * @param {{fatal: boolean}} options
   */
  function UTF16Decoder(utf16_be, options) {
    var fatal = options.fatal;
    // First byte of a code unit still waiting for its second byte, and a
    // lead surrogate still waiting for its trail surrogate; null = none.
    var /** @type {?number} */ utf16_lead_byte = null,
        /** @type {?number} */ utf16_lead_surrogate = null;

    /**
     * @param {Stream} stream The stream of bytes being decoded.
     * @param {number} bite The next byte read from the stream.
     * @return {?(number|!Array.<number>)} The next code point(s)
     *     decoded, or null if not enough data exists in the input
     *     stream to decode a complete code point.
     */
    this.handler = function(stream, bite) {
      // 1. If byte is end-of-stream and either utf-16 lead byte or
      // utf-16 lead surrogate is not null, set utf-16 lead byte and
      // utf-16 lead surrogate to null, and return error.
      // The reset matters: without it the pending state survives the
      // error and a further end-of-stream call reports error again
      // instead of finished.
      if (bite === end_of_stream && (utf16_lead_byte !== null ||
                                     utf16_lead_surrogate !== null)) {
        utf16_lead_byte = null;
        utf16_lead_surrogate = null;
        return decoderError(fatal);
      }

      // 2. If byte is end-of-stream and utf-16 lead byte and utf-16
      // lead surrogate are null, return finished.
      if (bite === end_of_stream && utf16_lead_byte === null &&
          utf16_lead_surrogate === null) {
        return finished;
      }

      // 3. If utf-16 lead byte is null, set utf-16 lead byte to byte
      // and return continue.
      if (utf16_lead_byte === null) {
        utf16_lead_byte = bite;
        return null;
      }

      // 4. Let code unit be the result of:
      var code_unit;
      if (utf16_be) {
        // utf-16be decoder flag is set
        //   (utf-16 lead byte << 8) + byte.
        code_unit = (utf16_lead_byte << 8) + bite;
      } else {
        // utf-16be decoder flag is unset
        //   (byte << 8) + utf-16 lead byte.
        code_unit = (bite << 8) + utf16_lead_byte;
      }
      // Then set utf-16 lead byte to null.
      utf16_lead_byte = null;

      // 5. If utf-16 lead surrogate is not null, let lead surrogate
      // be utf-16 lead surrogate, set utf-16 lead surrogate to null,
      // and then run these substeps:
      if (utf16_lead_surrogate !== null) {
        var lead_surrogate = utf16_lead_surrogate;
        utf16_lead_surrogate = null;

        // 1. If code unit is in the range U+DC00 to U+DFFF,
        // inclusive, return a code point whose value is 0x10000 +
        // ((lead surrogate − 0xD800) << 10) + (code unit − 0xDC00).
        if (inRange(code_unit, 0xDC00, 0xDFFF)) {
          return 0x10000 + (lead_surrogate - 0xD800) * 0x400 +
              (code_unit - 0xDC00);
        }

        // 2. Prepend the sequence resulting of converting code unit
        // to bytes using utf-16be decoder flag to stream and return
        // error. The unpaired lead surrogate is the error; the code
        // unit that followed it is decoded again from the stream.
        stream.prepend(convertCodeUnitToBytes(code_unit, utf16_be));
        return decoderError(fatal);
      }

      // 6. If code unit is in the range U+D800 to U+DBFF, inclusive,
      // set utf-16 lead surrogate to code unit and return continue.
      if (inRange(code_unit, 0xD800, 0xDBFF)) {
        utf16_lead_surrogate = code_unit;
        return null;
      }

      // 7. If code unit is in the range U+DC00 to U+DFFF, inclusive,
      // return error.
      if (inRange(code_unit, 0xDC00, 0xDFFF))
        return decoderError(fatal);

      // 8. Return code point code unit.
      return code_unit;
    };
  }
  // 15.2.2 shared utf-16 encoder
  /**
   * Stateless UTF-16 encoder shared by utf-16be and utf-16le: turns one
   * code point per call into two bytes, or four for a surrogate pair.
   * @constructor
   * @implements {Encoder}
   * @param {boolean} utf16_be True if big-endian, false if little-endian.
   * @param {{fatal: boolean}} options
   */
  function UTF16Encoder(utf16_be, options) {
    // Read for parity with the other encoders; the handler does not use it.
    var fatal = options.fatal;

    /**
     * @param {Stream} stream Input stream.
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} Byte(s) to emit.
     */
    this.handler = function(stream, code_point) {
      if (code_point === end_of_stream)
        return finished;

      // Code points up to U+FFFF are a single code unit.
      if (inRange(code_point, 0x0000, 0xFFFF))
        return convertCodeUnitToBytes(code_point, utf16_be);

      // Above U+FFFF: the upper bits of the offset from 0x10000 form the
      // lead surrogate, the low ten bits the trail surrogate.
      var offset = code_point - 0x10000;
      var lead_bytes = convertCodeUnitToBytes(
          (offset >> 10) + 0xD800, utf16_be);
      var trail_bytes = convertCodeUnitToBytes(
          (offset & 0x3FF) + 0xDC00, utf16_be);
      return lead_bytes.concat(trail_bytes);
    };
  }
  // 15.3 utf-16be

  // 15.3.1 utf-16be decoder
  /**
   * Factory stored under the 'UTF-16BE' key of `decoders`; builds a
   * UTF16Decoder with the big-endian flag set.
   * @param {{fatal: boolean}} options
   */
  decoders['UTF-16BE'] = function(options) {
    var decoder = new UTF16Decoder(true, options);
    return decoder;
  };

  // 15.3.2 utf-16be encoder
  /**
   * Factory stored under the 'UTF-16BE' key of `encoders`; builds a
   * UTF16Encoder with the big-endian flag set.
   * @param {{fatal: boolean}} options
   */
  encoders['UTF-16BE'] = function(options) {
    var encoder = new UTF16Encoder(true, options);
    return encoder;
  };

  // 15.4 utf-16le

  // 15.4.1 utf-16le decoder
  /**
   * Factory stored under the 'UTF-16LE' key of `decoders`; builds a
   * UTF16Decoder with the big-endian flag unset.
   * @param {{fatal: boolean}} options
   */
  decoders['UTF-16LE'] = function(options) {
    var decoder = new UTF16Decoder(false, options);
    return decoder;
  };

  // 15.4.2 utf-16le encoder
  /**
   * Factory stored under the 'UTF-16LE' key of `encoders`; builds a
   * UTF16Encoder with the big-endian flag unset.
   * @param {{fatal: boolean}} options
   */
  encoders['UTF-16LE'] = function(options) {
    var encoder = new UTF16Encoder(false, options);
    return encoder;
  };
  // 15.5 x-user-defined

  // 15.5.1 x-user-defined decoder
  /**
   * Stateless x-user-defined decoder: every byte maps to exactly one
   * code point.
   * @constructor
   * @implements {Decoder}
   * @param {{fatal: boolean}} options
   */
  function XUserDefinedDecoder(options) {
    // Read for parity with the other decoders; the handler does not use it.
    var fatal = options.fatal;

    /**
     * @param {Stream} stream The stream of bytes being decoded.
     * @param {number} bite The next byte read from the stream.
     * @return {?(number|!Array.<number>)} The next code point(s)
     *     decoded, or null if not enough data exists in the input
     *     stream to decode a complete code point.
     */
    this.handler = function(stream, bite) {
      if (bite === end_of_stream)
        return finished;

      // ASCII bytes map to themselves; every other byte is offset so
      // that 0x80 lands on 0xF780.
      return isASCIIByte(bite) ? bite : 0xF780 + bite - 0x80;
    };
  }
  // 15.5.2 x-user-defined encoder
  /**
   * Stateless x-user-defined encoder: turns one code point per call into
   * one byte, or reports it as unencodable.
   * @constructor
   * @implements {Encoder}
   * @param {{fatal: boolean}} options
   */
  function XUserDefinedEncoder(options) {
    // Read for parity with the other encoders; the handler does not use it.
    var fatal = options.fatal;

    /**
     * @param {Stream} stream Input stream.
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} Byte(s) to emit.
     */
    this.handler = function(stream, code_point) {
      if (code_point === end_of_stream)
        return finished;

      // ASCII code points are emitted as a single identical byte.
      if (isASCIICodePoint(code_point))
        return code_point;

      // Only U+F780..U+F7FF is encodable beyond ASCII; anything outside
      // that range is handed to encoderError.
      if (!inRange(code_point, 0xF780, 0xF7FF))
        return encoderError(code_point);

      // U+F780..U+F7FF map onto the bytes 0x80..0xFF.
      return code_point - 0xF780 + 0x80;
    };
  }
  /**
   * Factory stored under the 'x-user-defined' key of `encoders`; each
   * call builds a new XUserDefinedEncoder from the given options.
   * @param {{fatal: boolean}} options
   */
  encoders['x-user-defined'] = function(options) {
    var encoder = new XUserDefinedEncoder(options);
    return encoder;
  };
  /**
   * Factory stored under the 'x-user-defined' key of `decoders`; each
   * call builds a new XUserDefinedDecoder from the given options.
   * @param {{fatal: boolean}} options
   */
  decoders['x-user-defined'] = function(options) {
    var decoder = new XUserDefinedDecoder(options);
    return decoder;
  };
  2866. if (typeof module !== "undefined" && module.exports) {
  2867. module.exports = {
  2868. TextEncoder: TextEncoder,
  2869. TextDecoder: TextDecoder,
  2870. EncodingIndexes: require("./encoding-indexes.js")["encoding-indexes"]
  2871. };
  2872. }
  2873. // For strict environments where `this` inside the global scope
  2874. // is `undefined`, take a pure object instead
  2875. }(this || {}));