utf7.js 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. // UTF-7 codec, according to https://tools.ietf.org/html/rfc2152
  2. // Below is UTF-7-IMAP codec, according to http://tools.ietf.org/html/rfc3501#section-5.1.3
  3. exports.utf7 = function(options) {
  4. return {
  5. encoder: function utf7Encoder() {
  6. return {
  7. write: utf7EncoderWrite,
  8. end: function() {},
  9. iconv: options.iconv,
  10. };
  11. },
  12. decoder: function utf7Decoder() {
  13. return {
  14. write: utf7DecoderWrite,
  15. end: utf7DecoderEnd,
  16. iconv: options.iconv,
  17. inBase64: false,
  18. base64Accum: '',
  19. };
  20. },
  21. };
  22. };
  // Matches each run of characters that must NOT be written directly in UTF-7.
  // Direct characters are letters, digits, ' ( ) , - . / : ? and space, LF, CR, TAB.
  // Note that `,-\.` is a character range from ',' to '.', which covers ',', '-' and '.'.
  var nonDirectChars = /[^A-Za-z0-9'\(\),-\.\/:\? \n\r\t]+/g;

  // Encoder `write`: converts a string to its UTF-7 representation.
  //   str     - the string to encode.
  //   returns - a Buffer holding the encoded text.
  // Must be called with `this` set to an encoder object exposing `iconv.encode`.
  // Naive implementation: every run of non-direct characters becomes "+<base64>-",
  // and a lone "+" becomes "+-".
  function utf7EncoderWrite(str) {
      var self = this;
      var encoded = str.replace(nonDirectChars, function(chunk) {
          if (chunk === '+')
              return "+-";

          // Base64 of the UTF-16BE bytes, with the '=' padding stripped.
          var b64 = self.iconv.encode(chunk, 'utf16-be').toString('base64').replace(/=+$/, '');
          return "+" + b64 + "-";
      });

      // Buffer.from replaces the deprecated `new Buffer(string)` constructor.
      return Buffer.from(encoded);
  }
  // The base64 alphabet, without the '=' padding character.
  var base64Regex = /[A-Za-z0-9\/+]/;

  // Lookup table indexed by byte value: true when that byte is a base64 alphabet character.
  var base64Chars = [];
  var i = 0;
  while (i < 256) {
      base64Chars.push(base64Regex.test(String.fromCharCode(i)));
      i++;
  }

  // Character codes that the codecs compare raw bytes against.
  var plusChar = '+'.charCodeAt(0);
  var minusChar = '-'.charCodeAt(0);
  var andChar = '&'.charCodeAt(0);
  // Decoder `write`: decodes one chunk of a UTF-7 byte stream.
  //   buf     - Buffer with the next chunk of encoded bytes.
  //   returns - the string decoded from this chunk (possibly empty).
  // Must be called with `this` set to a decoder object exposing `iconv.decode`, `inBase64`
  // and `base64Accum`. A third field, `base64Seen`, is created on first use; it records
  // whether at least one base64 character has followed the current '+'. Without it, a '-'
  // arriving as the first byte of a chunk cannot be told apart from the "+-" escape, and a
  // base64 run that is terminated in the next chunk would yield a spurious "+" (and lose
  // any pending accumulated characters).
  function utf7DecoderWrite(buf) {
      var res = "", lastI = 0,
          inBase64 = this.inBase64,
          base64Accum = this.base64Accum,
          base64Seen = inBase64 && this.base64Seen === true,
          b64str, canBeDecoded;

      // The decoder is more involved as we must handle chunks in stream.
      for (var i = 0; i < buf.length; i++) {
          if (!inBase64) { // We're in direct mode.
              // Write direct chars until '+'
              if (buf[i] === plusChar) {
                  res += this.iconv.decode(buf.slice(lastI, i), "ascii"); // Write direct chars.
                  lastI = i + 1;
                  inBase64 = true;
                  base64Seen = false;
              }
          } else if (!base64Chars[buf[i]]) { // Base64 ended.
              if (!base64Seen && i === lastI && buf[i] === minusChar) { // "+-" -> "+"
                  res += "+";
              } else {
                  b64str = base64Accum + buf.slice(lastI, i).toString();
                  res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
              }

              if (buf[i] !== minusChar) // Minus is absorbed after base64.
                  i--;                  // Any other terminator is re-read as a direct char.

              lastI = i + 1;
              inBase64 = false;
              base64Accum = '';
              base64Seen = false;
          }
      }

      if (!inBase64) {
          res += this.iconv.decode(buf.slice(lastI), "ascii"); // Write direct chars.
      } else {
          if (lastI < buf.length)
              base64Seen = true; // This chunk contributed base64 chars to the open run.

          b64str = base64Accum + buf.slice(lastI).toString();

          // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
          canBeDecoded = b64str.length - (b64str.length % 8);
          base64Accum = b64str.slice(canBeDecoded); // The rest will be decoded in future.
          b64str = b64str.slice(0, canBeDecoded);

          res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
      }

      this.inBase64 = inBase64;
      this.base64Accum = base64Accum;
      this.base64Seen = base64Seen;

      return res;
  }
  // Decoder `end`: flushes whatever base64 text is still pending and resets the state.
  //   returns - the string decoded from the leftover base64 characters, or "" if none.
  // Must be called with `this` set to a decoder object exposing `iconv.decode`, `inBase64`
  // and `base64Accum`.
  function utf7DecoderEnd() {
      var res = "";
      if (this.inBase64 && this.base64Accum.length > 0) {
          // Buffer.from replaces the deprecated `new Buffer(string, encoding)` constructor.
          res = this.iconv.decode(Buffer.from(this.base64Accum, 'base64'), "utf16-be");
      }

      this.inBase64 = false;
      this.base64Accum = '';
      return res;
  }
  90. // UTF-7-IMAP codec.
  91. // RFC3501 Sec. 5.1.3 Modified UTF-7 (http://tools.ietf.org/html/rfc3501#section-5.1.3)
  92. // Differences:
  93. // * Base64 part is started by "&" instead of "+"
  94. // * Direct characters are 0x20-0x7E, except "&" (0x26)
  95. // * In Base64, "," is used instead of "/"
  96. // * Base64 must not be used to represent direct characters.
  97. // * No implicit shift back from Base64 (should always end with '-')
  98. // * String must end in non-shifted position.
  99. // * "-&" while in base64 is not allowed.
  100. exports.utf7imap = function(options) {
  101. return {
  102. encoder: function utf7ImapEncoder() {
  103. return {
  104. write: utf7ImapEncoderWrite,
  105. end: utf7ImapEncoderEnd,
  106. iconv: options.iconv,
  107. inBase64: false,
  108. base64Accum: new Buffer(6),
  109. base64AccumIdx: 0,
  110. };
  111. },
  112. decoder: function utf7ImapDecoder() {
  113. return {
  114. write: utf7ImapDecoderWrite,
  115. end: utf7ImapDecoderEnd,
  116. iconv: options.iconv,
  117. inBase64: false,
  118. base64Accum: '',
  119. };
  120. },
  121. };
  122. };
  // Encoder `write` for UTF-7-IMAP: encodes one chunk of a string.
  //   str     - the string chunk to encode.
  //   returns - a Buffer with the bytes produced for this chunk.
  // Must be called with `this` set to an encoder object exposing `inBase64`, `base64Accum`
  // (a Buffer) and `base64AccumIdx`. A base64 run left open at the end of the chunk is NOT
  // closed here; the encoder's `end` emits the pending characters and the final '-'.
  function utf7ImapEncoderWrite(str) {
      var inBase64 = this.inBase64,
          base64Accum = this.base64Accum,
          base64AccumIdx = this.base64AccumIdx,
          // Buffer.alloc replaces the deprecated `new Buffer(size)` constructor.
          buf = Buffer.alloc(str.length*5 + 10), bufIdx = 0;

      for (var i = 0; i < str.length; i++) {
          var uChar = str.charCodeAt(i);
          if (0x20 <= uChar && uChar <= 0x7E) { // Direct character or '&'.
              if (inBase64) {
                  if (base64AccumIdx > 0) {
                      // Flush the partial accumulator: '/' becomes ',' and padding is dropped.
                      bufIdx += buf.write(base64Accum.slice(0, base64AccumIdx).toString('base64').replace(/\//g, ',').replace(/=+$/, ''), bufIdx);
                      base64AccumIdx = 0;
                  }

                  buf[bufIdx++] = minusChar; // Write '-', then go to direct mode.
                  inBase64 = false;
              }

              buf[bufIdx++] = uChar; // Write direct character
              if (uChar === andChar) // Ampersand -> '&-'
                  buf[bufIdx++] = minusChar;
          } else { // Non-direct character
              if (!inBase64) {
                  buf[bufIdx++] = andChar; // Write '&', then go to base64 mode.
                  inBase64 = true;
              }

              // Store the UTF-16 code unit big-endian.
              base64Accum[base64AccumIdx++] = uChar >> 8;
              base64Accum[base64AccumIdx++] = uChar & 0xFF;

              if (base64AccumIdx === base64Accum.length) {
                  // A full accumulator encodes without padding.
                  bufIdx += buf.write(base64Accum.toString('base64').replace(/\//g, ','), bufIdx);
                  base64AccumIdx = 0;
              }
          }
      }

      // `base64Accum` is mutated in place, so only the scalar state is written back.
      this.inBase64 = inBase64;
      this.base64AccumIdx = base64AccumIdx;

      return buf.slice(0, bufIdx);
  }
  // Encoder `end` for UTF-7-IMAP: closes a base64 run that is still open.
  //   returns - a Buffer with the pending base64 characters (if any) followed by '-',
  //             or an empty Buffer when the encoder is already in direct mode.
  // Must be called with `this` set to an encoder object exposing `inBase64`, `base64Accum`
  // (a Buffer) and `base64AccumIdx`.
  function utf7ImapEncoderEnd() {
      // Buffer.alloc replaces the deprecated `new Buffer(size)` constructor.
      var buf = Buffer.alloc(10), bufIdx = 0;
      if (this.inBase64) {
          if (this.base64AccumIdx > 0) {
              // Flush the partial accumulator: '/' becomes ',' and padding is dropped.
              bufIdx += buf.write(this.base64Accum.slice(0, this.base64AccumIdx).toString('base64').replace(/\//g, ',').replace(/=+$/, ''), bufIdx);
              this.base64AccumIdx = 0;
          }

          buf[bufIdx++] = minusChar; // Write '-', then go to direct mode.
          this.inBase64 = false;
      }

      return buf.slice(0, bufIdx);
  }
  // Lookup table for the UTF-7-IMAP decoder: a copy of the standard base64 table in which
  // ',' is accepted as well (it stands in for '/' in modified base64).
  var base64IMAPChars = base64Chars.map(function(isBase64, byteValue) {
      return byteValue === ','.charCodeAt(0) ? true : isBase64;
  });
  // Decoder `write` for UTF-7-IMAP: decodes one chunk of an encoded byte stream.
  //   buf     - Buffer with the next chunk of encoded bytes.
  //   returns - the string decoded from this chunk (possibly empty).
  // Must be called with `this` set to a decoder object exposing `iconv.decode`, `inBase64`
  // and `base64Accum`. A third field, `base64Seen`, is created on first use; it records
  // whether at least one base64 character has followed the current '&'. Without it, a '-'
  // arriving as the first byte of a chunk cannot be told apart from the "&-" escape, and a
  // base64 run that is terminated in the next chunk would yield a spurious "&" (and lose
  // any pending accumulated characters).
  function utf7ImapDecoderWrite(buf) {
      var res = "", lastI = 0,
          inBase64 = this.inBase64,
          base64Accum = this.base64Accum,
          base64Seen = inBase64 && this.base64Seen === true,
          b64str, canBeDecoded;

      // The decoder is more involved as we must handle chunks in stream.
      // It is forgiving, closer to standard UTF-7 (for example, '-' is optional at the end).
      for (var i = 0; i < buf.length; i++) {
          if (!inBase64) { // We're in direct mode.
              // Write direct chars until '&'
              if (buf[i] === andChar) {
                  res += this.iconv.decode(buf.slice(lastI, i), "ascii"); // Write direct chars.
                  lastI = i + 1;
                  inBase64 = true;
                  base64Seen = false;
              }
          } else if (!base64IMAPChars[buf[i]]) { // Base64 ended.
              if (!base64Seen && i === lastI && buf[i] === minusChar) { // "&-" -> "&"
                  res += "&";
              } else {
                  // Modified base64 uses ',' where standard base64 uses '/'.
                  b64str = base64Accum + buf.slice(lastI, i).toString().replace(/,/g, '/');
                  res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
              }

              if (buf[i] !== minusChar) // Minus may be absorbed after base64.
                  i--;                  // Any other terminator is re-read as a direct char.

              lastI = i + 1;
              inBase64 = false;
              base64Accum = '';
              base64Seen = false;
          }
      }

      if (!inBase64) {
          res += this.iconv.decode(buf.slice(lastI), "ascii"); // Write direct chars.
      } else {
          if (lastI < buf.length)
              base64Seen = true; // This chunk contributed base64 chars to the open run.

          b64str = base64Accum + buf.slice(lastI).toString().replace(/,/g, '/');

          // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
          canBeDecoded = b64str.length - (b64str.length % 8);
          base64Accum = b64str.slice(canBeDecoded); // The rest will be decoded in future.
          b64str = b64str.slice(0, canBeDecoded);

          res += this.iconv.decode(Buffer.from(b64str, 'base64'), "utf16-be");
      }

      this.inBase64 = inBase64;
      this.base64Accum = base64Accum;
      this.base64Seen = base64Seen;

      return res;
  }
  // Decoder `end` for UTF-7-IMAP: flushes pending base64 text and resets the state.
  //   returns - the string decoded from the leftover base64 characters, or "" if none.
  // Must be called with `this` set to a decoder object exposing `iconv.decode`, `inBase64`
  // and `base64Accum`. The accumulator is decoded as-is with the standard base64 alphabet.
  function utf7ImapDecoderEnd() {
      var res = "";
      if (this.inBase64 && this.base64Accum.length > 0) {
          // Buffer.from replaces the deprecated `new Buffer(string, encoding)` constructor.
          res = this.iconv.decode(Buffer.from(this.base64Accum, 'base64'), "utf16-be");
      }

      this.inBase64 = false;
      this.base64Accum = '';
      return res;
  }