extract.js 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. var path = require('path');
  2. var fs = require('graceful-fs');
  3. var zlib = require('zlib');
  4. var DecompressZip = require('decompress-zip');
  5. var tar = require('tar-fs');
  6. var Q = require('q');
  7. var mout = require('mout');
  8. var junk = require('junk');
  9. var createError = require('./createError');
  // Keep zlib's default chunk size small; this is an attempt
  // to work around issue #314
  zlib.Z_DEFAULT_CHUNK = 1024 * 8;

  // Lookup table from a file name suffix or a mime type to the function that
  // unpacks that kind of archive.
  // The key order is significant: getExtractor returns the entry for the
  // first key that the (lower-cased) input ends with.
  var extractors = {
    // File name suffixes
    '.zip': extractZip,
    '.tar': extractTar,
    '.tar.gz': extractTarGz,
    '.tgz': extractTarGz,
    '.gz': extractGz,
    // Mime types
    'application/zip': extractZip,
    'application/x-zip': extractZip,
    'application/x-zip-compressed': extractZip,
    'application/x-tar': extractTar,
    'application/x-tgz': extractTarGz,
    'application/x-gzip': extractGz
  };

  // Keys of the table above, in declaration order
  var extractorTypes = Object.keys(extractors);
  // Unpacks the zip file at `archive` into the `dst` directory.
  // Returns a promise that is resolved with `dst` once the unzipper emits
  // 'extract' and rejected with whatever it emits on 'error'.
  function extractZip(archive, dst) {
    var pending = Q.defer();
    var unzipper = new DecompressZip(archive);

    unzipper.on('error', pending.reject);
    unzipper.on('extract', function () {
      pending.resolve(dst);
    });

    unzipper.extract({
      path: dst,
      follow: false, // Symlinks must not be followed (#699)
      filter: filterSymlinks // Leave symlink entries out
    });

    return pending.promise;
  }
  // Unpacks the tar file at `archive` into the `dst` directory by piping the
  // file into a tar-fs extract stream.
  // Returns a promise that is resolved with `dst` when the extract stream
  // emits 'finish' and rejected on an 'error' from either stream.
  function extractTar(archive, dst) {
    var pending = Q.defer();

    var input = fs.createReadStream(archive);
    input.on('error', pending.reject);

    var untar = input.pipe(tar.extract(dst, {
      ignore: isSymlink // Filter symlink files
    }));
    untar.on('error', pending.reject);
    untar.on('finish', function () {
      pending.resolve(dst);
    });

    return pending.promise;
  }
  // Unpacks the gzipped tar file at `archive` into the `dst` directory:
  // file -> gunzip -> tar-fs extract stream.
  // Returns a promise that is resolved with `dst` when the extract stream
  // emits 'finish' and rejected on an 'error' from any of the three streams.
  function extractTarGz(archive, dst) {
    var pending = Q.defer();

    var input = fs.createReadStream(archive);
    input.on('error', pending.reject);

    var gunzip = input.pipe(zlib.createGunzip());
    gunzip.on('error', pending.reject);

    var untar = gunzip.pipe(tar.extract(dst, {
      ignore: isSymlink // Filter symlink files
    }));
    untar.on('error', pending.reject);
    untar.on('finish', function () {
      pending.resolve(dst);
    });

    return pending.promise;
  }
  // Gunzips the file at `archive` and writes the result to `dst`, which is
  // used here as the path of the output file.
  // Returns a promise that is resolved with `dst` when the write stream
  // emits 'close' and rejected on an 'error' from any of the three streams.
  function extractGz(archive, dst) {
    var pending = Q.defer();

    var input = fs.createReadStream(archive);
    input.on('error', pending.reject);

    var gunzip = input.pipe(zlib.createGunzip());
    gunzip.on('error', pending.reject);

    var output = gunzip.pipe(fs.createWriteStream(dst));
    output.on('error', pending.reject);
    output.on('close', function () {
      pending.resolve(dst);
    });

    return pending.promise;
  }
  // Tells whether an archive entry is a symbolic link, so that the tar
  // extractors can leave it out.
  //
  // Two calling conventions are supported:
  // - isSymlink(entry): `entry` is an object describing the entry; it is a
  //   symlink when its `type` is 'SymbolicLink'.
  // - isSymlink(name, header): the way tar-fs invokes its `ignore` option
  //   while extracting. The first argument is the entry's path (a string)
  //   and the second is the tar header object, whose `type` is 'symlink'
  //   for symbolic links. Checking only the first argument would never match
  //   here, because a path string has no `type`.
  //
  // Returns true for symbolic links and false for anything else, including
  // a missing entry description.
  function isSymlink(entry, header) {
    // Prefer the header when one is given as an object
    if (header && typeof header === 'object') {
      return header.type === 'symlink' || header.type === 'SymbolicLink';
    }

    return !!entry && entry.type === 'SymbolicLink';
  }
  // Filter predicate passed to the zip extractor (see extractZip).
  // Returns false for an entry whose type is 'SymbolicLink' and true for
  // every other entry.
  function filterSymlinks(entry) {
    var isLink = entry.type === 'SymbolicLink';

    return !isLink;
  }
  // Looks up the extractor for a file name or a mime type.
  // Returns the function registered under the first known type that the
  // given string ends with, or null when no type matches.
  function getExtractor(archive) {
    // Compare in lower case so that upper-cased extensions match as well
    var lowered = archive.toLowerCase();

    var matched = mout.array.find(extractorTypes, function (candidate) {
      return mout.string.endsWith(lowered, candidate);
    });

    if (!matched) {
      return null;
    }

    return extractors[matched];
  }
  // Checks whether `dir` contains exactly one entry (not counting OS junk
  // files) and whether that entry is a directory.
  // Returns a promise resolved with the full path of that directory, or with
  // false in every other case.
  function isSingleDir(dir) {
    return Q.nfcall(fs.readdir, dir)
    .then(function (entries) {
      // OS specific files must not count towards the total
      var relevant = entries.filter(junk.isnt);

      if (relevant.length !== 1) {
        return false;
      }

      var candidate = path.join(dir, relevant[0]);

      return Q.nfcall(fs.stat, candidate)
      .then(function (stats) {
        if (stats.isDirectory()) {
          return candidate;
        }

        return false;
      });
    });
  }
  // Moves every entry of `dir` one level up, into the parent of `dir`, and
  // then removes `dir` itself.
  // All renames are started together; the rmdir only runs after every one of
  // them has succeeded. Returns the promise of that whole sequence.
  function moveSingleDirContents(dir) {
    var parentDir = path.dirname(dir);

    // Renames a single entry from inside `dir` to the same name in the parent
    function moveUp(name) {
      var from = path.join(dir, name);
      var to = path.join(parentDir, name);

      return Q.nfcall(fs.rename, from, to);
    }

    return Q.nfcall(fs.readdir, dir)
    .then(function (names) {
      return Q.all(names.map(moveUp));
    })
    .then(function () {
      return Q.nfcall(fs.rmdir, dir);
    });
  }
  124. // -----------------------------
  // Tells whether an extractor is available.
  // When a mime type is given and it is not the generic
  // 'application/octet-stream', only the mime type is looked up; otherwise
  // the lookup is done on `src`.
  function canExtract(src, mimeType) {
    var hasSpecificMime = mimeType && mimeType !== 'application/octet-stream';
    var subject = hasSpecificMime ? mimeType : src;

    return !!getExtractor(subject);
  }
  // Extracts the archive at `src` into `dst` and returns a promise that is
  // resolved with `dst`.
  //
  // Options read from `opts`:
  // - keepArchive: true to keep the archive afterwards (defaults to false)
  // - keepStructure: true to keep the extracted structure unchanged (defaults to false)
  // - mimeType: used to pick an extractor when `src` itself matches none
  //
  // The promise is rejected with an ENOTARCHIVE error when no extractor
  // matches or when the file is 8 bytes or smaller.
  function extract(src, dst, opts) {
    var options = opts || {};
    var extractor = getExtractor(src);
    var chain;

    // Fall back to the mime type when the file name gave nothing
    if (!extractor && options.mimeType) {
      extractor = getExtractor(options.mimeType);
    }

    // Still nothing: the archive type is unknown
    if (!extractor) {
      return Q.reject(createError('File ' + src + ' is not a known archive', 'ENOTARCHIVE'));
    }

    // Rejects files that are too small, then runs the extractor
    function unpack(stat) {
      if (stat.size <= 8) {
        throw createError('File ' + src + ' is an invalid archive', 'ENOTARCHIVE');
      }

      return extractor(src, dst);
    }

    // Deletes the archive file
    function removeArchive() {
      return Q.nfcall(fs.unlink, src);
    }

    // Moves the contents up when a single directory was extracted
    function flatten() {
      return isSingleDir(dst)
      .then(function (singleDir) {
        return singleDir ? moveSingleDirContents(singleDir) : null;
      });
    }

    chain = Q.nfcall(fs.stat, src).then(unpack);

    // TODO: There's an issue here if the src and dst are the same and
    //       The zip name is the same as some of the zip file contents
    //       Maybe create a temp directory inside dst, unzip it there,
    //       unlink zip and then move contents

    if (!options.keepArchive) {
      chain = chain.then(removeArchive);
    }

    if (!options.keepStructure) {
      chain = chain.then(flatten);
    }

    // Whatever the previous step produced, resolve to the dst dir
    return chain.then(function () {
      return dst;
    });
  }
  // Public API: the module itself is the extract function, with canExtract
  // exposed as a property of it.
  extract.canExtract = canExtract;
  module.exports = extract;