You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. 'use strict'
  2. const BB = require('bluebird')
  3. const contentPath = require('./content/path')
  4. const figgyPudding = require('figgy-pudding')
  5. const finished = BB.promisify(require('mississippi').finished)
  6. const fixOwner = require('./util/fix-owner')
  7. const fs = require('graceful-fs')
  8. const glob = BB.promisify(require('glob'))
  9. const index = require('./entry-index')
  10. const path = require('path')
  11. const rimraf = BB.promisify(require('rimraf'))
  12. const ssri = require('ssri')
  13. BB.promisifyAll(fs)
  14. const VerifyOpts = figgyPudding({
  15. concurrency: {
  16. default: 20
  17. },
  18. filter: {},
  19. log: {
  20. default: { silly () {} }
  21. }
  22. })
  23. module.exports = verify
  24. function verify (cache, opts) {
  25. opts = VerifyOpts(opts)
  26. opts.log.silly('verify', 'verifying cache at', cache)
  27. return BB.reduce([
  28. markStartTime,
  29. fixPerms,
  30. garbageCollect,
  31. rebuildIndex,
  32. cleanTmp,
  33. writeVerifile,
  34. markEndTime
  35. ], (stats, step, i) => {
  36. const label = step.name || `step #${i}`
  37. const start = new Date()
  38. return BB.resolve(step(cache, opts)).then(s => {
  39. s && Object.keys(s).forEach(k => {
  40. stats[k] = s[k]
  41. })
  42. const end = new Date()
  43. if (!stats.runTime) { stats.runTime = {} }
  44. stats.runTime[label] = end - start
  45. return stats
  46. })
  47. }, {}).tap(stats => {
  48. stats.runTime.total = stats.endTime - stats.startTime
  49. opts.log.silly('verify', 'verification finished for', cache, 'in', `${stats.runTime.total}ms`)
  50. })
  51. }
  52. function markStartTime (cache, opts) {
  53. return { startTime: new Date() }
  54. }
  55. function markEndTime (cache, opts) {
  56. return { endTime: new Date() }
  57. }
  58. function fixPerms (cache, opts) {
  59. opts.log.silly('verify', 'fixing cache permissions')
  60. return fixOwner.mkdirfix(cache, cache).then(() => {
  61. // TODO - fix file permissions too
  62. return fixOwner.chownr(cache, cache)
  63. }).then(() => null)
  64. }
  65. // Implements a naive mark-and-sweep tracing garbage collector.
  66. //
  67. // The algorithm is basically as follows:
  68. // 1. Read (and filter) all index entries ("pointers")
  69. // 2. Mark each integrity value as "live"
  70. // 3. Read entire filesystem tree in `content-vX/` dir
  71. // 4. If content is live, verify its checksum and delete it if it fails
  72. // 5. If content is not marked as live, rimraf it.
  73. //
  74. function garbageCollect (cache, opts) {
  75. opts.log.silly('verify', 'garbage collecting content')
  76. const indexStream = index.lsStream(cache)
  77. const liveContent = new Set()
  78. indexStream.on('data', entry => {
  79. if (opts.filter && !opts.filter(entry)) { return }
  80. liveContent.add(entry.integrity.toString())
  81. })
  82. return finished(indexStream).then(() => {
  83. const contentDir = contentPath._contentDir(cache)
  84. return glob(path.join(contentDir, '**'), {
  85. follow: false,
  86. nodir: true,
  87. nosort: true
  88. }).then(files => {
  89. return BB.resolve({
  90. verifiedContent: 0,
  91. reclaimedCount: 0,
  92. reclaimedSize: 0,
  93. badContentCount: 0,
  94. keptSize: 0
  95. }).tap((stats) => BB.map(files, (f) => {
  96. const split = f.split(/[/\\]/)
  97. const digest = split.slice(split.length - 3).join('')
  98. const algo = split[split.length - 4]
  99. const integrity = ssri.fromHex(digest, algo)
  100. if (liveContent.has(integrity.toString())) {
  101. return verifyContent(f, integrity).then(info => {
  102. if (!info.valid) {
  103. stats.reclaimedCount++
  104. stats.badContentCount++
  105. stats.reclaimedSize += info.size
  106. } else {
  107. stats.verifiedContent++
  108. stats.keptSize += info.size
  109. }
  110. return stats
  111. })
  112. } else {
  113. // No entries refer to this content. We can delete.
  114. stats.reclaimedCount++
  115. return fs.statAsync(f).then(s => {
  116. return rimraf(f).then(() => {
  117. stats.reclaimedSize += s.size
  118. return stats
  119. })
  120. })
  121. }
  122. }, { concurrency: opts.concurrency }))
  123. })
  124. })
  125. }
  126. function verifyContent (filepath, sri) {
  127. return fs.statAsync(filepath).then(stat => {
  128. const contentInfo = {
  129. size: stat.size,
  130. valid: true
  131. }
  132. return ssri.checkStream(
  133. fs.createReadStream(filepath),
  134. sri
  135. ).catch(err => {
  136. if (err.code !== 'EINTEGRITY') { throw err }
  137. return rimraf(filepath).then(() => {
  138. contentInfo.valid = false
  139. })
  140. }).then(() => contentInfo)
  141. }).catch({ code: 'ENOENT' }, () => ({ size: 0, valid: false }))
  142. }
  143. function rebuildIndex (cache, opts) {
  144. opts.log.silly('verify', 'rebuilding index')
  145. return index.ls(cache).then(entries => {
  146. const stats = {
  147. missingContent: 0,
  148. rejectedEntries: 0,
  149. totalEntries: 0
  150. }
  151. const buckets = {}
  152. for (let k in entries) {
  153. if (entries.hasOwnProperty(k)) {
  154. const hashed = index._hashKey(k)
  155. const entry = entries[k]
  156. const excluded = opts.filter && !opts.filter(entry)
  157. excluded && stats.rejectedEntries++
  158. if (buckets[hashed] && !excluded) {
  159. buckets[hashed].push(entry)
  160. } else if (buckets[hashed] && excluded) {
  161. // skip
  162. } else if (excluded) {
  163. buckets[hashed] = []
  164. buckets[hashed]._path = index._bucketPath(cache, k)
  165. } else {
  166. buckets[hashed] = [entry]
  167. buckets[hashed]._path = index._bucketPath(cache, k)
  168. }
  169. }
  170. }
  171. return BB.map(Object.keys(buckets), key => {
  172. return rebuildBucket(cache, buckets[key], stats, opts)
  173. }, { concurrency: opts.concurrency }).then(() => stats)
  174. })
  175. }
  176. function rebuildBucket (cache, bucket, stats, opts) {
  177. return fs.truncateAsync(bucket._path).then(() => {
  178. // This needs to be serialized because cacache explicitly
  179. // lets very racy bucket conflicts clobber each other.
  180. return BB.mapSeries(bucket, entry => {
  181. const content = contentPath(cache, entry.integrity)
  182. return fs.statAsync(content).then(() => {
  183. return index.insert(cache, entry.key, entry.integrity, {
  184. metadata: entry.metadata,
  185. size: entry.size
  186. }).then(() => { stats.totalEntries++ })
  187. }).catch({ code: 'ENOENT' }, () => {
  188. stats.rejectedEntries++
  189. stats.missingContent++
  190. })
  191. })
  192. })
  193. }
  194. function cleanTmp (cache, opts) {
  195. opts.log.silly('verify', 'cleaning tmp directory')
  196. return rimraf(path.join(cache, 'tmp'))
  197. }
  198. function writeVerifile (cache, opts) {
  199. const verifile = path.join(cache, '_lastverified')
  200. opts.log.silly('verify', 'writing verifile to ' + verifile)
  201. try {
  202. return fs.writeFileAsync(verifile, '' + (+(new Date())))
  203. } finally {
  204. fixOwner.chownr.sync(cache, verifile)
  205. }
  206. }
  207. module.exports.lastRun = lastRun
  208. function lastRun (cache) {
  209. return fs.readFileAsync(
  210. path.join(cache, '_lastverified'), 'utf8'
  211. ).then(data => new Date(+data))
  212. }