mirror of
https://github.com/overleaf/overleaf.git
synced 2025-12-05 01:10:29 +00:00
Merge pull request #29980 from overleaf/bg-history-extend-backup-comparison-III
Check file tree hashes in backup comparison
GitOrigin-RevId: 4bd1f36afa34f326d4b8934c8bb0ea00a52cf1d9
This commit is contained in:
@@ -68,14 +68,18 @@ async function getHistoryId(projectId) {
|
||||
return project.overleaf.history.id
|
||||
}
|
||||
|
||||
async function getBackupStatus(projectId) {
|
||||
async function getBackupStatus(projectId, options = {}) {
|
||||
const projection = {
|
||||
'overleaf.history': 1,
|
||||
'overleaf.backup': 1,
|
||||
}
|
||||
if (options.includeRootFolder) {
|
||||
projection.rootFolder = 1
|
||||
}
|
||||
const project = await projects.findOne(
|
||||
{ _id: new ObjectId(projectId) },
|
||||
{
|
||||
projection: {
|
||||
'overleaf.history': 1,
|
||||
'overleaf.backup': 1,
|
||||
},
|
||||
projection,
|
||||
}
|
||||
)
|
||||
if (!project) {
|
||||
@@ -93,9 +97,38 @@ async function getBackupStatus(projectId) {
|
||||
historyId: `${project.overleaf.history.id}`,
|
||||
currentEndVersion: project.overleaf.history.currentEndVersion,
|
||||
currentEndTimestamp: project.overleaf.history.currentEndTimestamp,
|
||||
...(options.includeRootFolder && { rootFolder: project.rootFolder?.[0] }),
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Collects the hashes of all files referenced in a project file tree.
 *
 * The tree is walked depth-first: the hashes of a folder's own `fileRefs` are
 * added first, then each entry of `folders` is visited in order. File entries
 * without a `hash` and subfolder entries without an `_id` are skipped.
 *
 * @param {object} [rootFolder] - The root folder object of the file tree. A
 *   missing (null/undefined) root folder is treated as an empty tree.
 * @returns {Set<string>} A Set containing all unique file hashes found in the
 *   file tree; empty when there is no tree or no file entry carries a hash.
 */
function getHashesFromFileTree(rootFolder) {
  const hashSet = new Set()

  // The root folder can be absent; report "no hashes" instead of throwing a
  // TypeError on the first property access.
  if (!rootFolder) {
    return hashSet
  }

  function processFolder(folder) {
    // `fileRefs` / `folders` may be missing on a folder; treat them as empty.
    for (const file of folder.fileRefs || []) {
      if (file?.hash) {
        hashSet.add(file.hash)
      }
    }

    for (const subfolder of folder.folders || []) {
      // Only descend into entries that carry an _id; null or id-less entries
      // are skipped.
      if (subfolder?._id) {
        processFolder(subfolder)
      }
    }
  }

  processFolder(rootFolder)

  return hashSet
}
|
||||
|
||||
async function setBackupVersion(
|
||||
projectId,
|
||||
previousBackedUpVersion,
|
||||
@@ -216,4 +249,5 @@ module.exports = {
|
||||
listUninitializedBackups,
|
||||
getBackedUpBlobHashes,
|
||||
unsetBackedUpBlobHashes,
|
||||
getHashesFromFileTree,
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import {
|
||||
getProjectChunks,
|
||||
getLatestChunkMetadata,
|
||||
create,
|
||||
getBackend,
|
||||
} from '../lib/chunk_store/index.js'
|
||||
import { client } from '../lib/mongodb.js'
|
||||
import redis from '../lib/redis.js'
|
||||
@@ -27,6 +28,7 @@ import {
|
||||
updatePendingChangeTimestamp,
|
||||
getBackedUpBlobHashes,
|
||||
unsetBackedUpBlobHashes,
|
||||
getHashesFromFileTree,
|
||||
} from '../lib/backup_store/index.js'
|
||||
import { backupBlob, downloadBlobToDir } from '../lib/backupBlob.mjs'
|
||||
import {
|
||||
@@ -949,8 +951,19 @@ async function getBlobListing(historyId) {
|
||||
*/
|
||||
|
||||
async function compareBackups(projectId, options, log = console.log) {
|
||||
log(`Comparing backups for project ${projectId}`)
|
||||
const { historyId } = await getBackupStatus(projectId)
|
||||
// Convert any postgres history ids to mongo project ids
|
||||
const backend = getBackend(projectId)
|
||||
projectId = await backend.resolveHistoryIdToMongoProjectId(projectId)
|
||||
const { historyId, rootFolder } = await getBackupStatus(projectId, {
|
||||
includeRootFolder: true,
|
||||
})
|
||||
|
||||
log(`Comparing backups for project ${projectId} historyId ${historyId}`)
|
||||
const hashesFromFileTree = rootFolder
|
||||
? getHashesFromFileTree(rootFolder)
|
||||
: new Set()
|
||||
const hashesFromHistory = new Set()
|
||||
|
||||
const chunks = await getProjectChunks(historyId)
|
||||
const blobStore = new BlobStore(historyId)
|
||||
const backupPersistorForProject = await backupPersistor.forProject(
|
||||
@@ -1047,6 +1060,9 @@ async function compareBackups(projectId, options, log = console.log) {
|
||||
throw new Error('interrupted')
|
||||
}
|
||||
|
||||
// Track all the hashes in the history
|
||||
hashesFromHistory.add(blob.hash)
|
||||
|
||||
if (GLOBAL_BLOBS.has(blob.hash)) {
|
||||
const globalBlob = GLOBAL_BLOBS.get(blob.hash)
|
||||
log(
|
||||
@@ -1158,6 +1174,31 @@ async function compareBackups(projectId, options, log = console.log) {
|
||||
}
|
||||
}
|
||||
|
||||
if (gracefulShutdownInitiated) {
|
||||
throw new Error('interrupted')
|
||||
}
|
||||
// Reconcile hashes in file tree with history
|
||||
log(`Comparing file hashes from file tree with history`)
|
||||
if (hashesFromFileTree.size > 0) {
|
||||
for (const hash of hashesFromFileTree) {
|
||||
const presentInHistory = hashesFromHistory.has(hash)
|
||||
if (presentInHistory) {
|
||||
log(` ✓ File tree hash ${hash} present in history`)
|
||||
} else {
|
||||
log(` ✗ File tree hash ${hash} not found in history`)
|
||||
totalBlobsNotFound++
|
||||
errors.push({
|
||||
type: 'file-not-found',
|
||||
historyId,
|
||||
blobHash: hash,
|
||||
error: `File tree hash ${hash} not found in history`,
|
||||
})
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log(` ✓ File tree does not contain any binary files`)
|
||||
}
|
||||
|
||||
// Print summary
|
||||
log('\nComparison Summary:')
|
||||
log('==================')
|
||||
@@ -1236,6 +1277,9 @@ async function compareProjectAndEmitResult(
|
||||
|
||||
return false
|
||||
} catch (err) {
|
||||
if (gracefulShutdownInitiated) {
|
||||
throw err
|
||||
}
|
||||
console.log(`FAIL: ${projectId}`)
|
||||
|
||||
// Output buffered logs on error when verbose
|
||||
@@ -1276,6 +1320,9 @@ async function compareProjectAndEmitResult(
|
||||
case 'blob-size-mismatch':
|
||||
console.log(`size-mismatch: ${projectId},${historyId},${blobHash}`)
|
||||
break
|
||||
case 'file-not-found':
|
||||
console.log(`file-not-found: ${projectId},${historyId},${blobHash}`)
|
||||
break
|
||||
case 'chunk-mismatch':
|
||||
console.log(`chunk-mismatch: ${projectId},${historyId},${chunkId}`)
|
||||
break
|
||||
|
||||
Reference in New Issue
Block a user