Merge pull request #29980 from overleaf/bg-history-extend-backup-comparison-III

Check file tree hashes in backup comparison

GitOrigin-RevId: 4bd1f36afa34f326d4b8934c8bb0ea00a52cf1d9
This commit is contained in:
Brian Gough
2025-11-28 16:13:47 +00:00
committed by Copybot
parent 9ebab12049
commit 06f696ced0
2 changed files with 88 additions and 7 deletions

View File

@@ -68,14 +68,18 @@ async function getHistoryId(projectId) {
return project.overleaf.history.id
}
async function getBackupStatus(projectId) {
async function getBackupStatus(projectId, options = {}) {
const projection = {
'overleaf.history': 1,
'overleaf.backup': 1,
}
if (options.includeRootFolder) {
projection.rootFolder = 1
}
const project = await projects.findOne(
{ _id: new ObjectId(projectId) },
{
projection: {
'overleaf.history': 1,
'overleaf.backup': 1,
},
projection,
}
)
if (!project) {
@@ -93,9 +97,38 @@ async function getBackupStatus(projectId) {
historyId: `${project.overleaf.history.id}`,
currentEndVersion: project.overleaf.history.currentEndVersion,
currentEndTimestamp: project.overleaf.history.currentEndTimestamp,
...(options.includeRootFolder && { rootFolder: project.rootFolder?.[0] }),
}
}
/**
* Recursively traverses the file tree and collects file hashes into a Set.
*
* @param {object} rootFolder - The root folder object of the file tree.
* @returns {Set<string>} A Set containing all unique file hashes found in the file tree.
*/
/**
 * Walks the project file tree and gathers every binary-file hash it contains.
 *
 * Traversal is iterative (explicit work stack) rather than recursive. Folders
 * lacking an `_id` are treated as invalid entries and are not descended into,
 * matching how the file tree is validated elsewhere; the root folder itself is
 * always processed. File refs without a `hash` are ignored.
 *
 * @param {object} rootFolder - The root folder object of the file tree.
 * @returns {Set<string>} A Set containing all unique file hashes found in the file tree.
 */
function getHashesFromFileTree(rootFolder) {
  const hashes = new Set()
  const pendingFolders = [rootFolder]
  while (pendingFolders.length > 0) {
    const currentFolder = pendingFolders.pop()
    for (const fileRef of currentFolder.fileRefs || []) {
      if (fileRef?.hash) {
        hashes.add(fileRef.hash)
      }
    }
    for (const childFolder of currentFolder.folders || []) {
      if (childFolder?._id) {
        pendingFolders.push(childFolder)
      }
    }
  }
  return hashes
}
async function setBackupVersion(
projectId,
previousBackedUpVersion,
@@ -216,4 +249,5 @@ module.exports = {
listUninitializedBackups,
getBackedUpBlobHashes,
unsetBackedUpBlobHashes,
getHashesFromFileTree,
}

View File

@@ -7,6 +7,7 @@ import {
getProjectChunks,
getLatestChunkMetadata,
create,
getBackend,
} from '../lib/chunk_store/index.js'
import { client } from '../lib/mongodb.js'
import redis from '../lib/redis.js'
@@ -27,6 +28,7 @@ import {
updatePendingChangeTimestamp,
getBackedUpBlobHashes,
unsetBackedUpBlobHashes,
getHashesFromFileTree,
} from '../lib/backup_store/index.js'
import { backupBlob, downloadBlobToDir } from '../lib/backupBlob.mjs'
import {
@@ -949,8 +951,19 @@ async function getBlobListing(historyId) {
*/
async function compareBackups(projectId, options, log = console.log) {
log(`Comparing backups for project ${projectId}`)
const { historyId } = await getBackupStatus(projectId)
// Convert any postgres history ids to mongo project ids
const backend = getBackend(projectId)
projectId = await backend.resolveHistoryIdToMongoProjectId(projectId)
const { historyId, rootFolder } = await getBackupStatus(projectId, {
includeRootFolder: true,
})
log(`Comparing backups for project ${projectId} historyId ${historyId}`)
const hashesFromFileTree = rootFolder
? getHashesFromFileTree(rootFolder)
: new Set()
const hashesFromHistory = new Set()
const chunks = await getProjectChunks(historyId)
const blobStore = new BlobStore(historyId)
const backupPersistorForProject = await backupPersistor.forProject(
@@ -1047,6 +1060,9 @@ async function compareBackups(projectId, options, log = console.log) {
throw new Error('interrupted')
}
// Track all the hashes in the history
hashesFromHistory.add(blob.hash)
if (GLOBAL_BLOBS.has(blob.hash)) {
const globalBlob = GLOBAL_BLOBS.get(blob.hash)
log(
@@ -1158,6 +1174,31 @@ async function compareBackups(projectId, options, log = console.log) {
}
}
if (gracefulShutdownInitiated) {
throw new Error('interrupted')
}
// Reconcile hashes in file tree with history
log(`Comparing file hashes from file tree with history`)
if (hashesFromFileTree.size > 0) {
for (const hash of hashesFromFileTree) {
const presentInHistory = hashesFromHistory.has(hash)
if (presentInHistory) {
log(` ✓ File tree hash ${hash} present in history`)
} else {
log(` ✗ File tree hash ${hash} not found in history`)
totalBlobsNotFound++
errors.push({
type: 'file-not-found',
historyId,
blobHash: hash,
error: `File tree hash ${hash} not found in history`,
})
}
}
} else {
log(` ✓ File tree does not contain any binary files`)
}
// Print summary
log('\nComparison Summary:')
log('==================')
@@ -1236,6 +1277,9 @@ async function compareProjectAndEmitResult(
return false
} catch (err) {
if (gracefulShutdownInitiated) {
throw err
}
console.log(`FAIL: ${projectId}`)
// Output buffered logs on error when verbose
@@ -1276,6 +1320,9 @@ async function compareProjectAndEmitResult(
case 'blob-size-mismatch':
console.log(`size-mismatch: ${projectId},${historyId},${blobHash}`)
break
case 'file-not-found':
console.log(`file-not-found: ${projectId},${historyId},${blobHash}`)
break
case 'chunk-mismatch':
console.log(`chunk-mismatch: ${projectId},${historyId},${chunkId}`)
break