fix: Implement distributed locking in Redis for cron jobs
Some checks failed
Deploy to Staging / Build Images (push) Failing after 30s
Deploy to Staging / Deploy to Staging (push) Has been skipped
Deploy to Staging / Verify Staging (push) Has been skipped
Deploy to Staging / Notify Staging Ready (push) Has been skipped
Deploy to Staging / Notify Staging Failure (push) Successful in 6s

This commit is contained in:
Eric Gullickson
2026-01-01 11:02:54 -06:00
parent ffd8ecd1d0
commit d8ea0c7297
6 changed files with 271 additions and 6 deletions

View File

@@ -111,7 +111,23 @@ Backups are stored in `/app/data/backups/` (mapped to `./data/backups/` on host)
Jobs are registered in `backend/src/core/scheduler/index.ts`:
- Backup check: Every minute
- Retention cleanup: Daily at 4 AM
- Retention cleanup: Daily at 4 AM (also runs after each scheduled backup)
### Distributed Locking
Scheduled backups use Redis distributed locking to prevent duplicate backups when multiple backend containers are running (blue-green deployments).
**Lock behavior:**
- Lock key: `backup:schedule:{schedule_id}`
- Lock TTL: 5 minutes (auto-release if container crashes)
- Only one container creates the backup; others skip
**Retention cleanup:**
- Runs immediately after each successful scheduled backup
- Deletes backups exceeding the schedule's retention count
- Also runs globally at 4 AM daily as a safety net
See `backend/src/core/scheduler/README.md` for the distributed locking pattern.
### Admin Routes

View File

@@ -1,12 +1,16 @@
/**
* @ai-summary Job for processing scheduled backups
* @ai-context Runs every minute to check for due scheduled backups
* @ai-context Uses distributed locking to prevent duplicate backups in blue-green deployments
*/
import { Pool } from 'pg';
import { v4 as uuidv4 } from 'uuid';
import { logger } from '../../../core/logging/logger';
import { lockService } from '../../../core/config/redis';
import { BackupRepository } from '../data/backup.repository';
import { BackupService } from '../domain/backup.service';
import { BackupRetentionService } from '../domain/backup-retention.service';
import { ScheduledBackupJobResult, BackupFrequency } from '../domain/backup.types';
let pool: Pool | null = null;
@@ -18,8 +22,12 @@ export function setBackupJobPool(dbPool: Pool): void {
pool = dbPool;
}
// Lock TTL: 5 minutes (backup should complete well within this)
const BACKUP_LOCK_TTL_SECONDS = 300;
/**
* Processes all scheduled backups that are due
* Uses distributed locking to prevent duplicate backups across containers
*/
export async function processScheduledBackups(): Promise<ScheduledBackupJobResult> {
if (!pool) {
@@ -28,6 +36,7 @@ export async function processScheduledBackups(): Promise<ScheduledBackupJobResul
const repository = new BackupRepository(pool);
const backupService = new BackupService(pool);
const retentionService = new BackupRetentionService(pool);
const result: ScheduledBackupJobResult = {
processed: 0,
@@ -48,6 +57,21 @@ export async function processScheduledBackups(): Promise<ScheduledBackupJobResul
logger.info('Processing scheduled backups', { count: dueSchedules.length });
for (const schedule of dueSchedules) {
// Generate unique lock value for this execution
const lockKey = `backup:schedule:${schedule.id}`;
const lockValue = uuidv4();
// Try to acquire lock for this schedule
const lockAcquired = await lockService.acquireLock(lockKey, BACKUP_LOCK_TTL_SECONDS, lockValue);
if (!lockAcquired) {
logger.debug('Backup already in progress for schedule, skipping', {
scheduleId: schedule.id,
scheduleName: schedule.name,
});
continue;
}
result.processed++;
try {
@@ -71,6 +95,28 @@ export async function processScheduledBackups(): Promise<ScheduledBackupJobResul
scheduleId: schedule.id,
backupId: backupResult.backupId,
});
// Run retention cleanup for this schedule immediately after successful backup
try {
const retentionResult = await retentionService.cleanupScheduleBackups(
schedule.id,
schedule.name,
schedule.retentionCount
);
if (retentionResult.deletedCount > 0) {
logger.info('Retention cleanup completed after backup', {
scheduleId: schedule.id,
deletedCount: retentionResult.deletedCount,
freedBytes: retentionResult.freedBytes,
});
}
} catch (retentionError) {
logger.error('Retention cleanup failed after backup', {
scheduleId: schedule.id,
error: retentionError instanceof Error ? retentionError.message : String(retentionError),
});
// Don't fail the overall backup for retention errors
}
} else {
result.failed++;
result.errors.push({
@@ -103,6 +149,9 @@ export async function processScheduledBackups(): Promise<ScheduledBackupJobResul
} catch {
// Ignore error updating next run
}
} finally {
// Always release the lock
await lockService.releaseLock(lockKey, lockValue);
}
}