fix: Implement distributed locking in Redis for cron jobs
Some checks failed
Deploy to Staging / Build Images (push) Failing after 30s
Deploy to Staging / Deploy to Staging (push) Has been skipped
Deploy to Staging / Verify Staging (push) Has been skipped
Deploy to Staging / Notify Staging Ready (push) Has been skipped
Deploy to Staging / Notify Staging Failure (push) Successful in 6s
Some checks failed
Deploy to Staging / Build Images (push) Failing after 30s
Deploy to Staging / Deploy to Staging (push) Has been skipped
Deploy to Staging / Verify Staging (push) Has been skipped
Deploy to Staging / Notify Staging Ready (push) Has been skipped
Deploy to Staging / Notify Staging Failure (push) Successful in 6s
This commit is contained in:
@@ -111,7 +111,23 @@ Backups are stored in `/app/data/backups/` (mapped to `./data/backups/` on host)
|
||||
|
||||
Jobs are registered in `backend/src/core/scheduler/index.ts`:
|
||||
- Backup check: Every minute
|
||||
- Retention cleanup: Daily at 4 AM
|
||||
- Retention cleanup: Daily at 4 AM (also runs after each scheduled backup)
|
||||
|
||||
### Distributed Locking
|
||||
|
||||
Scheduled backups use Redis distributed locking to prevent duplicate backups when multiple backend containers are running (blue-green deployments).
|
||||
|
||||
**Lock behavior:**
|
||||
- Lock key: `backup:schedule:{schedule_id}`
|
||||
- Lock TTL: 5 minutes (auto-release if container crashes)
|
||||
- Only one container creates the backup; others skip
|
||||
|
||||
**Retention cleanup:**
|
||||
- Runs immediately after each successful scheduled backup
|
||||
- Deletes backups exceeding the schedule's retention count
|
||||
- Also runs globally at 4 AM daily as a safety net
|
||||
|
||||
See `backend/src/core/scheduler/README.md` for the distributed locking pattern.
|
||||
|
||||
### Admin Routes
|
||||
|
||||
|
||||
@@ -1,12 +1,16 @@
|
||||
/**
|
||||
* @ai-summary Job for processing scheduled backups
|
||||
* @ai-context Runs every minute to check for due scheduled backups
|
||||
* @ai-context Uses distributed locking to prevent duplicate backups in blue-green deployments
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
import { logger } from '../../../core/logging/logger';
|
||||
import { lockService } from '../../../core/config/redis';
|
||||
import { BackupRepository } from '../data/backup.repository';
|
||||
import { BackupService } from '../domain/backup.service';
|
||||
import { BackupRetentionService } from '../domain/backup-retention.service';
|
||||
import { ScheduledBackupJobResult, BackupFrequency } from '../domain/backup.types';
|
||||
|
||||
let pool: Pool | null = null;
|
||||
@@ -18,8 +22,12 @@ export function setBackupJobPool(dbPool: Pool): void {
|
||||
pool = dbPool;
|
||||
}
|
||||
|
||||
// Lock TTL: 5 minutes (backup should complete well within this)
|
||||
const BACKUP_LOCK_TTL_SECONDS = 300;
|
||||
|
||||
/**
|
||||
* Processes all scheduled backups that are due
|
||||
* Uses distributed locking to prevent duplicate backups across containers
|
||||
*/
|
||||
export async function processScheduledBackups(): Promise<ScheduledBackupJobResult> {
|
||||
if (!pool) {
|
||||
@@ -28,6 +36,7 @@ export async function processScheduledBackups(): Promise<ScheduledBackupJobResul
|
||||
|
||||
const repository = new BackupRepository(pool);
|
||||
const backupService = new BackupService(pool);
|
||||
const retentionService = new BackupRetentionService(pool);
|
||||
|
||||
const result: ScheduledBackupJobResult = {
|
||||
processed: 0,
|
||||
@@ -48,6 +57,21 @@ export async function processScheduledBackups(): Promise<ScheduledBackupJobResul
|
||||
logger.info('Processing scheduled backups', { count: dueSchedules.length });
|
||||
|
||||
for (const schedule of dueSchedules) {
|
||||
// Generate unique lock value for this execution
|
||||
const lockKey = `backup:schedule:${schedule.id}`;
|
||||
const lockValue = uuidv4();
|
||||
|
||||
// Try to acquire lock for this schedule
|
||||
const lockAcquired = await lockService.acquireLock(lockKey, BACKUP_LOCK_TTL_SECONDS, lockValue);
|
||||
|
||||
if (!lockAcquired) {
|
||||
logger.debug('Backup already in progress for schedule, skipping', {
|
||||
scheduleId: schedule.id,
|
||||
scheduleName: schedule.name,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
result.processed++;
|
||||
|
||||
try {
|
||||
@@ -71,6 +95,28 @@ export async function processScheduledBackups(): Promise<ScheduledBackupJobResul
|
||||
scheduleId: schedule.id,
|
||||
backupId: backupResult.backupId,
|
||||
});
|
||||
|
||||
// Run retention cleanup for this schedule immediately after successful backup
|
||||
try {
|
||||
const retentionResult = await retentionService.cleanupScheduleBackups(
|
||||
schedule.id,
|
||||
schedule.name,
|
||||
schedule.retentionCount
|
||||
);
|
||||
if (retentionResult.deletedCount > 0) {
|
||||
logger.info('Retention cleanup completed after backup', {
|
||||
scheduleId: schedule.id,
|
||||
deletedCount: retentionResult.deletedCount,
|
||||
freedBytes: retentionResult.freedBytes,
|
||||
});
|
||||
}
|
||||
} catch (retentionError) {
|
||||
logger.error('Retention cleanup failed after backup', {
|
||||
scheduleId: schedule.id,
|
||||
error: retentionError instanceof Error ? retentionError.message : String(retentionError),
|
||||
});
|
||||
// Don't fail the overall backup for retention errors
|
||||
}
|
||||
} else {
|
||||
result.failed++;
|
||||
result.errors.push({
|
||||
@@ -103,6 +149,9 @@ export async function processScheduledBackups(): Promise<ScheduledBackupJobResul
|
||||
} catch {
|
||||
// Ignore error updating next run
|
||||
}
|
||||
} finally {
|
||||
// Always release the lock
|
||||
await lockService.releaseLock(lockKey, lockValue);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user