Add S3-backed storage to prevent data loss

- New lib/storage.mjs: async S3 backup on every queue/log save
- Versioned S3 bucket (claw-apply-data) keeps every revision
- Auto-restore from S3 if local file is missing or corrupt
- saveQueue/saveLog now validate data type before writing
  (prevents the exact bug that corrupted the queue)
- IAM role attached to EC2 instance for credential-free S3 access
- Config: storage.type = "local" (default) or "s3"

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-06 21:56:37 -08:00
parent c78586926a
commit 253d1888e9
7 changed files with 228 additions and 4 deletions

View File

@@ -2,10 +2,12 @@
* queue.mjs — Job queue management
* Handles jobs_queue.json read/write/update
* Uses in-memory cache to avoid redundant disk I/O within a run.
* S3-backed: every write syncs to versioned S3 bucket (never lose data).
*/
import { readFileSync, writeFileSync, appendFileSync, renameSync, unlinkSync, existsSync, mkdirSync } from 'fs';
import { dirname, resolve } from 'path';
import { fileURLToPath } from 'url';
import { initStorage, backupToS3, loadJSONSafe } from './storage.mjs';
// Directory containing this module, derived from import.meta.url.
const __dir = dirname(fileURLToPath(import.meta.url));
// Path of the persisted job queue file, expressed relative to this module's directory.
const QUEUE_PATH = `${__dir}/../data/jobs_queue.json`;
@@ -94,13 +96,62 @@ function applyPendingUpdates(queue) {
/**
 * Load the job queue, preferring the in-memory cache over disk.
 *
 * On a cache miss the file at QUEUE_PATH is read and validated. If the file
 * is missing, an empty queue is used. If it exists but cannot be parsed or
 * does not contain an array, a warning is logged and an empty queue is used.
 * This synchronous path never contacts S3; restoring from S3 is done by the
 * async initQueueFromS3(), which is meant to run once at startup.
 *
 * After loading, pending updates are applied and, if applyPendingUpdates()
 * reports a change (truthy return), the queue is written back via saveQueue().
 *
 * @returns {Array} The cached queue array (the shared reference, not a copy).
 */
export function loadQueue() {
  if (_queueCache) return _queueCache;
  ensureDir(QUEUE_PATH);

  // Parse strictly inside the try block: an unguarded JSON.parse here would
  // throw on a corrupt file before the fallback below could take effect.
  let data = null;
  if (existsSync(QUEUE_PATH)) {
    try {
      const raw = readFileSync(QUEUE_PATH, 'utf8');
      const parsed = JSON.parse(raw);
      if (!Array.isArray(parsed)) throw new Error(`Expected array, got ${typeof parsed}`);
      data = parsed;
    } catch (err) {
      console.warn(`⚠️ Queue file corrupt: ${err.message} — will attempt S3 restore`);
    }
  }

  // S3 restore is handled asynchronously at startup via initQueueFromS3();
  // the synchronous path falls back to an empty array.
  _queueCache = data || [];

  if (applyPendingUpdates(_queueCache)) {
    saveQueue(_queueCache);
  }
  return _queueCache;
}
/**
 * Async queue initialization with S3 fallback.
 * Call once at startup before processing jobs.
 *
 * Initializes the storage layer with `settings`, then inspects the local
 * queue file. When that file is missing, unparseable, or not a JSON array,
 * loadJSONSafe() is asked for the queue; a non-empty array result replaces
 * the in-memory cache. Finally, an existing applications log is handed to
 * backupToS3() (the call is not awaited here).
 *
 * @param {object} settings - Passed through unchanged to initStorage().
 * @returns {Promise<void>}
 */
export async function initQueueFromS3(settings) {
  initStorage(settings);

  // The local copy is usable only if it exists and parses to an array.
  let localIsUsable = false;
  if (existsSync(QUEUE_PATH)) {
    try {
      const contents = readFileSync(QUEUE_PATH, 'utf8');
      localIsUsable = Array.isArray(JSON.parse(contents));
    } catch {
      localIsUsable = false;
    }
  }

  if (!localIsUsable) {
    const restored = await loadJSONSafe(QUEUE_PATH, []);
    const hasJobs = Array.isArray(restored) && restored.length > 0;
    if (hasJobs) {
      _queueCache = restored;
      console.log(`✅ Queue restored from S3: ${restored.length} jobs`);
    }
  }

  // Also ensure the applications log is backed up.
  if (existsSync(LOG_PATH)) {
    backupToS3(LOG_PATH);
  }
}
/**
* Force a fresh read from disk + apply pending updates.
* Call this between iterations in long-running processes to pick up
@@ -112,9 +163,13 @@ export function reloadQueue() {
}
/**
 * Persist the job queue and refresh the in-memory cache.
 *
 * Non-array input is rejected before anything is written. Otherwise the
 * queue is written to QUEUE_PATH via atomicWriteJSON(), the cache is pointed
 * at `jobs`, and the file is handed to backupToS3() (not awaited here).
 *
 * @param {Array} jobs - The complete queue to store.
 * @throws {Error} If `jobs` is not an array.
 */
export function saveQueue(jobs) {
  const isArrayInput = Array.isArray(jobs);
  if (isArrayInput === false) {
    const reason = `saveQueue: expected array, got ${typeof jobs} — refusing to write corrupt data`;
    throw new Error(reason);
  }

  ensureDir(QUEUE_PATH);
  atomicWriteJSON(QUEUE_PATH, jobs);
  // The cache is updated only after the write call has returned without throwing.
  _queueCache = jobs;
  backupToS3(QUEUE_PATH);
}
function loadLog() {
@@ -125,9 +180,13 @@ function loadLog() {
}
/**
 * Persist the applications log and refresh the in-memory cache.
 *
 * Non-array input is rejected before anything is written. Otherwise the log
 * is written to LOG_PATH via atomicWriteJSON(), the cache is pointed at
 * `log`, and the file is handed to backupToS3() (not awaited here).
 *
 * @param {Array} log - The complete log to store.
 * @throws {Error} If `log` is not an array.
 */
function saveLog(log) {
  const isArrayInput = Array.isArray(log);
  if (isArrayInput === false) {
    const reason = `saveLog: expected array, got ${typeof log} — refusing to write corrupt data`;
    throw new Error(reason);
  }

  ensureDir(LOG_PATH);
  atomicWriteJSON(LOG_PATH, log);
  // The cache is updated only after the write call has returned without throwing.
  _logCache = log;
  backupToS3(LOG_PATH);
}
export function appendLog(entry) {