Add S3 binary file support and resume download

- ensureLocalFile() downloads binary files (resume PDF) from S3 to temp
- Applier downloads resume from S3 before applying
- Cached in /tmp to avoid re-downloading each run

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-06 22:07:31 -08:00
parent 534d318953
commit 76c9d0df31
2 changed files with 53 additions and 3 deletions

View File

@@ -20,6 +20,7 @@ process.stdout.write = (chunk, ...args) => { logStream.write(chunk); return orig
process.stderr.write = (chunk, ...args) => { logStream.write(chunk); return origStderrWrite(chunk, ...args); };
import { getJobsByStatus, updateJobStatus, appendLog, loadConfig, isAlreadyApplied, initQueue } from './lib/queue.mjs';
import { ensureLocalFile } from './lib/storage.mjs';
import { acquireLock } from './lib/lock.mjs';
import { createBrowser } from './lib/browser.mjs';
import { ensureAuth } from './lib/session.mjs';
@@ -45,6 +46,12 @@ async function main() {
const settings = loadConfig(resolve(__dir, 'config/settings.json'));
await initQueue(settings);
const profile = loadConfig(resolve(__dir, 'config/profile.json'));
// Ensure resume is available locally (downloads from S3 if needed)
if (profile.resume_path) {
profile.resume_path = await ensureLocalFile('config/Matthew_Jackson_Resume.pdf', profile.resume_path);
}
const answersPath = resolve(__dir, 'config/answers.json');
const answers = existsSync(answersPath) ? loadConfig(answersPath) : [];
const maxApps = settings.max_applications_per_run || Infinity;

View File

@@ -2,7 +2,7 @@
* storage.mjs — Pluggable data storage (local disk or S3)
*
* When type is "local": reads/writes go to local disk (default).
* When type is "s3": S3 is the primary store. No local files for data.
* When type is "s3": S3 is the primary store.
* - Versioned bucket means every write is recoverable.
* - In-memory cache in queue.mjs handles read performance.
*
@@ -10,8 +10,10 @@
* storage: { type: "s3", bucket: "claw-apply-data", region: "us-west-2" }
* storage: { type: "local" } (default)
*/
import { readFileSync, writeFileSync, existsSync } from 'fs';
import { basename } from 'path';
import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
import { basename, dirname } from 'path';
import { tmpdir } from 'os';
import { join } from 'path';
let _s3Client = null;
let _config = { type: 'local' };
@@ -101,8 +103,49 @@ export async function saveJSON(filePath, data) {
}
// Local storage — atomic write
const dir = dirname(filePath);
if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
const tmp = filePath + '.tmp';
writeFileSync(tmp, body);
const { renameSync } = await import('fs');
renameSync(tmp, filePath);
}
/**
 * Load a binary file (e.g. resume PDF) from storage.
 * For S3: downloads the object to a file in the OS temp dir and returns that path.
 * For local: returns `localPath` as-is (must already exist).
 *
 * Downloads are cached: if the temp file already exists from a previous run
 * it is reused without contacting S3.
 * NOTE(review): the cache is never invalidated, so a newer S3 object is not
 * picked up until the temp file is deleted — confirm that is intended.
 *
 * @param {string} s3Key — S3 key (e.g. "config/Matthew_Jackson_Resume.pdf")
 * @param {string} localPath — local file path (used as-is for local storage)
 * @returns {Promise<string>} — local file path (may be a temp file for S3)
 * @throws {Error} when the S3 download fails and `localPath` does not exist
 */
export async function ensureLocalFile(s3Key, localPath) {
  if (_config.type !== 's3') {
    return localPath;
  }
  // If the file already exists locally (cached from a previous download), use it.
  const tempPath = join(tmpdir(), basename(s3Key));
  if (existsSync(tempPath)) return tempPath;
  try {
    const s3 = await getS3Client();
    const response = await s3.client.send(new s3.GetObjectCommand({
      Bucket: _config.bucket,
      Key: s3Key,
    }));
    // Stream the response body into memory, then write it out in one call.
    const chunks = [];
    for await (const chunk of response.Body) {
      chunks.push(chunk);
    }
    const buffer = Buffer.concat(chunks);
    writeFileSync(tempPath, buffer);
    // Use the concatenated buffer's length — no need to re-reduce the chunks.
    console.log(`📄 Downloaded ${basename(s3Key)} from S3 (${buffer.length} bytes)`);
    return tempPath;
  } catch (err) {
    console.warn(`⚠️ Failed to download ${s3Key} from S3: ${err.message}`);
    // Best-effort fallback: use the local copy if it exists.
    if (existsSync(localPath)) return localPath;
    // Preserve the original failure for debugging via the `cause` option.
    throw new Error(`File not found: ${s3Key} (S3) or ${localPath} (local)`, { cause: err });
  }
}