From 76c9d0df31a119b02fde93ab4dc29fa72fd91acd Mon Sep 17 00:00:00 2001
From: Matthew Jackson
Date: Fri, 6 Mar 2026 22:07:31 -0800
Subject: [PATCH] Add S3 binary file support and resume download

- ensureLocalFile() downloads binary files (resume PDF) from S3 to temp
- Applier downloads resume from S3 before applying
- Cached in /tmp to avoid re-downloading each run
- Download is written to a .tmp sibling and renamed, so an interrupted
  run can never leave a truncated file in the cache

Co-Authored-By: Claude Opus 4.6
---
Reviewer note: line breaks and indentation in this patch were reconstructed
from a whitespace-collapsed copy. Confirm the context lines against the tree
(or apply with --ignore-whitespace). The blob ids on the "index" lines predate
the review changes.

 job_applier.mjs |  7 +++++++
 lib/storage.mjs | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/job_applier.mjs b/job_applier.mjs
index 050fecf..994491d 100644
--- a/job_applier.mjs
+++ b/job_applier.mjs
@@ -20,6 +20,7 @@ process.stdout.write = (chunk, ...args) => { logStream.write(chunk); return orig
 process.stderr.write = (chunk, ...args) => { logStream.write(chunk); return origStderrWrite(chunk, ...args); };
 
 import { getJobsByStatus, updateJobStatus, appendLog, loadConfig, isAlreadyApplied, initQueue } from './lib/queue.mjs';
+import { ensureLocalFile } from './lib/storage.mjs';
 import { acquireLock } from './lib/lock.mjs';
 import { createBrowser } from './lib/browser.mjs';
 import { ensureAuth } from './lib/session.mjs';
@@ -45,6 +46,12 @@ async function main() {
   const settings = loadConfig(resolve(__dir, 'config/settings.json'));
   await initQueue(settings);
   const profile = loadConfig(resolve(__dir, 'config/profile.json'));
+
+  // Ensure resume is available locally (downloads from S3 if needed)
+  if (profile.resume_path) {
+    profile.resume_path = await ensureLocalFile('config/Matthew_Jackson_Resume.pdf', profile.resume_path);
+  }
+
   const answersPath = resolve(__dir, 'config/answers.json');
   const answers = existsSync(answersPath) ? loadConfig(answersPath) : [];
   const maxApps = settings.max_applications_per_run || Infinity;
diff --git a/lib/storage.mjs b/lib/storage.mjs
index 81dc4fa..e615739 100644
--- a/lib/storage.mjs
+++ b/lib/storage.mjs
@@ -2,7 +2,7 @@
  * storage.mjs — Pluggable data storage (local disk or S3)
  *
  * When type is "local": reads/writes go to local disk (default).
- * When type is "s3": S3 is the primary store. No local files for data.
+ * When type is "s3": S3 is the primary store.
  * - Versioned bucket means every write is recoverable.
  * - In-memory cache in queue.mjs handles read performance.
  *
@@ -10,8 +10,9 @@
  * storage: { type: "s3", bucket: "claw-apply-data", region: "us-west-2" }
  * storage: { type: "local" } (default)
  */
-import { readFileSync, writeFileSync, existsSync } from 'fs';
-import { basename } from 'path';
+import { readFileSync, writeFileSync, existsSync, mkdirSync, renameSync } from 'fs';
+import { tmpdir } from 'os';
+import { basename, dirname, join } from 'path';
 
 let _s3Client = null;
 let _config = { type: 'local' };
@@ -101,8 +102,58 @@ export async function saveJSON(filePath, data) {
   }
 
   // Local storage — atomic write
+  const dir = dirname(filePath);
+  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
   const tmp = filePath + '.tmp';
   writeFileSync(tmp, body);
-  const { renameSync } = await import('fs');
   renameSync(tmp, filePath);
 }
+
+/**
+ * Load a binary file (e.g. resume PDF) from storage.
+ * For S3: downloads to a temp file and returns the local path.
+ * For local: returns the path as-is (must already exist).
+ *
+ * The S3 download is cached at <tmpdir>/<basename of s3Key>. An existing
+ * cached copy is reused without re-checking S3, so delete it to force a
+ * refresh after the object changes.
+ *
+ * @param {string} s3Key — S3 key (e.g. "config/Matthew_Jackson_Resume.pdf")
+ * @param {string} localPath — local file path (used as-is for local storage)
+ * @returns {Promise<string>} — local file path (may be temp file for S3)
+ * @throws {Error} if the S3 download fails and localPath does not exist
+ */
+export async function ensureLocalFile(s3Key, localPath) {
+  if (_config.type !== 's3') {
+    return localPath;
+  }
+
+  // If the file already exists locally (cached from previous download), use it
+  const tempPath = join(tmpdir(), basename(s3Key));
+  if (existsSync(tempPath)) return tempPath;
+
+  try {
+    const s3 = await getS3Client();
+    const response = await s3.client.send(new s3.GetObjectCommand({
+      Bucket: _config.bucket,
+      Key: s3Key,
+    }));
+    const chunks = [];
+    for await (const chunk of response.Body) {
+      chunks.push(chunk);
+    }
+    const data = Buffer.concat(chunks);
+    // Write to a sibling file and rename so an interrupted download can never
+    // leave a truncated file at tempPath for the cache check above to reuse
+    const partPath = `${tempPath}.tmp`;
+    writeFileSync(partPath, data);
+    renameSync(partPath, tempPath);
+    console.log(`📄 Downloaded ${basename(s3Key)} from S3 (${data.length} bytes)`);
+    return tempPath;
+  } catch (err) {
+    console.warn(`⚠️ Failed to download ${s3Key} from S3: ${err.message}`);
+    // Fall back to local path if it exists
+    if (existsSync(localPath)) return localPath;
+    throw new Error(`File not found: ${s3Key} (S3) or ${localPath} (local)`, { cause: err });
+  }
+}