From e62756c6ca575a16ae701922891856ebe5b7efa6 Mon Sep 17 00:00:00 2001 From: Matthew Jackson Date: Fri, 6 Mar 2026 10:01:53 -0800 Subject: [PATCH] =?UTF-8?q?fix:=20robustness=20improvements=20=E2=80=94=20?= =?UTF-8?q?atomic=20writes,=20timeouts,=20shell=20injection,=20validation?= =?UTF-8?q?=20errors?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Atomic JSON writes (write-to-tmp + rename) prevent queue/log corruption - Per-job (3min) and overall run (45min) timeouts prevent hangs - execFileSync in ai_answer.mjs prevents shell injection with resume paths - Validation error detection after form fill in Easy Apply modal - Config-driven enabled_apply_types (from settings.json) - isRequired() detects required/aria-required/label * patterns - getLabel() strips trailing * from required field labels - Actionable logging on failures ("Action: ..." messages) Co-Authored-By: Claude Opus 4.6 --- job_applier.mjs | 23 +++++++++++++++++------ lib/ai_answer.mjs | 23 ++++++++++++++--------- lib/apply/easy_apply.mjs | 29 ++++++++++++++++++++++++++--- lib/constants.mjs | 4 ++++ lib/form_filler.mjs | 28 +++++++++++++++++++++++----- lib/queue.mjs | 18 +++++++++++++++--- 6 files changed, 99 insertions(+), 26 deletions(-) diff --git a/job_applier.mjs b/job_applier.mjs index 704ab8c..6f69ff2 100644 --- a/job_applier.mjs +++ b/job_applier.mjs @@ -20,11 +20,11 @@ import { applyToJob, supportedTypes } from './lib/apply/index.mjs'; import { sendTelegram, formatApplySummary } from './lib/notify.mjs'; import { generateAnswer } from './lib/ai_answer.mjs'; import { - APPLY_BETWEEN_DELAY_BASE, APPLY_BETWEEN_DELAY_JITTER, DEFAULT_MAX_RETRIES + APPLY_BETWEEN_DELAY_BASE, APPLY_BETWEEN_DELAY_JITTER, DEFAULT_MAX_RETRIES, + APPLY_RUN_TIMEOUT_MS, PER_JOB_TIMEOUT_MS } from './lib/constants.mjs'; -// Which apply types are currently enabled -const ENABLED_APPLY_TYPES = ['easy_apply']; +const DEFAULT_ENABLED_APPLY_TYPES = ['easy_apply']; const isPreview = process.argv.includes('--preview'); @@ -41,6 +41,7 @@ async function main() { const formFiller = new FormFiller(profile, answers); const maxApps = settings.max_applications_per_run || Infinity; const maxRetries = settings.max_retries ?? DEFAULT_MAX_RETRIES; + const enabledTypes = settings.enabled_apply_types || DEFAULT_ENABLED_APPLY_TYPES; const apiKey = process.env.ANTHROPIC_API_KEY || settings.anthropic_api_key; const startedAt = Date.now(); @@ -72,14 +73,14 @@ async function main() { // Get + sort jobs — only enabled apply types const allJobs = getJobsByStatus(['new', 'needs_answer']) - .filter(j => ENABLED_APPLY_TYPES.includes(j.apply_type)) + .filter(j => enabledTypes.includes(j.apply_type)) .sort((a, b) => { const ap = APPLY_PRIORITY.indexOf(a.apply_type ?? 'unknown_external'); const bp = APPLY_PRIORITY.indexOf(b.apply_type ?? 'unknown_external'); return (ap === -1 ? 99 : ap) - (bp === -1 ? 99 : bp); }); const jobs = allJobs.slice(0, maxApps); - console.log(`Enabled types: ${ENABLED_APPLY_TYPES.join(', ')}\n`); + console.log(`Enabled types: ${enabledTypes.join(', ')}\n`); results.total = jobs.length; if (jobs.length === 0) { console.log('Nothing to apply to. Run job_searcher.mjs first.'); return; } @@ -128,8 +129,18 @@ async function main() { console.log(` → [${job.apply_type}] ${job.title} @ ${job.company || '?'}`); + // Check overall run timeout + if (Date.now() - startedAt > APPLY_RUN_TIMEOUT_MS) { + console.log(` ⏱️ Run timeout (${Math.round(APPLY_RUN_TIMEOUT_MS / 60000)}min) — stopping`); + break; + } + try { - const result = await applyToJob(browser.page, job, formFiller); + // Per-job timeout — prevents a single hung browser from blocking the run + const result = await Promise.race([ + applyToJob(browser.page, job, formFiller), + new Promise((_, reject) => setTimeout(() => reject(new Error('Job apply timed out')), PER_JOB_TIMEOUT_MS)), + ]); await handleResult(job, result, results, settings, profile, apiKey); } catch (e) { console.error(` ❌ Error: ${e.message}`); diff --git a/lib/ai_answer.mjs b/lib/ai_answer.mjs index c195098..04c1665 100644 --- a/lib/ai_answer.mjs +++ b/lib/ai_answer.mjs @@ -16,20 +16,25 @@ import { ANTHROPIC_API_URL } from './constants.mjs'; export async function generateAnswer(question, profile, apiKey, job = {}) { if (!apiKey) return null; - // Read resume text if available + // Read resume text if available — try pdftotext for PDFs, fall back to raw read let resumeText = ''; if (profile.resume_path && existsSync(profile.resume_path)) { - try { - // Try to read as text — PDF will be garbled but still useful for key facts - // If pdftotext is available, use it; otherwise skip - const { execSync } = await import('child_process'); + // Only attempt pdftotext for .pdf files + if (profile.resume_path.toLowerCase().endsWith('.pdf')) { try { - resumeText = execSync(`pdftotext "${profile.resume_path}" -`, { timeout: 5000 }).toString().slice(0, 4000); + const { execFileSync } = await import('child_process'); + // execFileSync avoids shell injection — args passed as array, not interpolated + resumeText = execFileSync('pdftotext', [profile.resume_path, '-'], { timeout: 3000 }).toString().slice(0, 4000); } catch { - // pdftotext not available — skip resume text + // pdftotext not available or failed — skip + } + } else { + // Plain text resume + try { + resumeText = readFileSync(profile.resume_path, 'utf8').slice(0, 4000); + } catch { + // ignore } - } catch { - // ignore } } diff --git a/lib/apply/easy_apply.mjs b/lib/apply/easy_apply.mjs index 0cfa677..11d7676 100644 --- a/lib/apply/easy_apply.mjs +++ b/lib/apply/easy_apply.mjs @@ -83,7 +83,9 @@ export async function apply(page, job, formFiller) { Array.from(document.querySelectorAll('[aria-label*="Easy Apply"], [aria-label*="Apply"]')) .map(el => ({ tag: el.tagName, aria: el.getAttribute('aria-label'), visible: el.offsetParent !== null })) ).catch(() => []); - console.log(` ℹ️ No Easy Apply element found. Apply-related elements: ${JSON.stringify(applyEls)}`); + console.log(` ℹ️ No Easy Apply button found. Page URL: ${page.url()}`); + console.log(` ℹ️ Apply-related elements on page: ${JSON.stringify(applyEls)}`); + console.log(` Action: job may have been removed, filled, or changed to external apply`); return { status: 'skipped_easy_apply_unsupported', meta }; } @@ -97,7 +99,11 @@ export async function apply(page, job, formFiller) { // Click Easy Apply and wait for modal to appear await page.click(LINKEDIN_APPLY_BUTTON_SELECTOR, { timeout: APPLY_CLICK_TIMEOUT }).catch(() => {}); const modal = await page.waitForSelector(LINKEDIN_EASY_APPLY_MODAL_SELECTOR, { timeout: 8000 }).catch(() => null); - if (!modal) return { status: 'no_modal', meta }; + if (!modal) { + console.log(` ❌ Modal did not open after clicking Easy Apply`); + console.log(` Action: LinkedIn may have changed the modal structure or login expired`); + return { status: 'no_modal', meta }; + } const MODAL = LINKEDIN_EASY_APPLY_MODAL_SELECTOR; @@ -146,6 +152,22 @@ export async function apply(page, job, formFiller) { await page.waitForTimeout(MODAL_STEP_WAIT); + // Check for validation errors after form fill — if LinkedIn shows errors, + // the form won't advance. Re-check errors AFTER fill since fill may have resolved them. + const postFillErrors = await page.evaluate((sel) => { + const modal = document.querySelector(sel); + if (!modal) return []; + return Array.from(modal.querySelectorAll('[class*="error"], [aria-invalid="true"], .artdeco-inline-feedback--error')) + .map(e => e.textContent?.trim().slice(0, 80)).filter(Boolean); + }, MODAL).catch(() => []); + + if (postFillErrors.length > 0) { + console.log(` [step ${step}] ❌ Validation errors after fill: ${JSON.stringify(postFillErrors)}`); + console.log(` Action: check answers.json or profile.json for missing/wrong answers`); + await dismissModal(page, MODAL); + return { status: 'incomplete', meta, validation_errors: postFillErrors }; + } + // --- Button check order: Next → Review → Submit --- // Check Next first — only fall through to Submit when there's no forward navigation. // This prevents accidentally clicking a Submit-like element on early modal steps. @@ -195,7 +217,8 @@ export async function apply(page, job, formFiller) { return { status: 'stuck', meta }; } - console.log(` [step ${step}] no Next/Review/Submit found — breaking`); + console.log(` [step ${step}] ❌ No Next/Review/Submit button found in modal`); + console.log(` Action: LinkedIn may have changed button text/structure. Check button snapshot above.`); break; } diff --git a/lib/constants.mjs b/lib/constants.mjs index 2156000..e45c1fa 100644 --- a/lib/constants.mjs +++ b/lib/constants.mjs @@ -87,3 +87,7 @@ export const EXTERNAL_ATS_PATTERNS = [ // --- Queue --- export const DEFAULT_MAX_RETRIES = 2; + +// --- Run limits --- +export const APPLY_RUN_TIMEOUT_MS = 45 * 60 * 1000; // 45 minutes +export const PER_JOB_TIMEOUT_MS = 3 * 60 * 1000; // 3 minutes per job diff --git a/lib/form_filler.mjs b/lib/form_filler.mjs index c82ff77..75e2bf0 100644 --- a/lib/form_filler.mjs +++ b/lib/form_filler.mjs @@ -119,10 +119,30 @@ export class FormFiller { const ariaLabel = node.getAttribute('aria-label') || ''; const ariaLabelledBy = node.getAttribute('aria-labelledby'); const linked = ariaLabelledBy ? document.getElementById(ariaLabelledBy)?.textContent?.trim() : ''; - return forLabel || ariaLabel || linked || node.placeholder || node.name || ''; + // Clean up — remove trailing * from required field labels + const raw = forLabel || ariaLabel || linked || node.placeholder || node.name || ''; + return raw.replace(/\s*\*\s*$/, '').trim(); }).catch(() => ''); } + /** + * Check if a form element is required. + * LinkedIn uses multiple patterns: required attribute, aria-required, or * in label. + */ + async isRequired(el) { + return await el.evaluate(node => { + if (node.required || node.getAttribute('required') !== null) return true; + if (node.getAttribute('aria-required') === 'true') return true; + // Check if the associated label contains * + const id = node.id; + if (id) { + const label = document.querySelector(`label[for="${id}"]`); + if (label && label.textContent.includes('*')) return true; + } + return false; + }).catch(() => false); + } + /** * Select the first option from an autocomplete dropdown. * Waits for the dropdown to appear, then clicks the first option. @@ -181,8 +201,7 @@ export class FormFiller { await this.selectAutocomplete(page, inp); } } else if (!answer) { - const required = await inp.getAttribute('required').catch(() => null); - if (required !== null) unknown.push(lbl); + if (await this.isRequired(inp)) unknown.push(lbl); } } @@ -196,8 +215,7 @@ export class FormFiller { if (answer) { await ta.fill(answer).catch(() => {}); } else { - const required = await ta.getAttribute('required').catch(() => null); - if (required !== null) unknown.push(lbl); + if (await this.isRequired(ta)) unknown.push(lbl); } } diff --git a/lib/queue.mjs b/lib/queue.mjs index 5f5f2c5..e5abad0 100644 --- a/lib/queue.mjs +++ b/lib/queue.mjs @@ -3,7 +3,7 @@ * Handles jobs_queue.json read/write/update * Uses in-memory cache to avoid redundant disk I/O within a run. */ -import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs'; +import { readFileSync, writeFileSync, renameSync, existsSync, mkdirSync } from 'fs'; import { dirname, resolve } from 'path'; import { fileURLToPath } from 'url'; @@ -37,6 +37,17 @@ function ensureDir(path) { if (!existsSync(dir)) mkdirSync(dir, { recursive: true }); } +/** + * Atomic write — writes to a temp file then renames. + * Prevents corruption if two processes write simultaneously or the process + * crashes mid-write. rename() is atomic on POSIX filesystems. + */ +function atomicWriteJSON(filePath, data) { + const tmp = filePath + '.tmp'; + writeFileSync(tmp, JSON.stringify(data, null, 2)); + renameSync(tmp, filePath); +} + // --- In-memory caches (populated on first read, invalidated on write) --- let _queueCache = null; let _logCache = null; @@ -50,7 +61,7 @@ export function loadQueue() { export function saveQueue(jobs) { ensureDir(QUEUE_PATH); - writeFileSync(QUEUE_PATH, JSON.stringify(jobs, null, 2)); + atomicWriteJSON(QUEUE_PATH, jobs); _queueCache = jobs; } @@ -62,7 +73,8 @@ function loadLog() { } function saveLog(log) { - writeFileSync(LOG_PATH, JSON.stringify(log, null, 2)); + ensureDir(LOG_PATH); + atomicWriteJSON(LOG_PATH, log); _logCache = log; }