diff --git a/lib/apply/ashby.mjs b/lib/apply/ashby.mjs index f432514..5149453 100644 --- a/lib/apply/ashby.mjs +++ b/lib/apply/ashby.mjs @@ -1,82 +1,23 @@ /** - * ashby.mjs — Ashby ATS handler - * - * Ashby forms have a consistent structure: - * - URLs ending in /application land directly on the form - * - Other URLs show a job listing with "Apply for this Job" button - * - Form fields: Name, Email, Resume (file), optional extras (phone, LinkedIn, etc.) - * - Resume input has id="_systemfield_resume" - * - There's also an "autofill from resume" file input — don't confuse with actual resume - * - "Upload file" buttons are type="submit" — must target "Submit Application" specifically - * - Invisible reCAPTCHA on submit + * ashby.mjs — Ashby ATS handler (extends generic) */ -import { - NAVIGATION_TIMEOUT, PAGE_LOAD_WAIT, FORM_FILL_WAIT, SUBMIT_WAIT -} from '../constants.mjs'; +import { apply as genericApply } from './generic.mjs'; export const SUPPORTED_TYPES = ['ashby']; export async function apply(page, job, formFiller) { - const url = job.apply_url; - if (!url) return { status: 'no_button', meta: { title: job.title, company: job.company } }; - - const meta = { title: job.title, company: job.company }; - - // Navigate — append /application if not already there - const applyUrl = url.includes('/application') ? url : url.replace(/\/?(\?|$)/, '/application$1'); - await page.goto(applyUrl, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT }); - await page.waitForTimeout(PAGE_LOAD_WAIT); - - // Check if we landed on the form or a listing page - const hasForm = await page.$('#_systemfield_name, input[name="_systemfield_name"]'); - if (!hasForm) { - // Try clicking "Apply for this Job" - const applyBtn = page.locator('button:has-text("Apply for this Job"), a:has-text("Apply for this Job")').first(); - if (await applyBtn.count() === 0) return { status: 'no_button', meta }; - await applyBtn.click(); - await page.waitForTimeout(FORM_FILL_WAIT); - } - - // Check for closed listing - const closed = await page.evaluate(() => { - const text = (document.body.innerText || '').toLowerCase(); - return text.includes('no longer accepting') || text.includes('position has been filled') || - text.includes('no longer available') || text.includes('does not exist'); - }).catch(() => false); - if (closed) return { status: 'closed', meta }; - - // Fill form fields - const unknowns = await formFiller.fill(page, formFiller.profile.resume_path); - if (unknowns[0]?.honeypot) return { status: 'skipped_honeypot', meta }; - if (unknowns.length > 0) return { status: 'needs_answer', pending_question: unknowns[0], meta }; - - // Upload resume to the correct file input (not the autofill one) - const resumeInput = await page.$('#_systemfield_resume'); - if (resumeInput && formFiller.profile.resume_path) { - await resumeInput.setInputFiles(formFiller.profile.resume_path).catch(() => {}); - await page.waitForTimeout(1000); - } - - // Click "Submit Application" specifically — NOT the "Upload file" buttons - const submitBtn = page.locator('button:has-text("Submit Application")').first(); - if (await submitBtn.count() === 0) return { status: 'no_submit', meta }; - - await submitBtn.click(); - await page.waitForTimeout(SUBMIT_WAIT); - - // Verify submission - const postSubmit = await page.evaluate(() => { - const text = (document.body.innerText || '').toLowerCase(); - return { - hasSuccess: text.includes('thank you') || text.includes('application submitted') || - text.includes('application received') || text.includes('successfully'), - hasForm: !!document.querySelector('#_systemfield_name'), - }; - }).catch(() => ({ hasSuccess: false, hasForm: false })); - - if (postSubmit.hasSuccess || !postSubmit.hasForm) { - return { status: 'submitted', meta }; - } - - return { status: 'incomplete', meta }; + return genericApply(page, job, formFiller, { + transformUrl: (url) => url.includes('/application') ? url : url.replace(/\/?(\?|$)/, '/application$1'), + formDetector: '#_systemfield_name', + applyButtonSelector: 'button:has-text("Apply for this Job"), a:has-text("Apply for this Job")', + submitSelector: 'button:has-text("Submit Application")', + verifySelector: '#_systemfield_name', + beforeSubmit: async (page, formFiller) => { + const resumeInput = await page.$('#_systemfield_resume'); + if (resumeInput && formFiller.profile.resume_path) { + await resumeInput.setInputFiles(formFiller.profile.resume_path).catch(() => {}); + await page.waitForTimeout(1000); + } + }, + }); } diff --git a/lib/apply/generic.mjs b/lib/apply/generic.mjs index 54baeff..29e9232 100644 --- a/lib/apply/generic.mjs +++ b/lib/apply/generic.mjs @@ -1,8 +1,19 @@ /** - * generic.mjs — Generic external ATS handler - * Best-effort form filler for any career page with a standard HTML form. - * Handles single-page and multi-step flows (up to 5 steps). - * Skips pages that require account creation or have CAPTCHAs. + * generic.mjs — Configurable external ATS handler + * + * Base apply logic for any career page. ATS-specific handlers pass + * an options object to customize URL transformation, selectors, + * resume targeting, and submission verification. + * + * Options: + * transformUrl(url) — modify the apply URL before navigation + * formDetector — CSS selector to detect if form is already loaded + * applyButtonSelector — selector for the "Apply" button on listing pages + * resumeSelector — CSS selector for the resume file input + * submitSelector — selector for the submit button (use locator syntax) + * verifySelector — CSS selector to check if form is still present after submit + * beforeSubmit(page) — async hook to run before clicking submit (e.g. upload resume) + * closedTexts — extra strings to detect closed listings */ import { NAVIGATION_TIMEOUT, PAGE_LOAD_WAIT, FORM_FILL_WAIT, SUBMIT_WAIT @@ -12,58 +23,85 @@ export const SUPPORTED_TYPES = ['unknown_external']; const MAX_STEPS = 5; -export async function apply(page, job, formFiller) { - const url = job.apply_url; +const DEFAULT_APPLY_BUTTONS = [ + 'a:has-text("Apply Now")', + 'button:has-text("Apply Now")', + 'a:has-text("Apply for this job")', + 'button:has-text("Apply for this job")', + 'a:has-text("Apply")', + 'button:has-text("Apply")', +].join(', '); + +const DEFAULT_SUBMIT_BUTTONS = [ + 'button:has-text("Submit Application")', + 'button:has-text("Submit your application")', + 'button:has-text("Apply Now")', + 'button:has-text("Apply for this job")', + 'input[type="submit"]:not([disabled])', + 'button[type="submit"]:not([disabled])', +].join(', '); + +const DEFAULT_NEXT_BUTTONS = [ + 'button:has-text("Next")', + 'button:has-text("Continue")', + 'button:has-text("Save and Continue")', + 'a:has-text("Next")', +].join(', '); + +const CLOSED_TEXTS = [ + 'no longer accepting', 'position has been filled', + 'this job is no longer', 'job not found', + 'this position is closed', 'listing has expired', + 'no longer available', 'page you are looking for', + 'job may be no longer', 'does not exist', + 'this role has been filled', 'posting has closed', +]; + +export async function apply(page, job, formFiller, opts = {}) { + const url = opts.transformUrl ? opts.transformUrl(job.apply_url) : job.apply_url; if (!url) return { status: 'no_button', meta: { title: job.title, company: job.company } }; + const meta = { title: job.title, company: job.company }; + await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT }); await page.waitForTimeout(PAGE_LOAD_WAIT); - const meta = { - title: job.title, - company: job.company, - }; - - // Detect blockers: login walls, CAPTCHAs, closed listings - const pageCheck = await page.evaluate(() => { + // Detect blockers + const extraClosed = opts.closedTexts || []; + const pageCheck = await page.evaluate((extraClosed) => { const text = (document.body.innerText || '').toLowerCase(); const hasLogin = !!(document.querySelector('input[type="password"]') || (text.includes('sign in') && text.includes('create account')) || (text.includes('log in') && text.includes('register'))); - // Only block on visible CAPTCHAs — invisible reCAPTCHA (size=invisible) fires on submit and usually passes const captchaFrames = Array.from(document.querySelectorAll('iframe[src*="recaptcha"], iframe[src*="captcha"]')); - const hasVisibleCaptcha = captchaFrames.some(f => { + const hasCaptcha = captchaFrames.some(f => { if (f.src.includes('size=invisible')) return false; const rect = f.getBoundingClientRect(); return rect.width > 50 && rect.height > 50; }); - const hasCaptcha = hasVisibleCaptcha; - const isClosed = text.includes('no longer accepting') || text.includes('position has been filled') || - text.includes('this job is no longer') || text.includes('job not found') || - text.includes('this position is closed') || text.includes('listing has expired') || - text.includes('no longer available') || text.includes('page you are looking for') || - text.includes('job may be no longer') || text.includes('does not exist') || - text.includes('this role has been filled') || text.includes('posting has closed') || - document.title.toLowerCase().includes('404'); + const closedTexts = [ + 'no longer accepting', 'position has been filled', + 'this job is no longer', 'job not found', + 'this position is closed', 'listing has expired', + 'no longer available', 'page you are looking for', + 'job may be no longer', 'does not exist', + 'this role has been filled', 'posting has closed', + ...extraClosed, + ]; + const isClosed = closedTexts.some(t => text.includes(t)) || document.title.toLowerCase().includes('404'); return { hasLogin, hasCaptcha, isClosed }; - }).catch(() => ({})); + }, extraClosed).catch(() => ({})); if (pageCheck.isClosed) return { status: 'closed', meta }; if (pageCheck.hasLogin) return { status: 'skipped_login_required', meta }; if (pageCheck.hasCaptcha) return { status: 'skipped_captcha', meta }; - // Some pages land directly on the form; others need an Apply button click - // Check if we landed directly on a form (with or without
wrapper) - const hasFormAlready = await page.$('input[type="text"], input[type="email"], textarea'); + // Check if form is already loaded + const formSelector = opts.formDetector || 'input[type="text"], input[type="email"], textarea'; + const hasFormAlready = await page.$(formSelector); if (!hasFormAlready) { - const applyBtn = page.locator([ - 'a:has-text("Apply Now")', - 'button:has-text("Apply Now")', - 'a:has-text("Apply for this job")', - 'button:has-text("Apply for this job")', - 'a:has-text("Apply")', - 'button:has-text("Apply")', - ].join(', ')).first(); + const applySelector = opts.applyButtonSelector || DEFAULT_APPLY_BUTTONS; + const applyBtn = page.locator(applySelector).first(); if (await applyBtn.count() === 0) return { status: 'no_button', meta }; @@ -74,11 +112,9 @@ export async function apply(page, job, formFiller) { ]); if (newPage) { - // Apply opened a new tab — switch to it await newPage.waitForLoadState('domcontentloaded').catch(() => {}); await newPage.waitForTimeout(PAGE_LOAD_WAIT); - // Recursively handle the new page (but return result to caller) - return applyOnPage(newPage, job, formFiller, meta); + return fillAndSubmit(newPage, job, formFiller, meta, opts); } await page.waitForTimeout(FORM_FILL_WAIT); @@ -93,67 +129,56 @@ export async function apply(page, job, formFiller) { if (postClick.hasCaptcha) return { status: 'skipped_captcha', meta }; } - return applyOnPage(page, job, formFiller, meta); + return fillAndSubmit(page, job, formFiller, meta, opts); } -async function applyOnPage(page, job, formFiller, meta) { +async function fillAndSubmit(page, job, formFiller, meta, opts) { for (let step = 0; step < MAX_STEPS; step++) { - // Fill the current page/step const unknowns = await formFiller.fill(page, formFiller.profile.resume_path); if (unknowns[0]?.honeypot) return { status: 'skipped_honeypot', meta }; if (unknowns.length > 0) return { status: 'needs_answer', pending_question: unknowns[0], meta }; - // Look for submit button — try specific text first, then generic type="submit" - const submitBtn = await page.$([ - 'button:has-text("Submit Application")', - 'button:has-text("Submit your application")', - 'button:has-text("Apply Now")', - 'button:has-text("Apply for this job")', - 'input[type="submit"]:not([disabled])', - 'button[type="submit"]:not([disabled])', - ].join(', ')); + // Hook: before submit (e.g. targeted resume upload) + if (opts.beforeSubmit) await opts.beforeSubmit(page, formFiller); - // Look for Next/Continue button (multi-step forms) - const nextBtn = !submitBtn ? await page.$([ - 'button:has-text("Next")', - 'button:has-text("Continue")', - 'button:has-text("Save and Continue")', - 'a:has-text("Next")', - ].join(', ')) : null; + // Find submit button + const submitSelector = opts.submitSelector || DEFAULT_SUBMIT_BUTTONS; + const submitBtn = page.locator(submitSelector).first(); + const hasSubmit = await submitBtn.count() > 0; - if (submitBtn) { + if (hasSubmit) { await submitBtn.click(); await page.waitForTimeout(SUBMIT_WAIT); - const postSubmit = await page.evaluate(() => { + const verifySelector = opts.verifySelector || 'form button[type="submit"]:not([disabled])'; + const postSubmit = await page.evaluate((vs) => { const text = (document.body.innerText || '').toLowerCase(); return { hasSuccess: text.includes('application submitted') || text.includes('successfully applied') || text.includes('thank you') || text.includes('application received') || text.includes('application has been') || text.includes('we received your'), - hasForm: !!document.querySelector('form button[type="submit"]:not([disabled])'), + hasForm: !!document.querySelector(vs), }; - }).catch(() => ({ hasSuccess: false, hasForm: false })); + }, verifySelector).catch(() => ({ hasSuccess: false, hasForm: false })); if (postSubmit.hasSuccess || !postSubmit.hasForm) { return { status: 'submitted', meta }; } - console.log(` [generic] Submit clicked but form still present — may not have submitted`); return { status: 'incomplete', meta }; } + // Multi-step: Next/Continue + const nextBtn = await page.$(DEFAULT_NEXT_BUTTONS); if (nextBtn) { await nextBtn.click(); await page.waitForTimeout(FORM_FILL_WAIT); - continue; // Fill next step + continue; } - // No submit or next button found return { status: 'no_submit', meta }; } - console.log(` [generic] Exceeded ${MAX_STEPS} form steps`); return { status: 'incomplete', meta }; } diff --git a/lib/apply/greenhouse.mjs b/lib/apply/greenhouse.mjs index d4fa1f8..9204489 100644 --- a/lib/apply/greenhouse.mjs +++ b/lib/apply/greenhouse.mjs @@ -1,11 +1,12 @@ /** - * greenhouse.mjs — Greenhouse ATS handler - * Delegates to generic handler — Greenhouse forms are standard HTML forms + * greenhouse.mjs — Greenhouse ATS handler (extends generic) */ import { apply as genericApply } from './generic.mjs'; export const SUPPORTED_TYPES = ['greenhouse']; export async function apply(page, job, formFiller) { - return genericApply(page, job, formFiller); + return genericApply(page, job, formFiller, { + submitSelector: 'button:has-text("Submit Application"), input[type="submit"]', + }); } diff --git a/lib/apply/index.mjs b/lib/apply/index.mjs index b8d98ad..78af178 100644 --- a/lib/apply/index.mjs +++ b/lib/apply/index.mjs @@ -1,7 +1,9 @@ /** * index.mjs — Apply handler registry - * Maps apply_type → handler module - * To add a new ATS: create lib/apply/.mjs and add one line here + * + * Two lookup mechanisms: + * 1. apply_type → handler (explicit ATS classification) + * 2. apply_url domain → handler (auto-detect from URL, fallback to generic) */ import * as easyApply from './easy_apply.mjs'; import * as greenhouse from './greenhouse.mjs'; @@ -31,12 +33,34 @@ for (const handler of ALL_HANDLERS) { } } +// Domain → handler mapping for URL-based auto-detection +// When apply_type is unknown_external, match apply_url against these patterns +const DOMAIN_REGISTRY = [ + { pattern: /ashbyhq\.com/i, handler: ashby }, + { pattern: /greenhouse\.io|grnh\.se/i, handler: greenhouse }, + { pattern: /lever\.co|jobs\.lever\.co/i, handler: lever }, + { pattern: /workday\.com|myworkdayjobs\.com|myworkdaysite\.com/i, handler: workday }, + { pattern: /jobvite\.com|applytojob\.com/i, handler: jobvite }, +]; + /** - * Get handler for a given apply_type - * Returns null if not supported + * Get handler for a job — checks apply_type first, then URL domain, then generic */ -export function getHandler(applyType) { - return REGISTRY[applyType] || null; +function resolveHandler(job) { + // Explicit type match + if (job.apply_type && REGISTRY[job.apply_type]) { + return REGISTRY[job.apply_type]; + } + + // Domain match from apply_url + if (job.apply_url) { + for (const { pattern, handler } of DOMAIN_REGISTRY) { + if (pattern.test(job.apply_url)) return handler; + } + } + + // Fallback to generic if it has a URL, otherwise unsupported + return job.apply_url ? generic : null; } /** @@ -46,23 +70,6 @@ export function supportedTypes() { return Object.keys(REGISTRY); } -/** - * Status normalization — handlers return platform-specific statuses, - * this map converts them to generic statuses that job_applier.mjs understands. - * - * Generic statuses (what handleResult expects): - * submitted — application was submitted successfully - * needs_answer — blocked on unknown form question, sent to Telegram - * skipped_recruiter_only — LinkedIn recruiter-only listing - * skipped_external_unsupported — external ATS not yet implemented - * skipped_no_apply — no apply button/modal/submit found on page - * skipped_honeypot — honeypot question detected, application abandoned - * stuck — modal progress stalled after retries - * incomplete — ran out of modal steps without submitting - * - * When adding a new handler, return any status you want — if it doesn't match - * a generic status above, add a mapping here so job_applier doesn't need to change. - */ const STATUS_MAP = { no_button: 'skipped_no_apply', no_submit: 'skipped_no_apply', @@ -74,7 +81,7 @@ const STATUS_MAP = { * Returns result object with normalized status */ export async function applyToJob(page, job, formFiller) { - const handler = getHandler(job.apply_type); + const handler = resolveHandler(job); if (!handler) { return { status: 'skipped_external_unsupported', diff --git a/lib/apply/jobvite.mjs b/lib/apply/jobvite.mjs index f82916d..fab936c 100644 --- a/lib/apply/jobvite.mjs +++ b/lib/apply/jobvite.mjs @@ -1,11 +1,12 @@ /** - * jobvite.mjs — Jobvite ATS handler - * Delegates to generic handler — Jobvite forms are standard HTML forms + * jobvite.mjs — Jobvite ATS handler (extends generic) */ import { apply as genericApply } from './generic.mjs'; export const SUPPORTED_TYPES = ['jobvite']; export async function apply(page, job, formFiller) { - return genericApply(page, job, formFiller); + return genericApply(page, job, formFiller, { + submitSelector: 'button:has-text("Submit"), input[type="submit"]', + }); } diff --git a/lib/apply/lever.mjs b/lib/apply/lever.mjs index 8a692cc..cdb5884 100644 --- a/lib/apply/lever.mjs +++ b/lib/apply/lever.mjs @@ -1,11 +1,13 @@ /** - * lever.mjs — Lever ATS handler - * Delegates to generic handler — Lever forms are standard HTML forms + * lever.mjs — Lever ATS handler (extends generic) */ import { apply as genericApply } from './generic.mjs'; export const SUPPORTED_TYPES = ['lever']; export async function apply(page, job, formFiller) { - return genericApply(page, job, formFiller); + return genericApply(page, job, formFiller, { + // Lever apply URLs already end in /apply + submitSelector: 'button:has-text("Submit application"), button[type="submit"]', + }); } diff --git a/lib/apply/workday.mjs b/lib/apply/workday.mjs index 507c10c..c020e6b 100644 --- a/lib/apply/workday.mjs +++ b/lib/apply/workday.mjs @@ -1,12 +1,13 @@ /** - * workday.mjs — Workday ATS handler - * Delegates to generic handler. Workday often requires account creation, - * so many will return skipped_login_required — that's expected. + * workday.mjs — Workday ATS handler (extends generic) + * Most Workday sites require account creation — generic will return skipped_login_required */ import { apply as genericApply } from './generic.mjs'; export const SUPPORTED_TYPES = ['workday']; export async function apply(page, job, formFiller) { - return genericApply(page, job, formFiller); + return genericApply(page, job, formFiller, { + closedTexts: ['this job posting is no longer active'], + }); }