Refactor apply handlers: generic with extensions + domain auto-routing

Generic handler now accepts options (transformUrl, formDetector,
submitSelector, resumeSelector, beforeSubmit, verifySelector, etc.).
Each ATS handler passes its overrides instead of reimplementing.

Registry resolves handlers by: apply_type -> URL domain -> generic fallback.
New ATS handlers only need to export SUPPORTED_TYPES and an apply() that
calls genericApply with platform-specific options.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-06 20:41:54 -08:00
parent 4f202a4e91
commit ae797d73eb
7 changed files with 155 additions and 177 deletions

View File

@@ -1,82 +1,23 @@
/**
* ashby.mjs — Ashby ATS handler
*
* Ashby forms have a consistent structure:
* - URLs ending in /application land directly on the form
* - Other URLs show a job listing with "Apply for this Job" button
* - Form fields: Name, Email, Resume (file), optional extras (phone, LinkedIn, etc.)
* - Resume input has id="_systemfield_resume"
* - There's also an "autofill from resume" file input — don't confuse with actual resume
* - "Upload file" buttons are type="submit" — must target "Submit Application" specifically
* - Invisible reCAPTCHA on submit
* ashby.mjs — Ashby ATS handler (extends generic)
*/
import {
NAVIGATION_TIMEOUT, PAGE_LOAD_WAIT, FORM_FILL_WAIT, SUBMIT_WAIT
} from '../constants.mjs';
import { apply as genericApply } from './generic.mjs';
export const SUPPORTED_TYPES = ['ashby'];
/**
 * Ashby entry point.
 *
 * NOTE(review): this span is a diff rendering that interleaves two versions of
 * the body: an inline navigate / fill / submit flow (it ends at the
 * `return { status: 'incomplete', meta };` line) and the `genericApply(...)`
 * delegation that follows it. Presumably the inline flow is the removed code
 * and the delegation is the added code; confirm against the committed file.
 *
 * @param page - browser page handle; the inline flow navigates it with page.goto
 * @param job - job record; apply_url, title and company are read here
 * @param formFiller - exposes fill(page, resumePath) and profile.resume_path
 * @returns the inline flow resolves to { status, meta } (plus pending_question
 *          for 'needs_answer'); the delegation resolves to whatever
 *          genericApply resolves to
 */
export async function apply(page, job, formFiller) {
const url = job.apply_url;
if (!url) return { status: 'no_button', meta: { title: job.title, company: job.company } };
const meta = { title: job.title, company: job.company };
// Navigate — append /application if not already there
const applyUrl = url.includes('/application') ? url : url.replace(/\/?(\?|$)/, '/application$1');
await page.goto(applyUrl, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT });
await page.waitForTimeout(PAGE_LOAD_WAIT);
// Check if we landed on the form or a listing page
const hasForm = await page.$('#_systemfield_name, input[name="_systemfield_name"]');
if (!hasForm) {
// Try clicking "Apply for this Job"
const applyBtn = page.locator('button:has-text("Apply for this Job"), a:has-text("Apply for this Job")').first();
if (await applyBtn.count() === 0) return { status: 'no_button', meta };
await applyBtn.click();
await page.waitForTimeout(FORM_FILL_WAIT);
}
// Check for closed listing
const closed = await page.evaluate(() => {
const text = (document.body.innerText || '').toLowerCase();
return text.includes('no longer accepting') || text.includes('position has been filled') ||
text.includes('no longer available') || text.includes('does not exist');
}).catch(() => false);
if (closed) return { status: 'closed', meta };
// Fill form fields
const unknowns = await formFiller.fill(page, formFiller.profile.resume_path);
if (unknowns[0]?.honeypot) return { status: 'skipped_honeypot', meta };
if (unknowns.length > 0) return { status: 'needs_answer', pending_question: unknowns[0], meta };
// Upload resume to the correct file input (not the autofill one)
const resumeInput = await page.$('#_systemfield_resume');
if (resumeInput && formFiller.profile.resume_path) {
await resumeInput.setInputFiles(formFiller.profile.resume_path).catch(() => {});
await page.waitForTimeout(1000);
}
// Click "Submit Application" specifically — NOT the "Upload file" buttons
const submitBtn = page.locator('button:has-text("Submit Application")').first();
if (await submitBtn.count() === 0) return { status: 'no_submit', meta };
await submitBtn.click();
await page.waitForTimeout(SUBMIT_WAIT);
// Verify submission
const postSubmit = await page.evaluate(() => {
const text = (document.body.innerText || '').toLowerCase();
return {
hasSuccess: text.includes('thank you') || text.includes('application submitted') ||
text.includes('application received') || text.includes('successfully'),
hasForm: !!document.querySelector('#_systemfield_name'),
};
}).catch(() => ({ hasSuccess: false, hasForm: false }));
if (postSubmit.hasSuccess || !postSubmit.hasForm) {
return { status: 'submitted', meta };
}
return { status: 'incomplete', meta };
// Delegating version: the Ashby-specific behavior is expressed purely as options.
return genericApply(page, job, formFiller, {
// Leaves the URL alone when it already contains /application; otherwise the
// regex inserts /application before the query string, or at the end of the
// URL when there is none, absorbing one trailing slash.
// NOTE(review): the generic handler appears to call transformUrl before its
// own empty-URL guard, so a job without apply_url would reach url.includes
// with undefined and presumably throw. The inline flow above returned
// 'no_button' for that case; confirm and guard if needed.
transformUrl: (url) => url.includes('/application') ? url : url.replace(/\/?(\?|$)/, '/application$1'),
formDetector: '#_systemfield_name',
applyButtonSelector: 'button:has-text("Apply for this Job"), a:has-text("Apply for this Job")',
// Text-specific on purpose: the inline flow notes that Ashby's "Upload file"
// buttons are also submit buttons.
submitSelector: 'button:has-text("Submit Application")',
// Same element as formDetector, reused for the post-submit "form still present" check.
verifySelector: '#_systemfield_name',
// Targeted upload into #_systemfield_resume; setInputFiles failures are
// deliberately ignored (best-effort), then a fixed 1000 ms wait follows.
beforeSubmit: async (page, formFiller) => {
const resumeInput = await page.$('#_systemfield_resume');
if (resumeInput && formFiller.profile.resume_path) {
await resumeInput.setInputFiles(formFiller.profile.resume_path).catch(() => {});
await page.waitForTimeout(1000);
}
},
});
}

View File

@@ -1,8 +1,19 @@
/**
* generic.mjs — Generic external ATS handler
* Best-effort form filler for any career page with a standard HTML form.
* Handles single-page and multi-step flows (up to 5 steps).
* Skips pages that require account creation or have CAPTCHAs.
* generic.mjs — Configurable external ATS handler
*
* Base apply logic for any career page. ATS-specific handlers pass
* an options object to customize URL transformation, selectors,
* resume targeting, and submission verification.
*
* Options:
* transformUrl(url) — modify the apply URL before navigation
* formDetector — CSS selector to detect if form is already loaded
* applyButtonSelector — selector for the "Apply" button on listing pages
* resumeSelector — CSS selector for the resume file input (NOTE(review): not read by any handler code visible in this diff; confirm it is wired up)
* submitSelector — selector for the submit button (use locator syntax)
* verifySelector — CSS selector to check if form is still present after submit
* beforeSubmit(page, formFiller) — async hook awaited on every form step, before the submit button is looked up (e.g. upload resume)
* closedTexts — extra strings to detect closed listings
*/
import {
NAVIGATION_TIMEOUT, PAGE_LOAD_WAIT, FORM_FILL_WAIT, SUBMIT_WAIT
@@ -12,58 +23,85 @@ export const SUPPORTED_TYPES = ['unknown_external'];
const MAX_STEPS = 5;
export async function apply(page, job, formFiller) {
const url = job.apply_url;
// Default "Apply" entry points on a listing page: for each label an <a> and
// a <button> variant, most specific label first. Joined into one
// comma-separated selector list.
const DEFAULT_APPLY_BUTTONS = ['Apply Now', 'Apply for this job', 'Apply']
  .flatMap((label) => ['a', 'button'].map((tag) => `${tag}:has-text("${label}")`))
  .join(', ');

// Default submit controls: text-specific buttons first, then any enabled
// type="submit" input or button.
const DEFAULT_SUBMIT_BUTTONS = [
  ...['Submit Application', 'Submit your application', 'Apply Now', 'Apply for this job']
    .map((label) => `button:has-text("${label}")`),
  ...['input', 'button'].map((tag) => `${tag}[type="submit"]:not([disabled])`),
].join(', ');

// Default "advance to the next step" controls for multi-step forms.
const DEFAULT_NEXT_BUTTONS = [
  ...['Next', 'Continue', 'Save and Continue'].map((label) => `button:has-text("${label}")`),
  'a:has-text("Next")',
].join(', ');

// Lower-case phrases that mark a listing as closed or missing.
// NOTE(review): the page.evaluate callback in apply() appears to declare its
// own copy of this list rather than receiving this constant; confirm whether
// this one is referenced anywhere.
const CLOSED_TEXTS = [
  'no longer accepting',
  'position has been filled',
  'this job is no longer',
  'job not found',
  'this position is closed',
  'listing has expired',
  'no longer available',
  'page you are looking for',
  'job may be no longer',
  'does not exist',
  'this role has been filled',
  'posting has closed',
];
export async function apply(page, job, formFiller, opts = {}) {
const url = opts.transformUrl ? opts.transformUrl(job.apply_url) : job.apply_url;
if (!url) return { status: 'no_button', meta: { title: job.title, company: job.company } };
const meta = { title: job.title, company: job.company };
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT });
await page.waitForTimeout(PAGE_LOAD_WAIT);
const meta = {
title: job.title,
company: job.company,
};
// Detect blockers: login walls, CAPTCHAs, closed listings
const pageCheck = await page.evaluate(() => {
// Detect blockers
const extraClosed = opts.closedTexts || [];
const pageCheck = await page.evaluate((extraClosed) => {
const text = (document.body.innerText || '').toLowerCase();
const hasLogin = !!(document.querySelector('input[type="password"]') ||
(text.includes('sign in') && text.includes('create account')) ||
(text.includes('log in') && text.includes('register')));
// Only block on visible CAPTCHAs — invisible reCAPTCHA (size=invisible) fires on submit and usually passes
const captchaFrames = Array.from(document.querySelectorAll('iframe[src*="recaptcha"], iframe[src*="captcha"]'));
const hasVisibleCaptcha = captchaFrames.some(f => {
const hasCaptcha = captchaFrames.some(f => {
if (f.src.includes('size=invisible')) return false;
const rect = f.getBoundingClientRect();
return rect.width > 50 && rect.height > 50;
});
const hasCaptcha = hasVisibleCaptcha;
const isClosed = text.includes('no longer accepting') || text.includes('position has been filled') ||
text.includes('this job is no longer') || text.includes('job not found') ||
text.includes('this position is closed') || text.includes('listing has expired') ||
text.includes('no longer available') || text.includes('page you are looking for') ||
text.includes('job may be no longer') || text.includes('does not exist') ||
text.includes('this role has been filled') || text.includes('posting has closed') ||
document.title.toLowerCase().includes('404');
const closedTexts = [
'no longer accepting', 'position has been filled',
'this job is no longer', 'job not found',
'this position is closed', 'listing has expired',
'no longer available', 'page you are looking for',
'job may be no longer', 'does not exist',
'this role has been filled', 'posting has closed',
...extraClosed,
];
const isClosed = closedTexts.some(t => text.includes(t)) || document.title.toLowerCase().includes('404');
return { hasLogin, hasCaptcha, isClosed };
}).catch(() => ({}));
}, extraClosed).catch(() => ({}));
if (pageCheck.isClosed) return { status: 'closed', meta };
if (pageCheck.hasLogin) return { status: 'skipped_login_required', meta };
if (pageCheck.hasCaptcha) return { status: 'skipped_captcha', meta };
// Some pages land directly on the form; others need an Apply button click
// Check if we landed directly on a form (with or without <form> wrapper)
const hasFormAlready = await page.$('input[type="text"], input[type="email"], textarea');
// Check if form is already loaded
const formSelector = opts.formDetector || 'input[type="text"], input[type="email"], textarea';
const hasFormAlready = await page.$(formSelector);
if (!hasFormAlready) {
const applyBtn = page.locator([
'a:has-text("Apply Now")',
'button:has-text("Apply Now")',
'a:has-text("Apply for this job")',
'button:has-text("Apply for this job")',
'a:has-text("Apply")',
'button:has-text("Apply")',
].join(', ')).first();
const applySelector = opts.applyButtonSelector || DEFAULT_APPLY_BUTTONS;
const applyBtn = page.locator(applySelector).first();
if (await applyBtn.count() === 0) return { status: 'no_button', meta };
@@ -74,11 +112,9 @@ export async function apply(page, job, formFiller) {
]);
if (newPage) {
// Apply opened a new tab — switch to it
await newPage.waitForLoadState('domcontentloaded').catch(() => {});
await newPage.waitForTimeout(PAGE_LOAD_WAIT);
// Recursively handle the new page (but return result to caller)
return applyOnPage(newPage, job, formFiller, meta);
return fillAndSubmit(newPage, job, formFiller, meta, opts);
}
await page.waitForTimeout(FORM_FILL_WAIT);
@@ -93,67 +129,56 @@ export async function apply(page, job, formFiller) {
if (postClick.hasCaptcha) return { status: 'skipped_captcha', meta };
}
return applyOnPage(page, job, formFiller, meta);
return fillAndSubmit(page, job, formFiller, meta, opts);
}
/**
 * Fill-and-submit loop: fills the current form step, then either submits or
 * advances to the next step, for at most MAX_STEPS iterations.
 *
 * NOTE(review): this span is a diff rendering. Two function headers
 * (`applyOnPage` and `fillAndSubmit`) and two versions of several statements
 * appear back to back; presumably the first of each pair is the removed line
 * and the second the added one. Confirm against the committed file.
 *
 * @param page - page (or newly opened tab) that currently shows the form
 * @param job - job record; not read inside the visible body
 * @param formFiller - exposes fill(page, resumePath) and profile.resume_path
 * @param meta - { title, company } echoed back in every result
 * @param opts - handler overrides read here: beforeSubmit, submitSelector, verifySelector
 * @returns { status, meta } where status is one of 'skipped_honeypot',
 *          'needs_answer' (with pending_question), 'submitted', 'incomplete'
 *          or 'no_submit'
 */
async function applyOnPage(page, job, formFiller, meta) {
async function fillAndSubmit(page, job, formFiller, meta, opts) {
for (let step = 0; step < MAX_STEPS; step++) {
// Fill the current page/step
const unknowns = await formFiller.fill(page, formFiller.profile.resume_path);
if (unknowns[0]?.honeypot) return { status: 'skipped_honeypot', meta };
if (unknowns.length > 0) return { status: 'needs_answer', pending_question: unknowns[0], meta };
// Look for submit button — try specific text first, then generic type="submit"
const submitBtn = await page.$([
'button:has-text("Submit Application")',
'button:has-text("Submit your application")',
'button:has-text("Apply Now")',
'button:has-text("Apply for this job")',
'input[type="submit"]:not([disabled])',
'button[type="submit"]:not([disabled])',
].join(', '));
// Hook: before submit (e.g. targeted resume upload)
// NOTE(review): sits inside the step loop ahead of the button lookup, so it
// also runs on steps that only offer a Next/Continue button.
if (opts.beforeSubmit) await opts.beforeSubmit(page, formFiller);
// Look for Next/Continue button (multi-step forms)
const nextBtn = !submitBtn ? await page.$([
'button:has-text("Next")',
'button:has-text("Continue")',
'button:has-text("Save and Continue")',
'a:has-text("Next")',
].join(', ')) : null;
// Find submit button
const submitSelector = opts.submitSelector || DEFAULT_SUBMIT_BUTTONS;
const submitBtn = page.locator(submitSelector).first();
const hasSubmit = await submitBtn.count() > 0;
if (submitBtn) {
if (hasSubmit) {
await submitBtn.click();
await page.waitForTimeout(SUBMIT_WAIT);
const postSubmit = await page.evaluate(() => {
// verifySelector is passed into the page and used with document.querySelector,
// so it has to be a plain CSS selector.
const verifySelector = opts.verifySelector || 'form button[type="submit"]:not([disabled])';
const postSubmit = await page.evaluate((vs) => {
const text = (document.body.innerText || '').toLowerCase();
return {
hasSuccess: text.includes('application submitted') || text.includes('successfully applied') ||
text.includes('thank you') || text.includes('application received') ||
text.includes('application has been') || text.includes('we received your'),
hasForm: !!document.querySelector('form button[type="submit"]:not([disabled])'),
hasForm: !!document.querySelector(vs),
};
}).catch(() => ({ hasSuccess: false, hasForm: false }));
}, verifySelector).catch(() => ({ hasSuccess: false, hasForm: false }));
// Counted as submitted when success text is shown OR the verify selector no
// longer matches; an evaluate failure yields hasForm: false and so also
// lands in this branch.
if (postSubmit.hasSuccess || !postSubmit.hasForm) {
return { status: 'submitted', meta };
}
console.log(` [generic] Submit clicked but form still present — may not have submitted`);
return { status: 'incomplete', meta };
}
// Multi-step: Next/Continue
// The next-button selector list is fixed; opts is not consulted for it.
const nextBtn = await page.$(DEFAULT_NEXT_BUTTONS);
if (nextBtn) {
await nextBtn.click();
await page.waitForTimeout(FORM_FILL_WAIT);
continue; // Fill next step
continue;
}
// No submit or next button found
return { status: 'no_submit', meta };
}
// Loop ran MAX_STEPS times without reaching a submit button.
console.log(` [generic] Exceeded ${MAX_STEPS} form steps`);
return { status: 'incomplete', meta };
}

View File

@@ -1,11 +1,12 @@
/**
* greenhouse.mjs — Greenhouse ATS handler
* Delegates to generic handler — Greenhouse forms are standard HTML forms
* greenhouse.mjs — Greenhouse ATS handler (extends generic)
*/
import { apply as genericApply } from './generic.mjs';
export const SUPPORTED_TYPES = ['greenhouse'];
/**
 * Greenhouse entry point: forwards to the shared generic handler.
 *
 * NOTE(review): this span is a diff rendering. The bare three-argument call
 * is presumably the removed line and the call carrying `submitSelector` the
 * added one; confirm against the committed file.
 */
export async function apply(page, job, formFiller) {
return genericApply(page, job, formFiller);
return genericApply(page, job, formFiller, {
// Replaces the generic handler's default submit-button selector list.
submitSelector: 'button:has-text("Submit Application"), input[type="submit"]',
});
}

View File

@@ -1,7 +1,9 @@
/**
* index.mjs — Apply handler registry
* Maps apply_type → handler module
* To add a new ATS: create lib/apply/<name>.mjs and add one line here
*
* Two lookup mechanisms:
* 1. apply_type → handler (explicit ATS classification)
* 2. apply_url domain → handler (auto-detect from URL, fallback to generic)
*/
import * as easyApply from './easy_apply.mjs';
import * as greenhouse from './greenhouse.mjs';
@@ -31,12 +33,34 @@ for (const handler of ALL_HANDLERS) {
}
}
// Domain → handler mapping for URL-based auto-detection.
// Consulted when apply_type does not resolve to a handler. Entries are tried
// in order and the first pattern that matches wins, so list more specific
// domains before broader ones.
// The patterns are unanchored domain fragments: a caller that tests them
// against a whole URL string will also match look-alike hosts (e.g. "lever.co"
// inside "clever.com") and occurrences in the path or query string.
// `lever\.co` already covers jobs.lever.co, so no separate alternative is listed.
const DOMAIN_REGISTRY = [
  { pattern: /ashbyhq\.com/i, handler: ashby },
  { pattern: /greenhouse\.io|grnh\.se/i, handler: greenhouse },
  { pattern: /lever\.co/i, handler: lever },
  { pattern: /workday\.com|myworkdayjobs\.com|myworkdaysite\.com/i, handler: workday },
  { pattern: /jobvite\.com|applytojob\.com/i, handler: jobvite },
];
/**
* Get handler for a given apply_type
* Returns null if not supported
* Get handler for a job — checks apply_type first, then URL domain, then generic
*/
export function getHandler(applyType) {
return REGISTRY[applyType] || null;
/**
 * Pick the handler module for a job.
 *
 * Resolution order:
 *   1. job.apply_type, when REGISTRY has a handler for it;
 *   2. the host of job.apply_url, matched against DOMAIN_REGISTRY;
 *   3. the generic handler, when the job has an apply_url at all.
 *
 * Domain patterns are matched against the parsed hostname as a domain suffix
 * (the whole host, or preceded by a dot). Testing them against the raw URL
 * would route look-alike hosts such as clever.com, or a URL that merely
 * mentions greenhouse.io in its query string, to the wrong ATS handler.
 *
 * @param job - job record; apply_type and apply_url are read
 * @returns the handler module, or null when there is neither a known
 *          apply_type nor an apply_url
 */
function resolveHandler(job) {
  // 1. Explicit type match
  if (job.apply_type && REGISTRY[job.apply_type]) {
    return REGISTRY[job.apply_type];
  }

  // Without a URL there is nothing left to route on.
  if (!job.apply_url) return null;

  // 2. Domain match from apply_url
  let host = null;
  try {
    host = new URL(job.apply_url).hostname;
  } catch {
    // Not an absolute URL (e.g. scheme-less "jobs.lever.co/acme"): keep the
    // best-effort behavior and test the raw string below.
    host = null;
  }

  for (const { pattern, handler } of DOMAIN_REGISTRY) {
    const matched = host === null
      ? pattern.test(job.apply_url)
      // Anchor the registry pattern to a label boundary and the end of the host.
      : new RegExp(`(?:^|\\.)(?:${pattern.source})$`, pattern.flags).test(host);
    if (matched) return handler;
  }

  // 3. Fallback to generic
  return generic;
}
/**
@@ -46,23 +70,6 @@ export function supportedTypes() {
return Object.keys(REGISTRY);
}
/**
* Status normalization — handlers return platform-specific statuses,
* this map converts them to generic statuses that job_applier.mjs understands.
*
* Generic statuses (what handleResult expects):
* submitted — application was submitted successfully
* needs_answer — blocked on unknown form question, sent to Telegram
* skipped_recruiter_only — LinkedIn recruiter-only listing
* skipped_external_unsupported — external ATS not yet implemented
* skipped_no_apply — no apply button/modal/submit found on page
* skipped_honeypot — honeypot question detected, application abandoned
* stuck — modal progress stalled after retries
* incomplete — ran out of modal steps without submitting
*
* When adding a new handler, return any status you want — if it doesn't match
* a generic status above, add a mapping here so job_applier doesn't need to change.
*/
const STATUS_MAP = {
no_button: 'skipped_no_apply',
no_submit: 'skipped_no_apply',
@@ -74,7 +81,7 @@ const STATUS_MAP = {
* Returns result object with normalized status
*/
export async function applyToJob(page, job, formFiller) {
const handler = getHandler(job.apply_type);
const handler = resolveHandler(job);
if (!handler) {
return {
status: 'skipped_external_unsupported',

View File

@@ -1,11 +1,12 @@
/**
* jobvite.mjs — Jobvite ATS handler
* Delegates to generic handler — Jobvite forms are standard HTML forms
* jobvite.mjs — Jobvite ATS handler (extends generic)
*/
import { apply as genericApply } from './generic.mjs';
export const SUPPORTED_TYPES = ['jobvite'];
/**
 * Jobvite entry point: forwards to the shared generic handler.
 *
 * NOTE(review): this span is a diff rendering. The bare three-argument call
 * is presumably the removed line and the call carrying `submitSelector` the
 * added one; confirm against the committed file.
 */
export async function apply(page, job, formFiller) {
return genericApply(page, job, formFiller);
return genericApply(page, job, formFiller, {
// Replaces the generic handler's default submit-button selector list.
submitSelector: 'button:has-text("Submit"), input[type="submit"]',
});
}

View File

@@ -1,11 +1,13 @@
/**
* lever.mjs — Lever ATS handler
* Delegates to generic handler — Lever forms are standard HTML forms
* lever.mjs — Lever ATS handler (extends generic)
*/
import { apply as genericApply } from './generic.mjs';
export const SUPPORTED_TYPES = ['lever'];
/**
 * Lever entry point: forwards to the shared generic handler.
 *
 * NOTE(review): this span is a diff rendering. The bare three-argument call
 * is presumably the removed line and the call carrying `submitSelector` the
 * added one; confirm against the committed file.
 */
export async function apply(page, job, formFiller) {
return genericApply(page, job, formFiller);
return genericApply(page, job, formFiller, {
// Lever apply URLs already end in /apply
// (so no transformUrl override is passed; only the submit selector is replaced)
submitSelector: 'button:has-text("Submit application"), button[type="submit"]',
});
}

View File

@@ -1,12 +1,13 @@
/**
* workday.mjs — Workday ATS handler
* Delegates to generic handler. Workday often requires account creation,
* so many will return skipped_login_required — that's expected.
* workday.mjs — Workday ATS handler (extends generic)
* Most Workday sites require account creation — generic will return skipped_login_required
*/
import { apply as genericApply } from './generic.mjs';
export const SUPPORTED_TYPES = ['workday'];
/**
 * Workday entry point: forwards to the shared generic handler.
 *
 * NOTE(review): this span is a diff rendering. The bare three-argument call
 * is presumably the removed line and the call carrying `closedTexts` the
 * added one; confirm against the committed file.
 */
export async function apply(page, job, formFiller) {
return genericApply(page, job, formFiller);
return genericApply(page, job, formFiller, {
// Extra closed-listing phrase, appended to the generic handler's built-in list.
// It is compared against lower-cased page text, so it must stay lower-case.
closedTexts: ['this job posting is no longer active'],
});
}