Refactor apply handlers: generic with extensions + domain auto-routing

Generic handler now accepts options (transformUrl, formDetector,
submitSelector, resumeSelector, beforeSubmit, verifySelector, etc.).
Each ATS handler passes its overrides instead of reimplementing.

Registry resolves handlers by: apply_type -> URL domain -> generic fallback.
New ATS handlers only need to export SUPPORTED_TYPES and an apply() that
calls genericApply with platform-specific options.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-06 20:41:54 -08:00
parent 4f202a4e91
commit ae797d73eb
7 changed files with 155 additions and 177 deletions

View File

@@ -1,82 +1,23 @@
/** /**
* ashby.mjs — Ashby ATS handler * ashby.mjs — Ashby ATS handler (extends generic)
*
* Ashby forms have a consistent structure:
* - URLs ending in /application land directly on the form
* - Other URLs show a job listing with "Apply for this Job" button
* - Form fields: Name, Email, Resume (file), optional extras (phone, LinkedIn, etc.)
* - Resume input has id="_systemfield_resume"
* - There's also an "autofill from resume" file input — don't confuse with actual resume
* - "Upload file" buttons are type="submit" — must target "Submit Application" specifically
* - Invisible reCAPTCHA on submit
*/ */
import { import { apply as genericApply } from './generic.mjs';
NAVIGATION_TIMEOUT, PAGE_LOAD_WAIT, FORM_FILL_WAIT, SUBMIT_WAIT
} from '../constants.mjs';
export const SUPPORTED_TYPES = ['ashby']; export const SUPPORTED_TYPES = ['ashby'];
export async function apply(page, job, formFiller) { export async function apply(page, job, formFiller) {
const url = job.apply_url; return genericApply(page, job, formFiller, {
if (!url) return { status: 'no_button', meta: { title: job.title, company: job.company } }; transformUrl: (url) => url.includes('/application') ? url : url.replace(/\/?(\?|$)/, '/application$1'),
formDetector: '#_systemfield_name',
const meta = { title: job.title, company: job.company }; applyButtonSelector: 'button:has-text("Apply for this Job"), a:has-text("Apply for this Job")',
submitSelector: 'button:has-text("Submit Application")',
// Navigate — append /application if not already there verifySelector: '#_systemfield_name',
const applyUrl = url.includes('/application') ? url : url.replace(/\/?(\?|$)/, '/application$1'); beforeSubmit: async (page, formFiller) => {
await page.goto(applyUrl, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT }); const resumeInput = await page.$('#_systemfield_resume');
await page.waitForTimeout(PAGE_LOAD_WAIT); if (resumeInput && formFiller.profile.resume_path) {
await resumeInput.setInputFiles(formFiller.profile.resume_path).catch(() => {});
// Check if we landed on the form or a listing page await page.waitForTimeout(1000);
const hasForm = await page.$('#_systemfield_name, input[name="_systemfield_name"]'); }
if (!hasForm) { },
// Try clicking "Apply for this Job" });
const applyBtn = page.locator('button:has-text("Apply for this Job"), a:has-text("Apply for this Job")').first();
if (await applyBtn.count() === 0) return { status: 'no_button', meta };
await applyBtn.click();
await page.waitForTimeout(FORM_FILL_WAIT);
}
// Check for closed listing
const closed = await page.evaluate(() => {
const text = (document.body.innerText || '').toLowerCase();
return text.includes('no longer accepting') || text.includes('position has been filled') ||
text.includes('no longer available') || text.includes('does not exist');
}).catch(() => false);
if (closed) return { status: 'closed', meta };
// Fill form fields
const unknowns = await formFiller.fill(page, formFiller.profile.resume_path);
if (unknowns[0]?.honeypot) return { status: 'skipped_honeypot', meta };
if (unknowns.length > 0) return { status: 'needs_answer', pending_question: unknowns[0], meta };
// Upload resume to the correct file input (not the autofill one)
const resumeInput = await page.$('#_systemfield_resume');
if (resumeInput && formFiller.profile.resume_path) {
await resumeInput.setInputFiles(formFiller.profile.resume_path).catch(() => {});
await page.waitForTimeout(1000);
}
// Click "Submit Application" specifically — NOT the "Upload file" buttons
const submitBtn = page.locator('button:has-text("Submit Application")').first();
if (await submitBtn.count() === 0) return { status: 'no_submit', meta };
await submitBtn.click();
await page.waitForTimeout(SUBMIT_WAIT);
// Verify submission
const postSubmit = await page.evaluate(() => {
const text = (document.body.innerText || '').toLowerCase();
return {
hasSuccess: text.includes('thank you') || text.includes('application submitted') ||
text.includes('application received') || text.includes('successfully'),
hasForm: !!document.querySelector('#_systemfield_name'),
};
}).catch(() => ({ hasSuccess: false, hasForm: false }));
if (postSubmit.hasSuccess || !postSubmit.hasForm) {
return { status: 'submitted', meta };
}
return { status: 'incomplete', meta };
} }

View File

@@ -1,8 +1,19 @@
/** /**
* generic.mjs — Generic external ATS handler * generic.mjs — Configurable external ATS handler
* Best-effort form filler for any career page with a standard HTML form. *
* Handles single-page and multi-step flows (up to 5 steps). * Base apply logic for any career page. ATS-specific handlers pass
* Skips pages that require account creation or have CAPTCHAs. * an options object to customize URL transformation, selectors,
* resume targeting, and submission verification.
*
* Options:
* transformUrl(url) — modify the apply URL before navigation
* formDetector — CSS selector to detect if form is already loaded
* applyButtonSelector — selector for the "Apply" button on listing pages
* resumeSelector — CSS selector for the resume file input
* submitSelector — selector for the submit button (use locator syntax)
* verifySelector — CSS selector to check if form is still present after submit
* beforeSubmit(page, formFiller) — async hook to run before clicking submit (e.g. upload resume)
* closedTexts — extra strings to detect closed listings
*/ */
import { import {
NAVIGATION_TIMEOUT, PAGE_LOAD_WAIT, FORM_FILL_WAIT, SUBMIT_WAIT NAVIGATION_TIMEOUT, PAGE_LOAD_WAIT, FORM_FILL_WAIT, SUBMIT_WAIT
@@ -12,58 +23,85 @@ export const SUPPORTED_TYPES = ['unknown_external'];
const MAX_STEPS = 5; const MAX_STEPS = 5;
export async function apply(page, job, formFiller) { const DEFAULT_APPLY_BUTTONS = [
const url = job.apply_url; 'a:has-text("Apply Now")',
'button:has-text("Apply Now")',
'a:has-text("Apply for this job")',
'button:has-text("Apply for this job")',
'a:has-text("Apply")',
'button:has-text("Apply")',
].join(', ');
const DEFAULT_SUBMIT_BUTTONS = [
'button:has-text("Submit Application")',
'button:has-text("Submit your application")',
'button:has-text("Apply Now")',
'button:has-text("Apply for this job")',
'input[type="submit"]:not([disabled])',
'button[type="submit"]:not([disabled])',
].join(', ');
const DEFAULT_NEXT_BUTTONS = [
'button:has-text("Next")',
'button:has-text("Continue")',
'button:has-text("Save and Continue")',
'a:has-text("Next")',
].join(', ');
const CLOSED_TEXTS = [
'no longer accepting', 'position has been filled',
'this job is no longer', 'job not found',
'this position is closed', 'listing has expired',
'no longer available', 'page you are looking for',
'job may be no longer', 'does not exist',
'this role has been filled', 'posting has closed',
];
export async function apply(page, job, formFiller, opts = {}) {
const url = opts.transformUrl ? opts.transformUrl(job.apply_url) : job.apply_url;
if (!url) return { status: 'no_button', meta: { title: job.title, company: job.company } }; if (!url) return { status: 'no_button', meta: { title: job.title, company: job.company } };
const meta = { title: job.title, company: job.company };
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT }); await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT });
await page.waitForTimeout(PAGE_LOAD_WAIT); await page.waitForTimeout(PAGE_LOAD_WAIT);
const meta = { // Detect blockers
title: job.title, const extraClosed = opts.closedTexts || [];
company: job.company, const pageCheck = await page.evaluate((extraClosed) => {
};
// Detect blockers: login walls, CAPTCHAs, closed listings
const pageCheck = await page.evaluate(() => {
const text = (document.body.innerText || '').toLowerCase(); const text = (document.body.innerText || '').toLowerCase();
const hasLogin = !!(document.querySelector('input[type="password"]') || const hasLogin = !!(document.querySelector('input[type="password"]') ||
(text.includes('sign in') && text.includes('create account')) || (text.includes('sign in') && text.includes('create account')) ||
(text.includes('log in') && text.includes('register'))); (text.includes('log in') && text.includes('register')));
// Only block on visible CAPTCHAs — invisible reCAPTCHA (size=invisible) fires on submit and usually passes
const captchaFrames = Array.from(document.querySelectorAll('iframe[src*="recaptcha"], iframe[src*="captcha"]')); const captchaFrames = Array.from(document.querySelectorAll('iframe[src*="recaptcha"], iframe[src*="captcha"]'));
const hasVisibleCaptcha = captchaFrames.some(f => { const hasCaptcha = captchaFrames.some(f => {
if (f.src.includes('size=invisible')) return false; if (f.src.includes('size=invisible')) return false;
const rect = f.getBoundingClientRect(); const rect = f.getBoundingClientRect();
return rect.width > 50 && rect.height > 50; return rect.width > 50 && rect.height > 50;
}); });
const hasCaptcha = hasVisibleCaptcha; const closedTexts = [
const isClosed = text.includes('no longer accepting') || text.includes('position has been filled') || 'no longer accepting', 'position has been filled',
text.includes('this job is no longer') || text.includes('job not found') || 'this job is no longer', 'job not found',
text.includes('this position is closed') || text.includes('listing has expired') || 'this position is closed', 'listing has expired',
text.includes('no longer available') || text.includes('page you are looking for') || 'no longer available', 'page you are looking for',
text.includes('job may be no longer') || text.includes('does not exist') || 'job may be no longer', 'does not exist',
text.includes('this role has been filled') || text.includes('posting has closed') || 'this role has been filled', 'posting has closed',
document.title.toLowerCase().includes('404'); ...extraClosed,
];
const isClosed = closedTexts.some(t => text.includes(t)) || document.title.toLowerCase().includes('404');
return { hasLogin, hasCaptcha, isClosed }; return { hasLogin, hasCaptcha, isClosed };
}).catch(() => ({})); }, extraClosed).catch(() => ({}));
if (pageCheck.isClosed) return { status: 'closed', meta }; if (pageCheck.isClosed) return { status: 'closed', meta };
if (pageCheck.hasLogin) return { status: 'skipped_login_required', meta }; if (pageCheck.hasLogin) return { status: 'skipped_login_required', meta };
if (pageCheck.hasCaptcha) return { status: 'skipped_captcha', meta }; if (pageCheck.hasCaptcha) return { status: 'skipped_captcha', meta };
// Some pages land directly on the form; others need an Apply button click // Check if form is already loaded
// Check if we landed directly on a form (with or without <form> wrapper) const formSelector = opts.formDetector || 'input[type="text"], input[type="email"], textarea';
const hasFormAlready = await page.$('input[type="text"], input[type="email"], textarea'); const hasFormAlready = await page.$(formSelector);
if (!hasFormAlready) { if (!hasFormAlready) {
const applyBtn = page.locator([ const applySelector = opts.applyButtonSelector || DEFAULT_APPLY_BUTTONS;
'a:has-text("Apply Now")', const applyBtn = page.locator(applySelector).first();
'button:has-text("Apply Now")',
'a:has-text("Apply for this job")',
'button:has-text("Apply for this job")',
'a:has-text("Apply")',
'button:has-text("Apply")',
].join(', ')).first();
if (await applyBtn.count() === 0) return { status: 'no_button', meta }; if (await applyBtn.count() === 0) return { status: 'no_button', meta };
@@ -74,11 +112,9 @@ export async function apply(page, job, formFiller) {
]); ]);
if (newPage) { if (newPage) {
// Apply opened a new tab — switch to it
await newPage.waitForLoadState('domcontentloaded').catch(() => {}); await newPage.waitForLoadState('domcontentloaded').catch(() => {});
await newPage.waitForTimeout(PAGE_LOAD_WAIT); await newPage.waitForTimeout(PAGE_LOAD_WAIT);
// Recursively handle the new page (but return result to caller) return fillAndSubmit(newPage, job, formFiller, meta, opts);
return applyOnPage(newPage, job, formFiller, meta);
} }
await page.waitForTimeout(FORM_FILL_WAIT); await page.waitForTimeout(FORM_FILL_WAIT);
@@ -93,67 +129,56 @@ export async function apply(page, job, formFiller) {
if (postClick.hasCaptcha) return { status: 'skipped_captcha', meta }; if (postClick.hasCaptcha) return { status: 'skipped_captcha', meta };
} }
return applyOnPage(page, job, formFiller, meta); return fillAndSubmit(page, job, formFiller, meta, opts);
} }
async function applyOnPage(page, job, formFiller, meta) { async function fillAndSubmit(page, job, formFiller, meta, opts) {
for (let step = 0; step < MAX_STEPS; step++) { for (let step = 0; step < MAX_STEPS; step++) {
// Fill the current page/step
const unknowns = await formFiller.fill(page, formFiller.profile.resume_path); const unknowns = await formFiller.fill(page, formFiller.profile.resume_path);
if (unknowns[0]?.honeypot) return { status: 'skipped_honeypot', meta }; if (unknowns[0]?.honeypot) return { status: 'skipped_honeypot', meta };
if (unknowns.length > 0) return { status: 'needs_answer', pending_question: unknowns[0], meta }; if (unknowns.length > 0) return { status: 'needs_answer', pending_question: unknowns[0], meta };
// Look for submit button — try specific text first, then generic type="submit" // Hook: before submit (e.g. targeted resume upload)
const submitBtn = await page.$([ if (opts.beforeSubmit) await opts.beforeSubmit(page, formFiller);
'button:has-text("Submit Application")',
'button:has-text("Submit your application")',
'button:has-text("Apply Now")',
'button:has-text("Apply for this job")',
'input[type="submit"]:not([disabled])',
'button[type="submit"]:not([disabled])',
].join(', '));
// Look for Next/Continue button (multi-step forms) // Find submit button
const nextBtn = !submitBtn ? await page.$([ const submitSelector = opts.submitSelector || DEFAULT_SUBMIT_BUTTONS;
'button:has-text("Next")', const submitBtn = page.locator(submitSelector).first();
'button:has-text("Continue")', const hasSubmit = await submitBtn.count() > 0;
'button:has-text("Save and Continue")',
'a:has-text("Next")',
].join(', ')) : null;
if (submitBtn) { if (hasSubmit) {
await submitBtn.click(); await submitBtn.click();
await page.waitForTimeout(SUBMIT_WAIT); await page.waitForTimeout(SUBMIT_WAIT);
const postSubmit = await page.evaluate(() => { const verifySelector = opts.verifySelector || 'form button[type="submit"]:not([disabled])';
const postSubmit = await page.evaluate((vs) => {
const text = (document.body.innerText || '').toLowerCase(); const text = (document.body.innerText || '').toLowerCase();
return { return {
hasSuccess: text.includes('application submitted') || text.includes('successfully applied') || hasSuccess: text.includes('application submitted') || text.includes('successfully applied') ||
text.includes('thank you') || text.includes('application received') || text.includes('thank you') || text.includes('application received') ||
text.includes('application has been') || text.includes('we received your'), text.includes('application has been') || text.includes('we received your'),
hasForm: !!document.querySelector('form button[type="submit"]:not([disabled])'), hasForm: !!document.querySelector(vs),
}; };
}).catch(() => ({ hasSuccess: false, hasForm: false })); }, verifySelector).catch(() => ({ hasSuccess: false, hasForm: false }));
if (postSubmit.hasSuccess || !postSubmit.hasForm) { if (postSubmit.hasSuccess || !postSubmit.hasForm) {
return { status: 'submitted', meta }; return { status: 'submitted', meta };
} }
console.log(` [generic] Submit clicked but form still present — may not have submitted`);
return { status: 'incomplete', meta }; return { status: 'incomplete', meta };
} }
// Multi-step: Next/Continue
const nextBtn = await page.$(DEFAULT_NEXT_BUTTONS);
if (nextBtn) { if (nextBtn) {
await nextBtn.click(); await nextBtn.click();
await page.waitForTimeout(FORM_FILL_WAIT); await page.waitForTimeout(FORM_FILL_WAIT);
continue; // Fill next step continue;
} }
// No submit or next button found
return { status: 'no_submit', meta }; return { status: 'no_submit', meta };
} }
console.log(` [generic] Exceeded ${MAX_STEPS} form steps`);
return { status: 'incomplete', meta }; return { status: 'incomplete', meta };
} }

View File

@@ -1,11 +1,12 @@
/** /**
* greenhouse.mjs — Greenhouse ATS handler * greenhouse.mjs — Greenhouse ATS handler (extends generic)
* Delegates to generic handler — Greenhouse forms are standard HTML forms
*/ */
import { apply as genericApply } from './generic.mjs'; import { apply as genericApply } from './generic.mjs';
export const SUPPORTED_TYPES = ['greenhouse']; export const SUPPORTED_TYPES = ['greenhouse'];
export async function apply(page, job, formFiller) { export async function apply(page, job, formFiller) {
return genericApply(page, job, formFiller); return genericApply(page, job, formFiller, {
submitSelector: 'button:has-text("Submit Application"), input[type="submit"]',
});
} }

View File

@@ -1,7 +1,9 @@
/** /**
* index.mjs — Apply handler registry * index.mjs — Apply handler registry
* Maps apply_type → handler module *
* To add a new ATS: create lib/apply/<name>.mjs and add one line here * Two lookup mechanisms:
* 1. apply_type → handler (explicit ATS classification)
* 2. apply_url domain → handler (auto-detect from URL, fallback to generic)
*/ */
import * as easyApply from './easy_apply.mjs'; import * as easyApply from './easy_apply.mjs';
import * as greenhouse from './greenhouse.mjs'; import * as greenhouse from './greenhouse.mjs';
@@ -31,12 +33,34 @@ for (const handler of ALL_HANDLERS) {
} }
} }
// Domain → handler mapping for URL-based auto-detection
// When apply_type is unknown_external, match apply_url against these patterns
const DOMAIN_REGISTRY = [
{ pattern: /ashbyhq\.com/i, handler: ashby },
{ pattern: /greenhouse\.io|grnh\.se/i, handler: greenhouse },
{ pattern: /lever\.co|jobs\.lever\.co/i, handler: lever },
{ pattern: /workday\.com|myworkdayjobs\.com|myworkdaysite\.com/i, handler: workday },
{ pattern: /jobvite\.com|applytojob\.com/i, handler: jobvite },
];
/** /**
* Get handler for a given apply_type * Get handler for a job — checks apply_type first, then URL domain, then generic
* Returns null if not supported
*/ */
export function getHandler(applyType) { function resolveHandler(job) {
return REGISTRY[applyType] || null; // Explicit type match
if (job.apply_type && REGISTRY[job.apply_type]) {
return REGISTRY[job.apply_type];
}
// Domain match from apply_url
if (job.apply_url) {
for (const { pattern, handler } of DOMAIN_REGISTRY) {
if (pattern.test(job.apply_url)) return handler;
}
}
// Fallback to generic if it has a URL, otherwise unsupported
return job.apply_url ? generic : null;
} }
/** /**
@@ -46,23 +70,6 @@ export function supportedTypes() {
return Object.keys(REGISTRY); return Object.keys(REGISTRY);
} }
/**
* Status normalization — handlers return platform-specific statuses,
* this map converts them to generic statuses that job_applier.mjs understands.
*
* Generic statuses (what handleResult expects):
* submitted — application was submitted successfully
* needs_answer — blocked on unknown form question, sent to Telegram
* skipped_recruiter_only — LinkedIn recruiter-only listing
* skipped_external_unsupported — external ATS not yet implemented
* skipped_no_apply — no apply button/modal/submit found on page
* skipped_honeypot — honeypot question detected, application abandoned
* stuck — modal progress stalled after retries
* incomplete — ran out of modal steps without submitting
*
* When adding a new handler, return any status you want — if it doesn't match
* a generic status above, add a mapping here so job_applier doesn't need to change.
*/
const STATUS_MAP = { const STATUS_MAP = {
no_button: 'skipped_no_apply', no_button: 'skipped_no_apply',
no_submit: 'skipped_no_apply', no_submit: 'skipped_no_apply',
@@ -74,7 +81,7 @@ const STATUS_MAP = {
* Returns result object with normalized status * Returns result object with normalized status
*/ */
export async function applyToJob(page, job, formFiller) { export async function applyToJob(page, job, formFiller) {
const handler = getHandler(job.apply_type); const handler = resolveHandler(job);
if (!handler) { if (!handler) {
return { return {
status: 'skipped_external_unsupported', status: 'skipped_external_unsupported',

View File

@@ -1,11 +1,12 @@
/** /**
* jobvite.mjs — Jobvite ATS handler * jobvite.mjs — Jobvite ATS handler (extends generic)
* Delegates to generic handler — Jobvite forms are standard HTML forms
*/ */
import { apply as genericApply } from './generic.mjs'; import { apply as genericApply } from './generic.mjs';
export const SUPPORTED_TYPES = ['jobvite']; export const SUPPORTED_TYPES = ['jobvite'];
export async function apply(page, job, formFiller) { export async function apply(page, job, formFiller) {
return genericApply(page, job, formFiller); return genericApply(page, job, formFiller, {
submitSelector: 'button:has-text("Submit"), input[type="submit"]',
});
} }

View File

@@ -1,11 +1,13 @@
/** /**
* lever.mjs — Lever ATS handler * lever.mjs — Lever ATS handler (extends generic)
* Delegates to generic handler — Lever forms are standard HTML forms
*/ */
import { apply as genericApply } from './generic.mjs'; import { apply as genericApply } from './generic.mjs';
export const SUPPORTED_TYPES = ['lever']; export const SUPPORTED_TYPES = ['lever'];
export async function apply(page, job, formFiller) { export async function apply(page, job, formFiller) {
return genericApply(page, job, formFiller); return genericApply(page, job, formFiller, {
// Lever apply URLs already end in /apply
submitSelector: 'button:has-text("Submit application"), button[type="submit"]',
});
} }

View File

@@ -1,12 +1,13 @@
/** /**
* workday.mjs — Workday ATS handler * workday.mjs — Workday ATS handler (extends generic)
* Delegates to generic handler. Workday often requires account creation, * Most Workday sites require account creation — generic will return skipped_login_required
* so many will return skipped_login_required — that's expected.
*/ */
import { apply as genericApply } from './generic.mjs'; import { apply as genericApply } from './generic.mjs';
export const SUPPORTED_TYPES = ['workday']; export const SUPPORTED_TYPES = ['workday'];
export async function apply(page, job, formFiller) { export async function apply(page, job, formFiller) {
return genericApply(page, job, formFiller); return genericApply(page, job, formFiller, {
closedTexts: ['this job posting is no longer active'],
});
} }