Refactor apply handlers: generic with extensions + domain auto-routing

Generic handler now accepts options (transformUrl, formDetector,
submitSelector, resumeSelector, beforeSubmit, verifySelector, etc.).
Each ATS handler passes its overrides instead of reimplementing.

Registry resolves handlers by: apply_type -> URL domain -> generic fallback.
New ATS handlers only need to export SUPPORTED_TYPES and an apply() that
calls genericApply with platform-specific options.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-06 20:41:54 -08:00
parent 4f202a4e91
commit ae797d73eb
7 changed files with 155 additions and 177 deletions

View File

@@ -1,8 +1,19 @@
/**
* generic.mjs — Generic external ATS handler
* Best-effort form filler for any career page with a standard HTML form.
* Handles single-page and multi-step flows (up to 5 steps).
* Skips pages that require account creation or have CAPTCHAs.
* generic.mjs — Configurable external ATS handler
*
* Base apply logic for any career page. ATS-specific handlers pass
* an options object to customize URL transformation, selectors,
* resume targeting, and submission verification.
*
* Options:
* transformUrl(url) — modify the apply URL before navigation
* formDetector — CSS selector to detect if form is already loaded
* applyButtonSelector — selector for the "Apply" button on listing pages
* resumeSelector — CSS selector for the resume file input
* submitSelector — selector for the submit button (use locator syntax)
* verifySelector — CSS selector to check if form is still present after submit
* beforeSubmit(page, formFiller) — async hook awaited after each step is filled, before the submit/next button is looked up (e.g. upload resume)
* closedTexts — extra lowercase strings that mark a listing as closed (matched against the lowercased page text)
*/
import {
NAVIGATION_TIMEOUT, PAGE_LOAD_WAIT, FORM_FILL_WAIT, SUBMIT_WAIT
@@ -12,58 +23,85 @@ export const SUPPORTED_TYPES = ['unknown_external'];
const MAX_STEPS = 5;
export async function apply(page, job, formFiller) {
const url = job.apply_url;
/**
 * Fallback selector list for the "Apply" control on a listing page, used
 * when the caller does not pass `applyButtonSelector`. For every label
 * there is one `a` entry and one `button` entry; all entries are joined
 * into a single comma-separated selector string.
 */
const DEFAULT_APPLY_BUTTONS = ['Apply Now', 'Apply for this job', 'Apply']
  .flatMap((label) => [`a:has-text("${label}")`, `button:has-text("${label}")`])
  .join(', ');
/**
 * Fallback selector list for the final submit control, used when the
 * caller does not pass `submitSelector`. Text-labelled buttons are listed
 * first, followed by enabled `type="submit"` inputs and buttons; the
 * entries are joined into one comma-separated selector string.
 */
const DEFAULT_SUBMIT_BUTTONS = [
  ...['Submit Application', 'Submit your application', 'Apply Now', 'Apply for this job']
    .map((label) => `button:has-text("${label}")`),
  ...['input', 'button'].map((tag) => `${tag}[type="submit"]:not([disabled])`),
].join(', ');
/**
 * Selector list for the control that advances a multi-step form:
 * buttons labelled Next / Continue / Save and Continue, plus a link
 * labelled Next. Joined into one comma-separated selector string.
 */
const DEFAULT_NEXT_BUTTONS = [
  ...['Next', 'Continue', 'Save and Continue'].map((label) => `button:has-text("${label}")`),
  'a:has-text("Next")',
].join(', ');
/**
 * Lowercase phrases that indicate a job listing is closed or missing.
 *
 * NOTE(review): the visible apply() code repeats this same list inline
 * inside its page.evaluate callback instead of referencing this constant —
 * confirm whether this constant is used anywhere.
 */
const CLOSED_TEXTS = [
  'no longer accepting',
  'position has been filled',
  'this job is no longer',
  'job not found',
  'this position is closed',
  'listing has expired',
  'no longer available',
  'page you are looking for',
  'job may be no longer',
  'does not exist',
  'this role has been filled',
  'posting has closed',
];
export async function apply(page, job, formFiller, opts = {}) {
const url = opts.transformUrl ? opts.transformUrl(job.apply_url) : job.apply_url;
if (!url) return { status: 'no_button', meta: { title: job.title, company: job.company } };
const meta = { title: job.title, company: job.company };
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT });
await page.waitForTimeout(PAGE_LOAD_WAIT);
const meta = {
title: job.title,
company: job.company,
};
// Detect blockers: login walls, CAPTCHAs, closed listings
const pageCheck = await page.evaluate(() => {
// Detect blockers
const extraClosed = opts.closedTexts || [];
const pageCheck = await page.evaluate((extraClosed) => {
const text = (document.body.innerText || '').toLowerCase();
const hasLogin = !!(document.querySelector('input[type="password"]') ||
(text.includes('sign in') && text.includes('create account')) ||
(text.includes('log in') && text.includes('register')));
// Only block on visible CAPTCHAs — invisible reCAPTCHA (size=invisible) fires on submit and usually passes
const captchaFrames = Array.from(document.querySelectorAll('iframe[src*="recaptcha"], iframe[src*="captcha"]'));
const hasVisibleCaptcha = captchaFrames.some(f => {
const hasCaptcha = captchaFrames.some(f => {
if (f.src.includes('size=invisible')) return false;
const rect = f.getBoundingClientRect();
return rect.width > 50 && rect.height > 50;
});
const hasCaptcha = hasVisibleCaptcha;
const isClosed = text.includes('no longer accepting') || text.includes('position has been filled') ||
text.includes('this job is no longer') || text.includes('job not found') ||
text.includes('this position is closed') || text.includes('listing has expired') ||
text.includes('no longer available') || text.includes('page you are looking for') ||
text.includes('job may be no longer') || text.includes('does not exist') ||
text.includes('this role has been filled') || text.includes('posting has closed') ||
document.title.toLowerCase().includes('404');
const closedTexts = [
'no longer accepting', 'position has been filled',
'this job is no longer', 'job not found',
'this position is closed', 'listing has expired',
'no longer available', 'page you are looking for',
'job may be no longer', 'does not exist',
'this role has been filled', 'posting has closed',
...extraClosed,
];
const isClosed = closedTexts.some(t => text.includes(t)) || document.title.toLowerCase().includes('404');
return { hasLogin, hasCaptcha, isClosed };
}).catch(() => ({}));
}, extraClosed).catch(() => ({}));
if (pageCheck.isClosed) return { status: 'closed', meta };
if (pageCheck.hasLogin) return { status: 'skipped_login_required', meta };
if (pageCheck.hasCaptcha) return { status: 'skipped_captcha', meta };
// Some pages land directly on the form; others need an Apply button click
// Check if we landed directly on a form (with or without <form> wrapper)
const hasFormAlready = await page.$('input[type="text"], input[type="email"], textarea');
// Check if form is already loaded
const formSelector = opts.formDetector || 'input[type="text"], input[type="email"], textarea';
const hasFormAlready = await page.$(formSelector);
if (!hasFormAlready) {
const applyBtn = page.locator([
'a:has-text("Apply Now")',
'button:has-text("Apply Now")',
'a:has-text("Apply for this job")',
'button:has-text("Apply for this job")',
'a:has-text("Apply")',
'button:has-text("Apply")',
].join(', ')).first();
const applySelector = opts.applyButtonSelector || DEFAULT_APPLY_BUTTONS;
const applyBtn = page.locator(applySelector).first();
if (await applyBtn.count() === 0) return { status: 'no_button', meta };
@@ -74,11 +112,9 @@ export async function apply(page, job, formFiller) {
]);
if (newPage) {
// Apply opened a new tab — switch to it
await newPage.waitForLoadState('domcontentloaded').catch(() => {});
await newPage.waitForTimeout(PAGE_LOAD_WAIT);
// Recursively handle the new page (but return result to caller)
return applyOnPage(newPage, job, formFiller, meta);
return fillAndSubmit(newPage, job, formFiller, meta, opts);
}
await page.waitForTimeout(FORM_FILL_WAIT);
@@ -93,67 +129,56 @@ export async function apply(page, job, formFiller) {
if (postClick.hasCaptcha) return { status: 'skipped_captcha', meta };
}
return applyOnPage(page, job, formFiller, meta);
return fillAndSubmit(page, job, formFiller, meta, opts);
}
/**
 * Fill the application form on `page` step by step and submit it.
 *
 * Each iteration (at most MAX_STEPS) fills the current step, awaits the
 * optional `opts.beforeSubmit` hook, then clicks the submit button if one
 * is present; otherwise it clicks a Next/Continue button and loops.
 *
 * @param {object} page - page handle; uses locator(), $(), evaluate() and waitForTimeout()
 * @param {object} job - job record; not read here, kept so the signature mirrors apply()
 * @param {object} formFiller - provides fill(page, resumePath) and profile.resume_path
 * @param {object} meta - returned unchanged inside every result
 * @param {object} [opts] - optional overrides: submitSelector, verifySelector, beforeSubmit
 * @returns {Promise<{status: string, meta: object, pending_question?: object}>}
 *   status is one of 'submitted', 'incomplete', 'no_submit', 'needs_answer',
 *   'skipped_honeypot'
 */
async function fillAndSubmit(page, job, formFiller, meta, opts = {}) {
  for (let step = 0; step < MAX_STEPS; step++) {
    // fill() returns the questions it could not answer; any leftover stops the flow.
    const unknowns = await formFiller.fill(page, formFiller.profile.resume_path);
    if (unknowns[0]?.honeypot) return { status: 'skipped_honeypot', meta };
    if (unknowns.length > 0) return { status: 'needs_answer', pending_question: unknowns[0], meta };

    // Hook runs on every step (not only the final one), before any button lookup.
    if (opts.beforeSubmit) await opts.beforeSubmit(page, formFiller);

    const submitSelector = opts.submitSelector || DEFAULT_SUBMIT_BUTTONS;
    const submitBtn = page.locator(submitSelector).first();
    const hasSubmit = await submitBtn.count() > 0;

    if (hasSubmit) {
      await submitBtn.click();
      await page.waitForTimeout(SUBMIT_WAIT);

      const verifySelector = opts.verifySelector || 'form button[type="submit"]:not([disabled])';
      // If evaluate() rejects, fall back to "no success text, no form", which the
      // check below reports as 'submitted'.
      const postSubmit = await page.evaluate((vs) => {
        const text = (document.body.innerText || '').toLowerCase();
        return {
          hasSuccess: text.includes('application submitted') || text.includes('successfully applied') ||
            text.includes('thank you') || text.includes('application received') ||
            text.includes('application has been') || text.includes('we received your'),
          hasForm: !!document.querySelector(vs),
        };
      }, verifySelector).catch(() => ({ hasSuccess: false, hasForm: false }));

      if (postSubmit.hasSuccess || !postSubmit.hasForm) {
        return { status: 'submitted', meta };
      }
      console.log(` [generic] Submit clicked but form still present — may not have submitted`);
      return { status: 'incomplete', meta };
    }

    // Multi-step: Next/Continue
    const nextBtn = await page.$(DEFAULT_NEXT_BUTTONS);
    if (nextBtn) {
      await nextBtn.click();
      await page.waitForTimeout(FORM_FILL_WAIT);
      continue;
    }

    // Neither a submit nor a next control exists on this step.
    return { status: 'no_submit', meta };
  }

  console.log(` [generic] Exceeded ${MAX_STEPS} form steps`);
  return { status: 'incomplete', meta };
}