Classify unknown_external jobs by following Apply redirects

After LinkedIn search completes, visits each unknown_external job page,
clicks the Apply button, captures the redirect URL, and matches against
known ATS patterns to identify the actual application platform.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-06 18:13:32 -08:00
parent cc0d15ece7
commit 69eb6b124f
2 changed files with 75 additions and 2 deletions

View File

@@ -143,3 +143,66 @@ export async function searchLinkedIn(page, search, { onPage, onKeyword } = {}) {
return jobs;
}
/**
 * Decide whether a URL captured from the Apply popup is a usable external
 * application URL: it must parse, use http(s), and not be hosted on
 * linkedin.com or one of its subdomains.
 *
 * The hostname is compared instead of searching the whole string so that an
 * external URL which merely mentions LinkedIn in its path or query string
 * (e.g. `?source=linkedin.com`) is not rejected, and so that placeholder
 * URLs such as `about:blank` are not reported as an apply URL.
 *
 * @param {string|null} rawUrl — URL reported by the popup page, or null
 * @returns {boolean} true when the URL points at a non-LinkedIn http(s) site
 */
function isExternalApplyUrl(rawUrl) {
  if (!rawUrl) return false;

  let parsed;
  try {
    parsed = new URL(rawUrl);
  } catch {
    return false;
  }

  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return false;

  const host = parsed.hostname.toLowerCase();
  return host !== 'linkedin.com' && !host.endsWith('.linkedin.com');
}

/**
 * Classify unknown_external jobs by following the Apply button redirect.
 * Visits each job page, clicks Apply, captures the URL of the tab that
 * opens, and matches it against EXTERNAL_ATS_PATTERNS.
 *
 * Jobs are processed sequentially on the given page. Any error while
 * handling a job (navigation, click, callback failure) is swallowed and the
 * job is counted as remaining, so one bad job never aborts the batch.
 *
 * Note: a job whose external URL was captured but matched no ATS pattern is
 * still reported through onClassified (with apply_type 'unknown_external')
 * and counted as classified, because its apply URL is now known.
 *
 * @param {Page} page — authenticated LinkedIn browser page
 * @param {Array} jobs — jobs with apply_type 'unknown_external'
 * @param {Function} onClassified — callback(job, apply_type, apply_url) for each classified job; may be async
 * @returns {Promise<{ classified: number, remaining: number }>}
 */
export async function classifyExternalJobs(page, jobs, onClassified) {
  let classified = 0;
  let remaining = 0;

  for (const job of jobs) {
    const url = job.url || job.apply_url;
    if (!url) {
      remaining++;
      continue;
    }

    try {
      await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT });
      // Fixed pause after DOMContentLoaded — presumably to let the Apply
      // button render; TODO confirm whether waiting on the selector suffices.
      await page.waitForTimeout(2000);

      // Prefer the non-"Easy Apply" button; fall back to the anchor variant.
      const applyBtn = await page.$('button.jobs-apply-button:not([aria-label*="Easy Apply"])')
        || await page.$('a.jobs-apply-button');
      if (!applyBtn) {
        remaining++;
        continue;
      }

      // Start listening for the new tab before clicking so the event is not
      // missed; a timeout resolves to null instead of rejecting.
      const [newPage] = await Promise.all([
        page.context().waitForEvent('page', { timeout: 8000 }).catch(() => null),
        applyBtn.click(),
      ]);

      let externalUrl = null;
      if (newPage) {
        try {
          // Best effort: a slow load still leaves us with whatever URL the
          // tab has reached so far.
          await newPage.waitForLoadState('domcontentloaded', { timeout: 8000 }).catch(() => {});
          externalUrl = newPage.url();
        } finally {
          // Always release the tab so popups do not accumulate across jobs.
          await newPage.close().catch(() => {});
        }
      }

      if (!isExternalApplyUrl(externalUrl)) {
        remaining++;
        continue;
      }

      // Match against ATS patterns; first match wins.
      let applyType = 'unknown_external';
      for (const { name, pattern } of EXTERNAL_ATS_PATTERNS) {
        if (pattern.test(externalUrl)) {
          applyType = name;
          break;
        }
      }

      // Await so that an async callback's failure is caught below and the
      // job is counted as remaining rather than surfacing as an unhandled
      // rejection after it was already counted as classified.
      await onClassified(job, applyType, externalUrl);
      classified++;
    } catch {
      remaining++;
    }
  }

  return { classified, remaining };
}