Classify unknown_external jobs by following Apply redirects
After LinkedIn search completes, visits each unknown_external job page, clicks the Apply button, captures the redirect URL, and matches against known ATS patterns to identify the actual application platform. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -143,3 +143,66 @@ export async function searchLinkedIn(page, search, { onPage, onKeyword } = {}) {
|
||||
|
||||
return jobs;
|
||||
}
|
||||
|
||||
/**
 * Decide whether a URL captured from the Apply popup is a usable external
 * application URL.
 *
 * Only the parsed hostname is inspected, so an external URL that merely
 * mentions "linkedin.com" in its path or query string (for example a
 * tracking parameter) is still accepted. Non-http(s) URLs such as
 * "about:blank" (a popup that never navigated) are rejected.
 *
 * @param {string|null} rawUrl - URL reported by the popup page
 * @returns {boolean} true when rawUrl is an http(s) URL whose host is not LinkedIn
 */
function isExternalApplyUrl(rawUrl) {
  let parsed;
  try {
    parsed = new URL(rawUrl);
  } catch {
    // null, empty, or otherwise unparseable: nothing usable was captured.
    return false;
  }

  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    return false;
  }

  const host = parsed.hostname;
  return host !== 'linkedin.com' && !host.endsWith('.linkedin.com');
}

/**
 * Return the name of the first EXTERNAL_ATS_PATTERNS entry whose pattern
 * matches the URL, or 'unknown_external' when none does.
 *
 * @param {string} externalUrl - external application URL
 * @returns {string} matched platform name or 'unknown_external'
 */
function matchAtsPlatform(externalUrl) {
  for (const { name, pattern } of EXTERNAL_ATS_PATTERNS) {
    // The patterns are shared across calls; a regex carrying the g/y flag
    // remembers lastIndex between test() calls, so reset it first.
    pattern.lastIndex = 0;
    if (pattern.test(externalUrl)) {
      return name;
    }
  }
  return 'unknown_external';
}

/**
 * Classify unknown_external jobs by following the Apply button redirect.
 *
 * For each job: navigates to the job page, clicks the non-Easy-Apply button,
 * reads the URL of the tab that opens, and matches it against
 * EXTERNAL_ATS_PATTERNS. Jobs are processed one at a time on the same page.
 * Any error while handling a job is treated as best-effort failure: the job
 * is counted as remaining and processing moves on to the next one.
 *
 * @param {Page} page — authenticated LinkedIn browser page (presumably a Playwright Page — confirm against caller)
 * @param {Array} jobs — jobs with apply_type 'unknown_external'; each is read for `url`, falling back to `apply_url`
 * @param {Function} onClassified — callback(job, apply_type, apply_url) for each classified job; may be async, it is awaited
 * @returns {Promise<{ classified: number, remaining: number }>} counts of jobs for
 *   which an external URL was captured (even if no known ATS matched) and of
 *   jobs that could not be resolved
 */
export async function classifyExternalJobs(page, jobs, onClassified) {
  let classified = 0;
  let remaining = 0;

  for (const job of jobs) {
    const url = job.url || job.apply_url;
    if (!url) {
      remaining++;
      continue;
    }

    try {
      await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT });
      // Give the page a moment to render the Apply button after DOM load.
      await page.waitForTimeout(2000);

      // Prefer the external Apply button; fall back to the anchor variant.
      const applyBtn =
        (await page.$('button.jobs-apply-button:not([aria-label*="Easy Apply"])')) ||
        (await page.$('a.jobs-apply-button'));
      if (!applyBtn) {
        remaining++;
        continue;
      }

      // Start listening before clicking so the new tab cannot be missed.
      // A timeout resolves to null instead of rejecting.
      const popupPromise = page
        .context()
        .waitForEvent('page', { timeout: 8000 })
        .catch(() => null);

      let newPage;
      try {
        [newPage] = await Promise.all([popupPromise, applyBtn.click()]);
      } catch (err) {
        // The click failed, but a tab may still open afterwards; close it in
        // the background so it does not linger for the rest of the run.
        void popupPromise
          .then((latePage) => (latePage ? latePage.close() : undefined))
          .catch(() => {});
        throw err;
      }

      let externalUrl = null;
      if (newPage) {
        await newPage.waitForLoadState('domcontentloaded', { timeout: 8000 }).catch(() => {});
        externalUrl = newPage.url();
        await newPage.close().catch(() => {});
      }

      if (!isExternalApplyUrl(externalUrl)) {
        remaining++;
        continue;
      }

      const applyType = matchAtsPlatform(externalUrl);

      // Awaited so a rejecting async callback is handled here rather than
      // surfacing as an unhandled rejection.
      await onClassified(job, applyType, externalUrl);
      classified++;
    } catch {
      // Best-effort: a navigation, click, or callback failure leaves the job unclassified.
      remaining++;
    }
  }

  return { classified, remaining };
}
|
||||
|
||||
Reference in New Issue
Block a user