Classify unknown_external jobs by following Apply redirects

After LinkedIn search completes, visits each unknown_external job page,
clicks the Apply button, captures the redirect URL, and matches against
known ATS patterns to identify the actual application platform.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-06 18:13:32 -08:00
parent cc0d15ece7
commit 69eb6b124f
2 changed files with 75 additions and 2 deletions

View File

@@ -20,11 +20,11 @@ const origStderrWrite = process.stderr.write.bind(process.stderr);
process.stdout.write = (chunk, ...args) => { logStream.write(chunk); return origStdoutWrite(chunk, ...args); }; process.stdout.write = (chunk, ...args) => { logStream.write(chunk); return origStdoutWrite(chunk, ...args); };
process.stderr.write = (chunk, ...args) => { logStream.write(chunk); return origStderrWrite(chunk, ...args); }; process.stderr.write = (chunk, ...args) => { logStream.write(chunk); return origStderrWrite(chunk, ...args); };
import { addJobs, loadQueue, loadConfig } from './lib/queue.mjs'; import { addJobs, loadQueue, loadConfig, getJobsByStatus, updateJobStatus } from './lib/queue.mjs';
import { writeFileSync, readFileSync, existsSync } from 'fs'; import { writeFileSync, readFileSync, existsSync } from 'fs';
import { acquireLock } from './lib/lock.mjs'; import { acquireLock } from './lib/lock.mjs';
import { createBrowser } from './lib/browser.mjs'; import { createBrowser } from './lib/browser.mjs';
import { verifyLogin as liLogin, searchLinkedIn } from './lib/linkedin.mjs'; import { verifyLogin as liLogin, searchLinkedIn, classifyExternalJobs } from './lib/linkedin.mjs';
import { verifyLogin as wfLogin, searchWellfound } from './lib/wellfound.mjs'; import { verifyLogin as wfLogin, searchWellfound } from './lib/wellfound.mjs';
import { sendTelegram, formatSearchSummary } from './lib/notify.mjs'; import { sendTelegram, formatSearchSummary } from './lib/notify.mjs';
import { DEFAULT_FIRST_RUN_DAYS } from './lib/constants.mjs'; import { DEFAULT_FIRST_RUN_DAYS } from './lib/constants.mjs';
@@ -202,6 +202,16 @@ async function main() {
} }
platformsRun.push('LinkedIn'); platformsRun.push('LinkedIn');
// Classify unknown_external jobs using the existing LinkedIn browser session
const unclassified = getJobsByStatus('new').filter(j => j.apply_type === 'unknown_external');
if (unclassified.length > 0) {
console.log(`\n🔍 Classifying ${unclassified.length} external jobs...`);
const { classified, remaining } = await classifyExternalJobs(liBrowser.page, unclassified, (job, applyType, applyUrl) => {
updateJobStatus(job.id, 'new', { apply_type: applyType, apply_url: applyUrl });
});
console.log(` ✅ Classified ${classified}, ${remaining} still unknown`);
}
} catch (e) { } catch (e) {
console.error(` ❌ LinkedIn error: ${e.message}`); console.error(` ❌ LinkedIn error: ${e.message}`);
if (e.stack) console.error(` Stack: ${e.stack.split('\n').slice(1, 3).join(' | ').trim()}`); if (e.stack) console.error(` Stack: ${e.stack.split('\n').slice(1, 3).join(' | ').trim()}`);

View File

@@ -143,3 +143,66 @@ export async function searchLinkedIn(page, search, { onPage, onKeyword } = {}) {
return jobs; return jobs;
} }
/**
 * Decide whether a URL captured from the Apply popup is a usable external link.
 *
 * The host is parsed instead of substring-matched so that an ATS link that
 * merely mentions LinkedIn in its query string (e.g. `?source=linkedin.com`)
 * is still accepted, while placeholders such as `about:blank` are rejected.
 *
 * @param {?string} candidate — URL reported by the popup, or null
 * @returns {boolean} true for an http(s) URL whose host is not linkedin.com or a subdomain of it
 */
function isExternalApplyUrl(candidate) {
  if (!candidate) return false;

  let parsed;
  try {
    parsed = new URL(candidate);
  } catch {
    return false; // not an absolute URL
  }

  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return false;

  const host = parsed.hostname.toLowerCase();
  return host !== 'linkedin.com' && !host.endsWith('.linkedin.com');
}

/**
 * Classify unknown_external jobs by following the Apply button redirect.
 *
 * For each job: navigates `page` to the job URL, clicks the non-Easy-Apply
 * button, waits for the tab opened by that click, reads the tab's URL, closes
 * the tab, and matches the URL against EXTERNAL_ATS_PATTERNS (first match wins).
 *
 * Counting rules:
 *  - `classified` counts every job for which an external http(s) URL was
 *    captured and passed to `onClassified`. This includes jobs whose URL
 *    matched no pattern; those are reported with apply_type 'unknown_external'
 *    together with the captured URL.
 *  - `remaining` counts jobs with no URL, no Apply button, no new tab, a
 *    captured URL that is not an external http(s) address, or any error
 *    (including one thrown by `onClassified`).
 *
 * @param {Page} page — authenticated LinkedIn browser page
 * @param {Array} jobs — jobs with apply_type 'unknown_external'; null/undefined is treated as empty
 * @param {Function} onClassified — callback(job, apply_type, apply_url) for each classified job;
 *   may be async, in which case it is awaited before the next job is processed
 * @returns {Promise<{ classified: number, remaining: number }>}
 */
export async function classifyExternalJobs(page, jobs, onClassified) {
  let classified = 0;
  let remaining = 0;

  for (const job of jobs || []) {
    const url = job.url || job.apply_url;
    if (!url) {
      remaining++;
      continue;
    }

    try {
      await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT });
      // Fixed pause after navigation — presumably lets the Apply button render (TODO confirm).
      await page.waitForTimeout(2000);

      // Prefer the non-Easy-Apply <button>; fall back to an anchor-style Apply link.
      const applyBtn = await page.$('button.jobs-apply-button:not([aria-label*="Easy Apply"])')
        || await page.$('a.jobs-apply-button');
      if (!applyBtn) {
        remaining++;
        continue;
      }

      // Start listening before clicking so the new tab cannot be missed.
      // A timeout resolves to null rather than rejecting.
      const [newPage] = await Promise.all([
        page.context().waitForEvent('page', { timeout: 8000 }).catch(() => null),
        applyBtn.click(),
      ]);

      let externalUrl = null;
      if (newPage) {
        try {
          await newPage.waitForLoadState('domcontentloaded', { timeout: 8000 }).catch(() => {});
          externalUrl = newPage.url();
        } finally {
          // Always release the popup, even if reading its URL failed.
          await newPage.close().catch(() => {});
        }
      }

      if (!isExternalApplyUrl(externalUrl)) {
        remaining++;
        continue;
      }

      // Match against ATS patterns; an unmatched URL keeps 'unknown_external'.
      let applyType = 'unknown_external';
      for (const { name, pattern } of EXTERNAL_ATS_PATTERNS) {
        if (pattern.test(externalUrl)) {
          applyType = name;
          break;
        }
      }

      await onClassified(job, applyType, externalUrl);
      classified++;
    } catch (err) {
      // Best-effort: one failing job must not abort the batch, but say why it failed.
      remaining++;
      const reason = err instanceof Error ? err.message : String(err);
      console.warn(`   ⚠️ Could not classify job ${job.id || url}: ${reason}`);
    }
  }

  return { classified, remaining };
}