Classify unknown_external jobs by following Apply redirects
After the LinkedIn search completes, visits each unknown_external job page, clicks the Apply button, captures the redirect URL, and matches it against known ATS patterns to identify the actual application platform.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -20,11 +20,11 @@ const origStderrWrite = process.stderr.write.bind(process.stderr);
|
|||||||
process.stdout.write = (chunk, ...args) => { logStream.write(chunk); return origStdoutWrite(chunk, ...args); };
|
process.stdout.write = (chunk, ...args) => { logStream.write(chunk); return origStdoutWrite(chunk, ...args); };
|
||||||
process.stderr.write = (chunk, ...args) => { logStream.write(chunk); return origStderrWrite(chunk, ...args); };
|
process.stderr.write = (chunk, ...args) => { logStream.write(chunk); return origStderrWrite(chunk, ...args); };
|
||||||
|
|
||||||
import { addJobs, loadQueue, loadConfig } from './lib/queue.mjs';
|
import { addJobs, loadQueue, loadConfig, getJobsByStatus, updateJobStatus } from './lib/queue.mjs';
|
||||||
import { writeFileSync, readFileSync, existsSync } from 'fs';
|
import { writeFileSync, readFileSync, existsSync } from 'fs';
|
||||||
import { acquireLock } from './lib/lock.mjs';
|
import { acquireLock } from './lib/lock.mjs';
|
||||||
import { createBrowser } from './lib/browser.mjs';
|
import { createBrowser } from './lib/browser.mjs';
|
||||||
import { verifyLogin as liLogin, searchLinkedIn } from './lib/linkedin.mjs';
|
import { verifyLogin as liLogin, searchLinkedIn, classifyExternalJobs } from './lib/linkedin.mjs';
|
||||||
import { verifyLogin as wfLogin, searchWellfound } from './lib/wellfound.mjs';
|
import { verifyLogin as wfLogin, searchWellfound } from './lib/wellfound.mjs';
|
||||||
import { sendTelegram, formatSearchSummary } from './lib/notify.mjs';
|
import { sendTelegram, formatSearchSummary } from './lib/notify.mjs';
|
||||||
import { DEFAULT_FIRST_RUN_DAYS } from './lib/constants.mjs';
|
import { DEFAULT_FIRST_RUN_DAYS } from './lib/constants.mjs';
|
||||||
@@ -202,6 +202,16 @@ async function main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
platformsRun.push('LinkedIn');
|
platformsRun.push('LinkedIn');
|
||||||
|
|
||||||
|
// Classify unknown_external jobs using the existing LinkedIn browser session
|
||||||
|
const unclassified = getJobsByStatus('new').filter(j => j.apply_type === 'unknown_external');
|
||||||
|
if (unclassified.length > 0) {
|
||||||
|
console.log(`\n🔍 Classifying ${unclassified.length} external jobs...`);
|
||||||
|
const { classified, remaining } = await classifyExternalJobs(liBrowser.page, unclassified, (job, applyType, applyUrl) => {
|
||||||
|
updateJobStatus(job.id, 'new', { apply_type: applyType, apply_url: applyUrl });
|
||||||
|
});
|
||||||
|
console.log(` ✅ Classified ${classified}, ${remaining} still unknown`);
|
||||||
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error(` ❌ LinkedIn error: ${e.message}`);
|
console.error(` ❌ LinkedIn error: ${e.message}`);
|
||||||
if (e.stack) console.error(` Stack: ${e.stack.split('\n').slice(1, 3).join(' | ').trim()}`);
|
if (e.stack) console.error(` Stack: ${e.stack.split('\n').slice(1, 3).join(' | ').trim()}`);
|
||||||
|
|||||||
@@ -143,3 +143,66 @@ export async function searchLinkedIn(page, search, { onPage, onKeyword } = {}) {
|
|||||||
|
|
||||||
return jobs;
|
return jobs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Return `rawUrl` unchanged when it is an http(s) URL whose host is not
 * LinkedIn, otherwise `null`.
 *
 * The hostname is compared rather than the whole string so that ATS links
 * carrying tracking parameters such as `?utm_source=linkedin.com` are still
 * recognised as external. Non-http(s) URLs (e.g. `about:blank` from a popup
 * that never navigated) and unparseable values are rejected.
 *
 * @param {string|null|undefined} rawUrl — URL reported by the browser
 * @returns {string|null}
 */
function toExternalUrl(rawUrl) {
  if (!rawUrl) return null;

  let parsed;
  try {
    parsed = new URL(rawUrl);
  } catch {
    return null;
  }

  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return null;

  const host = parsed.hostname.toLowerCase();
  if (host === 'linkedin.com' || host.endsWith('.linkedin.com')) return null;

  return rawUrl;
}

/**
 * Classify unknown_external jobs by following the Apply button redirect.
 * Visits each job page, clicks Apply, captures the URL the browser lands on
 * (in a new tab, or in the same tab when no new tab opens), and matches it
 * against EXTERNAL_ATS_PATTERNS.
 *
 * A job counts as `classified` whenever an external (non-LinkedIn, http/https)
 * URL was captured — even if no ATS pattern matched, in which case the
 * callback receives apply_type 'unknown_external' together with the URL.
 * Every other outcome (no URL on the job, no Apply button, no external
 * redirect, any navigation error) counts as `remaining`.
 *
 * @param {Page} page — authenticated LinkedIn browser page
 * @param {Array} jobs — jobs with apply_type 'unknown_external'
 * @param {Function} onClassified — callback(job, apply_type, apply_url) for each classified job; may be async
 * @returns {Promise<{ classified: number, remaining: number }>}
 */
export async function classifyExternalJobs(page, jobs, onClassified) {
  let classified = 0;
  let remaining = 0;

  for (const job of jobs) {
    const url = job.url || job.apply_url;
    if (!url) { remaining++; continue; }

    // Tracked outside the try so the finally block can always close it.
    let popup = null;

    try {
      await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT });
      await page.waitForTimeout(2000);

      // Prefer the non-"Easy Apply" button; fall back to the anchor variant.
      const applyBtn = await page.$('button.jobs-apply-button:not([aria-label*="Easy Apply"])')
        || await page.$('a.jobs-apply-button');
      if (!applyBtn) { remaining++; continue; }

      // Start listening for the new tab before clicking so the event is not missed.
      [popup] = await Promise.all([
        page.context().waitForEvent('page', { timeout: 8000 }).catch(() => null),
        applyBtn.click(),
      ]);

      let landedUrl;
      if (popup) {
        await popup.waitForLoadState('domcontentloaded', { timeout: 8000 }).catch(() => {});
        landedUrl = popup.url();
      } else {
        // No new tab: the click may have navigated the current tab instead.
        landedUrl = page.url();
      }

      const externalUrl = toExternalUrl(landedUrl);
      if (!externalUrl) { remaining++; continue; }

      // Match against ATS patterns
      let applyType = 'unknown_external';
      for (const { name, pattern } of EXTERNAL_ATS_PATTERNS) {
        if (pattern.test(externalUrl)) {
          applyType = name;
          break;
        }
      }

      await onClassified(job, applyType, externalUrl);
      classified++;
    } catch (err) {
      // Best-effort: one bad job must not abort the batch, but leave a trace.
      const reason = err instanceof Error ? err.message : String(err);
      console.warn(`   Could not classify ${url}: ${reason}`);
      remaining++;
    } finally {
      if (popup) await popup.close().catch(() => {});
    }
  }

  return { classified, remaining };
}
|
||||||
|
|||||||
Reference in New Issue
Block a user