diff --git a/job_searcher.mjs b/job_searcher.mjs index 3a20023..0668055 100644 --- a/job_searcher.mjs +++ b/job_searcher.mjs @@ -20,11 +20,11 @@ const origStderrWrite = process.stderr.write.bind(process.stderr); process.stdout.write = (chunk, ...args) => { logStream.write(chunk); return origStdoutWrite(chunk, ...args); }; process.stderr.write = (chunk, ...args) => { logStream.write(chunk); return origStderrWrite(chunk, ...args); }; -import { addJobs, loadQueue, loadConfig } from './lib/queue.mjs'; +import { addJobs, loadQueue, loadConfig, getJobsByStatus, updateJobStatus } from './lib/queue.mjs'; import { writeFileSync, readFileSync, existsSync } from 'fs'; import { acquireLock } from './lib/lock.mjs'; import { createBrowser } from './lib/browser.mjs'; -import { verifyLogin as liLogin, searchLinkedIn } from './lib/linkedin.mjs'; +import { verifyLogin as liLogin, searchLinkedIn, classifyExternalJobs } from './lib/linkedin.mjs'; import { verifyLogin as wfLogin, searchWellfound } from './lib/wellfound.mjs'; import { sendTelegram, formatSearchSummary } from './lib/notify.mjs'; import { DEFAULT_FIRST_RUN_DAYS } from './lib/constants.mjs'; @@ -202,6 +202,16 @@ async function main() { } platformsRun.push('LinkedIn'); + + // Classify unknown_external jobs using the existing LinkedIn browser session + const unclassified = getJobsByStatus('new').filter(j => j.apply_type === 'unknown_external'); + if (unclassified.length > 0) { + console.log(`\nšŸ” Classifying ${unclassified.length} external jobs...`); + const { classified, remaining } = await classifyExternalJobs(liBrowser.page, unclassified, (job, applyType, applyUrl) => { + updateJobStatus(job.id, 'new', { apply_type: applyType, apply_url: applyUrl }); + }); + console.log(` āœ… Classified ${classified}, ${remaining} still unknown`); + } } catch (e) { console.error(` āŒ LinkedIn error: ${e.message}`); if (e.stack) console.error(` Stack: ${e.stack.split('\n').slice(1, 3).join(' | ').trim()}`); diff --git a/lib/linkedin.mjs 
/**
 * Decide whether a URL captured after clicking Apply is a usable external
 * destination: it must parse, use http(s), and not be hosted on linkedin.com.
 *
 * The host is compared instead of searching the whole string, so an ATS URL
 * that merely mentions "linkedin.com" in its query string is still accepted,
 * and placeholder URLs such as `about:blank` are rejected.
 *
 * @param {string|null|undefined} candidate - URL reported by the browser
 * @returns {boolean} true when the URL points at a non-LinkedIn http(s) host
 */
function isExternalApplyUrl(candidate) {
  if (!candidate) return false;

  let parsed;
  try {
    parsed = new URL(candidate);
  } catch {
    return false; // not an absolute URL
  }

  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return false;

  const host = parsed.hostname;
  return host !== 'linkedin.com' && !host.endsWith('.linkedin.com');
}

/**
 * Classify unknown_external jobs by following the Apply button redirect.
 *
 * For each job this navigates `page` to `job.url` (falling back to
 * `job.apply_url`), clicks the non-"Easy Apply" Apply button, and reads the
 * URL of the tab that opens. If no new tab opens but the current tab's URL
 * changed after the click, that URL is used instead. The URL is then matched
 * against EXTERNAL_ATS_PATTERNS; the first matching entry's `name` becomes
 * the apply type.
 *
 * Counting rules:
 * - `classified` counts every job for which an external URL was captured and
 *   `onClassified` returned normally. This includes jobs whose URL matched no
 *   pattern; those are reported with apply type 'unknown_external'.
 * - `remaining` counts every other job: no URL, no Apply button, no external
 *   destination, or an error along the way.
 *
 * Per-job failures are logged with console.warn and never abort the loop.
 *
 * @param {Page} page — authenticated LinkedIn browser page
 * @param {Array} jobs — jobs with apply_type 'unknown_external'
 * @param {Function} onClassified — callback(job, apply_type, apply_url) for each classified job
 * @returns {Promise<{ classified: number, remaining: number }>}
 */
export async function classifyExternalJobs(page, jobs, onClassified) {
  const POPUP_TIMEOUT_MS = 8000;
  const SETTLE_DELAY_MS = 2000;

  let classified = 0;
  let remaining = 0;

  for (const job of jobs ?? []) {
    const url = job.url || job.apply_url;
    if (!url) {
      remaining++;
      continue;
    }

    let popup = null;
    try {
      await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT });
      // Fixed pause after DOMContentLoaded before querying for the Apply button.
      await page.waitForTimeout(SETTLE_DELAY_MS);

      const applyBtn =
        (await page.$('button.jobs-apply-button:not([aria-label*="Easy Apply"])')) ||
        (await page.$('a.jobs-apply-button'));
      if (!applyBtn) {
        remaining++;
        continue;
      }

      const urlBeforeClick = page.url();

      // Start listening before the click so the new tab cannot be missed;
      // a timeout here just means no tab opened.
      [popup] = await Promise.all([
        page.context().waitForEvent('page', { timeout: POPUP_TIMEOUT_MS }).catch(() => null),
        applyBtn.click(),
      ]);

      let externalUrl = null;
      if (popup) {
        await popup.waitForLoadState('domcontentloaded', { timeout: POPUP_TIMEOUT_MS }).catch(() => {});
        externalUrl = popup.url();
      } else if (page.url() !== urlBeforeClick) {
        // No new tab, but the click navigated the current one.
        externalUrl = page.url();
      }

      if (!isExternalApplyUrl(externalUrl)) {
        remaining++;
        continue;
      }

      // First matching ATS pattern wins; otherwise the type stays unknown.
      let applyType = 'unknown_external';
      for (const { name, pattern } of EXTERNAL_ATS_PATTERNS) {
        if (pattern.test(externalUrl)) {
          applyType = name;
          break;
        }
      }

      onClassified(job, applyType, externalUrl);
      classified++;
    } catch (err) {
      // Best-effort: one bad job must not stop the rest, but leave a trace.
      remaining++;
      console.warn(`   classifyExternalJobs: skipped ${job.id ?? url}: ${err?.message ?? err}`);
    } finally {
      // Runs on every exit path (including `continue`) so tabs never pile up.
      if (popup) await popup.close().catch(() => {});
    }
  }

  return { classified, remaining };
}