diff --git a/job_searcher.mjs b/job_searcher.mjs index 5c9b951..58a1b2f 100644 --- a/job_searcher.mjs +++ b/job_searcher.mjs @@ -19,8 +19,6 @@ import { sendTelegram, formatSearchSummary } from './lib/notify.mjs'; import { DEFAULT_FIRST_RUN_DAYS } from './lib/constants.mjs'; import { generateKeywords } from './lib/keywords.mjs'; import { initProgress, isCompleted, markComplete } from './lib/search_progress.mjs'; -import { classifyBatch } from './lib/classifier.mjs'; -import { getJobsByStatus, updateJobStatus } from './lib/queue.mjs'; async function main() { const lock = acquireLock('searcher', resolve(__dir, 'data')); @@ -170,37 +168,6 @@ async function main() { } } - // --- Phase 2: Classify new jobs --- - const unclassified = getJobsByStatus('new').filter(j => !j.apply_type); - if (unclassified.length > 0) { - console.log(`\nšŸ”Ž Phase 2: Classifying ${unclassified.length} jobs...`); - let liBrowser2; - try { - liBrowser2 = await createBrowser(settings, 'linkedin'); - await liLogin(liBrowser2.page); - let done = 0; - const liJobs = unclassified.filter(j => j.platform === 'linkedin'); - await classifyBatch(liBrowser2.page, liJobs, { - onClassified: (job) => { - updateJobStatus(job.id, 'new', { apply_type: job.apply_type, apply_url: job.apply_url, classified_at: job.classified_at }); - done++; - process.stdout.write(`\r Classified ${done}/${liJobs.length} — last: ${job.apply_type} (${job.title?.substring(0, 30)})`); - } - }); - console.log(`\r āœ… ${liJobs.length} LinkedIn jobs classified`); - } catch (e) { - console.error(` āŒ Classification error: ${e.message}`); - } finally { - await liBrowser2?.browser?.close().catch(() => {}); - } - // Wellfound jobs default to easy_apply (Wellfound uses its own apply flow) - const wfJobs = unclassified.filter(j => j.platform === 'wellfound'); - for (const job of wfJobs) { - updateJobStatus(job.id, 'new', { apply_type: 'wellfound_apply', classified_at: Date.now() }); - } - if (wfJobs.length > 0) console.log(` āœ… ${wfJobs.length} Wellfound jobs marked for apply`); - } - // Summary const summary = formatSearchSummary(totalAdded, totalSeen - totalAdded, platformsRun); console.log(`\n${summary.replace(/\*/g, '')}`); diff --git a/lib/constants.mjs b/lib/constants.mjs index 302d09d..1436ef5 100644 --- a/lib/constants.mjs +++ b/lib/constants.mjs @@ -51,6 +51,22 @@ export const SEARCH_RESULTS_MAX = 30; export const TELEGRAM_API_BASE = 'https://api.telegram.org/bot'; export const NOTIFY_RATE_LIMIT_MS = 1500; +// --- ATS platforms (for URL-based detection) --- +export const EXTERNAL_ATS_PATTERNS = [ + { name: 'greenhouse', pattern: /greenhouse\.io/i }, + { name: 'lever', pattern: /lever\.co/i }, + { name: 'workday', pattern: /workday\.com|myworkdayjobs\.com/i }, + { name: 'ashby', pattern: /ashbyhq\.com/i }, + { name: 'jobvite', pattern: /jobvite\.com/i }, + { name: 'smartrecruiters', pattern: /smartrecruiters\.com/i }, + { name: 'icims', pattern: /icims\.com/i }, + { name: 'taleo', pattern: /taleo\.net/i }, + { name: 'bamboohr', pattern: /bamboohr\.com/i }, + { name: 'rippling', pattern: /rippling\.com/i }, + { name: 'workable', pattern: /workable\.com/i }, + { name: 'dover', pattern: /dover\.com/i }, +]; + // --- Queue --- export const DEFAULT_REVIEW_WINDOW_MINUTES = 30; export const DEFAULT_MAX_RETRIES = 2; diff --git a/lib/linkedin.mjs b/lib/linkedin.mjs index cfd0558..770cc24 100644 --- a/lib/linkedin.mjs +++ b/lib/linkedin.mjs @@ -8,11 +8,19 @@ import { LINKEDIN_EASY_APPLY_MODAL_SELECTOR, LINKEDIN_APPLY_BUTTON_SELECTOR, LINKEDIN_SUBMIT_SELECTOR, LINKEDIN_NEXT_SELECTOR, LINKEDIN_REVIEW_SELECTOR, LINKEDIN_DISMISS_SELECTOR, - LINKEDIN_MAX_MODAL_STEPS + LINKEDIN_MAX_MODAL_STEPS, EXTERNAL_ATS_PATTERNS } from './constants.mjs'; const MAX_SEARCH_PAGES = 40; +function detectAts(url) { + if (!url) return 'unknown_external'; + for (const { name, pattern } of EXTERNAL_ATS_PATTERNS) { + if (pattern.test(url)) return name; + } + return 'unknown_external'; +} + export async function verifyLogin(page) { await page.goto(`${LINKEDIN_BASE}/feed/`, { waitUntil: 'domcontentloaded', timeout: FEED_NAVIGATION_TIMEOUT }); await page.waitForTimeout(CLICK_WAIT); @@ -21,57 +29,105 @@ export async function verifyLogin(page) { export async function searchLinkedIn(page, search, { onPage } = {}) { const jobs = []; + const seenIds = new Set(); for (const keyword of search.keywords) { const params = new URLSearchParams({ keywords: keyword, sortBy: 'DD' }); if (search.filters?.remote) params.set('f_WT', '2'); if (search.filters?.easy_apply_only) params.set('f_LF', 'f_AL'); if (search.filters?.posted_within_days) { - const seconds = (search.filters.posted_within_days * 86400); - params.set('f_TPR', `r${seconds}`); + params.set('f_TPR', `r${search.filters.posted_within_days * 86400}`); } const url = `${LINKEDIN_BASE}/jobs/search/?${params.toString()}`; await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT }); await page.waitForTimeout(PAGE_LOAD_WAIT); - // Paginate through all result pages let pageNum = 0; while (pageNum < MAX_SEARCH_PAGES) { - // Scroll to load all cards on current page + // Scroll to load all cards await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); await page.waitForTimeout(SCROLL_WAIT); - const found = await page.evaluate(({ track, excludes }) => { - const ids = [...new Set( + // Get all job IDs on this page + const pageIds = await page.evaluate(() => + [...new Set( Array.from(document.querySelectorAll('a[href*="/jobs/view/"]')) .map(a => a.href.match(/\/jobs\/view\/(\d+)/)?.[1]) .filter(Boolean) - )]; + )] + ); - return ids.map(id => { - const link = document.querySelector(`a[href*="/jobs/view/${id}"]`); - const container = link?.closest('li') || link?.parentElement; - const title = container?.querySelector('strong, [class*="title"], h3')?.textContent?.trim() - || link?.textContent?.trim() || ''; - const company = container?.querySelector('[class*="company"], [class*="subtitle"], h4')?.textContent?.trim() || ''; - const location = container?.querySelector('[class*="location"]')?.textContent?.trim() || ''; + const pageJobs = []; - const titleLower = title.toLowerCase(); - const companyLower = company.toLowerCase(); + for (const jobId of pageIds) { + if (seenIds.has(jobId)) continue; + + // Click the job card to load right panel + try { + await page.evaluate((id) => { + const link = document.querySelector(`a[href*="/jobs/view/${id}"]`); + link?.closest('li')?.click() || link?.click(); + }, jobId); + await page.waitForTimeout(CLICK_WAIT); + } catch {} + + // Read title, company, location from detail panel (more accurate) + const meta = await page.evaluate(({ id, track, excludes }) => { + const panel = document.querySelector('.jobs-unified-top-card, .job-details-jobs-unified-top-card__job-title'); + const title = document.querySelector('.job-details-jobs-unified-top-card__job-title, h1[class*="title"]')?.textContent?.trim() + || document.querySelector('.jobs-unified-top-card__job-title')?.textContent?.trim() || ''; + const company = document.querySelector('.job-details-jobs-unified-top-card__company-name a, .jobs-unified-top-card__company-name a')?.textContent?.trim() || ''; + const location = document.querySelector('.job-details-jobs-unified-top-card__bullet, .jobs-unified-top-card__bullet')?.textContent?.trim() || ''; + + const tl = title.toLowerCase(), cl = company.toLowerCase(); for (const ex of excludes) { - if (titleLower.includes(ex.toLowerCase()) || companyLower.includes(ex.toLowerCase())) return null; + if (tl.includes(ex.toLowerCase()) || cl.includes(ex.toLowerCase())) return null; + } + return { title, company, location }; + }, { id: jobId, track: search.track, excludes: search.exclude_keywords || [] }); + + if (!meta) { seenIds.add(jobId); continue; } // excluded + + // Detect apply type from right panel + const applyInfo = await page.evaluate(({ atsPatterns }) => { + const eaBtn = document.querySelector('button.jobs-apply-button[aria-label*="Easy Apply"]'); + if (eaBtn) return { apply_type: 'easy_apply', apply_url: null }; + + const interestedBtn = document.querySelector('button[aria-label*="interested"]'); + if (interestedBtn) return { apply_type: 'recruiter_only', apply_url: null }; + + // Look for external ATS link + const allLinks = Array.from(document.querySelectorAll('a[href]')).map(a => a.href); + for (const href of allLinks) { + for (const { name, pattern } of atsPatterns) { + if (new RegExp(pattern).test(href)) return { apply_type: name, apply_url: href }; + } } - return { id: `li_${id}`, platform: 'linkedin', track, title, company, location, - url: `https://www.linkedin.com/jobs/view/${id}/`, jobId: id }; - }).filter(Boolean); - }, { track: search.track, excludes: search.exclude_keywords || [] }); + const externalBtn = document.querySelector('button.jobs-apply-button:not([aria-label*="Easy Apply"])'); + if (externalBtn) return { apply_type: 'unknown_external', apply_url: null }; - jobs.push(...found); - if (found.length > 0 && onPage) onPage(found); + return { apply_type: 'unknown', apply_url: null }; + }, { atsPatterns: EXTERNAL_ATS_PATTERNS.map(({ name, pattern }) => ({ name, pattern: pattern.source })) }); + + seenIds.add(jobId); + const job = { + id: `li_${jobId}`, + platform: 'linkedin', + track: search.track, + jobId, + url: `https://www.linkedin.com/jobs/view/${jobId}/`, + classified_at: Date.now(), + ...meta, + ...applyInfo, + }; + pageJobs.push(job); + jobs.push(job); + } + + if (pageJobs.length > 0 && onPage) onPage(pageJobs); - // Click next page button const nextBtn = await page.$('button[aria-label="View next page"]'); if (!nextBtn) break; await nextBtn.click(); @@ -80,38 +136,43 @@ export async function searchLinkedIn(page, search, { onPage } = {}) { } } - // Dedupe by jobId - const seen = new Set(); - return jobs.filter(j => { if (seen.has(j.id)) return false; seen.add(j.id); return true; }); + return jobs; } export async function applyLinkedIn(page, job, formFiller) { - // Navigate directly to job page + // Use pre-classified apply_type from searcher if available + const meta = { title: job.title, company: job.company }; + + // Route by apply_type — no re-detection needed if already classified + if (job.apply_type && job.apply_type !== 'easy_apply' && job.apply_type !== 'unknown') { + if (job.apply_type === 'recruiter_only') return { status: 'skipped_recruiter_only', meta }; + // External ATS — skip for now, already have the URL + return { status: 'skipped_external_unsupported', meta, externalUrl: job.apply_url || '' }; + } + + // Navigate to job page await page.goto(job.url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT }); await page.waitForTimeout(PAGE_LOAD_WAIT); - // Get title/company from detail panel - const meta = await page.evaluate(() => ({ + // Re-read meta from page (more accurate title/company) + const pageMeta = await page.evaluate(() => ({ title: document.querySelector('.job-details-jobs-unified-top-card__job-title, h1[class*="title"]')?.textContent?.trim(), company: document.querySelector('.job-details-jobs-unified-top-card__company-name a, .jobs-unified-top-card__company-name a')?.textContent?.trim(), })); + Object.assign(meta, pageMeta); - // Detect apply type + // Verify Easy Apply button is present (classify may have been wrong) const eaBtn = await page.$(`${LINKEDIN_APPLY_BUTTON_SELECTOR}[aria-label*="Easy Apply"]`); + const interestedBtn = await page.$('button[aria-label*="interested"]'); const externalBtn = await page.$(`${LINKEDIN_APPLY_BUTTON_SELECTOR}:not([aria-label*="Easy Apply"])`); - const interestedBtn = await page.$('button[aria-label*="interested"], button:has-text("I\'m interested")'); if (!eaBtn && interestedBtn) return { status: 'skipped_recruiter_only', meta }; if (!eaBtn && externalBtn) { - // Capture the external apply URL for ATS analysis - const externalUrl = await externalBtn.evaluate(el => el.getAttribute('href') || el.dataset?.href || '') - .catch(() => ''); - // Also check for redirect links in the page const applyLink = await page.evaluate(() => { const a = document.querySelector('a[href*="greenhouse"], a[href*="lever"], a[href*="workday"], a[href*="ashby"], a[href*="jobvite"], a[href*="smartrecruiters"], a[href*="icims"], a[href*="taleo"]'); return a?.href || ''; }).catch(() => ''); - return { status: 'skipped_external_unsupported', meta, externalUrl: applyLink || externalUrl }; + return { status: 'skipped_external_unsupported', meta, externalUrl: applyLink }; } if (!eaBtn) return { status: 'skipped_easy_apply_unsupported', meta };