refactor: classify apply_type inline during search (click card → detect right panel); no second pass

This commit is contained in:
2026-03-06 01:00:59 +00:00
parent dee6e98603
commit 2574276a85
3 changed files with 115 additions and 71 deletions

View File

@@ -51,6 +51,22 @@ export const SEARCH_RESULTS_MAX = 30;
export const TELEGRAM_API_BASE = 'https://api.telegram.org/bot';
export const NOTIFY_RATE_LIMIT_MS = 1500;
// --- ATS platforms (for URL-based detection) ---
/**
 * Known external applicant-tracking systems (ATS), in match-priority order.
 *
 * Each entry is `{ name, pattern }`, where `pattern` is a case-insensitive
 * RegExp meant to be tested against an apply URL.
 *
 * Every domain is wrapped in hostname-label boundaries so that look-alike
 * hosts are not misclassified (e.g. "clever.com" or "lever.com" as lever.co,
 * "andover.com" as dover.com):
 *   - leading guard: the domain must sit at the start of the string, after a
 *     character that cannot be part of a hostname label, or directly after a
 *     percent-escape such as "%2F" (so URLs embedded in a redirect query
 *     string still match);
 *   - trailing guard: the domain must not be followed by another hostname
 *     label character.
 *
 * The character classes spell out both letter cases so that a RegExp rebuilt
 * from `pattern.source` without the `i` flag keeps the same boundary rules.
 * The patterns carry no `g`/`y` flag, so `.test()` is stateless.
 */
export const EXTERNAL_ATS_PATTERNS = [
  ['greenhouse', 'greenhouse\\.io'],
  ['lever', 'lever\\.co'],
  ['workday', 'workday\\.com|myworkdayjobs\\.com'],
  ['ashby', 'ashbyhq\\.com'],
  ['jobvite', 'jobvite\\.com'],
  ['smartrecruiters', 'smartrecruiters\\.com'],
  ['icims', 'icims\\.com'],
  ['taleo', 'taleo\\.net'],
  ['bamboohr', 'bamboohr\\.com'],
  ['rippling', 'rippling\\.com'],
  ['workable', 'workable\\.com'],
  ['dover', 'dover\\.com'],
].map(([name, domains]) => ({
  name,
  pattern: new RegExp(
    `(?<=^|[^A-Za-z0-9-]|%[0-9A-Fa-f]{2})(?:${domains})(?![A-Za-z0-9-])`,
    'i',
  ),
}));
// --- Queue ---
export const DEFAULT_REVIEW_WINDOW_MINUTES = 30;
export const DEFAULT_MAX_RETRIES = 2;

View File

@@ -8,11 +8,19 @@ import {
LINKEDIN_EASY_APPLY_MODAL_SELECTOR, LINKEDIN_APPLY_BUTTON_SELECTOR,
LINKEDIN_SUBMIT_SELECTOR, LINKEDIN_NEXT_SELECTOR,
LINKEDIN_REVIEW_SELECTOR, LINKEDIN_DISMISS_SELECTOR,
LINKEDIN_MAX_MODAL_STEPS
LINKEDIN_MAX_MODAL_STEPS, EXTERNAL_ATS_PATTERNS
} from './constants.mjs';
const MAX_SEARCH_PAGES = 40;
/**
 * Classify an external apply URL by the ATS platform that hosts it.
 *
 * @param {string|null|undefined} url - Apply URL to classify.
 * @returns {string} The `name` of the first EXTERNAL_ATS_PATTERNS entry whose
 *   pattern matches `url`, or 'unknown_external' when `url` is falsy or no
 *   pattern matches.
 */
function detectAts(url) {
  // A falsy URL short-circuits before any pattern is evaluated.
  const hit = url
    ? EXTERNAL_ATS_PATTERNS.find((entry) => entry.pattern.test(url))
    : undefined;
  return hit ? hit.name : 'unknown_external';
}
export async function verifyLogin(page) {
await page.goto(`${LINKEDIN_BASE}/feed/`, { waitUntil: 'domcontentloaded', timeout: FEED_NAVIGATION_TIMEOUT });
await page.waitForTimeout(CLICK_WAIT);
@@ -21,57 +29,105 @@ export async function verifyLogin(page) {
export async function searchLinkedIn(page, search, { onPage } = {}) {
const jobs = [];
const seenIds = new Set();
for (const keyword of search.keywords) {
const params = new URLSearchParams({ keywords: keyword, sortBy: 'DD' });
if (search.filters?.remote) params.set('f_WT', '2');
if (search.filters?.easy_apply_only) params.set('f_LF', 'f_AL');
if (search.filters?.posted_within_days) {
const seconds = (search.filters.posted_within_days * 86400);
params.set('f_TPR', `r${seconds}`);
params.set('f_TPR', `r${search.filters.posted_within_days * 86400}`);
}
const url = `${LINKEDIN_BASE}/jobs/search/?${params.toString()}`;
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT });
await page.waitForTimeout(PAGE_LOAD_WAIT);
// Paginate through all result pages
let pageNum = 0;
while (pageNum < MAX_SEARCH_PAGES) {
// Scroll to load all cards on current page
// Scroll to load all cards
await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
await page.waitForTimeout(SCROLL_WAIT);
const found = await page.evaluate(({ track, excludes }) => {
const ids = [...new Set(
// Get all job IDs on this page
const pageIds = await page.evaluate(() =>
[...new Set(
Array.from(document.querySelectorAll('a[href*="/jobs/view/"]'))
.map(a => a.href.match(/\/jobs\/view\/(\d+)/)?.[1])
.filter(Boolean)
)];
)]
);
return ids.map(id => {
const link = document.querySelector(`a[href*="/jobs/view/${id}"]`);
const container = link?.closest('li') || link?.parentElement;
const title = container?.querySelector('strong, [class*="title"], h3')?.textContent?.trim()
|| link?.textContent?.trim() || '';
const company = container?.querySelector('[class*="company"], [class*="subtitle"], h4')?.textContent?.trim() || '';
const location = container?.querySelector('[class*="location"]')?.textContent?.trim() || '';
const pageJobs = [];
const titleLower = title.toLowerCase();
const companyLower = company.toLowerCase();
for (const jobId of pageIds) {
if (seenIds.has(jobId)) continue;
// Click the job card to load right panel
try {
await page.evaluate((id) => {
const link = document.querySelector(`a[href*="/jobs/view/${id}"]`);
link?.closest('li')?.click() || link?.click();
}, jobId);
await page.waitForTimeout(CLICK_WAIT);
} catch {}
// Read title, company, location from detail panel (more accurate)
const meta = await page.evaluate(({ id, track, excludes }) => {
const panel = document.querySelector('.jobs-unified-top-card, .job-details-jobs-unified-top-card__job-title');
const title = document.querySelector('.job-details-jobs-unified-top-card__job-title, h1[class*="title"]')?.textContent?.trim()
|| document.querySelector('.jobs-unified-top-card__job-title')?.textContent?.trim() || '';
const company = document.querySelector('.job-details-jobs-unified-top-card__company-name a, .jobs-unified-top-card__company-name a')?.textContent?.trim() || '';
const location = document.querySelector('.job-details-jobs-unified-top-card__bullet, .jobs-unified-top-card__bullet')?.textContent?.trim() || '';
const tl = title.toLowerCase(), cl = company.toLowerCase();
for (const ex of excludes) {
if (titleLower.includes(ex.toLowerCase()) || companyLower.includes(ex.toLowerCase())) return null;
if (tl.includes(ex.toLowerCase()) || cl.includes(ex.toLowerCase())) return null;
}
return { title, company, location };
}, { id: jobId, track: search.track, excludes: search.exclude_keywords || [] });
if (!meta) { seenIds.add(jobId); continue; } // excluded
// Detect apply type from right panel
const applyInfo = await page.evaluate(({ atsPatterns }) => {
const eaBtn = document.querySelector('button.jobs-apply-button[aria-label*="Easy Apply"]');
if (eaBtn) return { apply_type: 'easy_apply', apply_url: null };
const interestedBtn = document.querySelector('button[aria-label*="interested"]');
if (interestedBtn) return { apply_type: 'recruiter_only', apply_url: null };
// Look for external ATS link
const allLinks = Array.from(document.querySelectorAll('a[href]')).map(a => a.href);
for (const href of allLinks) {
for (const { name, pattern } of atsPatterns) {
if (new RegExp(pattern).test(href)) return { apply_type: name, apply_url: href };
}
}
return { id: `li_${id}`, platform: 'linkedin', track, title, company, location,
url: `https://www.linkedin.com/jobs/view/${id}/`, jobId: id };
}).filter(Boolean);
}, { track: search.track, excludes: search.exclude_keywords || [] });
const externalBtn = document.querySelector('button.jobs-apply-button:not([aria-label*="Easy Apply"])');
if (externalBtn) return { apply_type: 'unknown_external', apply_url: null };
jobs.push(...found);
if (found.length > 0 && onPage) onPage(found);
return { apply_type: 'unknown', apply_url: null };
}, { atsPatterns: EXTERNAL_ATS_PATTERNS.map(({ name, pattern }) => ({ name, pattern: pattern.source })) });
seenIds.add(jobId);
const job = {
id: `li_${jobId}`,
platform: 'linkedin',
track: search.track,
jobId,
url: `https://www.linkedin.com/jobs/view/${jobId}/`,
classified_at: Date.now(),
...meta,
...applyInfo,
};
pageJobs.push(job);
jobs.push(job);
}
if (pageJobs.length > 0 && onPage) onPage(pageJobs);
// Click next page button
const nextBtn = await page.$('button[aria-label="View next page"]');
if (!nextBtn) break;
await nextBtn.click();
@@ -80,38 +136,43 @@ export async function searchLinkedIn(page, search, { onPage } = {}) {
}
}
// Dedupe by jobId
const seen = new Set();
return jobs.filter(j => { if (seen.has(j.id)) return false; seen.add(j.id); return true; });
return jobs;
}
export async function applyLinkedIn(page, job, formFiller) {
// Navigate directly to job page
// Use pre-classified apply_type from searcher if available
const meta = { title: job.title, company: job.company };
// Route by apply_type — no re-detection needed if already classified
if (job.apply_type && job.apply_type !== 'easy_apply' && job.apply_type !== 'unknown') {
if (job.apply_type === 'recruiter_only') return { status: 'skipped_recruiter_only', meta };
// External ATS — skip for now, already have the URL
return { status: 'skipped_external_unsupported', meta, externalUrl: job.apply_url || '' };
}
// Navigate to job page
await page.goto(job.url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT });
await page.waitForTimeout(PAGE_LOAD_WAIT);
// Get title/company from detail panel
const meta = await page.evaluate(() => ({
// Re-read meta from page (more accurate title/company)
const pageMeta = await page.evaluate(() => ({
title: document.querySelector('.job-details-jobs-unified-top-card__job-title, h1[class*="title"]')?.textContent?.trim(),
company: document.querySelector('.job-details-jobs-unified-top-card__company-name a, .jobs-unified-top-card__company-name a')?.textContent?.trim(),
}));
Object.assign(meta, pageMeta);
// Detect apply type
// Verify Easy Apply button is present (classify may have been wrong)
const eaBtn = await page.$(`${LINKEDIN_APPLY_BUTTON_SELECTOR}[aria-label*="Easy Apply"]`);
const interestedBtn = await page.$('button[aria-label*="interested"]');
const externalBtn = await page.$(`${LINKEDIN_APPLY_BUTTON_SELECTOR}:not([aria-label*="Easy Apply"])`);
const interestedBtn = await page.$('button[aria-label*="interested"], button:has-text("I\'m interested")');
if (!eaBtn && interestedBtn) return { status: 'skipped_recruiter_only', meta };
if (!eaBtn && externalBtn) {
// Capture the external apply URL for ATS analysis
const externalUrl = await externalBtn.evaluate(el => el.getAttribute('href') || el.dataset?.href || '')
.catch(() => '');
// Also check for redirect links in the page
const applyLink = await page.evaluate(() => {
const a = document.querySelector('a[href*="greenhouse"], a[href*="lever"], a[href*="workday"], a[href*="ashby"], a[href*="jobvite"], a[href*="smartrecruiters"], a[href*="icims"], a[href*="taleo"]');
return a?.href || '';
}).catch(() => '');
return { status: 'skipped_external_unsupported', meta, externalUrl: applyLink || externalUrl };
return { status: 'skipped_external_unsupported', meta, externalUrl: applyLink };
}
if (!eaBtn) return { status: 'skipped_easy_apply_unsupported', meta };