feat: searcher Phase 2 classifies apply type; applier sorts by priority; already-applied detection
This commit is contained in:
@@ -10,7 +10,7 @@ import { fileURLToPath } from 'url';
|
||||
|
||||
const __dir = dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
import { getJobsByStatus, updateJobStatus, appendLog, loadConfig } from './lib/queue.mjs';
|
||||
import { getJobsByStatus, updateJobStatus, appendLog, loadConfig, isAlreadyApplied } from './lib/queue.mjs';
|
||||
import { writeFileSync } from 'fs';
|
||||
import { acquireLock } from './lib/lock.mjs';
|
||||
import { createBrowser } from './lib/browser.mjs';
|
||||
@@ -59,10 +59,21 @@ async function main() {
|
||||
return;
|
||||
}
|
||||
|
||||
// Get jobs to process: new + needs_answer (retries)
|
||||
const allJobs = getJobsByStatus(['new', 'needs_answer']);
|
||||
// Priority order for apply types
|
||||
const APPLY_PRIORITY = ['easy_apply', 'wellfound_apply', 'greenhouse', 'lever', 'ashby', 'workday', 'unknown_external'];
|
||||
|
||||
// Get jobs to process: new + needs_answer, sorted by apply_type priority
|
||||
const allJobs = getJobsByStatus(['new', 'needs_answer'])
|
||||
.sort((a, b) => {
|
||||
const ap = APPLY_PRIORITY.indexOf(a.apply_type ?? 'unknown_external');
|
||||
const bp = APPLY_PRIORITY.indexOf(b.apply_type ?? 'unknown_external');
|
||||
return (ap === -1 ? 99 : ap) - (bp === -1 ? 99 : bp);
|
||||
});
|
||||
const jobs = allJobs.slice(0, maxApps);
|
||||
console.log(`📋 ${jobs.length} job(s) to process${allJobs.length > jobs.length ? ` (capped from ${allJobs.length})` : ''}\n`);
|
||||
const typeSummary = Object.entries(
|
||||
jobs.reduce((acc, j) => { acc[j.apply_type || 'unclassified'] = (acc[j.apply_type || 'unclassified'] || 0) + 1; return acc; }, {})
|
||||
).map(([k, v]) => `${v} ${k}`).join(', ');
|
||||
console.log(`📋 ${jobs.length} job(s) to process — ${typeSummary}\n`);
|
||||
|
||||
if (jobs.length === 0) {
|
||||
console.log('Nothing to apply to. Run job_searcher.mjs first.');
|
||||
@@ -72,7 +83,7 @@ async function main() {
|
||||
const results = {
|
||||
submitted: 0, failed: 0, needs_answer: 0, total: jobs.length,
|
||||
skipped_recruiter: 0, skipped_external: 0, skipped_no_easy_apply: 0,
|
||||
atsCounts: {}
|
||||
already_applied: 0, atsCounts: {}
|
||||
};
|
||||
|
||||
// Group by platform
|
||||
@@ -90,7 +101,12 @@ async function main() {
|
||||
console.log(' ✅ Logged in\n');
|
||||
|
||||
for (const job of liJobs) {
|
||||
console.log(` → ${job.title} @ ${job.company || '?'}`);
|
||||
if (isAlreadyApplied(job.id)) {
|
||||
console.log(` ⏭️ Already applied — ${job.title} @ ${job.company || '?'}`);
|
||||
updateJobStatus(job.id, 'already_applied', {});
|
||||
continue;
|
||||
}
|
||||
console.log(` → ${job.title} @ ${job.company || '?'} [${job.apply_type || 'unclassified'}]`);
|
||||
try {
|
||||
const result = await applyLinkedIn(liBrowser.page, job, formFiller);
|
||||
await handleResult(job, result, results, settings);
|
||||
@@ -117,7 +133,12 @@ async function main() {
|
||||
console.log(' ✅ Started\n');
|
||||
|
||||
for (const job of wfJobs) {
|
||||
console.log(` → ${job.title} @ ${job.company || '?'}`);
|
||||
if (isAlreadyApplied(job.id)) {
|
||||
console.log(` ⏭️ Already applied — ${job.title} @ ${job.company || '?'}`);
|
||||
updateJobStatus(job.id, 'already_applied', {});
|
||||
continue;
|
||||
}
|
||||
console.log(` → ${job.title} @ ${job.company || '?'} [${job.apply_type || 'unclassified'}]`);
|
||||
try {
|
||||
const result = await applyWellfound(wfBrowser.page, job, formFiller);
|
||||
await handleResult(job, result, results, settings);
|
||||
|
||||
@@ -19,6 +19,8 @@ import { sendTelegram, formatSearchSummary } from './lib/notify.mjs';
|
||||
import { DEFAULT_FIRST_RUN_DAYS } from './lib/constants.mjs';
|
||||
import { generateKeywords } from './lib/keywords.mjs';
|
||||
import { initProgress, isCompleted, markComplete } from './lib/search_progress.mjs';
|
||||
import { classifyBatch } from './lib/classifier.mjs';
|
||||
import { getJobsByStatus, updateJobStatus } from './lib/queue.mjs';
|
||||
|
||||
async function main() {
|
||||
const lock = acquireLock('searcher', resolve(__dir, 'data'));
|
||||
@@ -168,6 +170,37 @@ async function main() {
|
||||
}
|
||||
}
|
||||
|
||||
// --- Phase 2: Classify new jobs ---
|
||||
const unclassified = getJobsByStatus('new').filter(j => !j.apply_type);
|
||||
if (unclassified.length > 0) {
|
||||
console.log(`\n🔎 Phase 2: Classifying ${unclassified.length} jobs...`);
|
||||
let liBrowser2;
|
||||
try {
|
||||
liBrowser2 = await createBrowser(settings, 'linkedin');
|
||||
await liLogin(liBrowser2.page);
|
||||
let done = 0;
|
||||
const liJobs = unclassified.filter(j => j.platform === 'linkedin');
|
||||
await classifyBatch(liBrowser2.page, liJobs, {
|
||||
onClassified: (job) => {
|
||||
updateJobStatus(job.id, 'new', { apply_type: job.apply_type, apply_url: job.apply_url, classified_at: job.classified_at });
|
||||
done++;
|
||||
process.stdout.write(`\r Classified ${done}/${liJobs.length} — last: ${job.apply_type} (${job.title?.substring(0, 30)})`);
|
||||
}
|
||||
});
|
||||
console.log(`\r ✅ ${liJobs.length} LinkedIn jobs classified`);
|
||||
} catch (e) {
|
||||
console.error(` ❌ Classification error: ${e.message}`);
|
||||
} finally {
|
||||
await liBrowser2?.browser?.close().catch(() => {});
|
||||
}
|
||||
// Wellfound jobs default to wellfound_apply (Wellfound uses its own apply flow)
|
||||
const wfJobs = unclassified.filter(j => j.platform === 'wellfound');
|
||||
for (const job of wfJobs) {
|
||||
updateJobStatus(job.id, 'new', { apply_type: 'wellfound_apply', classified_at: Date.now() });
|
||||
}
|
||||
if (wfJobs.length > 0) console.log(` ✅ ${wfJobs.length} Wellfound jobs marked for apply`);
|
||||
}
|
||||
|
||||
// Summary
|
||||
const summary = formatSearchSummary(totalAdded, totalSeen - totalAdded, platformsRun);
|
||||
console.log(`\n${summary.replace(/\*/g, '')}`);
|
||||
|
||||
86
lib/classifier.mjs
Normal file
86
lib/classifier.mjs
Normal file
@@ -0,0 +1,86 @@
|
||||
/**
|
||||
* classifier.mjs — Detect apply type for each job
|
||||
* Visits each job page and classifies: easy_apply, greenhouse, lever, workday, ashby, etc.
|
||||
* Run by searcher as Phase 2 after collecting URLs
|
||||
*/
|
||||
import {
|
||||
LINKEDIN_BASE, NAVIGATION_TIMEOUT, PAGE_LOAD_WAIT, CLICK_WAIT,
|
||||
LINKEDIN_APPLY_BUTTON_SELECTOR
|
||||
} from './constants.mjs';
|
||||
|
||||
/**
 * Build a case-insensitive matcher for one or more ATS domains.
 *
 * The domain must sit on a host boundary: it has to be preceded by the start
 * of the string, a non-hostname character (".", "/", "=", ...) or a
 * percent-escape such as "%2F" (so domains inside encoded redirect URLs still
 * match), and it must not be followed by further hostname characters. This
 * keeps "lever.co" from matching "clever.com" or "jobs.lever.co.evil.com".
 *
 * @param {...string} domains - Registrable domains, e.g. 'greenhouse.io'.
 * @returns {RegExp} Non-global regex, safe to reuse with `.test()`.
 */
function atsDomainPattern(...domains) {
  const alternatives = domains
    .map((domain) => domain.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('|');
  return new RegExp(`(?:^|[^a-z0-9-]|%[0-9a-f]{2})(?:${alternatives})(?![a-z0-9.-])`, 'i');
}

// Known external applicant-tracking systems; the first matching entry wins.
const EXTERNAL_ATS = [
  { name: 'greenhouse', pattern: atsDomainPattern('greenhouse.io') },
  { name: 'lever', pattern: atsDomainPattern('lever.co') },
  { name: 'workday', pattern: atsDomainPattern('workday.com', 'myworkdayjobs.com') },
  { name: 'ashby', pattern: atsDomainPattern('ashbyhq.com') },
  { name: 'jobvite', pattern: atsDomainPattern('jobvite.com') },
  { name: 'smartrecruiters', pattern: atsDomainPattern('smartrecruiters.com') },
  { name: 'icims', pattern: atsDomainPattern('icims.com') },
  { name: 'taleo', pattern: atsDomainPattern('taleo.net') },
  { name: 'bamboohr', pattern: atsDomainPattern('bamboohr.com') },
  { name: 'rippling', pattern: atsDomainPattern('rippling.com') },
  { name: 'workable', pattern: atsDomainPattern('workable.com') },
  { name: 'breezyhr', pattern: atsDomainPattern('breezy.hr') },
  { name: 'recruitee', pattern: atsDomainPattern('recruitee.com') },
  { name: 'dover', pattern: atsDomainPattern('dover.com') },
];

/**
 * Map an apply URL to the name of the ATS that hosts it.
 *
 * @param {string|null|undefined} url - Candidate apply URL (may be a redirect
 *   URL that embeds the real target in its query string).
 * @returns {string|null} `null` when no URL was given, the matching
 *   `EXTERNAL_ATS` name, or 'unknown_external' when nothing matches.
 */
function detectAts(url) {
  if (!url) return null;
  const match = EXTERNAL_ATS.find(({ pattern }) => pattern.test(url));
  return match ? match.name : 'unknown_external';
}
|
||||
|
||||
/**
 * Visit a LinkedIn job page and classify how the job can be applied to.
 *
 * Checks run in order: Easy Apply button, recruiter-only "interested" button,
 * then an external apply button. For external jobs the page's links are
 * scanned with the same patterns `detectAts` uses (taken from `EXTERNAL_ATS`),
 * so the scan and the classification cannot drift apart, and a link is only
 * picked when it actually points at a known ATS domain.
 *
 * @param {object} page - Browser page exposing `goto`, `waitForTimeout`, `$` and `evaluate`.
 * @param {{url: string}} job - Job record; only `url` is read here.
 * @returns {Promise<{apply_type: string, apply_url: string|null, error?: string}>}
 *   Never rejects: any failure is reported as `apply_type: 'error'` with the message.
 */
export async function classifyLinkedInJob(page, job) {
  try {
    await page.goto(job.url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT });
    await page.waitForTimeout(PAGE_LOAD_WAIT);

    // Check for Easy Apply
    const eaBtn = await page.$(`${LINKEDIN_APPLY_BUTTON_SELECTOR}[aria-label*="Easy Apply"]`);
    if (eaBtn) return { apply_type: 'easy_apply', apply_url: job.url };

    // Check for recruiter-only
    const interestedBtn = await page.$('button[aria-label*="interested"]');
    if (interestedBtn) return { apply_type: 'recruiter_only', apply_url: null };

    // Check for external apply button and find ATS URL
    const externalBtn = await page.$(`${LINKEDIN_APPLY_BUTTON_SELECTOR}:not([aria-label*="Easy Apply"])`);
    if (!externalBtn) return { apply_type: 'unknown', apply_url: null };

    // RegExp objects cannot cross into the page context, so send source + flags
    // and rebuild them there.
    const atsPatterns = EXTERNAL_ATS.map(({ pattern }) => ({
      source: pattern.source,
      flags: pattern.flags,
    }));
    const atsUrl = await page.evaluate((patterns) => {
      const matchers = patterns.map(({ source, flags }) => new RegExp(source, flags));
      const link = Array.from(document.querySelectorAll('a[href]'))
        .find((a) => matchers.some((re) => re.test(a.href)));
      return link ? link.href : null;
    }, atsPatterns);

    // detectAts returns null when no link was found; treat that as unknown external.
    const platform = detectAts(atsUrl) || 'unknown_external';
    return { apply_type: platform, apply_url: atsUrl };
  } catch (e) {
    return { apply_type: 'error', apply_url: null, error: e.message };
  }
}
|
||||
|
||||
/**
 * Classify jobs one at a time on a single page.
 *
 * Each job is copied, merged with the fields returned by
 * `classifyLinkedInJob`, and stamped with `classified_at` (epoch ms).
 *
 * @param {object} page - Browser page handed through to `classifyLinkedInJob`.
 * @param {Iterable<object>} jobs - Jobs to classify, processed sequentially.
 * @param {{onClassified?: Function}} [options] - `onClassified` is invoked
 *   with each classified job as soon as it is ready.
 * @returns {Promise<object[]>} Classified copies, in input order.
 */
export async function classifyBatch(page, jobs, { onClassified } = {}) {
  const classifiedJobs = [];

  for (const pending of jobs) {
    const verdict = await classifyLinkedInJob(page, pending);
    const entry = {
      ...pending,
      ...verdict,
      classified_at: Date.now(),
    };

    classifiedJobs.push(entry);
    if (onClassified) {
      onClassified(entry);
    }
  }

  return classifiedJobs;
}
|
||||
@@ -46,7 +46,8 @@ export function formatSearchSummary(added, skipped, platforms) {
|
||||
|
||||
export function formatApplySummary(results) {
|
||||
const { submitted, failed, needs_answer, total,
|
||||
skipped_recruiter, skipped_external, skipped_no_easy_apply, atsCounts } = results;
|
||||
skipped_recruiter, skipped_external, skipped_no_easy_apply,
|
||||
already_applied, atsCounts } = results;
|
||||
|
||||
const lines = [
|
||||
`✅ *Apply Run Complete* — ${total} jobs processed`,
|
||||
@@ -54,6 +55,7 @@ export function formatApplySummary(results) {
|
||||
`📬 Applied: ${submitted}`,
|
||||
`⏭️ Skipped (no Easy Apply): ${skipped_no_easy_apply}`,
|
||||
`🚫 Recruiter-only: ${skipped_recruiter}`,
|
||||
`🔁 Already applied: ${already_applied || 0}`,
|
||||
`❌ Failed: ${failed}`,
|
||||
`💬 Needs your answer: ${needs_answer}`,
|
||||
];
|
||||
|
||||
@@ -72,6 +72,11 @@ export function appendLog(entry) {
|
||||
saveLog(log);
|
||||
}
|
||||
|
||||
/**
 * Report whether the log holds an entry for this job with status 'applied'.
 *
 * @param {*} jobId - Job id, compared with strict equality against each entry's `id`.
 * @returns {boolean} True if at least one log entry matches both id and status.
 */
export function isAlreadyApplied(jobId) {
  const isAppliedEntryForJob = (entry) => entry.id === jobId && entry.status === 'applied';
  return loadLog().some(isAppliedEntryForJob);
}
|
||||
|
||||
export function getJobsByStatus(status) {
|
||||
const queue = loadQueue();
|
||||
if (Array.isArray(status)) return queue.filter(j => status.includes(j.status));
|
||||
|
||||
16
status.mjs
16
status.mjs
@@ -36,9 +36,13 @@ function buildStatus() {
|
||||
const byPlatform = {};
|
||||
const atsCounts = {};
|
||||
|
||||
const byApplyType = {};
|
||||
for (const job of queue) {
|
||||
byStatus[job.status] = (byStatus[job.status] || 0) + 1;
|
||||
byPlatform[job.platform] = (byPlatform[job.platform] || 0) + 1;
|
||||
if (job.status === 'new' && job.apply_type) {
|
||||
byApplyType[job.apply_type] = (byApplyType[job.apply_type] || 0) + 1;
|
||||
}
|
||||
if (job.status === 'skipped_external_unsupported' && job.ats_platform) {
|
||||
atsCounts[job.ats_platform] = (atsCounts[job.ats_platform] || 0) + 1;
|
||||
}
|
||||
@@ -80,6 +84,7 @@ function buildStatus() {
|
||||
by_platform: byPlatform,
|
||||
},
|
||||
ats_breakdown: atsCounts,
|
||||
apply_type_breakdown: byApplyType,
|
||||
last_applied: lastApplied ? {
|
||||
title: lastApplied.title,
|
||||
company: lastApplied.company,
|
||||
@@ -138,7 +143,18 @@ function formatReport(s) {
|
||||
``,
|
||||
`📋 *Queue — ${q.total} total jobs*`,
|
||||
` 🆕 Ready to apply: ${q.new}`,
|
||||
);
|
||||
|
||||
if (s.apply_type_breakdown && Object.keys(s.apply_type_breakdown).length > 0) {
|
||||
const sorted = Object.entries(s.apply_type_breakdown).sort((a, b) => b[1] - a[1]);
|
||||
for (const [type, count] of sorted) {
|
||||
lines.push(` • ${type}: ${count}`);
|
||||
}
|
||||
}
|
||||
|
||||
lines.push(
|
||||
` ✅ Applied: ${q.applied}`,
|
||||
` 🔁 Already applied: ${byStatus['already_applied'] || 0}`,
|
||||
` 💬 Needs your answer: ${q.needs_answer}`,
|
||||
` ❌ Failed: ${q.failed}`,
|
||||
` 🚫 Recruiter-only: ${q.skipped_recruiter}`,
|
||||
|
||||
Reference in New Issue
Block a user