Files
claw-apply/job_searcher.mjs

219 lines
8.6 KiB
JavaScript

#!/usr/bin/env node
/**
* job_searcher.mjs — claw-apply Job Searcher
* Searches LinkedIn + Wellfound and populates the jobs queue
* Run via cron or manually: node job_searcher.mjs
*/
import { dirname, resolve } from 'path';
import { fileURLToPath } from 'url';
const __dir = dirname(fileURLToPath(import.meta.url));
import { addJobs, loadQueue, loadConfig } from './lib/queue.mjs';
import { writeFileSync, readFileSync, existsSync } from 'fs';
import { acquireLock } from './lib/lock.mjs';
import { createBrowser } from './lib/browser.mjs';
import { verifyLogin as liLogin, searchLinkedIn } from './lib/linkedin.mjs';
import { verifyLogin as wfLogin, searchWellfound } from './lib/wellfound.mjs';
import { sendTelegram, formatSearchSummary } from './lib/notify.mjs';
import { DEFAULT_FIRST_RUN_DAYS } from './lib/constants.mjs';
import { generateKeywords } from './lib/keywords.mjs';
import { initProgress, isCompleted, markComplete } from './lib/search_progress.mjs';
import { classifyBatch } from './lib/classifier.mjs';
import { getJobsByStatus, updateJobStatus } from './lib/queue.mjs';
/**
 * Runs one complete search pass.
 *
 * Steps, in order:
 *  1. Acquire the 'searcher' lock and register a shutdown hook that writes a
 *     partial last-run record.
 *  2. Load settings, search config and profile; when an Anthropic API key is
 *     available, merge AI-generated keywords into each search.
 *  3. Pick the lookback window (saved progress, else first-run, else normal).
 *  4. Search LinkedIn and Wellfound, adding each result page to the queue.
 *  5. Classify jobs in status 'new' that have no apply_type yet.
 *  6. Print the summary, write the finished last-run record, and send the
 *     Telegram summary when at least one job was added.
 *
 * Per-platform and classification errors are logged and do not abort the run.
 *
 * @returns {Promise<{added: number, seen: number}>} counters accumulated from
 *   every result page handled during this run.
 */
async function main() {
  const dataDir = resolve(__dir, 'data');
  const lock = acquireLock('searcher', dataDir);
  console.log('🔍 claw-apply: Job Searcher starting\n');

  let totalAdded = 0;
  let totalSeen = 0;
  const platformsRun = [];
  const startedAt = Date.now();

  // Persists run statistics; called with `false` from the shutdown hook and
  // with `true` once the run has completed.
  const writeLastRun = (finished = false) => {
    writeFileSync(resolve(__dir, 'data/searcher_last_run.json'), JSON.stringify({
      started_at: startedAt,
      finished_at: finished ? Date.now() : null,
      finished,
      added: totalAdded,
      seen: totalSeen,
      skipped_dupes: totalSeen - totalAdded,
      platforms: platformsRun,
    }, null, 2));
  };

  lock.onShutdown(() => {
    console.log(' Writing partial results to last-run file...');
    writeLastRun(false);
  });

  // Load config
  const settings = loadConfig(resolve(__dir, 'config/settings.json'));
  const searchConfig = loadConfig(resolve(__dir, 'config/search_config.json'));
  const profile = loadConfig(resolve(__dir, 'config/profile.json'));
  const anthropicKey = process.env.ANTHROPIC_API_KEY || settings.anthropic_api_key;

  // Enhance keywords with AI if API key available
  if (anthropicKey) {
    console.log('🤖 Generating AI-enhanced search keywords...');
    for (const search of searchConfig.searches) {
      try {
        const aiKeywords = await generateKeywords(search, profile, anthropicKey);
        const merged = [...new Set([...search.keywords, ...aiKeywords])];
        // Show the keyword count before and after merging.
        console.log(` [${search.name}] ${search.keywords.length} → ${merged.length} keywords`);
        search.keywords = merged;
      } catch (e) {
        console.warn(` [${search.name}] AI keywords failed, using static: ${e.message}`);
      }
    }
    console.log('');
  }

  // Determine lookback: check for an in-progress run first, then fall back to
  // first-run/normal logic. An unreadable progress file is ignored rather than
  // aborting the whole run.
  const progressPath = resolve(__dir, 'data/search_progress.json');
  let savedProgress = null;
  if (existsSync(progressPath)) {
    try {
      savedProgress = JSON.parse(readFileSync(progressPath, 'utf8'));
    } catch (e) {
      console.warn(` ⚠️ Ignoring unreadable search progress file: ${e.message}`);
    }
  }

  // First run detection: if queue is empty, use first_run_days lookback
  const isFirstRun = loadQueue().length === 0;
  const lookbackDays = savedProgress?.lookback_days
    || (isFirstRun ? (searchConfig.first_run_days || DEFAULT_FIRST_RUN_DAYS) : (searchConfig.posted_within_days || 2));
  if (savedProgress?.lookback_days) {
    console.log(`🔁 Resuming ${lookbackDays}-day search run\n`);
  } else if (isFirstRun) {
    console.log(`📅 First run — looking back ${lookbackDays} days\n`);
  }

  // Init progress tracking — enables resume on restart
  initProgress(dataDir, lookbackDays);

  // Runs every configured search for one platform in a single browser session.
  // Errors are logged and swallowed so the other platform still gets its turn.
  const runPlatformSearch = async ({ key, label, banner, searches, verify, runSearch, loginRequired }) => {
    if (searches.length === 0) return;
    console.log(banner);
    let session;
    try {
      session = await createBrowser(settings, key);
      const loggedIn = await verify(session.page);
      if (loggedIn) {
        console.log(' ✅ Logged in');
      } else if (loginRequired) {
        throw new Error(`${label} not logged in`);
      } else {
        console.warn(` ⚠️ ${label} login unconfirmed, proceeding`);
      }

      for (const search of searches) {
        if (isCompleted(key, search.name)) {
          console.log(` [${search.name}] ✓ already done, skipping`);
          continue;
        }
        // Force the run-wide lookback onto this search's filters.
        const effectiveSearch = { ...search, filters: { ...search.filters, posted_within_days: lookbackDays } };
        let queryFound = 0;
        let queryAdded = 0;
        await runSearch(session.page, effectiveSearch, {
          onPage: (pageJobs) => {
            const added = addJobs(pageJobs);
            totalAdded += added;
            totalSeen += pageJobs.length;
            queryFound += pageJobs.length;
            queryAdded += added;
            process.stdout.write(`\r [${search.name}] ${queryFound} found, ${queryAdded} new...`);
          }
        });
        console.log(`\r [${search.name}] ${queryFound} found, ${queryAdded} new`);
        markComplete(key, search.name, { found: queryFound, added: queryAdded });
      }
      platformsRun.push(label);
    } catch (e) {
      console.error(` ❌ ${label} error: ${e.message}`);
    } finally {
      // Best-effort close; a failure to close must not mask the real outcome.
      await session?.browser?.close().catch(() => {});
    }
  };

  // --- LinkedIn ---
  await runPlatformSearch({
    key: 'linkedin',
    label: 'LinkedIn',
    banner: '🔗 LinkedIn search...',
    searches: searchConfig.searches.filter(s => s.platforms?.includes('linkedin')),
    verify: liLogin,
    runSearch: searchLinkedIn,
    loginRequired: true,
  });

  // --- Wellfound ---
  await runPlatformSearch({
    key: 'wellfound',
    label: 'Wellfound',
    banner: '\n🌐 Wellfound search...',
    searches: searchConfig.searches.filter(s => s.platforms?.includes('wellfound')),
    verify: wfLogin,
    runSearch: searchWellfound,
    loginRequired: false,
  });

  // --- Phase 2: Classify new jobs ---
  const unclassified = getJobsByStatus('new').filter(j => !j.apply_type);
  if (unclassified.length > 0) {
    console.log(`\n🔎 Phase 2: Classifying ${unclassified.length} jobs...`);

    // Only start a browser when there is LinkedIn work to do.
    const liJobs = unclassified.filter(j => j.platform === 'linkedin');
    if (liJobs.length > 0) {
      let classifierSession;
      try {
        classifierSession = await createBrowser(settings, 'linkedin');
        const loggedIn = await liLogin(classifierSession.page);
        if (!loggedIn) console.warn(' ⚠️ LinkedIn login unconfirmed, classification may be unreliable');
        let done = 0;
        await classifyBatch(classifierSession.page, liJobs, {
          onClassified: (job) => {
            // Status stays 'new'; only the classification fields are recorded.
            updateJobStatus(job.id, 'new', { apply_type: job.apply_type, apply_url: job.apply_url, classified_at: job.classified_at });
            done++;
            process.stdout.write(`\r Classified ${done}/${liJobs.length} — last: ${job.apply_type} (${job.title?.substring(0, 30)})`);
          }
        });
        console.log(`\r${liJobs.length} LinkedIn jobs classified`);
      } catch (e) {
        console.error(` ❌ Classification error: ${e.message}`);
      } finally {
        await classifierSession?.browser?.close().catch(() => {});
      }
    }

    // Wellfound jobs are not browser-classified; they are tagged 'wellfound_apply' directly.
    const wfJobs = unclassified.filter(j => j.platform === 'wellfound');
    for (const job of wfJobs) {
      updateJobStatus(job.id, 'new', { apply_type: 'wellfound_apply', classified_at: Date.now() });
    }
    if (wfJobs.length > 0) console.log(`${wfJobs.length} Wellfound jobs marked for apply`);
  }

  // Summary
  const summary = formatSearchSummary(totalAdded, totalSeen - totalAdded, platformsRun);
  console.log(`\n${summary.replace(/\*/g, '')}`);
  // Record the finished run before notifying so a notification failure cannot
  // leave the last-run file without a finished entry.
  writeLastRun(true);
  if (totalAdded > 0) await sendTelegram(settings, summary);
  console.log('\n✅ Search complete');
  return { added: totalAdded, seen: totalSeen };
}
// Script entry point: report any rejection that escapes main() and exit
// with a non-zero status code.
const reportFatal = (err) => {
  console.error('Fatal:', err.message);
  process.exit(1);
};

main().catch(reportFatal);