Make searcher bulletproof for 12-16hr runs
- Browser crash recovery: per-keyword error handling, auto-recreate browser and re-login if page dies mid-search - Platform-level retry: if browser creation or login fails entirely, retry up to 3 times with escalating waits (5/10/15 min) - Progress saved after each search track (not just at end) - Unhandled rejection handler to prevent silent process death - Fixed async callback in classifyExternalJobs - Added ensureLoggedIn to session.mjs for searcher flow Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -162,10 +162,13 @@ async function main() {
|
||||
const liSearches = searchConfig.searches.filter(s => s.platforms?.includes('linkedin'));
|
||||
const wfSearches = searchConfig.searches.filter(s => s.platforms?.includes('wellfound'));
|
||||
|
||||
const MAX_PLATFORM_RETRIES = 3;
|
||||
|
||||
// --- LinkedIn ---
|
||||
if (liSearches.length > 0) {
|
||||
console.log('🔗 LinkedIn search...');
|
||||
let liBrowser;
|
||||
for (let attempt = 1; attempt <= MAX_PLATFORM_RETRIES; attempt++) {
|
||||
console.log(`🔗 LinkedIn search...${attempt > 1 ? ` (attempt ${attempt}/${MAX_PLATFORM_RETRIES})` : ''}`);
|
||||
try {
|
||||
console.log(' Creating browser...');
|
||||
liBrowser = await createBrowser(settings, 'linkedin');
|
||||
@@ -183,6 +186,7 @@ async function main() {
|
||||
if (keywordStart > 0) console.log(` [${search.name}] resuming from keyword ${keywordStart + 1}/${search.keywords.length}`);
|
||||
const effectiveSearch = { ...search, keywords: search.keywords.slice(keywordStart), keywordOffset: keywordStart, filters: { ...search.filters, posted_within_days: lookbackDays } };
|
||||
let queryFound = 0, queryAdded = 0;
|
||||
try {
|
||||
await searchLinkedIn(liBrowser.page, effectiveSearch, {
|
||||
onPage: async (pageJobs) => {
|
||||
const added = await addJobs(pageJobs);
|
||||
@@ -196,10 +200,25 @@ async function main() {
|
||||
markKeywordComplete('linkedin', search.name, keywordStart + ki);
|
||||
}
|
||||
});
|
||||
} catch (searchErr) {
|
||||
console.error(`\n ⚠️ [${search.name}] search error: ${searchErr.message}`);
|
||||
// Check if browser is still alive
|
||||
const alive = await liBrowser.page.evaluate(() => true).catch(() => false);
|
||||
if (!alive) {
|
||||
console.log(' 🔄 Browser died — recreating...');
|
||||
await liBrowser.browser?.close().catch(() => {});
|
||||
liBrowser = await createBrowser(settings, 'linkedin');
|
||||
const relogged = await ensureLoggedIn(liBrowser.page, liLogin, 'linkedin', settings.kernel_api_key || process.env.KERNEL_API_KEY);
|
||||
if (!relogged) { console.error(' ❌ Could not re-login after browser crash'); break; }
|
||||
console.log(' ✅ Browser recovered');
|
||||
}
|
||||
}
|
||||
console.log(`\r [${search.name}] ${queryFound} found, ${queryAdded} new`);
|
||||
markComplete('linkedin', search.name, { found: queryFound, added: queryAdded });
|
||||
const tc = trackCounts[search.name] || (trackCounts[search.name] = { found: 0, added: 0 });
|
||||
tc.found += queryFound; tc.added += queryAdded;
|
||||
// Save progress after each search track
|
||||
writeLastRun(false);
|
||||
}
|
||||
|
||||
platformsRun.push('LinkedIn');
|
||||
@@ -208,23 +227,36 @@ async function main() {
|
||||
const unclassified = getJobsByStatus('new').filter(j => j.apply_type === 'unknown_external' && !j.apply_url);
|
||||
if (unclassified.length > 0) {
|
||||
console.log(`\n🔍 Classifying ${unclassified.length} external jobs...`);
|
||||
const { classified, remaining } = await classifyExternalJobs(liBrowser.page, unclassified, (job, applyType, applyUrl) => {
|
||||
try {
|
||||
const { classified, remaining } = await classifyExternalJobs(liBrowser.page, unclassified, async (job, applyType, applyUrl) => {
|
||||
await updateJobStatus(job.id, 'new', { apply_type: applyType, apply_url: applyUrl });
|
||||
});
|
||||
console.log(` ✅ Classified ${classified}, ${remaining} still unknown`);
|
||||
} catch (classErr) {
|
||||
console.error(` ⚠️ Classification error: ${classErr.message}`);
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.error(` ❌ LinkedIn error: ${e.message}`);
|
||||
if (e.stack) console.error(` Stack: ${e.stack.split('\n').slice(1, 3).join(' | ').trim()}`);
|
||||
if (attempt < MAX_PLATFORM_RETRIES) {
|
||||
const waitMin = attempt * 5;
|
||||
console.log(` ⏳ Retrying in ${waitMin} minutes...`);
|
||||
await new Promise(r => setTimeout(r, waitMin * 60 * 1000));
|
||||
continue;
|
||||
}
|
||||
} finally {
|
||||
await liBrowser?.browser?.close().catch(() => {});
|
||||
}
|
||||
break; // success or max retries — exit retry loop
|
||||
}
|
||||
}
|
||||
|
||||
// --- Wellfound ---
|
||||
if (wfSearches.length > 0) {
|
||||
console.log('\n🌐 Wellfound search...');
|
||||
let wfBrowser;
|
||||
for (let attempt = 1; attempt <= MAX_PLATFORM_RETRIES; attempt++) {
|
||||
console.log(`\n🌐 Wellfound search...${attempt > 1 ? ` (attempt ${attempt}/${MAX_PLATFORM_RETRIES})` : ''}`);
|
||||
try {
|
||||
console.log(' Creating browser...');
|
||||
wfBrowser = await createBrowser(settings, 'wellfound');
|
||||
@@ -240,6 +272,7 @@ async function main() {
|
||||
}
|
||||
const effectiveSearch = { ...search, filters: { ...search.filters, posted_within_days: lookbackDays } };
|
||||
let queryFound = 0, queryAdded = 0;
|
||||
try {
|
||||
await searchWellfound(wfBrowser.page, effectiveSearch, {
|
||||
onPage: async (pageJobs) => {
|
||||
const added = await addJobs(pageJobs);
|
||||
@@ -250,19 +283,40 @@ async function main() {
|
||||
process.stdout.write(`\r [${search.name}] ${queryFound} found, ${queryAdded} new...`);
|
||||
}
|
||||
});
|
||||
} catch (searchErr) {
|
||||
console.error(`\n ⚠️ [${search.name}] search error: ${searchErr.message}`);
|
||||
const alive = await wfBrowser.page.evaluate(() => true).catch(() => false);
|
||||
if (!alive) {
|
||||
console.log(' 🔄 Browser died — recreating...');
|
||||
await wfBrowser.browser?.close().catch(() => {});
|
||||
wfBrowser = await createBrowser(settings, 'wellfound');
|
||||
const relogged = await ensureLoggedIn(wfBrowser.page, wfLogin, 'wellfound', settings.kernel_api_key || process.env.KERNEL_API_KEY);
|
||||
if (!relogged) { console.warn(' ⚠️ Could not re-login after browser crash'); break; }
|
||||
console.log(' ✅ Browser recovered');
|
||||
}
|
||||
}
|
||||
console.log(`\r [${search.name}] ${queryFound} found, ${queryAdded} new`);
|
||||
markComplete('wellfound', search.name, { found: queryFound, added: queryAdded });
|
||||
const tc = trackCounts[search.name] || (trackCounts[search.name] = { found: 0, added: 0 });
|
||||
tc.found += queryFound; tc.added += queryAdded;
|
||||
writeLastRun(false);
|
||||
}
|
||||
|
||||
platformsRun.push('Wellfound');
|
||||
} catch (e) {
|
||||
console.error(` ❌ Wellfound error: ${e.message}`);
|
||||
if (e.stack) console.error(` Stack: ${e.stack.split('\n').slice(1, 3).join(' | ').trim()}`);
|
||||
if (attempt < MAX_PLATFORM_RETRIES) {
|
||||
const waitMin = attempt * 5;
|
||||
console.log(` ⏳ Retrying in ${waitMin} minutes...`);
|
||||
await new Promise(r => setTimeout(r, waitMin * 60 * 1000));
|
||||
continue;
|
||||
}
|
||||
} finally {
|
||||
await wfBrowser?.browser?.close().catch(() => {});
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Summary
|
||||
@@ -282,6 +336,12 @@ async function main() {
|
||||
return { added: totalAdded, seen: totalSeen };
|
||||
}
|
||||
|
||||
// Catch unhandled rejections so the process doesn't silently die during a 12hr run
|
||||
process.on('unhandledRejection', (err) => {
|
||||
console.error('⚠️ Unhandled rejection:', err?.message || err);
|
||||
if (err?.stack) console.error(err.stack);
|
||||
});
|
||||
|
||||
main().then(() => {
|
||||
process.exit(0);
|
||||
}).catch(e => {
|
||||
|
||||
Reference in New Issue
Block a user