feat: per-keyword resume — a restart continues with the keyword after the last completed one instead of starting over at keyword 1

This commit is contained in:
2026-03-06 02:23:07 +00:00
parent ffb9eec4cb
commit 65d6d1e50c
3 changed files with 35 additions and 11 deletions

View File

@@ -22,7 +22,7 @@ import { verifyLogin as wfLogin, searchWellfound } from './lib/wellfound.mjs';
import { sendTelegram, formatSearchSummary } from './lib/notify.mjs';
import { DEFAULT_FIRST_RUN_DAYS } from './lib/constants.mjs';
import { generateKeywords } from './lib/keywords.mjs';
import { initProgress, isCompleted, markComplete } from './lib/search_progress.mjs';
import { initProgress, isCompleted, markComplete, getKeywordStart, markKeywordComplete } from './lib/search_progress.mjs';
import { ensureLoggedIn } from './lib/session.mjs';
async function main() {
@@ -112,7 +112,9 @@ async function main() {
console.log(` [${search.name}] ✓ already done, skipping`);
continue;
}
const effectiveSearch = { ...search, filters: { ...search.filters, posted_within_days: lookbackDays } };
const keywordStart = getKeywordStart('linkedin', search.name);
if (keywordStart > 0) console.log(` [${search.name}] resuming from keyword ${keywordStart + 1}/${search.keywords.length}`);
const effectiveSearch = { ...search, keywords: search.keywords.slice(keywordStart), filters: { ...search.filters, posted_within_days: lookbackDays } };
let queryFound = 0, queryAdded = 0;
await searchLinkedIn(liBrowser.page, effectiveSearch, {
onPage: (pageJobs) => {
@@ -122,6 +124,9 @@ async function main() {
queryFound += pageJobs.length;
queryAdded += added;
process.stdout.write(`\r [${search.name}] ${queryFound} found, ${queryAdded} new...`);
},
onKeyword: (ki) => {
markKeywordComplete('linkedin', search.name, keywordStart + ki);
}
});
console.log(`\r [${search.name}] ${queryFound} found, ${queryAdded} new`);

View File

@@ -16,7 +16,8 @@ export async function verifyLogin(page) {
return page.url().includes('/feed');
}
export async function searchLinkedIn(page, search, { onPage } = {}) {
export async function searchLinkedIn(page, search, { onPage, onKeyword } = {}) {
const callbacks = { onPage, onKeyword };
const jobs = [];
const seenIds = new Set();
@@ -124,7 +125,7 @@ export async function searchLinkedIn(page, search, { onPage } = {}) {
jobs.push(job);
}
if (pageJobs.length > 0 && onPage) onPage(pageJobs);
if (pageJobs.length > 0 && callbacks.onPage) callbacks.onPage(pageJobs);
const nextBtn = await page.$('button[aria-label="View next page"]');
if (!nextBtn) break;
@@ -132,6 +133,8 @@ export async function searchLinkedIn(page, search, { onPage } = {}) {
await page.waitForTimeout(PAGE_LOAD_WAIT);
pageNum++;
}
// Mark keyword complete after all its pages are done
callbacks.onKeyword?.(ki);
}
return jobs;

View File

@@ -1,6 +1,6 @@
/**
* search_progress.mjs — Track which searches have completed
* Enables resume on restart without re-running finished searches
* Enables resume on restart — skips completed tracks and completed keywords within a track
*/
import { readFileSync, writeFileSync, existsSync, unlinkSync } from 'fs';
@@ -12,24 +12,22 @@ export function initProgress(dataDir, lookbackDays) {
if (existsSync(progressPath)) {
const saved = JSON.parse(readFileSync(progressPath, 'utf8'));
// Only resume if same lookback window
if (saved.lookback_days === lookbackDays) {
progress = saved;
const done = progress.completed.length;
const done = progress.completed?.length ?? 0;
if (done > 0) {
console.log(`🔁 Resuming — skipping already-completed: ${progress.completed.join(', ')}\n`);
console.log(`🔁 Resuming — completed tracks: ${progress.completed.join(', ')}\n`);
}
return progress;
}
console.log(`🆕 New lookback window (${lookbackDays}d), starting fresh\n`);
}
// Fresh start
progress = {
lookback_days: lookbackDays,
started_at: Date.now(),
completed: [],
pending: [],
keyword_progress: {}, // key: "platform:track" → last completed keyword index (0-based)
};
save();
return progress;
@@ -40,11 +38,29 @@ export function isCompleted(platform, track) {
return progress.completed.includes(`${platform}:${track}`);
}
/**
 * Work out which keyword index a track should start from.
 *
 * @param platform - Platform half of the progress key (e.g. 'linkedin').
 * @param track - Track half of the progress key.
 * @returns {number} 0 when no progress object is loaded or nothing is recorded
 *   under "platform:track"; otherwise one past the recorded index.
 */
export function getKeywordStart(platform, track) {
  if (progress) {
    const recorded = progress.keyword_progress?.[`${platform}:${track}`];
    // A missing entry is treated as -1 so the computed start index is 0.
    return (recorded ?? -1) + 1;
  }
  return 0;
}
/**
 * Record the index of the keyword that just finished for a platform/track.
 *
 * Does nothing when no progress object is loaded. Otherwise stores
 * `keywordIndex` under the "platform:track" key of `keyword_progress`
 * (creating that map if it is absent) and then calls `save()`.
 *
 * @param platform - Platform half of the progress key (e.g. 'linkedin').
 * @param track - Track half of the progress key.
 * @param keywordIndex - Index to record as the last completed keyword.
 */
export function markKeywordComplete(platform, track, keywordIndex) {
  if (progress) {
    const entryKey = `${platform}:${track}`;
    // Reuse the existing map when present; otherwise start a new one.
    const perTrack = progress.keyword_progress ? progress.keyword_progress : {};
    perTrack[entryKey] = keywordIndex;
    progress.keyword_progress = perTrack;
    save();
  }
}
/**
 * Mark a whole platform/track as finished.
 *
 * Does nothing when no progress object is loaded. Otherwise it:
 *  - removes "platform:track" from `pending`,
 *  - appends it to `completed` if not already present,
 *  - drops the track's per-keyword entry from `keyword_progress`,
 *  - stores `stats` plus a `completed_at` timestamp under "stats:platform:track",
 *  - calls `save()` (defined elsewhere in this module).
 *
 * @param platform - Platform half of the progress key (e.g. 'linkedin').
 * @param track - Track half of the progress key.
 * @param stats - Object whose own properties are copied into the stats entry.
 */
export function markComplete(platform, track, stats) {
  if (!progress) return;
  const key = `${platform}:${track}`;
  // A resumed progress object is not guaranteed to carry these arrays (other
  // code in this module already treats its fields as optional), so default
  // them instead of throwing a TypeError on a missing property.
  progress.pending = (progress.pending ?? []).filter((k) => k !== key);
  if (!progress.completed) progress.completed = [];
  if (!progress.completed.includes(key)) progress.completed.push(key);
  // Clean up per-keyword progress for the completed track
  if (progress.keyword_progress) delete progress.keyword_progress[key];
  progress[`stats:${key}`] = { ...stats, completed_at: Date.now() };
  save();
}