feat: per-keyword resume — restart picks up from last completed keyword, not keyword 1
This commit is contained in:
@@ -22,7 +22,7 @@ import { verifyLogin as wfLogin, searchWellfound } from './lib/wellfound.mjs';
|
||||
import { sendTelegram, formatSearchSummary } from './lib/notify.mjs';
|
||||
import { DEFAULT_FIRST_RUN_DAYS } from './lib/constants.mjs';
|
||||
import { generateKeywords } from './lib/keywords.mjs';
|
||||
import { initProgress, isCompleted, markComplete } from './lib/search_progress.mjs';
|
||||
import { initProgress, isCompleted, markComplete, getKeywordStart, markKeywordComplete } from './lib/search_progress.mjs';
|
||||
import { ensureLoggedIn } from './lib/session.mjs';
|
||||
|
||||
async function main() {
|
||||
@@ -112,7 +112,9 @@ async function main() {
|
||||
console.log(` [${search.name}] ✓ already done, skipping`);
|
||||
continue;
|
||||
}
|
||||
const effectiveSearch = { ...search, filters: { ...search.filters, posted_within_days: lookbackDays } };
|
||||
const keywordStart = getKeywordStart('linkedin', search.name);
|
||||
if (keywordStart > 0) console.log(` [${search.name}] resuming from keyword ${keywordStart + 1}/${search.keywords.length}`);
|
||||
const effectiveSearch = { ...search, keywords: search.keywords.slice(keywordStart), filters: { ...search.filters, posted_within_days: lookbackDays } };
|
||||
let queryFound = 0, queryAdded = 0;
|
||||
await searchLinkedIn(liBrowser.page, effectiveSearch, {
|
||||
onPage: (pageJobs) => {
|
||||
@@ -122,6 +124,9 @@ async function main() {
|
||||
queryFound += pageJobs.length;
|
||||
queryAdded += added;
|
||||
process.stdout.write(`\r [${search.name}] ${queryFound} found, ${queryAdded} new...`);
|
||||
},
|
||||
onKeyword: (ki) => {
|
||||
markKeywordComplete('linkedin', search.name, keywordStart + ki);
|
||||
}
|
||||
});
|
||||
console.log(`\r [${search.name}] ${queryFound} found, ${queryAdded} new`);
|
||||
|
||||
@@ -16,7 +16,8 @@ export async function verifyLogin(page) {
|
||||
return page.url().includes('/feed');
|
||||
}
|
||||
|
||||
export async function searchLinkedIn(page, search, { onPage } = {}) {
|
||||
export async function searchLinkedIn(page, search, { onPage, onKeyword } = {}) {
|
||||
const callbacks = { onPage, onKeyword };
|
||||
const jobs = [];
|
||||
const seenIds = new Set();
|
||||
|
||||
@@ -124,7 +125,7 @@ export async function searchLinkedIn(page, search, { onPage } = {}) {
|
||||
jobs.push(job);
|
||||
}
|
||||
|
||||
if (pageJobs.length > 0 && onPage) onPage(pageJobs);
|
||||
if (pageJobs.length > 0 && callbacks.onPage) callbacks.onPage(pageJobs);
|
||||
|
||||
const nextBtn = await page.$('button[aria-label="View next page"]');
|
||||
if (!nextBtn) break;
|
||||
@@ -132,6 +133,8 @@ export async function searchLinkedIn(page, search, { onPage } = {}) {
|
||||
await page.waitForTimeout(PAGE_LOAD_WAIT);
|
||||
pageNum++;
|
||||
}
|
||||
// Mark keyword complete after all its pages are done
|
||||
callbacks.onKeyword?.(ki);
|
||||
}
|
||||
|
||||
return jobs;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/**
|
||||
* search_progress.mjs — Track which searches have completed
|
||||
* Enables resume on restart without re-running finished searches
|
||||
* Enables resume on restart — skips completed tracks and completed keywords within a track
|
||||
*/
|
||||
import { readFileSync, writeFileSync, existsSync, unlinkSync } from 'fs';
|
||||
|
||||
@@ -12,24 +12,22 @@ export function initProgress(dataDir, lookbackDays) {
|
||||
|
||||
if (existsSync(progressPath)) {
|
||||
const saved = JSON.parse(readFileSync(progressPath, 'utf8'));
|
||||
// Only resume if same lookback window
|
||||
if (saved.lookback_days === lookbackDays) {
|
||||
progress = saved;
|
||||
const done = progress.completed.length;
|
||||
const done = progress.completed?.length ?? 0;
|
||||
if (done > 0) {
|
||||
console.log(`🔁 Resuming — skipping already-completed: ${progress.completed.join(', ')}\n`);
|
||||
console.log(`🔁 Resuming — completed tracks: ${progress.completed.join(', ')}\n`);
|
||||
}
|
||||
return progress;
|
||||
}
|
||||
console.log(`🆕 New lookback window (${lookbackDays}d), starting fresh\n`);
|
||||
}
|
||||
|
||||
// Fresh start
|
||||
progress = {
|
||||
lookback_days: lookbackDays,
|
||||
started_at: Date.now(),
|
||||
completed: [],
|
||||
pending: [],
|
||||
keyword_progress: {}, // key: "platform:track" → last completed keyword index (0-based)
|
||||
};
|
||||
save();
|
||||
return progress;
|
||||
@@ -40,11 +38,29 @@ export function isCompleted(platform, track) {
|
||||
return progress.completed.includes(`${platform}:${track}`);
|
||||
}
|
||||
|
||||
/**
 * Returns the index of the first keyword to run for a track, skipping the
 * keywords already recorded as completed in the module-level progress state.
 *
 * @param {string} platform - Platform part of the progress key (e.g. 'linkedin').
 * @param {string} track - Track (search name) part of the progress key.
 * @returns {number} 0 when no usable progress is recorded, otherwise the last
 *   completed keyword index + 1.
 */
export function getKeywordStart(platform, track) {
  if (!progress) return 0;
  const key = `${platform}:${track}`;
  const last = progress.keyword_progress?.[key];
  // The stored value is restored from a JSON file, so it is not guaranteed to
  // be a sane index. Anything other than a non-negative integer (missing,
  // null, a string, a negative number) is treated as "nothing completed yet";
  // otherwise `last + 1` could become a string or a negative slice offset.
  if (!Number.isInteger(last) || last < 0) return 0;
  return last + 1; // resume from the keyword after the last completed one
}
|
||||
|
||||
/**
 * Records that a keyword of a track has finished and persists the progress
 * state. Intended to be called after each keyword completes.
 *
 * Does nothing when no progress state has been initialised.
 *
 * @param {string} platform - Platform part of the progress key.
 * @param {string} track - Track (search name) part of the progress key.
 * @param {number} keywordIndex - Index stored as the last completed keyword.
 */
export function markKeywordComplete(platform, track, keywordIndex) {
  if (progress) {
    const progressKey = `${platform}:${track}`;
    // Create the per-keyword map on first use if the progress object lacks it.
    const perKeyword = progress.keyword_progress || (progress.keyword_progress = {});
    perKeyword[progressKey] = keywordIndex;
    save();
  }
}
|
||||
|
||||
/**
 * Marks a whole track as completed, stores its stats and persists the
 * progress state.
 *
 * Does nothing when no progress state has been initialised.
 *
 * @param {string} platform - Platform part of the progress key.
 * @param {string} track - Track (search name) part of the progress key.
 * @param {object} [stats] - Stats copied into the `stats:<key>` entry together
 *   with a `completed_at` timestamp.
 */
export function markComplete(platform, track, stats) {
  if (!progress) return;
  const key = `${platform}:${track}`;

  // A progress object restored from disk may lack these arrays; treat a
  // missing list as empty instead of throwing while recording completion.
  progress.pending = Array.isArray(progress.pending)
    ? progress.pending.filter((k) => k !== key)
    : [];
  if (!Array.isArray(progress.completed)) progress.completed = [];
  if (!progress.completed.includes(key)) progress.completed.push(key);

  // Clean up per-keyword progress for the completed track
  if (progress.keyword_progress) delete progress.keyword_progress[key];

  progress[`stats:${key}`] = { ...stats, completed_at: Date.now() };
  save();
}
|
||||
|
||||
Reference in New Issue
Block a user