From 65d6d1e50cd636ed0c11d23458cce56c52e42923 Mon Sep 17 00:00:00 2001 From: Claw Date: Fri, 6 Mar 2026 02:23:07 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20per-keyword=20resume=20=E2=80=94=20rest?= =?UTF-8?q?art=20picks=20up=20from=20last=20completed=20keyword,=20not=20k?= =?UTF-8?q?eyword=201?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- job_searcher.mjs | 9 +++++++-- lib/linkedin.mjs | 7 +++++-- lib/search_progress.mjs | 30 +++++++++++++++++++++++------- 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/job_searcher.mjs b/job_searcher.mjs index 1661d3a..e28c44c 100644 --- a/job_searcher.mjs +++ b/job_searcher.mjs @@ -22,7 +22,7 @@ import { verifyLogin as wfLogin, searchWellfound } from './lib/wellfound.mjs'; import { sendTelegram, formatSearchSummary } from './lib/notify.mjs'; import { DEFAULT_FIRST_RUN_DAYS } from './lib/constants.mjs'; import { generateKeywords } from './lib/keywords.mjs'; -import { initProgress, isCompleted, markComplete } from './lib/search_progress.mjs'; +import { initProgress, isCompleted, markComplete, getKeywordStart, markKeywordComplete } from './lib/search_progress.mjs'; import { ensureLoggedIn } from './lib/session.mjs'; async function main() { @@ -112,7 +112,9 @@ async function main() { console.log(` [${search.name}] ✓ already done, skipping`); continue; } - const effectiveSearch = { ...search, filters: { ...search.filters, posted_within_days: lookbackDays } }; + const keywordStart = getKeywordStart('linkedin', search.name); + if (keywordStart > 0) console.log(` [${search.name}] resuming from keyword ${keywordStart + 1}/${search.keywords.length}`); + const effectiveSearch = { ...search, keywords: search.keywords.slice(keywordStart), filters: { ...search.filters, posted_within_days: lookbackDays } }; let queryFound = 0, queryAdded = 0; await searchLinkedIn(liBrowser.page, effectiveSearch, { onPage: (pageJobs) => { @@ -122,6 +124,9 @@ async function main() { queryFound += pageJobs.length; queryAdded += added; process.stdout.write(`\r [${search.name}] ${queryFound} found, ${queryAdded} new...`); + }, + onKeyword: (ki) => { + markKeywordComplete('linkedin', search.name, keywordStart + ki); } }); console.log(`\r [${search.name}] ${queryFound} found, ${queryAdded} new`); diff --git a/lib/linkedin.mjs b/lib/linkedin.mjs index d9f2f31..3cfb9df 100644 --- a/lib/linkedin.mjs +++ b/lib/linkedin.mjs @@ -16,7 +16,8 @@ export async function verifyLogin(page) { return page.url().includes('/feed'); } -export async function searchLinkedIn(page, search, { onPage } = {}) { +export async function searchLinkedIn(page, search, { onPage, onKeyword } = {}) { + const callbacks = { onPage, onKeyword }; const jobs = []; const seenIds = new Set(); @@ -124,7 +125,7 @@ export async function searchLinkedIn(page, search, { onPage } = {}) { jobs.push(job); } - if (pageJobs.length > 0 && onPage) onPage(pageJobs); + if (pageJobs.length > 0 && callbacks.onPage) callbacks.onPage(pageJobs); const nextBtn = await page.$('button[aria-label="View next page"]'); if (!nextBtn) break; @@ -132,6 +133,8 @@ export async function searchLinkedIn(page, search, { onPage } = {}) { await page.waitForTimeout(PAGE_LOAD_WAIT); pageNum++; } + // Mark keyword complete after all its pages are done + callbacks.onKeyword?.(ki); } return jobs; diff --git a/lib/search_progress.mjs b/lib/search_progress.mjs index a414187..09c58a9 100644 --- a/lib/search_progress.mjs +++ b/lib/search_progress.mjs @@ -1,6 +1,6 @@ /** * search_progress.mjs — Track which searches have completed - * Enables resume on restart without re-running finished searches + * Enables resume on restart — skips completed tracks and completed keywords within a track */ import { readFileSync, writeFileSync, existsSync, unlinkSync } from 'fs'; @@ -12,24 +12,22 @@ export function initProgress(dataDir, lookbackDays) { if (existsSync(progressPath)) { const saved = JSON.parse(readFileSync(progressPath, 'utf8')); - // Only resume if same lookback window if (saved.lookback_days === lookbackDays) { progress = saved; - const done = progress.completed.length; + const done = progress.completed?.length ?? 0; if (done > 0) { - console.log(`🔁 Resuming — skipping already-completed: ${progress.completed.join(', ')}\n`); + console.log(`🔁 Resuming — completed tracks: ${progress.completed.join(', ')}\n`); } return progress; } console.log(`🆕 New lookback window (${lookbackDays}d), starting fresh\n`); } - // Fresh start progress = { lookback_days: lookbackDays, started_at: Date.now(), completed: [], - pending: [], + keyword_progress: {}, // key: "platform:track" → last completed keyword index (0-based) }; save(); return progress; @@ -40,11 +38,29 @@ export function isCompleted(platform, track) { return progress.completed.includes(`${platform}:${track}`); } +/** Returns the index of the first keyword to run (skips already-completed ones) */ +export function getKeywordStart(platform, track) { + if (!progress) return 0; + const key = `${platform}:${track}`; + const last = progress.keyword_progress?.[key] ?? -1; + return last + 1; // resume from next keyword after last completed +} + +/** Call after each keyword completes */ +export function markKeywordComplete(platform, track, keywordIndex) { + if (!progress) return; + const key = `${platform}:${track}`; + if (!progress.keyword_progress) progress.keyword_progress = {}; + progress.keyword_progress[key] = keywordIndex; + save(); +} + export function markComplete(platform, track, stats) { if (!progress) return; const key = `${platform}:${track}`; - progress.pending = progress.pending.filter(k => k !== key); if (!progress.completed.includes(key)) progress.completed.push(key); + // Clean up per-keyword progress for completed track + if (progress.keyword_progress) delete progress.keyword_progress[key]; progress[`stats:${key}`] = { ...stats, completed_at: Date.now() }; save(); }