feat: flush jobs to the queue after each page so a crash no longer loses pages already scraped; show live progress output

This commit is contained in:
2026-03-06 00:17:33 +00:00
parent 234820ad91
commit f9fa36b47c
4 changed files with 29 additions and 12 deletions

1
.gitignore vendored
View File

@@ -11,3 +11,4 @@ config/answers.json
config/search_config.json config/search_config.json
# Templates are committed instead (see config/*.example.json) # Templates are committed instead (see config/*.example.json)
data/*.lock

View File

@@ -72,11 +72,18 @@ async function main() {
const effectiveSearch = lookbackDays const effectiveSearch = lookbackDays
? { ...search, filters: { ...search.filters, posted_within_days: lookbackDays } } ? { ...search, filters: { ...search.filters, posted_within_days: lookbackDays } }
: search; : search;
const jobs = await searchLinkedIn(liBrowser.page, effectiveSearch); let queryFound = 0, queryAdded = 0;
const added = addJobs(jobs); await searchLinkedIn(liBrowser.page, effectiveSearch, {
totalAdded += added; onPage: (pageJobs) => {
totalSeen += jobs.length; const added = addJobs(pageJobs);
console.log(` [${search.name}] ${jobs.length} found, ${added} new`); totalAdded += added;
totalSeen += pageJobs.length;
queryFound += pageJobs.length;
queryAdded += added;
process.stdout.write(`\r [${search.name}] ${queryFound} found, ${queryAdded} new...`);
}
});
console.log(`\r [${search.name}] ${queryFound} found, ${queryAdded} new`);
} }
platformsRun.push('LinkedIn'); platformsRun.push('LinkedIn');
@@ -101,11 +108,18 @@ async function main() {
const effectiveSearch = lookbackDays const effectiveSearch = lookbackDays
? { ...search, filters: { ...search.filters, posted_within_days: lookbackDays } } ? { ...search, filters: { ...search.filters, posted_within_days: lookbackDays } }
: search; : search;
const jobs = await searchWellfound(wfBrowser.page, effectiveSearch); let queryFound = 0, queryAdded = 0;
const added = addJobs(jobs); await searchWellfound(wfBrowser.page, effectiveSearch, {
totalAdded += added; onPage: (pageJobs) => {
totalSeen += jobs.length; const added = addJobs(pageJobs);
console.log(` [${search.name}] ${jobs.length} found, ${added} new`); totalAdded += added;
totalSeen += pageJobs.length;
queryFound += pageJobs.length;
queryAdded += added;
process.stdout.write(`\r [${search.name}] ${queryFound} found, ${queryAdded} new...`);
}
});
console.log(`\r [${search.name}] ${queryFound} found, ${queryAdded} new`);
} }
platformsRun.push('Wellfound'); platformsRun.push('Wellfound');

View File

@@ -19,7 +19,7 @@ export async function verifyLogin(page) {
return page.url().includes('/feed'); return page.url().includes('/feed');
} }
export async function searchLinkedIn(page, search) { export async function searchLinkedIn(page, search, { onPage } = {}) {
const jobs = []; const jobs = [];
for (const keyword of search.keywords) { for (const keyword of search.keywords) {
@@ -69,6 +69,7 @@ export async function searchLinkedIn(page, search) {
}, { track: search.track, excludes: search.exclude_keywords || [] }); }, { track: search.track, excludes: search.exclude_keywords || [] });
jobs.push(...found); jobs.push(...found);
if (found.length > 0 && onPage) onPage(found);
// Click next page button // Click next page button
const nextBtn = await page.$('button[aria-label="View next page"]'); const nextBtn = await page.$('button[aria-label="View next page"]');

View File

@@ -18,7 +18,7 @@ export async function verifyLogin(page) {
return loggedIn; return loggedIn;
} }
export async function searchWellfound(page, search) { export async function searchWellfound(page, search, { onPage } = {}) {
const jobs = []; const jobs = [];
for (const keyword of search.keywords) { for (const keyword of search.keywords) {
@@ -77,6 +77,7 @@ export async function searchWellfound(page, search) {
}, { track: search.track, excludes: search.exclude_keywords || [], maxResults: SEARCH_RESULTS_MAX }); }, { track: search.track, excludes: search.exclude_keywords || [], maxResults: SEARCH_RESULTS_MAX });
jobs.push(...found); jobs.push(...found);
if (found.length > 0 && onPage) onPage(found);
} }
// Dedupe by URL // Dedupe by URL