feat: flush jobs to queue per-page — no data loss on crash; live progress output
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -11,3 +11,4 @@ config/answers.json
|
||||
config/search_config.json
|
||||
|
||||
# Templates are committed instead (see config/*.example.json)
|
||||
data/*.lock
|
||||
|
||||
@@ -72,11 +72,18 @@ async function main() {
|
||||
const effectiveSearch = lookbackDays
|
||||
? { ...search, filters: { ...search.filters, posted_within_days: lookbackDays } }
|
||||
: search;
|
||||
const jobs = await searchLinkedIn(liBrowser.page, effectiveSearch);
|
||||
const added = addJobs(jobs);
|
||||
let queryFound = 0, queryAdded = 0;
|
||||
await searchLinkedIn(liBrowser.page, effectiveSearch, {
|
||||
onPage: (pageJobs) => {
|
||||
const added = addJobs(pageJobs);
|
||||
totalAdded += added;
|
||||
totalSeen += jobs.length;
|
||||
console.log(` [${search.name}] ${jobs.length} found, ${added} new`);
|
||||
totalSeen += pageJobs.length;
|
||||
queryFound += pageJobs.length;
|
||||
queryAdded += added;
|
||||
process.stdout.write(`\r [${search.name}] ${queryFound} found, ${queryAdded} new...`);
|
||||
}
|
||||
});
|
||||
console.log(`\r [${search.name}] ${queryFound} found, ${queryAdded} new`);
|
||||
}
|
||||
|
||||
platformsRun.push('LinkedIn');
|
||||
@@ -101,11 +108,18 @@ async function main() {
|
||||
const effectiveSearch = lookbackDays
|
||||
? { ...search, filters: { ...search.filters, posted_within_days: lookbackDays } }
|
||||
: search;
|
||||
const jobs = await searchWellfound(wfBrowser.page, effectiveSearch);
|
||||
const added = addJobs(jobs);
|
||||
let queryFound = 0, queryAdded = 0;
|
||||
await searchWellfound(wfBrowser.page, effectiveSearch, {
|
||||
onPage: (pageJobs) => {
|
||||
const added = addJobs(pageJobs);
|
||||
totalAdded += added;
|
||||
totalSeen += jobs.length;
|
||||
console.log(` [${search.name}] ${jobs.length} found, ${added} new`);
|
||||
totalSeen += pageJobs.length;
|
||||
queryFound += pageJobs.length;
|
||||
queryAdded += added;
|
||||
process.stdout.write(`\r [${search.name}] ${queryFound} found, ${queryAdded} new...`);
|
||||
}
|
||||
});
|
||||
console.log(`\r [${search.name}] ${queryFound} found, ${queryAdded} new`);
|
||||
}
|
||||
|
||||
platformsRun.push('Wellfound');
|
||||
|
||||
@@ -19,7 +19,7 @@ export async function verifyLogin(page) {
|
||||
return page.url().includes('/feed');
|
||||
}
|
||||
|
||||
export async function searchLinkedIn(page, search) {
|
||||
export async function searchLinkedIn(page, search, { onPage } = {}) {
|
||||
const jobs = [];
|
||||
|
||||
for (const keyword of search.keywords) {
|
||||
@@ -69,6 +69,7 @@ export async function searchLinkedIn(page, search) {
|
||||
}, { track: search.track, excludes: search.exclude_keywords || [] });
|
||||
|
||||
jobs.push(...found);
|
||||
if (found.length > 0 && onPage) onPage(found);
|
||||
|
||||
// Click next page button
|
||||
const nextBtn = await page.$('button[aria-label="View next page"]');
|
||||
|
||||
@@ -18,7 +18,7 @@ export async function verifyLogin(page) {
|
||||
return loggedIn;
|
||||
}
|
||||
|
||||
export async function searchWellfound(page, search) {
|
||||
export async function searchWellfound(page, search, { onPage } = {}) {
|
||||
const jobs = [];
|
||||
|
||||
for (const keyword of search.keywords) {
|
||||
@@ -77,6 +77,7 @@ export async function searchWellfound(page, search) {
|
||||
}, { track: search.track, excludes: search.exclude_keywords || [], maxResults: SEARCH_RESULTS_MAX });
|
||||
|
||||
jobs.push(...found);
|
||||
if (found.length > 0 && onPage) onPage(found);
|
||||
}
|
||||
|
||||
// Dedupe by URL
|
||||
|
||||
Reference in New Issue
Block a user