feat: flush jobs to queue per-page — no data loss on crash; live progress output
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -11,3 +11,4 @@ config/answers.json
|
|||||||
config/search_config.json
|
config/search_config.json
|
||||||
|
|
||||||
# Templates are committed instead (see config/*.example.json)
|
# Templates are committed instead (see config/*.example.json)
|
||||||
|
data/*.lock
|
||||||
|
|||||||
@@ -72,11 +72,18 @@ async function main() {
|
|||||||
const effectiveSearch = lookbackDays
|
const effectiveSearch = lookbackDays
|
||||||
? { ...search, filters: { ...search.filters, posted_within_days: lookbackDays } }
|
? { ...search, filters: { ...search.filters, posted_within_days: lookbackDays } }
|
||||||
: search;
|
: search;
|
||||||
const jobs = await searchLinkedIn(liBrowser.page, effectiveSearch);
|
let queryFound = 0, queryAdded = 0;
|
||||||
const added = addJobs(jobs);
|
await searchLinkedIn(liBrowser.page, effectiveSearch, {
|
||||||
|
onPage: (pageJobs) => {
|
||||||
|
const added = addJobs(pageJobs);
|
||||||
totalAdded += added;
|
totalAdded += added;
|
||||||
totalSeen += jobs.length;
|
totalSeen += pageJobs.length;
|
||||||
console.log(` [${search.name}] ${jobs.length} found, ${added} new`);
|
queryFound += pageJobs.length;
|
||||||
|
queryAdded += added;
|
||||||
|
process.stdout.write(`\r [${search.name}] ${queryFound} found, ${queryAdded} new...`);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
console.log(`\r [${search.name}] ${queryFound} found, ${queryAdded} new`);
|
||||||
}
|
}
|
||||||
|
|
||||||
platformsRun.push('LinkedIn');
|
platformsRun.push('LinkedIn');
|
||||||
@@ -101,11 +108,18 @@ async function main() {
|
|||||||
const effectiveSearch = lookbackDays
|
const effectiveSearch = lookbackDays
|
||||||
? { ...search, filters: { ...search.filters, posted_within_days: lookbackDays } }
|
? { ...search, filters: { ...search.filters, posted_within_days: lookbackDays } }
|
||||||
: search;
|
: search;
|
||||||
const jobs = await searchWellfound(wfBrowser.page, effectiveSearch);
|
let queryFound = 0, queryAdded = 0;
|
||||||
const added = addJobs(jobs);
|
await searchWellfound(wfBrowser.page, effectiveSearch, {
|
||||||
|
onPage: (pageJobs) => {
|
||||||
|
const added = addJobs(pageJobs);
|
||||||
totalAdded += added;
|
totalAdded += added;
|
||||||
totalSeen += jobs.length;
|
totalSeen += pageJobs.length;
|
||||||
console.log(` [${search.name}] ${jobs.length} found, ${added} new`);
|
queryFound += pageJobs.length;
|
||||||
|
queryAdded += added;
|
||||||
|
process.stdout.write(`\r [${search.name}] ${queryFound} found, ${queryAdded} new...`);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
console.log(`\r [${search.name}] ${queryFound} found, ${queryAdded} new`);
|
||||||
}
|
}
|
||||||
|
|
||||||
platformsRun.push('Wellfound');
|
platformsRun.push('Wellfound');
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ export async function verifyLogin(page) {
|
|||||||
return page.url().includes('/feed');
|
return page.url().includes('/feed');
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function searchLinkedIn(page, search) {
|
export async function searchLinkedIn(page, search, { onPage } = {}) {
|
||||||
const jobs = [];
|
const jobs = [];
|
||||||
|
|
||||||
for (const keyword of search.keywords) {
|
for (const keyword of search.keywords) {
|
||||||
@@ -69,6 +69,7 @@ export async function searchLinkedIn(page, search) {
|
|||||||
}, { track: search.track, excludes: search.exclude_keywords || [] });
|
}, { track: search.track, excludes: search.exclude_keywords || [] });
|
||||||
|
|
||||||
jobs.push(...found);
|
jobs.push(...found);
|
||||||
|
if (found.length > 0 && onPage) onPage(found);
|
||||||
|
|
||||||
// Click next page button
|
// Click next page button
|
||||||
const nextBtn = await page.$('button[aria-label="View next page"]');
|
const nextBtn = await page.$('button[aria-label="View next page"]');
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ export async function verifyLogin(page) {
|
|||||||
return loggedIn;
|
return loggedIn;
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function searchWellfound(page, search) {
|
export async function searchWellfound(page, search, { onPage } = {}) {
|
||||||
const jobs = [];
|
const jobs = [];
|
||||||
|
|
||||||
for (const keyword of search.keywords) {
|
for (const keyword of search.keywords) {
|
||||||
@@ -77,6 +77,7 @@ export async function searchWellfound(page, search) {
|
|||||||
}, { track: search.track, excludes: search.exclude_keywords || [], maxResults: SEARCH_RESULTS_MAX });
|
}, { track: search.track, excludes: search.exclude_keywords || [], maxResults: SEARCH_RESULTS_MAX });
|
||||||
|
|
||||||
jobs.push(...found);
|
jobs.push(...found);
|
||||||
|
if (found.length > 0 && onPage) onPage(found);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Dedupe by URL
|
// Dedupe by URL
|
||||||
|
|||||||
Reference in New Issue
Block a user