fix: location scraper — use .tvm__text selector, also capture work_type (Remote/Hybrid/On-site)
This commit is contained in:
@@ -77,13 +77,16 @@ export async function searchLinkedIn(page, search, { onPage, onKeyword } = {}) {
|
||||
const title = document.querySelector('.job-details-jobs-unified-top-card__job-title, h1[class*="title"]')?.textContent?.trim()
|
||||
|| document.querySelector('.jobs-unified-top-card__job-title')?.textContent?.trim() || '';
|
||||
const company = document.querySelector('.job-details-jobs-unified-top-card__company-name a, .jobs-unified-top-card__company-name a')?.textContent?.trim() || '';
|
||||
const location = document.querySelector('.job-details-jobs-unified-top-card__bullet, .jobs-unified-top-card__bullet')?.textContent?.trim() || '';
|
||||
// .tvm__text spans contain: location, "·", time ago, "·", applicants, work type, etc.
|
||||
const tvmTexts = Array.from(document.querySelectorAll('.tvm__text')).map(e => e.textContent.trim()).filter(s => s && s !== '·');
|
||||
const location = tvmTexts[0] || ''; // first non-separator is location
|
||||
const workType = tvmTexts.find(t => ['Remote', 'Hybrid', 'On-site'].includes(t)) || '';
|
||||
|
||||
const tl = title.toLowerCase(), cl = company.toLowerCase();
|
||||
for (const ex of excludes) {
|
||||
if (tl.includes(ex.toLowerCase()) || cl.includes(ex.toLowerCase())) return null;
|
||||
}
|
||||
return { title, company, location };
|
||||
return { title, company, location, work_type: workType };
|
||||
}, { id: jobId, track: search.track, excludes: search.exclude_keywords || [] });
|
||||
|
||||
if (!meta) { seenIds.add(jobId); continue; } // excluded
|
||||
|
||||
Reference in New Issue
Block a user