Fix classifyExternalJobs to extract URL from redirect link
External Apply buttons are <a> tags with LinkedIn redirect URLs, not <button> elements. Extract the real URL from the redirect's `url` query parameter instead of clicking and waiting for a new tab.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -164,24 +164,26 @@ export async function classifyExternalJobs(page, jobs, onClassified) {
|
||||
|
||||
try {
|
||||
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT });
|
||||
await page.waitForTimeout(2000);
|
||||
await page.waitForTimeout(PAGE_LOAD_WAIT);
|
||||
|
||||
// Click the Apply button and catch the new tab
|
||||
const applyBtn = await page.$('button.jobs-apply-button:not([aria-label*="Easy Apply"])')
|
||||
|| await page.$('a.jobs-apply-button');
|
||||
if (!applyBtn) { remaining++; continue; }
|
||||
// External jobs use an <a> tag with a LinkedIn redirect URL containing the real destination
|
||||
const externalUrl = await page.evaluate(() => {
|
||||
// Find the Apply link (not Easy Apply button, not similar job links)
|
||||
const applyLinks = Array.from(document.querySelectorAll('a'))
|
||||
.filter(a => {
|
||||
const text = a.textContent?.trim();
|
||||
return text === 'Apply' && a.href?.includes('/redir/redirect/');
|
||||
});
|
||||
if (applyLinks.length === 0) return null;
|
||||
|
||||
const [newPage] = await Promise.all([
|
||||
page.context().waitForEvent('page', { timeout: 8000 }).catch(() => null),
|
||||
applyBtn.click(),
|
||||
]);
|
||||
|
||||
let externalUrl = null;
|
||||
if (newPage) {
|
||||
await newPage.waitForLoadState('domcontentloaded', { timeout: 8000 }).catch(() => {});
|
||||
externalUrl = newPage.url();
|
||||
await newPage.close().catch(() => {});
|
||||
}
|
||||
// Extract the real URL from the redirect wrapper
|
||||
try {
|
||||
const redirectUrl = new URL(applyLinks[0].href);
|
||||
return redirectUrl.searchParams.get('url') || applyLinks[0].href;
|
||||
} catch {
|
||||
return applyLinks[0].href;
|
||||
}
|
||||
});
|
||||
|
||||
if (!externalUrl || externalUrl.includes('linkedin.com')) {
|
||||
remaining++;
|
||||
|
||||
Reference in New Issue
Block a user