Fix classifyExternalJobs to extract URL from redirect link

External Apply buttons are <a> tags with LinkedIn redirect URLs, not
<button> elements. Extract the real URL from the redirect link's `url`
query parameter instead of clicking and waiting for a new tab to open.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-06 18:51:38 -08:00
parent 90894301c1
commit c33fb2ba0d

View File

@@ -164,24 +164,26 @@ export async function classifyExternalJobs(page, jobs, onClassified) {
try {
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT });
await page.waitForTimeout(2000);
await page.waitForTimeout(PAGE_LOAD_WAIT);
// Click the Apply button and catch the new tab
const applyBtn = await page.$('button.jobs-apply-button:not([aria-label*="Easy Apply"])')
|| await page.$('a.jobs-apply-button');
if (!applyBtn) { remaining++; continue; }
// External jobs use an <a> tag with a LinkedIn redirect URL containing the real destination
const externalUrl = await page.evaluate(() => {
// Find the Apply link (not Easy Apply button, not similar job links)
const applyLinks = Array.from(document.querySelectorAll('a'))
.filter(a => {
const text = a.textContent?.trim();
return text === 'Apply' && a.href?.includes('/redir/redirect/');
});
if (applyLinks.length === 0) return null;
const [newPage] = await Promise.all([
page.context().waitForEvent('page', { timeout: 8000 }).catch(() => null),
applyBtn.click(),
]);
let externalUrl = null;
if (newPage) {
await newPage.waitForLoadState('domcontentloaded', { timeout: 8000 }).catch(() => {});
externalUrl = newPage.url();
await newPage.close().catch(() => {});
// Extract the real URL from the redirect wrapper
try {
const redirectUrl = new URL(applyLinks[0].href);
return redirectUrl.searchParams.get('url') || applyLinks[0].href;
} catch {
return applyLinks[0].href;
}
});
if (!externalUrl || externalUrl.includes('linkedin.com')) {
remaining++;