Fix classifyExternalJobs to extract URL from redirect link

External Apply buttons are <a> tags with LinkedIn redirect URLs, not
<button> elements. Extract the real URL from the redirect's `url` query
parameter instead of clicking and waiting for a new tab.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-06 18:51:38 -08:00
parent 90894301c1
commit c33fb2ba0d

View File

@@ -164,24 +164,26 @@ export async function classifyExternalJobs(page, jobs, onClassified) {
try { try {
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT }); await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT });
await page.waitForTimeout(2000); await page.waitForTimeout(PAGE_LOAD_WAIT);
// Click the Apply button and catch the new tab // External jobs use an <a> tag with a LinkedIn redirect URL containing the real destination
const applyBtn = await page.$('button.jobs-apply-button:not([aria-label*="Easy Apply"])') const externalUrl = await page.evaluate(() => {
|| await page.$('a.jobs-apply-button'); // Find the Apply link (not Easy Apply button, not similar job links)
if (!applyBtn) { remaining++; continue; } const applyLinks = Array.from(document.querySelectorAll('a'))
.filter(a => {
const text = a.textContent?.trim();
return text === 'Apply' && a.href?.includes('/redir/redirect/');
});
if (applyLinks.length === 0) return null;
const [newPage] = await Promise.all([ // Extract the real URL from the redirect wrapper
page.context().waitForEvent('page', { timeout: 8000 }).catch(() => null), try {
applyBtn.click(), const redirectUrl = new URL(applyLinks[0].href);
]); return redirectUrl.searchParams.get('url') || applyLinks[0].href;
} catch {
let externalUrl = null; return applyLinks[0].href;
if (newPage) { }
await newPage.waitForLoadState('domcontentloaded', { timeout: 8000 }).catch(() => {}); });
externalUrl = newPage.url();
await newPage.close().catch(() => {});
}
if (!externalUrl || externalUrl.includes('linkedin.com')) { if (!externalUrl || externalUrl.includes('linkedin.com')) {
remaining++; remaining++;