Fix classifyExternalJobs to extract URL from redirect link
External Apply buttons are <a> tags with LinkedIn redirect URLs, not <button> elements. Extract the real URL from the redirect's `url` query parameter instead of clicking and waiting for a new tab.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -164,24 +164,26 @@ export async function classifyExternalJobs(page, jobs, onClassified) {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT });
|
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT });
|
||||||
await page.waitForTimeout(2000);
|
await page.waitForTimeout(PAGE_LOAD_WAIT);
|
||||||
|
|
||||||
// Click the Apply button and catch the new tab
|
// External jobs use an <a> tag with a LinkedIn redirect URL containing the real destination
|
||||||
const applyBtn = await page.$('button.jobs-apply-button:not([aria-label*="Easy Apply"])')
|
const externalUrl = await page.evaluate(() => {
|
||||||
|| await page.$('a.jobs-apply-button');
|
// Find the Apply link (not Easy Apply button, not similar job links)
|
||||||
if (!applyBtn) { remaining++; continue; }
|
const applyLinks = Array.from(document.querySelectorAll('a'))
|
||||||
|
.filter(a => {
|
||||||
|
const text = a.textContent?.trim();
|
||||||
|
return text === 'Apply' && a.href?.includes('/redir/redirect/');
|
||||||
|
});
|
||||||
|
if (applyLinks.length === 0) return null;
|
||||||
|
|
||||||
const [newPage] = await Promise.all([
|
// Extract the real URL from the redirect wrapper
|
||||||
page.context().waitForEvent('page', { timeout: 8000 }).catch(() => null),
|
try {
|
||||||
applyBtn.click(),
|
const redirectUrl = new URL(applyLinks[0].href);
|
||||||
]);
|
return redirectUrl.searchParams.get('url') || applyLinks[0].href;
|
||||||
|
} catch {
|
||||||
let externalUrl = null;
|
return applyLinks[0].href;
|
||||||
if (newPage) {
|
}
|
||||||
await newPage.waitForLoadState('domcontentloaded', { timeout: 8000 }).catch(() => {});
|
});
|
||||||
externalUrl = newPage.url();
|
|
||||||
await newPage.close().catch(() => {});
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!externalUrl || externalUrl.includes('linkedin.com')) {
|
if (!externalUrl || externalUrl.includes('linkedin.com')) {
|
||||||
remaining++;
|
remaining++;
|
||||||
|
|||||||
Reference in New Issue
Block a user