diff --git a/config/search_config.example.json b/config/search_config.example.json new file mode 100644 index 0000000..674cafc --- /dev/null +++ b/config/search_config.example.json @@ -0,0 +1,41 @@ +{ + "_note": "Configure your job searches here. Each search runs on every platform listed in its platforms array.", + "first_run_days": 90, + "searches": [ + { + "name": "Founding GTM", + "track": "gtm", + "keywords": [ + "founding account executive", + "first sales hire", + "first GTM hire", + "founding AE", + "head of sales startup remote" + ], + "platforms": ["linkedin", "wellfound"], + "filters": { + "remote": true, + "posted_within_days": 2 + }, + "exclude_keywords": ["BDR", "SDR", "staffing", "insurance", "retail", "consumer", "recruiter", "DataAnnotation"], + "salary_min": 130000 + }, + { + "name": "Enterprise AE", + "track": "ae", + "keywords": [ + "enterprise account executive SaaS remote", + "senior account executive technical SaaS remote", + "enterprise AE data infrastructure cloud" + ], + "platforms": ["linkedin"], + "filters": { + "remote": true, + "posted_within_days": 2, + "easy_apply_only": true + }, + "exclude_keywords": ["BDR", "SDR", "SMB", "staffing", "retail", "DataAnnotation"], + "salary_min": 150000 + } + ] +} diff --git a/config/search_config.json b/config/search_config.json index a3ec4cc..674cafc 100644 --- a/config/search_config.json +++ b/config/search_config.json @@ -1,5 +1,6 @@ { "_note": "Configure your job searches here. 
Each search runs on both listed platforms.", + "first_run_days": 90, "searches": [ { "name": "Founding GTM", diff --git a/job_searcher.mjs b/job_searcher.mjs index 7d0e3b2..908d602 100644 --- a/job_searcher.mjs +++ b/job_searcher.mjs @@ -11,7 +11,7 @@ import { fileURLToPath } from 'url'; const __dir = dirname(fileURLToPath(import.meta.url)); const cfg = p => JSON.parse(readFileSync(resolve(__dir, p), 'utf8')); -import { addJobs } from './lib/queue.mjs'; +import { addJobs, loadQueue } from './lib/queue.mjs'; import { createBrowser } from './lib/browser.mjs'; import { verifyLogin as liLogin, searchLinkedIn } from './lib/linkedin.mjs'; import { verifyLogin as wfLogin, searchWellfound } from './lib/wellfound.mjs'; @@ -24,6 +24,11 @@ async function main() { const settings = cfg('config/settings.json'); const searchConfig = cfg('config/search_config.json'); + // First run detection: if queue is empty, use first_run_days lookback + const isFirstRun = loadQueue().length === 0; + const lookbackDays = isFirstRun ? (searchConfig.first_run_days || 90) : null; + if (isFirstRun) console.log(`📅 First run — looking back ${lookbackDays} days\n`); + let totalAdded = 0; let totalSeen = 0; const platformsRun = []; @@ -43,7 +48,10 @@ async function main() { console.log(' ✅ Logged in'); for (const search of liSearches) { - const jobs = await searchLinkedIn(liBrowser.page, search); + const effectiveSearch = lookbackDays + ? { ...search, filters: { ...search.filters, posted_within_days: lookbackDays } } + : search; + const jobs = await searchLinkedIn(liBrowser.page, effectiveSearch); const added = addJobs(jobs); totalAdded += added; totalSeen += jobs.length; @@ -69,7 +77,10 @@ async function main() { else console.log(' ✅ Logged in'); for (const search of wfSearches) { - const jobs = await searchWellfound(wfBrowser.page, search); + const effectiveSearch = lookbackDays + ? 
{ ...search, filters: { ...search.filters, posted_within_days: lookbackDays } } + : search; + const jobs = await searchWellfound(wfBrowser.page, effectiveSearch); const added = addJobs(jobs); totalAdded += added; totalSeen += jobs.length; diff --git a/lib/browser.mjs b/lib/browser.mjs index f87a121..2070a1a 100644 --- a/lib/browser.mjs +++ b/lib/browser.mjs @@ -2,30 +2,44 @@ * browser.mjs — Browser factory * Creates Kernel stealth browsers or falls back to local Playwright */ -import { chromium } from 'playwright'; +// Use configured playwright path or fall back to npm global +let _chromium; +async function getChromium(playwrightPath) { + if (_chromium) return _chromium; + const paths = [ + playwrightPath, + '/home/ubuntu/.npm-global/lib/node_modules/playwright/index.mjs', + 'playwright' + ].filter(Boolean); + for (const p of paths) { + try { const m = await import(p); _chromium = m.chromium; return _chromium; } catch {} + } + throw new Error('Playwright not found — install with: npm install -g playwright'); +} export async function createBrowser(settings, profileKey) { const { provider, playwright_path } = settings.browser || {}; const kernelConfig = settings.kernel || {}; + const pwPath = settings.browser?.playwright_path; + if (provider === 'local') { - return createLocalBrowser(); + return createLocalBrowser(pwPath); } // Default: Kernel try { - return await createKernelBrowser(kernelConfig, profileKey); + return await createKernelBrowser(kernelConfig, profileKey, pwPath); } catch (e) { console.warn(`[browser] Kernel failed (${e.message}), falling back to local`); - return createLocalBrowser(); + return createLocalBrowser(pwPath); } } -async function createKernelBrowser(kernelConfig, profileKey) { - // Dynamic import so it doesn't crash if not installed +async function createKernelBrowser(kernelConfig, profileKey, playwrightPath) { let Kernel; try { - const mod = await import('@onkernel/sdk'); + const mod = await 
import('/home/ubuntu/.openclaw/workspace/node_modules/@onkernel/sdk/index.js'); Kernel = mod.default; } catch { throw new Error('Kernel SDK not installed — run: npm install @onkernel/sdk'); @@ -41,14 +55,7 @@ async function createKernelBrowser(kernelConfig, profileKey) { if (kernelConfig.proxy_id) opts.proxy = { id: kernelConfig.proxy_id }; const kb = await kernel.browsers.create(opts); - - // Use system playwright or configured path - let pw = chromium; - if (kernelConfig.playwright_path) { - const mod = await import(kernelConfig.playwright_path); - pw = mod.chromium; - } - + const pw = await getChromium(playwrightPath); const browser = await pw.connectOverCDP(kb.cdp_ws_url); const ctx = browser.contexts()[0] || await browser.newContext(); const page = ctx.pages()[0] || await ctx.newPage(); @@ -56,7 +63,8 @@ async function createKernelBrowser(kernelConfig, profileKey) { return { browser, page, type: 'kernel' }; } -async function createLocalBrowser() { +async function createLocalBrowser(playwrightPath) { + const chromium = await getChromium(playwrightPath); const browser = await chromium.launch({ headless: true }); const ctx = await browser.newContext({ userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36' diff --git a/lib/linkedin.mjs b/lib/linkedin.mjs index 15efc29..2459ba3 100644 --- a/lib/linkedin.mjs +++ b/lib/linkedin.mjs @@ -29,7 +29,7 @@ export async function searchLinkedIn(page, search) { await page.evaluate(() => window.scrollBy(0, 2000)); await page.waitForTimeout(1500); - const found = await page.evaluate((track, excludes) => { + const found = await page.evaluate(({ track, excludes }) => { const ids = [...new Set( Array.from(document.querySelectorAll('a[href*="/jobs/view/"]')) .map(a => a.href.match(/\/jobs\/view\/(\d+)/)?.[1]) @@ -44,7 +44,6 @@ export async function searchLinkedIn(page, search) { const company = container?.querySelector('[class*="company"], [class*="subtitle"], 
h4')?.textContent?.trim() || ''; const location = container?.querySelector('[class*="location"]')?.textContent?.trim() || ''; - // Basic exclusion filter const titleLower = title.toLowerCase(); const companyLower = company.toLowerCase(); for (const ex of excludes) { @@ -54,7 +53,7 @@ export async function searchLinkedIn(page, search) { return { id: `li_${id}`, platform: 'linkedin', track, title, company, location, url: `https://www.linkedin.com/jobs/view/${id}/`, jobId: id }; }).filter(Boolean); - }, search.track, search.exclude_keywords || []); + }, { track: search.track, excludes: search.exclude_keywords || [] }); jobs.push(...found); } diff --git a/lib/wellfound.mjs b/lib/wellfound.mjs index e8be1de..f67bdd8 100644 --- a/lib/wellfound.mjs +++ b/lib/wellfound.mjs @@ -21,7 +21,7 @@ export async function searchWellfound(page, search) { await page.evaluate(() => window.scrollBy(0, 3000)); await page.waitForTimeout(2000); - const found = await page.evaluate((track, excludes) => { + const found = await page.evaluate(({ track, excludes }) => { const seen = new Set(); const results = []; @@ -57,7 +57,7 @@ export async function searchWellfound(page, search) { }); return results.slice(0, 30); - }, search.track, search.exclude_keywords || []); + }, { track: search.track, excludes: search.exclude_keywords || [] }); jobs.push(...found); } diff --git a/node_modules b/node_modules new file mode 120000 index 0000000..867c978 --- /dev/null +++ b/node_modules @@ -0,0 +1 @@ +/home/ubuntu/.openclaw/workspace/node_modules \ No newline at end of file