feat: first_run_days config — defaults to 90 days on first run, then posted_within_days after

This commit is contained in:
2026-03-05 23:38:21 +00:00
parent 931112e7cf
commit cb7a401aff
7 changed files with 85 additions and 24 deletions

View File

@@ -0,0 +1,41 @@
{
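"_note": "Configure your job searches here. Each search runs on every platform in its platforms list. first_run_days is the lookback window used on the first run (empty queue); later runs use each search's posted_within_days.",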
"first_run_days": 90,
"searches": [
{
"name": "Founding GTM",
"track": "gtm",
"keywords": [
"founding account executive",
"first sales hire",
"first GTM hire",
"founding AE",
"head of sales startup remote"
],
"platforms": ["linkedin", "wellfound"],
"filters": {
"remote": true,
"posted_within_days": 2
},
"exclude_keywords": ["BDR", "SDR", "staffing", "insurance", "retail", "consumer", "recruiter", "DataAnnotation"],
"salary_min": 130000
},
{
"name": "Enterprise AE",
"track": "ae",
"keywords": [
"enterprise account executive SaaS remote",
"senior account executive technical SaaS remote",
"enterprise AE data infrastructure cloud"
],
"platforms": ["linkedin"],
"filters": {
"remote": true,
"posted_within_days": 2,
"easy_apply_only": true
},
"exclude_keywords": ["BDR", "SDR", "SMB", "staffing", "retail", "DataAnnotation"],
"salary_min": 150000
}
]
}

View File

@@ -1,5 +1,6 @@
{
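"_note": "Configure your job searches here. Each search runs on every platform in its platforms list. first_run_days is the lookback window used on the first run (empty queue); later runs use each search's posted_within_days.",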
"first_run_days": 90,
"searches": [
{
"name": "Founding GTM",

View File

@@ -11,7 +11,7 @@ import { fileURLToPath } from 'url';
const __dir = dirname(fileURLToPath(import.meta.url));
const cfg = p => JSON.parse(readFileSync(resolve(__dir, p), 'utf8'));
import { addJobs } from './lib/queue.mjs';
import { addJobs, loadQueue } from './lib/queue.mjs';
import { createBrowser } from './lib/browser.mjs';
import { verifyLogin as liLogin, searchLinkedIn } from './lib/linkedin.mjs';
import { verifyLogin as wfLogin, searchWellfound } from './lib/wellfound.mjs';
@@ -24,6 +24,11 @@ async function main() {
const settings = cfg('config/settings.json');
const searchConfig = cfg('config/search_config.json');
// First run detection: if queue is empty, use first_run_days lookback
const isFirstRun = loadQueue().length === 0;
const lookbackDays = isFirstRun ? (searchConfig.first_run_days || 90) : null;
if (isFirstRun) console.log(`📅 First run — looking back ${lookbackDays} days\n`);
let totalAdded = 0;
let totalSeen = 0;
const platformsRun = [];
@@ -43,7 +48,10 @@ async function main() {
console.log(' ✅ Logged in');
for (const search of liSearches) {
const jobs = await searchLinkedIn(liBrowser.page, search);
const effectiveSearch = lookbackDays
? { ...search, filters: { ...search.filters, posted_within_days: lookbackDays } }
: search;
const jobs = await searchLinkedIn(liBrowser.page, effectiveSearch);
const added = addJobs(jobs);
totalAdded += added;
totalSeen += jobs.length;
@@ -69,7 +77,10 @@ async function main() {
else console.log(' ✅ Logged in');
for (const search of wfSearches) {
const jobs = await searchWellfound(wfBrowser.page, search);
const effectiveSearch = lookbackDays
? { ...search, filters: { ...search.filters, posted_within_days: lookbackDays } }
: search;
const jobs = await searchWellfound(wfBrowser.page, effectiveSearch);
const added = addJobs(jobs);
totalAdded += added;
totalSeen += jobs.length;

View File

@@ -2,30 +2,44 @@
* browser.mjs — Browser factory
* Creates Kernel stealth browsers or falls back to local Playwright
*/
import { chromium } from 'playwright';
// Playwright's `chromium` object is resolved lazily and cached here, so the
// dynamic import is attempted at most once per successful resolution.
let _chromium;

/**
 * Resolve Playwright's `chromium` browser type.
 *
 * Candidate module specifiers are tried in order: the configured path (if
 * any), a hard-coded npm-global install location, then the bare `playwright`
 * specifier. The first module exposing `chromium` wins and is cached for all
 * later calls.
 *
 * @param {string} [playwrightPath] - Optional module specifier/path to try first.
 * @returns {Promise<object>} The `chromium` export of the first usable module.
 * @throws {Error} If no candidate yields a `chromium` export. The most recent
 *   failure is attached as `cause` to aid debugging.
 */
async function getChromium(playwrightPath) {
  if (_chromium) return _chromium;

  const candidates = [
    playwrightPath,
    '/home/ubuntu/.npm-global/lib/node_modules/playwright/index.mjs',
    'playwright',
  ].filter(Boolean);

  // Remember why the last candidate failed instead of silently discarding it.
  let lastError;
  for (const candidate of candidates) {
    try {
      const mod = await import(candidate);
      // A module that loads but lacks `chromium` must not be returned as
      // `undefined`; also accept a default export carrying `chromium`.
      const found = mod.chromium ?? mod.default?.chromium;
      if (found) {
        _chromium = found;
        return _chromium;
      }
      lastError = new Error(`Module "${candidate}" has no "chromium" export`);
    } catch (err) {
      lastError = err;
    }
  }

  throw new Error('Playwright not found — install with: npm install -g playwright', {
    cause: lastError,
  });
}
/**
 * Create a browser session according to `settings`.
 *
 * When `settings.browser.provider` is `'local'`, a local browser is created
 * directly. Otherwise a Kernel browser is attempted first and, if that throws,
 * a warning is logged and a local browser is created instead. The configured
 * `settings.browser.playwright_path` is forwarded on every path so each
 * helper receives the same Playwright path.
 *
 * @param {object} settings - Parsed settings; reads `settings.browser` and `settings.kernel`.
 * @param {string} profileKey - Forwarded unchanged to `createKernelBrowser`.
 * @returns {Promise<object>} Whatever `createKernelBrowser` / `createLocalBrowser` resolve to.
 */
export async function createBrowser(settings, profileKey) {
  const { provider, playwright_path: pwPath } = settings.browser || {};
  const kernelConfig = settings.kernel || {};

  if (provider === 'local') {
    return createLocalBrowser(pwPath);
  }

  // Default: Kernel
  try {
    // `await` inside the try so a rejected Kernel setup is caught here and
    // triggers the local fallback rather than escaping to the caller.
    return await createKernelBrowser(kernelConfig, profileKey, pwPath);
  } catch (e) {
    console.warn(`[browser] Kernel failed (${e.message}), falling back to local`);
    return createLocalBrowser(pwPath);
  }
}
async function createKernelBrowser(kernelConfig, profileKey) {
// Dynamic import so it doesn't crash if not installed
async function createKernelBrowser(kernelConfig, profileKey, playwrightPath) {
let Kernel;
try {
const mod = await import('@onkernel/sdk');
const mod = await import('/home/ubuntu/.openclaw/workspace/node_modules/@onkernel/sdk/index.js');
Kernel = mod.default;
} catch {
throw new Error('Kernel SDK not installed — run: npm install @onkernel/sdk');
@@ -41,14 +55,7 @@ async function createKernelBrowser(kernelConfig, profileKey) {
if (kernelConfig.proxy_id) opts.proxy = { id: kernelConfig.proxy_id };
const kb = await kernel.browsers.create(opts);
// Use system playwright or configured path
let pw = chromium;
if (kernelConfig.playwright_path) {
const mod = await import(kernelConfig.playwright_path);
pw = mod.chromium;
}
const pw = await getChromium(playwrightPath);
const browser = await pw.connectOverCDP(kb.cdp_ws_url);
const ctx = browser.contexts()[0] || await browser.newContext();
const page = ctx.pages()[0] || await ctx.newPage();
@@ -56,7 +63,8 @@ async function createKernelBrowser(kernelConfig, profileKey) {
return { browser, page, type: 'kernel' };
}
async function createLocalBrowser() {
async function createLocalBrowser(playwrightPath) {
const chromium = await getChromium(playwrightPath);
const browser = await chromium.launch({ headless: true });
const ctx = await browser.newContext({
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'

View File

@@ -29,7 +29,7 @@ export async function searchLinkedIn(page, search) {
await page.evaluate(() => window.scrollBy(0, 2000));
await page.waitForTimeout(1500);
const found = await page.evaluate((track, excludes) => {
const found = await page.evaluate(({ track, excludes }) => {
const ids = [...new Set(
Array.from(document.querySelectorAll('a[href*="/jobs/view/"]'))
.map(a => a.href.match(/\/jobs\/view\/(\d+)/)?.[1])
@@ -44,7 +44,6 @@ export async function searchLinkedIn(page, search) {
const company = container?.querySelector('[class*="company"], [class*="subtitle"], h4')?.textContent?.trim() || '';
const location = container?.querySelector('[class*="location"]')?.textContent?.trim() || '';
// Basic exclusion filter
const titleLower = title.toLowerCase();
const companyLower = company.toLowerCase();
for (const ex of excludes) {
@@ -54,7 +53,7 @@ export async function searchLinkedIn(page, search) {
return { id: `li_${id}`, platform: 'linkedin', track, title, company, location,
url: `https://www.linkedin.com/jobs/view/${id}/`, jobId: id };
}).filter(Boolean);
}, search.track, search.exclude_keywords || []);
}, { track: search.track, excludes: search.exclude_keywords || [] });
jobs.push(...found);
}

View File

@@ -21,7 +21,7 @@ export async function searchWellfound(page, search) {
await page.evaluate(() => window.scrollBy(0, 3000));
await page.waitForTimeout(2000);
const found = await page.evaluate((track, excludes) => {
const found = await page.evaluate(({ track, excludes }) => {
const seen = new Set();
const results = [];
@@ -57,7 +57,7 @@ export async function searchWellfound(page, search) {
});
return results.slice(0, 30);
}, search.track, search.exclude_keywords || []);
}, { track: search.track, excludes: search.exclude_keywords || [] });
jobs.push(...found);
}

1
node_modules Symbolic link
View File

@@ -0,0 +1 @@
/home/ubuntu/.openclaw/workspace/node_modules