feat: first_run_days config — defaults to 90 days on first run, then posted_within_days after
This commit is contained in:
41
config/search_config.example.json
Normal file
41
config/search_config.example.json
Normal file
@@ -0,0 +1,41 @@
|
||||
{
|
||||
"_note": "Configure your job searches here. Each search runs on every platform listed in its platforms array.",
|
||||
"first_run_days": 90,
|
||||
"searches": [
|
||||
{
|
||||
"name": "Founding GTM",
|
||||
"track": "gtm",
|
||||
"keywords": [
|
||||
"founding account executive",
|
||||
"first sales hire",
|
||||
"first GTM hire",
|
||||
"founding AE",
|
||||
"head of sales startup remote"
|
||||
],
|
||||
"platforms": ["linkedin", "wellfound"],
|
||||
"filters": {
|
||||
"remote": true,
|
||||
"posted_within_days": 2
|
||||
},
|
||||
"exclude_keywords": ["BDR", "SDR", "staffing", "insurance", "retail", "consumer", "recruiter", "DataAnnotation"],
|
||||
"salary_min": 130000
|
||||
},
|
||||
{
|
||||
"name": "Enterprise AE",
|
||||
"track": "ae",
|
||||
"keywords": [
|
||||
"enterprise account executive SaaS remote",
|
||||
"senior account executive technical SaaS remote",
|
||||
"enterprise AE data infrastructure cloud"
|
||||
],
|
||||
"platforms": ["linkedin"],
|
||||
"filters": {
|
||||
"remote": true,
|
||||
"posted_within_days": 2,
|
||||
"easy_apply_only": true
|
||||
},
|
||||
"exclude_keywords": ["BDR", "SDR", "SMB", "staffing", "retail", "DataAnnotation"],
|
||||
"salary_min": 150000
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
{
|
||||
"_note": "Configure your job searches here. Each search runs on every platform listed in its platforms array.",
|
||||
"first_run_days": 90,
|
||||
"searches": [
|
||||
{
|
||||
"name": "Founding GTM",
|
||||
|
||||
@@ -11,7 +11,7 @@ import { fileURLToPath } from 'url';
|
||||
const __dir = dirname(fileURLToPath(import.meta.url));
|
||||
const cfg = p => JSON.parse(readFileSync(resolve(__dir, p), 'utf8'));
|
||||
|
||||
import { addJobs } from './lib/queue.mjs';
|
||||
import { addJobs, loadQueue } from './lib/queue.mjs';
|
||||
import { createBrowser } from './lib/browser.mjs';
|
||||
import { verifyLogin as liLogin, searchLinkedIn } from './lib/linkedin.mjs';
|
||||
import { verifyLogin as wfLogin, searchWellfound } from './lib/wellfound.mjs';
|
||||
@@ -24,6 +24,11 @@ async function main() {
|
||||
const settings = cfg('config/settings.json');
|
||||
const searchConfig = cfg('config/search_config.json');
|
||||
|
||||
// First run detection: if queue is empty, use first_run_days lookback
|
||||
const isFirstRun = loadQueue().length === 0;
|
||||
const lookbackDays = isFirstRun ? (searchConfig.first_run_days || 90) : null;
|
||||
if (isFirstRun) console.log(`📅 First run — looking back ${lookbackDays} days\n`);
|
||||
|
||||
let totalAdded = 0;
|
||||
let totalSeen = 0;
|
||||
const platformsRun = [];
|
||||
@@ -43,7 +48,10 @@ async function main() {
|
||||
console.log(' ✅ Logged in');
|
||||
|
||||
for (const search of liSearches) {
|
||||
const jobs = await searchLinkedIn(liBrowser.page, search);
|
||||
const effectiveSearch = lookbackDays
|
||||
? { ...search, filters: { ...search.filters, posted_within_days: lookbackDays } }
|
||||
: search;
|
||||
const jobs = await searchLinkedIn(liBrowser.page, effectiveSearch);
|
||||
const added = addJobs(jobs);
|
||||
totalAdded += added;
|
||||
totalSeen += jobs.length;
|
||||
@@ -69,7 +77,10 @@ async function main() {
|
||||
else console.log(' ✅ Logged in');
|
||||
|
||||
for (const search of wfSearches) {
|
||||
const jobs = await searchWellfound(wfBrowser.page, search);
|
||||
const effectiveSearch = lookbackDays
|
||||
? { ...search, filters: { ...search.filters, posted_within_days: lookbackDays } }
|
||||
: search;
|
||||
const jobs = await searchWellfound(wfBrowser.page, effectiveSearch);
|
||||
const added = addJobs(jobs);
|
||||
totalAdded += added;
|
||||
totalSeen += jobs.length;
|
||||
|
||||
@@ -2,30 +2,44 @@
|
||||
* browser.mjs — Browser factory
|
||||
* Creates Kernel stealth browsers or falls back to local Playwright
|
||||
*/
|
||||
import { chromium } from 'playwright';
|
||||
// Playwright's `chromium` launcher, resolved lazily and cached for the
// lifetime of the module so the candidate paths are only probed once.
let _chromium;

/**
 * Locate Playwright's `chromium` object by trying, in order:
 *   1. the caller-supplied module path (skipped when falsy),
 *   2. the hard-coded global npm install location,
 *   3. the bare `playwright` specifier.
 *
 * A candidate is accepted when it exposes `chromium` either as a named
 * export or on its default export (the shape a CommonJS entry point can
 * take when loaded through dynamic `import()`). Candidates that fail to
 * load, or load without a `chromium` export, are skipped.
 *
 * @param {string} [playwrightPath] - Optional module specifier/path to try first.
 * @returns {Promise<object>} The resolved `chromium` launcher (cached after the first success).
 * @throws {Error} When no candidate yields a `chromium` export; the last
 *   underlying failure is attached as `cause`.
 */
async function getChromium(playwrightPath) {
  if (_chromium) return _chromium;

  const candidates = [
    playwrightPath,
    '/home/ubuntu/.npm-global/lib/node_modules/playwright/index.mjs',
    'playwright'
  ].filter(Boolean);

  // Remember why the most recent candidate was rejected so the final error
  // is diagnosable instead of silently discarding every failure.
  let lastError;
  for (const candidate of candidates) {
    try {
      const mod = await import(candidate);
      const launcher = mod.chromium ?? mod.default?.chromium;
      if (launcher) {
        _chromium = launcher;
        return _chromium;
      }
      lastError = new Error(`Module "${candidate}" does not export chromium`);
    } catch (err) {
      lastError = err;
    }
  }

  throw new Error('Playwright not found — install with: npm install -g playwright', { cause: lastError });
}
|
||||
|
||||
/**
 * Browser factory entry point.
 *
 * Reads `settings.browser.provider` and `settings.browser.playwright_path`
 * (both optional) and `settings.kernel` (optional). When the provider is
 * `'local'` a local browser is created directly; otherwise a Kernel browser
 * is attempted first and, if that throws, a warning is logged and a local
 * browser is created instead. The configured Playwright path is forwarded
 * to whichever helper ends up creating the browser.
 *
 * @param {object} settings - Parsed settings object; `browser` and `kernel` may be absent.
 * @param {string} profileKey - Passed through unchanged to the Kernel browser helper.
 * @returns {Promise<object>} Whatever the selected helper resolves to
 *   (the Kernel helper resolves to `{ browser, page, type: 'kernel' }`).
 */
export async function createBrowser(settings, profileKey) {
  // Read the browser section once; the path was previously destructured and
  // then looked up a second time under a different name.
  const { provider, playwright_path: pwPath } = settings.browser || {};
  const kernelConfig = settings.kernel || {};

  if (provider === 'local') {
    return createLocalBrowser(pwPath);
  }

  // Default: Kernel
  try {
    return await createKernelBrowser(kernelConfig, profileKey, pwPath);
  } catch (e) {
    console.warn(`[browser] Kernel failed (${e.message}), falling back to local`);
    return createLocalBrowser(pwPath);
  }
}
|
||||
|
||||
async function createKernelBrowser(kernelConfig, profileKey) {
|
||||
// Dynamic import so it doesn't crash if not installed
|
||||
async function createKernelBrowser(kernelConfig, profileKey, playwrightPath) {
|
||||
let Kernel;
|
||||
try {
|
||||
const mod = await import('@onkernel/sdk');
|
||||
const mod = await import('/home/ubuntu/.openclaw/workspace/node_modules/@onkernel/sdk/index.js');
|
||||
Kernel = mod.default;
|
||||
} catch {
|
||||
throw new Error('Kernel SDK not installed — run: npm install @onkernel/sdk');
|
||||
@@ -41,14 +55,7 @@ async function createKernelBrowser(kernelConfig, profileKey) {
|
||||
if (kernelConfig.proxy_id) opts.proxy = { id: kernelConfig.proxy_id };
|
||||
|
||||
const kb = await kernel.browsers.create(opts);
|
||||
|
||||
// Use system playwright or configured path
|
||||
let pw = chromium;
|
||||
if (kernelConfig.playwright_path) {
|
||||
const mod = await import(kernelConfig.playwright_path);
|
||||
pw = mod.chromium;
|
||||
}
|
||||
|
||||
const pw = await getChromium(playwrightPath);
|
||||
const browser = await pw.connectOverCDP(kb.cdp_ws_url);
|
||||
const ctx = browser.contexts()[0] || await browser.newContext();
|
||||
const page = ctx.pages()[0] || await ctx.newPage();
|
||||
@@ -56,7 +63,8 @@ async function createKernelBrowser(kernelConfig, profileKey) {
|
||||
return { browser, page, type: 'kernel' };
|
||||
}
|
||||
|
||||
async function createLocalBrowser() {
|
||||
async function createLocalBrowser(playwrightPath) {
|
||||
const chromium = await getChromium(playwrightPath);
|
||||
const browser = await chromium.launch({ headless: true });
|
||||
const ctx = await browser.newContext({
|
||||
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
|
||||
|
||||
@@ -29,7 +29,7 @@ export async function searchLinkedIn(page, search) {
|
||||
await page.evaluate(() => window.scrollBy(0, 2000));
|
||||
await page.waitForTimeout(1500);
|
||||
|
||||
const found = await page.evaluate((track, excludes) => {
|
||||
const found = await page.evaluate(({ track, excludes }) => {
|
||||
const ids = [...new Set(
|
||||
Array.from(document.querySelectorAll('a[href*="/jobs/view/"]'))
|
||||
.map(a => a.href.match(/\/jobs\/view\/(\d+)/)?.[1])
|
||||
@@ -44,7 +44,6 @@ export async function searchLinkedIn(page, search) {
|
||||
const company = container?.querySelector('[class*="company"], [class*="subtitle"], h4')?.textContent?.trim() || '';
|
||||
const location = container?.querySelector('[class*="location"]')?.textContent?.trim() || '';
|
||||
|
||||
// Basic exclusion filter
|
||||
const titleLower = title.toLowerCase();
|
||||
const companyLower = company.toLowerCase();
|
||||
for (const ex of excludes) {
|
||||
@@ -54,7 +53,7 @@ export async function searchLinkedIn(page, search) {
|
||||
return { id: `li_${id}`, platform: 'linkedin', track, title, company, location,
|
||||
url: `https://www.linkedin.com/jobs/view/${id}/`, jobId: id };
|
||||
}).filter(Boolean);
|
||||
}, search.track, search.exclude_keywords || []);
|
||||
}, { track: search.track, excludes: search.exclude_keywords || [] });
|
||||
|
||||
jobs.push(...found);
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ export async function searchWellfound(page, search) {
|
||||
await page.evaluate(() => window.scrollBy(0, 3000));
|
||||
await page.waitForTimeout(2000);
|
||||
|
||||
const found = await page.evaluate((track, excludes) => {
|
||||
const found = await page.evaluate(({ track, excludes }) => {
|
||||
const seen = new Set();
|
||||
const results = [];
|
||||
|
||||
@@ -57,7 +57,7 @@ export async function searchWellfound(page, search) {
|
||||
});
|
||||
|
||||
return results.slice(0, 30);
|
||||
}, search.track, search.exclude_keywords || []);
|
||||
}, { track: search.track, excludes: search.exclude_keywords || [] });
|
||||
|
||||
jobs.push(...found);
|
||||
}
|
||||
|
||||
1
node_modules
Symbolic link
1
node_modules
Symbolic link
@@ -0,0 +1 @@
|
||||
/home/ubuntu/.openclaw/workspace/node_modules
|
||||
Reference in New Issue
Block a user