feat: one batch per track — separate GTM/AE batches with their own system prompts

- submitBatch → submitBatches: groups jobs by track, submits one batch each
- filter_state.json now stores batches[] array instead of single batch_id
- Collect waits for all batches to finish before processing
- Each track gets its own cached system prompt = better caching + cleaner scoring
- Idempotent collect: skips already-scored jobs
This commit is contained in:
2026-03-06 11:35:15 +00:00
parent aadec0704b
commit c88a71fc20
2 changed files with 103 additions and 100 deletions

View File

@@ -97,61 +97,63 @@ function apiHeaders(apiKey) {
}
/**
 * Submit one batch per track (one per job profile/search description).
 * Each batch uses the system prompt for that track only — maximizes prompt caching,
 * since Anthropic caches the ephemeral system block across requests in a batch.
 *
 * @param {Array<{id: string, track?: string}>} jobs - Jobs to score; `track` defaults to 'ae'.
 * @param {Object<string, object>} jobProfilesByTrack - Map of track name → job profile; jobs
 *   whose track has no profile are silently skipped.
 * @param {object} candidateProfile - Candidate context passed into every system prompt.
 * @param {string} model - Anthropic model ID used for every request.
 * @param {string} apiKey - Anthropic API key.
 * @returns {Promise<Array<{track: string, batchId: string, idMap: Object<string,string>, jobCount: number}>>}
 *   One entry per submitted batch; `idMap` resolves (possibly truncated) custom_ids back to job IDs.
 * @throws {Error} If no job matches a configured profile, or any batch submission fails.
 */
export async function submitBatches(jobs, jobProfilesByTrack, candidateProfile, model, apiKey) {
  // Group jobs by track
  const byTrack = {};
  for (const job of jobs) {
    const track = job.track || 'ae';
    if (!jobProfilesByTrack[track]) continue; // no profile → skip
    if (!byTrack[track]) byTrack[track] = [];
    byTrack[track].push(job);
  }
  if (Object.keys(byTrack).length === 0) throw new Error('No jobs to submit — check job profiles are configured');
  const submitted = [];
  for (const [track, trackJobs] of Object.entries(byTrack)) {
    const jobProfile = jobProfilesByTrack[track];
    if (!jobProfile) continue; // no profile → skip (caller handles this)
    const systemPrompt = buildSystemPrompt(jobProfile, candidateProfile);
    const idMap = {}; // custom_id → job.id (handles truncation edge cases)
    const requests = [];
    for (const job of trackJobs) {
      // Anthropic custom_id max 64 chars
      const customId = job.id.length <= 64 ? job.id : job.id.substring(0, 64);
      idMap[customId] = job.id;
      requests.push({
        custom_id: customId,
        params: {
          model,
          max_tokens: 1024,
          system: [{ type: 'text', text: systemPrompt, cache_control: { type: 'ephemeral' } }],
          messages: [{ role: 'user', content: buildJobMessage(job) }],
        }
      });
    }
    const res = await fetch(BATCH_API, {
      method: 'POST',
      headers: apiHeaders(apiKey),
      body: JSON.stringify({ requests }),
    });
    if (!res.ok) {
      const err = await res.text();
      throw new Error(`Batch submit failed for track "${track}" ${res.status}: ${err}`);
    }
    const data = await res.json();
    submitted.push({ track, batchId: data.id, idMap, jobCount: trackJobs.length });
    console.log(` [${track}] ${trackJobs.length} jobs → batch ${data.id}`);
  }
  return submitted;
}
/**