feat: one batch per track — separate GTM/AE batches with their own system prompts

- submitBatch → submitBatches: groups jobs by track, submits one batch each
- filter_state.json now stores batches[] array instead of single batch_id
- Collect waits for all batches to finish before processing
- Each track gets its own cached system prompt = better caching + cleaner scoring
- Idempotent collect: skips already-scored jobs
This commit is contained in:
2026-03-06 11:35:15 +00:00
parent aadec0704b
commit c88a71fc20
2 changed files with 103 additions and 100 deletions

View File

@@ -97,61 +97,63 @@ function apiHeaders(apiKey) {
}
/**
 * Submit one batch per track (one per job profile/search description).
 * Each batch uses the system prompt for that track only — maximizes prompt caching,
 * since Anthropic caches the ephemeral system block across requests in a batch.
 *
 * @param {Array<{id: string, track?: string}>} jobs - Jobs to score; `track` defaults to 'ae'.
 * @param {Object<string, object>} jobProfilesByTrack - Map of track name → job profile; jobs
 *   whose track has no profile are silently skipped.
 * @param {object} candidateProfile - Candidate context passed into every system prompt.
 * @param {string} model - Anthropic model ID used for every request.
 * @param {string} apiKey - Anthropic API key.
 * @returns {Promise<Array<{track: string, batchId: string, idMap: Object<string,string>, jobCount: number}>>}
 *   One entry per submitted batch; `idMap` resolves (possibly truncated) custom_ids back to job IDs.
 * @throws {Error} If no job matches a configured profile, or any batch submission fails.
 */
export async function submitBatches(jobs, jobProfilesByTrack, candidateProfile, model, apiKey) {
  // Group jobs by track
  const byTrack = {};
  for (const job of jobs) {
    const track = job.track || 'ae';
    if (!jobProfilesByTrack[track]) continue; // no profile → skip
    if (!byTrack[track]) byTrack[track] = [];
    byTrack[track].push(job);
  }
  if (Object.keys(byTrack).length === 0) throw new Error('No jobs to submit — check job profiles are configured');
  const submitted = [];
  for (const [track, trackJobs] of Object.entries(byTrack)) {
    const jobProfile = jobProfilesByTrack[track];
    if (!jobProfile) continue; // no profile → skip (caller handles this)
    const systemPrompt = buildSystemPrompt(jobProfile, candidateProfile);
    const idMap = {}; // custom_id → job.id (handles truncation edge cases)
    const requests = [];
    for (const job of trackJobs) {
      // Anthropic custom_id max 64 chars
      const customId = job.id.length <= 64 ? job.id : job.id.substring(0, 64);
      idMap[customId] = job.id;
      requests.push({
        custom_id: customId,
        params: {
          model,
          max_tokens: 1024,
          system: [{ type: 'text', text: systemPrompt, cache_control: { type: 'ephemeral' } }],
          messages: [{ role: 'user', content: buildJobMessage(job) }],
        }
      });
    }
    const res = await fetch(BATCH_API, {
      method: 'POST',
      headers: apiHeaders(apiKey),
      body: JSON.stringify({ requests }),
    });
    if (!res.ok) {
      const err = await res.text();
      throw new Error(`Batch submit failed for track "${track}" ${res.status}: ${err}`);
    }
    const data = await res.json();
    submitted.push({ track, batchId: data.id, idMap, jobCount: trackJobs.length });
    console.log(` [${track}] ${trackJobs.length} jobs → batch ${data.id}`);
  }
  return submitted;
}
/**