diff --git a/job_filter.mjs b/job_filter.mjs index bb18b53..461923e 100644 --- a/job_filter.mjs +++ b/job_filter.mjs @@ -100,7 +100,7 @@ async function collect(state, settings) { } console.log(` Batch ended. Downloading results...`); - const results = await downloadResults(state.batch_id, apiKey); + const results = await downloadResults(state.batch_id, apiKey, state.id_map || {}); const searchConfig = loadConfig(resolve(__dir, 'config/search_config.json')); const globalMin = searchConfig.filter_min_score ?? 5; @@ -195,7 +195,7 @@ async function submit(settings, searchConfig, candidateProfile) { const model = settings.filter?.model || DEFAULT_MODEL; console.log(`🚀 Submitting batch — ${filterable.length} jobs, model: ${model}`); - const batchId = await submitBatch(filterable, jobProfilesByTrack, searchConfig, candidateProfile, model, apiKey); + const { batchId, idMap } = await submitBatch(filterable, jobProfilesByTrack, searchConfig, candidateProfile, model, apiKey); const submittedAt = new Date().toISOString(); writeState({ @@ -204,6 +204,7 @@ async function submit(settings, searchConfig, candidateProfile) { job_count: filterable.length, model, tracks: Object.keys(jobProfilesByTrack), + id_map: idMap, }); console.log(` Batch submitted: ${batchId}`); diff --git a/lib/filter.mjs b/lib/filter.mjs index a92ec3f..12c4150 100644 --- a/lib/filter.mjs +++ b/lib/filter.mjs @@ -62,11 +62,16 @@ Penalize heavily for: Return ONLY a JSON object: {"score": <0-10>, "reason": ""}`; } +function sanitize(str) { + // Remove lone surrogates and other invalid Unicode that breaks JSON encoding + return (str || '').replace(/[\uD800-\uDFFF]/g, '').replace(/\0/g, ''); +} + function buildJobMessage(job) { - const desc = (job.description || '').substring(0, DESC_MAX_CHARS).replace(/\s+/g, ' ').trim(); - return `Title: ${job.title} -Company: ${job.company || 'Unknown'} -Location: ${job.location || 'Unknown'} + const desc = sanitize(job.description).substring(0, DESC_MAX_CHARS).replace(/\s+/g, ' ').trim(); + return `Title: ${sanitize(job.title)} +Company: ${sanitize(job.company) || 'Unknown'} +Location: ${sanitize(job.location) || 'Unknown'} Description: ${desc} Return ONLY: {"score": <0-10>, "reason": ""}`; @@ -94,6 +99,7 @@ export async function submitBatch(jobs, jobProfilesByTrack, searchConfig, candid const globalMin = searchConfig.filter_min_score ?? 5; const requests = []; + const idMap = {}; // custom_id → job.id (handles truncation edge cases) for (const job of jobs) { const track = job.track || 'ae'; @@ -101,9 +107,12 @@ export async function submitBatch(jobs, jobProfilesByTrack, searchConfig, candid if (!jobProfile) continue; // no profile → skip (caller handles this) const systemPrompt = buildSystemPrompt(jobProfile, candidateProfile); + // Anthropic custom_id max 64 chars + const customId = job.id.length <= 64 ? job.id : job.id.substring(0, 64); + idMap[customId] = job.id; requests.push({ - custom_id: job.id, + custom_id: customId, params: { model, max_tokens: 128, @@ -123,6 +132,7 @@ export async function submitBatch(jobs, jobProfilesByTrack, searchConfig, candid if (requests.length === 0) throw new Error('No requests to submit — check job profiles are configured'); + // Return idMap alongside batch ID so collector can resolve truncated IDs const res = await fetch(BATCH_API, { method: 'POST', headers: apiHeaders(apiKey), @@ -135,7 +145,7 @@ export async function submitBatch(jobs, jobProfilesByTrack, searchConfig, candid } const data = await res.json(); - return data.id; // msgbatch_... + return { batchId: data.id, idMap }; } /** @@ -159,7 +169,7 @@ export async function checkBatch(batchId, apiKey) { /** * Download and parse batch results. Returns array of { jobId, score, reason, error } */ -export async function downloadResults(batchId, apiKey) { +export async function downloadResults(batchId, apiKey, idMap = {}) { const res = await fetch(`${BATCH_API}/${batchId}/results`, { headers: apiHeaders(apiKey), }); @@ -173,7 +183,8 @@ export async function downloadResults(batchId, apiKey) { for (const line of lines) { try { const entry = JSON.parse(line); - const jobId = entry.custom_id; + // Resolve truncated custom_id back to original job ID + const jobId = idMap[entry.custom_id] || entry.custom_id; if (entry.result?.type === 'succeeded') { const content = entry.result.message?.content?.[0]?.text || '';