fix: sanitize Unicode surrogates in job descriptions, handle custom_id > 64 chars
This commit is contained in:
@@ -62,11 +62,16 @@ Penalize heavily for:
|
||||
Return ONLY a JSON object: {"score": <0-10>, "reason": "<one line>"}`;
|
||||
}
|
||||
|
||||
/**
 * Strip characters that break JSON/UTF-8 encoding from a text field.
 *
 * Removes unpaired (lone) UTF-16 surrogates and NUL characters. Well-formed
 * surrogate pairs (emoji and other astral-plane characters) are preserved.
 *
 * @param {*} str - Value to clean. Falsy values yield an empty string;
 *   other non-string values are converted with `String()`.
 * @returns {string} The cleaned string.
 */
function sanitize(str) {
  if (!str) return '';
  // The `u` flag makes the regex operate on code points, so the surrogate
  // range only matches *unpaired* surrogates. Without it every half of a
  // valid pair would match and emoji would be silently deleted.
  return String(str).replace(/[\uD800-\uDFFF]/gu, '').replace(/\0/g, '');
}
|
||||
|
||||
function buildJobMessage(job) {
|
||||
const desc = (job.description || '').substring(0, DESC_MAX_CHARS).replace(/\s+/g, ' ').trim();
|
||||
return `Title: ${job.title}
|
||||
Company: ${job.company || 'Unknown'}
|
||||
Location: ${job.location || 'Unknown'}
|
||||
const desc = sanitize(job.description).substring(0, DESC_MAX_CHARS).replace(/\s+/g, ' ').trim();
|
||||
return `Title: ${sanitize(job.title)}
|
||||
Company: ${sanitize(job.company) || 'Unknown'}
|
||||
Location: ${sanitize(job.location) || 'Unknown'}
|
||||
Description: ${desc}
|
||||
|
||||
Return ONLY: {"score": <0-10>, "reason": "<one line>"}`;
|
||||
@@ -94,6 +99,7 @@ export async function submitBatch(jobs, jobProfilesByTrack, searchConfig, candid
|
||||
const globalMin = searchConfig.filter_min_score ?? 5;
|
||||
|
||||
const requests = [];
|
||||
const idMap = {}; // custom_id → job.id (handles truncation edge cases)
|
||||
|
||||
for (const job of jobs) {
|
||||
const track = job.track || 'ae';
|
||||
@@ -101,9 +107,12 @@ export async function submitBatch(jobs, jobProfilesByTrack, searchConfig, candid
|
||||
if (!jobProfile) continue; // no profile → skip (caller handles this)
|
||||
|
||||
const systemPrompt = buildSystemPrompt(jobProfile, candidateProfile);
|
||||
// Anthropic custom_id max 64 chars
|
||||
const customId = job.id.length <= 64 ? job.id : job.id.substring(0, 64);
|
||||
idMap[customId] = job.id;
|
||||
|
||||
requests.push({
|
||||
custom_id: job.id,
|
||||
custom_id: customId,
|
||||
params: {
|
||||
model,
|
||||
max_tokens: 128,
|
||||
@@ -123,6 +132,7 @@ export async function submitBatch(jobs, jobProfilesByTrack, searchConfig, candid
|
||||
|
||||
if (requests.length === 0) throw new Error('No requests to submit — check job profiles are configured');
|
||||
|
||||
// Return idMap alongside batch ID so collector can resolve truncated IDs
|
||||
const res = await fetch(BATCH_API, {
|
||||
method: 'POST',
|
||||
headers: apiHeaders(apiKey),
|
||||
@@ -135,7 +145,7 @@ export async function submitBatch(jobs, jobProfilesByTrack, searchConfig, candid
|
||||
}
|
||||
|
||||
const data = await res.json();
|
||||
return data.id; // msgbatch_...
|
||||
return { batchId: data.id, idMap };
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -159,7 +169,7 @@ export async function checkBatch(batchId, apiKey) {
|
||||
/**
|
||||
* Download and parse batch results. Returns array of { jobId, score, reason, error }
|
||||
*/
|
||||
export async function downloadResults(batchId, apiKey) {
|
||||
export async function downloadResults(batchId, apiKey, idMap = {}) {
|
||||
const res = await fetch(`${BATCH_API}/${batchId}/results`, {
|
||||
headers: apiHeaders(apiKey),
|
||||
});
|
||||
@@ -173,7 +183,8 @@ export async function downloadResults(batchId, apiKey) {
|
||||
for (const line of lines) {
|
||||
try {
|
||||
const entry = JSON.parse(line);
|
||||
const jobId = entry.custom_id;
|
||||
// Resolve truncated custom_id back to original job ID
|
||||
const jobId = idMap[entry.custom_id] || entry.custom_id;
|
||||
|
||||
if (entry.result?.type === 'succeeded') {
|
||||
const content = entry.result.message?.content?.[0]?.text || '';
|
||||
|
||||
Reference in New Issue
Block a user