Fix answer saving: normalize answers format, improve label dedup

- normalizeAnswers() handles both object {"q":"a"} and array [{pattern,answer}]
  formats — prevents silent failures when answers.json format varies
- getLabel() now strips "Required" suffix before dedup, uses smarter
  prefix-repeat detection instead of simple half-split
- telegram_answers.mjs also normalizes on load
- Cleaned existing answers.json on AWS: removed duplicated text in patterns,
  fixed bad AI answer for "Current company", generalized Geotab-specific patterns

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-06 12:24:10 -08:00
parent 4419363b3c
commit c5ebdc9362
2 changed files with 35 additions and 11 deletions

View File

@@ -11,10 +11,24 @@ import {
AUTOCOMPLETE_WAIT, AUTOCOMPLETE_TIMEOUT, ANTHROPIC_API_URL
} from './constants.mjs';
/**
 * Normalize saved answers into the array form `[{ pattern, answer }]`.
 *
 * Accepted inputs:
 *   - Array  `[{ pattern, answer }]`      → returned as-is (same reference, entries not validated)
 *   - Object `{ "question": "answer" }`   → one `{ pattern, answer }` entry per own enumerable key
 *   - Anything else (null, undefined, primitives) → `[]`
 *
 * @param {unknown} answers - Parsed answers data in either supported format.
 * @returns {Array<{ pattern: string, answer: string }>} Answers in array form.
 */
function normalizeAnswers(answers) {
  if (!answers) return [];
  if (Array.isArray(answers)) return answers;
  if (typeof answers !== 'object') return [];

  return Object.entries(answers).map(([pattern, answer]) => ({
    pattern,
    // String(null) / String(undefined) would produce the literal text
    // "null" / "undefined"; treat a missing value as an empty answer instead.
    answer: answer == null ? '' : String(answer),
  }));
}
export class FormFiller {
constructor(profile, answers, opts = {}) {
this.profile = profile;
this.answers = answers || []; // [{ pattern, answer }]
this.answers = normalizeAnswers(answers); // [{ pattern, answer }]
this.apiKey = opts.apiKey || null;
this.answersPath = opts.answersPath || null; // path to answers.json for saving
this.jobContext = opts.jobContext || {}; // { title, company }
@@ -157,16 +171,20 @@ export class FormFiller {
}
}
// Clean up — remove trailing * from required field labels
// Also deduplicate labels like "Phone country codePhone country code"
// Clean up label text
let raw = forLabel || ariaLabel || linked || ancestorLabel || node.placeholder || node.name || '';
// Normalize whitespace and remove trailing * from required field labels
raw = raw.replace(/\s+/g, ' ').replace(/\s*\*\s*$/, '').trim();
// Deduplicate repeated label text (LinkedIn renders label text twice sometimes)
// e.g. "Phone country codePhone country code" → "Phone country code"
if (raw.length > 4) {
const half = Math.ceil(raw.length / 2);
if (raw.slice(0, half) === raw.slice(half, half * 2)) raw = raw.slice(0, half).trim();
// Normalize whitespace, strip trailing *, strip "Required" suffix
raw = raw.replace(/\s+/g, ' ').replace(/\s*\*\s*$/, '').replace(/\s*Required\s*$/i, '').trim();
// Deduplicate repeated label text (LinkedIn renders label text twice)
// e.g. "First sales hire?First sales hire?" → "First sales hire?"
if (raw.length > 8) {
for (let len = Math.ceil(raw.length / 2); len >= 4; len--) {
const candidate = raw.slice(0, len);
if (raw.startsWith(candidate + candidate)) {
raw = candidate.trim();
break;
}
}
}
return raw;
}).catch(() => '');

View File

@@ -33,7 +33,13 @@ function saveOffset(offset) {
/**
 * Load saved answers from a JSON file and return them as `[{ pattern, answer }]`.
 *
 * Supports both on-disk formats:
 *   - Array  `[{ pattern, answer }]`    → returned as parsed
 *   - Object `{ "question": "answer" }` → converted to one entry per key
 * A missing file, or JSON that is neither an array nor an object, yields `[]`.
 *
 * @param {string} path - Location of the answers JSON file.
 * @returns {Array<{ pattern: string, answer: string }>} Answers in array form.
 * @throws {SyntaxError} If the file exists but does not contain valid JSON.
 *   This is deliberately not swallowed here.
 */
function loadAnswers(path) {
  if (!existsSync(path)) return [];

  // Parse once, then normalize. Returning the parsed value directly would
  // skip normalization and hand object-format files back un-converted.
  const raw = JSON.parse(readFileSync(path, 'utf8'));
  if (Array.isArray(raw)) return raw;
  if (raw === null || typeof raw !== 'object') return [];

  return Object.entries(raw).map(([pattern, answer]) => ({
    pattern,
    // A JSON null value would otherwise be stored as the literal text "null".
    answer: answer == null ? '' : String(answer),
  }));
}
function saveAnswers(path, answers) {