Fix label dedup for space-separated duplicates

LinkedIn renders label text twice in nested spans, producing
"Question? Question?" instead of "Question?". The old dedup only
caught exact concatenation (ABCABC); the new one also handles
space-separated duplicates (ABC ABC) by splitting the text at the first
space found when searching from just before the midpoint and comparing
the left and right parts.

Applied to all 4 copies: extractLabel, _extractLabel, normalizeLegend,
_normalizeLegend.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-06 16:28:51 -08:00
parent 05739a455b
commit aefd1f2023

View File

@@ -56,11 +56,18 @@ function extractLabel(node) {
raw = raw.replace(/\s+/g, ' ').replace(/\s*\*\s*$/, '').replace(/\s*Required\s*$/i, '').trim();
// Deduplicate repeated label text (LinkedIn renders label text twice)
if (raw.length > 8) {
for (let len = Math.ceil(raw.length / 2); len >= 4; len--) {
const candidate = raw.slice(0, len);
if (raw.startsWith(candidate + candidate)) {
raw = candidate.trim();
break;
const half = Math.ceil(raw.length / 2);
const firstHalf = raw.slice(0, half).trim();
const secondHalf = raw.slice(half).trim();
if (firstHalf === secondHalf) {
raw = firstHalf;
} else {
// Also check with space separator: "ABC ABC" -> "ABC"
const spaceIdx = raw.indexOf(' ', Math.floor(raw.length / 2) - 2);
if (spaceIdx > 0) {
const left = raw.slice(0, spaceIdx).trim();
const right = raw.slice(spaceIdx).trim();
if (left === right) raw = left;
}
}
}
@@ -95,9 +102,18 @@ function checkRequired(node) {
function normalizeLegend(el) {
let raw = (el.textContent || '').replace(/\s+/g, ' ').replace(/\s*\*\s*$/, '').replace(/\s*Required\s*$/i, '').trim();
if (raw.length > 8) {
for (let len = Math.ceil(raw.length / 2); len >= 4; len--) {
const candidate = raw.slice(0, len);
if (raw.startsWith(candidate + candidate)) { raw = candidate.trim(); break; }
const half = Math.ceil(raw.length / 2);
const firstHalf = raw.slice(0, half).trim();
const secondHalf = raw.slice(half).trim();
if (firstHalf === secondHalf) {
raw = firstHalf;
} else {
const spaceIdx = raw.indexOf(' ', Math.floor(raw.length / 2) - 2);
if (spaceIdx > 0) {
const left = raw.slice(0, spaceIdx).trim();
const right = raw.slice(spaceIdx).trim();
if (left === right) raw = left;
}
}
}
return raw;
@@ -354,11 +370,17 @@ Answer:`;
let raw = forLabel || ariaLabel || linked || ancestorLabel || node.placeholder || node.name || '';
raw = raw.replace(/\s+/g, ' ').replace(/\s*\*\s*$/, '').replace(/\s*Required\s*$/i, '').trim();
if (raw.length > 8) {
for (let len = Math.ceil(raw.length / 2); len >= 4; len--) {
const candidate = raw.slice(0, len);
if (raw.startsWith(candidate + candidate)) {
raw = candidate.trim();
break;
const half = Math.ceil(raw.length / 2);
const firstHalf = raw.slice(0, half).trim();
const secondHalf = raw.slice(half).trim();
if (firstHalf === secondHalf) {
raw = firstHalf;
} else {
const spaceIdx = raw.indexOf(' ', Math.floor(raw.length / 2) - 2);
if (spaceIdx > 0) {
const left = raw.slice(0, spaceIdx).trim();
const right = raw.slice(spaceIdx).trim();
if (left === right) raw = left;
}
}
}
@@ -387,9 +409,18 @@ Answer:`;
function _normalizeLegend(el) {
let raw = (el.textContent || '').replace(/\s+/g, ' ').replace(/\s*\*\s*$/, '').replace(/\s*Required\s*$/i, '').trim();
if (raw.length > 8) {
for (let len = Math.ceil(raw.length / 2); len >= 4; len--) {
const candidate = raw.slice(0, len);
if (raw.startsWith(candidate + candidate)) { raw = candidate.trim(); break; }
const half = Math.ceil(raw.length / 2);
const firstHalf = raw.slice(0, half).trim();
const secondHalf = raw.slice(half).trim();
if (firstHalf === secondHalf) {
raw = firstHalf;
} else {
const spaceIdx = raw.indexOf(' ', Math.floor(raw.length / 2) - 2);
if (spaceIdx > 0) {
const left = raw.slice(0, spaceIdx).trim();
const right = raw.slice(spaceIdx).trim();
if (left === right) raw = left;
}
}
}
return raw;