feat: find-all → filter → dedup flow

- addJobs: allows the same job to be added on multiple tracks (dedup key = track::id)
- Cross-track copies get a composite id (job.id_track) to avoid batch collisions
- dedupeAfterFilter(): after the collect phase, keeps the highest-scored copy per URL and marks the rest as 'duplicate'
- dedupeAfterFilter() is called automatically at the end of the collect phase
This commit is contained in:
2026-03-06 15:55:00 +00:00
parent 2dfadbde99
commit c9b527c83a
2 changed files with 62 additions and 4 deletions

View File

@@ -23,7 +23,7 @@ import { readFileSync, writeFileSync, existsSync, unlinkSync } from 'fs';
const __dir = dirname(fileURLToPath(import.meta.url));
import { getJobsByStatus, updateJobStatus, loadConfig, loadQueue, saveQueue } from './lib/queue.mjs';
import { getJobsByStatus, updateJobStatus, loadConfig, loadQueue, saveQueue, dedupeAfterFilter } from './lib/queue.mjs';
import { loadProfile, submitBatches, checkBatch, downloadResults } from './lib/filter.mjs';
import { sendTelegram } from './lib/notify.mjs';
@@ -152,6 +152,11 @@ async function collect(state, settings) {
}
saveQueue(queue);
// Dedup cross-track copies — keep highest-scoring version of each job
const duped = dedupeAfterFilter();
if (duped > 0) console.log(` Deduped ${duped} cross-track copies`);
clearState();
// Log run