feat: find-all → filter → dedup flow

- addJobs: allows the same job on multiple tracks (dedup key = track::id)
- Cross-track copies get a composite id (job.id_track) to avoid batch collisions
- dedupeAfterFilter(): after collect, keeps the highest-scored copy per URL and marks the rest as 'duplicate'
- dedupeAfterFilter() is called automatically at the end of the collect phase
2026-03-06 15:55:00 +00:00
parent 2dfadbde99
commit c9b527c83a
2 changed files with 62 additions and 4 deletions
--- a/job_filter.mjs
+++ b/job_filter.mjs
@@ -23,7 +23,7 @@ import { readFileSync, writeFileSync, existsSync, unlinkSync } from 'fs';

 const __dir = dirname(fileURLToPath(import.meta.url));

-import { getJobsByStatus, updateJobStatus, loadConfig, loadQueue, saveQueue } from './lib/queue.mjs';
+import { getJobsByStatus, updateJobStatus, loadConfig, loadQueue, saveQueue, dedupeAfterFilter } from './lib/queue.mjs';
 import { loadProfile, submitBatches, checkBatch, downloadResults } from './lib/filter.mjs';
 import { sendTelegram } from './lib/notify.mjs';

@@ -152,6 +152,11 @@ async function collect(state, settings) {
  }

  saveQueue(queue);
+
+  // Dedup cross-track copies — keep highest-scoring version of each job
+  const duped = dedupeAfterFilter();
+  if (duped > 0) console.log(`  Deduped ${duped} cross-track copies`);
+
  clearState();

  // Log run