code icon Code

Deduplicate JSON Array

Remove duplicate items by field value

Source Code

import fs from "fs";
import path from "path";

// Positional CLI arguments: inputPath field [keep] outputPath.
// `keep` is optional, so with exactly three arguments the third one is the
// output path and `keep` falls back to "first". Plain positional destructuring
// would instead bind the output path to `keep`, leave `outputPath` undefined,
// and reject the call with the usage error.
const cliArgs = process.argv.slice(2);
const hasKeepArg = cliArgs.length >= 4;
const [inputPath, field] = cliArgs;
const keep = hasKeepArg ? cliArgs[2] : "first";
const outputPath = hasKeepArg ? cliArgs[3] : cliArgs[2];

// All three required arguments must be present and non-empty.
if (!inputPath || !field || !outputPath) {
  console.error("Usage: inputPath field [keep] outputPath");
  process.exit(1);
}

/**
 * Resolve a (possibly nested) property of `obj` from a dot-separated path,
 * e.g. "user.id". Once a null/undefined value is met before the path is
 * fully consumed, the result is undefined.
 */
function getField(obj, fieldPath) {
  return fieldPath
    .split(".")
    .reduce(
      (current, key) => (current == null ? undefined : current[key]),
      obj
    );
}

// Sentinel key shared by every item whose field is null/undefined. A Symbol
// cannot collide with any real field value (the string "(empty)" could).
const MISSING_KEY = Symbol("missing");

/**
 * Build the deduplication key for a field value.
 *
 * - null/undefined map to a dedicated sentinel.
 * - Objects and arrays are keyed by their JSON text; String() would turn
 *   every plain object into "[object Object]" and merge unrelated items.
 * - Other primitives keep the loose String() comparison, so 1 and "1" are
 *   still treated as the same value.
 *
 * @param {*} value - Field value taken from an item.
 * @returns {string|symbol} Key usable in a Set.
 */
function dedupeKey(value) {
  if (value == null) return MISSING_KEY;
  if (typeof value === "object") return `json:${JSON.stringify(value)}`;
  return `str:${String(value)}`;
}

/**
 * Remove items whose value at `fieldPath` was already seen, preserving the
 * relative order of the surviving items.
 *
 * @param {Array} items - Records to deduplicate.
 * @param {string} fieldPath - Dot-notation path of the field to compare.
 * @param {boolean} keepLast - Keep the last occurrence instead of the first.
 * @returns {Array} New array containing the surviving items.
 */
function dedupeByField(items, fieldPath, keepLast) {
  const seen = new Set();
  const kept = [];
  const count = items.length;

  // For "last", walk backwards so the first hit per key is its final
  // occurrence. Pushing and reversing once keeps this linear; unshifting
  // every kept item would be quadratic.
  for (let step = 0; step < count; step++) {
    const item = items[keepLast ? count - 1 - step : step];
    const key = dedupeKey(getField(item, fieldPath));

    if (!seen.has(key)) {
      seen.add(key);
      kept.push(item);
    }
  }

  return keepLast ? kept.reverse() : kept;
}

// Main script: read the input JSON, deduplicate, write the result, and print
// a human-readable summary followed by a one-line JSON report.
try {
  console.log(`Reading ${inputPath}...`);
  const raw = fs.readFileSync(inputPath, "utf-8");
  const data = JSON.parse(raw);

  // Accept a bare array, or an object wrapping the array under `items`,
  // `results` or `messages`. An object with none of these yields an empty list.
  const items = Array.isArray(data)
    ? data
    : data.items || data.results || data.messages || [];

  if (!Array.isArray(items)) {
    console.error("Input must be a JSON array or object with array property");
    process.exit(1);
  }

  // Any `keep` value other than "last" (case-insensitive) means "first".
  const keepLast = keep.toLowerCase() === "last";
  const keepMode = keepLast ? "last" : "first";
  console.log(
    `Deduplicating ${items.length} items by ${field} (keeping ${keepMode})...`
  );

  const deduped = dedupeByField(items, field, keepLast);
  const duplicatesRemoved = items.length - deduped.length;

  // Ensure output directory exists
  const dir = path.dirname(outputPath);
  if (dir && dir !== ".") {
    fs.mkdirSync(dir, { recursive: true });
  }

  fs.writeFileSync(outputPath, JSON.stringify(deduped, null, 2));

  console.log(`\n✓ Deduplicated ${items.length} → ${deduped.length} items`);
  console.log(`  Field: ${field}`);
  console.log(`  Duplicates removed: ${duplicatesRemoved}`);
  console.log(`  Kept: ${keepMode} occurrence`);
  console.log(`  Written to: ${outputPath}`);

  // Machine-readable summary on the last stdout line.
  console.log(
    JSON.stringify({
      success: true,
      outputPath,
      inputCount: items.length,
      outputCount: deduped.length,
      duplicatesRemoved,
      field,
      keep: keepMode,
    })
  );
} catch (error) {
  // Covers unreadable input, invalid JSON and write failures.
  console.error("Error:", error.message);
  process.exit(1);
}