code icon Code

Summarize JSON Array

Compute top-N counts by field, total count, and date range from a JSON array

Source Code

import fs from "fs";
import path from "path";

// Positional CLI arguments: inputPath groupByField [topN] outputPath.
// topN is optional, so when exactly three arguments are supplied the third
// one is the output path, not topN.
const cliArgs = process.argv.slice(2);
const [inputPath, groupByField, topNArg = "5", outputPath] =
  cliArgs.length === 3
    ? [cliArgs[0], cliArgs[1], undefined, cliArgs[2]]
    : cliArgs;

if (!inputPath || !groupByField || !outputPath) {
  console.error(
    "Usage: inputPath groupByField [topN] outputPath"
  );
  process.exit(1);
}

// Anything that is not a positive base-10 integer falls back to 5. A negative
// value must not reach `.slice(0, topN)`, where it would silently drop entries
// from the end of the ranking instead of limiting it.
const parsedTopN = Number.parseInt(topNArg, 10);
const topN = parsedTopN > 0 ? parsedTopN : 5;

/**
 * Resolve a (possibly nested) property using a dot-separated path.
 *
 * Each path segment is looked up on the result of the previous one. As soon
 * as an intermediate value is null or undefined the lookup yields undefined
 * instead of throwing.
 *
 * @param {*} obj - Root value to read from.
 * @param {string} fieldPath - Dot-notation path, e.g. "user.address.city".
 * @returns {*} The value found at the path, or undefined if it is unreachable.
 */
function getField(obj, fieldPath) {
  return fieldPath
    .split(".")
    .reduce(
      (current, segment) => (current == null ? undefined : current[segment]),
      obj
    );
}

/**
 * Convert a value to a Date, or null when that is not possible.
 *
 * Falsy inputs are rejected up front; everything else is handed to the Date
 * constructor and accepted only if the resulting timestamp is a real number.
 *
 * @param {*} value - Candidate date (string, number, Date, ...).
 * @returns {Date|null} A valid Date instance, or null.
 */
function parseDate(value) {
  if (value) {
    const parsed = new Date(value);
    if (!Number.isNaN(parsed.getTime())) {
      return parsed;
    }
  }
  return null;
}

/**
 * Find date fields in the data by checking the first few items.
 *
 * Walks up to the first five items, recursing into nested plain objects
 * (arrays are not descended into), and collects the dot-notation path of every
 * string value that parseDate() accepts.
 *
 * A path is only skipped once it has been confirmed as a date field. A field
 * that is null, missing, or not a date in the first item is therefore still
 * detected from a later sampled item, and nested objects are re-examined for
 * every sampled item so keys that only appear later are not missed.
 *
 * @param {Array<object>} items - Records to inspect.
 * @returns {string[]} Unique dot-notation paths, in order of first detection.
 */
function findDateFields(items) {
  const sampleSize = 5;
  const dateFields = new Set();

  function checkObject(obj, prefix = "") {
    for (const [key, value] of Object.entries(obj || {})) {
      const fieldPath = prefix ? `${prefix}.${key}` : key;

      if (typeof value === "string") {
        // Skip re-parsing paths already known to hold dates.
        if (!dateFields.has(fieldPath) && parseDate(value)) {
          dateFields.add(fieldPath);
        }
      } else if (typeof value === "object" && value !== null && !Array.isArray(value)) {
        checkObject(value, fieldPath);
      }
    }
  }

  for (const item of items.slice(0, sampleSize)) {
    checkObject(item);
  }

  return [...dateFields];
}

// Main script body: read the input JSON, build the summary (top-N counts for
// groupByField, total item count, overall date range), write it to outputPath
// and print a human-readable report followed by a one-line JSON result.
// Any failure is reported on stderr and the process exits with status 1.
try {
  console.log(`Reading ${inputPath}...`);
  const raw = fs.readFileSync(inputPath, "utf-8");
  const data = JSON.parse(raw);

  // A JSON root of null or a primitive (number, string, boolean) can never
  // carry items; reject it explicitly instead of failing with an opaque
  // TypeError on the property lookups below.
  if (data === null || typeof data !== "object") {
    console.error("Input must be a JSON array or object with array property");
    process.exit(1);
  }

  // Accept a bare array, or a wrapper object exposing the array under one of
  // the known property names.
  const items = Array.isArray(data) ? data : data.items || data.results || data.messages || [];

  if (!Array.isArray(items)) {
    console.error("Input must be a JSON array or object with array property");
    process.exit(1);
  }

  console.log(`Processing ${items.length} items...`);

  // Count by field. A Map is used rather than a plain object because the keys
  // come from the data: values such as "constructor", "toString" or
  // "__proto__" would otherwise collide with Object.prototype members and
  // produce wrong (or lost) counts.
  const counts = new Map();
  for (const item of items) {
    const value = getField(item, groupByField);
    const key = value != null ? String(value) : "(empty)";
    counts.set(key, (counts.get(key) || 0) + 1);
  }

  // Get top N (stable sort: ties keep first-seen order)
  const topItems = [...counts]
    .sort((a, b) => b[1] - a[1])
    .slice(0, topN)
    .map(([value, count]) => ({ value, count }));

  // Find date range: track min/max in a single pass over every detected date
  // field instead of collecting and sorting all dates.
  let dateRange = null;
  const dateFields = findDateFields(items);
  let oldest = null;
  let newest = null;

  for (const item of items) {
    for (const field of dateFields) {
      const date = parseDate(getField(item, field));
      if (!date) continue;
      if (oldest === null || date < oldest) oldest = date;
      if (newest === null || date > newest) newest = date;
    }
  }

  if (oldest !== null) {
    dateRange = {
      oldest: oldest.toISOString(),
      newest: newest.toISOString(),
      // First detected date field, kept for backward compatibility.
      dateField: dateFields[0],
      // The range spans every detected date field, so list them all.
      dateFields,
    };
  }

  const summary = {
    total: items.length,
    groupByField,
    topN: topItems,
    dateRange,
  };

  // Ensure output directory exists
  const dir = path.dirname(outputPath);
  if (dir && dir !== ".") {
    fs.mkdirSync(dir, { recursive: true });
  }

  fs.writeFileSync(outputPath, JSON.stringify(summary, null, 2));

  console.log(`\n✓ Summarized ${items.length} items`);
  console.log(`  Grouped by: ${groupByField}`);
  console.log(`  Top ${topN}:`);
  for (const item of topItems) {
    console.log(`    - ${item.value}: ${item.count}`);
  }
  if (dateRange) {
    console.log(
      `  Date range: ${dateRange.oldest.split("T")[0]} to ${dateRange.newest.split("T")[0]}`
    );
  }
  console.log(`  Written to: ${outputPath}`);

  // Machine-readable result on the last stdout line.
  console.log(JSON.stringify({ success: true, outputPath, ...summary }));
} catch (error) {
  console.error("Error:", error.message);
  process.exit(1);
}