Summarize JSON Array
Compute top-N counts by field, total count, and date range from a JSON array
Source Code
import fs from "fs";
import path from "path";
// CLI contract: inputPath groupByField [topN] outputPath
const cliArgs = process.argv.slice(2);

// topN is optional. With exactly three arguments the third one is the output
// path, so shift it into the right slot and let topN fall back to its default.
const [inputPath, groupByField, topNArg = "5", outputPath] =
  cliArgs.length === 3
    ? [cliArgs[0], cliArgs[1], undefined, cliArgs[2]]
    : cliArgs;

if (!inputPath || !groupByField || !outputPath) {
  console.error(
    "Usage: inputPath groupByField [topN] outputPath"
  );
  process.exit(1);
}

// Always parse base 10. Anything that is not a positive integer (NaN, 0,
// negatives) falls back to 5; a negative value would otherwise reach
// `slice(0, -n)` later and drop entries from the end of the ranking.
const parsedTopN = Number.parseInt(topNArg, 10);
const topN = parsedTopN > 0 ? parsedTopN : 5;
/**
 * Get a (possibly nested) field value using dot notation.
 *
 * Only own properties are followed, so inherited members such as
 * `constructor` or `toString` are never reported as field values.
 *
 * @param {*} obj - Value to read from; may be null or undefined.
 * @param {string} fieldPath - Dot-separated path, e.g. "user.address.city".
 * @returns {*} The value found at the path, or undefined when any segment
 *   along the way is missing or null.
 */
function getField(obj, fieldPath) {
  let value = obj;
  for (const part of fieldPath.split(".")) {
    if (value == null) return undefined;
    // Borrow hasOwnProperty from Object.prototype: it works for primitives
    // and for objects that do not inherit from Object.prototype.
    if (!Object.prototype.hasOwnProperty.call(value, part)) return undefined;
    value = value[part];
  }
  return value;
}
/**
 * Attempt to turn a value into a Date.
 *
 * Falsy inputs (null, undefined, "", 0, ...) are rejected up front; anything
 * else is handed to the Date constructor.
 *
 * @param {*} value - Candidate date value.
 * @returns {Date|null} A valid Date, or null when the value is falsy or the
 *   Date constructor produces an invalid date.
 */
function parseDate(value) {
  if (value) {
    const candidate = new Date(value);
    if (!Number.isNaN(candidate.getTime())) {
      return candidate;
    }
  }
  return null;
}
/**
 * Find date fields in the data by inspecting the first few items.
 *
 * Every sampled item is examined: a field path counts as a date field as soon
 * as any sampled item holds a string there that parseDate accepts. A null or
 * non-date value in an earlier item does not rule the path out, and nested
 * objects are walked in every sampled item. Arrays are not descended into.
 *
 * @param {Array<*>} items - Parsed records; null entries are tolerated.
 * @returns {string[]} Dot-notation paths of the detected date fields, in the
 *   order they were first detected.
 */
function findDateFields(items) {
  const SAMPLE_SIZE = 5;
  // A Set de-duplicates paths while preserving first-detection order.
  const dateFields = new Set();

  function checkObject(obj, prefix = "") {
    for (const [key, value] of Object.entries(obj || {})) {
      const fieldPath = prefix ? `${prefix}.${key}` : key;
      // Already confirmed as a date field by an earlier sample.
      if (dateFields.has(fieldPath)) continue;
      if (typeof value === "string") {
        if (parseDate(value)) dateFields.add(fieldPath);
      } else if (typeof value === "object" && value !== null && !Array.isArray(value)) {
        checkObject(value, fieldPath);
      }
    }
  }

  for (const item of items.slice(0, SAMPLE_SIZE)) {
    checkObject(item);
  }
  return [...dateFields];
}
// Main flow: read the input, build the summary, write it to disk, and report.
// Any failure (unreadable file, invalid JSON, write error) is reported on
// stderr and ends the process with exit code 1.
try {
  console.log(`Reading ${inputPath}...`);
  const raw = fs.readFileSync(inputPath, "utf-8");
  const data = JSON.parse(raw);

  // JSON `null` and bare primitives cannot hold records; reject them with the
  // same explicit message instead of failing on a property access.
  if (data === null || typeof data !== "object") {
    console.error("Input must be a JSON array or object with array property");
    process.exit(1);
  }

  // Accept a bare array, or an object wrapping the array under a known key.
  const items = Array.isArray(data)
    ? data
    : data.items || data.results || data.messages || [];
  if (!Array.isArray(items)) {
    console.error("Input must be a JSON array or object with array property");
    process.exit(1);
  }
  console.log(`Processing ${items.length} items...`);

  // Count by field. A Map is used rather than a plain object so that values
  // such as "constructor", "toString" or "__proto__" are counted like any
  // other key instead of colliding with Object.prototype members.
  const counts = new Map();
  for (const item of items) {
    const value = getField(item, groupByField);
    const key = value != null ? String(value) : "(empty)";
    counts.set(key, (counts.get(key) || 0) + 1);
  }

  // Top N by descending count; the sort is stable, so ties keep first-seen order.
  const topItems = [...counts.entries()]
    .sort((a, b) => b[1] - a[1])
    .slice(0, topN)
    .map(([value, count]) => ({ value, count }));

  // Find date range. The range spans every detected date field, while
  // `dateField` reports only the first detected one.
  let dateRange = null;
  const dateFields = findDateFields(items);
  if (dateFields.length > 0) {
    // Track min/max directly; there is no need to collect and sort every date.
    let oldest = null;
    let newest = null;
    for (const item of items) {
      for (const field of dateFields) {
        const date = parseDate(getField(item, field));
        if (!date) continue;
        if (oldest === null || date < oldest) oldest = date;
        if (newest === null || date > newest) newest = date;
      }
    }
    if (oldest !== null) {
      dateRange = {
        oldest: oldest.toISOString(),
        newest: newest.toISOString(),
        dateField: dateFields[0],
      };
    }
  }

  const summary = {
    total: items.length,
    groupByField,
    topN: topItems,
    dateRange,
  };

  // Ensure output directory exists
  const dir = path.dirname(outputPath);
  if (dir && dir !== ".") {
    fs.mkdirSync(dir, { recursive: true });
  }
  fs.writeFileSync(outputPath, JSON.stringify(summary, null, 2));

  // Human-readable report. The check mark is written as an escape so the
  // source file stays ASCII and cannot be mis-decoded again.
  console.log(`\n\u2713 Summarized ${items.length} items`);
  console.log(` Grouped by: ${groupByField}`);
  console.log(` Top ${topN}:`);
  for (const item of topItems) {
    console.log(` - ${item.value}: ${item.count}`);
  }
  if (dateRange) {
    console.log(
      ` Date range: ${dateRange.oldest.split("T")[0]} to ${dateRange.newest.split("T")[0]}`
    );
  }
  console.log(` Written to: ${outputPath}`);

  // Final line: the full summary as a single line of JSON.
  console.log(JSON.stringify({ success: true, outputPath, ...summary }));
} catch (error) {
  console.error("Error:", error.message);
  process.exit(1);
}