Group by Time Bucket
Group items by time period (hour, day, week, month, year)
Source Code
import fs from "fs";
import path from "path";
// Positional CLI arguments, in order: input JSON file, dot-path of the date
// field inside each item, bucket size, and output JSON file.
const cliArgs = process.argv.slice(2);
const [inputPath, dateField, bucket, outputPath] = cliArgs;

// All four arguments are required; an empty string counts as missing.
const hasAllArgs = Boolean(inputPath && dateField && bucket && outputPath);
if (!hasAllArgs) {
  console.error("Usage: inputPath dateField bucket outputPath");
  process.exit(1);
}

// Bucket sizes understood by getBucketKey.
const validBuckets = ["hour", "day", "week", "month", "year"];
const isKnownBucket = validBuckets.some((name) => name === bucket);
if (!isKnownBucket) {
  console.error(`Invalid bucket: ${bucket}. Use: ${validBuckets.join(", ")}`);
  process.exit(1);
}
/**
 * Resolve a dot-separated path (e.g. "meta.created") against an object.
 *
 * Walks one path segment at a time. If a null or undefined value is reached
 * while segments remain, the result is undefined; otherwise the value found
 * at the end of the path is returned as-is (which may itself be null).
 *
 * @param {*} obj - Root value to read from.
 * @param {string} fieldPath - Property names joined by ".".
 * @returns {*} The value at the path, or undefined when the path cannot be followed.
 */
function getField(obj, fieldPath) {
  return fieldPath
    .split(".")
    .reduce(
      (current, segment) => (current == null ? undefined : current[segment]),
      obj
    );
}
/**
 * Compute the bucket key for a date value.
 *
 * All date components are read in the local time zone of the running
 * process, so the same instant can fall into different buckets on machines
 * configured with different time zones.
 *
 * Key formats:
 *   hour  -> "YYYY-MM-DDTHH"
 *   day   -> "YYYY-MM-DD"
 *   week  -> "YYYY-MM-DD" of the Monday that starts the week
 *   month -> "YYYY-MM"
 *   year  -> "YYYY"
 *
 * @param {*} date - A value accepted by the Date constructor (date string,
 *   epoch milliseconds, or Date instance).
 * @param {string} bucket - One of "hour", "day", "week", "month", "year".
 * @returns {string|null} The bucket key, or null when the date is missing or
 *   unparseable, or when the bucket name is not recognised.
 */
function getBucketKey(date, bucket) {
  // new Date(null) and new Date(false) are the Unix epoch and new Date(true)
  // is epoch + 1ms, so without this guard a missing value would be bucketed
  // into 1970 instead of being reported as invalid.
  if (date == null || typeof date === "boolean") return null;

  const d = new Date(date);
  if (Number.isNaN(d.getTime())) return null;

  const pad = (n) => String(n).padStart(2, "0");
  const formatDay = (dt) =>
    `${dt.getFullYear()}-${pad(dt.getMonth() + 1)}-${pad(dt.getDate())}`;

  switch (bucket) {
    case "hour":
      return `${formatDay(d)}T${pad(d.getHours())}`;
    case "day":
      return formatDay(d);
    case "week": {
      // getDay() is 0 for Sunday; shift so that Monday = 0 ... Sunday = 6.
      const daysSinceMonday = (d.getDay() + 6) % 7;
      const monday = new Date(d);
      // setDate accepts values below 1 and rolls back into the previous
      // month (and year) as needed.
      monday.setDate(d.getDate() - daysSinceMonday);
      return formatDay(monday);
    }
    case "month":
      return `${d.getFullYear()}-${pad(d.getMonth() + 1)}`;
    case "year":
      return `${d.getFullYear()}`;
    default:
      return null;
  }
}
// Main pipeline: load the input JSON, group its items by time bucket, write
// the grouped result to outputPath, then print a human-readable report
// followed by a one-line JSON summary on stdout. Any failure is reported on
// stderr and the process exits with code 1.
try {
  console.log(`Reading ${inputPath}...`);
  const raw = fs.readFileSync(inputPath, "utf-8");
  const data = JSON.parse(raw);

  // A JSON root of null or a primitive cannot hold an item list. Reject it
  // with the input-shape message instead of a TypeError from reading
  // data.items on null, or a silent empty result for a bare number/string.
  if (data === null || typeof data !== "object") {
    console.error("Input must be a JSON array or object with array property");
    process.exit(1);
  }

  // Accept a bare array, or an object wrapping the list under one of the
  // known property names; an object with none of them yields no items.
  const items = Array.isArray(data)
    ? data
    : data.items || data.results || data.messages || [];
  if (!Array.isArray(items)) {
    console.error("Input must be a JSON array or object with array property");
    process.exit(1);
  }

  console.log(`Bucketing ${items.length} items by ${bucket} using field: ${dateField}...`);

  // Group items by bucket key; items whose date is missing or invalid are
  // counted and left out of the output.
  const buckets = {};
  let skipped = 0;
  for (const item of items) {
    const dateValue = getField(item, dateField);
    const key = getBucketKey(dateValue, bucket);
    if (!key) {
      skipped++;
      continue;
    }
    if (!buckets[key]) {
      buckets[key] = [];
    }
    buckets[key].push(item);
  }
  const bucketCount = Object.keys(buckets).length;

  // Re-insert the groups in sorted key order so the written JSON is ordered.
  const sortedBuckets = {};
  for (const key of Object.keys(buckets).sort()) {
    sortedBuckets[key] = buckets[key];
  }

  // Ensure output directory exists
  const dir = path.dirname(outputPath);
  if (dir && dir !== ".") {
    fs.mkdirSync(dir, { recursive: true });
  }
  fs.writeFileSync(outputPath, JSON.stringify(sortedBuckets, null, 2));

  // \u2713 is a check mark; written as an escape so the glyph survives
  // re-encoding of this source file.
  console.log(`\n\u2713 Bucketed ${items.length - skipped} items into ${bucketCount} ${bucket}s`);
  console.log(` Date field: ${dateField}`);
  console.log(` Bucket type: ${bucket}`);
  if (skipped > 0) {
    console.log(` Skipped (invalid date): ${skipped}`);
  }
  console.log(` Written to: ${outputPath}`);

  // Show the first five buckets as a sample.
  const bucketSummary = Object.entries(sortedBuckets)
    .slice(0, 5)
    .map(([key, grouped]) => ({ key, count: grouped.length }));
  console.log(` Sample buckets:`);
  for (const { key, count } of bucketSummary) {
    console.log(` - ${key}: ${count} items`);
  }
  if (bucketCount > 5) {
    console.log(` ... and ${bucketCount - 5} more buckets`);
  }

  // Final JSON summary line.
  console.log(
    JSON.stringify({
      success: true,
      outputPath,
      totalItems: items.length,
      bucketedItems: items.length - skipped,
      skipped,
      bucketCount,
      bucket,
      dateField,
    })
  );
} catch (error) {
  console.error("Error:", error.message);
  process.exit(1);
}