// Fetch Gmail Messages
// Fetch emails using Gmail's batch API for speed: up to 100 messages per batch request instead of 100 sequential calls.
// Source Code
import fs from "fs";
import path from "path";
// CLI arguments: [query] [maxResults] [outputPath] — all optional.
const [query = "", maxResults = "50", outputPath = "session/emails.json"] =
  process.argv.slice(2);
// Clamp to 500 (a sane ceiling for one run); explicit radix 10 avoids
// legacy-octal surprises, and `|| 50` recovers from non-numeric input.
const maxResultsNum = Math.min(Number.parseInt(maxResults, 10) || 50, 500);
console.log(
  `Fetching up to ${maxResultsNum} messages${
    query ? ` matching: ${query}` : ""
  }`
);
/**
 * Fetch message metadata in batches using Gmail's batch endpoint.
 * Packs up to 100 sub-requests into one multipart/mixed POST — dramatically
 * faster than issuing one GET per message.
 *
 * @param {string[]} messageIds - Gmail message IDs to fetch.
 * @param {string} authHeader - Full Authorization header value ("Bearer ...").
 * @returns {Promise<object[]>} Parsed message detail objects. Parts that fail
 *   to parse (or whole failed batches) are skipped, so the result may be
 *   shorter than `messageIds`.
 */
async function batchFetchMessages(messageIds, authHeader) {
  const BATCH_SIZE = 100; // Gmail batch API limit per request
  const results = [];
  const batches = [];
  // Split IDs into chunks of at most BATCH_SIZE.
  for (let i = 0; i < messageIds.length; i += BATCH_SIZE) {
    batches.push(messageIds.slice(i, i + BATCH_SIZE));
  }
  console.log(
    `Fetching ${messageIds.length} messages in ${batches.length} batch request(s)...`
  );
  for (let batchIdx = 0; batchIdx < batches.length; batchIdx++) {
    const batch = batches[batchIdx];
    const boundary = `batch_gmail_${Date.now()}_${batchIdx}`;
    // Build the multipart/mixed request body: one application/http part
    // (a serialized inner GET) per message.
    let body = "";
    for (let i = 0; i < batch.length; i++) {
      const msgId = batch[i];
      body += `--${boundary}\r\n`;
      body += `Content-Type: application/http\r\n`;
      body += `Content-ID: <msg${i}>\r\n\r\n`;
      body += `GET /gmail/v1/users/me/messages/${msgId}?format=metadata&metadataHeaders=Subject&metadataHeaders=From&metadataHeaders=To&metadataHeaders=Date&metadataHeaders=Cc\r\n\r\n`;
    }
    body += `--${boundary}--`;
    const batchRes = await fetch("https://www.googleapis.com/batch/gmail/v1", {
      method: "POST",
      headers: {
        Authorization: authHeader,
        "Content-Type": `multipart/mixed; boundary=${boundary}`,
      },
      body,
    });
    if (!batchRes.ok) {
      const errorText = await batchRes.text();
      console.error(`Batch ${batchIdx + 1} failed: ${batchRes.status}`);
      console.error(errorText);
      // Best-effort: a failed batch drops its messages, remaining batches proceed.
      continue;
    }
    // Parse multipart response.
    const responseText = await batchRes.text();
    // FIX: the boundary parameter may be quoted and/or followed by other
    // Content-Type parameters; the old /boundary=(.+)/ captured everything
    // to the end of the header value.
    const responseBoundary = batchRes.headers
      .get("content-type")
      ?.match(/boundary="?([^";]+)"?/)?.[1];
    if (!responseBoundary) {
      console.error(`Batch ${batchIdx + 1}: Could not parse response boundary`);
      continue;
    }
    // Split response into parts; each part's JSON body follows two CRLF-CRLF
    // separators (outer part headers, then the inner HTTP response headers).
    const parts = responseText.split(`--${responseBoundary}`);
    let parsedCount = 0;
    for (const part of parts) {
      if (part.trim() === "" || part.trim() === "--") continue;
      const jsonMatch = part.match(/\r\n\r\n.*?\r\n\r\n(.+)/s);
      if (!jsonMatch) continue;
      try {
        const detail = JSON.parse(jsonMatch[1].trim());
        if (detail.id) {
          results.push(detail);
          parsedCount++;
        }
      } catch {
        // Skip malformed responses
      }
    }
    // FIX: report how many parts actually parsed, not the requested batch size.
    console.log(
      ` Batch ${batchIdx + 1}/${batches.length}: got ${parsedCount} messages`
    );
  }
  return results;
}
// Top-level driver: (1) page through the message-list endpoint collecting IDs,
// (2) batch-fetch metadata, (3) summarize senders/dates, (4) write JSON to
// `outputPath` and print a machine-readable summary line.
// NOTE(review): "Bearer PLACEHOLDER_TOKEN" looks like a template slot to be
// substituted with a real OAuth token before running — confirm with the caller.
try {
const messages = [];
let pageToken = null;
let allMessageIds = [];
// Phase 1: List all message IDs (fast, just IDs)
console.log("Phase 1: Listing message IDs...");
while (allMessageIds.length < maxResultsNum) {
const remaining = maxResultsNum - allMessageIds.length;
// List endpoint is paged; request at most 100 IDs per page.
const pageSize = Math.min(remaining, 100);
const url = new URL(
"https://gmail.googleapis.com/gmail/v1/users/me/messages"
);
url.searchParams.set("maxResults", pageSize.toString());
if (query) url.searchParams.set("q", query);
if (pageToken) url.searchParams.set("pageToken", pageToken);
const listRes = await fetch(url.toString(), {
headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" },
});
if (!listRes.ok) {
const errorText = await listRes.text();
console.error(`Gmail API error: ${listRes.status}`);
console.error(errorText);
// A list failure aborts the whole run (unlike batch-detail failures).
throw new Error(`Gmail API failed: ${listRes.status}`);
}
const listData = await listRes.json();
if (!listData.messages || listData.messages.length === 0) {
console.log("No more messages found");
break;
}
// Trim the page so we never exceed maxResultsNum in total.
const newIds = listData.messages.map((m) => m.id).slice(0, remaining);
allMessageIds.push(...newIds);
console.log(` Found ${allMessageIds.length} messages so far...`);
pageToken = listData.nextPageToken;
if (!pageToken) break;
}
if (allMessageIds.length === 0) {
console.log("No messages found matching query");
// Emit an empty-but-valid summary so downstream parsing still works.
console.log(JSON.stringify({ count: 0, messages: [] }));
process.exit(0);
}
// Phase 2: Batch fetch all message details
console.log(
`\nPhase 2: Fetching details for ${allMessageIds.length} messages...`
);
const details = await batchFetchMessages(
allMessageIds,
"Bearer PLACEHOLDER_TOKEN"
);
// Transform to our format
for (const detail of details) {
// Case-insensitive lookup of a single header value from the payload.
const getHeader = (name) => {
const header = detail.payload?.headers?.find(
(h) => h.name.toLowerCase() === name.toLowerCase()
);
return header ? header.value : "";
};
messages.push({
id: detail.id,
threadId: detail.threadId,
subject: getHeader("Subject"),
from: getHeader("From"),
to: getHeader("To"),
cc: getHeader("Cc"),
date: getHeader("Date"),
snippet: detail.snippet,
labelIds: detail.labelIds || [],
sizeEstimate: detail.sizeEstimate,
});
}
// Compute summary stats
const senderCounts = {};
const dates = [];
for (const msg of messages) {
const sender = extractEmail(msg.from);
senderCounts[sender] = (senderCounts[sender] || 0) + 1;
if (msg.date) {
// Date headers are RFC 2822-ish; skip anything Date can't parse.
const parsed = new Date(msg.date);
if (!isNaN(parsed.getTime())) dates.push(parsed);
}
}
// Top 5 senders by message count.
const topSenders = Object.entries(senderCounts)
.sort((a, b) => b[1] - a[1])
.slice(0, 5)
.map(([email, count]) => ({ email, count }));
dates.sort((a, b) => a - b);
const dateRange =
dates.length > 0
? {
oldest: dates[0].toISOString(),
newest: dates[dates.length - 1].toISOString(),
}
: null;
// Ensure output directory exists
const dir = path.dirname(outputPath);
if (dir && dir !== ".") {
fs.mkdirSync(dir, { recursive: true });
}
// Write full results to file
const output = {
query: query || null,
fetchedAt: new Date().toISOString(),
count: messages.length,
dateRange,
messages,
};
fs.writeFileSync(outputPath, JSON.stringify(output, null, 2));
// Log summary for agent context
console.log(`\n✓ Fetched ${messages.length} messages`);
console.log(` Written to: ${outputPath}`);
if (dateRange) {
console.log(
` Date range: ${dateRange.oldest.split("T")[0]} to ${
dateRange.newest.split("T")[0]
}`
);
}
console.log(` Top senders:`);
for (const s of topSenders) {
console.log(` - ${s.email}: ${s.count} messages`);
}
// Return minimal summary object
console.log(
JSON.stringify({
success: true,
outputPath,
count: messages.length,
dateRange,
topSenders,
})
);
} catch (error) {
console.error("Error fetching messages:", error.message);
// Rethrow so the process exits non-zero after logging.
throw error;
}
// Pull the bare address out of a From header, lowercased; "unknown" if empty.
function extractEmail(fromHeader) {
if (!fromHeader) return "unknown";
// Prefer the angle-bracketed address ("Name <a@b.com>"); otherwise treat
// the whole header as the address.
const match = fromHeader.match(/<([^>]+)>/);
return match ? match[1].toLowerCase() : fromHeader.toLowerCase().trim();
} import fs from "fs";
import path from "path";
// CLI arguments: [query] [maxResults] [outputPath] — all optional.
const [query = "", maxResults = "50", outputPath = "session/emails.json"] =
  process.argv.slice(2);
// Clamp to 500 (a sane ceiling for one run); explicit radix 10 avoids
// legacy-octal surprises, and `|| 50` recovers from non-numeric input.
const maxResultsNum = Math.min(Number.parseInt(maxResults, 10) || 50, 500);
console.log(
  `Fetching up to ${maxResultsNum} messages${
    query ? ` matching: ${query}` : ""
  }`
);
/**
 * Fetch message metadata in batches using Gmail's batch endpoint.
 * Packs up to 100 sub-requests into one multipart/mixed POST — dramatically
 * faster than issuing one GET per message.
 *
 * @param {string[]} messageIds - Gmail message IDs to fetch.
 * @param {string} authHeader - Full Authorization header value ("Bearer ...").
 * @returns {Promise<object[]>} Parsed message detail objects. Parts that fail
 *   to parse (or whole failed batches) are skipped, so the result may be
 *   shorter than `messageIds`.
 */
async function batchFetchMessages(messageIds, authHeader) {
  const BATCH_SIZE = 100; // Gmail batch API limit per request
  const results = [];
  const batches = [];
  // Split IDs into chunks of at most BATCH_SIZE.
  for (let i = 0; i < messageIds.length; i += BATCH_SIZE) {
    batches.push(messageIds.slice(i, i + BATCH_SIZE));
  }
  console.log(
    `Fetching ${messageIds.length} messages in ${batches.length} batch request(s)...`
  );
  for (let batchIdx = 0; batchIdx < batches.length; batchIdx++) {
    const batch = batches[batchIdx];
    const boundary = `batch_gmail_${Date.now()}_${batchIdx}`;
    // Build the multipart/mixed request body: one application/http part
    // (a serialized inner GET) per message.
    let body = "";
    for (let i = 0; i < batch.length; i++) {
      const msgId = batch[i];
      body += `--${boundary}\r\n`;
      body += `Content-Type: application/http\r\n`;
      // FIX: the Content-ID value was lost, emitting a malformed empty
      // "Content-ID:" header; restore the per-part identifier.
      body += `Content-ID: <msg${i}>\r\n\r\n`;
      body += `GET /gmail/v1/users/me/messages/${msgId}?format=metadata&metadataHeaders=Subject&metadataHeaders=From&metadataHeaders=To&metadataHeaders=Date&metadataHeaders=Cc\r\n\r\n`;
    }
    body += `--${boundary}--`;
    const batchRes = await fetch("https://www.googleapis.com/batch/gmail/v1", {
      method: "POST",
      headers: {
        Authorization: authHeader,
        "Content-Type": `multipart/mixed; boundary=${boundary}`,
      },
      body,
    });
    if (!batchRes.ok) {
      const errorText = await batchRes.text();
      console.error(`Batch ${batchIdx + 1} failed: ${batchRes.status}`);
      console.error(errorText);
      // Best-effort: a failed batch drops its messages, remaining batches proceed.
      continue;
    }
    // Parse multipart response.
    const responseText = await batchRes.text();
    // FIX: the boundary parameter may be quoted and/or followed by other
    // Content-Type parameters; the old /boundary=(.+)/ captured everything
    // to the end of the header value.
    const responseBoundary = batchRes.headers
      .get("content-type")
      ?.match(/boundary="?([^";]+)"?/)?.[1];
    if (!responseBoundary) {
      console.error(`Batch ${batchIdx + 1}: Could not parse response boundary`);
      continue;
    }
    // Split response into parts; each part's JSON body follows two CRLF-CRLF
    // separators (outer part headers, then the inner HTTP response headers).
    const parts = responseText.split(`--${responseBoundary}`);
    let parsedCount = 0;
    for (const part of parts) {
      if (part.trim() === "" || part.trim() === "--") continue;
      const jsonMatch = part.match(/\r\n\r\n.*?\r\n\r\n(.+)/s);
      if (!jsonMatch) continue;
      try {
        const detail = JSON.parse(jsonMatch[1].trim());
        if (detail.id) {
          results.push(detail);
          parsedCount++;
        }
      } catch {
        // Skip malformed responses
      }
    }
    // FIX: report how many parts actually parsed, not the requested batch size.
    console.log(
      ` Batch ${batchIdx + 1}/${batches.length}: got ${parsedCount} messages`
    );
  }
  return results;
}
// Top-level driver: (1) page through the message-list endpoint collecting IDs,
// (2) batch-fetch metadata, (3) summarize senders/dates, (4) write JSON to
// `outputPath` and print a machine-readable summary line.
// NOTE(review): "Bearer PLACEHOLDER_TOKEN" looks like a template slot to be
// substituted with a real OAuth token before running — confirm with the caller.
try {
const messages = [];
let pageToken = null;
let allMessageIds = [];
// Phase 1: List all message IDs (fast, just IDs)
console.log("Phase 1: Listing message IDs...");
while (allMessageIds.length < maxResultsNum) {
const remaining = maxResultsNum - allMessageIds.length;
// List endpoint is paged; request at most 100 IDs per page.
const pageSize = Math.min(remaining, 100);
const url = new URL(
"https://gmail.googleapis.com/gmail/v1/users/me/messages"
);
url.searchParams.set("maxResults", pageSize.toString());
if (query) url.searchParams.set("q", query);
if (pageToken) url.searchParams.set("pageToken", pageToken);
const listRes = await fetch(url.toString(), {
headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" },
});
if (!listRes.ok) {
const errorText = await listRes.text();
console.error(`Gmail API error: ${listRes.status}`);
console.error(errorText);
// A list failure aborts the whole run (unlike batch-detail failures).
throw new Error(`Gmail API failed: ${listRes.status}`);
}
const listData = await listRes.json();
if (!listData.messages || listData.messages.length === 0) {
console.log("No more messages found");
break;
}
// Trim the page so we never exceed maxResultsNum in total.
const newIds = listData.messages.map((m) => m.id).slice(0, remaining);
allMessageIds.push(...newIds);
console.log(` Found ${allMessageIds.length} messages so far...`);
pageToken = listData.nextPageToken;
if (!pageToken) break;
}
if (allMessageIds.length === 0) {
console.log("No messages found matching query");
// Emit an empty-but-valid summary so downstream parsing still works.
console.log(JSON.stringify({ count: 0, messages: [] }));
process.exit(0);
}
// Phase 2: Batch fetch all message details
console.log(
`\nPhase 2: Fetching details for ${allMessageIds.length} messages...`
);
const details = await batchFetchMessages(
allMessageIds,
"Bearer PLACEHOLDER_TOKEN"
);
// Transform to our format
for (const detail of details) {
// Case-insensitive lookup of a single header value from the payload.
const getHeader = (name) => {
const header = detail.payload?.headers?.find(
(h) => h.name.toLowerCase() === name.toLowerCase()
);
return header ? header.value : "";
};
messages.push({
id: detail.id,
threadId: detail.threadId,
subject: getHeader("Subject"),
from: getHeader("From"),
to: getHeader("To"),
cc: getHeader("Cc"),
date: getHeader("Date"),
snippet: detail.snippet,
labelIds: detail.labelIds || [],
sizeEstimate: detail.sizeEstimate,
});
}
// Compute summary stats
const senderCounts = {};
const dates = [];
for (const msg of messages) {
const sender = extractEmail(msg.from);
senderCounts[sender] = (senderCounts[sender] || 0) + 1;
if (msg.date) {
// Date headers are RFC 2822-ish; skip anything Date can't parse.
const parsed = new Date(msg.date);
if (!isNaN(parsed.getTime())) dates.push(parsed);
}
}
// Top 5 senders by message count.
const topSenders = Object.entries(senderCounts)
.sort((a, b) => b[1] - a[1])
.slice(0, 5)
.map(([email, count]) => ({ email, count }));
dates.sort((a, b) => a - b);
const dateRange =
dates.length > 0
? {
oldest: dates[0].toISOString(),
newest: dates[dates.length - 1].toISOString(),
}
: null;
// Ensure output directory exists
const dir = path.dirname(outputPath);
if (dir && dir !== ".") {
fs.mkdirSync(dir, { recursive: true });
}
// Write full results to file
const output = {
query: query || null,
fetchedAt: new Date().toISOString(),
count: messages.length,
dateRange,
messages,
};
fs.writeFileSync(outputPath, JSON.stringify(output, null, 2));
// Log summary for agent context
console.log(`\n✓ Fetched ${messages.length} messages`);
console.log(` Written to: ${outputPath}`);
if (dateRange) {
console.log(
` Date range: ${dateRange.oldest.split("T")[0]} to ${
dateRange.newest.split("T")[0]
}`
);
}
console.log(` Top senders:`);
for (const s of topSenders) {
console.log(` - ${s.email}: ${s.count} messages`);
}
// Return minimal summary object
console.log(
JSON.stringify({
success: true,
outputPath,
count: messages.length,
dateRange,
topSenders,
})
);
} catch (error) {
console.error("Error fetching messages:", error.message);
// Rethrow so the process exits non-zero after logging.
throw error;
}
/**
 * Extract the bare email address from a From header.
 * "Jane Doe <jane@x.com>" -> "jane@x.com"; a bare address passes through.
 *
 * @param {string} fromHeader - Raw From header value (may be empty/undefined).
 * @returns {string} Lowercased address, or "unknown" when the header is empty.
 */
function extractEmail(fromHeader) {
  if (!fromHeader) return "unknown";
  // Prefer the angle-bracketed address; otherwise use the whole header.
  const match = fromHeader.match(/<([^>]+)>/);
  // FIX: trim the captured address too — "< jane@x.com >" previously kept spaces.
  return match
    ? match[1].trim().toLowerCase()
    : fromHeader.toLowerCase().trim();
}