code icon Code

Fetch Gmail Messages

Fetches emails using Gmail's batch API for speed: up to 100 messages per batch request instead of 100 sequential calls.

Source Code

import fs from "fs";
import path from "path";

const [query = "", maxResults = "50", outputPath = "session/emails.json"] =
  process.argv.slice(2);
const maxResultsNum = Math.min(parseInt(maxResults) || 50, 500);

console.log(
  `Fetching up to ${maxResultsNum} messages${
    query ? ` matching: ${query}` : ""
  }`
);

/**
 * Fetch message details in batches using Gmail's batch API.
 * Up to 100 requests per batch = dramatically faster than sequential.
 */
async function batchFetchMessages(messageIds, authHeader) {
  const BATCH_SIZE = 100;
  const results = [];
  const batches = [];

  // Split into batches of 100
  for (let i = 0; i < messageIds.length; i += BATCH_SIZE) {
    batches.push(messageIds.slice(i, i + BATCH_SIZE));
  }

  console.log(
    `Fetching ${messageIds.length} messages in ${batches.length} batch request(s)...`
  );

  for (let batchIdx = 0; batchIdx < batches.length; batchIdx++) {
    const batch = batches[batchIdx];
    const boundary = `batch_gmail_${Date.now()}_${batchIdx}`;

    // Build multipart batch request body
    let body = "";
    for (let i = 0; i < batch.length; i++) {
      const msgId = batch[i];
      body += `--${boundary}\r\n`;
      body += `Content-Type: application/http\r\n`;
      body += `Content-ID: <msg${i}>\r\n\r\n`;
      body += `GET /gmail/v1/users/me/messages/${msgId}?format=metadata&metadataHeaders=Subject&metadataHeaders=From&metadataHeaders=To&metadataHeaders=Date&metadataHeaders=Cc\r\n\r\n`;
    }
    body += `--${boundary}--`;

    const batchRes = await fetch("https://www.googleapis.com/batch/gmail/v1", {
      method: "POST",
      headers: {
        Authorization: authHeader,
        "Content-Type": `multipart/mixed; boundary=${boundary}`,
      },
      body,
    });

    if (!batchRes.ok) {
      const errorText = await batchRes.text();
      console.error(`Batch ${batchIdx + 1} failed: ${batchRes.status}`);
      console.error(errorText);
      continue;
    }

    // Parse multipart response
    const responseText = await batchRes.text();
    const responseBoundary = batchRes.headers
      .get("content-type")
      ?.match(/boundary=(.+)/)?.[1];

    if (!responseBoundary) {
      console.error(`Batch ${batchIdx + 1}: Could not parse response boundary`);
      continue;
    }

    // Split response into parts and extract JSON from each
    const parts = responseText.split(`--${responseBoundary}`);
    for (const part of parts) {
      if (part.trim() === "" || part.trim() === "--") continue;

      // Find the JSON body in each part (after the blank line following headers)
      const jsonMatch = part.match(/\r\n\r\n.*?\r\n\r\n(.+)/s);
      if (!jsonMatch) continue;

      try {
        const detail = JSON.parse(jsonMatch[1].trim());
        if (detail.id) {
          results.push(detail);
        }
      } catch {
        // Skip malformed responses
      }
    }

    console.log(
      `  Batch ${batchIdx + 1}/${batches.length}: got ${batch.length} messages`
    );
  }

  return results;
}

try {
  const messages = [];
  let pageToken = null;
  let allMessageIds = [];

  // Phase 1: List all message IDs (fast, just IDs)
  console.log("Phase 1: Listing message IDs...");
  while (allMessageIds.length < maxResultsNum) {
    const remaining = maxResultsNum - allMessageIds.length;
    const pageSize = Math.min(remaining, 100);

    const url = new URL(
      "https://gmail.googleapis.com/gmail/v1/users/me/messages"
    );
    url.searchParams.set("maxResults", pageSize.toString());
    if (query) url.searchParams.set("q", query);
    if (pageToken) url.searchParams.set("pageToken", pageToken);

    const listRes = await fetch(url.toString(), {
      headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" },
    });

    if (!listRes.ok) {
      const errorText = await listRes.text();
      console.error(`Gmail API error: ${listRes.status}`);
      console.error(errorText);
      throw new Error(`Gmail API failed: ${listRes.status}`);
    }

    const listData = await listRes.json();

    if (!listData.messages || listData.messages.length === 0) {
      console.log("No more messages found");
      break;
    }

    const newIds = listData.messages.map((m) => m.id).slice(0, remaining);
    allMessageIds.push(...newIds);
    console.log(`  Found ${allMessageIds.length} messages so far...`);

    pageToken = listData.nextPageToken;
    if (!pageToken) break;
  }

  if (allMessageIds.length === 0) {
    console.log("No messages found matching query");
    console.log(JSON.stringify({ count: 0, messages: [] }));
    process.exit(0);
  }

  // Phase 2: Batch fetch all message details
  console.log(
    `\nPhase 2: Fetching details for ${allMessageIds.length} messages...`
  );
  const details = await batchFetchMessages(
    allMessageIds,
    "Bearer PLACEHOLDER_TOKEN"
  );

  // Transform to our format
  for (const detail of details) {
    const getHeader = (name) => {
      const header = detail.payload?.headers?.find(
        (h) => h.name.toLowerCase() === name.toLowerCase()
      );
      return header ? header.value : "";
    };

    messages.push({
      id: detail.id,
      threadId: detail.threadId,
      subject: getHeader("Subject"),
      from: getHeader("From"),
      to: getHeader("To"),
      cc: getHeader("Cc"),
      date: getHeader("Date"),
      snippet: detail.snippet,
      labelIds: detail.labelIds || [],
      sizeEstimate: detail.sizeEstimate,
    });
  }

  // Compute summary stats
  const senderCounts = {};
  const dates = [];
  for (const msg of messages) {
    const sender = extractEmail(msg.from);
    senderCounts[sender] = (senderCounts[sender] || 0) + 1;
    if (msg.date) {
      const parsed = new Date(msg.date);
      if (!isNaN(parsed.getTime())) dates.push(parsed);
    }
  }

  const topSenders = Object.entries(senderCounts)
    .sort((a, b) => b[1] - a[1])
    .slice(0, 5)
    .map(([email, count]) => ({ email, count }));

  dates.sort((a, b) => a - b);
  const dateRange =
    dates.length > 0
      ? {
          oldest: dates[0].toISOString(),
          newest: dates[dates.length - 1].toISOString(),
        }
      : null;

  // Ensure output directory exists
  const dir = path.dirname(outputPath);
  if (dir && dir !== ".") {
    fs.mkdirSync(dir, { recursive: true });
  }

  // Write full results to file
  const output = {
    query: query || null,
    fetchedAt: new Date().toISOString(),
    count: messages.length,
    dateRange,
    messages,
  };

  fs.writeFileSync(outputPath, JSON.stringify(output, null, 2));

  // Log summary for agent context
  console.log(`\n✓ Fetched ${messages.length} messages`);
  console.log(`  Written to: ${outputPath}`);
  if (dateRange) {
    console.log(
      `  Date range: ${dateRange.oldest.split("T")[0]} to ${
        dateRange.newest.split("T")[0]
      }`
    );
  }
  console.log(`  Top senders:`);
  for (const s of topSenders) {
    console.log(`    - ${s.email}: ${s.count} messages`);
  }

  // Return minimal summary object
  console.log(
    JSON.stringify({
      success: true,
      outputPath,
      count: messages.length,
      dateRange,
      topSenders,
    })
  );
} catch (error) {
  console.error("Error fetching messages:", error.message);
  throw error;
}

function extractEmail(fromHeader) {
  if (!fromHeader) return "unknown";
  const match = fromHeader.match(/<([^>]+)>/);
  return match ? match[1].toLowerCase() : fromHeader.toLowerCase().trim();
}
                  import fs from "fs";
import path from "path";

const [query = "", maxResults = "50", outputPath = "session/emails.json"] =
  process.argv.slice(2);
const maxResultsNum = Math.min(parseInt(maxResults) || 50, 500);

console.log(
  `Fetching up to ${maxResultsNum} messages${
    query ? ` matching: ${query}` : ""
  }`
);

/**
 * Fetch message details in batches using Gmail's batch API.
 * Up to 100 requests per batch = dramatically faster than sequential.
 */
async function batchFetchMessages(messageIds, authHeader) {
  const BATCH_SIZE = 100;
  const results = [];
  const batches = [];

  // Split into batches of 100
  for (let i = 0; i < messageIds.length; i += BATCH_SIZE) {
    batches.push(messageIds.slice(i, i + BATCH_SIZE));
  }

  console.log(
    `Fetching ${messageIds.length} messages in ${batches.length} batch request(s)...`
  );

  for (let batchIdx = 0; batchIdx < batches.length; batchIdx++) {
    const batch = batches[batchIdx];
    const boundary = `batch_gmail_${Date.now()}_${batchIdx}`;

    // Build multipart batch request body
    let body = "";
    for (let i = 0; i < batch.length; i++) {
      const msgId = batch[i];
      body += `--${boundary}\r\n`;
      body += `Content-Type: application/http\r\n`;
      body += `Content-ID: \r\n\r\n`;
      body += `GET /gmail/v1/users/me/messages/${msgId}?format=metadata&metadataHeaders=Subject&metadataHeaders=From&metadataHeaders=To&metadataHeaders=Date&metadataHeaders=Cc\r\n\r\n`;
    }
    body += `--${boundary}--`;

    const batchRes = await fetch("https://www.googleapis.com/batch/gmail/v1", {
      method: "POST",
      headers: {
        Authorization: authHeader,
        "Content-Type": `multipart/mixed; boundary=${boundary}`,
      },
      body,
    });

    if (!batchRes.ok) {
      const errorText = await batchRes.text();
      console.error(`Batch ${batchIdx + 1} failed: ${batchRes.status}`);
      console.error(errorText);
      continue;
    }

    // Parse multipart response
    const responseText = await batchRes.text();
    const responseBoundary = batchRes.headers
      .get("content-type")
      ?.match(/boundary=(.+)/)?.[1];

    if (!responseBoundary) {
      console.error(`Batch ${batchIdx + 1}: Could not parse response boundary`);
      continue;
    }

    // Split response into parts and extract JSON from each
    const parts = responseText.split(`--${responseBoundary}`);
    for (const part of parts) {
      if (part.trim() === "" || part.trim() === "--") continue;

      // Find the JSON body in each part (after the blank line following headers)
      const jsonMatch = part.match(/\r\n\r\n.*?\r\n\r\n(.+)/s);
      if (!jsonMatch) continue;

      try {
        const detail = JSON.parse(jsonMatch[1].trim());
        if (detail.id) {
          results.push(detail);
        }
      } catch {
        // Skip malformed responses
      }
    }

    console.log(
      `  Batch ${batchIdx + 1}/${batches.length}: got ${batch.length} messages`
    );
  }

  return results;
}

try {
  const messages = [];
  let pageToken = null;
  let allMessageIds = [];

  // Phase 1: List all message IDs (fast, just IDs)
  console.log("Phase 1: Listing message IDs...");
  while (allMessageIds.length < maxResultsNum) {
    const remaining = maxResultsNum - allMessageIds.length;
    const pageSize = Math.min(remaining, 100);

    const url = new URL(
      "https://gmail.googleapis.com/gmail/v1/users/me/messages"
    );
    url.searchParams.set("maxResults", pageSize.toString());
    if (query) url.searchParams.set("q", query);
    if (pageToken) url.searchParams.set("pageToken", pageToken);

    const listRes = await fetch(url.toString(), {
      headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" },
    });

    if (!listRes.ok) {
      const errorText = await listRes.text();
      console.error(`Gmail API error: ${listRes.status}`);
      console.error(errorText);
      throw new Error(`Gmail API failed: ${listRes.status}`);
    }

    const listData = await listRes.json();

    if (!listData.messages || listData.messages.length === 0) {
      console.log("No more messages found");
      break;
    }

    const newIds = listData.messages.map((m) => m.id).slice(0, remaining);
    allMessageIds.push(...newIds);
    console.log(`  Found ${allMessageIds.length} messages so far...`);

    pageToken = listData.nextPageToken;
    if (!pageToken) break;
  }

  if (allMessageIds.length === 0) {
    console.log("No messages found matching query");
    console.log(JSON.stringify({ count: 0, messages: [] }));
    process.exit(0);
  }

  // Phase 2: Batch fetch all message details
  console.log(
    `\nPhase 2: Fetching details for ${allMessageIds.length} messages...`
  );
  const details = await batchFetchMessages(
    allMessageIds,
    "Bearer PLACEHOLDER_TOKEN"
  );

  // Transform to our format
  for (const detail of details) {
    const getHeader = (name) => {
      const header = detail.payload?.headers?.find(
        (h) => h.name.toLowerCase() === name.toLowerCase()
      );
      return header ? header.value : "";
    };

    messages.push({
      id: detail.id,
      threadId: detail.threadId,
      subject: getHeader("Subject"),
      from: getHeader("From"),
      to: getHeader("To"),
      cc: getHeader("Cc"),
      date: getHeader("Date"),
      snippet: detail.snippet,
      labelIds: detail.labelIds || [],
      sizeEstimate: detail.sizeEstimate,
    });
  }

  // Compute summary stats
  const senderCounts = {};
  const dates = [];
  for (const msg of messages) {
    const sender = extractEmail(msg.from);
    senderCounts[sender] = (senderCounts[sender] || 0) + 1;
    if (msg.date) {
      const parsed = new Date(msg.date);
      if (!isNaN(parsed.getTime())) dates.push(parsed);
    }
  }

  const topSenders = Object.entries(senderCounts)
    .sort((a, b) => b[1] - a[1])
    .slice(0, 5)
    .map(([email, count]) => ({ email, count }));

  dates.sort((a, b) => a - b);
  const dateRange =
    dates.length > 0
      ? {
          oldest: dates[0].toISOString(),
          newest: dates[dates.length - 1].toISOString(),
        }
      : null;

  // Ensure output directory exists
  const dir = path.dirname(outputPath);
  if (dir && dir !== ".") {
    fs.mkdirSync(dir, { recursive: true });
  }

  // Write full results to file
  const output = {
    query: query || null,
    fetchedAt: new Date().toISOString(),
    count: messages.length,
    dateRange,
    messages,
  };

  fs.writeFileSync(outputPath, JSON.stringify(output, null, 2));

  // Log summary for agent context
  console.log(`\n✓ Fetched ${messages.length} messages`);
  console.log(`  Written to: ${outputPath}`);
  if (dateRange) {
    console.log(
      `  Date range: ${dateRange.oldest.split("T")[0]} to ${
        dateRange.newest.split("T")[0]
      }`
    );
  }
  console.log(`  Top senders:`);
  for (const s of topSenders) {
    console.log(`    - ${s.email}: ${s.count} messages`);
  }

  // Return minimal summary object
  console.log(
    JSON.stringify({
      success: true,
      outputPath,
      count: messages.length,
      dateRange,
      topSenders,
    })
  );
} catch (error) {
  console.error("Error fetching messages:", error.message);
  throw error;
}

function extractEmail(fromHeader) {
  if (!fromHeader) return "unknown";
  const match = fromHeader.match(/<([^>]+)>/);
  return match ? match[1].toLowerCase() : fromHeader.toLowerCase().trim();
}