code icon Code

Connect to Gmail

Collect inbox emails (90 days), sent emails (6 months), and run targeted discovery searches for profile analysis

Source Code

import fs from "fs";
import path from "path";

// Output locations come from the first two CLI arguments (argv[2], argv[3]);
// each falls back to a file under session/ when the argument is absent.
const {
  2: profileOutputPath = "session/gmail-profile-data.json",
  3: writingSamplesOutputPath = "session/writing-samples.json",
} = process.argv;

// Upper bounds on how many message IDs each listing phase may collect.
const INBOX_MAX_MESSAGES = 75;
const SENT_MAX_MESSAGES = 50;
const DISCOVERY_MAX_PER_QUERY = 15;

// Instant exactly 90 days before script start; used for the period label.
const ninetyDaysAgo = new Date(Date.now() - 90 * 24 * 60 * 60 * 1000);

// Short en-US "Mon D" rendering of a Date (no year).
const formatDate = (d) => {
  const shortMonthDay = { month: "short", day: "numeric" };
  return d.toLocaleDateString("en-US", shortMonthDay);
};

/**
 * Render a Date as a compact "time ago" label relative to the current time.
 *
 * Units step up from minutes ("5m ago") through hours, days, weeks (under 30
 * days), 30-day months (under 365 days) and 365-day years. Anything less than
 * a minute old, including dates in the future, is reported as "just now".
 *
 * @param {Date} date - The moment to describe.
 * @returns {string} The label, or "unknown" when date is missing or invalid.
 */
function formatTimeAgo(date) {
  if (!date || isNaN(date.getTime())) {
    return "unknown";
  }

  const elapsedSeconds = Math.floor((Date.now() - date.getTime()) / 1000);
  if (elapsedSeconds < 60) {
    return "just now";
  }

  // Each coarser unit is derived from the already-floored finer one.
  const elapsedMinutes = Math.floor(elapsedSeconds / 60);
  const elapsedHours = Math.floor(elapsedMinutes / 60);
  const elapsedDays = Math.floor(elapsedHours / 24);

  if (elapsedMinutes < 60) return `${elapsedMinutes}m ago`;
  if (elapsedHours < 24) return `${elapsedHours}h ago`;
  if (elapsedDays < 7) return `${elapsedDays}d ago`;
  if (elapsedDays < 30) return `${Math.floor(elapsedDays / 7)}w ago`;
  if (elapsedDays < 365) return `${Math.floor(elapsedDays / 30)}mo ago`;
  return `${Math.floor(elapsedDays / 365)}y ago`;
}

// Targeted Gmail searches, each tagged with the profile category it probes.
// Declared as [category, query] pairs and expanded into { category, query }
// records at the bottom of the table.
const DISCOVERY_QUERIES = [
  // Personal: sent mail in which the user describes themselves (high confidence)
  ["children", 'in:sent ("my son" OR "my daughter" OR "my kids" OR "the kids")'],
  ["partner", 'in:sent ("my husband" OR "my wife" OR "my partner" OR "my boyfriend" OR "my girlfriend")'],
  ["pets", 'in:sent ("my dog" OR "my cat" OR "our dog" OR "our cat" OR "our pet")'],
  ["phone_numbers", 'in:sent ("my number is" OR "my cell is" OR "call me at" OR "text me at" OR "reach me at")'],

  // Location: shipping and delivery notifications
  ["location", '("shipped to" OR "delivered to" OR "delivery address:") ("your order" OR "your package")'],
  ["birthday", 'to:me subject:"happy birthday"'],

  // Messaging apps: verification and account emails (high confidence)
  ["whatsapp", 'from:whatsapp ("verification code" OR "your code" OR "new device")'],
  ["signal_app", "from:signal"],
  ["telegram", "from:telegram"],

  // Social media accounts
  ["instagram", "from:instagram"],
  ["linkedin", "from:linkedin"],
  ["twitter", "from:twitter OR from:x.com"],
  ["github", "from:github"],
  ["facebook", "from:facebookmail"],
  ["tiktok", "from:tiktok"],
  ["youtube", "from:youtube"],
  ["reddit", "from:reddit"],

  // Professional activity
  ["work_calendar", "from:calendar-notification subject:invitation"],
  ["meetings", '"zoom.us" OR "meet.google.com" OR "teams.microsoft.com"'],
  ["slack", "from:slack"],

  // Commerce
  ["amazon", 'from:amazon "your order"'],
  ["subscriptions", 'subject:("subscription confirmed" OR "you subscribed" OR "thanks for subscribing" OR "welcome to")'],
  ["food_delivery", "from:doordash OR from:ubereats OR from:grubhub OR from:postmates"],
  ["retail", "from:target OR from:walmart OR from:costco OR from:bestbuy"],

  // Housing and vehicles
  ["homeowner", '(from:quickenloans OR from:rocket OR "mortgage statement" OR "property tax bill" OR "hoa dues")'],
  ["renter", '("rent payment" OR "lease agreement" OR from:apartments OR from:zillow) (rent OR lease)'],
  ["vehicle", '"auto insurance" OR "car payment" OR "vehicle registration" OR from:dmv'],

  // Financial
  ["banking", '(from:chase OR from:wellsfargo OR from:bankofamerica OR from:citi OR from:capitalone) subject:statement'],
  ["credit", 'from:(creditkarma OR experian OR equifax OR transunion) OR subject:"credit score"'],
  ["investments", "from:fidelity OR from:schwab OR from:vanguard OR from:robinhood"],

  // Life events: announcements the user sent themselves (high confidence)
  ["moving", 'in:sent ("new address" OR "we moved" OR "moving to" OR "our new place")'],
  ["job_change", 'in:sent ("excited to announce" OR "happy to share" OR "new role" OR "I joined" OR "starting at")'],
  ["wedding", 'subject:("save the date" OR "wedding invitation" OR "you are invited" OR "rsvp") OR (subject:congratulations (engaged OR wedding))'],

  // Day-to-day life: travel, health, education
  ["travel", 'subject:("booking confirmed" OR "reservation confirmed" OR "flight confirmation" OR "itinerary") (from:airline OR from:hotel OR from:airbnb OR from:expedia OR from:booking)'],
  ["health", 'subject:("appointment confirmed" OR "appointment reminder" OR "your visit") (from:doctor OR from:health OR from:medical OR from:pharmacy)'],
  ["education", 'from:edu OR "tuition" OR "student loan" OR "financial aid"'],

  // Entertainment and media
  ["spotify", "from:spotify"],
  ["netflix", "from:netflix"],
  ["apple", "from:apple.com"],
  ["discord", "from:discord"],
  ["gaming", "from:steam OR from:playstation OR from:xbox OR from:epicgames"],

  // Learning and reading
  ["online_courses", "from:udemy OR from:coursera OR from:skillshare OR from:masterclass"],
  ["substack", "from:substack"],

  // Lifestyle
  ["fitness", "from:peloton OR from:strava OR from:myfitnesspal OR from:equinox"],
  ["venmo", "from:venmo OR from:paypal"],

  // Professional tools
  ["work_tools", "from:figma OR from:notion OR from:airtable OR from:linear"],

  // Deeper character signals
  ["donations", '("thank you for your donation" OR "donation receipt" OR "tax-deductible gift") (from:charity OR from:nonprofit OR from:donate OR from:giving)'],
  ["books", "from:kindle OR from:audible OR from:goodreads OR from:libby"],
  ["side_business", "from:stripe OR from:shopify OR from:squarespace OR from:etsy"],
  ["kids_activities", '"practice schedule" OR "game schedule" OR from:teamsnap OR from:sportsengine'],
  ["professional_orgs", 'subject:(membership OR "member since" OR "annual dues") (association OR society OR institute OR organization)'],
].map(([category, query]) => ({ category, query }));

// Announce the collection windows and per-mailbox caps before any network work.
for (const bannerLine of [
  "Collecting Gmail data (inbox: 90 days, sent: 6 months)...",
  `  Inbox: up to ${INBOX_MAX_MESSAGES} messages`,
  `  Sent: up to ${SENT_MAX_MESSAGES} messages`,
]) {
  console.log(bannerLine);
}

/**
 * List the IDs of messages matching a Gmail search query.
 *
 * Pages through the messages.list endpoint (at most 100 IDs per request)
 * until maxResults IDs are collected or the API reports no further page.
 *
 * @param {string} query - Gmail search expression; omitted from the request when empty.
 * @param {number} maxResults - Maximum number of IDs to return.
 * @returns {Promise<string[]>} Message IDs in the order the API listed them.
 * @throws {Error} On a non-OK HTTP response or a body that is not valid JSON.
 */
async function fetchMessageIds(query, maxResults) {
  const collected = [];
  let nextPage = null;

  while (collected.length < maxResults) {
    const stillNeeded = maxResults - collected.length;

    const endpoint = new URL(
      "https://gmail.googleapis.com/gmail/v1/users/me/messages"
    );
    endpoint.searchParams.set("maxResults", Math.min(stillNeeded, 100).toString());
    if (query) {
      endpoint.searchParams.set("q", query);
    }
    if (nextPage) {
      endpoint.searchParams.set("pageToken", nextPage);
    }

    const response = await fetch(endpoint.toString(), {
      headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" },
    });

    // Read the body as text first so it can be quoted in either error message.
    const rawBody = await response.text();
    if (!response.ok) {
      throw new Error(`Gmail API failed: ${response.status} - ${rawBody}`);
    }

    let page;
    try {
      page = JSON.parse(rawBody);
    } catch (e) {
      throw new Error(`Gmail API returned invalid JSON: ${rawBody.slice(0, 200)}`);
    }

    const listed = page.messages;
    if (!listed || listed.length === 0) {
      break;
    }

    const pageIds = listed.map((entry) => entry.id);
    collected.push(...pageIds.slice(0, stillNeeded));

    nextPage = page.nextPageToken;
    if (!nextPage) {
      break;
    }
  }

  return collected;
}

/**
 * Fetch message details for a list of IDs, up to 25 requests at a time.
 *
 * Fetching is best-effort per message: a message whose request returns a
 * non-OK status, or whose body is not valid JSON, is left out of the result.
 * Skipped messages are counted and reported once via console.warn so that a
 * rate-limit or auth problem does not silently shrink the data set. A
 * network-level failure (fetch itself rejecting) is not caught and rejects
 * the returned promise.
 *
 * @param {string[]} messageIds - Gmail message IDs to fetch.
 * @param {string} [format="metadata"] - Gmail `format` parameter. With
 *   "metadata" only the Subject, From, To, Date and Cc headers are requested.
 * @returns {Promise<object[]>} Parsed message resources, in input order,
 *   without the skipped ones.
 */
async function fetchMessages(messageIds, format = "metadata") {
  const CONCURRENCY = 25;
  const METADATA_HEADERS = ["Subject", "From", "To", "Date", "Cc"];
  const results = [];
  const skipped = [];

  for (let i = 0; i < messageIds.length; i += CONCURRENCY) {
    const batch = messageIds.slice(i, i + CONCURRENCY);
    const fetched = await Promise.all(
      batch.map(async (id) => {
        // Build the URL through the URL API so the id and format are encoded.
        const url = new URL(
          `https://gmail.googleapis.com/gmail/v1/users/me/messages/${encodeURIComponent(id)}`
        );
        url.searchParams.set("format", format);
        if (format === "metadata") {
          for (const headerName of METADATA_HEADERS) {
            url.searchParams.append("metadataHeaders", headerName);
          }
        }

        const res = await fetch(url.toString(), {
          headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" },
        });
        if (!res.ok) {
          skipped.push(`${id} (HTTP ${res.status})`);
          return null;
        }
        try {
          return await res.json();
        } catch {
          skipped.push(`${id} (invalid JSON)`);
          return null;
        }
      })
    );
    results.push(...fetched.filter(Boolean));

    console.log(
      `  Fetched ${Math.min(i + CONCURRENCY, messageIds.length)}/${
        messageIds.length
      }...`
    );
  }

  if (skipped.length > 0) {
    // Show a few examples only; the count is what signals a systemic problem.
    const examples = skipped.slice(0, 5).join(", ");
    const more = skipped.length > 5 ? ", ..." : "";
    console.warn(
      `  Warning: skipped ${skipped.length}/${messageIds.length} messages: ${examples}${more}`
    );
  }

  return results;
}

/**
 * Pull the email address out of an address header value.
 *
 * When the value contains an angle-bracketed part ("Name <addr>"), the text
 * inside the brackets is returned lower-cased. Otherwise the whole value is
 * lower-cased and trimmed.
 *
 * @param {string} header - Raw header value such as a From or To entry.
 * @returns {string} The address, or "unknown" when header is empty or missing.
 */
function extractEmail(header) {
  if (!header) {
    return "unknown";
  }

  const bracketed = header.match(/<([^>]+)>/);
  if (bracketed === null) {
    return header.toLowerCase().trim();
  }

  const [, address] = bracketed;
  return address.toLowerCase();
}

/**
 * Derive a display name from an address header value.
 *
 * For "Name <addr>" the text before the bracket is returned, trimmed and with
 * double quotes removed. When there is no usable display name (a bare address,
 * a bare "<addr>", or an empty quoted name) the local part of the address, the
 * text before "@", is returned instead.
 *
 * @param {string} header - Raw header value such as a From or To entry.
 * @returns {string} The name, or "Unknown" when header is empty or missing.
 */
function extractName(header) {
  if (!header) return "Unknown";

  const match = header.match(/^([^<]+)</);
  if (match) {
    const displayName = match[1].trim().replace(/"/g, "");
    if (displayName) return displayName;
  }

  // No display name: use the bracketed address if present, otherwise the
  // whole value with any stray brackets removed, so "<foo@bar.com>" yields
  // "foo" rather than "<foo".
  const bracketed = header.match(/<([^>]+)>/);
  const address = (bracketed ? bracketed[1] : header.replace(/[<>]/g, "")).trim();
  return address.split("@")[0];
}

/**
 * Look up a header on a Gmail message by name, ignoring case.
 *
 * @param {object} msg - Message object; headers are read from msg.payload.headers.
 * @param {string} name - Header name to find.
 * @returns {string} Value of the first matching header, or "" when the message
 *   has no payload/headers or no header matches.
 */
function getHeader(msg, name) {
  const headers = msg.payload?.headers;
  if (headers == null) {
    return "";
  }

  for (const entry of headers) {
    if (entry.name.toLowerCase() === name.toLowerCase()) {
      return entry.value;
    }
  }
  return "";
}

/**
 * Extract the text body from a Gmail message payload.
 *
 * If the payload itself carries body data, that data is decoded and returned
 * regardless of its MIME type. Otherwise the parts tree is searched
 * depth-first, at any nesting depth, for the first "text/plain" part with
 * body data.
 *
 * @param {object} payload - The message's `payload` object (may be missing).
 * @returns {string} Decoded UTF-8 text, or "" when nothing suitable is found.
 */
function extractBodyText(payload) {
  if (!payload) return "";

  // Node's "base64" decoder also accepts the URL-safe alphabet, so the same
  // call handles both encodings. Returns null if the data cannot be decoded.
  const decode = (data) => {
    try {
      return Buffer.from(data, "base64").toString("utf-8");
    } catch {
      return null;
    }
  };

  if (payload.body?.data) {
    return decode(payload.body.data) ?? "";
  }

  // Depth-first search; returns null when no text/plain part decodes.
  const findPlainText = (parts) => {
    if (!Array.isArray(parts)) return null;
    for (const part of parts) {
      if (part.mimeType === "text/plain" && part.body?.data) {
        const text = decode(part.body.data);
        if (text !== null) return text;
      }
      const nestedText = findPlainText(part.parts);
      if (nestedText !== null) return nestedText;
    }
    return null;
  };

  return findPlainText(payload.parts) ?? "";
}

/**
 * Split a To/Cc header value into individual recipient strings.
 *
 * A comma inside a double-quoted display name ("Doe, John" <john@x.com>) or
 * inside an angle-bracketed address does not end a recipient. If the quotes or
 * brackets are left unbalanced, the value is split on every comma instead so
 * that a malformed header still yields separate recipients.
 *
 * @param {string} headerValue - Raw header value.
 * @returns {string[]} Trimmed, non-empty recipient strings.
 */
function splitAddressList(headerValue) {
  const recipients = [];
  let current = "";
  let inQuotes = false;
  let inAngle = false;

  for (let i = 0; i < headerValue.length; i++) {
    const ch = headerValue[i];
    if (inQuotes && ch === "\\" && i + 1 < headerValue.length) {
      // A backslash escapes the next character inside a quoted string.
      current += ch + headerValue[i + 1];
      i++;
      continue;
    }
    if (ch === '"') {
      inQuotes = !inQuotes;
    } else if (!inQuotes && ch === "<") {
      inAngle = true;
    } else if (!inQuotes && ch === ">") {
      inAngle = false;
    } else if (ch === "," && !inQuotes && !inAngle) {
      recipients.push(current);
      current = "";
      continue;
    }
    current += ch;
  }
  recipients.push(current);

  const pieces = inQuotes || inAngle ? headerValue.split(",") : recipients;
  return pieces.map((r) => r.trim()).filter(Boolean);
}

// Main pipeline: list message IDs, fetch details, run discovery searches,
// aggregate contacts/labels/volume, then write the two JSON output files and
// print a one-line JSON result. Any failure is logged and rethrown.
try {
  // Fetch inbox and sent message IDs in parallel
  console.log("\nPhase 1: Listing message IDs...");
  const [inboxIds, sentIds] = await Promise.all([
    fetchMessageIds("newer_than:90d -in:sent -category:promotions -category:updates -category:social -category:forums", INBOX_MAX_MESSAGES),
    fetchMessageIds("in:sent newer_than:180d", SENT_MAX_MESSAGES), // 6 months for writing samples
  ]);

  console.log(`  Inbox: ${inboxIds.length} messages`);
  console.log(`  Sent: ${sentIds.length} messages`);

  // Nothing to analyze: report a structured failure on stdout and exit non-zero.
  if (inboxIds.length === 0 && sentIds.length === 0) {
    console.error("\n✗ No messages found in the last 90 days.");
    console.log(
      JSON.stringify({
        success: false,
        error: "no_messages_found",
      })
    );
    process.exit(1);
  }

  // Fetch message details in parallel
  console.log("\nPhase 2: Fetching message details...");
  const [inboxDetails, sentDetails] = await Promise.all([
    fetchMessages(inboxIds, "metadata"),
    fetchMessages(sentIds, "full"), // Full for writing samples
  ]);

  console.log(`  Inbox: ${inboxDetails.length} fetched`);
  console.log(`  Sent: ${sentDetails.length} fetched`);

  // Phase 3: Discovery searches
  // All queries are started at once; a query that throws is logged and
  // recorded with count 0 so one failure does not abort the others.
  console.log("\nPhase 3: Running discovery searches...");
  const discoveryResults = await Promise.all(
    DISCOVERY_QUERIES.map(async ({ category, query }) => {
      try {
        const ids = await fetchMessageIds(query, DISCOVERY_MAX_PER_QUERY);
        if (ids.length === 0) {
          return { category, query, count: 0, emails: [] };
        }
        const messages = await fetchMessages(ids, "metadata");
        return {
          category,
          query,
          count: messages.length,
          emails: messages.map((m) => {
            const dateStr = getHeader(m, "Date");
            return {
              id: m.id,
              threadId: m.threadId,
              from: getHeader(m, "From"),
              to: getHeader(m, "To"),
              subject: getHeader(m, "Subject"),
              snippet: m.snippet,
              timeAgo: formatTimeAgo(new Date(dateStr)),
            };
          }),
        };
      } catch (err) {
        console.log(`  Warning: ${category} search failed: ${err.message}`);
        return { category, query, count: 0, emails: [], error: err.message };
      }
    })
  );

  // Only categories with at least one fetched message are reported and written.
  const discoveryWithResults = discoveryResults.filter((r) => r.count > 0);
  console.log(
    `  Discovery: ${discoveryWithResults.length}/${DISCOVERY_QUERIES.length} categories found`
  );
  for (const r of discoveryWithResults) {
    console.log(`    - ${r.category}: ${r.count} emails`);
  }

  // Process inbox messages
  const inboxMessages = [];
  const contactsReceived = new Map(); // email -> { name, count }
  const contactsSent = new Map(); // email -> { name, count }
  const labelCounts = {};
  const dateVolume = {};

  for (const msg of inboxDetails) {
    const from = getHeader(msg, "From");
    const to = getHeader(msg, "To");
    const date = getHeader(msg, "Date");
    const subject = getHeader(msg, "Subject");

    const senderEmail = extractEmail(from);
    const senderName = extractName(from);

    // Track contacts who email the user
    if (!contactsReceived.has(senderEmail)) {
      contactsReceived.set(senderEmail, { name: senderName, count: 0 });
    }
    contactsReceived.get(senderEmail).count++;

    // Track labels
    for (const label of msg.labelIds || []) {
      labelCounts[label] = (labelCounts[label] || 0) + 1;
    }

    // Track volume by date (keyed by the UTC calendar day of the Date header)
    if (date) {
      const d = new Date(date);
      if (!isNaN(d.getTime())) {
        const dateKey = d.toISOString().split("T")[0];
        dateVolume[dateKey] = (dateVolume[dateKey] || 0) + 1;
      }
    }

    inboxMessages.push({
      id: msg.id,
      threadId: msg.threadId,
      from: from,
      to: to,
      subject: subject,
      date: date,
      snippet: msg.snippet,
      labelIds: msg.labelIds || [],
    });
  }

  // Process sent messages for contacts and writing samples
  const writingSamples = [];

  for (const msg of sentDetails) {
    const to = getHeader(msg, "To");
    const cc = getHeader(msg, "Cc");
    const date = getHeader(msg, "Date");
    const subject = getHeader(msg, "Subject");

    // Track contacts the user emails. Recipients are split with a
    // quote-aware splitter: a plain split(",") would cut a display name such
    // as "Doe, John" <john@x.com> into two bogus contacts.
    const recipients = [to, cc]
      .filter(Boolean)
      .flatMap((value) => splitAddressList(value));

    for (const recipient of recipients) {
      const email = extractEmail(recipient);
      const name = extractName(recipient);
      if (!contactsSent.has(email)) {
        contactsSent.set(email, { name: name, count: 0 });
      }
      contactsSent.get(email).count++;
    }

    // Extract body for writing samples; bodies under 50 trimmed characters
    // are skipped.
    const bodyText = extractBodyText(msg.payload);
    if (bodyText && bodyText.trim().length >= 50) {
      writingSamples.push({
        text: bodyText,
        metadata: {
          id: msg.id,
          date: date || new Date().toISOString(),
          subject: subject || "",
        },
      });
    }
  }

  // Build contacts with bidirectional signals
  const allContacts = new Map();

  for (const [email, data] of contactsReceived) {
    if (!allContacts.has(email)) {
      allContacts.set(email, {
        email,
        name: data.name,
        receivedFrom: 0,
        sentTo: 0,
      });
    }
    allContacts.get(email).receivedFrom = data.count;
  }

  for (const [email, data] of contactsSent) {
    if (!allContacts.has(email)) {
      allContacts.set(email, {
        email,
        name: data.name,
        receivedFrom: 0,
        sentTo: 0,
      });
    }
    const contact = allContacts.get(email);
    contact.sentTo = data.count;
    // Prefer name from sent (more likely to be accurate)
    if (data.name && data.name !== "Unknown") {
      contact.name = data.name;
    }
  }

  // Score contacts by signal strength. Expects `bidirectional` to already be
  // set on the contact (it is added in the first map() below).
  const scoreContact = (contact) => {
    let score = 0;
    if (contact.bidirectional) score += 50; // Strong: two-way communication
    if (contact.sentTo > 2) score += 20; // User actively emails them
    if (contact.receivedFrom > 5) score += 10; // Frequent sender
    // Penalize obvious noise addresses
    const email = contact.email.toLowerCase();
    if (email.includes("noreply")) score -= 100;
    if (email.includes("no-reply")) score -= 100;
    if (email.includes("notifications")) score -= 100;
    if (email.includes("mailer-daemon")) score -= 100;
    if (email.includes("postmaster")) score -= 100;
    if (email.includes("donotreply")) score -= 100;
    if (email.includes("automated")) score -= 100;
    if (email.includes("newsletter")) score -= 100;
    return score;
  };

  // Build contacts with scores, filter out noise. The maximum positive score
  // is 80, so any single noise penalty pushes a contact below zero.
  const contacts = [...allContacts.values()]
    .map((c) => ({
      ...c,
      totalInteractions: c.receivedFrom + c.sentTo,
      bidirectional: c.receivedFrom > 0 && c.sentTo > 0,
    }))
    .map((c) => ({ ...c, signalScore: scoreContact(c) }))
    .filter((c) => c.signalScore >= 0) // Remove obvious noise
    .sort((a, b) => b.signalScore - a.signalScore || b.totalInteractions - a.totalInteractions);

  // Compute date range (ISO day keys sort chronologically as strings)
  const dates = Object.keys(dateVolume).sort();
  const dateRange =
    dates.length > 0
      ? { oldest: dates[0], newest: dates[dates.length - 1] }
      : null;

  // Ensure output directories exist
  const profileDir = path.dirname(profileOutputPath);
  const samplesDir = path.dirname(writingSamplesOutputPath);
  if (profileDir && profileDir !== ".") fs.mkdirSync(profileDir, { recursive: true });
  if (samplesDir && samplesDir !== ".") fs.mkdirSync(samplesDir, { recursive: true });

  // Write profile data
  const profileData = {
    period: `${formatDate(ninetyDaysAgo)} - ${formatDate(new Date())}`,
    summary: {
      inboxMessages: inboxMessages.length,
      sentMessages: sentDetails.length,
      uniqueContacts: contacts.length,
      bidirectionalContacts: contacts.filter((c) => c.bidirectional).length,
      discoveryCategories: discoveryWithResults.length,
    },
    contacts: contacts.slice(0, 50), // Top 50 contacts
    labels: Object.entries(labelCounts)
      .sort((a, b) => b[1] - a[1])
      .map(([label, count]) => ({ label, count })),
    volumeByDate: Object.entries(dateVolume)
      .sort((a, b) => a[0].localeCompare(b[0]))
      .map(([date, count]) => ({ date, count })),
    recentThreads: inboxMessages
      .slice(0, 20)
      .map((m) => ({ id: m.threadId, subject: m.subject, from: m.from })),
    // Discovery results for targeted profile extraction
    discovery: Object.fromEntries(
      discoveryWithResults.map((r) => [
        r.category,
        { query: r.query, count: r.count, emails: r.emails },
      ])
    ),
  };

  fs.writeFileSync(profileOutputPath, JSON.stringify(profileData, null, 2));
  console.log(`\n✓ Profile data written to: ${profileOutputPath}`);

  // Write writing samples
  const writingSamplesData = {
    source: "gmail",
    analyzedAt: new Date().toISOString(),
    context: {
      timePeriod: "180d",
      sampleCount: writingSamples.length,
      minLength: 50,
    },
    samples: writingSamples,
  };

  fs.writeFileSync(
    writingSamplesOutputPath,
    JSON.stringify(writingSamplesData, null, 2)
  );
  console.log(`✓ Writing samples written to: ${writingSamplesOutputPath}`);
  console.log(`  ${writingSamples.length} sent emails with analyzable content`);

  // Summary
  console.log(`\n✓ Gmail data collection complete`);
  console.log(`  Period: ${profileData.period}`);
  console.log(`  Inbox: ${inboxMessages.length} messages`);
  console.log(`  Sent: ${sentDetails.length} messages`);
  console.log(`  Contacts: ${contacts.length} (${profileData.summary.bidirectionalContacts} bidirectional)`);
  console.log(`  Discovery: ${discoveryWithResults.length} categories with matches`);

  if (contacts.length > 0) {
    console.log(`\n  Top contacts:`);
    contacts.slice(0, 5).forEach((c) => {
      // Arrow shows direction of traffic: both ways, inbound only, or outbound only.
      const direction =
        c.bidirectional ? "↔" : c.receivedFrom > 0 ? "←" : "→";
      console.log(
        `    ${direction} ${c.name || c.email}: ${c.totalInteractions} emails`
      );
    });
  }

  // Final line on stdout: structured success result.
  console.log(
    JSON.stringify({
      success: true,
      profileOutputPath,
      writingSamplesOutputPath,
      inboxCount: inboxMessages.length,
      sentCount: sentDetails.length,
      contactCount: contacts.length,
      writingSampleCount: writingSamples.length,
      discoveryCategories: discoveryWithResults.length,
      discoveryHits: Object.keys(profileData.discovery),
    })
  );
} catch (error) {
  console.error("Failed:", error.message);
  throw error;
}