Connect to Gmail
Collect inbox emails (90 days), sent emails (6 months), and run targeted discovery searches for profile analysis
Source Code
import fs from "fs";
import path from "path";
// Output locations: the first two positional CLI arguments, each falling back
// to a path under session/ when the argument is not supplied.
const cliArgs = process.argv.slice(2);
const profileOutputPath = cliArgs[0] ?? "session/gmail-profile-data.json";
const writingSamplesOutputPath = cliArgs[1] ?? "session/writing-samples.json";

// Upper bounds on how many messages each collection phase will request.
const INBOX_MAX_MESSAGES = 75;
const SENT_MAX_MESSAGES = 50;
const DISCOVERY_MAX_PER_QUERY = 15;

// Instant exactly 90 days (in milliseconds) before script start.
const ninetyDaysAgo = new Date(Date.now() - 90 * 86_400_000);

/**
 * Render a Date as a short en-US month/day label (e.g. "Jan 5").
 */
function formatDate(d) {
  return d.toLocaleDateString("en-US", { month: "short", day: "numeric" });
}
/**
 * Describe how long ago `date` was, relative to the current time, as a short
 * label such as "5m ago", "3h ago", "2w ago" or "1y ago".
 *
 * Returns "unknown" for a missing or invalid date and "just now" for anything
 * less than a minute old (which includes dates in the future).
 */
function formatTimeAgo(date) {
  const isUsable = Boolean(date) && !isNaN(date.getTime());
  if (!isUsable) {
    return "unknown";
  }

  const elapsedSeconds = Math.floor((Date.now() - date.getTime()) / 1000);
  if (elapsedSeconds < 60) {
    return "just now";
  }

  const elapsedMinutes = Math.floor(elapsedSeconds / 60);
  const elapsedHours = Math.floor(elapsedMinutes / 60);
  const elapsedDays = Math.floor(elapsedHours / 24);

  // Ordered from finest to coarsest unit; the first applicable row wins.
  const buckets = [
    [elapsedMinutes < 60, elapsedMinutes, "m"],
    [elapsedHours < 24, elapsedHours, "h"],
    [elapsedDays < 7, elapsedDays, "d"],
    [elapsedDays < 30, Math.floor(elapsedDays / 7), "w"],
    [elapsedDays < 365, Math.floor(elapsedDays / 30), "mo"],
    [true, Math.floor(elapsedDays / 365), "y"],
  ];
  const [, amount, suffix] = buckets.find(([applies]) => applies);
  return `${amount}${suffix} ago`;
}
// Targeted discovery searches to learn specific facts about the user.
// Each row is [category, Gmail search query]; the rows are expanded into
// { category, query } objects in the order listed.
const DISCOVERY_QUERIES = [
  // Personal - sent mail where the user writes about themselves (high confidence)
  ["children", 'in:sent ("my son" OR "my daughter" OR "my kids" OR "the kids")'],
  ["partner", 'in:sent ("my husband" OR "my wife" OR "my partner" OR "my boyfriend" OR "my girlfriend")'],
  ["pets", 'in:sent ("my dog" OR "my cat" OR "our dog" OR "our cat" OR "our pet")'],
  ["phone_numbers", 'in:sent ("my number is" OR "my cell is" OR "call me at" OR "text me at" OR "reach me at")'],

  // Location - shipping/delivery notifications
  ["location", '("shipped to" OR "delivered to" OR "delivery address:") ("your order" OR "your package")'],
  ["birthday", 'to:me subject:"happy birthday"'],

  // Contact discovery - verification emails (high confidence)
  ["whatsapp", 'from:whatsapp ("verification code" OR "your code" OR "new device")'],
  ["signal_app", "from:signal"],
  ["telegram", "from:telegram"],

  // Social media
  ["instagram", "from:instagram"],
  ["linkedin", "from:linkedin"],
  ["twitter", "from:twitter OR from:x.com"],
  ["github", "from:github"],
  ["facebook", "from:facebookmail"],
  ["tiktok", "from:tiktok"],
  ["youtube", "from:youtube"],
  ["reddit", "from:reddit"],

  // Professional
  ["work_calendar", "from:calendar-notification subject:invitation"],
  ["meetings", '"zoom.us" OR "meet.google.com" OR "teams.microsoft.com"'],
  ["slack", "from:slack"],

  // Commerce
  ["amazon", 'from:amazon "your order"'],
  ["subscriptions", 'subject:("subscription confirmed" OR "you subscribed" OR "thanks for subscribing" OR "welcome to")'],
  ["food_delivery", "from:doordash OR from:ubereats OR from:grubhub OR from:postmates"],
  ["retail", "from:target OR from:walmart OR from:costco OR from:bestbuy"],

  // Housing & Auto
  ["homeowner", '(from:quickenloans OR from:rocket OR "mortgage statement" OR "property tax bill" OR "hoa dues")'],
  ["renter", '("rent payment" OR "lease agreement" OR from:apartments OR from:zillow) (rent OR lease)'],
  ["vehicle", '"auto insurance" OR "car payment" OR "vehicle registration" OR from:dmv'],

  // Financial
  ["banking", '(from:chase OR from:wellsfargo OR from:bankofamerica OR from:citi OR from:capitalone) subject:statement'],
  ["credit", 'from:(creditkarma OR experian OR equifax OR transunion) OR subject:"credit score"'],
  ["investments", "from:fidelity OR from:schwab OR from:vanguard OR from:robinhood"],

  // Life events - sent mail containing the user's own announcements (high confidence)
  ["moving", 'in:sent ("new address" OR "we moved" OR "moving to" OR "our new place")'],
  ["job_change", 'in:sent ("excited to announce" OR "happy to share" OR "new role" OR "I joined" OR "starting at")'],
  ["wedding", 'subject:("save the date" OR "wedding invitation" OR "you are invited" OR "rsvp") OR (subject:congratulations (engaged OR wedding))'],

  // Life
  ["travel", 'subject:("booking confirmed" OR "reservation confirmed" OR "flight confirmation" OR "itinerary") (from:airline OR from:hotel OR from:airbnb OR from:expedia OR from:booking)'],
  ["health", 'subject:("appointment confirmed" OR "appointment reminder" OR "your visit") (from:doctor OR from:health OR from:medical OR from:pharmacy)'],
  ["education", 'from:edu OR "tuition" OR "student loan" OR "financial aid"'],

  // Entertainment & Media
  ["spotify", "from:spotify"],
  ["netflix", "from:netflix"],
  ["apple", "from:apple.com"],
  ["discord", "from:discord"],
  ["gaming", "from:steam OR from:playstation OR from:xbox OR from:epicgames"],

  // Learning & Reading
  ["online_courses", "from:udemy OR from:coursera OR from:skillshare OR from:masterclass"],
  ["substack", "from:substack"],

  // Lifestyle
  ["fitness", "from:peloton OR from:strava OR from:myfitnesspal OR from:equinox"],
  ["venmo", "from:venmo OR from:paypal"],

  // Professional tools
  ["work_tools", "from:figma OR from:notion OR from:airtable OR from:linear"],

  // Deep character signals
  ["donations", '("thank you for your donation" OR "donation receipt" OR "tax-deductible gift") (from:charity OR from:nonprofit OR from:donate OR from:giving)'],
  ["books", "from:kindle OR from:audible OR from:goodreads OR from:libby"],
  ["side_business", "from:stripe OR from:shopify OR from:squarespace OR from:etsy"],
  ["kids_activities", '"practice schedule" OR "game schedule" OR from:teamsnap OR from:sportsengine'],
  ["professional_orgs", 'subject:(membership OR "member since" OR "annual dues") (association OR society OR institute OR organization)'],
].map(([category, query]) => ({ category, query }));
// Startup banner: announce the collection windows and the per-mailbox caps.
for (const bannerLine of [
  "Collecting Gmail data (inbox: 90 days, sent: 6 months)...",
  ` Inbox: up to ${INBOX_MAX_MESSAGES} messages`,
  ` Sent: up to ${SENT_MAX_MESSAGES} messages`,
]) {
  console.log(bannerLine);
}
/**
 * List the IDs of messages matching a Gmail search query, following
 * pagination until `maxResults` IDs are collected or the listing runs out.
 *
 * @param {string} query - Gmail search expression; omitted from the request when falsy.
 * @param {number} maxResults - Maximum number of IDs to return.
 * @returns {Promise<string[]>} Message IDs in the order the API returned them.
 * @throws {Error} When a response is not OK or its body is not valid JSON.
 */
async function fetchMessageIds(query, maxResults) {
  const collected = [];
  let nextToken = null;

  while (collected.length < maxResults) {
    const stillNeeded = maxResults - collected.length;

    // Request at most 100 IDs per page, and never more than are still needed.
    const listUrl = new URL(
      "https://gmail.googleapis.com/gmail/v1/users/me/messages"
    );
    listUrl.searchParams.set("maxResults", Math.min(stillNeeded, 100).toString());
    if (query) {
      listUrl.searchParams.set("q", query);
    }
    if (nextToken) {
      listUrl.searchParams.set("pageToken", nextToken);
    }

    const response = await fetch(listUrl.href, {
      headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" },
    });

    // Read the body as text first so it can be quoted in either error below.
    const rawBody = await response.text();
    if (!response.ok) {
      throw new Error(`Gmail API failed: ${response.status} - ${rawBody}`);
    }

    let page;
    try {
      page = JSON.parse(rawBody);
    } catch (e) {
      throw new Error(`Gmail API returned invalid JSON: ${rawBody.slice(0, 200)}`);
    }

    const hasMessages = Boolean(page.messages) && page.messages.length !== 0;
    if (!hasMessages) {
      break;
    }

    for (const message of page.messages.slice(0, stillNeeded)) {
      collected.push(message.id);
    }

    nextToken = page.nextPageToken;
    if (!nextToken) {
      break;
    }
  }

  return collected;
}
/**
 * Fetch full message resources for a list of message IDs, issuing the
 * requests in parallel batches of 25 and logging progress after each batch.
 *
 * Individual messages that cannot be retrieved (non-OK HTTP status, or a
 * response body that is not valid JSON) are skipped rather than aborting the
 * run. The number of skipped messages and the reasons are reported once via
 * console.warn so that missing data is not silent. A rejected `fetch` call
 * (e.g. a network failure) still propagates to the caller.
 *
 * @param {string[]} messageIds - IDs as returned by the message listing.
 * @param {string} [format="metadata"] - Value of the `format` query parameter.
 *   With "metadata", only the Subject/From/To/Date/Cc headers are requested.
 * @returns {Promise<object[]>} Successfully fetched messages, in request order.
 */
async function fetchMessages(messageIds, format = "metadata") {
  const CONCURRENCY = 25;
  const METADATA_HEADERS = ["Subject", "From", "To", "Date", "Cc"];
  const results = [];
  const skipCounts = new Map(); // reason -> number of messages skipped

  for (let i = 0; i < messageIds.length; i += CONCURRENCY) {
    const batch = messageIds.slice(i, i + CONCURRENCY);
    const outcomes = await Promise.all(
      batch.map(async (id) => {
        const url = new URL(
          `https://gmail.googleapis.com/gmail/v1/users/me/messages/${encodeURIComponent(id)}`
        );
        url.searchParams.set("format", format);
        if (format === "metadata") {
          for (const headerName of METADATA_HEADERS) {
            url.searchParams.append("metadataHeaders", headerName);
          }
        }
        const res = await fetch(url.toString(), {
          headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" },
        });
        if (!res.ok) return { skipReason: `HTTP ${res.status}` };
        try {
          const message = await res.json();
          return message ? { message } : { skipReason: "empty body" };
        } catch {
          return { skipReason: "invalid JSON" };
        }
      })
    );

    // Tally after the batch settles so no state is shared between in-flight requests.
    for (const { message, skipReason } of outcomes) {
      if (message) {
        results.push(message);
      } else {
        skipCounts.set(skipReason, (skipCounts.get(skipReason) ?? 0) + 1);
      }
    }

    console.log(
      ` Fetched ${Math.min(i + CONCURRENCY, messageIds.length)}/${
        messageIds.length
      }...`
    );
  }

  if (skipCounts.size > 0) {
    const skipped = messageIds.length - results.length;
    const detail = [...skipCounts]
      .map(([reason, count]) => `${reason} x${count}`)
      .join(", ");
    console.warn(
      ` Warning: skipped ${skipped}/${messageIds.length} messages (${detail})`
    );
  }
  return results;
}
/**
 * Pull the email address out of an address header value.
 *
 * If the value contains an angle-bracketed section, its contents are returned
 * lower-cased; otherwise the whole value is lower-cased and trimmed.
 * A missing or empty value yields "unknown".
 */
function extractEmail(header) {
  if (!header) {
    return "unknown";
  }
  const [, bracketed] = header.match(/<([^>]+)>/) ?? [];
  if (bracketed !== undefined) {
    return bracketed.toLowerCase();
  }
  return header.toLowerCase().trim();
}
/**
 * Derive a display name from an address header value.
 *
 * - `"Jane Doe" <jane@x.com>` or `Jane Doe <jane@x.com>` -> `Jane Doe`
 *   (text before the first `<`, with double quotes removed).
 * - When there is no usable display name (bare `jane@x.com`, bracket-only
 *   `<jane@x.com>`, or an empty/whitespace name such as `"" <jane@x.com>`),
 *   the local part of the address (text before `@`) is returned instead.
 * - A missing or empty header yields "Unknown".
 *
 * @param {string} header - Raw header value for a single address.
 * @returns {string} Display name, address local part, or "Unknown".
 */
function extractName(header) {
  if (!header) return "Unknown";

  const bracketAt = header.indexOf("<");
  if (bracketAt > 0) {
    const displayName = header
      .slice(0, bracketAt)
      .trim()
      .replace(/"/g, "")
      .trim();
    if (displayName) return displayName;
  }

  // No display name: fall back to the address itself. Strip the angle
  // brackets first so "<john@x.com>" yields "john" rather than "<john".
  const bracketed = header.match(/<([^>]*)>?/);
  const address = (bracketed ? bracketed[1] : header).trim();
  return address.split("@")[0];
}
/**
 * Look up a header on a message by name, ignoring case.
 *
 * Returns the value of the first header in `msg.payload.headers` whose name
 * matches, or "" when the message has no headers or none match.
 */
function getHeader(msg, name) {
  const headerList = msg.payload?.headers;
  if (headerList == null) {
    return "";
  }
  const isWanted = (entry) => entry.name.toLowerCase() === name.toLowerCase();
  const hit = headerList.find(isWanted);
  if (!hit) {
    return "";
  }
  return hit.value;
}
/**
* Extract plain text body from Gmail message payload
*/
function extractBodyText(payload) {
if (!payload) return "";
if (payload.body?.data) {
try {
return Buffer.from(payload.body.data, "base64").toString("utf-8");
} catch {
return "";
}
}
if (payload.parts) {
for (const part of payload.parts) {
if (part.mimeType === "text/plain" && part.body?.data) {
try {
return Buffer.from(part.body.data, "base64").toString("utf-8");
} catch {
continue;
}
}
if (part.parts) {
for (const nested of part.parts) {
if (nested.mimeType === "text/plain" && nested.body?.data) {
try {
return Buffer.from(nested.body.data, "base64").toString("utf-8");
} catch {
continue;
}
}
}
}
}
}
return "";
}
/**
 * Split an address-list header value (To/Cc) into individual mailbox strings.
 *
 * Commas inside a double-quoted display name do not split the list, so
 * `"Doe, John" <jd@x.com>, amy@x.com` yields two entries rather than three.
 * A backslash inside quotes escapes the next character. Entries are trimmed
 * and empty entries are dropped. If a quote is never closed, the remainder of
 * the value is kept as a single entry.
 */
function splitAddressList(headerValue) {
  const mailboxes = [];
  let current = "";
  let inQuotes = false;
  let escapeNext = false;
  for (const ch of headerValue) {
    if (escapeNext) {
      escapeNext = false;
    } else if (inQuotes && ch === "\\") {
      escapeNext = true;
    } else if (ch === '"') {
      inQuotes = !inQuotes;
    } else if (ch === "," && !inQuotes) {
      mailboxes.push(current.trim());
      current = "";
      continue;
    }
    current += ch;
  }
  mailboxes.push(current.trim());
  return mailboxes.filter(Boolean);
}

// Main pipeline: list message IDs, fetch details, run the discovery searches,
// aggregate contacts/labels/volume, then write the profile and writing-sample
// JSON files. Any error is logged and rethrown.
try {
  // Phase 1: list inbox and sent message IDs in parallel
  console.log("\nPhase 1: Listing message IDs...");
  const [inboxIds, sentIds] = await Promise.all([
    fetchMessageIds(
      "newer_than:90d -in:sent -category:promotions -category:updates -category:social -category:forums",
      INBOX_MAX_MESSAGES
    ),
    fetchMessageIds("in:sent newer_than:180d", SENT_MAX_MESSAGES), // 6 months for writing samples
  ]);
  console.log(` Inbox: ${inboxIds.length} messages`);
  console.log(` Sent: ${sentIds.length} messages`);

  if (inboxIds.length === 0 && sentIds.length === 0) {
    // Nothing to analyze: print a machine-readable failure record and exit non-zero.
    console.error("\n✗ No messages found in the last 90 days.");
    console.log(
      JSON.stringify({
        success: false,
        error: "no_messages_found",
      })
    );
    process.exit(1);
  }

  // Phase 2: fetch message details in parallel
  console.log("\nPhase 2: Fetching message details...");
  const [inboxDetails, sentDetails] = await Promise.all([
    fetchMessages(inboxIds, "metadata"),
    fetchMessages(sentIds, "full"), // Full for writing samples
  ]);
  console.log(` Inbox: ${inboxDetails.length} fetched`);
  console.log(` Sent: ${sentDetails.length} fetched`);

  // Phase 3: discovery searches. Each query is isolated: a failure is logged
  // and recorded as an empty result instead of aborting the whole run.
  console.log("\nPhase 3: Running discovery searches...");
  const discoveryResults = await Promise.all(
    DISCOVERY_QUERIES.map(async ({ category, query }) => {
      try {
        const ids = await fetchMessageIds(query, DISCOVERY_MAX_PER_QUERY);
        if (ids.length === 0) {
          return { category, query, count: 0, emails: [] };
        }
        const messages = await fetchMessages(ids, "metadata");
        return {
          category,
          query,
          count: messages.length,
          emails: messages.map((m) => {
            const dateStr = getHeader(m, "Date");
            return {
              id: m.id,
              threadId: m.threadId,
              from: getHeader(m, "From"),
              to: getHeader(m, "To"),
              subject: getHeader(m, "Subject"),
              snippet: m.snippet,
              timeAgo: formatTimeAgo(new Date(dateStr)),
            };
          }),
        };
      } catch (err) {
        console.log(` Warning: ${category} search failed: ${err.message}`);
        return { category, query, count: 0, emails: [], error: err.message };
      }
    })
  );
  const discoveryWithResults = discoveryResults.filter((r) => r.count > 0);
  console.log(
    ` Discovery: ${discoveryWithResults.length}/${DISCOVERY_QUERIES.length} categories found`
  );
  for (const r of discoveryWithResults) {
    console.log(` - ${r.category}: ${r.count} emails`);
  }

  // Process inbox messages
  const inboxMessages = [];
  const contactsReceived = new Map(); // email -> { name, count }
  const contactsSent = new Map(); // email -> { name, count }
  const labelCounts = {};
  const dateVolume = {};
  for (const msg of inboxDetails) {
    const from = getHeader(msg, "From");
    const to = getHeader(msg, "To");
    const date = getHeader(msg, "Date");
    const subject = getHeader(msg, "Subject");
    const senderEmail = extractEmail(from);
    const senderName = extractName(from);

    // Track contacts who email the user
    if (!contactsReceived.has(senderEmail)) {
      contactsReceived.set(senderEmail, { name: senderName, count: 0 });
    }
    contactsReceived.get(senderEmail).count++;

    // Track labels
    for (const label of msg.labelIds || []) {
      labelCounts[label] = (labelCounts[label] || 0) + 1;
    }

    // Track volume by date (keyed by the UTC calendar day of the Date header)
    if (date) {
      const d = new Date(date);
      if (!isNaN(d.getTime())) {
        const dateKey = d.toISOString().split("T")[0];
        dateVolume[dateKey] = (dateVolume[dateKey] || 0) + 1;
      }
    }

    inboxMessages.push({
      id: msg.id,
      threadId: msg.threadId,
      from: from,
      to: to,
      subject: subject,
      date: date,
      snippet: msg.snippet,
      labelIds: msg.labelIds || [],
    });
  }

  // Process sent messages for contacts and writing samples
  const writingSamples = [];
  for (const msg of sentDetails) {
    const to = getHeader(msg, "To");
    const cc = getHeader(msg, "Cc");
    const date = getHeader(msg, "Date");
    const subject = getHeader(msg, "Subject");

    // Track contacts the user emails. Each header is split with a quote-aware
    // splitter so a display name containing a comma stays one recipient.
    const recipients = [to, cc].filter(Boolean).flatMap(splitAddressList);
    for (const recipient of recipients) {
      const email = extractEmail(recipient);
      const name = extractName(recipient);
      if (!contactsSent.has(email)) {
        contactsSent.set(email, { name: name, count: 0 });
      }
      contactsSent.get(email).count++;
    }

    // Extract body for writing samples; bodies under 50 trimmed characters are skipped
    const bodyText = extractBodyText(msg.payload);
    if (bodyText && bodyText.trim().length >= 50) {
      writingSamples.push({
        text: bodyText,
        metadata: {
          id: msg.id,
          date: date || new Date().toISOString(),
          subject: subject || "",
        },
      });
    }
  }

  // Build contacts with bidirectional signals
  const allContacts = new Map();
  for (const [email, data] of contactsReceived) {
    if (!allContacts.has(email)) {
      allContacts.set(email, {
        email,
        name: data.name,
        receivedFrom: 0,
        sentTo: 0,
      });
    }
    allContacts.get(email).receivedFrom = data.count;
  }
  for (const [email, data] of contactsSent) {
    if (!allContacts.has(email)) {
      allContacts.set(email, {
        email,
        name: data.name,
        receivedFrom: 0,
        sentTo: 0,
      });
    }
    const contact = allContacts.get(email);
    contact.sentTo = data.count;
    // Prefer name from sent (more likely to be accurate)
    if (data.name && data.name !== "Unknown") {
      contact.name = data.name;
    }
  }

  // Score contacts by signal strength. Expects `contact.bidirectional` to be
  // set already; it is added in the mapping step immediately before scoring.
  const scoreContact = (contact) => {
    let score = 0;
    if (contact.bidirectional) score += 50; // Strong: two-way communication
    if (contact.sentTo > 2) score += 20; // User actively emails them
    if (contact.receivedFrom > 5) score += 10; // Frequent sender
    // Penalize obvious noise addresses
    const email = contact.email.toLowerCase();
    if (email.includes("noreply")) score -= 100;
    if (email.includes("no-reply")) score -= 100;
    if (email.includes("notifications")) score -= 100;
    if (email.includes("mailer-daemon")) score -= 100;
    if (email.includes("postmaster")) score -= 100;
    if (email.includes("donotreply")) score -= 100;
    if (email.includes("automated")) score -= 100;
    if (email.includes("newsletter")) score -= 100;
    return score;
  };

  // Build contacts with scores, filter out noise, and order by score with
  // total interaction count as the tie-breaker.
  const contacts = [...allContacts.values()]
    .map((c) => ({
      ...c,
      totalInteractions: c.receivedFrom + c.sentTo,
      bidirectional: c.receivedFrom > 0 && c.sentTo > 0,
    }))
    .map((c) => ({ ...c, signalScore: scoreContact(c) }))
    .filter((c) => c.signalScore >= 0) // Remove obvious noise
    .sort((a, b) => b.signalScore - a.signalScore || b.totalInteractions - a.totalInteractions);

  // Compute date range
  const dates = Object.keys(dateVolume).sort();
  const dateRange =
    dates.length > 0
      ? { oldest: dates[0], newest: dates[dates.length - 1] }
      : null;

  // Ensure output directories exist
  const profileDir = path.dirname(profileOutputPath);
  const samplesDir = path.dirname(writingSamplesOutputPath);
  if (profileDir && profileDir !== ".") fs.mkdirSync(profileDir, { recursive: true });
  if (samplesDir && samplesDir !== ".") fs.mkdirSync(samplesDir, { recursive: true });

  // Write profile data
  const profileData = {
    period: `${formatDate(ninetyDaysAgo)} - ${formatDate(new Date())}`,
    summary: {
      inboxMessages: inboxMessages.length,
      sentMessages: sentDetails.length,
      uniqueContacts: contacts.length,
      bidirectionalContacts: contacts.filter((c) => c.bidirectional).length,
      discoveryCategories: discoveryWithResults.length,
    },
    contacts: contacts.slice(0, 50), // Top 50 contacts
    labels: Object.entries(labelCounts)
      .sort((a, b) => b[1] - a[1])
      .map(([label, count]) => ({ label, count })),
    volumeByDate: Object.entries(dateVolume)
      .sort((a, b) => a[0].localeCompare(b[0]))
      .map(([date, count]) => ({ date, count })),
    recentThreads: inboxMessages
      .slice(0, 20)
      .map((m) => ({ id: m.threadId, subject: m.subject, from: m.from })),
    // Discovery results for targeted profile extraction
    discovery: Object.fromEntries(
      discoveryWithResults.map((r) => [
        r.category,
        { query: r.query, count: r.count, emails: r.emails },
      ])
    ),
  };
  fs.writeFileSync(profileOutputPath, JSON.stringify(profileData, null, 2));
  console.log(`\n✓ Profile data written to: ${profileOutputPath}`);

  // Write writing samples
  const writingSamplesData = {
    source: "gmail",
    analyzedAt: new Date().toISOString(),
    context: {
      timePeriod: "180d",
      sampleCount: writingSamples.length,
      minLength: 50,
    },
    samples: writingSamples,
  };
  fs.writeFileSync(
    writingSamplesOutputPath,
    JSON.stringify(writingSamplesData, null, 2)
  );
  console.log(`✓ Writing samples written to: ${writingSamplesOutputPath}`);
  console.log(` ${writingSamples.length} sent emails with analyzable content`);

  // Summary
  console.log(`\n✓ Gmail data collection complete`);
  console.log(` Period: ${profileData.period}`);
  console.log(` Inbox: ${inboxMessages.length} messages`);
  console.log(` Sent: ${sentDetails.length} messages`);
  console.log(` Contacts: ${contacts.length} (${profileData.summary.bidirectionalContacts} bidirectional)`);
  console.log(` Discovery: ${discoveryWithResults.length} categories with matches`);
  if (contacts.length > 0) {
    console.log(`\n Top contacts:`);
    contacts.slice(0, 5).forEach((c) => {
      const direction =
        c.bidirectional ? "↔" : c.receivedFrom > 0 ? "←" : "→";
      console.log(
        ` ${direction} ${c.name || c.email}: ${c.totalInteractions} emails`
      );
    });
  }

  // Final machine-readable result record on stdout
  console.log(
    JSON.stringify({
      success: true,
      profileOutputPath,
      writingSamplesOutputPath,
      inboxCount: inboxMessages.length,
      sentCount: sentDetails.length,
      contactCount: contacts.length,
      writingSampleCount: writingSamples.length,
      discoveryCategories: discoveryWithResults.length,
      discoveryHits: Object.keys(profileData.discovery),
    })
  );
} catch (error) {
  console.error("Failed:", error.message);
  throw error;
}