Analyze Notion Workspace
Analyzes Notion workspace activity from the last 90 days — documents, databases, collaborators, and recurring topics.
Source Code
import fs from "fs";
// CLI args: optional output paths for the two JSON artifacts this script writes.
const [
  analysisOutputPath = "session/notion-profile-analysis.json",
  writingSamplesOutputPath = "session/writing-samples.json",
] = process.argv.slice(2);
// Notion REST API base URL and shared request headers.
// NOTE(review): the bearer token is a placeholder — presumably substituted
// before the script runs; confirm the token-injection mechanism.
const NOTION_API = "https://api.notion.com/v1";
const headers = {
  Authorization: "Bearer PLACEHOLDER_TOKEN",
  "Content-Type": "application/json",
  "Notion-Version": "2022-06-28",
};
// 90 days ago, as a Date object — used as the activity cutoff for the search
// loop below (compared against each page's last_edited_time).
const ninetyDaysAgo = new Date(Date.now() - 90 * 24 * 60 * 60 * 1000);
const startDate = ninetyDaysAgo;
const endDate = new Date();
// Short human-readable date (e.g. "Jan 5") for the report's period label.
const formatDate = (d) =>
  d.toLocaleDateString("en-US", { month: "short", day: "numeric" });
/**
 * Concatenate the `plain_text` of every item in a Notion rich_text array.
 * Returns "" for anything that is not an array.
 */
function extractText(richText) {
  if (!Array.isArray(richText)) return "";
  let combined = "";
  for (const item of richText) {
    combined += item.plain_text || "";
  }
  return combined;
}
/**
 * Extract text content from blocks recursively.
 *
 * Each block may carry its own `text` and an optional `children` array of
 * nested blocks; the result is all text joined with newlines, depth-first.
 *
 * NOTE(review): this helper is not referenced elsewhere in this file —
 * confirm whether it is still needed.
 *
 * Bug fix: the original did `texts.push(...extractTextFromBlocks(children))`,
 * which spread the *string* returned by the recursive call into individual
 * characters, so nested text came out one character per line. The child
 * result is now pushed as a single entry.
 */
function extractTextFromBlocks(blocks) {
  const texts = [];
  for (const block of blocks) {
    if (block.text) {
      texts.push(block.text);
    }
    if (block.children) {
      texts.push(extractTextFromBlocks(block.children));
    }
  }
  return texts.join("\n");
}
// Entry point: pulls the last 90 days of Notion activity via the REST API and
// writes two JSON artifacts (profile analysis + writing samples) for a
// downstream agent to interpret. Throws on fatal errors after logging.
console.log("Fetching Notion data (last 90 days)...");
try {
  // 1. Get user info - for bot integrations, the owner is the actual user
  console.log("Fetching workspace data...");
  const meRes = await fetch(`${NOTION_API}/users/me`, { headers });
  if (!meRes.ok) throw new Error(`Failed to get user: ${meRes.status}`);
  const meData = await meRes.json();
  // If it's a bot, get the owner user; otherwise use the direct user
  let userName, userId;
  if (meData.type === "bot" && meData.bot?.owner?.user) {
    userName = meData.bot.owner.user.name || "Unknown User";
    userId = meData.bot.owner.user.id;
  } else {
    userName = meData.name || "Unknown User";
    userId = meData.id;
  }
  console.log(`✓ Connected as ${userName}`);
  // 2. Collect ALL documents user writes to (database entries + standalone pages)
  console.log("Finding all documents you've written to...");
  const allDocuments = [];
  const collaborators = new Map(); // userId -> { name, count }
  const topics = new Map(); // topic -> { count, dates: [docDates] }
  const contentTexts = []; // For writing style analysis
  let totalPagesSeen = 0;
  let pagesInDateRange = 0;
  // Search for all pages edited in last 90 days
  // Results are sorted by last_edited_time descending, so we can break early
  let cursor = undefined;
  let reachedCutoff = false;
  const maxPages = 150; // Max qualifying pages to collect
  while (!reachedCutoff && allDocuments.length < maxPages) {
    const searchRes = await fetch(`${NOTION_API}/search`, {
      method: "POST",
      headers,
      body: JSON.stringify({
        filter: { property: "object", value: "page" },
        sort: { direction: "descending", timestamp: "last_edited_time" },
        page_size: 100,
        ...(cursor && { start_cursor: cursor }),
      }),
    });
    // NOTE(review): a failed search page silently aborts pagination; the
    // partial results gathered so far are still analyzed — confirm intent.
    if (!searchRes.ok) break;
    const searchData = await searchRes.json();
    for (const page of searchData.results) {
      totalPagesSeen++;
      const editedAt = new Date(page.last_edited_time);
      // Since results are sorted by last_edited_time descending,
      // once we hit a page older than 90 days, all subsequent pages will be older too
      if (editedAt < ninetyDaysAgo) {
        reachedCutoff = true;
        break;
      }
      pagesInDateRange++;
      // Include pages the user created OR last edited
      // This captures the user's workspace activity
      const userCreated = page.created_by?.id === userId;
      const userLastEdited = page.last_edited_by?.id === userId;
      if (!userCreated && !userLastEdited) continue;
      // Track collaborators only on pages the user actually worked on
      // This shows who the user collaborates with, not everyone in the workspace
      // NOTE(review): a collaborator who both created AND last edited the same
      // page is counted twice by the two blocks below — confirm that
      // double-weighting is intended.
      if (page.created_by?.id && page.created_by.id !== userId) {
        const collabId = page.created_by.id;
        const existing = collaborators.get(collabId) || { count: 0 };
        existing.count++;
        collaborators.set(collabId, existing);
      }
      if (page.last_edited_by?.id && page.last_edited_by.id !== userId) {
        const collabId = page.last_edited_by.id;
        const existing = collaborators.get(collabId) || { count: 0 };
        existing.count++;
        collaborators.set(collabId, existing);
      }
      // The title lives under whichever property has type "title".
      const titleProp = Object.values(page.properties || {}).find(
        (p) => p.type === "title"
      );
      const title = titleProp?.title?.[0]?.plain_text || "Untitled";
      // Extract icon (but don't use it as URL)
      const icon =
        page.icon?.type === "emoji"
          ? page.icon.emoji
          : page.icon?.external?.url || page.icon?.file?.url || null;
      const doc = {
        id: page.id,
        title,
        icon,
        url: page.url,
        type:
          page.parent?.type === "database_id"
            ? "database_entry"
            : "standalone_page",
        parentId: page.parent?.database_id || page.parent?.page_id || null,
        createdTime: page.created_time,
        lastEditedTime: page.last_edited_time,
        createdBy: page.created_by?.id,
        lastEditedBy: page.last_edited_by?.id,
        userCreated,
      };
      allDocuments.push(doc);
      if (allDocuments.length >= maxPages) break;
    }
    if (
      !searchData.has_more ||
      reachedCutoff ||
      allDocuments.length >= maxPages
    )
      break;
    cursor = searchData.next_cursor;
  }
  console.log(
    ` Scanned ${totalPagesSeen} pages (${pagesInDateRange} in last 90 days), found ${allDocuments.length} you created/edited`
  );
  // Fetch names for collaborators (parallel, limited to top 20 by count)
  // We fetch more than needed because some will be filtered as bots
  const topCollabIds = Array.from(collaborators.entries())
    .sort((a, b) => b[1].count - a[1].count)
    .slice(0, 20)
    .map(([id]) => id);
  if (topCollabIds.length > 0) {
    console.log(
      ` Fetching info for ${topCollabIds.length} potential collaborators...`
    );
    const userPromises = topCollabIds.map(async (collabId) => {
      try {
        const userRes = await fetch(`${NOTION_API}/users/${collabId}`, {
          headers,
        });
        if (userRes.ok) {
          const userData = await userRes.json();
          // Filter out bots - only return real people
          if (userData.type === "bot") {
            return { id: collabId, name: null, isBot: true };
          }
          return {
            id: collabId,
            name: userData.name || "Unknown",
            isBot: false,
          };
        }
      } catch (e) {
        // Skip if can't fetch user info
      }
      // Non-OK response or fetch error: keep the id but leave it unnamed.
      return { id: collabId, name: null, isBot: false };
    });
    const userResults = await Promise.all(userPromises);
    for (const { id, name, isBot } of userResults) {
      if (isBot) {
        // Remove bots from collaborators entirely
        collaborators.delete(id);
      } else if (name && collaborators.has(id)) {
        collaborators.get(id).name = name;
      }
    }
  }
  // 3. Read content from sample documents for topic and style analysis (parallelized)
  console.log("Analyzing content from substantial documents...");
  // Analyze pages user created (standalone pages + database entries with substantial content)
  const createdPages = allDocuments.filter((d) => d.userCreated);
  // Prioritize standalone pages, but also include database entries
  const standaloneForAnalysis = createdPages.filter(
    (d) => d.type === "standalone_page"
  );
  const dbEntriesForAnalysis = createdPages.filter(
    (d) => d.type === "database_entry"
  );
  // Read more pages (up to 30 total) to have a good pool, then filter by length
  const sampleDocs = [
    ...standaloneForAnalysis.slice(0, 20),
    ...dbEntriesForAnalysis.slice(0, 10),
  ].slice(0, 30);
  // Read all content in parallel
  const contentPromises = sampleDocs.map(async (doc) => {
    try {
      // Read blocks directly via API (first 100 top-level blocks only;
      // nested child blocks are not fetched).
      const blocksUrl = new URL(`${NOTION_API}/blocks/${doc.id}/children`);
      blocksUrl.searchParams.set("page_size", "100");
      const blocksRes = await fetch(blocksUrl.toString(), {
        method: "GET",
        headers,
      });
      if (blocksRes.ok) {
        const blocksData = await blocksRes.json();
        const blocks = blocksData.results || [];
        const texts = [];
        for (const block of blocks) {
          // A block's text lives under a key named after its type,
          // e.g. block.paragraph.rich_text.
          const blockType = block.type;
          const content = block[blockType];
          if (content?.rich_text) {
            texts.push(extractText(content.rich_text));
          }
        }
        if (texts.length > 0) {
          const totalLength = texts.join(" ").length;
          return {
            docId: doc.id,
            title: doc.title,
            texts,
            totalLength,
            blocks,
            docType: doc.type,
          };
        }
      }
    } catch (e) {
      // Skip if can't read content
      return null;
    }
    return null;
  });
  const contentResults = await Promise.all(contentPromises);
  // Filter to substantial content (at least 500 characters) and sort by length
  // This filters out short database entries that are just metadata
  const substantialContent = contentResults
    .filter((result) => result && result.totalLength >= 500)
    .sort((a, b) => b.totalLength - a.totalLength)
    .slice(0, 15); // Take top 15 longest pages
  console.log(
    ` Read ${contentResults.filter((r) => r).length} pages, analyzing ${
      substantialContent.length
    } substantial documents`
  );
  // Separate standalone pages from database entries for reporting
  const substantialStandalone = substantialContent.filter(
    (r) => r.docType === "standalone_page"
  );
  const substantialDbEntries = substantialContent.filter(
    (r) => r.docType === "database_entry"
  );
  // Build doc lookup map for O(1) access
  const docById = new Map(allDocuments.map((d) => [d.id, d]));
  // Process results and extract topics with date tracking
  for (const result of substantialContent) {
    if (!result) continue;
    contentTexts.push({
      docId: result.docId,
      title: result.title,
      texts: result.texts,
    });
    // Get document date for topic recurrence tracking
    const doc = docById.get(result.docId);
    const docDate = doc?.lastEditedTime || doc?.createdTime || null;
    // Extract topics (simple keyword extraction)
    const allText = result.texts.join(" ").toLowerCase();
    const words = allText.match(/\b[a-z]{4,}\b/g) || [];
    const wordCounts = {};
    for (const word of words) {
      // NOTE(review): the regex already requires 4+ letters and this check
      // keeps only 5+ letter words, so 4-letter words are never counted —
      // confirm whether that is intended.
      if (word.length > 4) {
        wordCounts[word] = (wordCounts[word] || 0) + 1;
      }
    }
    const topWords = Object.entries(wordCounts)
      .sort((a, b) => b[1] - a[1])
      .slice(0, 5)
      .map(([word, count]) => word); // count is unused after sorting
    for (const topic of topWords) {
      const existing = topics.get(topic) || { count: 0, dates: [] };
      existing.count++;
      if (docDate) existing.dates.push(docDate);
      topics.set(topic, existing);
    }
  }
  console.log(
    ` Analyzed content from ${contentTexts.length} substantial documents`
  );
  // 4. Group documents by type and activity
  const databaseEntries = allDocuments.filter(
    (d) => d.type === "database_entry"
  );
  const standalonePages = allDocuments.filter(
    (d) => d.type === "standalone_page"
  );
  // Group database entries by parent database
  const databaseGroups = new Map();
  for (const entry of databaseEntries) {
    if (!entry.parentId) continue;
    const existing = databaseGroups.get(entry.parentId) || [];
    existing.push(entry);
    databaseGroups.set(entry.parentId, existing);
  }
  // Get database info for groups (parallelized)
  const databaseInfoPromises = Array.from(databaseGroups.entries()).map(
    async ([dbId, entries]) => {
      try {
        const dbRes = await fetch(`${NOTION_API}/databases/${dbId}`, {
          headers,
        });
        if (dbRes.ok) {
          const dbData = await dbRes.json();
          const dbTitle = dbData.title?.[0]?.plain_text || "Untitled Database";
          const dbIcon =
            dbData.icon?.type === "emoji"
              ? dbData.icon.emoji
              : dbData.icon?.external?.url || dbData.icon?.file?.url || null;
          return {
            dbId,
            info: {
              title: dbTitle,
              icon: dbIcon,
              url: dbData.url, // Use actual database URL
              entryCount: entries.length,
            },
          };
        }
      } catch (e) {
        // Skip if can't fetch database info
      }
      return null;
    }
  );
  const databaseInfoResults = await Promise.all(databaseInfoPromises);
  const databaseInfo = new Map();
  for (const result of databaseInfoResults) {
    if (result) {
      databaseInfo.set(result.dbId, result.info);
    }
  }
  // 5. Save analysis for agent to interpret
  // Ensure output directories exist
  // NOTE(review): parent directories are derived by splitting on "/" only —
  // assumes POSIX-style paths; Windows "\" separators would not be handled.
  const analysisDir = analysisOutputPath.substring(
    0,
    analysisOutputPath.lastIndexOf("/")
  );
  const writingSamplesDir = writingSamplesOutputPath.substring(
    0,
    writingSamplesOutputPath.lastIndexOf("/")
  );
  if (analysisDir) fs.mkdirSync(analysisDir, { recursive: true });
  if (writingSamplesDir) fs.mkdirSync(writingSamplesDir, { recursive: true });
  // Write writing samples for style analysis (with source field for shared task)
  const writingSamples = {
    source: "notion",
    samples: substantialContent.map((result) => {
      const doc = docById.get(result.docId);
      return {
        id: result.docId,
        title: result.title,
        type: result.docType,
        text: result.texts.join("\n\n"),
        wordCount: result.texts.join(" ").split(/\s+/).length,
        characterCount: result.totalLength,
        createdBy: doc?.createdBy || null,
        lastEditedBy: doc?.lastEditedBy || null,
        userCreated: doc?.userCreated || false,
      };
    }),
  };
  fs.writeFileSync(
    writingSamplesOutputPath,
    JSON.stringify(writingSamples, null, 2)
  );
  console.log(
    ` Wrote ${writingSamples.samples.length} writing samples to ${writingSamplesOutputPath}`
  );
  const topCollaborators = Array.from(collaborators.entries())
    .map(([id, data]) => ({ id, ...data }))
    .sort((a, b) => b.count - a.count)
    .slice(0, 10);
  const topTopics = Array.from(topics.entries())
    .map(([topic, data]) => {
      // ISO-8601 timestamps sort lexicographically in chronological order,
      // so a default string sort is sufficient here (mutates data.dates,
      // which is not read again afterwards).
      const sortedDates = data.dates.sort();
      const earliest = sortedDates[0] || null;
      const latest = sortedDates[sortedDates.length - 1] || null;
      // Calculate date spread in days
      const spreadDays =
        earliest && latest
          ? Math.round(
              (new Date(latest) - new Date(earliest)) / (1000 * 60 * 60 * 24)
            )
          : 0;
      return {
        topic,
        count: data.count,
        earliest,
        latest,
        spreadDays,
      };
    })
    .sort((a, b) => b.count - a.count)
    .slice(0, 20);
  const analysisData = {
    user: userName,
    userId,
    period: `${formatDate(startDate)} - ${formatDate(endDate)}`,
    documents: {
      total: allDocuments.length,
      databaseEntries: databaseEntries.length,
      standalonePages: standalonePages.length,
      substantialStandalone: substantialStandalone.length,
      substantialDbEntries: substantialDbEntries.length,
    },
    databases: Array.from(databaseInfo.entries()).map(([id, info]) => ({
      id,
      ...info,
    })),
    collaborators: topCollaborators,
    topics: topTopics,
    sampleDocuments: allDocuments.slice(0, 30).map((d) => ({
      id: d.id,
      title: d.title,
      type: d.type,
      url: d.url,
      lastEdited: d.lastEditedTime,
    })),
  };
  fs.writeFileSync(analysisOutputPath, JSON.stringify(analysisData, null, 2));
  console.log(` Saved analysis data to ${analysisOutputPath}`);
  console.log(`\n✓ Notion analysis complete`);
  // Final machine-readable summary line for the caller.
  console.log(
    JSON.stringify({
      success: true,
      analysisFile: analysisOutputPath,
      writingSamplesFile: writingSamplesOutputPath,
      user: userName,
      documentCount: allDocuments.length,
      databaseCount: databaseInfo.size,
      collaboratorCount: topCollaborators.length,
      topicsFound: topTopics.length,
      substantialStandaloneCount: substantialStandalone.length,
    })
  );
} catch (error) {
  // Log and rethrow so the process exits non-zero on failure.
  console.error("Failed:", error.message);
  throw error;
} import fs from "fs";
const [
analysisOutputPath = "session/notion-profile-analysis.json",
writingSamplesOutputPath = "session/writing-samples.json",
] = process.argv.slice(2);
const NOTION_API = "https://api.notion.com/v1";
const headers = {
Authorization: "Bearer PLACEHOLDER_TOKEN",
"Content-Type": "application/json",
"Notion-Version": "2022-06-28",
};
// 90 days ago as ISO string
const ninetyDaysAgo = new Date(Date.now() - 90 * 24 * 60 * 60 * 1000);
const startDate = ninetyDaysAgo;
const endDate = new Date();
const formatDate = (d) =>
d.toLocaleDateString("en-US", { month: "short", day: "numeric" });
/**
* Extract text from rich_text array
*/
function extractText(richText) {
if (!richText || !Array.isArray(richText)) return "";
return richText.map((t) => t.plain_text || "").join("");
}
/**
* Extract text content from blocks recursively
*/
function extractTextFromBlocks(blocks) {
const texts = [];
for (const block of blocks) {
if (block.text) {
texts.push(block.text);
}
if (block.children) {
texts.push(...extractTextFromBlocks(block.children));
}
}
return texts.join("\n");
}
console.log("Fetching Notion data (last 90 days)...");
try {
// 1. Get user info - for bot integrations, the owner is the actual user
console.log("Fetching workspace data...");
const meRes = await fetch(`${NOTION_API}/users/me`, { headers });
if (!meRes.ok) throw new Error(`Failed to get user: ${meRes.status}`);
const meData = await meRes.json();
// If it's a bot, get the owner user; otherwise use the direct user
let userName, userId;
if (meData.type === "bot" && meData.bot?.owner?.user) {
userName = meData.bot.owner.user.name || "Unknown User";
userId = meData.bot.owner.user.id;
} else {
userName = meData.name || "Unknown User";
userId = meData.id;
}
console.log(`✓ Connected as ${userName}`);
// 2. Collect ALL documents user writes to (database entries + standalone pages)
console.log("Finding all documents you've written to...");
const allDocuments = [];
const collaborators = new Map(); // userId -> { name, count }
const topics = new Map(); // topic -> { count, dates: [docDates] }
const contentTexts = []; // For writing style analysis
let totalPagesSeen = 0;
let pagesInDateRange = 0;
// Search for all pages edited in last 90 days
// Results are sorted by last_edited_time descending, so we can break early
let cursor = undefined;
let reachedCutoff = false;
const maxPages = 150; // Max qualifying pages to collect
while (!reachedCutoff && allDocuments.length < maxPages) {
const searchRes = await fetch(`${NOTION_API}/search`, {
method: "POST",
headers,
body: JSON.stringify({
filter: { property: "object", value: "page" },
sort: { direction: "descending", timestamp: "last_edited_time" },
page_size: 100,
...(cursor && { start_cursor: cursor }),
}),
});
if (!searchRes.ok) break;
const searchData = await searchRes.json();
for (const page of searchData.results) {
totalPagesSeen++;
const editedAt = new Date(page.last_edited_time);
// Since results are sorted by last_edited_time descending,
// once we hit a page older than 90 days, all subsequent pages will be older too
if (editedAt < ninetyDaysAgo) {
reachedCutoff = true;
break;
}
pagesInDateRange++;
// Include pages the user created OR last edited
// This captures the user's workspace activity
const userCreated = page.created_by?.id === userId;
const userLastEdited = page.last_edited_by?.id === userId;
if (!userCreated && !userLastEdited) continue;
// Track collaborators only on pages the user actually worked on
// This shows who the user collaborates with, not everyone in the workspace
if (page.created_by?.id && page.created_by.id !== userId) {
const collabId = page.created_by.id;
const existing = collaborators.get(collabId) || { count: 0 };
existing.count++;
collaborators.set(collabId, existing);
}
if (page.last_edited_by?.id && page.last_edited_by.id !== userId) {
const collabId = page.last_edited_by.id;
const existing = collaborators.get(collabId) || { count: 0 };
existing.count++;
collaborators.set(collabId, existing);
}
const titleProp = Object.values(page.properties || {}).find(
(p) => p.type === "title"
);
const title = titleProp?.title?.[0]?.plain_text || "Untitled";
// Extract icon (but don't use it as URL)
const icon =
page.icon?.type === "emoji"
? page.icon.emoji
: page.icon?.external?.url || page.icon?.file?.url || null;
const doc = {
id: page.id,
title,
icon,
url: page.url,
type:
page.parent?.type === "database_id"
? "database_entry"
: "standalone_page",
parentId: page.parent?.database_id || page.parent?.page_id || null,
createdTime: page.created_time,
lastEditedTime: page.last_edited_time,
createdBy: page.created_by?.id,
lastEditedBy: page.last_edited_by?.id,
userCreated,
};
allDocuments.push(doc);
if (allDocuments.length >= maxPages) break;
}
if (
!searchData.has_more ||
reachedCutoff ||
allDocuments.length >= maxPages
)
break;
cursor = searchData.next_cursor;
}
console.log(
` Scanned ${totalPagesSeen} pages (${pagesInDateRange} in last 90 days), found ${allDocuments.length} you created/edited`
);
// Fetch names for collaborators (parallel, limited to top 20 by count)
// We fetch more than needed because some will be filtered as bots
const topCollabIds = Array.from(collaborators.entries())
.sort((a, b) => b[1].count - a[1].count)
.slice(0, 20)
.map(([id]) => id);
if (topCollabIds.length > 0) {
console.log(
` Fetching info for ${topCollabIds.length} potential collaborators...`
);
const userPromises = topCollabIds.map(async (collabId) => {
try {
const userRes = await fetch(`${NOTION_API}/users/${collabId}`, {
headers,
});
if (userRes.ok) {
const userData = await userRes.json();
// Filter out bots - only return real people
if (userData.type === "bot") {
return { id: collabId, name: null, isBot: true };
}
return {
id: collabId,
name: userData.name || "Unknown",
isBot: false,
};
}
} catch (e) {
// Skip if can't fetch user info
}
return { id: collabId, name: null, isBot: false };
});
const userResults = await Promise.all(userPromises);
for (const { id, name, isBot } of userResults) {
if (isBot) {
// Remove bots from collaborators entirely
collaborators.delete(id);
} else if (name && collaborators.has(id)) {
collaborators.get(id).name = name;
}
}
}
// 3. Read content from sample documents for topic and style analysis (parallelized)
console.log("Analyzing content from substantial documents...");
// Analyze pages user created (standalone pages + database entries with substantial content)
const createdPages = allDocuments.filter((d) => d.userCreated);
// Prioritize standalone pages, but also include database entries
const standaloneForAnalysis = createdPages.filter(
(d) => d.type === "standalone_page"
);
const dbEntriesForAnalysis = createdPages.filter(
(d) => d.type === "database_entry"
);
// Read more pages (up to 30 total) to have a good pool, then filter by length
const sampleDocs = [
...standaloneForAnalysis.slice(0, 20),
...dbEntriesForAnalysis.slice(0, 10),
].slice(0, 30);
// Read all content in parallel
const contentPromises = sampleDocs.map(async (doc) => {
try {
// Read blocks directly via API
const blocksUrl = new URL(`${NOTION_API}/blocks/${doc.id}/children`);
blocksUrl.searchParams.set("page_size", "100");
const blocksRes = await fetch(blocksUrl.toString(), {
method: "GET",
headers,
});
if (blocksRes.ok) {
const blocksData = await blocksRes.json();
const blocks = blocksData.results || [];
const texts = [];
for (const block of blocks) {
const blockType = block.type;
const content = block[blockType];
if (content?.rich_text) {
texts.push(extractText(content.rich_text));
}
}
if (texts.length > 0) {
const totalLength = texts.join(" ").length;
return {
docId: doc.id,
title: doc.title,
texts,
totalLength,
blocks,
docType: doc.type,
};
}
}
} catch (e) {
// Skip if can't read content
return null;
}
return null;
});
const contentResults = await Promise.all(contentPromises);
// Filter to substantial content (at least 500 characters) and sort by length
// This filters out short database entries that are just metadata
const substantialContent = contentResults
.filter((result) => result && result.totalLength >= 500)
.sort((a, b) => b.totalLength - a.totalLength)
.slice(0, 15); // Take top 15 longest pages
console.log(
` Read ${contentResults.filter((r) => r).length} pages, analyzing ${
substantialContent.length
} substantial documents`
);
// Separate standalone pages from database entries for reporting
const substantialStandalone = substantialContent.filter(
(r) => r.docType === "standalone_page"
);
const substantialDbEntries = substantialContent.filter(
(r) => r.docType === "database_entry"
);
// Build doc lookup map for O(1) access
const docById = new Map(allDocuments.map((d) => [d.id, d]));
// Process results and extract topics with date tracking
for (const result of substantialContent) {
if (!result) continue;
contentTexts.push({
docId: result.docId,
title: result.title,
texts: result.texts,
});
// Get document date for topic recurrence tracking
const doc = docById.get(result.docId);
const docDate = doc?.lastEditedTime || doc?.createdTime || null;
// Extract topics (simple keyword extraction)
const allText = result.texts.join(" ").toLowerCase();
const words = allText.match(/\b[a-z]{4,}\b/g) || [];
const wordCounts = {};
for (const word of words) {
if (word.length > 4) {
wordCounts[word] = (wordCounts[word] || 0) + 1;
}
}
const topWords = Object.entries(wordCounts)
.sort((a, b) => b[1] - a[1])
.slice(0, 5)
.map(([word, count]) => word);
for (const topic of topWords) {
const existing = topics.get(topic) || { count: 0, dates: [] };
existing.count++;
if (docDate) existing.dates.push(docDate);
topics.set(topic, existing);
}
}
console.log(
` Analyzed content from ${contentTexts.length} substantial documents`
);
// 4. Group documents by type and activity
const databaseEntries = allDocuments.filter(
(d) => d.type === "database_entry"
);
const standalonePages = allDocuments.filter(
(d) => d.type === "standalone_page"
);
// Group database entries by parent database
const databaseGroups = new Map();
for (const entry of databaseEntries) {
if (!entry.parentId) continue;
const existing = databaseGroups.get(entry.parentId) || [];
existing.push(entry);
databaseGroups.set(entry.parentId, existing);
}
// Get database info for groups (parallelized)
const databaseInfoPromises = Array.from(databaseGroups.entries()).map(
async ([dbId, entries]) => {
try {
const dbRes = await fetch(`${NOTION_API}/databases/${dbId}`, {
headers,
});
if (dbRes.ok) {
const dbData = await dbRes.json();
const dbTitle = dbData.title?.[0]?.plain_text || "Untitled Database";
const dbIcon =
dbData.icon?.type === "emoji"
? dbData.icon.emoji
: dbData.icon?.external?.url || dbData.icon?.file?.url || null;
return {
dbId,
info: {
title: dbTitle,
icon: dbIcon,
url: dbData.url, // Use actual database URL
entryCount: entries.length,
},
};
}
} catch (e) {
// Skip if can't fetch database info
}
return null;
}
);
const databaseInfoResults = await Promise.all(databaseInfoPromises);
const databaseInfo = new Map();
for (const result of databaseInfoResults) {
if (result) {
databaseInfo.set(result.dbId, result.info);
}
}
// 5. Save analysis for agent to interpret
// Ensure output directories exist
const analysisDir = analysisOutputPath.substring(
0,
analysisOutputPath.lastIndexOf("/")
);
const writingSamplesDir = writingSamplesOutputPath.substring(
0,
writingSamplesOutputPath.lastIndexOf("/")
);
if (analysisDir) fs.mkdirSync(analysisDir, { recursive: true });
if (writingSamplesDir) fs.mkdirSync(writingSamplesDir, { recursive: true });
// Write writing samples for style analysis (with source field for shared task)
const writingSamples = {
source: "notion",
samples: substantialContent.map((result) => {
const doc = docById.get(result.docId);
return {
id: result.docId,
title: result.title,
type: result.docType,
text: result.texts.join("\n\n"),
wordCount: result.texts.join(" ").split(/\s+/).length,
characterCount: result.totalLength,
createdBy: doc?.createdBy || null,
lastEditedBy: doc?.lastEditedBy || null,
userCreated: doc?.userCreated || false,
};
}),
};
fs.writeFileSync(
writingSamplesOutputPath,
JSON.stringify(writingSamples, null, 2)
);
console.log(
` Wrote ${writingSamples.samples.length} writing samples to ${writingSamplesOutputPath}`
);
const topCollaborators = Array.from(collaborators.entries())
.map(([id, data]) => ({ id, ...data }))
.sort((a, b) => b.count - a.count)
.slice(0, 10);
const topTopics = Array.from(topics.entries())
.map(([topic, data]) => {
const sortedDates = data.dates.sort();
const earliest = sortedDates[0] || null;
const latest = sortedDates[sortedDates.length - 1] || null;
// Calculate date spread in days
const spreadDays =
earliest && latest
? Math.round(
(new Date(latest) - new Date(earliest)) / (1000 * 60 * 60 * 24)
)
: 0;
return {
topic,
count: data.count,
earliest,
latest,
spreadDays,
};
})
.sort((a, b) => b.count - a.count)
.slice(0, 20);
const analysisData = {
user: userName,
userId,
period: `${formatDate(startDate)} - ${formatDate(endDate)}`,
documents: {
total: allDocuments.length,
databaseEntries: databaseEntries.length,
standalonePages: standalonePages.length,
substantialStandalone: substantialStandalone.length,
substantialDbEntries: substantialDbEntries.length,
},
databases: Array.from(databaseInfo.entries()).map(([id, info]) => ({
id,
...info,
})),
collaborators: topCollaborators,
topics: topTopics,
sampleDocuments: allDocuments.slice(0, 30).map((d) => ({
id: d.id,
title: d.title,
type: d.type,
url: d.url,
lastEdited: d.lastEditedTime,
})),
};
fs.writeFileSync(analysisOutputPath, JSON.stringify(analysisData, null, 2));
console.log(` Saved analysis data to ${analysisOutputPath}`);
console.log(`\n✓ Notion analysis complete`);
console.log(
JSON.stringify({
success: true,
analysisFile: analysisOutputPath,
writingSamplesFile: writingSamplesOutputPath,
user: userName,
documentCount: allDocuments.length,
databaseCount: databaseInfo.size,
collaboratorCount: topCollaborators.length,
topicsFound: topTopics.length,
substantialStandaloneCount: substantialStandalone.length,
})
);
} catch (error) {
console.error("Failed:", error.message);
throw error;
}