Analyze Notion Workspace

Analyze Notion workspace activity from the last 90 days — documents, databases, collaborators, and topics.

Source Code

import fs from "fs";
import path from "path";

const [
  analysisOutputPath = "session/notion-profile-analysis.json",
  writingSamplesOutputPath = "session/writing-samples.json",
] = process.argv.slice(2);

const NOTION_API = "https://api.notion.com/v1";
const headers = {
  Authorization: "Bearer PLACEHOLDER_TOKEN",
  "Content-Type": "application/json",
  "Notion-Version": "2022-06-28",
};

// 90 days ago as ISO string
const ninetyDaysAgo = new Date(Date.now() - 90 * 24 * 60 * 60 * 1000);
const startDate = ninetyDaysAgo;
const endDate = new Date();
const formatDate = (d) =>
  d.toLocaleDateString("en-US", { month: "short", day: "numeric" });

/**
 * Extract text from rich_text array
 */
function extractText(richText) {
  if (!richText || !Array.isArray(richText)) return "";
  return richText.map((t) => t.plain_text || "").join("");
}

/**
 * Extract text content from blocks recursively
 */
function extractTextFromBlocks(blocks) {
  const texts = [];
  for (const block of blocks) {
    if (block.text) {
      texts.push(block.text);
    }
    if (block.children) {
      texts.push(...extractTextFromBlocks(block.children));
    }
  }
  return texts.join("\n");
}

console.log("Fetching Notion data (last 90 days)...");

try {
  // 1. Get user info - for bot integrations, the owner is the actual user
  console.log("Fetching workspace data...");

  const meRes = await fetch(`${NOTION_API}/users/me`, { headers });
  if (!meRes.ok) throw new Error(`Failed to get user: ${meRes.status}`);

  const meData = await meRes.json();

  // If it's a bot, get the owner user; otherwise use the direct user
  let userName, userId;
  if (meData.type === "bot" && meData.bot?.owner?.user) {
    userName = meData.bot.owner.user.name || "Unknown User";
    userId = meData.bot.owner.user.id;
  } else {
    userName = meData.name || "Unknown User";
    userId = meData.id;
  }

  console.log(`✓ Connected as ${userName}`);

  // 2. Collect ALL documents user writes to (database entries + standalone pages)
  console.log("Finding all documents you've written to...");

  const allDocuments = [];
  const collaborators = new Map(); // userId -> { name, count }
  const topics = new Map(); // topic -> { count, dates: [docDates] }
  const contentTexts = []; // For writing style analysis
  let totalPagesSeen = 0;
  let pagesInDateRange = 0;

  // Search for all pages edited in last 90 days
  // Results are sorted by last_edited_time descending, so we can break early
  let cursor = undefined;
  let reachedCutoff = false;
  const maxPages = 150; // Max qualifying pages to collect

  while (!reachedCutoff && allDocuments.length < maxPages) {
    const searchRes = await fetch(`${NOTION_API}/search`, {
      method: "POST",
      headers,
      body: JSON.stringify({
        filter: { property: "object", value: "page" },
        sort: { direction: "descending", timestamp: "last_edited_time" },
        page_size: 100,
        ...(cursor && { start_cursor: cursor }),
      }),
    });

    if (!searchRes.ok) break;
    const searchData = await searchRes.json();

    for (const page of searchData.results) {
      totalPagesSeen++;
      const editedAt = new Date(page.last_edited_time);

      // Since results are sorted by last_edited_time descending,
      // once we hit a page older than 90 days, all subsequent pages will be older too
      if (editedAt < ninetyDaysAgo) {
        reachedCutoff = true;
        break;
      }
      pagesInDateRange++;

      // Include pages the user created OR last edited
      // This captures the user's workspace activity
      const userCreated = page.created_by?.id === userId;
      const userLastEdited = page.last_edited_by?.id === userId;

      if (!userCreated && !userLastEdited) continue;

      // Track collaborators only on pages the user actually worked on
      // This shows who the user collaborates with, not everyone in the workspace
      if (page.created_by?.id && page.created_by.id !== userId) {
        const collabId = page.created_by.id;
        const existing = collaborators.get(collabId) || { count: 0 };
        existing.count++;
        collaborators.set(collabId, existing);
      }
      if (page.last_edited_by?.id && page.last_edited_by.id !== userId) {
        const collabId = page.last_edited_by.id;
        const existing = collaborators.get(collabId) || { count: 0 };
        existing.count++;
        collaborators.set(collabId, existing);
      }

      const titleProp = Object.values(page.properties || {}).find(
        (p) => p.type === "title"
      );
      const title = titleProp?.title?.[0]?.plain_text || "Untitled";

      // Extract icon (but don't use it as URL)
      const icon =
        page.icon?.type === "emoji"
          ? page.icon.emoji
          : page.icon?.external?.url || page.icon?.file?.url || null;

      const doc = {
        id: page.id,
        title,
        icon,
        url: page.url,
        type:
          page.parent?.type === "database_id"
            ? "database_entry"
            : "standalone_page",
        parentId: page.parent?.database_id || page.parent?.page_id || null,
        createdTime: page.created_time,
        lastEditedTime: page.last_edited_time,
        createdBy: page.created_by?.id,
        lastEditedBy: page.last_edited_by?.id,
        userCreated,
      };

      allDocuments.push(doc);
      if (allDocuments.length >= maxPages) break;
    }

    if (
      !searchData.has_more ||
      reachedCutoff ||
      allDocuments.length >= maxPages
    )
      break;
    cursor = searchData.next_cursor;
  }

  console.log(
    `  Scanned ${totalPagesSeen} pages (${pagesInDateRange} in last 90 days), found ${allDocuments.length} you created/edited`
  );

  // Fetch names for collaborators (parallel, limited to top 20 by count)
  // We fetch more than needed because some will be filtered as bots
  const topCollabIds = Array.from(collaborators.entries())
    .sort((a, b) => b[1].count - a[1].count)
    .slice(0, 20)
    .map(([id]) => id);

  if (topCollabIds.length > 0) {
    console.log(
      `  Fetching info for ${topCollabIds.length} potential collaborators...`
    );
    const userPromises = topCollabIds.map(async (collabId) => {
      try {
        const userRes = await fetch(`${NOTION_API}/users/${collabId}`, {
          headers,
        });
        if (userRes.ok) {
          const userData = await userRes.json();
          // Filter out bots - only return real people
          if (userData.type === "bot") {
            return { id: collabId, name: null, isBot: true };
          }
          return {
            id: collabId,
            name: userData.name || "Unknown",
            isBot: false,
          };
        }
      } catch (e) {
        // Skip if can't fetch user info
      }
      return { id: collabId, name: null, isBot: false };
    });

    const userResults = await Promise.all(userPromises);
    for (const { id, name, isBot } of userResults) {
      if (isBot) {
        // Remove bots from collaborators entirely
        collaborators.delete(id);
      } else if (name && collaborators.has(id)) {
        collaborators.get(id).name = name;
      }
    }
  }

  // 3. Read content from sample documents for topic and style analysis (parallelized)
  console.log("Analyzing content from substantial documents...");

  // Analyze pages user created (standalone pages + database entries with substantial content)
  const createdPages = allDocuments.filter((d) => d.userCreated);

  // Prioritize standalone pages, but also include database entries
  const standaloneForAnalysis = createdPages.filter(
    (d) => d.type === "standalone_page"
  );
  const dbEntriesForAnalysis = createdPages.filter(
    (d) => d.type === "database_entry"
  );

  // Read more pages (up to 30 total) to have a good pool, then filter by length
  const sampleDocs = [
    ...standaloneForAnalysis.slice(0, 20),
    ...dbEntriesForAnalysis.slice(0, 10),
  ].slice(0, 30);

  // Read all content in parallel
  const contentPromises = sampleDocs.map(async (doc) => {
    try {
      // Read blocks directly via API
      const blocksUrl = new URL(`${NOTION_API}/blocks/${doc.id}/children`);
      blocksUrl.searchParams.set("page_size", "100");
      const blocksRes = await fetch(blocksUrl.toString(), {
        method: "GET",
        headers,
      });

      if (blocksRes.ok) {
        const blocksData = await blocksRes.json();
        const blocks = blocksData.results || [];
        const texts = [];

        for (const block of blocks) {
          const blockType = block.type;
          const content = block[blockType];
          if (content?.rich_text) {
            texts.push(extractText(content.rich_text));
          }
        }

        if (texts.length > 0) {
          const totalLength = texts.join(" ").length;
          return {
            docId: doc.id,
            title: doc.title,
            texts,
            totalLength,
            blocks,
            docType: doc.type,
          };
        }
      }
    } catch (e) {
      // Skip if can't read content
      return null;
    }
    return null;
  });

  const contentResults = await Promise.all(contentPromises);

  // Filter to substantial content (at least 500 characters) and sort by length
  // This filters out short database entries that are just metadata
  const substantialContent = contentResults
    .filter((result) => result && result.totalLength >= 500)
    .sort((a, b) => b.totalLength - a.totalLength)
    .slice(0, 15); // Take top 15 longest pages

  console.log(
    `  Read ${contentResults.filter((r) => r).length} pages, analyzing ${
      substantialContent.length
    } substantial documents`
  );

  // Separate standalone pages from database entries for reporting
  const substantialStandalone = substantialContent.filter(
    (r) => r.docType === "standalone_page"
  );
  const substantialDbEntries = substantialContent.filter(
    (r) => r.docType === "database_entry"
  );

  // Build doc lookup map for O(1) access
  const docById = new Map(allDocuments.map((d) => [d.id, d]));

  // Process results and extract topics with date tracking
  for (const result of substantialContent) {
    if (!result) continue;

    contentTexts.push({
      docId: result.docId,
      title: result.title,
      texts: result.texts,
    });

    // Get document date for topic recurrence tracking
    const doc = docById.get(result.docId);
    const docDate = doc?.lastEditedTime || doc?.createdTime || null;

    // Extract topics (simple keyword extraction)
    const allText = result.texts.join(" ").toLowerCase();
    const words = allText.match(/\b[a-z]{4,}\b/g) || [];
    const wordCounts = {};
    for (const word of words) {
      if (word.length > 4) {
        wordCounts[word] = (wordCounts[word] || 0) + 1;
      }
    }
    const topWords = Object.entries(wordCounts)
      .sort((a, b) => b[1] - a[1])
      .slice(0, 5)
      .map(([word, count]) => word);

    for (const topic of topWords) {
      const existing = topics.get(topic) || { count: 0, dates: [] };
      existing.count++;
      if (docDate) existing.dates.push(docDate);
      topics.set(topic, existing);
    }
  }

  console.log(
    `  Analyzed content from ${contentTexts.length} substantial documents`
  );

  // 4. Group documents by type and activity
  const databaseEntries = allDocuments.filter(
    (d) => d.type === "database_entry"
  );
  const standalonePages = allDocuments.filter(
    (d) => d.type === "standalone_page"
  );

  // Group database entries by parent database
  const databaseGroups = new Map();
  for (const entry of databaseEntries) {
    if (!entry.parentId) continue;
    const existing = databaseGroups.get(entry.parentId) || [];
    existing.push(entry);
    databaseGroups.set(entry.parentId, existing);
  }

  // Get database info for groups (parallelized)
  const databaseInfoPromises = Array.from(databaseGroups.entries()).map(
    async ([dbId, entries]) => {
      try {
        const dbRes = await fetch(`${NOTION_API}/databases/${dbId}`, {
          headers,
        });
        if (dbRes.ok) {
          const dbData = await dbRes.json();
          const dbTitle = dbData.title?.[0]?.plain_text || "Untitled Database";
          const dbIcon =
            dbData.icon?.type === "emoji"
              ? dbData.icon.emoji
              : dbData.icon?.external?.url || dbData.icon?.file?.url || null;
          return {
            dbId,
            info: {
              title: dbTitle,
              icon: dbIcon,
              url: dbData.url, // Use actual database URL
              entryCount: entries.length,
            },
          };
        }
      } catch (e) {
        // Skip if can't fetch database info
      }
      return null;
    }
  );

  const databaseInfoResults = await Promise.all(databaseInfoPromises);
  const databaseInfo = new Map();
  for (const result of databaseInfoResults) {
    if (result) {
      databaseInfo.set(result.dbId, result.info);
    }
  }

  // 5. Save analysis for agent to interpret
  // Ensure output directories exist
  const analysisDir = analysisOutputPath.substring(
    0,
    analysisOutputPath.lastIndexOf("/")
  );
  const writingSamplesDir = writingSamplesOutputPath.substring(
    0,
    writingSamplesOutputPath.lastIndexOf("/")
  );
  if (analysisDir) fs.mkdirSync(analysisDir, { recursive: true });
  if (writingSamplesDir) fs.mkdirSync(writingSamplesDir, { recursive: true });

  // Write writing samples for style analysis (with source field for shared task)
  const writingSamples = {
    source: "notion",
    samples: substantialContent.map((result) => {
      const doc = docById.get(result.docId);
      return {
        id: result.docId,
        title: result.title,
        type: result.docType,
        text: result.texts.join("\n\n"),
        wordCount: result.texts.join(" ").split(/\s+/).length,
        characterCount: result.totalLength,
        createdBy: doc?.createdBy || null,
        lastEditedBy: doc?.lastEditedBy || null,
        userCreated: doc?.userCreated || false,
      };
    }),
  };
  fs.writeFileSync(
    writingSamplesOutputPath,
    JSON.stringify(writingSamples, null, 2)
  );
  console.log(
    `  Wrote ${writingSamples.samples.length} writing samples to ${writingSamplesOutputPath}`
  );

  const topCollaborators = Array.from(collaborators.entries())
    .map(([id, data]) => ({ id, ...data }))
    .sort((a, b) => b.count - a.count)
    .slice(0, 10);

  const topTopics = Array.from(topics.entries())
    .map(([topic, data]) => {
      const sortedDates = data.dates.sort();
      const earliest = sortedDates[0] || null;
      const latest = sortedDates[sortedDates.length - 1] || null;
      // Calculate date spread in days
      const spreadDays =
        earliest && latest
          ? Math.round(
              (new Date(latest) - new Date(earliest)) / (1000 * 60 * 60 * 24)
            )
          : 0;
      return {
        topic,
        count: data.count,
        earliest,
        latest,
        spreadDays,
      };
    })
    .sort((a, b) => b.count - a.count)
    .slice(0, 20);

  const analysisData = {
    user: userName,
    userId,
    period: `${formatDate(startDate)} - ${formatDate(endDate)}`,
    documents: {
      total: allDocuments.length,
      databaseEntries: databaseEntries.length,
      standalonePages: standalonePages.length,
      substantialStandalone: substantialStandalone.length,
      substantialDbEntries: substantialDbEntries.length,
    },
    databases: Array.from(databaseInfo.entries()).map(([id, info]) => ({
      id,
      ...info,
    })),
    collaborators: topCollaborators,
    topics: topTopics,
    sampleDocuments: allDocuments.slice(0, 30).map((d) => ({
      id: d.id,
      title: d.title,
      type: d.type,
      url: d.url,
      lastEdited: d.lastEditedTime,
    })),
  };

  fs.writeFileSync(analysisOutputPath, JSON.stringify(analysisData, null, 2));
  console.log(`  Saved analysis data to ${analysisOutputPath}`);

  console.log(`\n✓ Notion analysis complete`);
  console.log(
    JSON.stringify({
      success: true,
      analysisFile: analysisOutputPath,
      writingSamplesFile: writingSamplesOutputPath,
      user: userName,
      documentCount: allDocuments.length,
      databaseCount: databaseInfo.size,
      collaboratorCount: topCollaborators.length,
      topicsFound: topTopics.length,
      substantialStandaloneCount: substantialStandalone.length,
    })
  );
} catch (error) {
  console.error("Failed:", error.message);
  throw error;
}
import fs from "fs";

const [
  analysisOutputPath = "session/notion-profile-analysis.json",
  writingSamplesOutputPath = "session/writing-samples.json",
] = process.argv.slice(2);

const NOTION_API = "https://api.notion.com/v1";
const headers = {
  Authorization: "Bearer PLACEHOLDER_TOKEN",
  "Content-Type": "application/json",
  "Notion-Version": "2022-06-28",
};

// 90 days ago as ISO string
const ninetyDaysAgo = new Date(Date.now() - 90 * 24 * 60 * 60 * 1000);
const startDate = ninetyDaysAgo;
const endDate = new Date();
const formatDate = (d) =>
  d.toLocaleDateString("en-US", { month: "short", day: "numeric" });

/**
 * Extract text from rich_text array
 */
function extractText(richText) {
  if (!richText || !Array.isArray(richText)) return "";
  return richText.map((t) => t.plain_text || "").join("");
}

/**
 * Extract text content from blocks recursively
 */
function extractTextFromBlocks(blocks) {
  const texts = [];
  for (const block of blocks) {
    if (block.text) {
      texts.push(block.text);
    }
    if (block.children) {
      texts.push(...extractTextFromBlocks(block.children));
    }
  }
  return texts.join("\n");
}

console.log("Fetching Notion data (last 90 days)...");

try {
  // 1. Get user info - for bot integrations, the owner is the actual user
  console.log("Fetching workspace data...");

  const meRes = await fetch(`${NOTION_API}/users/me`, { headers });
  if (!meRes.ok) throw new Error(`Failed to get user: ${meRes.status}`);

  const meData = await meRes.json();

  // If it's a bot, get the owner user; otherwise use the direct user
  let userName, userId;
  if (meData.type === "bot" && meData.bot?.owner?.user) {
    userName = meData.bot.owner.user.name || "Unknown User";
    userId = meData.bot.owner.user.id;
  } else {
    userName = meData.name || "Unknown User";
    userId = meData.id;
  }

  console.log(`✓ Connected as ${userName}`);

  // 2. Collect ALL documents user writes to (database entries + standalone pages)
  console.log("Finding all documents you've written to...");

  const allDocuments = [];
  const collaborators = new Map(); // userId -> { name, count }
  const topics = new Map(); // topic -> { count, dates: [docDates] }
  const contentTexts = []; // For writing style analysis
  let totalPagesSeen = 0;
  let pagesInDateRange = 0;

  // Search for all pages edited in last 90 days
  // Results are sorted by last_edited_time descending, so we can break early
  let cursor = undefined;
  let reachedCutoff = false;
  const maxPages = 150; // Max qualifying pages to collect

  while (!reachedCutoff && allDocuments.length < maxPages) {
    const searchRes = await fetch(`${NOTION_API}/search`, {
      method: "POST",
      headers,
      body: JSON.stringify({
        filter: { property: "object", value: "page" },
        sort: { direction: "descending", timestamp: "last_edited_time" },
        page_size: 100,
        ...(cursor && { start_cursor: cursor }),
      }),
    });

    if (!searchRes.ok) break;
    const searchData = await searchRes.json();

    for (const page of searchData.results) {
      totalPagesSeen++;
      const editedAt = new Date(page.last_edited_time);

      // Since results are sorted by last_edited_time descending,
      // once we hit a page older than 90 days, all subsequent pages will be older too
      if (editedAt < ninetyDaysAgo) {
        reachedCutoff = true;
        break;
      }
      pagesInDateRange++;

      // Include pages the user created OR last edited
      // This captures the user's workspace activity
      const userCreated = page.created_by?.id === userId;
      const userLastEdited = page.last_edited_by?.id === userId;

      if (!userCreated && !userLastEdited) continue;

      // Track collaborators only on pages the user actually worked on
      // This shows who the user collaborates with, not everyone in the workspace
      if (page.created_by?.id && page.created_by.id !== userId) {
        const collabId = page.created_by.id;
        const existing = collaborators.get(collabId) || { count: 0 };
        existing.count++;
        collaborators.set(collabId, existing);
      }
      if (page.last_edited_by?.id && page.last_edited_by.id !== userId) {
        const collabId = page.last_edited_by.id;
        const existing = collaborators.get(collabId) || { count: 0 };
        existing.count++;
        collaborators.set(collabId, existing);
      }

      const titleProp = Object.values(page.properties || {}).find(
        (p) => p.type === "title"
      );
      const title = titleProp?.title?.[0]?.plain_text || "Untitled";

      // Extract icon (but don't use it as URL)
      const icon =
        page.icon?.type === "emoji"
          ? page.icon.emoji
          : page.icon?.external?.url || page.icon?.file?.url || null;

      const doc = {
        id: page.id,
        title,
        icon,
        url: page.url,
        type:
          page.parent?.type === "database_id"
            ? "database_entry"
            : "standalone_page",
        parentId: page.parent?.database_id || page.parent?.page_id || null,
        createdTime: page.created_time,
        lastEditedTime: page.last_edited_time,
        createdBy: page.created_by?.id,
        lastEditedBy: page.last_edited_by?.id,
        userCreated,
      };

      allDocuments.push(doc);
      if (allDocuments.length >= maxPages) break;
    }

    if (
      !searchData.has_more ||
      reachedCutoff ||
      allDocuments.length >= maxPages
    )
      break;
    cursor = searchData.next_cursor;
  }

  console.log(
    `  Scanned ${totalPagesSeen} pages (${pagesInDateRange} in last 90 days), found ${allDocuments.length} you created/edited`
  );

  // Fetch names for collaborators (parallel, limited to top 20 by count)
  // We fetch more than needed because some will be filtered as bots
  const topCollabIds = Array.from(collaborators.entries())
    .sort((a, b) => b[1].count - a[1].count)
    .slice(0, 20)
    .map(([id]) => id);

  if (topCollabIds.length > 0) {
    console.log(
      `  Fetching info for ${topCollabIds.length} potential collaborators...`
    );
    const userPromises = topCollabIds.map(async (collabId) => {
      try {
        const userRes = await fetch(`${NOTION_API}/users/${collabId}`, {
          headers,
        });
        if (userRes.ok) {
          const userData = await userRes.json();
          // Filter out bots - only return real people
          if (userData.type === "bot") {
            return { id: collabId, name: null, isBot: true };
          }
          return {
            id: collabId,
            name: userData.name || "Unknown",
            isBot: false,
          };
        }
      } catch (e) {
        // Skip if can't fetch user info
      }
      return { id: collabId, name: null, isBot: false };
    });

    const userResults = await Promise.all(userPromises);
    for (const { id, name, isBot } of userResults) {
      if (isBot) {
        // Remove bots from collaborators entirely
        collaborators.delete(id);
      } else if (name && collaborators.has(id)) {
        collaborators.get(id).name = name;
      }
    }
  }

  // 3. Read content from sample documents for topic and style analysis (parallelized)
  console.log("Analyzing content from substantial documents...");

  // Analyze pages user created (standalone pages + database entries with substantial content)
  const createdPages = allDocuments.filter((d) => d.userCreated);

  // Prioritize standalone pages, but also include database entries
  const standaloneForAnalysis = createdPages.filter(
    (d) => d.type === "standalone_page"
  );
  const dbEntriesForAnalysis = createdPages.filter(
    (d) => d.type === "database_entry"
  );

  // Read more pages (up to 30 total) to have a good pool, then filter by length
  const sampleDocs = [
    ...standaloneForAnalysis.slice(0, 20),
    ...dbEntriesForAnalysis.slice(0, 10),
  ].slice(0, 30);

  // Read all content in parallel
  const contentPromises = sampleDocs.map(async (doc) => {
    try {
      // Read blocks directly via API
      const blocksUrl = new URL(`${NOTION_API}/blocks/${doc.id}/children`);
      blocksUrl.searchParams.set("page_size", "100");
      const blocksRes = await fetch(blocksUrl.toString(), {
        method: "GET",
        headers,
      });

      if (blocksRes.ok) {
        const blocksData = await blocksRes.json();
        const blocks = blocksData.results || [];
        const texts = [];

        for (const block of blocks) {
          const blockType = block.type;
          const content = block[blockType];
          if (content?.rich_text) {
            texts.push(extractText(content.rich_text));
          }
        }

        if (texts.length > 0) {
          const totalLength = texts.join(" ").length;
          return {
            docId: doc.id,
            title: doc.title,
            texts,
            totalLength,
            blocks,
            docType: doc.type,
          };
        }
      }
    } catch (e) {
      // Skip if can't read content
      return null;
    }
    return null;
  });

  const contentResults = await Promise.all(contentPromises);

  // Filter to substantial content (at least 500 characters) and sort by length
  // This filters out short database entries that are just metadata
  const substantialContent = contentResults
    .filter((result) => result && result.totalLength >= 500)
    .sort((a, b) => b.totalLength - a.totalLength)
    .slice(0, 15); // Take top 15 longest pages

  console.log(
    `  Read ${contentResults.filter((r) => r).length} pages, analyzing ${
      substantialContent.length
    } substantial documents`
  );

  // Separate standalone pages from database entries for reporting
  const substantialStandalone = substantialContent.filter(
    (r) => r.docType === "standalone_page"
  );
  const substantialDbEntries = substantialContent.filter(
    (r) => r.docType === "database_entry"
  );

  // Build doc lookup map for O(1) access
  const docById = new Map(allDocuments.map((d) => [d.id, d]));

  // Process results and extract topics with date tracking
  for (const result of substantialContent) {
    if (!result) continue;

    contentTexts.push({
      docId: result.docId,
      title: result.title,
      texts: result.texts,
    });

    // Get document date for topic recurrence tracking
    const doc = docById.get(result.docId);
    const docDate = doc?.lastEditedTime || doc?.createdTime || null;

    // Extract topics (simple keyword extraction)
    const allText = result.texts.join(" ").toLowerCase();
    const words = allText.match(/\b[a-z]{4,}\b/g) || [];
    const wordCounts = {};
    for (const word of words) {
      if (word.length > 4) {
        wordCounts[word] = (wordCounts[word] || 0) + 1;
      }
    }
    const topWords = Object.entries(wordCounts)
      .sort((a, b) => b[1] - a[1])
      .slice(0, 5)
      .map(([word, count]) => word);

    for (const topic of topWords) {
      const existing = topics.get(topic) || { count: 0, dates: [] };
      existing.count++;
      if (docDate) existing.dates.push(docDate);
      topics.set(topic, existing);
    }
  }

  console.log(
    `  Analyzed content from ${contentTexts.length} substantial documents`
  );

  // 4. Group documents by type and activity
  const databaseEntries = allDocuments.filter(
    (d) => d.type === "database_entry"
  );
  const standalonePages = allDocuments.filter(
    (d) => d.type === "standalone_page"
  );

  // Group database entries by parent database
  const databaseGroups = new Map();
  for (const entry of databaseEntries) {
    if (!entry.parentId) continue;
    const existing = databaseGroups.get(entry.parentId) || [];
    existing.push(entry);
    databaseGroups.set(entry.parentId, existing);
  }

  // Get database info for groups (parallelized)
  const databaseInfoPromises = Array.from(databaseGroups.entries()).map(
    async ([dbId, entries]) => {
      try {
        const dbRes = await fetch(`${NOTION_API}/databases/${dbId}`, {
          headers,
        });
        if (dbRes.ok) {
          const dbData = await dbRes.json();
          const dbTitle = dbData.title?.[0]?.plain_text || "Untitled Database";
          const dbIcon =
            dbData.icon?.type === "emoji"
              ? dbData.icon.emoji
              : dbData.icon?.external?.url || dbData.icon?.file?.url || null;
          return {
            dbId,
            info: {
              title: dbTitle,
              icon: dbIcon,
              url: dbData.url, // Use actual database URL
              entryCount: entries.length,
            },
          };
        }
      } catch (e) {
        // Skip if can't fetch database info
      }
      return null;
    }
  );

  const databaseInfoResults = await Promise.all(databaseInfoPromises);
  const databaseInfo = new Map();
  for (const result of databaseInfoResults) {
    if (result) {
      databaseInfo.set(result.dbId, result.info);
    }
  }

  // 5. Save analysis for agent to interpret
  // Ensure output directories exist
  const analysisDir = analysisOutputPath.substring(
    0,
    analysisOutputPath.lastIndexOf("/")
  );
  const writingSamplesDir = writingSamplesOutputPath.substring(
    0,
    writingSamplesOutputPath.lastIndexOf("/")
  );
  if (analysisDir) fs.mkdirSync(analysisDir, { recursive: true });
  if (writingSamplesDir) fs.mkdirSync(writingSamplesDir, { recursive: true });

  // Write writing samples for style analysis (with source field for shared task)
  const writingSamples = {
    source: "notion",
    samples: substantialContent.map((result) => {
      const doc = docById.get(result.docId);
      return {
        id: result.docId,
        title: result.title,
        type: result.docType,
        text: result.texts.join("\n\n"),
        wordCount: result.texts.join(" ").split(/\s+/).length,
        characterCount: result.totalLength,
        createdBy: doc?.createdBy || null,
        lastEditedBy: doc?.lastEditedBy || null,
        userCreated: doc?.userCreated || false,
      };
    }),
  };
  fs.writeFileSync(
    writingSamplesOutputPath,
    JSON.stringify(writingSamples, null, 2)
  );
  console.log(
    `  Wrote ${writingSamples.samples.length} writing samples to ${writingSamplesOutputPath}`
  );

  const topCollaborators = Array.from(collaborators.entries())
    .map(([id, data]) => ({ id, ...data }))
    .sort((a, b) => b.count - a.count)
    .slice(0, 10);

  const topTopics = Array.from(topics.entries())
    .map(([topic, data]) => {
      const sortedDates = data.dates.sort();
      const earliest = sortedDates[0] || null;
      const latest = sortedDates[sortedDates.length - 1] || null;
      // Calculate date spread in days
      const spreadDays =
        earliest && latest
          ? Math.round(
              (new Date(latest) - new Date(earliest)) / (1000 * 60 * 60 * 24)
            )
          : 0;
      return {
        topic,
        count: data.count,
        earliest,
        latest,
        spreadDays,
      };
    })
    .sort((a, b) => b.count - a.count)
    .slice(0, 20);

  const analysisData = {
    user: userName,
    userId,
    period: `${formatDate(startDate)} - ${formatDate(endDate)}`,
    documents: {
      total: allDocuments.length,
      databaseEntries: databaseEntries.length,
      standalonePages: standalonePages.length,
      substantialStandalone: substantialStandalone.length,
      substantialDbEntries: substantialDbEntries.length,
    },
    databases: Array.from(databaseInfo.entries()).map(([id, info]) => ({
      id,
      ...info,
    })),
    collaborators: topCollaborators,
    topics: topTopics,
    sampleDocuments: allDocuments.slice(0, 30).map((d) => ({
      id: d.id,
      title: d.title,
      type: d.type,
      url: d.url,
      lastEdited: d.lastEditedTime,
    })),
  };

  fs.writeFileSync(analysisOutputPath, JSON.stringify(analysisData, null, 2));
  console.log(`  Saved analysis data to ${analysisOutputPath}`);

  console.log(`\n✓ Notion analysis complete`);
  console.log(
    JSON.stringify({
      success: true,
      analysisFile: analysisOutputPath,
      writingSamplesFile: writingSamplesOutputPath,
      user: userName,
      documentCount: allDocuments.length,
      databaseCount: databaseInfo.size,
      collaboratorCount: topCollaborators.length,
      topicsFound: topTopics.length,
      substantialStandaloneCount: substantialStandalone.length,
    })
  );
} catch (error) {
  console.error("Failed:", error.message);
  throw error;
}