Parse RSS
Parse RSS/Atom feeds into structured data
Source Code
import fs from "fs";
import path from "path";
import Parser from "rss-parser";
// Positional CLI arguments (after the node binary and script path):
//   input      - feed location, required
//   outputPath - destination of the JSON file, required
//   maxItems   - optional item cap; stays the string "0" when omitted
const input = process.argv[2];
const outputPath = process.argv[3];
const maxItems = process.argv[4] === undefined ? "0" : process.argv[4];

// Both required arguments must be non-empty; otherwise print usage and stop.
if (!(input && outputPath)) {
  console.error("Usage: input outputPath [maxItems]");
  process.exit(1);
}
// Main pipeline: parse the feed named by `input`, reduce it to a plain
// JSON-serialisable structure, write that structure to `outputPath`, and
// print a short report. Any failure is reported on stderr and the process
// exits with status 1.
try {
  console.log(`Parsing feed: ${input}...`);

  // Map the namespaced extension tags onto plain property names so they can
  // be read as item.contentEncoded / item.creator below.
  const parser = new Parser({
    customFields: {
      item: [
        ["content:encoded", "contentEncoded"],
        ["dc:creator", "creator"],
      ],
    },
  });

  // Inputs starting with http:// or https:// go to parseURL; anything else
  // is treated as a local file path and parsed from its contents.
  const isRemote = input.startsWith("http://") || input.startsWith("https://");
  const feed = isRemote
    ? await parser.parseURL(input)
    : await parser.parseString(fs.readFileSync(input, "utf-8"));

  // maxItems is optional; 0 means "keep every item". A value that does not
  // parse to a non-negative integer is reported and ignored instead of being
  // silently dropped, so a typo does not go unnoticed.
  const parsedLimit = Number.parseInt(maxItems, 10);
  const isValidLimit = Number.isInteger(parsedLimit) && parsedLimit >= 0;
  if (!isValidLimit) {
    console.error(
      `Warning: ignoring invalid maxItems "${maxItems}"; keeping all items`
    );
  }
  const limit = isValidLimit ? parsedLimit : 0;

  const allItems = feed.items || [];
  const items = limit > 0 ? allItems.slice(0, limit) : allItems;

  // Reduce one parsed item to the fixed output shape; missing fields become
  // null (or an empty list for categories).
  const toOutputItem = (item) => ({
    title: item.title || null,
    link: item.link || null,
    pubDate: item.pubDate || item.isoDate || null,
    creator: item.creator || item["dc:creator"] || null,
    content: item.contentEncoded || item.content || item.contentSnippet || null,
    summary: item.contentSnippet || item.summary || null,
    categories: item.categories || [],
    guid: item.guid || item.id || null,
  });

  const result = {
    title: feed.title || null,
    description: feed.description || null,
    link: feed.link || null,
    feedUrl: feed.feedUrl || null,
    lastBuildDate: feed.lastBuildDate || null,
    items: items.map(toOutputItem),
  };

  // Ensure output directory exists
  const dir = path.dirname(outputPath);
  if (dir && dir !== ".") {
    fs.mkdirSync(dir, { recursive: true });
  }
  fs.writeFileSync(outputPath, JSON.stringify(result, null, 2));

  // Human-readable report followed by a one-line JSON summary on stdout.
  console.log(`\n✓ Parsed feed`);
  console.log(` Title: ${result.title || "untitled"}`);
  console.log(` Items: ${result.items.length}`);
  console.log(` Written to: ${outputPath}`);
  console.log(
    JSON.stringify({
      success: true,
      input,
      outputPath,
      feedTitle: result.title,
      itemCount: result.items.length,
    })
  );
} catch (error) {
  // Thrown values are not guaranteed to be Error instances; fall back to the
  // string form so the message is never printed as "undefined".
  const message = error instanceof Error ? error.message : String(error);
  console.error("Error:", message);
  process.exit(1);
}