Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 83 additions & 0 deletions src/pages/api/rss.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,28 @@ describe('parseRssOrAtom', () => {
},
]);
});

it('decodes numeric HTML entities in descriptions', () => {
  // The description carries two decimal character references (&#8217; and &#8230;).
  const feedXml = `<?xml version="1.0"?>
<rss><channel>
<item>
<title>Encoded Description</title>
<link>https://example.com/encoded</link>
<description>Cloudflare&#8217;s edge roadmap&#8230;</description>
</item>
</channel></rss>`;

  // Both references should come back as their literal characters.
  const expectedItem = {
    title: 'Encoded Description',
    link: 'https://example.com/encoded',
    pubDate: '',
    description: 'Cloudflare’s edge roadmap…',
  };

  const parsed = parseRssOrAtom(feedXml, 5);

  expect(parsed).toEqual([expectedItem]);
});
});

describe('GET /api/rss', () => {
Expand Down Expand Up @@ -319,4 +341,65 @@ describe('GET /api/rss', () => {
expect(response.status).toBe(200);
expect(payload).toEqual({ items: [] });
});

// Scenario: the first fetch resolves with a text/html page, the second with a JSON
// array of posts. The handler is expected to answer 200 with items built from the
// second response, and the second fetch call is inspected for its URL and options.
it('falls back to WordPress posts API when feed fetch returns html', async () => {
const fetchMock = vi
.spyOn(globalThis, 'fetch')
// Queued response #1 (consumed by the first fetch call): an HTML body with a text/html content type.
.mockResolvedValueOnce(
new Response('<html><body>challenge</body></html>', {
status: 200,
headers: { 'Content-Type': 'text/html; charset=UTF-8' },
})
)
// Queued response #2 (consumed by the second fetch call): a one-element posts array.
.mockResolvedValueOnce(
new Response(
JSON.stringify([
{
link: 'https://blog.jaysonknight.com/2026/03/26/edge-first-architecture-why-cloudflare-workers-changes-how-you-think-about-design/',
title: { rendered: 'Edge-First Architecture' },
excerpt: { rendered: '<p>Cloudflare&#8217;s distributed model...</p>' },
date_gmt: '2026-03-27T02:43:10',
date: '2026-03-26T22:43:10',
},
]),
{
status: 200,
headers: { 'Content-Type': 'application/json; charset=utf-8' },
}
)
);

// The feed URL and item limit are passed as the `url` and `max` query parameters.
const response = await GET({
request: new Request('https://example.com/api/rss?url=https%3A%2F%2Fblog.jaysonknight.com%2Ffeed%2F&max=5'),
} as Parameters<typeof GET>[0]);
const payload = (await response.json()) as { items: Array<{ title: string; link: string; description: string }> };

expect(response.status).toBe(200);
// Expected item: pubDate carries the date_gmt time (02:43:10) as a UTC string rather than
// the `date` value, and the excerpt has its <p> tags removed and &#8217; decoded.
expect(payload.items).toEqual([
{
title: 'Edge-First Architecture',
link: 'https://blog.jaysonknight.com/2026/03/26/edge-first-architecture-why-cloudflare-workers-changes-how-you-think-about-design/',
pubDate: 'Fri, 27 Mar 2026 02:43:10 GMT',
description: 'Cloudflare’s distributed model...',
},
]);

// Index 1 is the second fetch invocation, i.e. the one that received the JSON response.
const fallbackCall = fetchMock.mock.calls[1];
expect(fallbackCall).toBeDefined();
const [fallbackUrl, fallbackOptions] = fallbackCall;
// The URL is asserted to be a URL instance (not a string) on the feed's origin.
expect(fallbackUrl).toBeInstanceOf(URL);
expect((fallbackUrl as URL).toString()).toContain('https://blog.jaysonknight.com/wp-json/wp/v2/posts?');
// per_page mirrors the max=5 query parameter of the incoming request.
expect((fallbackUrl as URL).searchParams.get('per_page')).toBe('5');
expect((fallbackUrl as URL).searchParams.get('_fields')).toBe('link,title.rendered,excerpt.rendered,date,date_gmt');
// objectContaining permits extra option/header keys beyond the ones listed here.
expect(fallbackOptions).toEqual(
expect.objectContaining({
cache: 'no-store',
signal: expect.any(AbortSignal),
headers: expect.objectContaining({
Accept: 'application/json',
'User-Agent': 'Mozilla/5.0 (compatible; JKcom-RSSBot/1.0; +https://jaysonknight.com)',
}),
})
);
});
});
144 changes: 134 additions & 10 deletions src/pages/api/rss.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ type FeedItem = {

const extractTag = (block: string, tags: string[]): string => {
for (const tag of tags) {
const match = block.match(new RegExp(`<${tag}(?:\\s[^>]*)?>([\\s\\S]*?)<\\/${tag}>`, 'i'));
const match = block.match(new RegExp(`<${tag}(?:\s[^>]*)?>([\s\S]*?)<\/${tag}>`, 'i'));
Copy link

Copilot AI Apr 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extractTag builds a RegExp from a template literal that contains \s/\S escapes, but the literal currently uses single backslashes (\s rather than \\s). In JS/TS string and template literals \s is not a recognized escape and evaluates to just s, so the regex will not match whitespace/newlines and tag extraction will break. Use double-escaped backslashes (e.g. \\s, \\S) or String.raw so the intended regex escapes reach the RegExp constructor.

Suggested change
const match = block.match(new RegExp(`<${tag}(?:\s[^>]*)?>([\s\S]*?)<\/${tag}>`, 'i'));
const match = block.match(new RegExp(String.raw`<${tag}(?:\s[^>]*)?>([\s\S]*?)<\/${tag}>`, 'i'));

Copilot uses AI. Check for mistakes.
if (match?.[1]) {
return match[1].trim();
}
Expand All @@ -22,20 +22,34 @@ const extractTag = (block: string, tags: string[]): string => {

/**
 * Unwraps a value that consists entirely of a single `<![CDATA[...]]>` wrapper and
 * trims the result. Values that are not fully wrapped are only trimmed.
 */
const stripCdata = (value: string): string => {
  // The `s` flag lets `.` span newlines, so multi-line CDATA payloads match too.
  const wrapped = /^<!\[CDATA\[(.*)\]\]>$/s.exec(value);
  const inner = wrapped?.[1] ?? value;
  return inner.trim();
};

/**
 * Converts the numeric value of a character reference into a string.
 *
 * Returns U+FFFD (the replacement character) for any value that is not a Unicode
 * scalar value:
 * - NaN, non-integers, and negatives (`String.fromCodePoint` throws on non-integers),
 * - values above 0x10FFFF,
 * - surrogate code points 0xD800–0xDFFF, which would otherwise yield a lone,
 *   unpaired UTF-16 surrogate in the resulting string.
 *
 * @param codePoint - Parsed value of a `&#NNN;` / `&#xHHH;` reference.
 * @returns The corresponding character, or `'\uFFFD'` when the value is not usable.
 */
const safeFromCodePoint = (codePoint: number): string => {
  const isInUnicodeRange = Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff;
  const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;

  if (isInUnicodeRange && !isSurrogate) {
    return String.fromCodePoint(codePoint);
  }
  return '\uFFFD';
};
Comment on lines +25 to +30
Copy link

Copilot AI Apr 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

safeFromCodePoint currently treats any value in 0..0x10FFFF as valid, which includes surrogate code points (0xD800–0xDFFF). Decoding numeric entities to lone surrogates can produce invalid/unpaired UTF-16 in JS strings and unexpected rendering. Consider rejecting surrogate ranges (and non-finite values) and returning U+FFFD for those as well.

Copilot uses AI. Check for mistakes.

/**
 * Decodes XML character references and the five predefined XML entities in one pass.
 *
 * Handles hexadecimal (`&#x2019;`) and decimal (`&#8217;`) references plus
 * `&lt;`, `&gt;`, `&quot;`, `&apos;` and `&amp;`. Named entities are matched
 * case-sensitively; anything unrecognised is left untouched.
 *
 * Each reference is decoded exactly once: the output of one replacement is never
 * rescanned, so `&#38;lt;` becomes the literal text `&lt;` rather than `<`, and
 * `&amp;lt;` becomes `&lt;`.
 *
 * @param value - Text that may contain entity/character references.
 * @returns The text with recognised references replaced by their characters.
 */
const decodeXmlEntities = (value: string): string => {
  const namedEntities: Record<string, string> = {
    lt: '<',
    gt: '>',
    quot: '"',
    apos: "'",
    amp: '&',
  };

  return value.replace(
    /&(?:#[xX]([0-9a-fA-F]+)|#(\d+)|(lt|gt|quot|apos|amp));/g,
    (match: string, hex?: string, dec?: string, name?: string): string => {
      if (hex !== undefined) {
        return safeFromCodePoint(Number.parseInt(hex, 16));
      }
      if (dec !== undefined) {
        return safeFromCodePoint(Number.parseInt(dec, 10));
      }
      // Only one alternative participates per match; fall back to the raw text defensively.
      return name !== undefined ? (namedEntities[name] ?? match) : match;
    }
  );
};

const cleanDescription = (value: string): string => {
const text = decodeXmlEntities(stripCdata(value))
/**
 * Produces plain text from a raw feed value, with no length limit.
 *
 * Steps, in order: unwrap CDATA, decode entities, replace each `<...>` tag with a
 * space, collapse whitespace runs to a single space, trim. Because decoding runs
 * before tag removal, entity-escaped markup is removed together with literal tags.
 */
const cleanText = (value: string): string => {
  const decoded = decodeXmlEntities(stripCdata(value));
  const withoutTags = decoded.replace(/<[^>]+>/g, ' ');
  const singleSpaced = withoutTags.replace(/\s+/g, ' ');
  return singleSpaced.trim();
};

/** Like cleanText but truncates to 200 characters with an ellipsis. */
const cleanDescription = (value: string): string => {
const text = cleanText(value);

if (text.length <= 200) {
return text;
}
Expand Down Expand Up @@ -104,6 +118,74 @@ const getMax = (value: string | null): number => {
return Math.max(1, Math.min(parsed, 20));
};

/**
* Wraps fetch with a timeout. Uses AbortSignal.timeout when available; otherwise
* falls back to AbortController + setTimeout and always clears the timer in a
* finally block so the event loop is never kept alive by a stale timer.
*/
const fetchWithTimeout = async (
input: RequestInfo | URL,
init: Omit<RequestInit, 'signal'> = {},
timeoutMs = 8000
): Promise<Response> => {
if (typeof AbortSignal.timeout === 'function') {
return fetch(input, { ...init, signal: AbortSignal.timeout(timeoutMs) });
Comment on lines +131 to +132
Copy link

Copilot AI Apr 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fetchWithTimeout assumes AbortSignal is always defined and references AbortSignal.timeout directly. In runtimes where AbortSignal is missing or not global, this will throw before falling back to AbortController. Consider guarding via globalThis.AbortSignal (e.g., typeof globalThis.AbortSignal?.timeout === 'function') to keep the timeout wrapper truly runtime-safe.

Suggested change
if (typeof AbortSignal.timeout === 'function') {
return fetch(input, { ...init, signal: AbortSignal.timeout(timeoutMs) });
const abortSignalCtor = globalThis.AbortSignal;
if (typeof abortSignalCtor?.timeout === 'function') {
return fetch(input, { ...init, signal: abortSignalCtor.timeout(timeoutMs) });

Copilot uses AI. Check for mistakes.
}

const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
try {
return await fetch(input, { ...init, signal: controller.signal });
} finally {
clearTimeout(timeoutId);
}
};

/**
 * Maps an already-parsed JSON payload of posts onto feed items.
 *
 * Per entry it reads `link`, `title.rendered`, `excerpt.rendered`, `date_gmt` and
 * `date`; any field that is missing or of the wrong type is treated as empty.
 *
 * @param payload - Decoded JSON of unknown shape.
 * @returns `null` when `payload` is not an array; otherwise one item for every entry
 * that has a non-empty string `link` (all other entries are dropped).
 */
const parseWordPressPosts = (
  payload: unknown
): Array<{ title: string; link: string; pubDate: string; description: string }> | null => {
  if (!Array.isArray(payload)) {
    return null;
  }

  /** Trimmed string value, or '' when the candidate is not a string. */
  const trimmedString = (candidate: unknown): string => (typeof candidate === 'string' ? candidate.trim() : '');

  /** The `rendered` string of an object-valued field, or '' when unavailable. */
  const renderedString = (container: unknown): string => {
    if (!container || typeof container !== 'object') {
      return '';
    }
    const rendered = (container as Record<string, unknown>).rendered;
    return typeof rendered === 'string' ? rendered : '';
  };

  const collected: Array<{ title: string; link: string; pubDate: string; description: string }> = [];

  for (const entry of payload) {
    const record = entry && typeof entry === 'object' ? (entry as Record<string, unknown>) : null;

    const link = trimmedString(record?.link);
    const gmtStamp = trimmedString(record?.date_gmt);
    const localStamp = trimmedString(record?.date);

    // Titles go through cleanText (not cleanDescription) so they are never truncated.
    const title = cleanText(renderedString(record?.title)) || 'Untitled';
    const description = cleanDescription(renderedString(record?.excerpt));

    // date_gmt gets a trailing "Z" appended so it is parsed as UTC; `date` is used as-is.
    const rawStamp = gmtStamp ? `${gmtStamp}Z` : localStamp;
    let pubDate = rawStamp;
    if (rawStamp) {
      const parsedStamp = new Date(rawStamp);
      // An unparseable stamp is passed through unchanged instead of "Invalid Date".
      if (!Number.isNaN(parsedStamp.getTime())) {
        pubDate = parsedStamp.toUTCString();
      }
    }

    if (link) {
      collected.push({ title, link, pubDate, description });
    }
  }

  return collected;
};

/**
 * Builds the success response for the endpoint: HTTP 200 with a JSON body of the
 * form `{ "items": [...] }` and a `Content-Type: application/json` header.
 */
const respondWithItems = (items: FeedItem[]): Response => {
  const body = JSON.stringify({ items });
  return new Response(body, {
    status: 200,
    headers: { 'Content-Type': 'application/json' },
  });
};

export const GET: APIRoute = async ({ request }) => {
const requestUrl = new URL(request.url);
const urlParam = requestUrl.searchParams.get('url')?.trim() ?? '';
Expand All @@ -127,18 +209,45 @@ export const GET: APIRoute = async ({ request }) => {
});
}
const sanitizedFeedUrl = sanitizeUrlForLog(parsedFeedUrl);
const tryWordPressFallback = async (): Promise<FeedItem[] | null> => {
try {
const wpApiUrl = new URL('/wp-json/wp/v2/posts', `${parsedFeedUrl.origin}/`);
wpApiUrl.searchParams.set('per_page', String(max));
wpApiUrl.searchParams.set('_fields', 'link,title.rendered,excerpt.rendered,date,date_gmt');

const response = await fetchWithTimeout(wpApiUrl, {
headers: {
Accept: 'application/json',
'User-Agent': 'Mozilla/5.0 (compatible; JKcom-RSSBot/1.0; +https://jaysonknight.com)',
},
cache: 'no-store',
});
Comment on lines +212 to +224
Copy link

Copilot AI Apr 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The WordPress fallback reuses parsedFeedUrl.origin derived from the user-controlled url parameter, which means the endpoint will now make an additional request to <origin>/wp-json/wp/v2/posts for arbitrary hosts. This increases SSRF blast radius (extra internal endpoints hit on the same host) compared to the RSS-only fetch. Consider restricting allowed origins (e.g., a known blog host), enforcing http/https only, and/or blocking private IP ranges before issuing either fetch.

Copilot uses AI. Check for mistakes.

if (!response.ok) {
return null;
}

const items = parseWordPressPosts(await response.json());
return items?.slice(0, max) ?? null;
} catch {
return null;
}
};

try {
const response = await fetch(feedUrl, {
const response = await fetchWithTimeout(feedUrl, {
headers: {
Accept: 'application/rss+xml, application/atom+xml, application/xml, text/xml, */*',
'User-Agent': 'Mozilla/5.0 (compatible; JKcom-RSSBot/1.0; +https://jaysonknight.com)',
},
cache: 'no-store',
signal: AbortSignal.timeout(8000),
});

if (!response.ok) {
const fallbackItems = await tryWordPressFallback();
if (fallbackItems) {
return respondWithItems(fallbackItems);
}
Comment thread
jaypatrick marked this conversation as resolved.
console.error('[api/rss] Failed to fetch feed with non-OK status:', response.status, 'for URL:', sanitizedFeedUrl);
return new Response(JSON.stringify({ error: `Failed to fetch feed (${response.status}).` }), {
Comment on lines 246 to 252
Copy link

Copilot AI Apr 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fallback behavior is now triggered in several new cases (non-OK status, invalid feed XML, empty parsed items, thrown errors), but the new tests only cover the text/html trigger. Add at least one more test that demonstrates a successful fallback for another trigger (e.g., upstream 503 or invalid feed body) and a test that asserts no fallback request is made when the primary RSS/Atom parse yields items.

Copilot uses AI. Check for mistakes.
status: 502,
Expand All @@ -149,6 +258,10 @@ export const GET: APIRoute = async ({ request }) => {
const contentType = response.headers.get('Content-Type')?.toLowerCase() ?? '';
const mimeType = contentType.split(';', 1)[0]?.trim() ?? '';
if (mimeType === 'text/html') {
const fallbackItems = await tryWordPressFallback();
if (fallbackItems) {
return respondWithItems(fallbackItems);
}
console.error('[api/rss] Feed returned text/html — likely bot challenge for URL:', sanitizedFeedUrl);
return new Response(
JSON.stringify({ error: 'Feed returned an HTML page instead of XML (possible bot challenge or redirect)' }),
Expand All @@ -161,6 +274,10 @@ export const GET: APIRoute = async ({ request }) => {

const xml = await response.text();
if (!isValidFeedDocument(xml)) {
const fallbackItems = await tryWordPressFallback();
if (fallbackItems) {
return respondWithItems(fallbackItems);
}
console.error('[api/rss] Response body is not a valid RSS/Atom document for URL:', sanitizedFeedUrl);
return new Response(JSON.stringify({ error: 'Feed URL did not return a valid RSS or Atom document' }), {
status: 502,
Expand All @@ -169,12 +286,19 @@ export const GET: APIRoute = async ({ request }) => {
}

const items = parseRssOrAtom(xml, max);
if (items.length === 0) {
const fallbackItems = await tryWordPressFallback();
if (fallbackItems) {
return respondWithItems(fallbackItems);
}
}

return new Response(JSON.stringify({ items }), {
status: 200,
headers: { 'Content-Type': 'application/json' },
});
return respondWithItems(items);
} catch (error) {
const fallbackItems = await tryWordPressFallback();
if (fallbackItems) {
return respondWithItems(fallbackItems);
}
const errorName = error instanceof Error ? error.name : 'UnknownError';
const errorMessage =
error instanceof Error
Expand All @@ -186,4 +310,4 @@ export const GET: APIRoute = async ({ request }) => {
headers: { 'Content-Type': 'application/json' },
});
}
};
};
Loading