diff --git a/src/pages/api/rss.test.ts b/src/pages/api/rss.test.ts index d2ce4da..4cd11ee 100644 --- a/src/pages/api/rss.test.ts +++ b/src/pages/api/rss.test.ts @@ -174,6 +174,28 @@ describe('parseRssOrAtom', () => { }, ]); }); + + it('decodes numeric HTML entities in descriptions', () => { + const xml = ` + + + Encoded Description + https://example.com/encoded + Cloudflare’s edge roadmap… + + `; + + const items = parseRssOrAtom(xml, 5); + + expect(items).toEqual([ + { + title: 'Encoded Description', + link: 'https://example.com/encoded', + pubDate: '', + description: 'Cloudflare’s edge roadmap…', + }, + ]); + }); }); describe('GET /api/rss', () => { @@ -319,4 +341,65 @@ describe('GET /api/rss', () => { expect(response.status).toBe(200); expect(payload).toEqual({ items: [] }); }); + + it('falls back to WordPress posts API when feed fetch returns html', async () => { + const fetchMock = vi + .spyOn(globalThis, 'fetch') + .mockResolvedValueOnce( + new Response('challenge', { + status: 200, + headers: { 'Content-Type': 'text/html; charset=UTF-8' }, + }) + ) + .mockResolvedValueOnce( + new Response( + JSON.stringify([ + { + link: 'https://blog.jaysonknight.com/2026/03/26/edge-first-architecture-why-cloudflare-workers-changes-how-you-think-about-design/', + title: { rendered: 'Edge-First Architecture' }, + excerpt: { rendered: '

Cloudflare’s distributed model...

' }, + date_gmt: '2026-03-27T02:43:10', + date: '2026-03-26T22:43:10', + }, + ]), + { + status: 200, + headers: { 'Content-Type': 'application/json; charset=utf-8' }, + } + ) + ); + + const response = await GET({ + request: new Request('https://example.com/api/rss?url=https%3A%2F%2Fblog.jaysonknight.com%2Ffeed%2F&max=5'), + } as Parameters[0]); + const payload = (await response.json()) as { items: Array<{ title: string; link: string; description: string }> }; + + expect(response.status).toBe(200); + expect(payload.items).toEqual([ + { + title: 'Edge-First Architecture', + link: 'https://blog.jaysonknight.com/2026/03/26/edge-first-architecture-why-cloudflare-workers-changes-how-you-think-about-design/', + pubDate: 'Fri, 27 Mar 2026 02:43:10 GMT', + description: 'Cloudflare’s distributed model...', + }, + ]); + + const fallbackCall = fetchMock.mock.calls[1]; + expect(fallbackCall).toBeDefined(); + const [fallbackUrl, fallbackOptions] = fallbackCall; + expect(fallbackUrl).toBeInstanceOf(URL); + expect((fallbackUrl as URL).toString()).toContain('https://blog.jaysonknight.com/wp-json/wp/v2/posts?'); + expect((fallbackUrl as URL).searchParams.get('per_page')).toBe('5'); + expect((fallbackUrl as URL).searchParams.get('_fields')).toBe('link,title.rendered,excerpt.rendered,date,date_gmt'); + expect(fallbackOptions).toEqual( + expect.objectContaining({ + cache: 'no-store', + signal: expect.any(AbortSignal), + headers: expect.objectContaining({ + Accept: 'application/json', + 'User-Agent': 'Mozilla/5.0 (compatible; JKcom-RSSBot/1.0; +https://jaysonknight.com)', + }), + }) + ); + }); }); diff --git a/src/pages/api/rss.ts b/src/pages/api/rss.ts index 3df9bdb..fc0c130 100644 --- a/src/pages/api/rss.ts +++ b/src/pages/api/rss.ts @@ -11,7 +11,7 @@ type FeedItem = { const extractTag = (block: string, tags: string[]): string => { for (const tag of tags) { - const match = block.match(new RegExp(`<${tag}(?:\\s[^>]*)?>([\\s\\S]*?)<\\/${tag}>`, 'i')); + const match = block.match(new RegExp(`<${tag}(?:\s[^>]*)?>([\s\S]*?)<\/${tag}>`, 'i')); if (match?.[1]) { return match[1].trim(); } @@ -22,20 +22,34 @@ const extractTag = (block: string, tags: string[]): string => { const stripCdata = (value: string): string => value.replace(/^$/s, '$1').trim(); +const safeFromCodePoint = (codePoint: number): string => { + if (codePoint >= 0 && codePoint <= 0x10ffff) { + return String.fromCodePoint(codePoint); + } + return '\uFFFD'; +}; + const decodeXmlEntities = (value: string): string => value + .replace(/&#x([0-9a-f]+);/gi, (_match, hex: string) => safeFromCodePoint(Number.parseInt(hex, 16))) + .replace(/&#(\d+);/g, (_match, dec: string) => safeFromCodePoint(Number.parseInt(dec, 10))) .replace(/</g, '<') .replace(/>/g, '>') .replace(/"/g, '"') .replace(/'/g, "'") .replace(/&/g, '&'); -const cleanDescription = (value: string): string => { - const text = decodeXmlEntities(stripCdata(value)) +/** Strips HTML tags, decodes entities, and normalises whitespace — without truncation. */ +const cleanText = (value: string): string => + decodeXmlEntities(stripCdata(value)) .replace(/<[^>]+>/g, ' ') .replace(/\s+/g, ' ') .trim(); +/** Like cleanText but truncates to 200 characters with an ellipsis. */ +const cleanDescription = (value: string): string => { + const text = cleanText(value); + if (text.length <= 200) { return text; } @@ -104,6 +118,74 @@ const getMax = (value: string | null): number => { return Math.max(1, Math.min(parsed, 20)); }; +/** + * Wraps fetch with a timeout. Uses AbortSignal.timeout when available; otherwise + * falls back to AbortController + setTimeout and always clears the timer in a + * finally block so the event loop is never kept alive by a stale timer. + */ +const fetchWithTimeout = async ( + input: RequestInfo | URL, + init: Omit = {}, + timeoutMs = 8000 +): Promise => { + if (typeof AbortSignal.timeout === 'function') { + return fetch(input, { ...init, signal: AbortSignal.timeout(timeoutMs) }); + } + + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), timeoutMs); + try { + return await fetch(input, { ...init, signal: controller.signal }); + } finally { + clearTimeout(timeoutId); + } +}; + +const parseWordPressPosts = ( + payload: unknown +): Array<{ title: string; link: string; pubDate: string; description: string }> | null => { + if (!Array.isArray(payload)) { + return null; + } + + const items = payload + .map((post) => { + const source = typeof post === 'object' && post ? (post as Record) : null; + const link = typeof source?.link === 'string' ? source.link.trim() : ''; + const titleRendered = + source?.title && typeof source.title === 'object' + ? (source.title as Record).rendered + : undefined; + const excerptRendered = + source?.excerpt && typeof source.excerpt === 'object' + ? (source.excerpt as Record).rendered + : undefined; + const dateGmt = typeof source?.date_gmt === 'string' ? source.date_gmt.trim() : ''; + const date = typeof source?.date === 'string' ? source.date.trim() : ''; + // Use cleanText (no truncation) for titles so long titles are not silently cut off. + const title = cleanText(typeof titleRendered === 'string' ? titleRendered : '') || 'Untitled'; + const pubDateSource = dateGmt ? `${dateGmt}Z` : date; + const parsedPubDate = pubDateSource ? new Date(pubDateSource) : null; + const pubDate = parsedPubDate && !Number.isNaN(parsedPubDate.getTime()) ? parsedPubDate.toUTCString() : pubDateSource; + + return { + title, + link, + pubDate, + description: cleanDescription(typeof excerptRendered === 'string' ? excerptRendered : ''), + }; + }) + .filter((item) => item.link); + + return items; +}; + +const respondWithItems = (items: FeedItem[]): Response => + new Response(JSON.stringify({ items }), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }); + export const GET: APIRoute = async ({ request }) => { const requestUrl = new URL(request.url); const urlParam = requestUrl.searchParams.get('url')?.trim() ?? ''; @@ -127,18 +209,45 @@ export const GET: APIRoute = async ({ request }) => { }); } const sanitizedFeedUrl = sanitizeUrlForLog(parsedFeedUrl); + const tryWordPressFallback = async (): Promise => { + try { + const wpApiUrl = new URL('/wp-json/wp/v2/posts', `${parsedFeedUrl.origin}/`); + wpApiUrl.searchParams.set('per_page', String(max)); + wpApiUrl.searchParams.set('_fields', 'link,title.rendered,excerpt.rendered,date,date_gmt'); + + const response = await fetchWithTimeout(wpApiUrl, { + headers: { + Accept: 'application/json', + 'User-Agent': 'Mozilla/5.0 (compatible; JKcom-RSSBot/1.0; +https://jaysonknight.com)', + }, + cache: 'no-store', + }); + + if (!response.ok) { + return null; + } + + const items = parseWordPressPosts(await response.json()); + return items?.slice(0, max) ?? null; + } catch { + return null; + } + }; try { - const response = await fetch(feedUrl, { + const response = await fetchWithTimeout(feedUrl, { headers: { Accept: 'application/rss+xml, application/atom+xml, application/xml, text/xml, */*', 'User-Agent': 'Mozilla/5.0 (compatible; JKcom-RSSBot/1.0; +https://jaysonknight.com)', }, cache: 'no-store', - signal: AbortSignal.timeout(8000), }); if (!response.ok) { + const fallbackItems = await tryWordPressFallback(); + if (fallbackItems) { + return respondWithItems(fallbackItems); + } console.error('[api/rss] Failed to fetch feed with non-OK status:', response.status, 'for URL:', sanitizedFeedUrl); return new Response(JSON.stringify({ error: `Failed to fetch feed (${response.status}).` }), { status: 502, @@ -149,6 +258,10 @@ export const GET: APIRoute = async ({ request }) => { const contentType = response.headers.get('Content-Type')?.toLowerCase() ?? ''; const mimeType = contentType.split(';', 1)[0]?.trim() ?? ''; if (mimeType === 'text/html') { + const fallbackItems = await tryWordPressFallback(); + if (fallbackItems) { + return respondWithItems(fallbackItems); + } console.error('[api/rss] Feed returned text/html — likely bot challenge for URL:', sanitizedFeedUrl); return new Response( JSON.stringify({ error: 'Feed returned an HTML page instead of XML (possible bot challenge or redirect)' }), @@ -161,6 +274,10 @@ export const GET: APIRoute = async ({ request }) => { const xml = await response.text(); if (!isValidFeedDocument(xml)) { + const fallbackItems = await tryWordPressFallback(); + if (fallbackItems) { + return respondWithItems(fallbackItems); + } console.error('[api/rss] Response body is not a valid RSS/Atom document for URL:', sanitizedFeedUrl); return new Response(JSON.stringify({ error: 'Feed URL did not return a valid RSS or Atom document' }), { status: 502, @@ -169,12 +286,19 @@ export const GET: APIRoute = async ({ request }) => { } const items = parseRssOrAtom(xml, max); + if (items.length === 0) { + const fallbackItems = await tryWordPressFallback(); + if (fallbackItems) { + return respondWithItems(fallbackItems); + } + } - return new Response(JSON.stringify({ items }), { - status: 200, - headers: { 'Content-Type': 'application/json' }, - }); + return respondWithItems(items); } catch (error) { + const fallbackItems = await tryWordPressFallback(); + if (fallbackItems) { + return respondWithItems(fallbackItems); + } const errorName = error instanceof Error ? error.name : 'UnknownError'; const errorMessage = error instanceof Error @@ -186,4 +310,4 @@ export const GET: APIRoute = async ({ request }) => { headers: { 'Content-Type': 'application/json' }, }); } -}; +}; \ No newline at end of file