diff --git a/CHANGELOG.md b/CHANGELOG.md index 07a20138da..4d08cf3781 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -162,6 +162,7 @@ Breaking changes in this release: - Added Speech-to-Speech (S2S) support for real-time voice conversations, in PR [#5654](https://github.com/microsoft/BotFramework-WebChat/pull/5654), by [@pranavjoshi](https://github.com/pranavjoshi001) - Added core mute/unmute functionality for speech-to-speech via `useRecorder` hook (silent chunks keep server connection alive), in PR [#5688](https://github.com/microsoft/BotFramework-WebChat/pull/5688), by [@pranavjoshi](https://github.com/pranavjoshi001) - ๐Ÿงช Added incremental streaming Markdown renderer for livestreaming, in PR [#5799](https://github.com/microsoft/BotFramework-WebChat/pull/5799), by [@OEvgeny](https://github.com/OEvgeny) + - Fixed streaming Markdown renderer to preserve link reference definitions during incremental rendering and recover on error, in PR [#5808](https://github.com/microsoft/BotFramework-WebChat/pull/5808), by [@OEvgeny](https://github.com/OEvgeny) ### Changed diff --git a/packages/bundle/src/markdown/createStreamingRenderer.spec.ts b/packages/bundle/src/markdown/createStreamingRenderer.spec.ts index bcfd82f8c0..02f557d3e7 100644 --- a/packages/bundle/src/markdown/createStreamingRenderer.spec.ts +++ b/packages/bundle/src/markdown/createStreamingRenderer.spec.ts @@ -1,7 +1,7 @@ /** @jest-environment @happy-dom/jest-environment */ /// -import createStreamingRenderer from './createStreamingRenderer'; +import createStreamingRenderer, { STREAMING_ERROR } from './createStreamingRenderer'; const OPTIONS: Parameters[0] = { markdownRespectCRLF: false @@ -11,6 +11,8 @@ const INIT: Parameters[1] = { externalLinkAlt: 'Opens in a new window' }; +let currentContainer: HTMLElement | null = null; + function setup() { const container = document.createElement('div'); @@ -18,6 +20,8 @@ function setup() { const renderer = createStreamingRenderer(OPTIONS, INIT); + currentContainer = container; + const nextOptions = () => ({ container }); return { container, nextOptions, renderer }; @@ -77,6 +81,25 @@ function splitBySentinel(container: HTMLElement): [string, string] | null { } describe('createStreamingRenderer', () => { + afterEach(() => { + if (!currentContainer) { + return; + } + + const wrapper = currentContainer.firstElementChild as HTMLElement | null; + + // eslint-disable-next-line security/detect-object-injection + if (wrapper[STREAMING_ERROR]) { + // eslint-disable-next-line security/detect-object-injection + console.warn('Streaming renderer error gonna fail the test:', wrapper[STREAMING_ERROR]); + } + + expect(wrapper?.dataset.renderError).toBeUndefined(); + expect(wrapper?.dataset.renderErrorCount).toBeUndefined(); + + currentContainer = null; + }); + describe('single block', () => { test('should render a paragraph without sentinel', () => { const { container, nextOptions, renderer } = setup(); @@ -112,6 +135,21 @@ describe('createStreamingRenderer', () => { expect(split![1]).toBe('

Second paragraph

'); }); + test('should extract link definitions during full reparse path', () => { + const { container, nextOptions, renderer } = setup(); + + renderer.next('See [link][1]\n\n[1]: https://example.com "Example"', nextOptions()); + + expect(getSentinel(container)).not.toBeNull(); + expect(container.querySelector('a[href="https://example.com"]')?.textContent).toBe('link'); + + const { definitions } = renderer.finalize(nextOptions()); + + expect(definitions).toEqual([ + expect.objectContaining({ identifier: '1', url: 'https://example.com', title: 'Example' }) + ]); + }); + test('should preserve newline between paragraphs in textContent', () => { const { container, nextOptions, renderer } = setup(); @@ -222,6 +260,21 @@ describe('createStreamingRenderer', () => { expect(split2![1]).toBe('

Partial more text

'); }); + test('should resolve active references using committed definitions during active growth', () => { + const { container, nextOptions, renderer } = setup(); + + renderer.next('First paragraph\n\n[1]: https://example.com "Example"\n\nSecond [link', nextOptions()); + + expect(getSentinel(container)).not.toBeNull(); + expect(container.querySelector('a[href="https://example.com"]')).toBeNull(); + + renderer.next('][1]', nextOptions()); + + expect(getSentinel(container)).not.toBeNull(); + + expect(container.querySelector('a[href="https://example.com"]')?.textContent).toBe('link'); + }); + test('should commit additional blocks during incremental streaming', () => { const { container, nextOptions, renderer } = setup(); @@ -242,6 +295,126 @@ describe('createStreamingRenderer', () => { expect(split2![0]).toContain('Block B'); expect(split2![1]).toBe('

Block C

'); }); + + test('should keep incremental split when committed block references a later definition', () => { + const { container, nextOptions, renderer } = setup(); + + renderer.next('First paragraph [first][1]\n\nSecond paragraph [first][1]', nextOptions()); + + expect(getSentinel(container)).not.toBeNull(); + expect(container.querySelector('a[href="https://example.com"]')).toBeNull(); + + renderer.next('\n\n[1]: https://example.com "Example"', nextOptions()); + + expect(getSentinel(container)).not.toBeNull(); + expect(container.querySelectorAll('a[href="https://example.com"]').length).toBe(1); + expect(container.querySelector('a[href="https://example.com"]')?.textContent).toBe('first'); + }); + + test('should extract link definitions when active tail grows', () => { + const { container, nextOptions, renderer } = setup(); + + renderer.next('See [link][1]\n\n', nextOptions()); + renderer.next('[1]: https://example.com "Example"', nextOptions()); + + expect(getSentinel(container)).not.toBeNull(); + expect(container.querySelector('a[href="https://example.com"]')?.textContent).toBe('link'); + + const { definitions } = renderer.finalize(nextOptions()); + + expect(definitions).toEqual([ + expect.objectContaining({ identifier: '1', url: 'https://example.com', title: 'Example' }) + ]); + }); + + test('should extract link definitions when definition block becomes committed', () => { + const { container, nextOptions, renderer } = setup(); + + renderer.next('See [link][1]\n\n', nextOptions()); + renderer.next('[1]: https://example.com "Example"\n\nTrailing', nextOptions()); + + const split = splitBySentinel(container); + + expect(split).not.toBeNull(); + expect(split![1]).toBe('

Trailing

'); + expect(container.querySelector('a[href="https://example.com"]')?.textContent).toBe('link'); + + const { definitions } = renderer.finalize(nextOptions()); + + expect(definitions).toEqual([ + expect.objectContaining({ identifier: '1', url: 'https://example.com', title: 'Example' }) + ]); + }); + + test('should extract definitions across committed and active paragraphs with shared link definitions', () => { + const { container, nextOptions, renderer } = setup(); + + renderer.next( + 'First paragraph [first][1]\n\n[1]: https://example.com "Example"\n\n' + + 'Second paragraph [first][1] [second][2]\n\n', + nextOptions() + ); + + expect(getWrapperHTML(container)).toContain('First paragraph'); + expect(getWrapperHTML(container)).toContain('Second paragraph'); + + expect(container.querySelectorAll('a[href="https://example.com"]').length).toBe(2); + expect(container.querySelector('a[href="https://example.org"]')).toBeNull(); + + renderer.next('[2]: https://example.org "Other"', nextOptions()); + + expect(container.querySelector('a[href="https://example.org"]')?.textContent).toBe('second'); + + const { definitions } = renderer.finalize(nextOptions()); + + expect(definitions).toEqual([ + expect.objectContaining({ identifier: '1', url: 'https://example.com', title: 'Example' }), + expect.objectContaining({ identifier: '2', url: 'https://example.org', title: 'Other' }) + ]); + }); + + test('should preserve definitions for later blocks committed through incremental boundary', () => { + const { container, nextOptions, renderer } = setup(); + + renderer.next( + ['First [first][1]\n\n[1]: https://example.com "Example"\n\n', 'Second [first][1]\n\n'].join(''), + nextOptions() + ); + + expect(container.querySelectorAll('a[href="https://example.com"]').length).toBe(2); + expect(getSentinel(container)).not.toBeNull(); + + renderer.next('Third [first][1]', nextOptions()); + + expect(container.querySelectorAll('a[href="https://example.com"]').length).toBe(3); + + const split = splitBySentinel(container); + + expect(split).not.toBeNull(); + expect(split![0]).toContain('First'); + expect(split![0]).toContain('Second'); + expect(split![1]).toContain('Third'); + expect(container.querySelectorAll('a[href="https://example.com"]').length).toBe(3); + expect(container.querySelectorAll('a[href="https://example.com"]').item(0)?.textContent).toBe('first'); + }); + + test('should extract link definitions after streaming content', () => { + const { container, nextOptions, renderer } = setup(); + + renderer.next('See [link][1]\n\n', nextOptions()); + renderer.next('[1]: https://example.com "Example"', nextOptions()); + + const { definitions } = renderer.finalize(nextOptions()); + + expect(definitions).toEqual([ + expect.objectContaining({ identifier: '1', url: 'https://example.com', title: 'Example' }) + ]); + + const linkElement = container.querySelector('a[href="https://example.com"]'); + + expect(linkElement).not.toBeNull(); + expect(linkElement?.textContent).toBe('link'); + }); }); describe('reset', () => { diff --git a/packages/bundle/src/markdown/createStreamingRenderer.ts b/packages/bundle/src/markdown/createStreamingRenderer.ts index 52ba2f03a9..4f65039e85 100644 --- a/packages/bundle/src/markdown/createStreamingRenderer.ts +++ b/packages/bundle/src/markdown/createStreamingRenderer.ts @@ -35,6 +35,8 @@ type StreamingRenderer = Readonly<{ reset: () => void; }>; +export const STREAMING_ERROR = Symbol('markdown streaming error'); + // Top-level block token types emitted by micromark. // An exit event at depth 0 for one of these types marks a committed block boundary. const TOP_LEVEL_BLOCK_TYPES: ReadonlySet = new Set([ @@ -77,7 +79,7 @@ function findTopLevelBlocks(events: ReadonlyArray): readonly BlockBoundar depth--; if (!depth && blocks.length) { - blocks[blocks.length - 1].endOffset = token.end.offset; + blocks.at(-1).endOffset = token.end.offset; } } } @@ -108,7 +110,6 @@ export default function createStreamingRenderer( const domParser = new DOMParser(); // Parser state. - let activeBlockStartOffset = 0; let previousMarkdown = ''; const emptyDefinitions: readonly MarkdownLinkDefinition[] = Object.freeze([]); @@ -149,12 +150,97 @@ export default function createStreamingRenderer( return wrapper; } + function setError(error: unknown, wrapper: HTMLElement) { + wrapper.dataset.renderError = String(error instanceof Error ? error.message : error); + wrapper.dataset.renderErrorCount = String(Number(wrapper.dataset.renderErrorCount || '0') + 1); + // eslint-disable-next-line security/detect-object-injection + (wrapper as any)[STREAMING_ERROR] = error; + } + + const knownDefinitions: Set = new Set(); + function extractDefinitions(events: ReadonlyArray) { + for (const [action, token, ctx] of events) { + token.type === 'definition' && action === 'exit' && knownDefinitions.add(ctx.sliceSerialize(token) + '\n'); + } + } + + let lastStepDefinitionOffset = 0; + let lastCommittedBlockEndOffset = 0; + let stepEvents: Event[] = []; + function step(markdown: string): Event[] { + const markdownTail = markdown.slice(lastCommittedBlockEndOffset); + + const doc = parse(micromarkOptions).document(); + const prep = preprocess(); + + // Ensure definitions are resolved during parse phase + if (knownDefinitions.size) { + for (const definition of knownDefinitions) { + doc.write(prep(definition, undefined, false)); + } + + const lastDefinitionTokenOffset = doc.events.at(-1)?.[1].end.offset; + if (typeof lastDefinitionTokenOffset !== 'number') { + throw new Error('Failed to extract definition token offset'); + } + + lastStepDefinitionOffset = lastDefinitionTokenOffset; + } else { + lastStepDefinitionOffset = 0; + } + + const tailEvents = doc.write(prep(markdownTail, undefined, true)); + stepEvents = postprocess(tailEvents); + return stepEvents; + } + + function commit(block: BlockBoundary): string { + const compiler = compile(micromarkOptions); + + // Extract all available definitions to prevent compiler crashes + // on definitions appearing after the block boundary. + extractDefinitions(stepEvents); + + // Rather than trying to restore compiler state, we parse and fed the definitions + // back to the compiler as if they appear in the markdown. + if (knownDefinitions.size) { + const doc = parse(micromarkOptions).document(); + const prep = preprocess(); + for (const definition of knownDefinitions) { + doc.write(prep(definition, undefined, false)); + } + + compiler(postprocess(doc.write(prep('', undefined, true)))); + } + + const newCommittedOffset = block.startOffset; + const newCommittedEvents = stepEvents.filter(([, token]) => token.start.offset < newCommittedOffset); + + // Offset the committed block by the provided boundary start + // excluding the offset of definitions upserted during the step. + lastCommittedBlockEndOffset += newCommittedOffset - lastStepDefinitionOffset; + + return compiler(newCommittedEvents); + } + + function revert() { + lastStepDefinitionOffset = 0; + stepEvents = []; + } + + function cleanup() { + revert(); + activeSentinel = null; + lastCommittedBlockEndOffset = 0; + knownDefinitions.clear(); + } + function renderNext(chunk: string, options: StreamingNextOptions): void { + const isAppend = !!previousMarkdown; previousMarkdown += chunk; if (!previousMarkdown) { - activeBlockStartOffset = 0; - activeSentinel = null; + cleanup(); const wrapper = ensureWrapper(options.container, options.containerClassName); @@ -169,137 +255,123 @@ export default function createStreamingRenderer( processedMarkdown = respectCRLFPre(processedMarkdown); } - // Incremental path: re-parse only from the last committed block boundary. - // Guard: if sentinel was lost (e.g. container changed), fall back to full reparse. - if (activeBlockStartOffset > 0) { - const wrapper = ensureWrapper(options.container, options.containerClassName); - - if (activeSentinel && wrapper.contains(activeSentinel)) { - const tailEvents = parseEvents(processedMarkdown.slice(activeBlockStartOffset)); - const tailBlocks = findTopLevelBlocks(tailEvents); - const tailHTML = compile(micromarkOptions)(tailEvents); + try { + // Incremental path: re-parse only from the last committed block boundary. + if (isAppend) { + const wrapper = ensureWrapper(options.container, options.containerClassName); - if (tailBlocks.length <= 1) { - // Fast path: active block grew, no new committed blocks. - // Replace only the active zone (after sentinel). - const activeDoc = domParser.parseFromString(tailHTML.trim(), 'text/html'); - const activeFragment = activeDoc.createDocumentFragment(); + if (activeSentinel && wrapper.contains(activeSentinel)) { + const tailEvents = step(processedMarkdown); + const tailBlocks = findTopLevelBlocks(tailEvents); + const decorate = createDecorate(emptyDefinitions, externalLinkAlt); - activeFragment.append(...Array.from(activeDoc.body.childNodes)); - betterLinkDocumentMod(activeFragment, createDecorate(emptyDefinitions, externalLinkAlt)); + if (tailBlocks.length <= 1) { + // Fast path: active block grew, no new committed blocks. + // Replace only the active zone (after sentinel). + const tailHTML = compile(micromarkOptions)(tailEvents); + const activeDoc = domParser.parseFromString(tailHTML.trim(), 'text/html'); + const activeFragment = activeDoc.createDocumentFragment(); - const activeRange = document.createRange(); + activeFragment.append(...Array.from(activeDoc.body.childNodes)); + betterLinkDocumentMod(activeFragment, decorate); - activeRange.setStartAfter(activeSentinel); - activeRange.setEndAfter(wrapper.lastChild!); - activeRange.deleteContents(); + const activeRange = document.createRange(); - wrapper.append(applyTransform(activeFragment, options.transformFragment)); - } else { - // New block boundary in tail: commit newly-finished blocks, replace active. - const newActiveOffsetInTail = tailBlocks[tailBlocks.length - 1].startOffset; - const committedTailEvents = tailEvents.filter(([, token]) => token.start.offset < newActiveOffsetInTail); - const committedTailHTML = compile(micromarkOptions)(committedTailEvents); + activeRange.setStartAfter(activeSentinel); + activeRange.setEndAfter(wrapper.lastChild!); + activeRange.deleteContents(); - activeBlockStartOffset += newActiveOffsetInTail; + wrapper.append(applyTransform(activeFragment, options.transformFragment)); + } else { + // New block boundary in tail: commit newly-finished blocks, replace active. + const committedTailHTML = commit(tailBlocks.at(-1)); + const committedDoc = domParser.parseFromString(committedTailHTML, 'text/html'); + const committedFragment = committedDoc.createDocumentFragment(); - const committedDoc = domParser.parseFromString(committedTailHTML, 'text/html'); - const committedFragment = committedDoc.createDocumentFragment(); + const activeEvents = step(processedMarkdown); + const activeHTML = compile(micromarkOptions)(activeEvents); + const activeDoc = domParser.parseFromString(activeHTML.trim(), 'text/html'); + const activeFragment = activeDoc.createDocumentFragment(); - committedFragment.append(...Array.from(committedDoc.body.childNodes)); - betterLinkDocumentMod(committedFragment, createDecorate(emptyDefinitions, externalLinkAlt)); + committedFragment.append(...Array.from(committedDoc.body.childNodes)); + betterLinkDocumentMod(committedFragment, decorate); - const remainingHTML = tailHTML.slice(committedTailHTML.length); - const activeDoc = domParser.parseFromString(remainingHTML.trim(), 'text/html'); - const activeFragment = activeDoc.createDocumentFragment(); + activeFragment.append(...Array.from(activeDoc.body.childNodes)); + betterLinkDocumentMod(activeFragment, decorate); - activeFragment.append(...Array.from(activeDoc.body.childNodes)); - betterLinkDocumentMod(activeFragment, createDecorate(emptyDefinitions, externalLinkAlt)); + // Remove old sentinel and active zone. + const tailRange = document.createRange(); - // Remove old sentinel and active zone. - const tailRange = document.createRange(); + tailRange.setStartBefore(activeSentinel); + tailRange.setEndAfter(wrapper.lastChild!); + tailRange.deleteContents(); - tailRange.setStartBefore(activeSentinel); - tailRange.setEndAfter(wrapper.lastChild!); - tailRange.deleteContents(); + // Append newly committed, new sentinel, active. + activeSentinel = document.createComment(''); - // Append newly committed, new sentinel, active. - activeSentinel = document.createComment(''); + wrapper.append( + applyTransform(committedFragment, options.transformFragment), + activeSentinel, + applyTransform(activeFragment, options.transformFragment) + ); + } - wrapper.append( - applyTransform(committedFragment, options.transformFragment), - activeSentinel, - applyTransform(activeFragment, options.transformFragment) - ); + return; } - - return; } - - // Sentinel lost โ€” reset and fall through to full reparse. - activeBlockStartOffset = 0; - activeSentinel = null; + } catch (error) { + setError(error, ensureWrapper(options.container, options.containerClassName)); } // Full reparse path. - const fullEvents = parseEvents(processedMarkdown); - const rawHTML = compile(micromarkOptions)(fullEvents); + cleanup(); + const fullEvents = step(processedMarkdown); const blocks = findTopLevelBlocks(fullEvents); + const wrapper = ensureWrapper(options.container, options.containerClassName); + const decorate = createDecorate(emptyDefinitions, externalLinkAlt); - if (blocks.length >= 2) { - const lastBlock = blocks[blocks.length - 1]; - - activeBlockStartOffset = lastBlock.startOffset; - - // Compile the active (last block) portion first โ€” it is always a single - // block and therefore cheap. Then derive the committed HTML by slicing - // the already-compiled full output so that inter-block whitespace - // (produced by lineEnding events the compiler only flushes mid-stream) - // is preserved in the committed fragment. - const activeEvents = fullEvents.filter(([, token]) => token.start.offset >= lastBlock.startOffset); - const activeHTML = compile(micromarkOptions)(activeEvents); - const committedHTML = rawHTML.slice(0, rawHTML.length - activeHTML.length); - - const committedDoc = domParser.parseFromString(committedHTML, 'text/html'); - const committedFragment = committedDoc.createDocumentFragment(); - - committedFragment.append(...Array.from(committedDoc.body.childNodes)); - - const activeDoc = domParser.parseFromString(activeHTML.trim(), 'text/html'); - const activeFragment = activeDoc.createDocumentFragment(); + try { + if (blocks.length >= 2) { + const committedHTML = commit(blocks.at(-1)); + const committedDoc = domParser.parseFromString(committedHTML, 'text/html'); + const committedFragment = committedDoc.createDocumentFragment(); - activeFragment.append(...Array.from(activeDoc.body.childNodes)); + const activeEvents = step(processedMarkdown); + const activeHTML = compile(micromarkOptions)(activeEvents); + const activeDoc = domParser.parseFromString(activeHTML.trim(), 'text/html'); + const activeFragment = activeDoc.createDocumentFragment(); - const decorate = createDecorate(emptyDefinitions, externalLinkAlt); + committedFragment.append(...Array.from(committedDoc.body.childNodes)); + betterLinkDocumentMod(committedFragment, decorate); - betterLinkDocumentMod(committedFragment, decorate); - betterLinkDocumentMod(activeFragment, decorate); + activeFragment.append(...Array.from(activeDoc.body.childNodes)); + betterLinkDocumentMod(activeFragment, decorate); - const wrapper = ensureWrapper(options.container, options.containerClassName); + activeSentinel = document.createComment(''); - activeSentinel = document.createComment(''); + wrapper.replaceChildren( + applyTransform(committedFragment, options.transformFragment), + activeSentinel, + applyTransform(activeFragment, options.transformFragment) + ); - wrapper.replaceChildren( - applyTransform(committedFragment, options.transformFragment), - activeSentinel, - applyTransform(activeFragment, options.transformFragment) - ); + return; + } + } catch (error) { + setError(error, ensureWrapper(options.container, options.containerClassName)); - return; + cleanup(); } // Single block โ€” full replace, no sentinel. - activeBlockStartOffset = 0; activeSentinel = null; + const rawHTML = compile(micromarkOptions)(fullEvents); const parsedDocument = domParser.parseFromString(rawHTML.trim(), 'text/html'); const fragment = parsedDocument.createDocumentFragment(); fragment.append(...Array.from(parsedDocument.body.childNodes)); - - betterLinkDocumentMod(fragment, createDecorate(emptyDefinitions, externalLinkAlt)); - - const wrapper = ensureWrapper(options.container, options.containerClassName); + betterLinkDocumentMod(fragment, decorate); wrapper.replaceChildren(applyTransform(fragment, options.transformFragment)); } @@ -322,36 +394,35 @@ export default function createStreamingRenderer( const fullEvents = parseEvents(processedMarkdown); const rawHTML = compile(micromarkOptions)(fullEvents); - const parsedDocument = domParser.parseFromString(rawHTML.trim(), 'text/html'); - const fragment = parsedDocument.createDocumentFragment(); - - fragment.append(...Array.from(parsedDocument.body.childNodes)); + const finalDoc = domParser.parseFromString(rawHTML.trim(), 'text/html'); + const fragment = finalDoc.createDocumentFragment(); const definitions = extractDefinitionsFromEvents(fullEvents); + const wrapper = ensureWrapper(options.container, options.containerClassName); + const decorate = createDecorate(definitions, externalLinkAlt); - betterLinkDocumentMod(fragment, createDecorate(definitions, externalLinkAlt)); + fragment.append(...Array.from(finalDoc.body.childNodes)); + betterLinkDocumentMod(fragment, decorate); - activeBlockStartOffset = 0; activeSentinel = null; // Full replace on finalize โ€” no incremental path needed. - const wrapper = ensureWrapper(options.container, options.containerClassName); - const transformedFragment = applyTransform(fragment, options.transformFragment); - - wrapper.replaceChildren(transformedFragment); + wrapper.replaceChildren(applyTransform(fragment, options.transformFragment)); return Object.freeze({ definitions }); }, next(chunk: string, options: StreamingNextOptions): void { - renderNext(chunk, options); + try { + renderNext(chunk, options); + } finally { + revert(); + } }, reset(): void { previousMarkdown = ''; - activeBlockStartOffset = 0; - activeSentinel = null; - wrapperDiv = null; + cleanup(); } }); }