From 697e3f235ca5f2500b7b594b25f4392dc6ca609e Mon Sep 17 00:00:00 2001 From: Charly Gomez Date: Fri, 20 Mar 2026 17:04:56 +0100 Subject: [PATCH 1/4] wip --- .../node-runtime-metrics/scenario-opt-out.ts | 28 ++ .../suites/node-runtime-metrics/scenario.ts | 23 ++ .../suites/node-runtime-metrics/test.ts | 48 ++++ packages/node-core/src/common-exports.ts | 1 + .../src/integrations/nodeRuntimeMetrics.ts | 150 ++++++++++ .../integrations/nodeRuntimeMetrics.test.ts | 271 ++++++++++++++++++ packages/node/src/index.ts | 2 + 7 files changed, 523 insertions(+) create mode 100644 dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario-opt-out.ts create mode 100644 dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario.ts create mode 100644 dev-packages/node-integration-tests/suites/node-runtime-metrics/test.ts create mode 100644 packages/node-core/src/integrations/nodeRuntimeMetrics.ts create mode 100644 packages/node-core/test/integrations/nodeRuntimeMetrics.test.ts diff --git a/dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario-opt-out.ts b/dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario-opt-out.ts new file mode 100644 index 000000000000..f6a62c561ceb --- /dev/null +++ b/dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario-opt-out.ts @@ -0,0 +1,28 @@ +import * as Sentry from '@sentry/node'; +import { loggingTransport } from '@sentry-internal/node-integration-tests'; + +Sentry.init({ + dsn: 'https://public@dsn.ingest.sentry.io/1337', + release: '1.0.0', + environment: 'test', + transport: loggingTransport, + integrations: [ + Sentry.nodeRuntimeMetricsIntegration({ + collectionIntervalMs: 100, + collect: { + cpu: false, + eventLoopDelay: false, + eventLoopUtilization: false, + uptime: false, + }, + }), + ], +}); + +async function run(): Promise { + await new Promise(resolve => setTimeout(resolve, 250)); + await Sentry.flush(); +} + +// eslint-disable-next-line @typescript-eslint/no-floating-promises +run(); diff --git a/dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario.ts b/dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario.ts new file mode 100644 index 000000000000..b862634c719a --- /dev/null +++ b/dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario.ts @@ -0,0 +1,23 @@ +import * as Sentry from '@sentry/node'; +import { loggingTransport } from '@sentry-internal/node-integration-tests'; + +Sentry.init({ + dsn: 'https://public@dsn.ingest.sentry.io/1337', + release: '1.0.0', + environment: 'test', + transport: loggingTransport, + integrations: [ + Sentry.nodeRuntimeMetricsIntegration({ + collectionIntervalMs: 100, + }), + ], +}); + +async function run(): Promise { + // Wait long enough for the collection interval to fire at least once. + await new Promise(resolve => setTimeout(resolve, 250)); + await Sentry.flush(); +} + +// eslint-disable-next-line @typescript-eslint/no-floating-promises +run(); diff --git a/dev-packages/node-integration-tests/suites/node-runtime-metrics/test.ts b/dev-packages/node-integration-tests/suites/node-runtime-metrics/test.ts new file mode 100644 index 000000000000..e4af3f5f0918 --- /dev/null +++ b/dev-packages/node-integration-tests/suites/node-runtime-metrics/test.ts @@ -0,0 +1,48 @@ +import { afterAll, describe, expect, test } from 'vitest'; +import { cleanupChildProcesses, createRunner } from '../../utils/runner'; + +describe('nodeRuntimeMetricsIntegration', () => { + afterAll(() => { + cleanupChildProcesses(); + }); + + test('emits runtime metrics', async () => { + const runner = createRunner(__dirname, 'scenario.ts') + .expect({ + trace_metric: { + items: expect.arrayContaining([ + expect.objectContaining({ name: 'node.runtime.mem.rss', type: 'gauge', unit: 'byte' }), + expect.objectContaining({ name: 'node.runtime.mem.heap_total', type: 'gauge', unit: 'byte' }), + expect.objectContaining({ name: 'node.runtime.mem.heap_used', type: 'gauge', unit: 'byte' }), + expect.objectContaining({ name: 'node.runtime.mem.external', type: 'gauge', unit: 'byte' }), + expect.objectContaining({ name: 'node.runtime.cpu.user', type: 'gauge', unit: 'second' }), + expect.objectContaining({ name: 'node.runtime.cpu.system', type: 'gauge', unit: 'second' }), + expect.objectContaining({ name: 'node.runtime.cpu.percent', type: 'gauge', unit: '1' }), + expect.objectContaining({ name: 'node.runtime.event_loop.delay.min', type: 'gauge', unit: 'second' }), + expect.objectContaining({ name: 'node.runtime.event_loop.delay.max', type: 'gauge', unit: 'second' }), + expect.objectContaining({ name: 'node.runtime.event_loop.delay.mean', type: 'gauge', unit: 'second' }), + expect.objectContaining({ name: 'node.runtime.event_loop.delay.p50', type: 'gauge', unit: 'second' }), + expect.objectContaining({ name: 'node.runtime.event_loop.delay.p90', type: 'gauge', unit: 'second' }), + expect.objectContaining({ name: 'node.runtime.event_loop.delay.p99', type: 'gauge', unit: 'second' }), + expect.objectContaining({ name: 'node.runtime.event_loop.utilization', type: 'gauge', unit: '1' }), + expect.objectContaining({ name: 'node.runtime.process.uptime', type: 'counter', unit: 'second' }), + ]), + }, + }) + .start(); + + await runner.completed(); + }); + + test('respects opt-out options', async () => { + const runner = createRunner(__dirname, 'scenario-opt-out.ts') + .expect({ + trace_metric: { + items: expect.arrayContaining([expect.objectContaining({ name: 'node.runtime.mem.rss', type: 'gauge' })]), + }, + }) + .start(); + + await runner.completed(); + }); +}); diff --git a/packages/node-core/src/common-exports.ts b/packages/node-core/src/common-exports.ts index 3fff4100b352..d6d1e070ef85 100644 --- a/packages/node-core/src/common-exports.ts +++ b/packages/node-core/src/common-exports.ts @@ -12,6 +12,7 @@ import * as logger from './logs/exports'; // Node-core integrations (not OTel-dependent) export { nodeContextIntegration } from './integrations/context'; +export { nodeRuntimeMetricsIntegration, type NodeRuntimeMetricsOptions } from './integrations/nodeRuntimeMetrics'; export { contextLinesIntegration } from './integrations/contextlines'; export { localVariablesIntegration } from './integrations/local-variables'; export { modulesIntegration } from './integrations/modules'; diff --git a/packages/node-core/src/integrations/nodeRuntimeMetrics.ts b/packages/node-core/src/integrations/nodeRuntimeMetrics.ts new file mode 100644 index 000000000000..22b20cc7b8e1 --- /dev/null +++ b/packages/node-core/src/integrations/nodeRuntimeMetrics.ts @@ -0,0 +1,150 @@ +import { monitorEventLoopDelay, performance } from 'perf_hooks'; +import { defineIntegration, flushIfServerless, metrics } from '@sentry/core'; + +const INTEGRATION_NAME = 'NodeRuntimeMetrics'; +const DEFAULT_INTERVAL_MS = 30_000; + +export interface NodeRuntimeMetricsOptions { + /** + * Which metric groups to collect. All groups are enabled by default. + */ + collect?: { + cpu?: boolean; + memory?: boolean; + eventLoopDelay?: boolean; + eventLoopUtilization?: boolean; + uptime?: boolean; + }; + /** + * How often to collect metrics, in milliseconds. + * @default 30000 + */ + collectionIntervalMs?: number; +} + +/** + * Automatically collects Node.js runtime metrics and emits them to Sentry. + * + * @example + * ```ts + * Sentry.init({ + * integrations: [ + * Sentry.nodeRuntimeMetricsIntegration(), + * ], + * }); + * ``` + */ +export const nodeRuntimeMetricsIntegration = defineIntegration((options: NodeRuntimeMetricsOptions = {}) => { + const collectionIntervalMs = options.collectionIntervalMs ?? DEFAULT_INTERVAL_MS; + const collect = { + cpu: true, + memory: true, + eventLoopDelay: true, + eventLoopUtilization: true, + uptime: true, + ...options.collect, + }; + + let intervalId: ReturnType | undefined; + let prevCpuUsage: NodeJS.CpuUsage | undefined; + let prevElu: ReturnType | undefined; + let prevFlushTime: number | undefined; + let eventLoopDelayHistogram: ReturnType | undefined; + + function collectMetrics(): void { + const now = Date.now(); + const elapsed = now - (prevFlushTime ?? now); + + if (collect.cpu && prevCpuUsage !== undefined) { + const delta = process.cpuUsage(prevCpuUsage); + metrics.gauge('node.runtime.cpu.user', delta.user / 1e6, { unit: 'second' }); + metrics.gauge('node.runtime.cpu.system', delta.system / 1e6, { unit: 'second' }); + if (elapsed > 0) { + // Ratio of CPU time to wall-clock time. Can exceed 1.0 on multi-core systems. + // TODO: In cluster mode, add a runtime_id/process_id attribute to disambiguate per-worker metrics. + metrics.gauge('node.runtime.cpu.percent', (delta.user + delta.system) / (elapsed * 1000), { unit: '1' }); + } + prevCpuUsage = process.cpuUsage(); + } + + if (collect.memory) { + const mem = process.memoryUsage(); + metrics.gauge('node.runtime.mem.rss', mem.rss, { unit: 'byte' }); + metrics.gauge('node.runtime.mem.heap_total', mem.heapTotal, { unit: 'byte' }); + metrics.gauge('node.runtime.mem.heap_used', mem.heapUsed, { unit: 'byte' }); + metrics.gauge('node.runtime.mem.external', mem.external, { unit: 'byte' }); + if (mem.arrayBuffers !== undefined) { + metrics.gauge('node.runtime.mem.array_buffers', mem.arrayBuffers, { unit: 'byte' }); + } + } + + if (collect.eventLoopDelay && eventLoopDelayHistogram) { + // Resolution is 10ms (10_000_000 ns) as configured below. Subtract it to normalize out sampling overhead. + const resolutionNs = 10_000_000; + const nsToS = (ns: number): number => Math.max(0, (ns - resolutionNs) / 1e9); + + metrics.gauge('node.runtime.event_loop.delay.min', nsToS(eventLoopDelayHistogram.min), { unit: 'second' }); + metrics.gauge('node.runtime.event_loop.delay.max', nsToS(eventLoopDelayHistogram.max), { unit: 'second' }); + metrics.gauge('node.runtime.event_loop.delay.mean', nsToS(eventLoopDelayHistogram.mean), { unit: 'second' }); + metrics.gauge('node.runtime.event_loop.delay.p50', nsToS(eventLoopDelayHistogram.percentile(50)), { + unit: 'second', + }); + metrics.gauge('node.runtime.event_loop.delay.p90', nsToS(eventLoopDelayHistogram.percentile(90)), { + unit: 'second', + }); + metrics.gauge('node.runtime.event_loop.delay.p99', nsToS(eventLoopDelayHistogram.percentile(99)), { + unit: 'second', + }); + + eventLoopDelayHistogram.reset(); + } + + if (collect.eventLoopUtilization && prevElu !== undefined) { + const currentElu = performance.eventLoopUtilization(); + const delta = performance.eventLoopUtilization(currentElu, prevElu); + metrics.gauge('node.runtime.event_loop.utilization', delta.utilization, { unit: '1' }); + prevElu = currentElu; + } + + if (collect.uptime && elapsed > 0) { + metrics.count('node.runtime.process.uptime', elapsed / 1000, { unit: 'second' }); + } + + prevFlushTime = now; + } + + return { + name: INTEGRATION_NAME, + + setup(): void { + if (collect.eventLoopDelay) { + try { + eventLoopDelayHistogram = monitorEventLoopDelay({ resolution: 10 }); + eventLoopDelayHistogram.enable(); + } catch { + // Not available in all runtimes (e.g. Bun throws NotImplementedError). + } + } + + // Prime baselines before the first collection interval. + if (collect.cpu) { + prevCpuUsage = process.cpuUsage(); + } + if (collect.eventLoopUtilization) { + prevElu = performance.eventLoopUtilization(); + } + prevFlushTime = Date.now(); + + intervalId = setInterval(collectMetrics, collectionIntervalMs); + // Do not keep the process alive solely for metric collection. + intervalId.unref(); + + // In serverless environments the process may not live long enough to hit the interval. + // Collect and flush eagerly whenever the event loop drains (end of invocation). + process.on('beforeExit', () => { + collectMetrics(); + void flushIfServerless(); + }); + }, + }; +}); diff --git a/packages/node-core/test/integrations/nodeRuntimeMetrics.test.ts b/packages/node-core/test/integrations/nodeRuntimeMetrics.test.ts new file mode 100644 index 000000000000..f970fbcd9148 --- /dev/null +++ b/packages/node-core/test/integrations/nodeRuntimeMetrics.test.ts @@ -0,0 +1,271 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { metrics } from '@sentry/core'; +import { nodeRuntimeMetricsIntegration } from '../../src/integrations/nodeRuntimeMetrics'; + +const { mockHistogram, mockMonitorEventLoopDelay, mockPerformance } = vi.hoisted(() => { + const mockHistogram = { + min: 2_000_000, + max: 20_000_000, + mean: 10_000_000, + percentile: vi.fn((p: number) => { + if (p === 50) return 8_000_000; + if (p === 90) return 15_000_000; + if (p === 99) return 19_000_000; + return 0; + }), + enable: vi.fn(), + reset: vi.fn(), + disable: vi.fn(), + }; + + const mockMonitorEventLoopDelay = vi.fn(() => mockHistogram); + const mockElu = { idle: 700, active: 300, utilization: 0.3 }; + const mockEluDelta = { idle: 700, active: 300, utilization: 0.3 }; + const mockPerformance = { + eventLoopUtilization: vi.fn((curr?: object, _prev?: object) => { + if (curr) return mockEluDelta; + return mockElu; + }), + }; + + return { mockHistogram, mockMonitorEventLoopDelay, mockPerformance }; +}); + +vi.mock('perf_hooks', () => ({ + monitorEventLoopDelay: mockMonitorEventLoopDelay, + performance: mockPerformance, +})); + +vi.mock('@sentry/core', async () => { + const actual = await vi.importActual('@sentry/core'); + return { + ...actual, + flushIfServerless: vi.fn(), + }; +}); + +describe('nodeRuntimeMetricsIntegration', () => { + let gaugeSpy: ReturnType; + let countSpy: ReturnType; + + beforeEach(() => { + vi.useFakeTimers(); + gaugeSpy = vi.spyOn(metrics, 'gauge'); + countSpy = vi.spyOn(metrics, 'count'); + + vi.spyOn(process, 'cpuUsage').mockReturnValue({ user: 500_000, system: 200_000 }); + vi.spyOn(process, 'memoryUsage').mockReturnValue({ + rss: 50_000_000, + heapTotal: 30_000_000, + heapUsed: 20_000_000, + external: 1_000_000, + arrayBuffers: 500_000, + }); + + mockHistogram.percentile.mockClear(); + mockHistogram.enable.mockClear(); + mockHistogram.reset.mockClear(); + mockMonitorEventLoopDelay.mockClear(); + mockPerformance.eventLoopUtilization.mockClear(); + }); + + afterEach(() => { + vi.useRealTimers(); + vi.restoreAllMocks(); + }); + + it('has the correct name', () => { + const integration = nodeRuntimeMetricsIntegration(); + expect(integration.name).toBe('NodeRuntimeMetrics'); + }); + + describe('setup', () => { + it('initializes event loop delay histogram with resolution 10', () => { + const integration = nodeRuntimeMetricsIntegration(); + integration.setup(); + + expect(mockMonitorEventLoopDelay).toHaveBeenCalledWith({ resolution: 10 }); + expect(mockHistogram.enable).toHaveBeenCalledOnce(); + }); + + it('does not throw if monitorEventLoopDelay is unavailable (e.g. Bun)', () => { + mockMonitorEventLoopDelay.mockImplementationOnce(() => { + throw new Error('NotImplementedError'); + }); + + const integration = nodeRuntimeMetricsIntegration(); + expect(() => integration.setup()).not.toThrow(); + }); + + it('starts a collection interval', () => { + const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000 }); + integration.setup(); + + expect(gaugeSpy).not.toHaveBeenCalled(); + vi.advanceTimersByTime(1_000); + expect(gaugeSpy).toHaveBeenCalled(); + }); + }); + + describe('metric collection', () => { + it('emits CPU metrics', () => { + const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000 }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.cpu.user', expect.any(Number), { unit: 'second' }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.cpu.system', expect.any(Number), { unit: 'second' }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.cpu.percent', expect.any(Number), { unit: '1' }); + }); + + it('emits memory metrics', () => { + const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000 }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.mem.rss', 50_000_000, { unit: 'byte' }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.mem.heap_total', 30_000_000, { unit: 'byte' }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.mem.heap_used', 20_000_000, { unit: 'byte' }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.mem.external', 1_000_000, { unit: 'byte' }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.mem.array_buffers', 500_000, { unit: 'byte' }); + }); + + it('emits event loop delay metrics and resets histogram after collection', () => { + const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000 }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + // min: (2_000_000 - 10_000_000) clamped to 0 → 0s + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.delay.min', 0, { unit: 'second' }); + // max: (20_000_000 - 10_000_000) / 1e9 → 0.01s + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.delay.max', 0.01, { unit: 'second' }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.delay.mean', 0, { unit: 'second' }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.delay.p50', expect.any(Number), { + unit: 'second', + }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.delay.p90', expect.any(Number), { + unit: 'second', + }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.delay.p99', expect.any(Number), { + unit: 'second', + }); + expect(mockHistogram.reset).toHaveBeenCalledOnce(); + }); + + it('emits event loop utilization metric', () => { + const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000 }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.utilization', 0.3, { unit: '1' }); + }); + + it('emits uptime counter', () => { + const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000 }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + expect(countSpy).toHaveBeenCalledWith('node.runtime.process.uptime', expect.any(Number), { unit: 'second' }); + }); + + it('does not emit event loop delay metrics if monitorEventLoopDelay threw', () => { + mockMonitorEventLoopDelay.mockImplementationOnce(() => { + throw new Error('NotImplementedError'); + }); + + const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000 }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + expect(gaugeSpy).not.toHaveBeenCalledWith( + 'node.runtime.event_loop.delay.min', + expect.anything(), + expect.anything(), + ); + }); + }); + + describe('opt-out', () => { + it('skips CPU metrics when collect.cpu is false', () => { + const integration = nodeRuntimeMetricsIntegration({ + collectionIntervalMs: 1_000, + collect: { cpu: false }, + }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + expect(gaugeSpy).not.toHaveBeenCalledWith('node.runtime.cpu.user', expect.anything(), expect.anything()); + expect(gaugeSpy).not.toHaveBeenCalledWith('node.runtime.cpu.system', expect.anything(), expect.anything()); + expect(gaugeSpy).not.toHaveBeenCalledWith('node.runtime.cpu.percent', expect.anything(), expect.anything()); + }); + + it('skips memory metrics when collect.memory is false', () => { + const integration = nodeRuntimeMetricsIntegration({ + collectionIntervalMs: 1_000, + collect: { memory: false }, + }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + expect(gaugeSpy).not.toHaveBeenCalledWith('node.runtime.mem.rss', expect.anything(), expect.anything()); + }); + + it('skips event loop delay when collect.eventLoopDelay is false', () => { + const integration = nodeRuntimeMetricsIntegration({ + collectionIntervalMs: 1_000, + collect: { eventLoopDelay: false }, + }); + integration.setup(); + + expect(mockMonitorEventLoopDelay).not.toHaveBeenCalled(); + vi.advanceTimersByTime(1_000); + expect(gaugeSpy).not.toHaveBeenCalledWith( + 'node.runtime.event_loop.delay.min', + expect.anything(), + expect.anything(), + ); + }); + + it('skips event loop utilization when collect.eventLoopUtilization is false', () => { + const integration = nodeRuntimeMetricsIntegration({ + collectionIntervalMs: 1_000, + collect: { eventLoopUtilization: false }, + }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + expect(gaugeSpy).not.toHaveBeenCalledWith( + 'node.runtime.event_loop.utilization', + expect.anything(), + expect.anything(), + ); + }); + + it('skips uptime when collect.uptime is false', () => { + const integration = nodeRuntimeMetricsIntegration({ + collectionIntervalMs: 1_000, + collect: { uptime: false }, + }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + expect(countSpy).not.toHaveBeenCalledWith('node.runtime.process.uptime', expect.anything(), expect.anything()); + }); + }); + + describe('serverless flush', () => { + it('collects metrics and calls flushIfServerless on beforeExit', async () => { + const { flushIfServerless } = await import('@sentry/core'); + const flushSpy = vi.mocked(flushIfServerless); + + const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 60_000 }); + integration.setup(); + + // Interval has not fired yet — beforeExit should still trigger a collection + flush. + process.emit('beforeExit', 0); + + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.mem.rss', expect.any(Number), { unit: 'byte' }); + expect(flushSpy).toHaveBeenCalled(); + }); + }); +}); diff --git a/packages/node/src/index.ts b/packages/node/src/index.ts index 8458dee5f6a7..67fe97e59300 100644 --- a/packages/node/src/index.ts +++ b/packages/node/src/index.ts @@ -188,6 +188,8 @@ export { spotlightIntegration, childProcessIntegration, processSessionIntegration, + nodeRuntimeMetricsIntegration, + type NodeRuntimeMetricsOptions, pinoIntegration, createSentryWinstonTransport, SentryContextManager, From 1b973a9f6f4524d5e671e4f8eef31965bf1a367a Mon Sep 17 00:00:00 2001 From: Charly Gomez Date: Mon, 23 Mar 2026 10:52:21 +0100 Subject: [PATCH 2/4] fix units --- .../suites/node-runtime-metrics/test.ts | 4 +-- .../src/integrations/nodeRuntimeMetrics.ts | 28 ++++++++++--------- .../integrations/nodeRuntimeMetrics.test.ts | 6 ++-- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/dev-packages/node-integration-tests/suites/node-runtime-metrics/test.ts b/dev-packages/node-integration-tests/suites/node-runtime-metrics/test.ts index e4af3f5f0918..6b8593c30c04 100644 --- a/dev-packages/node-integration-tests/suites/node-runtime-metrics/test.ts +++ b/dev-packages/node-integration-tests/suites/node-runtime-metrics/test.ts @@ -17,14 +17,14 @@ describe('nodeRuntimeMetricsIntegration', () => { expect.objectContaining({ name: 'node.runtime.mem.external', type: 'gauge', unit: 'byte' }), expect.objectContaining({ name: 'node.runtime.cpu.user', type: 'gauge', unit: 'second' }), expect.objectContaining({ name: 'node.runtime.cpu.system', type: 'gauge', unit: 'second' }), - expect.objectContaining({ name: 'node.runtime.cpu.percent', type: 'gauge', unit: '1' }), + expect.objectContaining({ name: 'node.runtime.cpu.utilization', type: 'gauge' }), expect.objectContaining({ name: 'node.runtime.event_loop.delay.min', type: 'gauge', unit: 'second' }), expect.objectContaining({ name: 'node.runtime.event_loop.delay.max', type: 'gauge', unit: 'second' }), expect.objectContaining({ name: 'node.runtime.event_loop.delay.mean', type: 'gauge', unit: 'second' }), expect.objectContaining({ name: 'node.runtime.event_loop.delay.p50', type: 'gauge', unit: 'second' }), expect.objectContaining({ name: 'node.runtime.event_loop.delay.p90', type: 'gauge', unit: 'second' }), expect.objectContaining({ name: 'node.runtime.event_loop.delay.p99', type: 'gauge', unit: 'second' }), - expect.objectContaining({ name: 'node.runtime.event_loop.utilization', type: 'gauge', unit: '1' }), + expect.objectContaining({ name: 'node.runtime.event_loop.utilization', type: 'gauge' }), expect.objectContaining({ name: 'node.runtime.process.uptime', type: 'counter', unit: 'second' }), ]), }, diff --git a/packages/node-core/src/integrations/nodeRuntimeMetrics.ts b/packages/node-core/src/integrations/nodeRuntimeMetrics.ts index 22b20cc7b8e1..26e7e08a3df6 100644 --- a/packages/node-core/src/integrations/nodeRuntimeMetrics.ts +++ b/packages/node-core/src/integrations/nodeRuntimeMetrics.ts @@ -1,5 +1,5 @@ import { monitorEventLoopDelay, performance } from 'perf_hooks'; -import { defineIntegration, flushIfServerless, metrics } from '@sentry/core'; +import { defineIntegration, flushIfServerless, metrics, safeDateNow } from '@sentry/core'; const INTEGRATION_NAME = 'NodeRuntimeMetrics'; const DEFAULT_INTERVAL_MS = 30_000; @@ -48,12 +48,12 @@ export const nodeRuntimeMetricsIntegration = defineIntegration((options: NodeRun let intervalId: ReturnType | undefined; let prevCpuUsage: NodeJS.CpuUsage | undefined; let prevElu: ReturnType | undefined; - let prevFlushTime: number | undefined; + let prevFlushTime: number; let eventLoopDelayHistogram: ReturnType | undefined; function collectMetrics(): void { - const now = Date.now(); - const elapsed = now - (prevFlushTime ?? now); + const now = safeDateNow(); + const elapsed = now - prevFlushTime; if (collect.cpu && prevCpuUsage !== undefined) { const delta = process.cpuUsage(prevCpuUsage); @@ -62,7 +62,7 @@ export const nodeRuntimeMetricsIntegration = defineIntegration((options: NodeRun if (elapsed > 0) { // Ratio of CPU time to wall-clock time. Can exceed 1.0 on multi-core systems. // TODO: In cluster mode, add a runtime_id/process_id attribute to disambiguate per-worker metrics. - metrics.gauge('node.runtime.cpu.percent', (delta.user + delta.system) / (elapsed * 1000), { unit: '1' }); + metrics.gauge('node.runtime.cpu.utilization', (delta.user + delta.system) / (elapsed * 1000)); } prevCpuUsage = process.cpuUsage(); } @@ -73,9 +73,7 @@ export const nodeRuntimeMetricsIntegration = defineIntegration((options: NodeRun metrics.gauge('node.runtime.mem.heap_total', mem.heapTotal, { unit: 'byte' }); metrics.gauge('node.runtime.mem.heap_used', mem.heapUsed, { unit: 'byte' }); metrics.gauge('node.runtime.mem.external', mem.external, { unit: 'byte' }); - if (mem.arrayBuffers !== undefined) { - metrics.gauge('node.runtime.mem.array_buffers', mem.arrayBuffers, { unit: 'byte' }); - } + metrics.gauge('node.runtime.mem.array_buffers', mem.arrayBuffers, { unit: 'byte' }); } if (collect.eventLoopDelay && eventLoopDelayHistogram) { @@ -102,7 +100,7 @@ export const nodeRuntimeMetricsIntegration = defineIntegration((options: NodeRun if (collect.eventLoopUtilization && prevElu !== undefined) { const currentElu = performance.eventLoopUtilization(); const delta = performance.eventLoopUtilization(currentElu, prevElu); - metrics.gauge('node.runtime.event_loop.utilization', delta.utilization, { unit: '1' }); + metrics.gauge('node.runtime.event_loop.utilization', delta.utilization); prevElu = currentElu; } @@ -133,15 +131,19 @@ export const nodeRuntimeMetricsIntegration = defineIntegration((options: NodeRun if (collect.eventLoopUtilization) { prevElu = performance.eventLoopUtilization(); } - prevFlushTime = Date.now(); + prevFlushTime = safeDateNow(); + // Guard against double setup (e.g. re-init). + if (intervalId) { + clearInterval(intervalId); + } intervalId = setInterval(collectMetrics, collectionIntervalMs); // Do not keep the process alive solely for metric collection. intervalId.unref(); - // In serverless environments the process may not live long enough to hit the interval. - // Collect and flush eagerly whenever the event loop drains (end of invocation). - process.on('beforeExit', () => { + // Collect and flush at the end of every invocation. In non-serverless environments + // flushIfServerless is a no-op, so this is safe to call unconditionally. + process.once('beforeExit', () => { collectMetrics(); void flushIfServerless(); }); diff --git a/packages/node-core/test/integrations/nodeRuntimeMetrics.test.ts b/packages/node-core/test/integrations/nodeRuntimeMetrics.test.ts index f970fbcd9148..02355a8efbae 100644 --- a/packages/node-core/test/integrations/nodeRuntimeMetrics.test.ts +++ b/packages/node-core/test/integrations/nodeRuntimeMetrics.test.ts @@ -115,7 +115,7 @@ describe('nodeRuntimeMetricsIntegration', () => { expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.cpu.user', expect.any(Number), { unit: 'second' }); expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.cpu.system', expect.any(Number), { unit: 'second' }); - expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.cpu.percent', expect.any(Number), { unit: '1' }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.cpu.utilization', expect.any(Number)); }); it('emits memory metrics', () => { @@ -157,7 +157,7 @@ describe('nodeRuntimeMetricsIntegration', () => { integration.setup(); vi.advanceTimersByTime(1_000); - expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.utilization', 0.3, { unit: '1' }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.utilization', 0.3); }); it('emits uptime counter', () => { @@ -196,7 +196,7 @@ describe('nodeRuntimeMetricsIntegration', () => { expect(gaugeSpy).not.toHaveBeenCalledWith('node.runtime.cpu.user', expect.anything(), expect.anything()); expect(gaugeSpy).not.toHaveBeenCalledWith('node.runtime.cpu.system', expect.anything(), expect.anything()); - expect(gaugeSpy).not.toHaveBeenCalledWith('node.runtime.cpu.percent', expect.anything(), expect.anything()); + expect(gaugeSpy).not.toHaveBeenCalledWith('node.runtime.cpu.utilization', expect.anything(), expect.anything()); }); it('skips memory metrics when collect.memory is false', () => { From 596caeca759f2e5b09418adfbd2db1ffc209b14f Mon Sep 17 00:00:00 2001 From: Charly Gomez Date: Mon, 23 Mar 2026 11:22:25 +0100 Subject: [PATCH 3/4] . --- packages/node-core/src/integrations/nodeRuntimeMetrics.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/node-core/src/integrations/nodeRuntimeMetrics.ts b/packages/node-core/src/integrations/nodeRuntimeMetrics.ts index 26e7e08a3df6..206f9fdfba44 100644 --- a/packages/node-core/src/integrations/nodeRuntimeMetrics.ts +++ b/packages/node-core/src/integrations/nodeRuntimeMetrics.ts @@ -1,5 +1,5 @@ import { monitorEventLoopDelay, performance } from 'perf_hooks'; -import { defineIntegration, flushIfServerless, metrics, safeDateNow } from '@sentry/core'; +import { _INTERNAL_safeDateNow, defineIntegration, flushIfServerless, metrics } from '@sentry/core'; const INTEGRATION_NAME = 'NodeRuntimeMetrics'; const DEFAULT_INTERVAL_MS = 30_000; @@ -52,7 +52,7 @@ export const nodeRuntimeMetricsIntegration = defineIntegration((options: NodeRun let eventLoopDelayHistogram: ReturnType | undefined; function collectMetrics(): void { - const now = safeDateNow(); + const now = _INTERNAL_safeDateNow(); const elapsed = now - prevFlushTime; if (collect.cpu && prevCpuUsage !== undefined) { @@ -131,7 +131,7 @@ export const nodeRuntimeMetricsIntegration = defineIntegration((options: NodeRun if (collect.eventLoopUtilization) { prevElu = performance.eventLoopUtilization(); } - prevFlushTime = safeDateNow(); + prevFlushTime = _INTERNAL_safeDateNow(); // Guard against double setup (e.g. re-init). if (intervalId) { From c330b28bf04cdffd1b8cda0b43b008a8e3025ed8 Mon Sep 17 00:00:00 2001 From: Charly Gomez Date: Mon, 23 Mar 2026 19:35:54 +0100 Subject: [PATCH 4/4] trim down defaults --- .../node-runtime-metrics/scenario-all.ts | 30 ++++ .../node-runtime-metrics/scenario-opt-out.ts | 5 +- .../suites/node-runtime-metrics/test.ts | 23 ++- .../src/integrations/nodeRuntimeMetrics.ts | 135 +++++++++++++----- .../integrations/nodeRuntimeMetrics.test.ts | 135 ++++++++++++++---- 5 files changed, 262 insertions(+), 66 deletions(-) create mode 100644 dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario-all.ts diff --git a/dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario-all.ts b/dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario-all.ts new file mode 100644 index 000000000000..e995482fafbf --- /dev/null +++ b/dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario-all.ts @@ -0,0 +1,30 @@ +import * as Sentry from '@sentry/node'; +import { loggingTransport } from '@sentry-internal/node-integration-tests'; + +Sentry.init({ + dsn: 'https://public@dsn.ingest.sentry.io/1337', + release: '1.0.0', + environment: 'test', + transport: loggingTransport, + integrations: [ + Sentry.nodeRuntimeMetricsIntegration({ + collectionIntervalMs: 100, + collect: { + cpuTime: true, + memExternal: true, + eventLoopDelayMin: true, + eventLoopDelayMax: true, + eventLoopDelayMean: true, + eventLoopDelayP90: true, + }, + }), + ], +}); + +async function run(): Promise { + await new Promise(resolve => setTimeout(resolve, 250)); + await Sentry.flush(); +} + +// eslint-disable-next-line @typescript-eslint/no-floating-promises +run(); diff --git a/dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario-opt-out.ts b/dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario-opt-out.ts index f6a62c561ceb..bfa1de9f9cdc 100644 --- a/dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario-opt-out.ts +++ b/dev-packages/node-integration-tests/suites/node-runtime-metrics/scenario-opt-out.ts @@ -10,8 +10,9 @@ Sentry.init({ Sentry.nodeRuntimeMetricsIntegration({ collectionIntervalMs: 100, collect: { - cpu: false, - eventLoopDelay: false, + cpuUtilization: false, + cpuTime: false, + eventLoopDelayP99: false, eventLoopUtilization: false, uptime: false, }, diff --git a/dev-packages/node-integration-tests/suites/node-runtime-metrics/test.ts b/dev-packages/node-integration-tests/suites/node-runtime-metrics/test.ts index 6b8593c30c04..22b6e3b16032 100644 --- a/dev-packages/node-integration-tests/suites/node-runtime-metrics/test.ts +++ b/dev-packages/node-integration-tests/suites/node-runtime-metrics/test.ts @@ -6,8 +6,29 @@ describe('nodeRuntimeMetricsIntegration', () => { cleanupChildProcesses(); }); - test('emits runtime metrics', async () => { + test('emits default runtime metrics', async () => { const runner = createRunner(__dirname, 'scenario.ts') + .expect({ + trace_metric: { + items: expect.arrayContaining([ + expect.objectContaining({ name: 'node.runtime.mem.rss', type: 'gauge', unit: 'byte' }), + expect.objectContaining({ name: 'node.runtime.mem.heap_used', type: 'gauge', unit: 'byte' }), + expect.objectContaining({ name: 'node.runtime.mem.heap_total', type: 'gauge', unit: 'byte' }), + expect.objectContaining({ name: 'node.runtime.cpu.utilization', type: 'gauge' }), + expect.objectContaining({ name: 'node.runtime.event_loop.delay.p50', type: 'gauge', unit: 'second' }), + expect.objectContaining({ name: 'node.runtime.event_loop.delay.p99', type: 'gauge', unit: 'second' }), + expect.objectContaining({ name: 'node.runtime.event_loop.utilization', type: 'gauge' }), + expect.objectContaining({ name: 'node.runtime.process.uptime', type: 'counter', unit: 'second' }), + ]), + }, + }) + .start(); + + await runner.completed(); + }); + + test('emits all metrics when fully opted in', async () => { + const runner = createRunner(__dirname, 'scenario-all.ts') .expect({ trace_metric: { items: expect.arrayContaining([ diff --git a/packages/node-core/src/integrations/nodeRuntimeMetrics.ts b/packages/node-core/src/integrations/nodeRuntimeMetrics.ts index 206f9fdfba44..63f20080e69a 100644 --- a/packages/node-core/src/integrations/nodeRuntimeMetrics.ts +++ b/packages/node-core/src/integrations/nodeRuntimeMetrics.ts @@ -3,17 +3,44 @@ import { _INTERNAL_safeDateNow, defineIntegration, flushIfServerless, metrics } const INTEGRATION_NAME = 'NodeRuntimeMetrics'; const DEFAULT_INTERVAL_MS = 30_000; +const EVENT_LOOP_DELAY_RESOLUTION_MS = 10; export interface NodeRuntimeMetricsOptions { /** - * Which metric groups to collect. All groups are enabled by default. + * Which metrics to collect. + * + * Default on (8 metrics): + * - `cpuUtilization` — CPU utilization ratio + * - `memRss` — Resident Set Size (actual memory footprint) + * - `memHeapUsed` — V8 heap currently in use + * - `memHeapTotal` — total V8 heap allocated (headroom paired with `memHeapUsed`) + * - `eventLoopDelayP50` — median event loop delay (baseline latency) + * - `eventLoopDelayP99` — 99th percentile event loop delay (tail latency / spikes) + * - `eventLoopUtilization` — fraction of time the event loop was active + * - `uptime` — process uptime (detect restarts/crashes) + * + * Default off (opt-in): + * - `cpuTime` — raw user/system CPU time in seconds + * - `memExternal` — external/ArrayBuffer memory (relevant for native addons) + * - `eventLoopDelayMin` / `eventLoopDelayMax` / `eventLoopDelayMean` / `eventLoopDelayP90` */ collect?: { - cpu?: boolean; - memory?: boolean; - eventLoopDelay?: boolean; + // Default on + cpuUtilization?: boolean; + memHeapUsed?: boolean; + memRss?: boolean; + eventLoopDelayP99?: boolean; eventLoopUtilization?: boolean; uptime?: boolean; + // Default off + cpuTime?: boolean; + memHeapTotal?: boolean; + memExternal?: boolean; + eventLoopDelayMin?: boolean; + eventLoopDelayMax?: boolean; + eventLoopDelayMean?: boolean; + eventLoopDelayP50?: boolean; + eventLoopDelayP90?: boolean; }; /** * How often to collect metrics, in milliseconds. @@ -37,62 +64,94 @@ export interface NodeRuntimeMetricsOptions { export const nodeRuntimeMetricsIntegration = defineIntegration((options: NodeRuntimeMetricsOptions = {}) => { const collectionIntervalMs = options.collectionIntervalMs ?? DEFAULT_INTERVAL_MS; const collect = { - cpu: true, - memory: true, - eventLoopDelay: true, + // Default on + cpuUtilization: true, + memHeapUsed: true, + memHeapTotal: true, + memRss: true, + eventLoopDelayP50: true, + eventLoopDelayP99: true, eventLoopUtilization: true, uptime: true, + // Default off + cpuTime: false, + memExternal: false, + eventLoopDelayMin: false, + eventLoopDelayMax: false, + eventLoopDelayMean: false, + eventLoopDelayP90: false, ...options.collect, }; + const needsEventLoopDelay = + collect.eventLoopDelayP99 || + collect.eventLoopDelayMin || + collect.eventLoopDelayMax || + collect.eventLoopDelayMean || + collect.eventLoopDelayP50 || + collect.eventLoopDelayP90; + + const needsCpu = collect.cpuUtilization || collect.cpuTime; + let intervalId: ReturnType | undefined; let prevCpuUsage: NodeJS.CpuUsage | undefined; let prevElu: ReturnType | undefined; - let prevFlushTime: number; + let prevFlushTime: number = 0; let eventLoopDelayHistogram: ReturnType | undefined; + const resolutionNs = EVENT_LOOP_DELAY_RESOLUTION_MS * 1e6; + const nsToS = (ns: number): number => Math.max(0, (ns - resolutionNs) / 1e9); + function collectMetrics(): void { const now = _INTERNAL_safeDateNow(); const elapsed = now - prevFlushTime; - if (collect.cpu && prevCpuUsage !== undefined) { + if (needsCpu && prevCpuUsage !== undefined) { const delta = process.cpuUsage(prevCpuUsage); - metrics.gauge('node.runtime.cpu.user', delta.user / 1e6, { unit: 'second' }); - metrics.gauge('node.runtime.cpu.system', delta.system / 1e6, { unit: 'second' }); - if (elapsed > 0) { + + if (collect.cpuTime) { + metrics.gauge('node.runtime.cpu.user', delta.user / 1e6, { unit: 'second' }); + metrics.gauge('node.runtime.cpu.system', delta.system / 1e6, { unit: 'second' }); + } + if (collect.cpuUtilization && elapsed > 0) { // Ratio of CPU time to wall-clock time. Can exceed 1.0 on multi-core systems. // TODO: In cluster mode, add a runtime_id/process_id attribute to disambiguate per-worker metrics. metrics.gauge('node.runtime.cpu.utilization', (delta.user + delta.system) / (elapsed * 1000)); } + prevCpuUsage = process.cpuUsage(); } - if (collect.memory) { + if (collect.memRss || collect.memHeapUsed || collect.memHeapTotal || collect.memExternal) { const mem = process.memoryUsage(); - metrics.gauge('node.runtime.mem.rss', mem.rss, { unit: 'byte' }); - metrics.gauge('node.runtime.mem.heap_total', mem.heapTotal, { unit: 'byte' }); - metrics.gauge('node.runtime.mem.heap_used', mem.heapUsed, { unit: 'byte' }); - metrics.gauge('node.runtime.mem.external', mem.external, { unit: 'byte' }); - metrics.gauge('node.runtime.mem.array_buffers', mem.arrayBuffers, { unit: 'byte' }); + if (collect.memRss) metrics.gauge('node.runtime.mem.rss', mem.rss, { unit: 'byte' }); + if (collect.memHeapUsed) metrics.gauge('node.runtime.mem.heap_used', mem.heapUsed, { unit: 'byte' }); + if (collect.memHeapTotal) metrics.gauge('node.runtime.mem.heap_total', mem.heapTotal, { unit: 'byte' }); + if (collect.memExternal) { + metrics.gauge('node.runtime.mem.external', mem.external, { unit: 'byte' }); + metrics.gauge('node.runtime.mem.array_buffers', mem.arrayBuffers, { unit: 'byte' }); + } } - if (collect.eventLoopDelay && eventLoopDelayHistogram) { - // Resolution is 10ms (10_000_000 ns) as configured below. Subtract it to normalize out sampling overhead. - const resolutionNs = 10_000_000; - const nsToS = (ns: number): number => Math.max(0, (ns - resolutionNs) / 1e9); - - metrics.gauge('node.runtime.event_loop.delay.min', nsToS(eventLoopDelayHistogram.min), { unit: 'second' }); - metrics.gauge('node.runtime.event_loop.delay.max', nsToS(eventLoopDelayHistogram.max), { unit: 'second' }); - metrics.gauge('node.runtime.event_loop.delay.mean', nsToS(eventLoopDelayHistogram.mean), { unit: 'second' }); - metrics.gauge('node.runtime.event_loop.delay.p50', nsToS(eventLoopDelayHistogram.percentile(50)), { - unit: 'second', - }); - metrics.gauge('node.runtime.event_loop.delay.p90', nsToS(eventLoopDelayHistogram.percentile(90)), { - unit: 'second', - }); - metrics.gauge('node.runtime.event_loop.delay.p99', nsToS(eventLoopDelayHistogram.percentile(99)), { - unit: 'second', - }); + if (needsEventLoopDelay && eventLoopDelayHistogram) { + if (collect.eventLoopDelayMin) + metrics.gauge('node.runtime.event_loop.delay.min', nsToS(eventLoopDelayHistogram.min), { unit: 'second' }); + if (collect.eventLoopDelayMax) + metrics.gauge('node.runtime.event_loop.delay.max', nsToS(eventLoopDelayHistogram.max), { unit: 'second' }); + if (collect.eventLoopDelayMean) + metrics.gauge('node.runtime.event_loop.delay.mean', nsToS(eventLoopDelayHistogram.mean), { unit: 'second' }); + if (collect.eventLoopDelayP50) + metrics.gauge('node.runtime.event_loop.delay.p50', nsToS(eventLoopDelayHistogram.percentile(50)), { + unit: 'second', + }); + if (collect.eventLoopDelayP90) + metrics.gauge('node.runtime.event_loop.delay.p90', nsToS(eventLoopDelayHistogram.percentile(90)), { + unit: 'second', + }); + if (collect.eventLoopDelayP99) + metrics.gauge('node.runtime.event_loop.delay.p99', nsToS(eventLoopDelayHistogram.percentile(99)), { + unit: 'second', + }); eventLoopDelayHistogram.reset(); } @@ -115,9 +174,9 @@ export const nodeRuntimeMetricsIntegration = defineIntegration((options: NodeRun name: INTEGRATION_NAME, setup(): void { - if (collect.eventLoopDelay) { + if (needsEventLoopDelay) { try { - eventLoopDelayHistogram = monitorEventLoopDelay({ resolution: 10 }); + eventLoopDelayHistogram = monitorEventLoopDelay({ resolution: EVENT_LOOP_DELAY_RESOLUTION_MS }); eventLoopDelayHistogram.enable(); } catch { // Not available in all runtimes (e.g. Bun throws NotImplementedError). @@ -125,7 +184,7 @@ export const nodeRuntimeMetricsIntegration = defineIntegration((options: NodeRun } // Prime baselines before the first collection interval. - if (collect.cpu) { + if (needsCpu) { prevCpuUsage = process.cpuUsage(); } if (collect.eventLoopUtilization) { diff --git a/packages/node-core/test/integrations/nodeRuntimeMetrics.test.ts b/packages/node-core/test/integrations/nodeRuntimeMetrics.test.ts index 02355a8efbae..5c7362be4bf5 100644 --- a/packages/node-core/test/integrations/nodeRuntimeMetrics.test.ts +++ b/packages/node-core/test/integrations/nodeRuntimeMetrics.test.ts @@ -107,49 +107,116 @@ describe('nodeRuntimeMetricsIntegration', () => { }); }); - describe('metric collection', () => { - it('emits CPU metrics', () => { + describe('metric collection — defaults', () => { + it('emits cpu utilization (default on)', () => { const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000 }); integration.setup(); vi.advanceTimersByTime(1_000); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.cpu.utilization', expect.any(Number)); + }); + + it('does not emit cpu.user / cpu.system by default (opt-in)', () => { + const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000 }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + expect(gaugeSpy).not.toHaveBeenCalledWith('node.runtime.cpu.user', expect.anything(), expect.anything()); + expect(gaugeSpy).not.toHaveBeenCalledWith('node.runtime.cpu.system', expect.anything(), expect.anything()); + }); + + it('emits cpu.user / cpu.system when cpuTime is opted in', () => { + const integration = nodeRuntimeMetricsIntegration({ + collectionIntervalMs: 1_000, + collect: { cpuTime: true }, + }); + integration.setup(); + vi.advanceTimersByTime(1_000); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.cpu.user', expect.any(Number), { unit: 'second' }); expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.cpu.system', expect.any(Number), { unit: 'second' }); - expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.cpu.utilization', expect.any(Number)); }); - it('emits memory metrics', () => { + it('emits mem.rss, mem.heap_used, mem.heap_total (default on)', () => { const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000 }); integration.setup(); vi.advanceTimersByTime(1_000); expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.mem.rss', 50_000_000, { unit: 'byte' }); - expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.mem.heap_total', 30_000_000, { unit: 'byte' }); expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.mem.heap_used', 20_000_000, { unit: 'byte' }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.mem.heap_total', 30_000_000, { unit: 'byte' }); + }); + + it('does not emit mem.external / mem.array_buffers by default (opt-in)', () => { + const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000 }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + expect(gaugeSpy).not.toHaveBeenCalledWith('node.runtime.mem.external', expect.anything(), expect.anything()); + expect(gaugeSpy).not.toHaveBeenCalledWith('node.runtime.mem.array_buffers', expect.anything(), expect.anything()); + }); + + it('emits mem.external / mem.array_buffers when opted in', () => { + const integration = nodeRuntimeMetricsIntegration({ + collectionIntervalMs: 1_000, + collect: { memExternal: true }, + }); + integration.setup(); + vi.advanceTimersByTime(1_000); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.mem.external', 1_000_000, { unit: 'byte' }); expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.mem.array_buffers', 500_000, { unit: 'byte' }); }); - it('emits event loop delay metrics and resets histogram after collection', () => { + it('emits event_loop.delay.p50 and p99 (default on) and resets histogram', () => { const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000 }); integration.setup(); vi.advanceTimersByTime(1_000); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.delay.p50', expect.any(Number), { + unit: 'second', + }); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.delay.p99', expect.any(Number), { + unit: 'second', + }); + expect(mockHistogram.reset).toHaveBeenCalledOnce(); + }); + + it('does not emit min/max/mean/p90 event loop delay by default (opt-in)', () => { + const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000 }); + integration.setup(); + vi.advanceTimersByTime(1_000); + + for (const suffix of ['min', 'max', 'mean', 'p90']) { + expect(gaugeSpy).not.toHaveBeenCalledWith( + `node.runtime.event_loop.delay.${suffix}`, + expect.anything(), + expect.anything(), + ); + } + }); + + it('emits all opt-in event loop delay percentiles when enabled', () => { + const integration = nodeRuntimeMetricsIntegration({ + collectionIntervalMs: 1_000, + collect: { + eventLoopDelayMin: true, + eventLoopDelayMax: true, + eventLoopDelayMean: true, + eventLoopDelayP90: true, + }, + }); + integration.setup(); + vi.advanceTimersByTime(1_000); + // min: (2_000_000 - 10_000_000) clamped to 0 → 0s expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.delay.min', 0, { unit: 'second' }); // max: (20_000_000 - 10_000_000) / 1e9 → 0.01s expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.delay.max', 0.01, { unit: 'second' }); expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.delay.mean', 0, { unit: 'second' }); - expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.delay.p50', expect.any(Number), { - unit: 'second', - }); expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.delay.p90', expect.any(Number), { unit: 'second', }); - expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.delay.p99', expect.any(Number), { - unit: 'second', - }); - expect(mockHistogram.reset).toHaveBeenCalledOnce(); }); it('emits event loop utilization metric', () => { @@ -178,7 +245,7 @@ describe('nodeRuntimeMetricsIntegration', () => { vi.advanceTimersByTime(1_000); expect(gaugeSpy).not.toHaveBeenCalledWith( - 'node.runtime.event_loop.delay.min', + 'node.runtime.event_loop.delay.p99', expect.anything(), expect.anything(), ); @@ -186,23 +253,21 @@ describe('nodeRuntimeMetricsIntegration', () => { }); describe('opt-out', () => { - it('skips CPU metrics when collect.cpu is false', () => { + it('skips cpu.utilization when cpuUtilization is false', () => { const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000, - collect: { cpu: false }, + collect: { cpuUtilization: false }, }); integration.setup(); vi.advanceTimersByTime(1_000); - expect(gaugeSpy).not.toHaveBeenCalledWith('node.runtime.cpu.user', expect.anything(), expect.anything()); - expect(gaugeSpy).not.toHaveBeenCalledWith('node.runtime.cpu.system', expect.anything(), expect.anything()); expect(gaugeSpy).not.toHaveBeenCalledWith('node.runtime.cpu.utilization', expect.anything(), expect.anything()); }); - it('skips memory metrics when collect.memory is false', () => { + it('skips mem.rss when memRss is false', () => { const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000, - collect: { memory: false }, + collect: { memRss: false }, }); integration.setup(); vi.advanceTimersByTime(1_000); @@ -210,23 +275,43 @@ describe('nodeRuntimeMetricsIntegration', () => { expect(gaugeSpy).not.toHaveBeenCalledWith('node.runtime.mem.rss', expect.anything(), expect.anything()); }); - it('skips event loop delay when collect.eventLoopDelay is false', () => { + it('skips event loop delay metrics when all delay flags are false', () => { const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000, - collect: { eventLoopDelay: false }, + collect: { eventLoopDelayP50: false, eventLoopDelayP99: false }, }); integration.setup(); expect(mockMonitorEventLoopDelay).not.toHaveBeenCalled(); vi.advanceTimersByTime(1_000); + for (const suffix of ['min', 'max', 'mean', 'p50', 'p90', 'p99']) { + expect(gaugeSpy).not.toHaveBeenCalledWith( + `node.runtime.event_loop.delay.${suffix}`, + expect.anything(), + expect.anything(), + ); + } + }); + + it('skips only p99 but still emits p50 when eventLoopDelayP99 is false', () => { + const integration = nodeRuntimeMetricsIntegration({ + collectionIntervalMs: 1_000, + collect: { eventLoopDelayP99: false }, + }); + integration.setup(); + vi.advanceTimersByTime(1_000); + expect(gaugeSpy).not.toHaveBeenCalledWith( - 'node.runtime.event_loop.delay.min', + 'node.runtime.event_loop.delay.p99', expect.anything(), expect.anything(), ); + expect(gaugeSpy).toHaveBeenCalledWith('node.runtime.event_loop.delay.p50', expect.any(Number), { + unit: 'second', + }); }); - it('skips event loop utilization when collect.eventLoopUtilization is false', () => { + it('skips event loop utilization when eventLoopUtilization is false', () => { const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000, collect: { eventLoopUtilization: false }, @@ -241,7 +326,7 @@ describe('nodeRuntimeMetricsIntegration', () => { ); }); - it('skips uptime when collect.uptime is false', () => { + it('skips uptime when uptime is false', () => { const integration = nodeRuntimeMetricsIntegration({ collectionIntervalMs: 1_000, collect: { uptime: false },