From 6021292714f2a881916edb345f07c15f07bf9999 Mon Sep 17 00:00:00 2001 From: Simplyalex99 Date: Wed, 2 Jul 2025 14:39:56 -0400 Subject: [PATCH 1/4] feat: duplicate message scanner --- .../duplicate-scanner/duplicate-scanner.ts | 81 +++++++++++++++++++ src/index.ts | 13 ++- 2 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 src/features/duplicate-scanner/duplicate-scanner.ts diff --git a/src/features/duplicate-scanner/duplicate-scanner.ts b/src/features/duplicate-scanner/duplicate-scanner.ts new file mode 100644 index 00000000..57d6f43d --- /dev/null +++ b/src/features/duplicate-scanner/duplicate-scanner.ts @@ -0,0 +1,81 @@ +import type { ChannelHandlers, HandleMessageArgs } from "../../types/index.js"; +import { EmbedType } from "discord.js"; + +import { EMBED_COLOR } from "../commands.js"; +import { LRUCache } from "lru-cache"; +import { isStaff } from "../../helpers/discord.js"; + +const maxMessagesPerUser = 5; // Maximum number of messages per user to track +// Time (ms) to keep track of duplicates (e.g., 30 sec) +export const duplicateCache = new LRUCache>({ + max: 100, + ttl: 1000 * 60 * 0.5, + dispose: (value) => { + value.clear(); + }, +}); +const maxTrivialCharacters = 10; +const removeFirstElement = (messages: Set) => { + const iterator = messages.values(); + const firstElement = iterator.next().value; + if (firstElement) { + messages.delete(firstElement); + } +}; + +const handleDuplicateMessage = async ({ + msg, + userId, +}: HandleMessageArgs & { userId: string }) => { + await msg.delete().catch(console.error); + const warningMsg = `Hey <@${userId}>, it looks like you've posted this message in another channel already. Please avoid cross-posting.`; + const warning = await msg.channel.send({ + embeds: [ + { + title: "Duplicate Message Detected", + type: EmbedType.Rich, + description: warningMsg, + color: EMBED_COLOR, + }, + ], + }); + + // Auto-delete warning after 30 seconds + setTimeout(() => { + warning.delete().catch(console.error); + }, 30_000); + + return; +}; +const normalizeContent = (content: string) => + content.trim().toLowerCase().replace(/\s+/g, " "); +export const messageDuplicateChecker: ChannelHandlers = { + handleMessage: async ({ msg, bot }) => { + if (msg.author.bot || isStaff(msg.member)) return; + + const content = normalizeContent(msg.content); + const userId = msg.author.id; + + if (content.length < maxTrivialCharacters) return; + + const userMessages = duplicateCache.get(userId); + + if (!userMessages) { + const messages = new Set(); + messages.add(content); + duplicateCache.set(userId, messages); + return; + } + + if (userMessages.has(content)) { + await handleDuplicateMessage({ msg, bot, userId }); + return; + } + + if (userMessages.size >= maxMessagesPerUser) { + removeFirstElement(userMessages); + } + + userMessages.add(content); + }, +}; diff --git a/src/index.ts b/src/index.ts index 5a556afa..8e8e8a9f 100644 --- a/src/index.ts +++ b/src/index.ts @@ -42,6 +42,7 @@ import { recommendBookCommand } from "./features/book-list.js"; import { mdnSearch } from "./features/mdn.js"; import "./server.js"; import { jobScanner } from "./features/job-scanner.js"; +import { messageDuplicateChecker } from "./features/duplicate-scanner/duplicate-scanner.js"; export const bot = new Client({ intents: [ @@ -222,7 +223,17 @@ const threadChannels = [CHANNELS.helpJs, CHANNELS.helpThreadsReact]; addHandler(threadChannels, autothread); addHandler(CHANNELS.resumeReview, resumeReviewPdf); - +addHandler( + [ + CHANNELS.helpReact, + CHANNELS.generalReact, + CHANNELS.generalTech, + CHANNELS.helpBackend, + CHANNELS.helpStyling, + CHANNELS.helpReactNative, + ], + messageDuplicateChecker, +); bot.on("ready", () => { deployCommands(bot); jobsMod(bot); From 18f759374f63e154730241bd7afc235460c55126 Mon Sep 17 00:00:00 2001 From: Simplyalex99 Date: Wed, 2 Jul 2025 14:43:22 -0400 Subject: [PATCH 2/4] test: duplicate scanner caching --- package-lock.json | 7 ++ package.json | 1 + .../duplicate-scanner.test.ts | 93 +++++++++++++++++++ 3 files changed, 101 insertions(+) create mode 100644 src/features/duplicate-scanner/duplicate-scanner.test.ts diff --git a/package-lock.json b/package-lock.json index 40091068..b9c77045 100644 --- a/package-lock.json +++ b/package-lock.json @@ -33,6 +33,7 @@ "xss": "^1.0.15" }, "devDependencies": { + "@total-typescript/shoehorn": "^0.1.2", "@types/node": "20.14.2", "@types/node-cron": "3.0.11", "@types/node-fetch": "2.6.11", @@ -1240,6 +1241,12 @@ "npm": ">=7.0.0" } }, + "node_modules/@total-typescript/shoehorn": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/@total-typescript/shoehorn/-/shoehorn-0.1.2.tgz", + "integrity": "sha512-p7nNZbOZIofpDNyP0u1BctFbjxD44Qc+oO5jufgQdFdGIXJLc33QRloJpq7k5T59CTgLWfQSUxsuqLcmeurYRw==", + "dev": true + }, "node_modules/@tsconfig/node10": { "version": "1.0.11", "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.11.tgz", diff --git a/package.json b/package.json index 377b70aa..a5b91207 100644 --- a/package.json +++ b/package.json @@ -45,6 +45,7 @@ "xss": "^1.0.15" }, "devDependencies": { + "@total-typescript/shoehorn": "^0.1.2", "@types/node": "20.14.2", "@types/node-cron": "3.0.11", "@types/node-fetch": "2.6.11", diff --git a/src/features/duplicate-scanner/duplicate-scanner.test.ts b/src/features/duplicate-scanner/duplicate-scanner.test.ts new file mode 100644 index 00000000..ba5c92ab --- /dev/null +++ b/src/features/duplicate-scanner/duplicate-scanner.test.ts @@ -0,0 +1,93 @@ +import { describe, it, expect, beforeEach, vi } from "vitest"; +import { messageDuplicateChecker } from "./duplicate-scanner"; +import { User } from "discord.js"; +import { fromPartial } from "@total-typescript/shoehorn"; +import { duplicateCache } from "./duplicate-scanner"; + +const maxMessagesPerUser = 5; +const maxCacheSize = 100; +const maxTrivialCharacters = 10; +// Mock dependencies +const mockBot = { + channels: { + fetch: vi.fn().mockResolvedValue({ + type: "GUILD_TEXT", + send: vi.fn().mockResolvedValue({ + delete: vi.fn().mockResolvedValue(undefined), + }), + }), + }, +}; +const mockMessage = (content: string, authorId: string, isBot = false) => { + return { + content, + author: { id: authorId, bot: isBot } as User, + delete: vi.fn(), + channel: { + send: vi.fn().mockResolvedValue({ + delete: vi.fn().mockResolvedValue(undefined), + }), + }, + }; +}; +describe("Duplicate Scanner Tests", () => { + beforeEach(() => { + // Reset the cache before each test + duplicateCache.clear(); + }); + it(`should not store messages less than ${maxTrivialCharacters} characters`, async () => { + const msg = mockMessage("Help me", "user1"); + const bot = mockBot; + messageDuplicateChecker.handleMessage?.(fromPartial({ msg, bot })); + const userMessages = duplicateCache.get("user1"); + expect(userMessages).toBeUndefined(); + }); + + it("should store messages correctly in the cache", async () => { + const msg = mockMessage("Hello world", "user1"); + const bot = mockBot; + + messageDuplicateChecker.handleMessage?.(fromPartial({ msg, bot })); + + const userMessages = duplicateCache.get("user1"); + expect(userMessages).toBeDefined(); + expect(userMessages?.has("hello world")).toBe(true); + }); + + it(`should enforce max size of ${maxMessagesPerUser} messages per user`, async () => { + const bot = mockBot; + for (let i = 1; i <= maxMessagesPerUser; i++) { + const msg = mockMessage(`Message to delete ${i}`, "user1"); + await messageDuplicateChecker.handleMessage?.(fromPartial({ msg, bot })); + } + + const userMessages = duplicateCache.get("user1"); + expect(userMessages).toBeDefined(); + expect(userMessages?.size).toBe(maxMessagesPerUser); + + const msg = mockMessage("New Message", "user1"); + await messageDuplicateChecker.handleMessage?.(fromPartial({ msg, bot })); + + expect(userMessages?.size).toBe(maxMessagesPerUser); + expect(userMessages?.has("message 1")).toBe(false); // First message should be removed + expect(userMessages?.has("new message")).toBe(true); // New message should be added + }); + + it(`should enforce max size of ${maxCacheSize} users in the cache`, async () => { + const bot = mockBot; + + for (let i = 1; i <= maxCacheSize; i++) { + const msg = mockMessage("Hello world", `user${i}`); + await messageDuplicateChecker.handleMessage?.(fromPartial({ msg, bot })); + } + + expect(duplicateCache.size).toBe(maxCacheSize); + + const msg = mockMessage("Hello world", "user101"); + await messageDuplicateChecker.handleMessage?.(fromPartial({ msg, bot })); + + expect(duplicateCache.size).toBe(maxCacheSize); + expect(duplicateCache.has("user1")).toBe(false); + expect(duplicateCache.has("user101")).toBe(true); + }); +}); From 3569615863371e5665751ce8a11c5a262e0046a5 Mon Sep 17 00:00:00 2001 From: Simplyalex99 Date: Wed, 9 Jul 2025 06:26:28 -0400 Subject: [PATCH 3/4] chore: disabled bot for star helpers/mvp --- src/features/duplicate-scanner/duplicate-scanner.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/features/duplicate-scanner/duplicate-scanner.ts b/src/features/duplicate-scanner/duplicate-scanner.ts index 57d6f43d..592a772f 100644 --- a/src/features/duplicate-scanner/duplicate-scanner.ts +++ b/src/features/duplicate-scanner/duplicate-scanner.ts @@ -3,7 +3,7 @@ import { EmbedType } from "discord.js"; import { EMBED_COLOR } from "../commands.js"; import { LRUCache } from "lru-cache"; -import { isStaff } from "../../helpers/discord.js"; +import { isStaff, isHelpful } from "../../helpers/discord.js"; const maxMessagesPerUser = 5; // Maximum number of messages per user to track // Time (ms) to keep track of duplicates (e.g., 30 sec) @@ -51,7 +51,7 @@ const normalizeContent = (content: string) => content.trim().toLowerCase().replace(/\s+/g, " "); export const messageDuplicateChecker: ChannelHandlers = { handleMessage: async ({ msg, bot }) => { - if (msg.author.bot || isStaff(msg.member)) return; + if (msg.author.bot || isStaff(msg.member) || isHelpful(msg.member)) return; const content = normalizeContent(msg.content); const userId = msg.author.id; From 1a6c9a7bd247574bcab1bf932d8b78fab405dc8c Mon Sep 17 00:00:00 2001 From: Simplyalex99 Date: Mon, 14 Jul 2025 19:36:49 -0400 Subject: [PATCH 4/4] chore: limited channels, added logging and cooldown --- .../duplicate-scanner/duplicate-scanner.ts | 14 +++++++++++++- src/features/duplicate-scanner/helper.ts | 11 +++++++++++ src/index.ts | 10 ++-------- 3 files changed, 26 insertions(+), 9 deletions(-) create mode 100644 src/features/duplicate-scanner/helper.ts diff --git a/src/features/duplicate-scanner/duplicate-scanner.ts b/src/features/duplicate-scanner/duplicate-scanner.ts index 592a772f..3394ecb0 100644 --- a/src/features/duplicate-scanner/duplicate-scanner.ts +++ b/src/features/duplicate-scanner/duplicate-scanner.ts @@ -4,7 +4,10 @@ import { EmbedType } from "discord.js"; import { EMBED_COLOR } from "../commands.js"; import { LRUCache } from "lru-cache"; import { isStaff, isHelpful } from "../../helpers/discord.js"; - +import { logger } from "../log.js"; +import { truncateMessage } from "../../helpers/modLog.js"; +import { formatWithEllipsis } from "./helper.js"; +import cooldown from "../cooldown.js"; const maxMessagesPerUser = 5; // Maximum number of messages per user to track // Time (ms) to keep track of duplicates (e.g., 30 sec) export const duplicateCache = new LRUCache>({ @@ -28,6 +31,11 @@ const handleDuplicateMessage = async ({ userId, }: HandleMessageArgs & { userId: string }) => { await msg.delete().catch(console.error); + const cooldownKey = `resume-${msg.channelId}`; + if (cooldown.hasCooldown(userId, cooldownKey)) { + return; + } + cooldown.addCooldown(userId, cooldownKey); const warningMsg = `Hey <@${userId}>, it looks like you've posted this message in another channel already. Please avoid cross-posting.`; const warning = await msg.channel.send({ embeds: [ @@ -45,6 +53,10 @@ const handleDuplicateMessage = async ({ warning.delete().catch(console.error); }, 30_000); + logger.log( + "duplicate message detected", + `${msg.author.username} in <#${msg.channel.id}> \n${formatWithEllipsis(truncateMessage(msg.content, 100))}`, + ); return; }; const normalizeContent = (content: string) => diff --git a/src/features/duplicate-scanner/helper.ts b/src/features/duplicate-scanner/helper.ts new file mode 100644 index 00000000..3c5c4dfe --- /dev/null +++ b/src/features/duplicate-scanner/helper.ts @@ -0,0 +1,11 @@ +export const formatWithEllipsis = (sentences: string): string => { + const ellipsis = "..."; + + if (sentences.length === 0) { + return ellipsis; + } + if (sentences.charAt(sentences.length - 1) !== ".") { + return sentences + ellipsis; + } + return sentences + ".."; +}; diff --git a/src/index.ts b/src/index.ts index 8e8e8a9f..c2dbad56 100644 --- a/src/index.ts +++ b/src/index.ts @@ -43,6 +43,7 @@ import { mdnSearch } from "./features/mdn.js"; import "./server.js"; import { jobScanner } from "./features/job-scanner.js"; import { messageDuplicateChecker } from "./features/duplicate-scanner/duplicate-scanner.js"; +import cooldown from "./features/cooldown.js"; export const bot = new Client({ intents: [ @@ -224,14 +225,7 @@ addHandler(threadChannels, autothread); addHandler(CHANNELS.resumeReview, resumeReviewPdf); addHandler( - [ - CHANNELS.helpReact, - CHANNELS.generalReact, - CHANNELS.generalTech, - CHANNELS.helpBackend, - CHANNELS.helpStyling, - CHANNELS.helpReactNative, - ], + [CHANNELS.helpReact, CHANNELS.generalReact, CHANNELS.generalTech], messageDuplicateChecker, ); bot.on("ready", () => {