Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 140 additions & 22 deletions app/api/webhook/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,34 +101,118 @@ async function fetchCommentThread(
}


// Helper function to load contributing.md from repository with caching
// Helper function to extract markdown links from content
// Extracts every inline markdown link of the form [text](url) from `content`.
// Returns one {text, url} pair per match, in document order; an empty array
// when there are no links. Reference-style links ([text][ref]) are not handled.
function extractMarkdownLinks(content: string): Array<{text: string, url: string}> {
  const linkPattern = /\[([^\]]+)\]\(([^)]+)\)/g;
  return Array.from(content.matchAll(linkPattern), (m) => ({
    text: m[1],
    url: m[2],
  }));
}

// Helper function to resolve relative URLs to raw GitHub URLs
// Resolves a markdown link target to a fetchable raw.githubusercontent.com URL,
// or returns null when the link should not be followed.
//
// Returns null for: in-page anchors (#section), non-fetchable schemes
// (mailto:, tel:, …), and absolute URLs outside github.com for this repo.
// Relative paths are resolved against the repository's `main` branch.
// NOTE(review): the branch is hard-coded to `main` — repos whose default
// branch differs (e.g. `master`) will 404; consider passing the default
// branch in. Also, `url.includes(owner)` is a substring check, not a path
// check, so short owner/repo names can match unrelated GitHub URLs.
function resolveGitHubUrl(url: string, owner: string, repo: string): string | null {
  // In-page anchors have nothing to fetch.
  if (url.startsWith('#')) {
    return null;
  }
  // Skip non-HTTP schemes (mailto:, tel:, ftp:, …) — previously these fell
  // through to the relative branch and produced bogus raw URLs.
  if (/^[a-z][a-z0-9+.-]*:/i.test(url) && !url.startsWith('http')) {
    return null;
  }

  if (url.startsWith('http')) {
    if (url.includes('github.com') && url.includes(owner) && url.includes(repo)) {
      return url.replace('github.com', 'raw.githubusercontent.com').replace('/blob/', '/').replace('/tree/', '/');
    }
    return null; // Skip non-GitHub URLs
  }

  // Relative URL: drop any #fragment (fetch ignores it anyway, and keeping it
  // would defeat visited-URL deduplication), then convert to a raw URL.
  const withoutFragment = url.split('#')[0];
  const cleanUrl = withoutFragment.startsWith('./') ? withoutFragment.substring(2) : withoutFragment;
  return `https://raw.githubusercontent.com/${owner}/${repo}/main/${cleanUrl}`;
}

// Helper function to fetch content from URL
// Fetches `url` and returns the response body as text, or null on any
// failure (network error or non-2xx status). Failures are logged at DEBUG
// and never thrown, so link-following degrades gracefully.
async function fetchUrlContent(url: string): Promise<string | null> {
  try {
    const response = await fetch(url);
    if (response.ok) {
      return await response.text();
    }
    // Previously non-OK responses were silently dropped; record them so
    // broken links in guidelines are diagnosable.
    log("DEBUG", `Failed to fetch URL content: ${url}`, { status: response.status });
  } catch (error: unknown) {
    // `unknown` instead of `any`: narrow before touching .message.
    const message = error instanceof Error ? error.message : String(error);
    log("DEBUG", `Failed to fetch URL content: ${url}`, { error: message });
  }
  return null;
}

// Helper function to process linked content for additional links
// Recursively follows markdown links found in `content`, appending the text of
// each successfully fetched linked document (and its own links, transitively)
// to the returned string.
//
// Parameters:
//   content     - markdown text to scan for links
//   owner, repo - repository coordinates used to resolve relative links
//   depth       - current recursion depth (callers start at 1)
//   visitedUrls - shared set of already-fetched raw URLs; prevents cycles and
//                 duplicate fetches across the whole traversal
//   maxDepth    - recursion cutoff (default 3, matching the previous
//                 hard-coded limit); generalized so callers can tune it
//
// Returns `content` unchanged once `depth` reaches `maxDepth`. Fetches are
// sequential by design to keep request bursts against GitHub small.
async function processLinkedContent(
  content: string,
  owner: string,
  repo: string,
  depth: number,
  visitedUrls: Set<string>,
  maxDepth: number = 3
): Promise<string> {
  if (depth >= maxDepth) return content;

  const links = extractMarkdownLinks(content);
  let processedContent = content;

  for (const link of links) {
    const resolvedUrl = resolveGitHubUrl(link.url, owner, repo);
    // Skip unfollowable links and anything already fetched this traversal.
    if (!resolvedUrl || visitedUrls.has(resolvedUrl)) {
      continue;
    }

    // Mark visited before fetching so a failed fetch is not retried.
    visitedUrls.add(resolvedUrl);
    const linkedContent = await fetchUrlContent(resolvedUrl);
    if (linkedContent) {
      const nestedContent = await processLinkedContent(linkedContent, owner, repo, depth + 1, visitedUrls, maxDepth);
      processedContent += `\n\n--- Content from ${link.text} (${link.url}) ---\n${nestedContent}`;
    }
  }

  return processedContent;
}

async function loadContributingGuidelines(
octokit: any,
owner: string,
repo: string
repo: string,
depth: number = 0,
visitedUrls: Set<string> = new Set()
): Promise<string | null> {
const cacheKey = `${owner}/${repo}`;
const maxDepth = 3;
if (depth >= maxDepth) {
log("DEBUG", `Maximum recursion depth reached for ${owner}/${repo}`);
return null;
}

const cacheKey = `${owner}/${repo}:${depth}`;

// Check cache first
if (config.enableCaching && cache.has(cacheKey)) {
const cached = cache.get(cacheKey)!;
if (Date.now() - cached.timestamp < config.cacheTimeout) {
log("INFO", `Contributing guidelines loaded from cache for ${cacheKey}`);
return cached.content;
} else {
cache.delete(cacheKey); // Remove expired cache
cache.delete(cacheKey);
}
}

log("INFO", `Loading contributing guidelines for ${cacheKey}`);

const altPaths = [
"CONTRIBUTING.md",
"contributing.md",
"contributing.md",
".github/CONTRIBUTING.md",
"docs/CONTRIBUTING.md",
];

let mainContent = "";
let foundPath = "";

// First, get the main contributing guidelines
for (const path of altPaths) {
try {
const response = await octokit.request(
Expand All @@ -141,31 +225,63 @@ async function loadContributingGuidelines(
);

if (response.data.content) {
const content = Buffer.from(response.data.content, "base64").toString(
"utf-8"
);

// Cache the result
if (config.enableCaching) {
cache.set(cacheKey, {
content,
timestamp: Date.now(),
});
}

log("INFO", `Contributing guidelines found at ${path} for ${cacheKey}`);
return content;
mainContent = Buffer.from(response.data.content, "base64").toString("utf-8");
foundPath = path;
log("INFO", `Contributing guidelines found at ${path} for ${owner}/${repo}`);
break;
}
} catch (error: any) {
log("DEBUG", `Failed to load contributing guidelines from ${path}`, {
error: error.message,
});
// Continue to next path
}
}

log("WARN", `No contributing guidelines found for ${cacheKey}`);
return null;
if (!mainContent) {
log("WARN", `No contributing guidelines found for ${owner}/${repo}`);
return null;
}

let aggregatedContent = mainContent;

if (depth < maxDepth - 1) {
const links = extractMarkdownLinks(mainContent);
log("DEBUG", `Found ${links.length} markdown links in ${foundPath}`, { links: links.map(l => l.url) });

for (const link of links) {
const resolvedUrl = resolveGitHubUrl(link.url, owner, repo);
if (!resolvedUrl || visitedUrls.has(resolvedUrl)) {
continue;
}

visitedUrls.add(resolvedUrl);
log("DEBUG", `Following link: ${link.text} -> ${resolvedUrl}`);

const linkedContent = await fetchUrlContent(resolvedUrl);
if (linkedContent) {
const processedLinkedContent = await processLinkedContent(
linkedContent,
owner,
repo,
depth + 1,
visitedUrls
);

aggregatedContent += `\n\n--- Content from ${link.text} (${link.url}) ---\n${processedLinkedContent}`;
log("INFO", `Successfully aggregated content from ${resolvedUrl}`);
}
}
}

// Cache the aggregated result
if (config.enableCaching) {
cache.set(cacheKey, {
content: aggregatedContent,
timestamp: Date.now(),
});
}

return aggregatedContent;
}

// Helper function to generate friendly response using Claude
Expand All @@ -181,6 +297,8 @@ async function generateFriendlyResponse(

const systemPrompt = `You are a GitHub bot that enforces contributing guidelines. Only comment when there are clear, specific violations of the contributing guidelines.

The contributing guidelines may include content from multiple linked documents that have been automatically aggregated to provide comprehensive context.

DO NOT comment for:
- Minor style, grammar, or formatting issues
- Casual but professional language
Expand Down
94 changes: 94 additions & 0 deletions tests/webhook.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,97 @@ describe('generateFriendlyResponse integration', () => {
expect(result.comment).toBe("");
});
});

// Unit tests for the webhook's markdown link-following helpers.
describe('Link Following Functionality', () => {
// NOTE(review): these two helpers are copy-pasted re-implementations of the
// private functions in app/api/webhook/route.ts (which does not export them).
// They must be kept in sync by hand — consider exporting the originals and
// importing them here instead, so the tests exercise the real code.
function extractMarkdownLinks(content: string): Array<{text: string, url: string}> {
const linkRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
const links: Array<{text: string, url: string}> = [];
let match;

while ((match = linkRegex.exec(content)) !== null) {
links.push({
text: match[1],
url: match[2]
});
}

return links;
}

// Mirrors the route module's URL resolver: GitHub blob/tree URLs become raw
// URLs, relative paths resolve against the `main` branch, other hosts → null.
function resolveGitHubUrl(url: string, owner: string, repo: string): string | null {
if (url.startsWith('http')) {
if (url.includes('github.com') && url.includes(owner) && url.includes(repo)) {
return url.replace('github.com', 'raw.githubusercontent.com').replace('/blob/', '/').replace('/tree/', '/');
}
return null;
}

const cleanUrl = url.startsWith('./') ? url.substring(2) : url;
return `https://raw.githubusercontent.com/${owner}/${repo}/main/${cleanUrl}`;
}

// Happy path: two standard inline links are extracted in document order.
test('extractMarkdownLinks parses standard markdown links', () => {
const content = 'Please see our [README](README.md) and [Code of Conduct](CODE_OF_CONDUCT.md) for details.';
const links = extractMarkdownLinks(content);

expect(links).toHaveLength(2);
expect(links[0]).toEqual({ text: 'README', url: 'README.md' });
expect(links[1]).toEqual({ text: 'Code of Conduct', url: 'CODE_OF_CONDUCT.md' });
});

// Relative, absolute-GitHub, and external links are all extracted; filtering
// of external links is resolveGitHubUrl's job, not the extractor's.
test('extractMarkdownLinks handles various link formats', () => {
const content = `
- [Relative link](./docs/guide.md)
- [Absolute GitHub link](https://github.com/owner/repo/blob/main/SETUP.md)
- [External link](https://example.com)
`;
const links = extractMarkdownLinks(content);

expect(links).toHaveLength(3);
expect(links[0].url).toBe('./docs/guide.md');
expect(links[1].url).toBe('https://github.com/owner/repo/blob/main/SETUP.md');
expect(links[2].url).toBe('https://example.com');
});

test('resolveGitHubUrl converts relative URLs correctly', () => {
expect(resolveGitHubUrl('README.md', 'owner', 'repo'))
.toBe('https://raw.githubusercontent.com/owner/repo/main/README.md');

expect(resolveGitHubUrl('./docs/guide.md', 'owner', 'repo'))
.toBe('https://raw.githubusercontent.com/owner/repo/main/docs/guide.md');
});

test('resolveGitHubUrl converts GitHub URLs to raw URLs', () => {
const githubUrl = 'https://github.com/owner/repo/blob/main/SETUP.md';
expect(resolveGitHubUrl(githubUrl, 'owner', 'repo'))
.toBe('https://raw.githubusercontent.com/owner/repo/main/SETUP.md');
});

test('resolveGitHubUrl filters out non-GitHub URLs', () => {
expect(resolveGitHubUrl('https://example.com/guide', 'owner', 'repo')).toBeNull();
});

// NOTE(review): this test is a tautology — it asserts a local constant equals
// itself and exercises no recursion code. Replace it with a test that drives
// the actual depth-limited traversal (e.g. via a mocked fetch).
test('depth limiting prevents infinite recursion', () => {
const maxDepth = 3;
expect(maxDepth).toBe(3);
});

test('extractMarkdownLinks handles empty content', () => {
const links = extractMarkdownLinks('');
expect(links).toHaveLength(0);
});

test('extractMarkdownLinks handles content with no links', () => {
const content = 'This is just plain text with no markdown links.';
const links = extractMarkdownLinks(content);
expect(links).toHaveLength(0);
});

// Both /blob/ (file) and /tree/ (directory) GitHub URLs map to raw URLs.
test('resolveGitHubUrl handles different GitHub URL formats', () => {
expect(resolveGitHubUrl('https://github.com/owner/repo/blob/main/docs/file.md', 'owner', 'repo'))
.toBe('https://raw.githubusercontent.com/owner/repo/main/docs/file.md');

expect(resolveGitHubUrl('https://github.com/owner/repo/tree/main/docs', 'owner', 'repo'))
.toBe('https://raw.githubusercontent.com/owner/repo/main/docs');
});
});