Compare commits
1 Commits
nrc-std-ax
...
2469
Author | SHA1 | Date | |
---|---|---|---|
a905874930 |
141
.github/workflows/find_duplicate_issues.py
vendored
Executable file
141
.github/workflows/find_duplicate_issues.py
vendored
Executable file
@ -0,0 +1,141 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import os
|
||||||
|
import openai
|
||||||
|
import json
|
||||||
|
from github import Github
|
||||||
|
|
||||||
|
# Model used for the duplicate analysis.
MODEL_NAME = "gpt-4o"

# Instructions sent as the system message. The list of open issues is appended
# to this text. The reply format is a JSON *object* because the request uses
# response_format={"type": "json_object"}, which cannot yield a bare array.
SYSTEM_PROMPT_HEADER = """You are a distinguished engineer. Your peers
keep creating duplicate GitHub issues and you have OCD. You have decided to use your
skills to find duplicates for them. You are analyzing the issues in the repository.
Your goal is to find potential duplicate GitHub issues.
Do not return the current issue as a duplicate of itself. Use the issue title, body,
and comments to find potential duplicates.

Whenever you find a potential duplicate, you need to be very very sure that it is a duplicate.
Error on the side of caution. If you are not sure, do not return it as a duplicate.
Most won't have duplicates and that is fine! You are looking for the ones that do.
If you mistakenly return a non-duplicate, you will be penalized to spend time with the
interns helping them learn to exit vim. You do not want to do that.

Check and make sure that no one else has already commented that the issue is a duplicate.
If they have commented, you should not return it as a duplicate.

Your confidence level must be over 90% to return an issue as a duplicate.

Take a deep breath and begin your analysis. Your reputation is on the line.
Your responses should be formatted as a json object with a single "duplicates" key
whose value is an array.
The following are examples of valid responses:

```json
{"duplicates": []}
```

```json
{"duplicates": [{"issue_number": 1234, "title": "Issue title"}]}
```

Below are the current open issues in the repository. They are formatted as:

#{issue number}: {issue title}

# body

{issue body}

---

The current open issues in the repository are:
"""


def require_env(name):
    """Return the value of environment variable *name*, or exit with status 1.

    Prints the same "Please set the ... environment variable." hint the script
    has always printed when the variable is missing or empty.
    """
    value = os.getenv(name)
    if not value:
        print(f"Please set the {name} environment variable.")
        # SystemExit instead of the interactive-only ``exit()`` helper.
        raise SystemExit(1)
    return value


def fetch_open_issues(repo):
    """Collect title, body and comment bodies of every open issue in *repo*.

    Returns a dict keyed by issue number. Pull requests are skipped: the
    GitHub issues listing includes them, but they are not issues.
    """
    all_issues = {}
    for issue in repo.get_issues(state="open"):
        if issue.pull_request is not None:
            continue
        all_issues[issue.number] = {
            "title": issue.title,
            # Body may be None for issues opened without a description.
            "body": issue.body or "",
            "comments": [comment.body or "" for comment in issue.get_comments()],
        }
    return all_issues


def build_issues_prompt(all_issues):
    """Render every issue as the ``#number: title / # body / ---`` section
    format that the system prompt describes, concatenated in dict order."""
    return "".join(
        f"\n#{number}: {data['title']}\n\n# body\n\n{data['body'] or ''}\n\n---\n\n"
        for number, data in all_issues.items()
    )


def build_user_prompt(issue_number, issue_data):
    """Render the user message asking for duplicates of a single issue."""
    comments = "\n\n".join(issue_data["comments"])
    return (
        f"Find duplicates for GitHub issue #{issue_number} titled:\n"
        f"{issue_data['title']}\n"
        "\n"
        "# issue body\n"
        f"{issue_data['body'] or ''}\n"
        "\n"
        "---\n"
        "\n"
        "# issue comments:\n"
        f"{comments}"
    )


def parse_duplicates(content, current_issue_number, known_issue_numbers):
    """Extract duplicate issue numbers from the model's JSON reply.

    Accepts either ``{"duplicates": [...]}`` or a bare list, where each entry
    is an object with an ``issue_number`` key or a plain integer. Entries are
    dropped when they are not integers, refer to the current issue, refer to
    an issue that is not in *known_issue_numbers*, or repeat an earlier entry.
    Missing or malformed content yields an empty list.
    """
    if not content:
        return []
    try:
        payload = json.loads(content)
    except json.JSONDecodeError:
        return []
    if isinstance(payload, dict):
        payload = payload.get("duplicates", [])
    if not isinstance(payload, list):
        return []

    found = []
    for entry in payload:
        number = entry.get("issue_number") if isinstance(entry, dict) else entry
        # bool is a subclass of int; reject it explicitly.
        if isinstance(number, bool) or not isinstance(number, int):
            continue
        if number == current_issue_number or number not in known_issue_numbers:
            continue
        if number not in found:
            found.append(number)
    return found


def main():
    """Fetch the open issues, ask the model for duplicates, print a report."""
    # Initialize GitHub and OpenAI clients
    g = Github(require_env("GITHUB_TOKEN"))
    openai.api_key = require_env("OPENAI_API_KEY")

    # Target repository
    repo = g.get_repo(require_env("GITHUB_REPOSITORY"))

    # Fetch all issues and comments
    all_issues = fetch_open_issues(repo)

    # Create the start of the prompt template with all the issues and bodies.
    system_issues_prompt = build_issues_prompt(all_issues)
    print(system_issues_prompt)

    # Whitespace-separated word count: only a rough proxy for the token count.
    print(
        "Approximate word count for system_issues_prompt: "
        f"{len(system_issues_prompt.split())}"
    )

    # Analyze issues for duplicates using OpenAI's GPT-4
    system_prompt = SYSTEM_PROMPT_HEADER + system_issues_prompt
    potential_duplicates = {}
    for issue_number, issue_data in all_issues.items():
        print(f"Analyzing issue #{issue_number} {issue_data['title']} ...")
        response = openai.chat.completions.create(
            model=MODEL_NAME,
            response_format={"type": "json_object"},
            messages=[
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": build_user_prompt(issue_number, issue_data),
                },
            ],
        )

        if not response.choices:
            print("No duplicate issues found.")
            continue

        duplicates = []
        for choice in response.choices:
            content = choice.message.content
            print(content)
            for number in parse_duplicates(content, issue_number, all_issues):
                if number not in duplicates:
                    duplicates.append(number)
        if duplicates:
            potential_duplicates[issue_number] = duplicates

    # Print potential duplicates
    print("Potential duplicate issues:")
    for issue_number, duplicates in potential_duplicates.items():
        issue = all_issues[issue_number]
        references = ", ".join(f"#{number}" for number in duplicates)
        print(
            f"Issue #{issue_number}: {issue['title']} - "
            f"{repo.html_url}/issues/{issue_number} "
            f"(possible duplicate of {references})"
        )


if __name__ == "__main__":
    main()
|
31
.github/workflows/identify-duplicates.yml
vendored
Normal file
31
.github/workflows/identify-duplicates.yml
vendored
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
# Nightly job that runs the duplicate-issue finder against this repository.
name: Identify Duplicate Issues

on:
  schedule:
    - cron: '0 0 * * *' # Runs at midnight UTC every day

permissions:
  issues: write

jobs:
  find-duplicates:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install PyGithub openai

      - name: Run script to identify duplicates
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        # The script is committed under .github/workflows/, next to this file;
        # GITHUB_REPOSITORY is supplied by the runner.
        run: python .github/workflows/find_duplicate_issues.py
|
1
.gitignore
vendored
1
.gitignore
vendored
@ -4,6 +4,7 @@
|
|||||||
/node_modules
|
/node_modules
|
||||||
/.pnp
|
/.pnp
|
||||||
.pnp.js
|
.pnp.js
|
||||||
|
venv
|
||||||
|
|
||||||
# testing
|
# testing
|
||||||
/coverage
|
/coverage
|
||||||
|
Reference in New Issue
Block a user