Compare commits
	
		
			1 Commits
		
	
	
		
			pierremtb/
			...
			2469
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| a905874930 | 
							
								
								
									
										141
									
								
								.github/workflows/find_duplicate_issues.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							
							
						
						
									
										141
									
								
								.github/workflows/find_duplicate_issues.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							| @ -0,0 +1,141 @@ | |||||||
|  | #!/usr/bin/env python3 | ||||||
|  | import os | ||||||
|  | import openai | ||||||
|  | import json | ||||||
|  | from github import Github | ||||||
|  |  | ||||||
|  | # Initialize GitHub and OpenAI clients | ||||||
|  | if not os.getenv("GITHUB_TOKEN"): | ||||||
|  |     print("Please set the GITHUB_TOKEN environment variable.") | ||||||
|  |     exit(1) | ||||||
|  | g = Github(os.getenv("GITHUB_TOKEN")) | ||||||
|  |  | ||||||
|  | if not os.getenv("OPENAI_API_KEY"): | ||||||
|  |     print("Please set the OPENAI_API_KEY environment variable.") | ||||||
|  |     exit(1) | ||||||
|  | openai.api_key = os.getenv("OPENAI_API_KEY") | ||||||
|  |  | ||||||
|  | # Target repository | ||||||
|  | repo_name = os.getenv("GITHUB_REPOSITORY") | ||||||
|  | if not repo_name: | ||||||
|  |     print("Please set the GITHUB_REPOSITORY environment variable.") | ||||||
|  |     exit(1) | ||||||
|  |  | ||||||
|  | repo = g.get_repo(repo_name) | ||||||
|  |  | ||||||
|  | # Fetch all open issues and their comments | ||||||
|  | issues = repo.get_issues(state="open") | ||||||
|  | all_issues = {} | ||||||
|  | for issue in issues: | ||||||
|  |     comments = issue.get_comments() | ||||||
|  |     all_comments = [comment.body for comment in comments] | ||||||
|  |     all_issues[issue.number] = { | ||||||
|  |         "title": issue.title, | ||||||
|  |         "body": issue.body, | ||||||
|  |         "comments": all_comments, | ||||||
|  |     } | ||||||
|  |  | ||||||
|  | # Build the issue listing that is appended to the system prompt: the number, title, and body of every open issue. | ||||||
|  | system_issues_prompt = "" | ||||||
|  | for issue_number, issue_data in all_issues.items(): | ||||||
|  |     system_issues_prompt += f""" | ||||||
|  | #{issue_number}: {issue_data['title']} | ||||||
|  |  | ||||||
|  | # body | ||||||
|  |  | ||||||
|  | {issue_data['body']} | ||||||
|  |  | ||||||
|  | --- | ||||||
|  |  | ||||||
|  | """ | ||||||
|  |  | ||||||
|  | print(system_issues_prompt) | ||||||
|  |  | ||||||
|  | # Rough size estimate for system_issues_prompt: this counts whitespace-separated words, not model tokens | ||||||
|  | print(f"Token count for system_issues_prompt: {len(system_issues_prompt.split())}") | ||||||
|  |  | ||||||
|  |  | ||||||
|  | # Analyze each open issue for duplicates using OpenAI's gpt-4o model | ||||||
|  | potential_duplicates = [] | ||||||
|  | for issue_number, issue_data in all_issues.items(): | ||||||
|  |     print(f"Analyzing issue #{issue_number} {issue_data['title']} ...") | ||||||
|  |     response = openai.chat.completions.create( | ||||||
|  |         model="gpt-4o", | ||||||
|  |         response_format={ "type": "json_object" }, | ||||||
|  |         messages=[ | ||||||
|  |             { | ||||||
|  |                 "role": "system", | ||||||
|  |                 "content": """You are a distinguished engineer. Your peers | ||||||
|  | keep creating duplicate GitHub issues and you have OCD. You have decided to use your | ||||||
|  | skills to find duplicates for them. You are analyzing the issues in the repository. | ||||||
|  | Your goal is to find potential duplicate GitHub issues. | ||||||
|  | Do not return the current issue as a duplicate of itself. Use the issue title, body, | ||||||
|  | and comments to find potential duplicates. | ||||||
|  |  | ||||||
|  | Whenever you find a potential duplicate, you need to be very very sure that it is a duplicate. | ||||||
|  | Error on the side of caution. If you are not sure, do not return it as a duplicate. | ||||||
|  | Most won't have duplicates and that is fine! You are looking for the ones that do. | ||||||
|  | If you mistakenly return a non-duplicate, you will be penalized to spend time with the | ||||||
|  | interns helping them learn to exit vim. You do not want to do that. | ||||||
|  |  | ||||||
|  | Check and make sure that no one else has already commented that the issue is a duplicate. | ||||||
|  | If they have commented, you should not return it as a duplicate. | ||||||
|  |  | ||||||
|  | Your confidence level must be over 90% to return an issue as a duplicate. | ||||||
|  |  | ||||||
|  | Take a deep breath and begin your analysis. Your reputation is on the line. | ||||||
|  | Your responses should be formatted as a json array. | ||||||
|  | The following are examples of valid responses: | ||||||
|  |  | ||||||
|  | ```json | ||||||
|  | [] | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | ```json | ||||||
|  | [{"issue_number": 1234, "title": "Issue title"}] | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | Below are the current open issues in the repository. They are formatted as: | ||||||
|  |  | ||||||
|  | #{issue number}: {issue title} | ||||||
|  |  | ||||||
|  | # body | ||||||
|  |  | ||||||
|  | {issue body} | ||||||
|  |  | ||||||
|  | --- | ||||||
|  |  | ||||||
|  | The current open issues in the repository are: | ||||||
|  | """ | ||||||
|  |                 + system_issues_prompt, | ||||||
|  |             }, | ||||||
|  |             { | ||||||
|  |                 "role": "user", | ||||||
|  |                 "content": f"""Find duplicates for GitHub issue #{issue_number} titled: | ||||||
|  | {issue_data['title']} | ||||||
|  |  | ||||||
|  | # issue body | ||||||
|  | {issue_data['body']} | ||||||
|  |  | ||||||
|  | --- | ||||||
|  |  | ||||||
|  | # issue comments: | ||||||
|  | {issue_data['comments']}""", | ||||||
|  |             }, | ||||||
|  |         ], | ||||||
|  |     ) | ||||||
|  |  | ||||||
|  |     if len(response.choices) == 0: | ||||||
|  |         print("No duplicate issues found.") | ||||||
|  |         continue | ||||||
|  |  | ||||||
|  |     for response in response.choices: | ||||||
|  |         print(response.message.content) | ||||||
|  |  | ||||||
|  | # Print potential duplicates (NOTE: nothing above appends to potential_duplicates, so this loop currently prints no entries) | ||||||
|  | print("Potential duplicate issues:") | ||||||
|  | for issue_number in potential_duplicates: | ||||||
|  |     issue = all_issues[issue_number] | ||||||
|  |     print( | ||||||
|  |         f"Issue #{issue_number}: {issue['title']} - {repo.html_url}/issues/{issue_number}" | ||||||
|  |     ) | ||||||
							
								
								
									
										31
									
								
								.github/workflows/identify-duplicates.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								.github/workflows/identify-duplicates.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,31 @@ | |||||||
|  | name: Identify Duplicate Issues | ||||||
|  |  | ||||||
|  | on: | ||||||
|  |   schedule: | ||||||
|  |     - cron: '0 0 * * *' # Runs at midnight UTC every day | ||||||
|  |  | ||||||
|  | permissions: | ||||||
|  |   issues: write | ||||||
|  |  | ||||||
|  | jobs: | ||||||
|  |   find-duplicates: | ||||||
|  |     runs-on: ubuntu-latest | ||||||
|  |     steps: | ||||||
|  |       - name: Checkout code | ||||||
|  |         uses: actions/checkout@v3 | ||||||
|  |  | ||||||
|  |       - name: Set up Python | ||||||
|  |         uses: actions/setup-python@v4 | ||||||
|  |         with: | ||||||
|  |           python-version: '3.x' | ||||||
|  |  | ||||||
|  |       - name: Install dependencies | ||||||
|  |         run: | | ||||||
|  |           python -m pip install --upgrade pip | ||||||
|  |           pip install PyGithub openai | ||||||
|  |  | ||||||
|  |       - name: Run script to identify duplicates | ||||||
|  |         env: | ||||||
|  |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||||||
|  |           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} | ||||||
|  |         # The script is committed under .github/workflows/ (not .github/scripts/), so run it from there. | ||||||
|  |         run: python .github/workflows/find_duplicate_issues.py | ||||||
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -4,6 +4,7 @@ | |||||||
| /node_modules | /node_modules | ||||||
| /.pnp | /.pnp | ||||||
| .pnp.js | .pnp.js | ||||||
|  | venv | ||||||
|  |  | ||||||
| # testing | # testing | ||||||
| /coverage | /coverage | ||||||
|  | |||||||
		Reference in New Issue
	
	Block a user
	