Compare commits
	
		
			1 Commits
		
	
	
		
			lf94/speci
			...
			2469
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| a905874930 | 
							
								
								
									
										141
									
								
								.github/workflows/find_duplicate_issues.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							
							
						
						
									
										141
									
								
								.github/workflows/find_duplicate_issues.py
									
									
									
									
										vendored
									
									
										Executable file
									
								
							| @ -0,0 +1,141 @@ | ||||
| #!/usr/bin/env python3 | ||||
| import os | ||||
| import openai | ||||
| import json | ||||
| from github import Github | ||||
|  | ||||
| # Initialize GitHub and OpenAI clients | ||||
| if not os.getenv("GITHUB_TOKEN"): | ||||
|     print("Please set the GITHUB_TOKEN environment variable.") | ||||
|     exit(1) | ||||
| g = Github(os.getenv("GITHUB_TOKEN")) | ||||
|  | ||||
| if not os.getenv("OPENAI_API_KEY"): | ||||
|     print("Please set the OPENAI_API_KEY environment variable.") | ||||
|     exit(1) | ||||
| openai.api_key = os.getenv("OPENAI_API_KEY") | ||||
|  | ||||
| # Target repository | ||||
| repo_name = os.getenv("GITHUB_REPOSITORY") | ||||
| if not repo_name: | ||||
|     print("Please set the GITHUB_REPOSITORY environment variable.") | ||||
|     exit(1) | ||||
|  | ||||
| repo = g.get_repo(repo_name) | ||||
|  | ||||
| # Fetch all issues and comments | ||||
| issues = repo.get_issues(state="open") | ||||
| all_issues = {} | ||||
| for issue in issues: | ||||
|     comments = issue.get_comments() | ||||
|     all_comments = [comment.body for comment in comments] | ||||
|     all_issues[issue.number] = { | ||||
|         "title": issue.title, | ||||
|         "body": issue.body, | ||||
|         "comments": all_comments, | ||||
|     } | ||||
|  | ||||
| # Create the start of the prompt template with all the issues and bodies. | ||||
| system_issues_prompt = "" | ||||
| for issue_number, issue_data in all_issues.items(): | ||||
|     system_issues_prompt += f""" | ||||
| #{issue_number}: {issue_data['title']} | ||||
|  | ||||
| # body | ||||
|  | ||||
| {issue_data['body']} | ||||
|  | ||||
| --- | ||||
|  | ||||
| """ | ||||
|  | ||||
| print(system_issues_prompt) | ||||
|  | ||||
| # Get the token count for the system_issues_prompt | ||||
| print(f"Token count for system_issues_prompt: {len(system_issues_prompt.split())}") | ||||
|  | ||||
|  | ||||
| # Analyze issues for duplicates using OpenAI's GPT-4 | ||||
| potential_duplicates = [] | ||||
| for issue_number, issue_data in all_issues.items(): | ||||
|     print(f"Analyzing issue #{issue_number} {issue_data['title']} ...") | ||||
|     response = openai.chat.completions.create( | ||||
|         model="gpt-4o", | ||||
|         response_format={ "type": "json_object" }, | ||||
|         messages=[ | ||||
|             { | ||||
|                 "role": "system", | ||||
|                 "content": """You are a distinguished engineer. Your peers | ||||
| keep creating duplicate GitHub issues and you have OCD. You have decided to use your | ||||
| skills to find duplicates for them. You are analyzing the issues in the repository. | ||||
| Your goal is to find potential duplicate GitHub issues. | ||||
| Do not return the current issue as a duplicate of itself. Use the issue title, body, | ||||
| and comments to find potential duplicates. | ||||
|  | ||||
| Whenever you find a potential duplicate, you need to be very very sure that it is a duplicate. | ||||
| Error on the side of caution. If you are not sure, do not return it as a duplicate. | ||||
| Most won't have duplicates and that is fine! You are looking for the ones that do. | ||||
| If you mistakenly return a non-duplicate, you will be penalized to spend time with the | ||||
| interns helping them learn to exit vim. You do not want to do that. | ||||
|  | ||||
| Check and make sure that no one else has already commented that the issue is a duplicate. | ||||
| If they have commented, you should not return it as a duplicate. | ||||
|  | ||||
| Your confidence level must be over 90% to return an issue as a duplicate. | ||||
|  | ||||
| Take a deep breath and begin your analysis. Your reputation is on the line. | ||||
| Your responses should be formatted as a json array. | ||||
| The following are examples of valid responses: | ||||
|  | ||||
| ```json | ||||
| [] | ||||
| ``` | ||||
|  | ||||
| ```json | ||||
| [{"issue_number": 1234, "title": "Issue title"}] | ||||
| ``` | ||||
|  | ||||
| Below are the current open issues in the repository. They are formatted as: | ||||
|  | ||||
| #{issue number}: {issue title} | ||||
|  | ||||
| # body | ||||
|  | ||||
| {issue body} | ||||
|  | ||||
| --- | ||||
|  | ||||
| The current open issues in the repository are: | ||||
| """ | ||||
|                 + system_issues_prompt, | ||||
|             }, | ||||
|             { | ||||
|                 "role": "user", | ||||
|                 "content": f"""Find duplicates for GitHub issue #{issue_number} titled: | ||||
| {issue_data['title']} | ||||
|  | ||||
| # issue body | ||||
| {issue_data['body']} | ||||
|  | ||||
| --- | ||||
|  | ||||
| # issue comments: | ||||
| {issue_data['comments']}""", | ||||
|             }, | ||||
|         ], | ||||
|     ) | ||||
|  | ||||
|     if len(response.choices) == 0: | ||||
|         print("No duplicate issues found.") | ||||
|         continue | ||||
|  | ||||
|     for response in response.choices: | ||||
|         print(response.message.content) | ||||
|  | ||||
| # Print potential duplicates | ||||
| print("Potential duplicate issues:") | ||||
| for issue_number in potential_duplicates: | ||||
|     issue = all_issues[issue_number] | ||||
|     print( | ||||
|         f"Issue #{issue_number}: {issue['title']} - {repo.html_url}/issues/{issue_number}" | ||||
|     ) | ||||
							
								
								
									
										31
									
								
								.github/workflows/identify-duplicates.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								.github/workflows/identify-duplicates.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,31 @@ | ||||
| # Scheduled workflow that runs the duplicate-issue finder script. | ||||
| name: Identify Duplicate Issues | ||||
|  | ||||
| on: | ||||
|   schedule: | ||||
|     - cron: '0 0 * * *' # Runs at 00:00 (midnight) every day | ||||
|  | ||||
| permissions: | ||||
|   issues: write | ||||
|  | ||||
| jobs: | ||||
|   find-duplicates: | ||||
|     runs-on: ubuntu-latest | ||||
|     steps: | ||||
|       - name: Checkout code | ||||
|         uses: actions/checkout@v3 | ||||
|  | ||||
|       - name: Set up Python | ||||
|         uses: actions/setup-python@v4 | ||||
|         with: | ||||
|           python-version: '3.x' | ||||
|  | ||||
|       - name: Install dependencies | ||||
|         run: | | ||||
|           python -m pip install --upgrade pip | ||||
|           pip install PyGithub openai | ||||
|  | ||||
|       - name: Run script to identify duplicates | ||||
|         env: | ||||
|           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||||
|           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} | ||||
|         # The script is committed next to this workflow under | ||||
|         # .github/workflows/, so it must be invoked from that path. | ||||
|         # NOTE(review): the script also reads GITHUB_REPOSITORY, which is not | ||||
|         # set here; presumably the runner provides it by default - confirm. | ||||
|         run: python .github/workflows/find_duplicate_issues.py | ||||
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -4,6 +4,7 @@ | ||||
| /node_modules | ||||
| /.pnp | ||||
| .pnp.js | ||||
| venv | ||||
|  | ||||
| # testing | ||||
| /coverage | ||||
|  | ||||
		Reference in New Issue
	
	Block a user
	