Skip to content

Commit 37612b8

Browse files
committed
Use a git_diff_context and check the token size for review and commit functions.
1 parent 4b5db06 commit 37612b8

File tree

3 files changed

+63
-45
lines changed

3 files changed

+63
-45
lines changed

aicodebot/cli.py

Lines changed: 48 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from aicodebot import version as aicodebot_version
2-
from aicodebot.helpers import exec_and_get_output
2+
from aicodebot.helpers import exec_and_get_output, get_token_length, git_diff_context
33
from dotenv import load_dotenv
44
from langchain.chains import LLMChain
55
from langchain.chat_models import ChatOpenAI
@@ -81,10 +81,10 @@ def alignment(verbose):
8181

8282
@cli.command()
8383
@click.option("-v", "--verbose", count=True)
84-
@click.option("-t", "--max-tokens", type=int, default=250)
84+
@click.option("-t", "--response-token-size", type=int, default=250)
8585
@click.option("-y", "--yes", is_flag=True, default=False, help="Don't ask for confirmation before committing.")
8686
@click.option("--skip-pre-commit", is_flag=True, help="Skip running pre-commit (otherwise run it if it is found).")
87-
def commit(verbose, max_tokens, yes, skip_pre_commit):
87+
def commit(verbose, response_token_size, yes, skip_pre_commit):
8888
"""Generate a git commit message and commit changes after you approve."""
8989
setup_environment()
9090

@@ -99,36 +99,45 @@ def commit(verbose, max_tokens, yes, skip_pre_commit):
9999
# Load the prompt
100100
prompt = load_prompt(Path(__file__).parent / "prompts" / "commit_message.yaml")
101101

102-
# Set up the language model
103-
llm = OpenAI(temperature=0.1, max_tokens=max_tokens)
104-
105-
# Set up the chain
106-
chain = LLMChain(llm=llm, prompt=prompt, verbose=verbose)
107-
108102
# Get the changes from git
109103
staged_files = exec_and_get_output(["git", "diff", "--name-only", "--cached"])
110-
base_git_diff = ["git", "diff", "-U10"] # Tell diff to provide 10 lines of context
111104
if not staged_files:
112105
# If no files are staged, assume they want to commit all changed files.
113106
exec_and_get_output(["git", "add", "-A"])
114-
# Get the diff for all changes since the last commit
115-
diff = exec_and_get_output(base_git_diff + ["HEAD"])
116107
# Get the list of files to be committed
117108
files = exec_and_get_output(["git", "diff", "--name-only", "--cached"])
118109
else:
119-
# If some files are staged, get the diff for those files
120-
diff = exec_and_get_output(base_git_diff + ["--cached"])
121110
# The list of files to be committed is the same as the list of staged files
122111
files = staged_files
123112

124-
if not diff:
113+
diff_context = git_diff_context()
114+
115+
if not diff_context:
125116
console.print("No changes to commit.")
126117
sys.exit(0)
127118

128-
console.print("The following files will be committed:\n" + files)
119+
# Check the size of the diff context and adjust accordingly
120+
diff_context_token_size = get_token_length(diff_context)
121+
if verbose:
122+
console.print(f"Diff context token size: {diff_context_token_size}")
123+
124+
if diff_context_token_size + response_token_size > 16_000:
125+
console.print("The diff context is too large to review. Bigger models coming soon.")
126+
sys.exit(1)
127+
elif diff_context_token_size + response_token_size > 4_000:
128+
model = "gpt-3.5-turbo-16k" # supports 16k tokens but is a bit slower and more expensive
129+
else:
130+
model = "gpt-3.5-turbo" # supports 4k tokens
131+
132+
# Set up the language model
133+
llm = ChatOpenAI(temperature=0.1, model=model, max_tokens=response_token_size)
129134

135+
# Set up the chain
136+
chain = LLMChain(llm=llm, prompt=prompt, verbose=verbose)
137+
138+
console.print("The following files will be committed:\n" + files)
130139
with console.status("Thinking", spinner="point"):
131-
response = chain.run(diff)
140+
response = chain.run(diff_context)
132141

133142
# Write the commit message to a temporary file
134143
with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp:
@@ -212,41 +221,42 @@ def fun_fact(verbose):
212221

213222

214223
@cli.command
215-
@click.option("--commit", "-c", help="The commit hash to review.")
216-
@click.option("--verbose", "-v")
224+
@click.option("-c", "--commit", help="The commit hash to review.")
225+
@click.option("-v", "--verbose", count=True)
217226
def review(commit, verbose):
218227
"""Use AI to do a code review, with [un]staged changes, or a specified commit."""
219228
setup_environment()
220229

221-
if commit:
222-
# If a commit hash is specified, get the diff for that commit
223-
diff = exec_and_get_output(["git", "show", commit])
224-
else:
225-
# If no commit hash is specified, get the diff for changes, staged or not
226-
staged_files = exec_and_get_output(["git", "diff", "--name-only", "--cached"])
227-
base_git_diff = ["git", "diff", "-U10"] # Tell diff to provide 10 lines of context
228-
if not staged_files:
229-
# Get the diff for all changes since the last commit
230-
diff = exec_and_get_output(base_git_diff + ["HEAD"])
231-
else:
232-
# If some files are staged, get the diff for those files
233-
diff = exec_and_get_output(base_git_diff + ["--cached"])
234-
235-
if not diff:
236-
console.print("No changes to commit.")
237-
sys.exit(0)
230+
diff_context = git_diff_context(commit)
231+
if not diff_context:
232+
console.print("No changes to commit.")
233+
sys.exit(0)
238234

239235
# Load the prompt
240236
prompt = load_prompt(Path(__file__).parent / "prompts" / "review.yaml")
241237

238+
# Check the size of the diff context and adjust accordingly
239+
response_token_size = DEFAULT_MAX_TOKENS / 2
240+
diff_context_token_size = get_token_length(diff_context)
241+
if verbose:
242+
console.print(f"Diff context token size: {diff_context_token_size}")
243+
244+
if diff_context_token_size + response_token_size > 16_000:
245+
console.print("The diff context is too large to review. Bigger models coming soon.")
246+
sys.exit(1)
247+
elif diff_context_token_size + response_token_size > 4_000:
248+
model = "gpt-3.5-turbo-16k" # supports 16k tokens but is a bit slower and more expensive
249+
else:
250+
model = "gpt-3.5-turbo" # supports 4k tokens
251+
242252
# Set up the language model
243-
llm = OpenAI(temperature=0.1, max_tokens=DEFAULT_MAX_TOKENS)
253+
llm = ChatOpenAI(temperature=0.1, model=model, max_tokens=response_token_size)
244254

245255
# Set up the chain
246256
chain = LLMChain(llm=llm, prompt=prompt, verbose=verbose)
247257

248258
with console.status("Reviewing", spinner="point"):
249-
response = chain.run(diff)
259+
response = chain.run(diff_context)
250260
console.print(response, style=bot_style)
251261

252262

aicodebot/prompts/commit_message.yaml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,20 @@
11
_type: prompt
22
template_format: f-string
3-
input_variables: ["diff"]
3+
input_variables: ["diff_context"]
44
template: |
5-
I have a diff of a code change that I need to commit to a git repository. The diff is as follows:
5+
I have a diff of a code change that I need to commit to a git repository. The relevant diff context is as follows,
6+
between the BEGIN DIFF and END DIFF markers:
67
78
BEGIN DIFF
8-
{diff}
9+
{diff_context}
910
END DIFF
1011
1112
Generate a commit message for me. The commit message should follow best practices,
1213
which means it should have a short, single-line summary, followed by a blank line, and then a more
1314
detailed explanatory text, but only if necessary.
1415
Avoid redundancy between the summary line and the explanatory text. Don't repeat yourself.
1516
If the detailed explanatory text is not necessary, then omit it and just do the summary
17+
Use imperative mood for the commit message, e.g. "Add feature" instead of "Added feature".
1618
1719
The text can be in GitHub-flavored markdown format.
1820

aicodebot/prompts/review.yaml

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,23 @@
11
_type: prompt
22
template_format: f-string
3-
input_variables: ["diff"]
3+
input_variables: ["diff_context"]
44
template: |
55
You are an expert code reviewer.
66
You know how to give constructive feedback.
77
You know how to give feedback that is actionable.
88
You know how to give feedback that is kind.
99
You know how to give feedback that is specific.
10+
Contextually appropriate emojis are encouraged, but not required.
1011
11-
Review this code change:
12+
DO NOT give comments that discuss formatting, as those will be handled with pre-commit with the black and isort hooks.
13+
DO NOT respond with line numbers, use function names or file names instead (you're going to be wrong about the line numbers anyway).
14+
15+
Review this code change. The relevant diff context is as follows, between the BEGIN DIFF and END DIFF markers:
1216
1317
BEGIN DIFF
14-
{diff}
18+
{diff_context}
1519
END DIFF
1620
17-
If the changes look good and don't require any feedback, then just respond with "LGTM" (looks good to me).
21+
The main focus is to tell the author how they could make the code better.
22+
23+
If the changes look good overall and don't require any feedback, then just respond with "LGTM" (looks good to me).

0 commit comments

Comments
 (0)