Skip to content

Commit da2fb1f

Browse files
committed
Refine token size calculation and model selection in Coder class
Resolves #25. In this commit, we've made several adjustments to the `Coder` class in `aicodebot/coder.py` and `aicodebot/cli.py`. The token size calculation now includes a 5% buffer, down from 10%, to account for the occasional underestimation by the `tiktoken` library. The `get_token_length` method now defaults to the `gpt-4` model for token counting, and the debug output has been improved for readability. In `aicodebot/cli.py`, we've adjusted the `model_name` calculation in several methods to include `response_token_size` in the token count. This ensures that the selected model can handle the combined size of the request and response. In the `sidekick` method, we've also introduced a `memory_token_size` to allow for a decent history. These changes should improve the accuracy of model selection and prevent errors when the token count exceeds the model's limit.
1 parent df0eda4 commit da2fb1f

File tree

2 files changed

+17
-13
lines changed

2 files changed

+17
-13
lines changed

aicodebot/cli.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def alignment(response_token_size, verbose):
5959
logger.trace(f"Prompt: {prompt}")
6060

6161
# Set up the language model
62-
model_name = Coder.get_llm_model_name(Coder.get_token_length(prompt.template))
62+
model_name = Coder.get_llm_model_name(Coder.get_token_length(prompt.template) + response_token_size)
6363

6464
with Live(Markdown(""), auto_refresh=True) as live:
6565
llm = Coder.get_llm(
@@ -142,7 +142,7 @@ def commit(verbose, response_token_size, yes, skip_pre_commit, files): # noqa:
142142

143143
# Check the size of the diff context and adjust accordingly
144144
request_token_size = Coder.get_token_length(diff_context) + Coder.get_token_length(prompt.template)
145-
model_name = Coder.get_llm_model_name(request_token_size)
145+
model_name = Coder.get_llm_model_name(request_token_size + response_token_size)
146146
if model_name is None:
147147
raise click.ClickException(
148148
f"The diff is too large to generate a commit message ({request_token_size} tokens). 😢"
@@ -303,7 +303,7 @@ def debug(command, verbose):
303303

304304
# Set up the language model
305305
request_token_size = Coder.get_token_length(error_output) + Coder.get_token_length(prompt.template)
306-
model_name = Coder.get_llm_model_name(request_token_size)
306+
model_name = Coder.get_llm_model_name(request_token_size + DEFAULT_MAX_TOKENS)
307307
if model_name is None:
308308
raise click.ClickException(f"The output is too large to debug ({request_token_size} tokens). 😢")
309309

@@ -379,9 +379,8 @@ def review(commit, verbose, output_format, response_token_size, files):
379379
logger.trace(f"Prompt: {prompt}")
380380

381381
# Check the size of the diff context and adjust accordingly
382-
response_token_size = DEFAULT_MAX_TOKENS * 2
383382
request_token_size = Coder.get_token_length(diff_context) + Coder.get_token_length(prompt.template)
384-
model_name = Coder.get_llm_model_name(request_token_size)
383+
model_name = Coder.get_llm_model_name(request_token_size + response_token_size)
385384
if model_name is None:
386385
raise click.ClickException(f"The diff is too large to review ({request_token_size} tokens). 😢")
387386

@@ -432,8 +431,9 @@ def sidekick(request, verbose, response_token_size, files):
432431

433432
# Generate the prompt and set up the model
434433
prompt = get_prompt("sidekick")
434+
memory_token_size = response_token_size * 2 # Allow decent history
435435
request_token_size = Coder.get_token_length(prompt.template) + Coder.get_token_length(context)
436-
model_name = Coder.get_llm_model_name(request_token_size)
436+
model_name = Coder.get_llm_model_name(request_token_size + response_token_size + memory_token_size)
437437
if model_name is None:
438438
raise click.ClickException(
439439
f"The file context you supplied is too large ({request_token_size} tokens). 😢 Try again with less files."
@@ -446,7 +446,7 @@ def sidekick(request, verbose, response_token_size, files):
446446

447447
# Set up the chain
448448
memory = ConversationTokenBufferMemory(
449-
memory_key="chat_history", input_key="task", llm=llm, max_token_limit=DEFAULT_MAX_TOKENS
449+
memory_key="chat_history", input_key="task", llm=llm, max_token_limit=memory_token_size
450450
)
451451
chain = LLMChain(llm=llm, prompt=prompt, memory=memory, verbose=verbose)
452452
history_file = Path.home() / ".aicodebot_request_history"
@@ -457,8 +457,11 @@ def sidekick(request, verbose, response_token_size, files):
457457
if request:
458458
human_input = request
459459
else:
460-
human_input = input_prompt("🤖 ➤ ", history=FileHistory(history_file))
461-
if len(human_input) == 1:
460+
human_input = input_prompt("🤖 ➤ ", history=FileHistory(history_file)).strip()
461+
if not human_input:
462+
# Must have been spaces or blank line
463+
continue
464+
elif len(human_input) == 1:
462465
if human_input.lower() == "q":
463466
break
464467
elif human_input.lower() == "e":

aicodebot/coder.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,8 @@ def get_llm_model_name(token_size=0):
115115
# Pull the list of supported engines from the OpenAI API for this key
116116
supported_engines = Coder.get_openai_supported_engines()
117117

118-
# For some unknown reason, tiktoken often underestimates the token size by ~10%, so let's buffer
119-
token_size = int(token_size * 1.1)
118+
# For some unknown reason, tiktoken often underestimates the token size by ~5%, so let's buffer
119+
token_size = int(token_size * 1.05)
120120

121121
for model, max_tokens in model_options.items():
122122
if model in supported_engines and token_size <= max_tokens:
@@ -130,12 +130,13 @@ def get_llm_model_name(token_size=0):
130130
return None
131131

132132
@staticmethod
133-
def get_token_length(text, model="gpt-3.5-turbo"):
133+
def get_token_length(text, model="gpt-4"):
134134
"""Get the number of tokens in a string using the tiktoken library."""
135135
encoding = tiktoken.encoding_for_model(model)
136136
tokens = encoding.encode(text)
137137
token_length = len(tokens)
138-
logger.debug(f"Token length for text {text[0:10]}...: {token_length}")
138+
short_text = text.strip()[0:20] + "..." if len(text) > 10 else text
139+
logger.debug(f"Token length for {short_text}: {token_length}")
139140
return token_length
140141

141142
@staticmethod

0 commit comments

Comments (0)