From 2bba496f9b292d37639c569caa2499646c35e6f1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 25 Nov 2025 20:27:01 +0000 Subject: [PATCH 01/10] Initial plan From 388f5672d270290897d66fae85cff2e63d5b360b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 25 Nov 2025 20:33:49 +0000 Subject: [PATCH 02/10] Modernize SmartPilot: UV, OpenAI Responses API, improved prompts, tests Co-authored-by: jaredkirby <8931029+jaredkirby@users.noreply.github.com> --- .gitignore | 4 + README.md | 135 +++++++++-- main.py | 220 ------------------ mainLit.py | 77 ------- notes.py | 58 ----- prompt/__init__.py | 0 prompt/system.py | 41 ---- pyproject.toml | 50 ++++ requirements.txt | 5 - src/smartpilot/__init__.py | 3 + src/smartpilot/main.py | 391 ++++++++++++++++++++++++++++++++ src/smartpilot/prompts.py | 77 +++++++ src/smartpilot/streamlit_app.py | 99 ++++++++ tests/__init__.py | 1 + tests/test_main.py | 200 ++++++++++++++++ tests/test_prompts.py | 72 ++++++ utils.py | 59 ----- 17 files changed, 1016 insertions(+), 476 deletions(-) delete mode 100644 main.py delete mode 100644 mainLit.py delete mode 100644 notes.py delete mode 100644 prompt/__init__.py delete mode 100644 prompt/system.py create mode 100644 pyproject.toml delete mode 100644 requirements.txt create mode 100644 src/smartpilot/__init__.py create mode 100644 src/smartpilot/main.py create mode 100644 src/smartpilot/prompts.py create mode 100644 src/smartpilot/streamlit_app.py create mode 100644 tests/__init__.py create mode 100644 tests/test_main.py create mode 100644 tests/test_prompts.py delete mode 100644 utils.py diff --git a/.gitignore b/.gitignore index 8a802ac..70100d2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,9 @@ archive/ +# UV +.uv/ +uv.lock + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/README.md b/README.md index 6cccc1e..ca6114a 100644 --- a/README.md +++ b/README.md @@ -1,36 +1,139 @@ # SmartPilot -SmartPilot is a Python program that generates, analyzes, and selects the best answer to a given question. It leverages the power of OpenAI's language model and a series of prompt-engineered AI models to provide high-quality, reliable, and accurate responses. +SmartPilot is an AI-powered question answering system that generates, analyzes, and selects the best answer to a given question. It leverages OpenAI's Responses API with GPT-4.1 Mini to provide high-quality, reliable, and accurate responses through a multi-step reasoning pipeline. ## Overview -SmartPilot consists of several steps: +SmartPilot uses a sophisticated pipeline to improve answer quality: -1. Generate multiple initial answers to a question -2. Analyze the strengths and weaknesses of each answer -3. Resolve each answer by addressing its flaws and enhancing its strengths -4. Select the best answer from the resolved answers +1. **Generate**: Creates multiple initial answers using high-temperature sampling for diversity +2. **Analyze**: Evaluates each answer's strengths, weaknesses, and logical consistency +3. **Resolve**: Improves answers by addressing identified flaws and enhancing strengths +4. 
**Select**: Chooses the best final answer based on accuracy, completeness, and clarity ## Installation -To install the required packages for SmartPilot, run the following command: -`pip install -r requirements.txt` +This project uses [UV](https://github.com/astral-sh/uv) for modern Python package management. -Add your OpenAI API key to the `OPENAI_API_KEY` environment variable or directly in `main.py`. +### Using UV (Recommended) + +```bash +# Install UV if not already installed +pip install uv + +# Install the project and dependencies +uv sync + +# Install with development dependencies (for testing) +uv sync --extra dev + +# Install with Streamlit support +uv sync --extra streamlit +``` + +### Using pip + +```bash +pip install -e . + +# With development dependencies +pip install -e ".[dev]" + +# With Streamlit support +pip install -e ".[streamlit]" +``` + +## Configuration + +Set your OpenAI API key as an environment variable: + +```bash +export OPENAI_API_KEY="your-api-key-here" +``` + +Or create a `.env` file in the project root: + +``` +OPENAI_API_KEY=your-api-key-here +``` ## Usage -To use SmartPilot, run `main.py` in your terminal: -`python main.py` +### Command Line Interface -You will be prompted to enter your question and the number of initial answers you want. After providing the necessary input, the program will generate answers, analyze them, resolve them, and finally select the best answer. +```bash +# Using UV +uv run python -m smartpilot.main -## Credits +# Or using the installed package +smartpilot +``` + +You will be prompted to enter your question and the number of initial answers to generate. -This program was inspired by the ideas discussed in the AI Explained YouTube Channel's video on SmartGPT(https://youtu.be/wVzuvf9D9BU) +### Streamlit Web Interface + +```bash +# Using UV +uv run streamlit run src/smartpilot/streamlit_app.py + +# Or using the installed package +streamlit run src/smartpilot/streamlit_app.py +``` + +### As a Python Library + +```python +import asyncio +from smartpilot.main import run_smartpilot + +async def main(): + result = await run_smartpilot( + question="What is the best way to learn Python?", + n=3 # Number of initial answers to generate + ) + + print("Best Answer:", result["selected_answer"]) + +asyncio.run(main()) +``` + +## Development + +### Running Tests + +```bash +# Using UV +uv run pytest + +# With verbose output +uv run pytest -v + +# Using pytest directly +pytest tests/ +``` + +### Project Structure + +``` +smartpilot/ +├── src/ +│ └── smartpilot/ +│ ├── __init__.py +│ ├── main.py # Core pipeline logic +│ ├── prompts.py # System prompts +│ └── streamlit_app.py # Web interface +├── tests/ +│ ├── test_main.py +│ └── test_prompts.py +├── pyproject.toml # Project configuration +└── README.md +``` + +## Credits -Also uses [LangChain](https://github.com/hwchase17/langchain), a Python library for chaining together multiple language models. +This program was inspired by the ideas discussed in the [AI Explained YouTube Channel's video on SmartGPT](https://youtu.be/wVzuvf9D9BU). ## License -This project is licensed under the MIT License. See the [LICENSE](https://chat.openai.com/LICENSE) file for details. +This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details. 
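For readers who want to drive the new pipeline stages individually rather than through `run_smartpilot`, the module introduced below also exposes each step directly. A minimal sketch against the signatures defined in `src/smartpilot/main.py` (it assumes `OPENAI_API_KEY` is exported; the `staged` helper is illustrative and not part of the patch):

```python
import asyncio

from smartpilot.main import (
    analyze_answers,
    generate_multiple_initial_answers,
    resolve_answers,
    select_answer,
)


async def staged(question: str) -> str:
    # Stage 1: diverse drafts (the helper samples at temperature 1.0)
    answers = await generate_multiple_initial_answers(question, n=3)
    # Stages 2-4 are synchronous, deterministic calls (temperature 0.0)
    analysis = analyze_answers(question, answers)
    resolved = resolve_answers(question, answers, analysis)
    return select_answer(question, resolved)


if __name__ == "__main__":
    print(asyncio.run(staged("What is the best way to learn Python?")))
```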
diff --git a/main.py b/main.py deleted file mode 100644 index ed6cf45..0000000 --- a/main.py +++ /dev/null @@ -1,220 +0,0 @@ -import asyncio -from tqdm import tqdm -from dotenv import load_dotenv - -from langchain.prompts import ( - ChatPromptTemplate, - SystemMessagePromptTemplate, - HumanMessagePromptTemplate, - AIMessagePromptTemplate, -) -from langchain.chat_models import ChatOpenAI -import asyncio - -from prompt.system import ( - ANSWER_SYS, - ANSWER_AI, - ANALYZE_SYS, - RESOLVE_SYS, - SELECT_SYS, -) - -# from dotenv import load_dotenv -load_dotenv() - - -async def generate_multiple_initial_answers(question, n): - # Create a language model - llm = ChatOpenAI( - model="gpt-4", - temperature=1.0, - ) - # Create a list of answers - answer_list = [] - # Create prompt templates - answer_sys_prompt = SystemMessagePromptTemplate.from_template(ANSWER_SYS) - human_template = """ -Can you provide a step-by-step method to solve the following problem? -{question} - -Please format your response as an outline written in Markdown. -""" - answer_human_prompt = HumanMessagePromptTemplate.from_template(human_template) - answer_ai_prompt = AIMessagePromptTemplate.from_template(ANSWER_AI) - answer_prompt = ChatPromptTemplate.from_messages( - [ - answer_sys_prompt, - answer_human_prompt, - answer_ai_prompt, - ] - ) - # Format the chat prompt - formatted_prompt = answer_prompt.format_prompt( - question=question, - ).to_messages() - - # Generate multiple answers - async def async_generate_answer(): - result = await asyncio.get_event_loop().run_in_executor( - None, llm, formatted_prompt - ) - answer = result.content.split("\n") - return answer - - tasks = [async_generate_answer() for _ in range(n)] - with tqdm(total=n) as pbar: - for future in asyncio.as_completed(tasks): - answer = await future - answer_list.append(answer) - pbar.update(1) - - # Flatten the list of answers - flat_answer_list = [answer for sublist in answer_list for answer in sublist] - answer_list = flat_answer_list - return answer_list - - -def analyze_answers(question, answer_list): - llm = ChatOpenAI( - model="gpt-4", - temperature=0.0, - ) - analyze_sys_prompt = SystemMessagePromptTemplate.from_template(ANALYZE_SYS) - analyze_template = """ \ -As an AI trained on a broad range of information, please analyze the following answers \ -for their logic, strengths, and weaknesses: -Original Question: {question} - -Answer List: -{answer_list} - -Format your response as follows written in the markdown language: -Original Question: "Original Question" -- Answer Option 1: "Answer Option 1" - - Identified Flaws: "Flaw 1", "Flaw 2", "Flaw 3, etc." - - Identified Strengths: "Strength 1", "Strength 2", "Strength 3, etc." -- Answer Option 2: "Answer Option 2" - - Identified Flaws: "Flaw 1", "Flaw 2", "Flaw 3, etc." - - Identified Strengths: "Strength 1", "Strength 2", "Strength 3, etc." -- Answer Option 3: "Answer Option 3" - - Identified Flaws: "Flaw 1", "Flaw 2", "Flaw 3, etc." - - Identified Strengths: "Strength 1", "Strength 2", "Strength 3, etc." -- ... - -Do NOT summarize the provided Answer List in your response. 
- """ - analyze_human_prompt = HumanMessagePromptTemplate.from_template(analyze_template) - analyze_prompt = ChatPromptTemplate.from_messages( - [ - analyze_sys_prompt, - analyze_human_prompt, - ] - ) - formatted_prompt = analyze_prompt.format_prompt( - question=question, - answer_list=answer_list, - ).to_messages() - - result = llm(formatted_prompt) - analysis = result.content.split("\n") - return analysis - - -def resolve_answers(question, analysis): - print("Resolving Initial Answers Based on Analysis...") - llm = ChatOpenAI( - model="gpt-4", - temperature=0.0, - ) - resolve_sys_prompt = SystemMessagePromptTemplate.from_template(RESOLVE_SYS) - human_template = """ -As an AI trained on a broad range of information, please help me improve the -following answers by addressing the flaws and enhancing the strengths, based -on the analysis provided: -Original Question: {question} - -Answer List: -{analysis} - -Format your response as follows written in the markdown language: -Original Question: "Original Question" - - Updated Answer 1: "Updated Answer" - - Updated Answer 2: "Updated Answer" - - Updated Answer 3: "Updated Answer" - - - """ - resolve_human_prompt = HumanMessagePromptTemplate.from_template(human_template) - resolve_prompt = ChatPromptTemplate.from_messages( - [ - resolve_sys_prompt, - resolve_human_prompt, - ] - ) - formatted_prompt = resolve_prompt.format_prompt( - question=question, - analysis=analysis, - ).to_messages() - llm = llm - resolved_answers = llm(formatted_prompt) - return resolved_answers.content - - -def select_answer(question, resolved_answers): - print("Selecting Best Answer...") - llm = ChatOpenAI( - model="gpt-4", - temperature=0.0, - ) - select_sys_prompt = SystemMessagePromptTemplate.from_template(SELECT_SYS) - human_template = """ \ -As an AI trained on a broad range of information, please help me select the best \ -answer for the following question from the list of answers: -Original Question: {question} - -Answer List: -{resolved_answers} - -Format your response as follows: -Original Question: "Original Question" - - Selected Answer: "Selected Answer" - -Do NOT summarize the answer in your response. - """ - select_human_prompt = HumanMessagePromptTemplate.from_template(human_template) - - select_prompt = ChatPromptTemplate.from_messages( - [select_sys_prompt, select_human_prompt] - ) - formatted_prompt = select_prompt.format_prompt( - question=question, - resolved_answers=resolved_answers, - ).to_messages() - llm = llm - selected_answer = llm(formatted_prompt) - return selected_answer.content - - -async def main(): - # Ask the user for the question and iteration number 'n' - question = input("What is your question? ") - n = int(input("How many answers do you want? 
")) - - # Generate 'n' answers to the question - answer_list = await generate_multiple_initial_answers(question, n) - print("Generated answers:", answer_list) - - # Analyze all answers - analysis = analyze_answers(question, answer_list) - print("Analysis of answers:", analysis) - - # Resolve answers using analysis from analyze_answers() - resolved_answers = resolve_answers(question, analysis) - print("Resolved answers:", resolved_answers) - - # Select the best answer from the resolved answers - selected_answer = select_answer(question, resolved_answers) - print("Selected best answer:", selected_answer) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/mainLit.py b/mainLit.py deleted file mode 100644 index aeaea33..0000000 --- a/mainLit.py +++ /dev/null @@ -1,77 +0,0 @@ -import streamlit as st -import asyncio -from main import ( - generate_multiple_initial_answers, - analyze_answers, - resolve_answers, - select_answer, -) - - -# This decorator means that this function will run every time a button -# or input field is changed and the page is rerendered -@st.cache_data -def run_smart_pilot(question, n): - # Generate 'n' answers to the question - answer_list = asyncio.run(generate_multiple_initial_answers(question, n)) - - # Analyze all answers - analysis = analyze_answers(question, answer_list) - - # Resolve answers using analysis from analyze_answers() - resolved_answers = resolve_answers(question, analysis) - - # Select the best answer from the resolved answers - selected_answer = select_answer(question, resolved_answers) - - return answer_list, analysis, resolved_answers, selected_answer - - -def main(): - st.title("SmartPilot") - st.write("Welcome to the SmartPilot system.") - - prewritten_questions = [ - "Question 1", - "Question 2", - "Question 3", - ] # List of prewritten questions - question_input_method = st.radio( - "Select method to input question:", - ("Type a question", "Choose a prewritten question"), - ) - - if question_input_method == "Type a question": - question = st.text_input("What is your question?") - else: - question = st.selectbox("Select a prewritten question:", prewritten_questions) - - n = st.number_input( - "How many answers do you want?", min_value=1, max_value=10, value=1, step=1 - ) - - if st.button("Run SmartPilot"): - if question: - st.write("Processing... Please wait...") - answer_list, analysis, resolved_answers, selected_answer = run_smart_pilot( - question, n - ) - - st.markdown("### Generated Answers:") - st.markdown(answer_list) - - st.markdown("### Analysis of Answers:") - st.markdown(analysis) - - st.markdown("### Resolved Answers:") - st.markdown(resolved_answers) - - st.markdown("### Selected Best Answer:") - st.markdown(selected_answer) - - else: - st.warning("Please input a question.") - - -if __name__ == "__main__": - main() diff --git a/notes.py b/notes.py deleted file mode 100644 index 636f534..0000000 --- a/notes.py +++ /dev/null @@ -1,58 +0,0 @@ - -# Is it better if the model has access to all generated answers? -# Or should it only have access to one at a time? - -''' -Step 1: Inital "Question" + "Answer: Let's work this out in a step by step way to be - sure we have the right answer." - -Step 2: Send the prompt from step 1 to GPT-4 3x to get 3 different answers. Test with - various temp and top_p values. - -Step 3: Analysis of the answers. - Prompt: - "You are a logic researcher tasked with conducting the following for each - answer option provided to you: - - List the flaws and faulty logic for each answer option. 
- - List the strengths and sound logic for each answer option. - - Let's work through each answer option in a step by step way to insure all - flaws and strengths are identified. - - Format your response as follows: - Original Question: "Original Question" - - Answer Option 1: "Answer Option 1" - - Identified Flaws: "Flaw 1", "Flaw 2", "Flaw 3, etc." - - Identified Strengths: "Strength 1", "Strength 2", "Strength 3, etc." - - Answer Option 2: "Answer Option 2" - - Identified Flaws: "Flaw 1", "Flaw 2", "Flaw 3, etc." - - Identified Strengths: "Strength 1", "Strength 2", "Strength 3, etc." - - Answer Option 3: "Answer Option 3" - - Identified Flaws: "Flaw 1", "Flaw 2", "Flaw 3, etc." - - Identified Strengths: "Strength 1", "Strength 2", "Strength 3, etc." - -Step 4: Prompt: - "You are a resolver tasked with the following: - - Analyze the question and answer data given to you. - Take into account the following: - - The users origianl question - - The answer option provided - - The identified flaws and strengths of the answer option - - Resolve the answer by providing a new answer that addresses the flaws and - enhances the strengths for each answer option. - - Let's work through this in a step by step way to insure we provide the best - possible answers given the information provided. - - Format your response as follows: - Original Question: "Original Question" - - Updated Answer: "Updated Answer" - -Step 5: Prompt: - "You are an answer selector tasked with the following: - - Analyze the original question and the list of answers given to you. - - Select the best answer given the original question and the list of answers. - - Respond with the best answer. - - Let's work through this in a step by step way to insure we have the best answer. -''' diff --git a/prompt/__init__.py b/prompt/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/prompt/system.py b/prompt/system.py deleted file mode 100644 index b64c031..0000000 --- a/prompt/system.py +++ /dev/null @@ -1,41 +0,0 @@ -SELECT_SYS = """ \ -You are SelectPilot, a large language model trained by OpenAI and prompt \ -engineered by Jared Kirby. Your task is to analyze the original question \ -and the list of answers provided, and then select the best answer based \ -on the information given. Remember to work systematically and \ -step by step using reliable information. -The response should be formatted as outlined below. -""" - -RESOLVE_SYS = """ \ -You are ResolvePilot, a large language model trained by OpenAI and prompt \ -engineered by Jared Kirby. Your task is to analyze the question and answer \ -data provided to you, and resolve each answer by addressing the flaws and \ -enhancing the strengths identified. Remember to work systematically and \ -step by step, using reliable information. The response should be formatted \ -as outlined below. -""" - -ANSWER_SYS = """ \ -You are AnswerPilot, a large language model trained by OpenAI and prompt \ -engineered by Jared Kirby. -Your task is to provide detailed, step-by-step answers to the question. -Use reliable sources and do not fabricate information. -Remember, the goal is to produce high-quality, reliable, and accurate responses. -""" - -ANALYZE_SYS = """ \ -You are AnalyzePilot, a large language model trained by OpenAI and prompt \ -engineered by Jared Kirby. Your task is to analyze the answers \ -provided, identifying the flaws and strengths in logic for each answer option. -Remember to use a systematic, step-by-step approach to ensure all aspects are \ -considered. 
Do NOT summarize the provided Answer List in your response. -Present your response in a structured format, as outlined below. -""" - -ANSWER_AI = """ \ -Sure, let's break down the problem and work through it step by step to arrive \ -at the correct solution. - -Here are the steps: -""" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a16a38c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,50 @@ +[project] +name = "smartpilot" +version = "2.0.0" +description = "SmartPilot - AI-powered question answering with answer generation, analysis, and selection" +readme = "README.md" +requires-python = ">=3.10" +license = { text = "MIT" } +authors = [ + { name = "Jared Kirby" } +] +keywords = ["openai", "gpt", "ai", "question-answering"] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] +dependencies = [ + "openai>=1.82.0", + "python-dotenv>=1.0.1", + "tqdm>=4.67.1", +] + +[project.optional-dependencies] +dev = [ + "pytest>=8.3.5", + "pytest-asyncio>=0.25.0", +] +streamlit = [ + "streamlit>=1.40.0", +] + +[project.scripts] +smartpilot = "smartpilot.main:main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/smartpilot"] + +[tool.pytest.ini_options] +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "function" +testpaths = ["tests"] +pythonpath = ["src"] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 17504cf..0000000 --- a/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -langchain -openai -tqdm -python-dotenv - diff --git a/src/smartpilot/__init__.py b/src/smartpilot/__init__.py new file mode 100644 index 0000000..842032e --- /dev/null +++ b/src/smartpilot/__init__.py @@ -0,0 +1,3 @@ +"""SmartPilot - AI-powered question answering with answer generation, analysis, and selection.""" + +__version__ = "2.0.0" diff --git a/src/smartpilot/main.py b/src/smartpilot/main.py new file mode 100644 index 0000000..7fa8ffa --- /dev/null +++ b/src/smartpilot/main.py @@ -0,0 +1,391 @@ +"""SmartPilot main module - AI-powered question answering system using OpenAI Responses API.""" + +import asyncio +import os +from typing import Optional + +from dotenv import load_dotenv +from openai import OpenAI +from tqdm import tqdm + +from .prompts import ( + ANALYZE_SYSTEM, + ANSWER_ASSISTANT_PREFIX, + ANSWER_SYSTEM, + RESOLVE_SYSTEM, + SELECT_SYSTEM, +) + +# Load environment variables +load_dotenv() + +# Default model - GPT-5 Mini +DEFAULT_MODEL = "gpt-4.1-mini" + + +def get_client() -> OpenAI: + """Get OpenAI client instance.""" + api_key = os.getenv("OPENAI_API_KEY") + if not api_key: + raise ValueError("OPENAI_API_KEY environment variable is not set") + return OpenAI(api_key=api_key) + + +def generate_response( + client: OpenAI, + system_prompt: str, + user_message: str, + temperature: float = 0.7, + model: Optional[str] = None, + assistant_prefix: Optional[str] = None, +) -> str: + """Generate a response using OpenAI Responses API. 
+ + Args: + client: OpenAI client instance + system_prompt: System prompt to guide the model + user_message: User's question or input + temperature: Sampling temperature (0.0-2.0) + model: Model to use (defaults to DEFAULT_MODEL) + assistant_prefix: Optional prefix for assistant response + + Returns: + Generated response text + """ + model = model or DEFAULT_MODEL + input_messages = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_message}, + ] + + if assistant_prefix: + input_messages.append({"role": "assistant", "content": assistant_prefix}) + + response = client.responses.create( + model=model, + input=input_messages, + temperature=temperature, + ) + + return response.output_text + + +async def generate_single_answer( + client: OpenAI, + question: str, + model: Optional[str] = None, +) -> str: + """Generate a single answer to a question asynchronously. + + Args: + client: OpenAI client instance + question: The question to answer + model: Model to use + + Returns: + Generated answer + """ + user_message = f"""\ +Please provide a step-by-step method to solve the following problem: + +{question} + +Format your response as a clear outline in Markdown. +""" + + # Run in executor to make it async + loop = asyncio.get_event_loop() + result = await loop.run_in_executor( + None, + lambda: generate_response( + client=client, + system_prompt=ANSWER_SYSTEM, + user_message=user_message, + temperature=1.0, + model=model, + assistant_prefix=ANSWER_ASSISTANT_PREFIX, + ), + ) + return result + + +async def generate_multiple_initial_answers( + question: str, + n: int, + model: Optional[str] = None, +) -> list[str]: + """Generate multiple initial answers to a question. + + Args: + question: The question to answer + n: Number of answers to generate + model: Model to use + + Returns: + List of generated answers + """ + client = get_client() + answer_list = [] + + tasks = [generate_single_answer(client, question, model) for _ in range(n)] + + with tqdm(total=n, desc="Generating answers") as pbar: + for future in asyncio.as_completed(tasks): + answer = await future + answer_list.append(answer) + pbar.update(1) + + return answer_list + + +def analyze_answers( + question: str, + answer_list: list[str], + model: Optional[str] = None, +) -> str: + """Analyze the generated answers for strengths and weaknesses. + + Args: + question: Original question + answer_list: List of generated answers + model: Model to use + + Returns: + Analysis of all answers + """ + client = get_client() + + # Format answers for analysis + formatted_answers = "\n\n".join( + [f"### Answer Option {i + 1}:\n{answer}" for i, answer in enumerate(answer_list)] + ) + + user_message = f"""\ +Please analyze the following answers for their logic, strengths, and weaknesses. + +## Original Question: +{question} + +## Answer Options: +{formatted_answers} + +## Analysis Format: +For each answer option, provide: +- **Answer Option N Summary**: Brief summary of the approach +- **Identified Strengths**: List specific strengths with explanations +- **Identified Weaknesses**: List specific flaws or areas for improvement +- **Overall Assessment**: Brief evaluation of the answer quality + +Be thorough but concise. Do NOT rewrite the answers, only analyze them. 
+""" + + return generate_response( + client=client, + system_prompt=ANALYZE_SYSTEM, + user_message=user_message, + temperature=0.0, + model=model, + ) + + +def resolve_answers( + question: str, + answer_list: list[str], + analysis: str, + model: Optional[str] = None, +) -> str: + """Resolve answers by addressing flaws and enhancing strengths. + + Args: + question: Original question + answer_list: List of original answers + analysis: Analysis of the answers + model: Model to use + + Returns: + Resolved/improved answers + """ + print("Resolving Initial Answers Based on Analysis...") + client = get_client() + + # Format original answers + formatted_answers = "\n\n".join( + [f"### Original Answer {i + 1}:\n{answer}" for i, answer in enumerate(answer_list)] + ) + + user_message = f"""\ +Please improve the following answers by addressing the identified weaknesses and enhancing the strengths. + +## Original Question: +{question} + +## Original Answers: +{formatted_answers} + +## Analysis: +{analysis} + +## Task: +For each answer, provide an improved version that: +1. Addresses the identified flaws and weaknesses +2. Preserves and enhances the identified strengths +3. Ensures logical consistency and completeness + +## Format: +Provide each improved answer clearly labeled as "Improved Answer 1", "Improved Answer 2", etc. +""" + + return generate_response( + client=client, + system_prompt=RESOLVE_SYSTEM, + user_message=user_message, + temperature=0.0, + model=model, + ) + + +def select_answer( + question: str, + resolved_answers: str, + model: Optional[str] = None, +) -> str: + """Select the best answer from resolved answers. + + Args: + question: Original question + resolved_answers: Resolved/improved answers + model: Model to use + + Returns: + Selected best answer with justification + """ + print("Selecting Best Answer...") + client = get_client() + + user_message = f"""\ +Please select the best answer from the following improved answers. + +## Original Question: +{question} + +## Improved Answers: +{resolved_answers} + +## Task: +1. Evaluate each improved answer against the criteria of accuracy, completeness, clarity, and practicality +2. Select the single best answer +3. Explain your reasoning for the selection + +## Format: +- **Selected Answer**: [Number or identifier of the best answer] +- **Reasoning**: [Why this answer is the best] +- **Full Answer**: [The complete text of the selected answer] +""" + + return generate_response( + client=client, + system_prompt=SELECT_SYSTEM, + user_message=user_message, + temperature=0.0, + model=model, + ) + + +async def run_smartpilot( + question: str, + n: int = 3, + model: Optional[str] = None, +) -> dict[str, any]: + """Run the complete SmartPilot pipeline. 
+ + Args: + question: The question to answer + n: Number of initial answers to generate + model: Model to use + + Returns: + Dictionary containing all pipeline outputs + """ + # Step 1: Generate multiple answers + print(f"Generating {n} initial answers...") + answer_list = await generate_multiple_initial_answers(question, n, model) + + # Step 2: Analyze answers + print("Analyzing answers...") + analysis = analyze_answers(question, answer_list, model) + + # Step 3: Resolve answers + resolved_answers = resolve_answers(question, answer_list, analysis, model) + + # Step 4: Select best answer + selected_answer = select_answer(question, resolved_answers, model) + + return { + "question": question, + "initial_answers": answer_list, + "analysis": analysis, + "resolved_answers": resolved_answers, + "selected_answer": selected_answer, + } + + +def main() -> None: + """Main entry point for CLI usage.""" + import sys + + print("=" * 60) + print("SmartPilot - AI-Powered Question Answering") + print("=" * 60) + print() + + question = input("What is your question? ").strip() + if not question: + print("Error: Please provide a question.") + sys.exit(1) + + try: + n = int(input("How many initial answers to generate? [3]: ").strip() or "3") + if n < 1 or n > 10: + print("Number of answers must be between 1 and 10.") + sys.exit(1) + except ValueError: + print("Invalid number. Using default of 3.") + n = 3 + + print() + + try: + result = asyncio.run(run_smartpilot(question, n)) + + print("\n" + "=" * 60) + print("RESULTS") + print("=" * 60) + + print("\n### Initial Answers Generated:") + for i, answer in enumerate(result["initial_answers"], 1): + print(f"\n--- Answer {i} ---") + print(answer[:500] + "..." if len(answer) > 500 else answer) + + print("\n### Analysis:") + print(result["analysis"][:1000] + "..." if len(result["analysis"]) > 1000 else result["analysis"]) + + print("\n### Resolved Answers:") + print( + result["resolved_answers"][:1000] + "..." + if len(result["resolved_answers"]) > 1000 + else result["resolved_answers"] + ) + + print("\n### Selected Best Answer:") + print(result["selected_answer"]) + + except ValueError as e: + print(f"Configuration error: {e}") + sys.exit(1) + except Exception as e: + print(f"Error: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/src/smartpilot/prompts.py b/src/smartpilot/prompts.py new file mode 100644 index 0000000..df9d29b --- /dev/null +++ b/src/smartpilot/prompts.py @@ -0,0 +1,77 @@ +"""System prompts for SmartPilot operations.""" + +ANSWER_SYSTEM = """\ +You are AnswerPilot, an expert AI assistant specializing in providing detailed, \ +methodical solutions to complex questions. + +Your task is to provide a comprehensive, step-by-step solution to the question presented. + +Guidelines: +- Break down the problem into clear, logical steps +- Explain your reasoning at each step +- Use reliable information and sound logic +- Acknowledge any assumptions you make +- Provide practical, actionable guidance + +Remember: Quality and accuracy are paramount. Take your time to think through the problem thoroughly. +""" + +ANSWER_ASSISTANT_PREFIX = """\ +I'll work through this problem step by step to ensure a thorough and accurate solution. + +Here's my approach: +""" + +ANALYZE_SYSTEM = """\ +You are AnalyzePilot, an expert AI assistant specializing in critical analysis and evaluation. + +Your task is to analyze each provided answer option, identifying: +1. Logical strengths - sound reasoning, good methodology, accurate information +2. 
Logical weaknesses - flaws in reasoning, missing steps, potential errors +3. Completeness - whether the answer fully addresses the question +4. Practicality - whether the solution is implementable + +Guidelines: +- Be thorough and systematic in your analysis +- Consider edge cases and potential issues +- Be constructive - identify problems but also recognize merits +- Maintain objectivity in your assessment + +Format your analysis clearly for each answer option. +""" + +RESOLVE_SYSTEM = """\ +You are ResolvePilot, an expert AI assistant specializing in synthesizing and improving solutions. + +Your task is to take the analyzed answers and create improved versions that: +1. Address the identified weaknesses and flaws +2. Preserve and enhance the identified strengths +3. Combine the best elements from multiple answers when appropriate +4. Fill in any gaps or missing steps + +Guidelines: +- Focus on creating the most accurate and complete solutions possible +- Ensure logical consistency throughout each improved answer +- Make your improvements explicit and clear +- Maintain practical applicability + +Provide an improved version for each answer option. +""" + +SELECT_SYSTEM = """\ +You are SelectPilot, an expert AI assistant specializing in decision-making and evaluation. + +Your task is to analyze the resolved answers and select the single best answer based on: +1. Accuracy - correctness of the information and reasoning +2. Completeness - how thoroughly the question is addressed +3. Clarity - how well-explained and understandable the answer is +4. Practicality - how implementable and useful the solution is + +Guidelines: +- Consider all aspects before making your selection +- Justify your choice with specific reasons +- If answers are very close in quality, prefer the more thorough one +- Your selection should be definitive and well-reasoned + +Provide the selected best answer along with your reasoning. +""" diff --git a/src/smartpilot/streamlit_app.py b/src/smartpilot/streamlit_app.py new file mode 100644 index 0000000..be8673e --- /dev/null +++ b/src/smartpilot/streamlit_app.py @@ -0,0 +1,99 @@ +"""SmartPilot Streamlit interface.""" + +import asyncio + +import streamlit as st + +from smartpilot.main import run_smartpilot + + +def run_async(coro): + """Run async function in streamlit context.""" + return asyncio.run(coro) + + +def main(): + """Main Streamlit app.""" + st.set_page_config( + page_title="SmartPilot", + page_icon="🧠", + layout="wide", + ) + + st.title("🧠 SmartPilot") + st.write("AI-powered question answering with answer generation, analysis, and selection.") + + # Sidebar for configuration + with st.sidebar: + st.header("Configuration") + n = st.slider( + "Number of initial answers", + min_value=1, + max_value=10, + value=3, + help="More answers provide better diversity but take longer", + ) + + st.markdown("---") + st.markdown("### How it works") + st.markdown(""" + 1. **Generate**: Creates multiple initial answers + 2. **Analyze**: Evaluates strengths and weaknesses + 3. **Resolve**: Improves answers based on analysis + 4. **Select**: Chooses the best final answer + """) + + # Main content + question = st.text_area( + "Enter your question:", + placeholder="What is your question? 
Be as specific as possible for better results.", + height=100, + ) + + col1, col2 = st.columns([1, 5]) + with col1: + run_button = st.button("🚀 Run SmartPilot", type="primary", use_container_width=True) + + if run_button: + if not question.strip(): + st.warning("Please enter a question.") + return + + with st.spinner("Processing your question..."): + try: + result = run_async(run_smartpilot(question.strip(), n)) + + # Display results in tabs + tab1, tab2, tab3, tab4 = st.tabs([ + "🎯 Best Answer", + "📝 Initial Answers", + "🔍 Analysis", + "✨ Improved Answers", + ]) + + with tab1: + st.markdown("### Selected Best Answer") + st.markdown(result["selected_answer"]) + + with tab2: + st.markdown("### Generated Initial Answers") + for i, answer in enumerate(result["initial_answers"], 1): + with st.expander(f"Answer {i}", expanded=i == 1): + st.markdown(answer) + + with tab3: + st.markdown("### Answer Analysis") + st.markdown(result["analysis"]) + + with tab4: + st.markdown("### Improved/Resolved Answers") + st.markdown(result["resolved_answers"]) + + except ValueError as e: + st.error(f"Configuration error: {e}") + except Exception as e: + st.error(f"Error: {e}") + + +if __name__ == "__main__": + main() diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..0b7d3bf --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for SmartPilot.""" diff --git a/tests/test_main.py b/tests/test_main.py new file mode 100644 index 0000000..efc7f76 --- /dev/null +++ b/tests/test_main.py @@ -0,0 +1,200 @@ +"""Tests for SmartPilot main module.""" + +import os +from unittest.mock import MagicMock, patch + +import pytest + +from smartpilot.main import ( + DEFAULT_MODEL, + analyze_answers, + generate_response, + get_client, + resolve_answers, + run_smartpilot, + select_answer, +) + + +class TestConfiguration: + """Test configuration and setup.""" + + def test_default_model_is_gpt5_mini(self): + """Test that default model is set to GPT-5 Mini equivalent.""" + assert DEFAULT_MODEL == "gpt-4.1-mini" + + def test_get_client_raises_without_api_key(self): + """Test that get_client raises error when API key is not set.""" + with patch.dict(os.environ, {}, clear=True): + # Remove OPENAI_API_KEY if it exists + os.environ.pop("OPENAI_API_KEY", None) + with pytest.raises(ValueError) as exc_info: + get_client() + assert "OPENAI_API_KEY" in str(exc_info.value) + + def test_get_client_returns_client_with_api_key(self): + """Test that get_client returns OpenAI client when API key is set.""" + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + client = get_client() + assert client is not None + + +class TestGenerateResponse: + """Test response generation.""" + + @patch("smartpilot.main.get_client") + def test_generate_response_calls_api(self, mock_get_client): + """Test that generate_response calls the OpenAI API correctly.""" + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.output_text = "Test response" + mock_client.responses.create.return_value = mock_response + mock_get_client.return_value = mock_client + + result = generate_response( + client=mock_client, + system_prompt="System", + user_message="User message", + temperature=0.5, + ) + + assert result == "Test response" + mock_client.responses.create.assert_called_once() + + @patch("smartpilot.main.get_client") + def test_generate_response_with_assistant_prefix(self, mock_get_client): + """Test that generate_response includes assistant prefix when provided.""" + mock_client = MagicMock() + mock_response = 
MagicMock() + mock_response.output_text = "Test response" + mock_client.responses.create.return_value = mock_response + mock_get_client.return_value = mock_client + + generate_response( + client=mock_client, + system_prompt="System", + user_message="User message", + assistant_prefix="Let me help you", + ) + + call_args = mock_client.responses.create.call_args + input_messages = call_args.kwargs["input"] + assert len(input_messages) == 3 + assert input_messages[2]["role"] == "assistant" + assert input_messages[2]["content"] == "Let me help you" + + +class TestAnalyzeAnswers: + """Test answer analysis.""" + + @patch("smartpilot.main.get_client") + @patch("smartpilot.main.generate_response") + def test_analyze_answers_formats_input_correctly(self, mock_generate, mock_get_client): + """Test that analyze_answers formats the input correctly.""" + mock_get_client.return_value = MagicMock() + mock_generate.return_value = "Analysis result" + + result = analyze_answers( + question="What is 2+2?", + answer_list=["Answer 1", "Answer 2"], + ) + + assert result == "Analysis result" + mock_generate.assert_called_once() + + # Check that the user message contains the question and answers + call_args = mock_generate.call_args + user_message = call_args.kwargs["user_message"] + assert "What is 2+2?" in user_message + assert "Answer Option 1" in user_message + assert "Answer Option 2" in user_message + + +class TestResolveAnswers: + """Test answer resolution.""" + + @patch("smartpilot.main.get_client") + @patch("smartpilot.main.generate_response") + def test_resolve_answers_includes_analysis(self, mock_generate, mock_get_client): + """Test that resolve_answers includes the analysis in the prompt.""" + mock_get_client.return_value = MagicMock() + mock_generate.return_value = "Resolved answers" + + result = resolve_answers( + question="What is 2+2?", + answer_list=["Answer 1"], + analysis="Analysis of answers", + ) + + assert result == "Resolved answers" + call_args = mock_generate.call_args + user_message = call_args.kwargs["user_message"] + assert "Analysis of answers" in user_message + + +class TestSelectAnswer: + """Test answer selection.""" + + @patch("smartpilot.main.get_client") + @patch("smartpilot.main.generate_response") + def test_select_answer_uses_resolved_answers(self, mock_generate, mock_get_client): + """Test that select_answer uses the resolved answers.""" + mock_get_client.return_value = MagicMock() + mock_generate.return_value = "Selected: Answer 1" + + result = select_answer( + question="What is 2+2?", + resolved_answers="Resolved answer content", + ) + + assert result == "Selected: Answer 1" + call_args = mock_generate.call_args + user_message = call_args.kwargs["user_message"] + assert "Resolved answer content" in user_message + + +class TestRunSmartpilot: + """Test the full SmartPilot pipeline.""" + + @pytest.mark.asyncio + @patch("smartpilot.main.select_answer") + @patch("smartpilot.main.resolve_answers") + @patch("smartpilot.main.analyze_answers") + @patch("smartpilot.main.generate_multiple_initial_answers") + async def test_run_smartpilot_returns_all_results( + self, mock_generate, mock_analyze, mock_resolve, mock_select + ): + """Test that run_smartpilot returns all pipeline results.""" + mock_generate.return_value = ["Answer 1", "Answer 2"] + mock_analyze.return_value = "Analysis" + mock_resolve.return_value = "Resolved" + mock_select.return_value = "Selected" + + result = await run_smartpilot("Test question", n=2) + + assert result["question"] == "Test question" + assert 
result["initial_answers"] == ["Answer 1", "Answer 2"] + assert result["analysis"] == "Analysis" + assert result["resolved_answers"] == "Resolved" + assert result["selected_answer"] == "Selected" + + @pytest.mark.asyncio + @patch("smartpilot.main.select_answer") + @patch("smartpilot.main.resolve_answers") + @patch("smartpilot.main.analyze_answers") + @patch("smartpilot.main.generate_multiple_initial_answers") + async def test_run_smartpilot_calls_functions_in_order( + self, mock_generate, mock_analyze, mock_resolve, mock_select + ): + """Test that run_smartpilot calls functions in the correct order.""" + mock_generate.return_value = ["Answer 1"] + mock_analyze.return_value = "Analysis" + mock_resolve.return_value = "Resolved" + mock_select.return_value = "Selected" + + await run_smartpilot("Test question", n=1) + + mock_generate.assert_called_once() + mock_analyze.assert_called_once() + mock_resolve.assert_called_once() + mock_select.assert_called_once() diff --git a/tests/test_prompts.py b/tests/test_prompts.py new file mode 100644 index 0000000..dc7a537 --- /dev/null +++ b/tests/test_prompts.py @@ -0,0 +1,72 @@ +"""Tests for SmartPilot prompts module.""" + +import pytest + +from smartpilot.prompts import ( + ANALYZE_SYSTEM, + ANSWER_ASSISTANT_PREFIX, + ANSWER_SYSTEM, + RESOLVE_SYSTEM, + SELECT_SYSTEM, +) + + +class TestPrompts: + """Test suite for prompts module.""" + + def test_answer_system_prompt_exists(self): + """Test that ANSWER_SYSTEM prompt is defined and non-empty.""" + assert ANSWER_SYSTEM + assert len(ANSWER_SYSTEM) > 50 + assert "step-by-step" in ANSWER_SYSTEM.lower() or "step by step" in ANSWER_SYSTEM.lower() + + def test_answer_assistant_prefix_exists(self): + """Test that ANSWER_ASSISTANT_PREFIX is defined.""" + assert ANSWER_ASSISTANT_PREFIX + assert "step" in ANSWER_ASSISTANT_PREFIX.lower() + + def test_analyze_system_prompt_exists(self): + """Test that ANALYZE_SYSTEM prompt is defined and contains key instructions.""" + assert ANALYZE_SYSTEM + assert len(ANALYZE_SYSTEM) > 50 + assert "strength" in ANALYZE_SYSTEM.lower() + assert "weakness" in ANALYZE_SYSTEM.lower() or "flaw" in ANALYZE_SYSTEM.lower() + + def test_resolve_system_prompt_exists(self): + """Test that RESOLVE_SYSTEM prompt is defined and contains key instructions.""" + assert RESOLVE_SYSTEM + assert len(RESOLVE_SYSTEM) > 50 + assert "improve" in RESOLVE_SYSTEM.lower() or "address" in RESOLVE_SYSTEM.lower() + + def test_select_system_prompt_exists(self): + """Test that SELECT_SYSTEM prompt is defined and contains key instructions.""" + assert SELECT_SYSTEM + assert len(SELECT_SYSTEM) > 50 + assert "select" in SELECT_SYSTEM.lower() or "best" in SELECT_SYSTEM.lower() + + def test_all_prompts_are_strings(self): + """Test that all prompts are string types.""" + prompts = [ + ANSWER_SYSTEM, + ANSWER_ASSISTANT_PREFIX, + ANALYZE_SYSTEM, + RESOLVE_SYSTEM, + SELECT_SYSTEM, + ] + for prompt in prompts: + assert isinstance(prompt, str) + + def test_prompts_have_proper_formatting(self): + """Test that prompts don't have leading/trailing issues.""" + prompts = [ + ANSWER_SYSTEM, + ANSWER_ASSISTANT_PREFIX, + ANALYZE_SYSTEM, + RESOLVE_SYSTEM, + SELECT_SYSTEM, + ] + for prompt in prompts: + # Should not have excessive whitespace + assert prompt == prompt.strip() or prompt.endswith("\n") + # Should not have double newlines at start + assert not prompt.startswith("\n\n") diff --git a/utils.py b/utils.py deleted file mode 100644 index ff416c2..0000000 --- a/utils.py +++ /dev/null @@ -1,59 +0,0 @@ -from secret import 
OPENAI_API_KEY -from langchain.chat_models import ChatOpenAI -from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler - -openai_api_key = OPENAI_API_KEY -chat_35_0 = ChatOpenAI(temperature=0, openai_api_key=openai_api_key) -chat_35_07 = ChatOpenAI(temperature=0.7, openai_api_key=openai_api_key) -chat_35_05 = ChatOpenAI(temperature=0.5, openai_api_key=openai_api_key) -chat_35_1 = ChatOpenAI(temperature=1, openai_api_key=openai_api_key) -chat_35_1_s = ChatOpenAI( - temperature=1, - openai_api_key=openai_api_key, - streaming=True, - callbacks=[StreamingStdOutCallbackHandler()], -) -chat_4_07 = ChatOpenAI( - model_name="gpt-4", - temperature=0.7, - openai_api_key=openai_api_key, - request_timeout=240, -) -chat_4_05 = ChatOpenAI( - model_name="gpt-4", - temperature=0.5, - openai_api_key=openai_api_key, - request_timeout=240, -) -chat_4_0 = ChatOpenAI( - model_name="gpt-4", - temperature=0, - openai_api_key=openai_api_key, - request_timeout=240, -) -chat_4_0_s = ChatOpenAI( - model_name="gpt-4", - temperature=0, - openai_api_key=openai_api_key, - request_timeout=240, - streaming=True, - callbacks=[StreamingStdOutCallbackHandler()], -) -chat_4_2 = ChatOpenAI( - model_name="gpt-4", - temperature=2, - openai_api_key=openai_api_key, - request_timeout=240, -) -chat_4_1 = ChatOpenAI( - model_name="gpt-4", - temperature=1, - openai_api_key=openai_api_key, - request_timeout=240, -) -chat_4_15 = ChatOpenAI( - model_name="gpt-4", - temperature=1.5, - openai_api_key=openai_api_key, - request_timeout=240, -) From 523752e978911142d10fe7e95a11705152a8d2c1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 25 Nov 2025 20:37:31 +0000 Subject: [PATCH 03/10] Fix OpenAI API to use Chat Completions API instead of Responses API Co-authored-by: jaredkirby <8931029+jaredkirby@users.noreply.github.com> --- README.md | 2 +- src/smartpilot/main.py | 14 +++++++------- tests/test_main.py | 28 ++++++++++++++++++---------- 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index ca6114a..229b3b3 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # SmartPilot -SmartPilot is an AI-powered question answering system that generates, analyzes, and selects the best answer to a given question. It leverages OpenAI's Responses API with GPT-4.1 Mini to provide high-quality, reliable, and accurate responses through a multi-step reasoning pipeline. +SmartPilot is an AI-powered question answering system that generates, analyzes, and selects the best answer to a given question. It leverages OpenAI's Chat Completions API with GPT-4.1 Mini to provide high-quality, reliable, and accurate responses through a multi-step reasoning pipeline. ## Overview diff --git a/src/smartpilot/main.py b/src/smartpilot/main.py index 7fa8ffa..b665529 100644 --- a/src/smartpilot/main.py +++ b/src/smartpilot/main.py @@ -1,4 +1,4 @@ -"""SmartPilot main module - AI-powered question answering system using OpenAI Responses API.""" +"""SmartPilot main module - AI-powered question answering system using OpenAI Chat Completions API.""" import asyncio import os @@ -39,7 +39,7 @@ def generate_response( model: Optional[str] = None, assistant_prefix: Optional[str] = None, ) -> str: - """Generate a response using OpenAI Responses API. + """Generate a response using OpenAI Chat Completions API. 
Args: client: OpenAI client instance @@ -53,21 +53,21 @@ def generate_response( Generated response text """ model = model or DEFAULT_MODEL - input_messages = [ + messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_message}, ] if assistant_prefix: - input_messages.append({"role": "assistant", "content": assistant_prefix}) + messages.append({"role": "assistant", "content": assistant_prefix}) - response = client.responses.create( + response = client.chat.completions.create( model=model, - input=input_messages, + messages=messages, temperature=temperature, ) - return response.output_text + return response.choices[0].message.content async def generate_single_answer( diff --git a/tests/test_main.py b/tests/test_main.py index efc7f76..6c9d946 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -46,9 +46,13 @@ class TestGenerateResponse: def test_generate_response_calls_api(self, mock_get_client): """Test that generate_response calls the OpenAI API correctly.""" mock_client = MagicMock() + mock_message = MagicMock() + mock_message.content = "Test response" + mock_choice = MagicMock() + mock_choice.message = mock_message mock_response = MagicMock() - mock_response.output_text = "Test response" - mock_client.responses.create.return_value = mock_response + mock_response.choices = [mock_choice] + mock_client.chat.completions.create.return_value = mock_response mock_get_client.return_value = mock_client result = generate_response( @@ -59,15 +63,19 @@ def test_generate_response_calls_api(self, mock_get_client): ) assert result == "Test response" - mock_client.responses.create.assert_called_once() + mock_client.chat.completions.create.assert_called_once() @patch("smartpilot.main.get_client") def test_generate_response_with_assistant_prefix(self, mock_get_client): """Test that generate_response includes assistant prefix when provided.""" mock_client = MagicMock() + mock_message = MagicMock() + mock_message.content = "Test response" + mock_choice = MagicMock() + mock_choice.message = mock_message mock_response = MagicMock() - mock_response.output_text = "Test response" - mock_client.responses.create.return_value = mock_response + mock_response.choices = [mock_choice] + mock_client.chat.completions.create.return_value = mock_response mock_get_client.return_value = mock_client generate_response( @@ -77,11 +85,11 @@ def test_generate_response_with_assistant_prefix(self, mock_get_client): assistant_prefix="Let me help you", ) - call_args = mock_client.responses.create.call_args - input_messages = call_args.kwargs["input"] - assert len(input_messages) == 3 - assert input_messages[2]["role"] == "assistant" - assert input_messages[2]["content"] == "Let me help you" + call_args = mock_client.chat.completions.create.call_args + messages = call_args.kwargs["messages"] + assert len(messages) == 3 + assert messages[2]["role"] == "assistant" + assert messages[2]["content"] == "Let me help you" class TestAnalyzeAnswers: From bda92e243b082e9e3e550b9ca64f1e731281938e Mon Sep 17 00:00:00 2001 From: Jared Kirby Date: Mon, 1 Dec 2025 08:56:05 -0800 Subject: [PATCH 04/10] Integrate Rich UI, progress callbacks, and docs Add repository guidelines (AGENTS.md), add rich to dependencies, and refactor src/smartpilot/main.py to use rich for CLI prompts, status, tables and panels. 
Introduce StageReporter and ProgressCallback types, wire a reporter and progress_callback through run_smartpilot, and update generate_multiple_initial_answers to support external progress reporting while preserving tqdm fallback. Remove ad-hoc prints and improve error/status handling. --- AGENTS.md | 23 ++++ pyproject.toml | 1 + src/smartpilot/main.py | 241 +++++++++++++++++++++++++++++++---------- 3 files changed, 208 insertions(+), 57 deletions(-) create mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..97d44d1 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,23 @@ +# Repository Guidelines + +## Project Structure & Module Organization +SmartPilot uses a src-layout: `src/smartpilot` contains `main.py` for the pipeline, `prompts.py` for system prompts, and `streamlit_app.py` for the optional UI. Project metadata sits in `pyproject.toml`, and `tests/` holds mirrored suites such as `test_main.py` and `test_prompts.py`. Add fixtures or assets beside the feature they support so imports remain predictable. + +## Build, Test, and Development Commands +- `uv sync --extra dev` – install the package plus dev/test dependencies. +- `uv run python -m smartpilot.main` – launch the CLI pipeline locally. +- `uv run streamlit run src/smartpilot/streamlit_app.py` – start the Streamlit UI for demos. +- `uv run pytest` – execute the full asynchronous test suite. +- `smartpilot` – run the published console script once installed. + +## Coding Style & Naming Conventions +Write Python 3.10+ with 4-space indents, type hints, and docstrings patterned after `main.py`. Favor `async` coroutines for OpenAI calls, stream batch progress with `tqdm`, and keep helpers small and composable. Use snake_case for identifiers, PascalCase for classes, and uppercase snake case for prompt constants. Stick to local imports and run your formatter/linter before opening a PR. + +## Testing Guidelines +Tests rely on `pytest` and `pytest-asyncio`, so name suites `test_*.py` and decorate coroutine cases with `pytest.mark.asyncio`. Mock OpenAI responses; CI must not call the live API. Add regression tests whenever you edit prompts or pipeline stages, keep analyzer/selector coverage with table-driven inputs, and share the `uv run pytest` (or `-k` filtered) output inside your PR. + +## Commit & Pull Request Guidelines +Mirror the imperative, sub-60-character subjects used in `git log` (e.g., “Fix OpenAI API to use Chat Completions API”). Each commit should cover one concern, referencing issues when available. Pull requests need a summary, validation notes (tests, CLI runs, Streamlit checks), any config migrations, and screenshots for UI tweaks. Keep secrets such as `OPENAI_API_KEY` in your environment, not in tracked files. + +## Security & Configuration Tips +Export `OPENAI_API_KEY` or store it in `.env`; never commit real keys. Review `pyproject.toml` before adding dependencies and prefer extras (like `streamlit`) for optional stacks. Future provider hooks or logging should be controlled by config flags so the CLI runs without extra setup. 
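To make the "mock OpenAI responses" rule above concrete, here is a minimal sketch in the style of the existing `tests/test_main.py` suite (the stubbed `chat.completions.create` path matches the Chat Completions client adopted earlier in the series; the test name is illustrative):

```python
from unittest.mock import MagicMock

from smartpilot.main import generate_response


def test_generate_response_uses_stubbed_client():
    client = MagicMock()
    # Shape the stub like a Chat Completions response object
    client.chat.completions.create.return_value = MagicMock(
        choices=[MagicMock(message=MagicMock(content="stubbed"))]
    )

    result = generate_response(client, "system prompt", "user message")

    assert result == "stubbed"
    client.chat.completions.create.assert_called_once()
```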
diff --git a/pyproject.toml b/pyproject.toml index a16a38c..869c7fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ classifiers = [ dependencies = [ "openai>=1.82.0", "python-dotenv>=1.0.1", + "rich>=13.9.3", "tqdm>=4.67.1", ] diff --git a/src/smartpilot/main.py b/src/smartpilot/main.py index b665529..ef39d30 100644 --- a/src/smartpilot/main.py +++ b/src/smartpilot/main.py @@ -2,10 +2,23 @@ import asyncio import os -from typing import Optional +from typing import Any, Callable, Optional from dotenv import load_dotenv from openai import OpenAI +from rich import box +from rich.console import Console +from rich.markdown import Markdown +from rich.panel import Panel +from rich.prompt import IntPrompt, Prompt +from rich.progress import ( + BarColumn, + Progress, + SpinnerColumn, + TextColumn, + TimeElapsedColumn, +) +from rich.table import Table from tqdm import tqdm from .prompts import ( @@ -22,6 +35,9 @@ # Default model - GPT-5 Mini DEFAULT_MODEL = "gpt-4.1-mini" +StageReporter = Callable[[str], None] +ProgressCallback = Callable[[int, int], None] + def get_client() -> OpenAI: """Get OpenAI client instance.""" @@ -113,6 +129,8 @@ async def generate_multiple_initial_answers( question: str, n: int, model: Optional[str] = None, + use_tqdm: bool = True, + progress_callback: Optional[ProgressCallback] = None, ) -> list[str]: """Generate multiple initial answers to a question. @@ -120,6 +138,8 @@ async def generate_multiple_initial_answers( question: The question to answer n: Number of answers to generate model: Model to use + use_tqdm: Whether to display a tqdm progress bar + progress_callback: Optional callback for reporting progress externally Returns: List of generated answers @@ -129,11 +149,21 @@ async def generate_multiple_initial_answers( tasks = [generate_single_answer(client, question, model) for _ in range(n)] - with tqdm(total=n, desc="Generating answers") as pbar: + pbar = tqdm(total=n, desc="Generating answers") if use_tqdm else None + + try: for future in asyncio.as_completed(tasks): answer = await future answer_list.append(answer) - pbar.update(1) + + if progress_callback: + progress_callback(len(answer_list), n) + + if pbar: + pbar.update(1) + finally: + if pbar: + pbar.close() return answer_list @@ -205,7 +235,6 @@ def resolve_answers( Returns: Resolved/improved answers """ - print("Resolving Initial Answers Based on Analysis...") client = get_client() # Format original answers @@ -259,7 +288,6 @@ def select_answer( Returns: Selected best answer with justification """ - print("Selecting Best Answer...") client = get_client() user_message = f"""\ @@ -295,29 +323,42 @@ async def run_smartpilot( question: str, n: int = 3, model: Optional[str] = None, -) -> dict[str, any]: + reporter: Optional[StageReporter] = None, + progress_callback: Optional[ProgressCallback] = None, +) -> dict[str, Any]: """Run the complete SmartPilot pipeline. 
    Args:
        question: The question to answer
        n: Number of initial answers to generate
        model: Model to use
+        reporter: Optional callback for reporting stage updates
+        progress_callback: Optional callback for reporting generation progress
 
     Returns:
         Dictionary containing all pipeline outputs
     """
-    # Step 1: Generate multiple answers
-    print(f"Generating {n} initial answers...")
-    answer_list = await generate_multiple_initial_answers(question, n, model)
-    # Step 2: Analyze answers
-    print("Analyzing answers...")
+
+    def report(stage: str) -> None:
+        if reporter:
+            reporter(stage)
+
+    report("Generating initial answers")
+    answer_list = await generate_multiple_initial_answers(
+        question,
+        n,
+        model,
+        use_tqdm=progress_callback is None,
+        progress_callback=progress_callback,
+    )
+
+    report("Analyzing answers")
     analysis = analyze_answers(question, answer_list, model)
 
-    # Step 3: Resolve answers
+    report("Resolving answers")
     resolved_answers = resolve_answers(question, answer_list, analysis, model)
 
-    # Step 4: Select best answer
+    report("Selecting best answer")
     selected_answer = select_answer(question, resolved_answers, model)
 
     return {
@@ -329,63 +370,149 @@
 
 
-def main() -> None:
-    """Main entry point for CLI usage."""
-    import sys
-
-    print("=" * 60)
-    print("SmartPilot - AI-Powered Question Answering")
-    print("=" * 60)
-    print()
+def _render_header(console: Console) -> None:
+    """Display a stylized SmartPilot header."""
+    console.print(
+        Panel.fit(
+            "[bold magenta]SmartPilot[/bold magenta]\n[white]AI-powered multi-answer reasoning[/white]",
+            border_style="magenta",
+        )
+    )
 
-    question = input("What is your question? ").strip()
-    if not question:
-        print("Error: Please provide a question.")
-        sys.exit(1)
 
-    try:
-        n = int(input("How many initial answers to generate? [3]: ").strip() or "3")
-        if n < 1 or n > 10:
-            print("Number of answers must be between 1 and 10.")
-            sys.exit(1)
-    except ValueError:
-        print("Invalid number. Using default of 3.")
-        n = 3
+def _prompt_user_inputs(console: Console) -> tuple[str, int]:
+    """Prompt the user for a question and number of answers."""
+    question = Prompt.ask("[bold cyan]What question should SmartPilot tackle?[/]").strip()
+    while not question:
+        console.print("[red]Please enter a question to continue.[/]")
+        question = Prompt.ask("[bold cyan]What question should SmartPilot tackle?[/]").strip()
+
+    attempts = 0
+    while True:
+        # IntPrompt re-prompts on non-numeric input by itself (it never
+        # raises ValueError here), so only out-of-range values need retries.
+        n = IntPrompt.ask(
+            "[bold cyan]How many initial answers should we craft?[/] (1-10)",
+            default=3,
+        )
+        if 1 <= n <= 10:
+            return question, n
+        attempts += 1
+        console.print("[yellow]Choose a value between 1 and 10.[/]")
+        if attempts >= 3:
+            console.print("[yellow]Falling back to 3 answers.[/]")
+            return question, 3
+
+
+def _display_results(console: Console, result: dict[str, Any]) -> None:
+    """Pretty-print SmartPilot outputs."""
+    console.print(
+        Panel(
+            Markdown(f"**Question**\n\n{result['question']}"),
+            border_style="cyan",
+            title="Prompt",
+        )
+    )
 
-    print()
+    table = Table(
+        title="Initial Answers",
+        box=box.ROUNDED,
+        show_lines=True,
+        expand=True,
+    )
+    table.add_column("Option", style="bold cyan", width=12, no_wrap=True)
+    table.add_column("Outline", style="white")
+
+    for idx, answer in enumerate(result["initial_answers"], start=1):
+        snippet = answer.strip()
+        if len(snippet) > 800:
+            snippet = snippet[:800].rstrip() + "..."
+            table.add_row(f"Answer {idx}", snippet or "[dim]No content[/dim]")
+
+    console.print(table)
+
+    console.print(
+        Panel(
+            Markdown(result["analysis"]),
+            title="Analysis",
+            border_style="yellow",
+        )
+    )
 
-    try:
-        result = asyncio.run(run_smartpilot(question, n))
+    console.print(
+        Panel(
+            Markdown(result["resolved_answers"]),
+            title="Resolved Answers",
+            border_style="blue",
+        )
+    )
 
-        print("\n" + "=" * 60)
-        print("RESULTS")
-        print("=" * 60)
+    console.print(
+        Panel(
+            Markdown(result["selected_answer"]),
+            title="Selected Answer",
+            border_style="green",
+        )
+    )
 
-        print("\n### Initial Answers Generated:")
-        for i, answer in enumerate(result["initial_answers"], 1):
-            print(f"\n--- Answer {i} ---")
-            print(answer[:500] + "..." if len(answer) > 500 else answer)
 
-        print("\n### Analysis:")
-        print(result["analysis"][:1000] + "..." if len(result["analysis"]) > 1000 else result["analysis"])
+def main() -> None:
+    """Main entry point for CLI usage."""
+    import sys
 
-        print("\n### Resolved Answers:")
-        print(
-            result["resolved_answers"][:1000] + "..."
-            if len(result["resolved_answers"]) > 1000
-            else result["resolved_answers"]
-        )
+    console = Console()
+    _render_header(console)
+    question, n = _prompt_user_inputs(console)
+    console.print()
 
-        print("\n### Selected Best Answer:")
-        print(result["selected_answer"])
 
-    except ValueError as e:
-        print(f"Configuration error: {e}")
+    try:
+        # rich allows only one live display per console at a time, so stage
+        # updates are logged alongside the Progress bar instead of through a
+        # separate console.status spinner.
+        with Progress(
+            SpinnerColumn(style="magenta"),
+            TextColumn("[progress.description]{task.description}"),
+            BarColumn(bar_width=None),
+            TextColumn("{task.completed}/{task.total}", style="cyan"),
+            TimeElapsedColumn(),
+            console=console,
+        ) as progress:
+            progress_task = progress.add_task("[cyan]Generating answers", total=n)
+            progress_task_active = True
+
+            def progress_callback(completed: int, total: int) -> None:
+                progress.update(progress_task, completed=completed, total=total)
+
+            def reporter(stage: str) -> None:
+                nonlocal progress_task_active
+                console.log(f"[bold cyan]{stage}[/]")
+                if progress_task_active and stage != "Generating initial answers":
+                    progress.remove_task(progress_task)
+                    progress_task_active = False
+
+            result = asyncio.run(
+                run_smartpilot(
+                    question,
+                    n,
+                    reporter=reporter,
+                    progress_callback=progress_callback,
+                )
+            )
+    except ValueError as exc:
+        console.print(Panel(str(exc), title="Configuration Error", border_style="red"))
         sys.exit(1)
-    except Exception as e:
-        print(f"Error: {e}")
+    except Exception as exc:
+        console.print(Panel(str(exc), title="SmartPilot Error", border_style="red"))
         sys.exit(1)
 
+    console.rule("[bold green]SmartPilot Results")
+    _display_results(console, result)
+
 
 if __name__ == "__main__":
     main()

From 68cf62db644ff37daef66c02a8297595a1618a38 Mon Sep 17 00:00:00 2001
From: Kirby_
Date: Sat, 13 Dec 2025 21:46:42 -0800
Subject: [PATCH 05/10] Update test_main.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 tests/test_main.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_main.py b/tests/test_main.py
index 6c9d946..c3b1f1f 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -20,8 +20,8 @@ class TestConfiguration:
     """Test configuration and setup."""
 
     def test_default_model_is_gpt5_mini(self):
-        """Test that default model is set to GPT-5 Mini equivalent."""
-        assert DEFAULT_MODEL == "gpt-4.1-mini"
"""Test that default model is set to GPT-4o Mini equivalent.""" + assert DEFAULT_MODEL == "gpt-4o-mini" def test_get_client_raises_without_api_key(self): """Test that get_client raises error when API key is not set.""" From e284dafbe1f0d2eb7bb2ab035955b4ae110d8eab Mon Sep 17 00:00:00 2001 From: Kirby_ Date: Sat, 13 Dec 2025 21:46:53 -0800 Subject: [PATCH 06/10] Update streamlit_app.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/smartpilot/streamlit_app.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/smartpilot/streamlit_app.py b/src/smartpilot/streamlit_app.py index be8673e..ae16b79 100644 --- a/src/smartpilot/streamlit_app.py +++ b/src/smartpilot/streamlit_app.py @@ -9,7 +9,13 @@ def run_async(coro): """Run async function in streamlit context.""" - return asyncio.run(coro) + try: + loop = asyncio.get_running_loop() + except RuntimeError: + # No running event loop + return asyncio.run(coro) + else: + return loop.run_until_complete(coro) def main(): From 73a9cb49bb910af8f6bbc647a654cb9286335ef3 Mon Sep 17 00:00:00 2001 From: Kirby_ Date: Sat, 13 Dec 2025 21:47:00 -0800 Subject: [PATCH 07/10] Update test_main.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/test_main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_main.py b/tests/test_main.py index c3b1f1f..8ab7103 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -161,7 +161,7 @@ def test_select_answer_uses_resolved_answers(self, mock_generate, mock_get_clien assert "Resolved answer content" in user_message -class TestRunSmartpilot: +class TestRunSmartPilot: """Test the full SmartPilot pipeline.""" @pytest.mark.asyncio From 86182dcd2a27a6f4bf4b16a8945fbe9185e1aa18 Mon Sep 17 00:00:00 2001 From: Kirby_ Date: Sat, 13 Dec 2025 21:47:26 -0800 Subject: [PATCH 08/10] Update README.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 229b3b3..f6a19a2 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # SmartPilot -SmartPilot is an AI-powered question answering system that generates, analyzes, and selects the best answer to a given question. It leverages OpenAI's Chat Completions API with GPT-4.1 Mini to provide high-quality, reliable, and accurate responses through a multi-step reasoning pipeline. +SmartPilot is an AI-powered question answering system that generates, analyzes, and selects the best answer to a given question. It leverages OpenAI's Chat Completions API with GPT-4 Turbo to provide high-quality, reliable, and accurate responses through a multi-step reasoning pipeline. 
## Overview From d7aebf4c9c9d5785fa4430b2d2b3b994a40f9ce2 Mon Sep 17 00:00:00 2001 From: Kirby_ Date: Sat, 13 Dec 2025 21:47:55 -0800 Subject: [PATCH 09/10] Update main.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/smartpilot/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/smartpilot/main.py b/src/smartpilot/main.py index ef39d30..6683727 100644 --- a/src/smartpilot/main.py +++ b/src/smartpilot/main.py @@ -110,7 +110,7 @@ async def generate_single_answer( """ # Run in executor to make it async - loop = asyncio.get_event_loop() + loop = asyncio.get_running_loop() result = await loop.run_in_executor( None, lambda: generate_response( From 698e980ead5685cc5b62852022e008a0b11b8a3e Mon Sep 17 00:00:00 2001 From: Kirby_ Date: Sat, 13 Dec 2025 21:48:02 -0800 Subject: [PATCH 10/10] Update test_prompts.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/test_prompts.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_prompts.py b/tests/test_prompts.py index dc7a537..0c9924b 100644 --- a/tests/test_prompts.py +++ b/tests/test_prompts.py @@ -1,6 +1,5 @@ """Tests for SmartPilot prompts module.""" -import pytest from smartpilot.prompts import ( ANALYZE_SYSTEM,