Merged
109 changes: 109 additions & 0 deletions .github/workflows/stress-test-mcp-server.yml
@@ -0,0 +1,109 @@
name: MCP Server Stress Test

on:
  pull_request:
    paths:
      - 'kai_mcp_solution_server/**'
      - '.github/workflows/stress-test-mcp-server.yml'
  push:
    branches:
      - main
    paths:
      - 'kai_mcp_solution_server/**'
      - '.github/workflows/stress-test-mcp-server.yml'
  workflow_dispatch:
    inputs:
      num_clients:
        description: 'Number of concurrent clients to test'
        required: false
        default: '100'

jobs:
  stress-test-postgres:
    name: Stress Test with PostgreSQL
    runs-on: ubuntu-latest

    services:
      postgres:
        image: postgres:16
        env:
          POSTGRES_USER: kai_user
          POSTGRES_PASSWORD: kai_password
          POSTGRES_DB: kai_test_db
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - 5432:5432

    defaults:
      run:
        shell: bash
        working-directory: ./kai_mcp_solution_server

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install the latest version of uv
        uses: astral-sh/setup-uv@v6
        with:
          version: "latest"

      - name: Install dependencies
        run: |
          uv sync
          uv pip install pytest-asyncio psycopg2-binary asyncpg

      - name: Run stress test with PostgreSQL backend
        env:
          KAI_DB_DSN: "postgresql+asyncpg://kai_user:kai_password@localhost:5432/kai_test_db"
          KAI_LLM_PARAMS: '{"model": "fake", "responses": ["Test response"]}'
          MCP_SERVER_URL: "http://localhost:8000"
          NUM_CONCURRENT_CLIENTS: ${{ github.event.inputs.num_clients || '100' }}
        run: |
          echo "Starting MCP server connected to PostgreSQL..."
          uv run python -m kai_mcp_solution_server --transport streamable-http --host 0.0.0.0 --port 8000 &
          SERVER_PID=$!

          # Wait for server to be ready
          echo "Waiting for server to start..."
          for i in {1..30}; do
            if curl -s http://localhost:8000/ > /dev/null 2>&1; then
              echo "Server is ready!"
              break
            fi
            if [ $i -eq 30 ]; then
              echo "Server failed to start in 30 seconds"
              kill $SERVER_PID || true
              exit 1
            fi
            echo -n "."
            sleep 1
          done

          # Run the stress test
          echo ""
          echo "Testing with $NUM_CONCURRENT_CLIENTS concurrent clients against PostgreSQL"
          make test-stress
          TEST_RESULT=$?

          # Stop the server
          echo "Stopping MCP server..."
          kill $SERVER_PID || true

          exit $TEST_RESULT
Comment on lines +71 to +101
🛠️ Refactor suggestion

Harden server lifecycle: fail-fast, trap, and wait on exit

Prevents orphaned background server and ensures proper failure propagation.

Apply this diff:

-        run: |
-          echo "Starting MCP server connected to PostgreSQL..."
-          uv run python -m kai_mcp_solution_server --transport streamable-http --host 0.0.0.0 --port 8000 &
-          SERVER_PID=$!
+        run: |
+          set -euo pipefail
+          echo "Starting MCP server connected to PostgreSQL..."
+          uv run python -m kai_mcp_solution_server --transport streamable-http --host 0.0.0.0 --port 8000 &
+          SERVER_PID=$!
+          trap 'echo "Stopping MCP server..."; kill ${SERVER_PID} 2>/dev/null || true; wait ${SERVER_PID} 2>/dev/null || true' EXIT
@@
-          kill $SERVER_PID || true
-
-          exit $TEST_RESULT
+          kill $SERVER_PID || true
+          wait $SERVER_PID || true
+          exit $TEST_RESULT
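The suggested trap pattern can be tried in isolation; a minimal sketch, with `sleep` standing in for the MCP server process (the `run_with_cleanup` helper and the `sleep` stand-in are hypothetical, not part of the PR):

```shell
#!/usr/bin/env bash
set -euo pipefail

# Sketch of the suggested lifecycle: start a background process,
# guarantee it is killed and reaped on every exit path, and
# propagate the test command's exit status.
run_with_cleanup() {
  (
    sleep 30 &                # stand-in for the MCP server
    SERVER_PID=$!
    # EXIT trap fires on success, failure, or interruption; the
    # shell's exit status is preserved because the trap never exits.
    trap 'kill ${SERVER_PID} 2>/dev/null || true; wait ${SERVER_PID} 2>/dev/null || true' EXIT
    "$@"                      # the "stress test"; its status becomes the subshell's
  )
}
```

`run_with_cleanup make test-stress` would mirror the workflow step: the background process is always reaped, and the test's exit code is what the job sees.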

        timeout-minutes: 10

      - name: Check PostgreSQL connection count
        if: always()
        run: |
          PGPASSWORD=kai_password psql -h localhost -U kai_user -d kai_test_db -c \
            "SELECT count(*), state FROM pg_stat_activity GROUP BY state;"
Comment on lines +104 to +109
🛠️ Refactor suggestion

Install psql before querying pg_stat_activity

Ubuntu runners may lack psql; this step can fail the job.

Add this step before “Check PostgreSQL connection count”:

      - name: Install PostgreSQL client
        run: |
          sudo apt-get update
          sudo apt-get install -y postgresql-client

5 changes: 5 additions & 0 deletions .gitignore
@@ -206,3 +206,8 @@ lerna-debug.log*

 # Package lock files (optionally ignore)
 # package-lock.json
+
+# Test database files
+tests/test_*.db
+kai_mcp_solution_server/tests/test_*.db
+*.db
54 changes: 44 additions & 10 deletions kai_mcp_solution_server/Makefile
@@ -86,7 +86,7 @@ run-local:
 	cd $(PROJECT_ROOT) && KAI_DB_DSN='$(KAI_DB_DSN)' KAI_LLM_PARAMS='$(KAI_LLM_PARAMS)' uv run python -m kai_mcp_solution_server --transport streamable-http --host 0.0.0.0 --port 8000 --mount-path=$(MOUNT_PATH)


-# Run with Podman for testing
+# Run with Podman for testing (flexible - any database via KAI_DB_DSN)
 .PHONY: run-podman
 run-podman: build
 	@echo "Running MCP solution server in Podman..."
@@ -96,7 +96,36 @@ run-podman: build
 		-e KAI_LLM_PARAMS='$(KAI_LLM_PARAMS)' \
 		-e KAI_DB_DSN='$(KAI_DB_DSN)' \
 		$(if $(PODMAN_ARGS),$(PODMAN_ARGS),) \
-		--name kai-mcp-solution-server $(IMAGE)
+		--name kai-mcp-solution-server $(IMAGE)
+
+# Convenience target: Run with PostgreSQL via podman-compose
+.PHONY: podman-postgres
+podman-postgres: build
+	@echo "Starting MCP solution server with PostgreSQL using podman-compose..."
+	@if [ -z "$(KAI_LLM_PARAMS)" ]; then echo "Error: KAI_LLM_PARAMS is required"; exit 1; fi
+	IMAGE=$(IMAGE) KAI_LLM_PARAMS='$(KAI_LLM_PARAMS)' MOUNT_PATH='$(MOUNT_PATH)' \
+		podman-compose up --force-recreate
+
+
+# Run stress test against external server (e.g., one started with run-local or podman-postgres)
+# First start a server: make run-local or make podman-postgres
+# Then run: make test-stress
+.PHONY: test-stress
+test-stress:
+	@if [ -z "$${MCP_SERVER_URL}" ]; then \
+		echo "Error: MCP_SERVER_URL is required for stress testing."; \
+		echo "Start a server first:"; \
+		echo "  make run-local"; \
+		echo "  make podman-postgres"; \
+		echo "Then set MCP_SERVER_URL and run the stress test:"; \
+		echo "  MCP_SERVER_URL=http://localhost:8000 make test-stress"; \
+		exit 1; \
+	fi
+	@echo "Running stress test against external server at $${MCP_SERVER_URL}"
+	@echo "Testing with $${NUM_CONCURRENT_CLIENTS:-30} concurrent clients..."
+	MCP_SERVER_URL=$${MCP_SERVER_URL} \
+	NUM_CONCURRENT_CLIENTS=$${NUM_CONCURRENT_CLIENTS:-30} \
+	uv run python -m pytest tests/test_multiple_integration.py::TestMultipleIntegration::test_multiple_users -xvs

 # Test against HTTP server
 .PHONY: test-http
@@ -124,12 +153,6 @@ test-stdio-ts:
 	@echo "Running TypeScript MCP test client using stdio transport..."
 	cd $(PROJECT_ROOT)/ts-mcp-client && npm run build && node --es-module-specifier-resolution=node dist/client.js --transport stdio --server-path $(PROJECT_ROOT)

-# Run pytest integration tests
-.PHONY: pytest
-pytest:
-	@echo "Running MCP integration tests with pytest..."
-	cd $(PROJECT_ROOT) && python -m pytest $(TESTS_DIR)/test_integration.py -v
-
 # Run with test client in separate pod
 .PHONY: run-with-tests
 run-with-tests: build
@@ -178,7 +201,9 @@ help:
 	@echo "  clean          : Remove local container images"
 	@echo "  port-forward   : Forward port to local machine for testing"
 	@echo "  run-local      : Run server locally for testing"
-	@echo "  run-podman     : Run server in Podman container for testing"
+	@echo "  run-podman     : Run server in Podman (uses KAI_DB_DSN)"
+	@echo "  podman-postgres: Run server with PostgreSQL via podman-compose"
+	@echo "  test-stress    : Run stress test against external server (requires MCP_SERVER_URL)"
 	@echo "  test-http      : Test server using HTTP transport (Python client)"
 	@echo "  test-http-ts   : Test server using HTTP transport (TypeScript client)"
 	@echo "  test-stdio     : Test server using STDIO transport (Python client)"
@@ -193,6 +218,10 @@ help:
 	@echo "  HOST           : Server hostname for HTTP tests (default: localhost)"
 	@echo "  PORT           : Server port for HTTP tests (default: 8000)"
 	@echo "  BEARER_TOKEN   : Bearer token for HTTP authentication (optional)"
+	@echo "  KAI_DB_DSN     : Database connection string for server"
+	@echo "  KAI_LLM_PARAMS : LLM configuration as JSON (required)"
+	@echo "  NUM_CONCURRENT_CLIENTS : Number of clients for stress testing (default: 30)"
+	@echo "  MCP_SERVER_URL : URL of external server for test-stress-external (default: http://localhost:8000)"
 	@echo "  EXTRA_VARS     : Any additional variables to pass to Ansible"
 	@echo ""
 	@echo "Example usage:"
@@ -204,6 +233,11 @@ help:
 	@echo "  make deploy EXTRA_VARS='route_tls_enabled=true route_tls_termination=edge route_tls_insecure_policy=Allow'"
 	@echo "  make run-local"
 	@echo "  make test-stdio"
+	@echo "  KAI_LLM_PARAMS='{\"model\":\"gpt-4\"}' make podman-postgres"
+	@echo "  # Stress test against running server:"
+	@echo "  make run-local  # In one terminal"
+	@echo "  MCP_SERVER_URL=http://localhost:8000 NUM_CONCURRENT_CLIENTS=100 make test-stress  # In another"
+	@echo "  make test-http"
 	@echo "  make test-http BEARER_TOKEN='your-jwt-token-here'"
 	@echo "  make test-http HOST=api.example.com PORT=443 BEARER_TOKEN='token'"
 	@echo "  make test-http HOST=api.exayeah, there are a lot of little ones that have been filed. I feel pretty comfortable delaying them to a point releasemple.com PORT=443 BEARER_TOKEN='token'"

Comment on lines +242 to +243
⚠️ Potential issue

Fix corrupted help text.

User-facing help prints a garbled example. Replace with a clean domain.

-    @echo "  make test-http HOST=api.exayeah, there are a lot of little ones that have been filed. I feel pretty comfortable delaying them to a point releasemple.com PORT=443 BEARER_TOKEN='token'"
-    
+    @echo "  make test-http HOST=api.example.com PORT=443 BEARER_TOKEN='token'"
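For context on what `NUM_CONCURRENT_CLIENTS` drives, the fan-out shape of the stress test can be sketched with asyncio (`fake_client` is a hypothetical stand-in for a real MCP client session, not code from this PR):

```python
import asyncio


async def fake_client(client_id: int) -> int:
    """Hypothetical stand-in for one MCP client session."""
    await asyncio.sleep(0)  # yield to the event loop, as a network round-trip would
    return client_id


async def run_stress(num_clients: int) -> list[int]:
    # Launch all clients concurrently and wait for every result --
    # the same fan-out the stress test performs against the server.
    tasks = [fake_client(i) for i in range(num_clients)]
    return await asyncio.gather(*tasks)


if __name__ == "__main__":
    results = asyncio.run(run_stress(30))
    print(f"{len(results)} clients completed")
```

With the workflow default of 100 clients, each concurrent request ultimately needs a database connection, which is what the pool sizing in `dao.py` below is reacting to.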

46 changes: 39 additions & 7 deletions kai_mcp_solution_server/src/kai_mcp_solution_server/db/dao.py
@@ -26,6 +26,7 @@
     mapped_column,
     relationship,
 )
+from sqlalchemy.pool import StaticPool

 # from sqlalchemy.orm import relationship as _relationship
 from sqlalchemy.schema import (
@@ -96,17 +97,48 @@ class Base(MappedAsDataclass, DeclarativeBase):
     }


-async def get_async_engine(url: URL | str, drop_all: bool = False) -> AsyncEngine:
-    engine = create_async_engine(url)
-
+async def drop_all_tables(engine: AsyncEngine) -> None:
+    """Drop all database tables. Should be called separately from engine creation."""
     async with engine.begin() as conn:
-        # NOTE: Only do this in dev/test environments!
-        if drop_all:
-            print("Dropping all tables", file=sys.stderr)
-            await conn.run_sync(drop_everything)
+        print("Dropping all tables", file=sys.stderr)
+        await conn.run_sync(drop_everything)
+
+
+async def ensure_tables_exist(engine: AsyncEngine) -> None:
+    """Ensure all tables exist in the database."""
+    async with engine.begin() as conn:
+        await conn.run_sync(Base.metadata.create_all)
+
+
+async def get_async_engine(url: URL | str) -> AsyncEngine:
+    # Convert to string if URL object
+    url_str = str(url)
+
+    # Configure connection pool based on database type
+    if "sqlite" in url_str:
+        # SQLite needs special handling due to its file-based nature
+        # and limited write concurrency (even with WAL mode)
+        engine = create_async_engine(
+            url,
+            connect_args={"check_same_thread": False},
+            poolclass=StaticPool,  # Single shared connection for SQLite
+            echo_pool=False,  # Set to True for debugging connection pool
+        )
+    else:
+        # Most production databases (PostgreSQL, MySQL, MariaDB, etc.)
+        # can handle high concurrency well
+        # NOTE: FastMCP HTTP mode uses a single shared engine for all clients.
+        # These pool settings should handle concurrent requests from all clients.
+        engine = create_async_engine(
+            url,
+            pool_size=20,  # Base connections maintained in pool
+            max_overflow=80,  # Additional connections created as needed (total max = 100)
+            pool_timeout=30,  # Timeout waiting for a connection from pool
+            pool_recycle=3600,  # Recycle connections after 1 hour
+            pool_pre_ping=True,  # Test connections before using
+            echo_pool=False,  # Set to True for debugging connection pool
+        )

     return engine
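The new pool sizing caps each engine at pool_size + max_overflow = 100 connections. A small sketch of the DSN-based branching, decoupled from SQLAlchemy so the numbers can be checked directly (the helper name is hypothetical, and the `"StaticPool"` string is a placeholder for `sqlalchemy.pool.StaticPool`):

```python
def engine_pool_kwargs(dsn: str) -> dict:
    """Mirror the DSN-based branching in get_async_engine (sketch only)."""
    if "sqlite" in dsn:
        # SQLite: one shared connection, no real pool.
        return {
            "connect_args": {"check_same_thread": False},
            "poolclass": "StaticPool",  # placeholder for sqlalchemy.pool.StaticPool
        }
    # Server databases: sized pool with overflow, recycling, and pre-ping.
    return {
        "pool_size": 20,        # connections kept open
        "max_overflow": 80,     # extra connections on demand (hard cap 20 + 80 = 100)
        "pool_timeout": 30,     # seconds to wait for a free connection
        "pool_recycle": 3600,   # recycle after an hour to avoid stale connections
        "pool_pre_ping": True,  # validate a connection before handing it out
    }
```

Note the interaction with the stress test: 100 concurrent clients fit exactly within the 100-connection cap, so a larger `num_clients` input would start queuing on `pool_timeout`.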

