-n default` |
+| `use_in_pass_through` not toggling | UI bug | Set via curl in Step 5 and confirm in Raw JSON |
diff --git a/sample_solutions/HybridSearch/scripts/download_amazon_dataset.py b/sample_solutions/HybridSearch/scripts/download_amazon_dataset.py
new file mode 100644
index 00000000..79cc49dc
--- /dev/null
+++ b/sample_solutions/HybridSearch/scripts/download_amazon_dataset.py
@@ -0,0 +1,165 @@
+"""
+Download and prepare Amazon Products dataset from HuggingFace
+"""
+import pandas as pd
+import re
+import random
+from datasets import load_dataset
+
+def clean_price(price_str):
+    """Extract the first numeric price from a raw price value.
+
+    Args:
+        price_str: Raw price, e.g. "$1,299.99" or "$5.99 - $9.99".
+
+    Returns:
+        The first number found as a float (thousands separators removed),
+        or None when the value is empty/NaN or holds no parsable number.
+    """
+    if not price_str or pd.isna(price_str):
+        return None
+
+    # Currency symbols are skipped implicitly: the search starts at the first
+    # digit/comma run and stops at the end of the first number.
+    match = re.search(r'[\d,]+\.?\d*', str(price_str))
+    if match is None:
+        return None
+
+    try:
+        return float(match.group().replace(',', ''))
+    except ValueError:
+        # The pattern can match text without any digit (e.g. a lone ","),
+        # which float() rejects.
+        return None
+
+def extract_brand(text):
+    """Guess a brand by taking the leading word of *text*.
+
+    Surrounding punctuation (``.,;:-``) is stripped from that word. When the
+    input is empty/NaN, or the word has two characters or fewer, the
+    placeholder "Generic" is returned instead.
+    """
+    fallback = "Generic"
+    if not text or pd.isna(text):
+        return fallback
+
+    tokens = str(text).split()
+    if not tokens:
+        return fallback
+
+    # Heuristic: product titles often start with the brand name.
+    candidate = tokens[0].strip('.,;:-')
+    return candidate if len(candidate) > 2 else fallback
+
+def generate_rating():
+    """Generate a synthetic star rating skewed toward 4-5 stars.
+
+    A whole-star base (1-5) is drawn with weights favouring 4 and 5, then a
+    random fraction in [0, 0.9] is added.
+
+    Returns:
+        A float rounded to one decimal place, never greater than 5.0.
+    """
+    weights = [0.05, 0.1, 0.15, 0.35, 0.35]  # More 4s and 5s
+    base = random.choices([1.0, 2.0, 3.0, 4.0, 5.0], weights=weights)[0]
+    # Clamp: a base of 5.0 plus the fraction would otherwise reach 5.9,
+    # which is outside the 5-star scale.
+    return min(5.0, round(base + random.uniform(0, 0.9), 1))
+
+def generate_review_count(rating):
+    """Return a synthetic review count scaled by *rating*.
+
+    A random base between 10 and 1000 (inclusive) is multiplied by
+    ``rating / 5.0`` and truncated to an int, so higher ratings tend to
+    produce larger counts.
+    """
+    raw_count = random.randint(10, 1000)
+    return int(raw_count * (rating / 5.0))
+
+def clean_text(text):
+    """Normalise a free-text field.
+
+    Empty/NaN input becomes "". Otherwise every run of whitespace is
+    collapsed to a single space, leading/trailing whitespace is dropped, and
+    the result is cut to at most 500 characters.
+    """
+    if not text or pd.isna(text):
+        return ""
+    # split() with no argument discards leading/trailing whitespace and
+    # treats any whitespace run as one separator.
+    collapsed = ' '.join(str(text).split())
+    return collapsed[:500]
+
+def simplify_category(category):
+    """Reduce a category path to its top-level category.
+
+    Empty/NaN input yields "General". If the value contains "|" (checked
+    first) or ">", the text before the first separator is returned, stripped.
+    A value without separators is stripped and cut to 50 characters.
+    """
+    if not category or pd.isna(category):
+        return "General"
+
+    label = str(category)
+    for separator in ('|', '>'):
+        if separator in label:
+            return label.split(separator)[0].strip()
+    return label.strip()[:50]
+
+def download_and_prepare(output_file='../data/test_datasets/amazon_products.csv', max_products=200):
+    """Download the Amazon products dataset and write a cleaned CSV sample.
+
+    Loads the ``ckandemir/amazon-products`` train split from HuggingFace,
+    maps the first ``max_products`` rows onto the product schema (id, name,
+    description, category, price, rating, review_count, image_url, brand)
+    and saves them as CSV. Rows without a usable name or a price in
+    (0, 10000] are dropped, so fewer than ``max_products`` rows may be
+    written. Rating and review count are synthetic.
+
+    Args:
+        output_file: Destination CSV path; a relative path is resolved
+            against the current working directory. Missing parent
+            directories are created.
+        max_products: Number of leading dataset rows to consider.
+
+    Returns:
+        pandas.DataFrame with the prepared products (possibly empty).
+
+    Raises:
+        Exception: Any download, processing or write error is reported on
+            stdout and re-raised unchanged.
+    """
+    import os
+
+    print("📥 Downloading Amazon Products dataset from HuggingFace...")
+    print("This may take a few minutes...")
+
+    try:
+        # Load dataset (train split has 24k products)
+        dataset = load_dataset("ckandemir/amazon-products", split="train", revision="main")  # nosec B615
+
+        print(f"✅ Downloaded {len(dataset)} products")
+        print("🔄 Converting to DataFrame...")
+
+        df = pd.DataFrame(dataset)
+
+        print(f"📊 Columns: {df.columns.tolist()}")
+
+        # Map columns to our schema
+        products = []
+
+        print(f"🔄 Processing products (taking first {max_products})...")
+
+        for idx, row in df.head(max_products).iterrows():
+            name = clean_text(row.get('Product Name', ''))
+            if not name or len(name) < 3:
+                continue
+
+            description = clean_text(row.get('Description', ''))
+            category = simplify_category(row.get('Category', ''))
+            price = clean_price(row.get('Selling Price', ''))
+
+            # Skip rows without a plausible price
+            if not price or price <= 0 or price > 10000:
+                continue
+
+            image_url = row.get('Image', '')
+            if not image_url or pd.isna(image_url):
+                # Use placeholder
+                image_url = "https://via.placeholder.com/400x400/3b82f6/ffffff?text=Product"
+            else:
+                image_url = str(image_url).strip()
+
+            # The dataset carries no brand/rating/review columns here, so
+            # the brand is guessed from the name and the rest is synthetic.
+            brand = extract_brand(name)
+            rating = generate_rating()
+            review_count = generate_review_count(rating)
+
+            products.append({
+                'id': f'amz_{idx:05d}',
+                'name': name,
+                'description': description if description else name,
+                'category': category,
+                'price': round(price, 2),
+                'rating': rating,
+                'review_count': review_count,
+                'image_url': image_url,
+                'brand': brand
+            })
+
+            if (idx + 1) % 50 == 0:
+                print(f" Processed {idx + 1}/{max_products}...")
+
+        # Create DataFrame and save; make sure the target directory exists
+        # so the write does not fail after the (slow) download.
+        products_df = pd.DataFrame(products)
+        target_dir = os.path.dirname(output_file)
+        if target_dir:
+            os.makedirs(target_dir, exist_ok=True)
+        products_df.to_csv(output_file, index=False)
+
+        print(f"\n✅ Successfully prepared {len(products_df)} products!")
+        print(f"📁 Saved to: {output_file}")
+
+        if products_df.empty:
+            # An empty frame has no 'category'/'price' columns to summarise.
+            return products_df
+
+        print("\n📊 Statistics:")
+        print(f" Categories: {products_df['category'].nunique()}")
+        print(f" Price range: ${products_df['price'].min():.2f} - ${products_df['price'].max():.2f}")
+        print(f" Avg price: ${products_df['price'].mean():.2f}")
+        print(f" Rating range: {products_df['rating'].min():.1f} - {products_df['rating'].max():.1f}")
+        print("\n🏷️ Top 10 Categories:")
+        print(products_df['category'].value_counts().head(10))
+
+        return products_df
+
+    except Exception as e:
+        print(f"❌ Error: {e}")
+        print("Make sure you have the required packages:")
+        print(" pip install datasets pandas")
+        raise
+
+if __name__ == "__main__":
+ # Download 200 products for testing (you can increase this)
+ download_and_prepare(max_products=200)
+
diff --git a/sample_solutions/HybridSearch/scripts/force_reload.py b/sample_solutions/HybridSearch/scripts/force_reload.py
new file mode 100644
index 00000000..f1e161f2
--- /dev/null
+++ b/sample_solutions/HybridSearch/scripts/force_reload.py
@@ -0,0 +1,44 @@
+import httpx
+import asyncio
+import json
+
+RETRIEVAL_URL = "http://localhost:8002"
+GATEWAY_URL = "http://localhost:8000"
+
+async def verify_reload():
+    """Trigger an index reload on the retrieval service, then smoke-test search.
+
+    Step 1 POSTs to ``{RETRIEVAL_URL}/api/v1/reload``. Step 2 POSTs a generic
+    query to ``{GATEWAY_URL}/api/v1/search`` and reports whether any products
+    come back. All outcomes are printed; the function returns None and does
+    not raise for transport errors or non-200 responses.
+    """
+    # A reload can take longer than httpx's 5-second default timeout.
+    async with httpx.AsyncClient(timeout=60.0) as client:
+        print(f"1. Triggering reload at {RETRIEVAL_URL}/api/v1/reload...")
+        try:
+            response = await client.post(f"{RETRIEVAL_URL}/api/v1/reload")
+        except httpx.HTTPError as e:
+            print(f"Failed to connect to retrieval service: {e}")
+            return
+
+        if response.status_code != 200:
+            print(f"Reload failed: {response.status_code} - {response.text}")
+            return
+
+        print("Reload successful!")
+        try:
+            print(json.dumps(response.json(), indent=2))
+        except ValueError:
+            # A 200 with a non-JSON body is still a successful reload.
+            print(response.text)
+
+        print("\n2. Testing Product Search...")
+        try:
+            response = await client.post(
+                f"{GATEWAY_URL}/api/v1/search",
+                json={"query": "product", "limit": 5}
+            )
+        except httpx.HTTPError as e:
+            print(f"Failed to connect to gateway: {e}")
+            return
+
+        if response.status_code != 200:
+            print(f"Search failed: {response.status_code} - {response.text}")
+            return
+
+        # Payload problems are reported as such rather than being mistaken
+        # for a connection failure.
+        try:
+            results = response.json().get("results", [])
+        except (ValueError, AttributeError):
+            print(f"Search returned an unexpected payload: {response.text}")
+            return
+
+        print(f"Found {len(results)} products.")
+        if results:
+            first = results[0]
+            name = first.get('name', '<unnamed>') if isinstance(first, dict) else first
+            print("First product:", name)
+            print("SUCCESS: Products are searchable!")
+        else:
+            print("FAILURE: No products found after reload.")
+
+if __name__ == "__main__":
+ asyncio.run(verify_reload())
diff --git a/sample_solutions/HybridSearch/scripts/generate_ecommerce_dataset.py b/sample_solutions/HybridSearch/scripts/generate_ecommerce_dataset.py
new file mode 100644
index 00000000..d0737637
--- /dev/null
+++ b/sample_solutions/HybridSearch/scripts/generate_ecommerce_dataset.py
@@ -0,0 +1,160 @@
+"""
+Generate a realistic e-commerce product dataset with images
+"""
+import csv
+import random
+
+# Product templates keyed by category; each template gives a name, description, (min, max) price range and brand
+PRODUCTS = {
+ "Electronics": [
+ {"name": "Wireless Bluetooth Headphones", "desc": "Premium noise-canceling headphones with 30-hour battery", "price_range": (49.99, 199.99), "brand": "SoundTech"},
+ {"name": "Smartphone 5G", "desc": "Latest 5G smartphone with 6.5\" display and triple camera", "price_range": (399.99, 1299.99), "brand": "TechPro"},
+ {"name": "Laptop 15.6 inch", "desc": "Powerful laptop with Intel i7, 16GB RAM, 512GB SSD", "price_range": (699.99, 1899.99), "brand": "CompuMax"},
+ {"name": "Wireless Mouse", "desc": "Ergonomic wireless mouse with adjustable DPI", "price_range": (19.99, 79.99), "brand": "TechMouse"},
+ {"name": "Mechanical Keyboard", "desc": "RGB backlit mechanical gaming keyboard", "price_range": (59.99, 199.99), "brand": "KeyMaster"},
+ {"name": "USB-C Hub", "desc": "7-in-1 USB-C hub with HDMI, USB 3.0, SD card reader", "price_range": (29.99, 89.99), "brand": "ConnectPro"},
+ {"name": "Wireless Earbuds", "desc": "True wireless earbuds with active noise cancellation", "price_range": (49.99, 299.99), "brand": "AudioMax"},
+ {"name": "Smart Watch", "desc": "Fitness tracker smart watch with heart rate monitor", "price_range": (99.99, 499.99), "brand": "FitWatch"},
+ {"name": "Portable Charger", "desc": "20000mAh power bank with fast charging", "price_range": (24.99, 79.99), "brand": "PowerBoost"},
+ {"name": "Webcam HD", "desc": "1080p HD webcam with built-in microphone", "price_range": (39.99, 149.99), "brand": "CamPro"},
+ {"name": "External SSD", "desc": "1TB portable external SSD with USB 3.2", "price_range": (89.99, 299.99), "brand": "StorageMax"},
+ {"name": "Monitor 27 inch", "desc": "4K UHD monitor with HDR and 144Hz refresh rate", "price_range": (299.99, 799.99), "brand": "ViewPro"},
+ {"name": "Laptop Backpack", "desc": "Water-resistant laptop backpack with USB charging port", "price_range": (29.99, 89.99), "brand": "TechPack"},
+ {"name": "Wireless Charger", "desc": "Fast wireless charging pad for smartphones", "price_range": (19.99, 59.99), "brand": "ChargeFast"},
+ {"name": "Bluetooth Speaker", "desc": "Portable waterproof Bluetooth speaker", "price_range": (39.99, 199.99), "brand": "SoundWave"},
+ ],
+ "Home & Kitchen": [
+ {"name": "Coffee Maker", "desc": "Programmable drip coffee maker with thermal carafe", "price_range": (39.99, 199.99), "brand": "BrewMaster"},
+ {"name": "Blender", "desc": "High-speed blender with multiple settings", "price_range": (49.99, 299.99), "brand": "BlendPro"},
+ {"name": "Air Fryer", "desc": "6-quart digital air fryer with preset functions", "price_range": (59.99, 199.99), "brand": "CrispyChef"},
+ {"name": "Knife Set", "desc": "Professional 15-piece stainless steel knife set", "price_range": (79.99, 299.99), "brand": "ChefPro"},
+ {"name": "Vacuum Cleaner", "desc": "Cordless stick vacuum with HEPA filter", "price_range": (149.99, 599.99), "brand": "CleanMaster"},
+ {"name": "Water Bottle", "desc": "Insulated stainless steel water bottle", "price_range": (19.99, 49.99), "brand": "HydroFlask"},
+ {"name": "Cookware Set", "desc": "Non-stick 10-piece cookware set", "price_range": (99.99, 399.99), "brand": "CookPro"},
+ {"name": "Food Processor", "desc": "12-cup food processor with multiple blades", "price_range": (79.99, 299.99), "brand": "ChopMaster"},
+ {"name": "Toaster Oven", "desc": "6-slice convection toaster oven", "price_range": (49.99, 199.99), "brand": "ToastPro"},
+ {"name": "Electric Kettle", "desc": "1.7L electric kettle with temperature control", "price_range": (29.99, 99.99), "brand": "BoilFast"},
+ {"name": "Mixer Stand", "desc": "6-speed stand mixer with stainless steel bowl", "price_range": (149.99, 499.99), "brand": "MixMaster"},
+ {"name": "Cutting Board Set", "desc": "Bamboo cutting board set of 3", "price_range": (24.99, 79.99), "brand": "ChopBoard"},
+ {"name": "Storage Containers", "desc": "Glass food storage containers set of 10", "price_range": (29.99, 89.99), "brand": "StoreFresh"},
+ {"name": "Dish Rack", "desc": "Stainless steel dish drying rack", "price_range": (24.99, 79.99), "brand": "DryWell"},
+ ],
+ "Sports & Outdoors": [
+ {"name": "Yoga Mat", "desc": "Extra thick 6mm yoga mat with carrying strap", "price_range": (19.99, 79.99), "brand": "FitLife"},
+ {"name": "Resistance Bands", "desc": "5-piece resistance band set for home workouts", "price_range": (14.99, 49.99), "brand": "FitGear"},
+ {"name": "Dumbbells Set", "desc": "Adjustable dumbbell set 5-50 lbs", "price_range": (99.99, 399.99), "brand": "IronFit"},
+ {"name": "Jump Rope", "desc": "Speed jump rope with adjustable length", "price_range": (9.99, 29.99), "brand": "FitJump"},
+ {"name": "Camping Tent", "desc": "4-person waterproof camping tent", "price_range": (79.99, 299.99), "brand": "OutdoorPro"},
+ {"name": "Sleeping Bag", "desc": "Lightweight sleeping bag for camping", "price_range": (39.99, 149.99), "brand": "SleepWell"},
+ {"name": "Hiking Backpack", "desc": "50L hiking backpack with rain cover", "price_range": (59.99, 199.99), "brand": "TrailMaster"},
+ {"name": "Water Filter", "desc": "Portable water filter for camping", "price_range": (24.99, 79.99), "brand": "PureWater"},
+ {"name": "Bike Helmet", "desc": "Adjustable bike helmet with LED light", "price_range": (29.99, 99.99), "brand": "SafeRide"},
+ {"name": "Tennis Racket", "desc": "Professional tennis racket with case", "price_range": (49.99, 249.99), "brand": "GamePro"},
+ {"name": "Soccer Ball", "desc": "Official size 5 soccer ball", "price_range": (19.99, 59.99), "brand": "KickMaster"},
+ {"name": "Swim Goggles", "desc": "Anti-fog swim goggles with UV protection", "price_range": (14.99, 49.99), "brand": "SwimPro"},
+ ],
+ "Clothing & Shoes": [
+ {"name": "Running Shoes", "desc": "Lightweight breathable running shoes", "price_range": (59.99, 179.99), "brand": "RunFast"},
+ {"name": "Athletic Shorts", "desc": "Quick-dry athletic shorts with pockets", "price_range": (19.99, 49.99), "brand": "FitWear"},
+ {"name": "T-Shirt Pack", "desc": "Pack of 3 performance t-shirts", "price_range": (24.99, 79.99), "brand": "ComfortFit"},
+ {"name": "Hoodie", "desc": "Fleece pullover hoodie with pockets", "price_range": (29.99, 89.99), "brand": "CozyWear"},
+ {"name": "Jeans", "desc": "Slim fit stretch denim jeans", "price_range": (39.99, 129.99), "brand": "DenimPro"},
+ {"name": "Sneakers", "desc": "Casual sneakers with memory foam insole", "price_range": (49.99, 149.99), "brand": "StepComfort"},
+ {"name": "Winter Jacket", "desc": "Waterproof winter jacket with hood", "price_range": (79.99, 299.99), "brand": "WarmGuard"},
+ {"name": "Baseball Cap", "desc": "Adjustable baseball cap with logo", "price_range": (14.99, 39.99), "brand": "CapPro"},
+ {"name": "Socks Pack", "desc": "Pack of 6 athletic socks", "price_range": (14.99, 34.99), "brand": "ComfortSocks"},
+ {"name": "Backpack", "desc": "School/work backpack with laptop compartment", "price_range": (29.99, 99.99), "brand": "PackPro"},
+ ],
+ "Books & Media": [
+ {"name": "Fiction Novel", "desc": "Bestselling fiction novel paperback", "price_range": (9.99, 29.99), "brand": "ReadWell"},
+ {"name": "Self-Help Book", "desc": "Personal development and productivity book", "price_range": (12.99, 34.99), "brand": "GrowMind"},
+ {"name": "Cookbook", "desc": "Healthy cooking recipes cookbook", "price_range": (14.99, 39.99), "brand": "ChefBook"},
+ {"name": "Journal", "desc": "Leather-bound journal with lined pages", "price_range": (12.99, 39.99), "brand": "WriteWell"},
+ {"name": "Coloring Book", "desc": "Adult coloring book for relaxation", "price_range": (9.99, 24.99), "brand": "ColorJoy"},
+ {"name": "Board Game", "desc": "Family board game for 2-6 players", "price_range": (19.99, 79.99), "brand": "GameNight"},
+ {"name": "Puzzle 1000pc", "desc": "1000-piece jigsaw puzzle", "price_range": (14.99, 39.99), "brand": "PuzzleMaster"},
+ ],
+ "Beauty & Personal Care": [
+ {"name": "Electric Toothbrush", "desc": "Rechargeable electric toothbrush with timer", "price_range": (29.99, 149.99), "brand": "SmilePro"},
+ {"name": "Hair Dryer", "desc": "Ionic hair dryer with diffuser", "price_range": (39.99, 149.99), "brand": "StylePro"},
+ {"name": "Moisturizer", "desc": "Daily facial moisturizer with SPF", "price_range": (14.99, 49.99), "brand": "GlowCare"},
+ {"name": "Shampoo Set", "desc": "Shampoo and conditioner set", "price_range": (19.99, 59.99), "brand": "HairCare"},
+ {"name": "Perfume", "desc": "Luxury eau de parfum spray", "price_range": (39.99, 199.99), "brand": "Essence"},
+ {"name": "Makeup Brush Set", "desc": "Professional makeup brush set of 12", "price_range": (24.99, 89.99), "brand": "BeautyPro"},
+ {"name": "Face Mask Set", "desc": "Variety pack of sheet face masks", "price_range": (14.99, 39.99), "brand": "SkinCare"},
+ {"name": "Electric Shaver", "desc": "Cordless electric shaver for men", "price_range": (49.99, 199.99), "brand": "ShaveMaster"},
+ ],
+ "Toys & Games": [
+ {"name": "Building Blocks", "desc": "Creative building blocks set 500 pieces", "price_range": (29.99, 99.99), "brand": "BuildIt"},
+ {"name": "RC Car", "desc": "Remote control racing car with rechargeable battery", "price_range": (39.99, 149.99), "brand": "SpeedRacer"},
+ {"name": "Doll House", "desc": "Wooden doll house with furniture", "price_range": (49.99, 199.99), "brand": "PlayHome"},
+ {"name": "Action Figure", "desc": "Collectible action figure with accessories", "price_range": (14.99, 49.99), "brand": "HeroToys"},
+ {"name": "Art Supplies", "desc": "Complete art supplies set for kids", "price_range": (24.99, 79.99), "brand": "ArtKids"},
+ {"name": "Science Kit", "desc": "Educational science experiment kit", "price_range": (29.99, 89.99), "brand": "LearnScience"},
+ ]
+}
+
+# Placeholder images (via.placeholder.com URLs stand in for real product photos)
+def get_image_url(product_index, category):
+    """Build a via.placeholder.com image URL for a product.
+
+    The background colour cycles through a fixed six-colour palette based on
+    ``product_index``; the image caption is the category lower-cased with
+    " & " and spaces replaced by hyphens.
+    """
+    palette = ("3498db", "e74c3c", "2ecc71", "f39c12", "9b59b6", "1abc9c")
+    background = palette[product_index % len(palette)]
+    slug = category.replace(" & ", "-").replace(" ", "-").lower()
+    return f"https://via.placeholder.com/400x400/{background}/ffffff?text={slug}"
+
+def generate_dataset(output_file, num_products_per_category=10):
+    """Generate a synthetic e-commerce catalog and write it to a CSV file.
+
+    For every category in ``PRODUCTS`` the templates are cycled until
+    ``num_products_per_category`` rows exist; once the templates start
+    repeating, a suffix such as "Pro" or "Plus" is appended to the name.
+    Price, rating and review count are randomised per row.
+
+    Args:
+        output_file: Destination CSV path. Missing parent directories are
+            created.
+        num_products_per_category: Rows to emit per category. With 0 only
+            the CSV header is written.
+
+    Returns:
+        None. A summary is printed to stdout.
+    """
+    import os
+
+    variations = ["Pro", "Plus", "Max", "Ultra", "Premium", "Deluxe", "Elite"]
+    products = []
+    product_id = 1
+
+    for category, product_templates in PRODUCTS.items():
+        if not product_templates:
+            # Nothing to cycle through; avoids a modulo-by-zero below.
+            continue
+
+        for i in range(num_products_per_category):
+            # Select a product template and create variation
+            template = product_templates[i % len(product_templates)]
+
+            # Add variation to name if repeating
+            variation_suffix = ""
+            if i >= len(product_templates):
+                variation_suffix = f" {variations[i % len(variations)]}"
+
+            # Generate price within range
+            price = round(random.uniform(*template["price_range"]), 2)
+
+            # Generate rating (uniform between 3.5 and 5.0)
+            rating = round(random.uniform(3.5, 5.0), 1)
+
+            # Generate review count (scaled by rating)
+            review_count = int(random.uniform(50, 2000) * (rating / 5.0))
+
+            products.append({
+                "id": f"prod_{product_id:03d}",
+                "name": template["name"] + variation_suffix,
+                "description": template["desc"],
+                "category": category,
+                "price": price,
+                "rating": rating,
+                "review_count": review_count,
+                "image_url": get_image_url(product_id, category),
+                "brand": template["brand"]
+            })
+            product_id += 1
+
+    # Write to CSV, creating the target directory first if needed
+    target_dir = os.path.dirname(output_file)
+    if target_dir:
+        os.makedirs(target_dir, exist_ok=True)
+    with open(output_file, 'w', newline='', encoding='utf-8') as f:
+        writer = csv.DictWriter(f, fieldnames=['id', 'name', 'description', 'category', 'price', 'rating', 'review_count', 'image_url', 'brand'])
+        writer.writeheader()
+        writer.writerows(products)
+
+    print(f"✅ Generated {len(products)} products across {len(PRODUCTS)} categories")
+    print(f"📁 Saved to: {output_file}")
+    if products:
+        # min()/max() raise ValueError on an empty sequence.
+        prices = [p['price'] for p in products]
+        ratings = [p['rating'] for p in products]
+        print(f"💰 Price range: ${min(prices):.2f} - ${max(prices):.2f}")
+        print(f"⭐ Rating range: {min(ratings):.1f} - {max(ratings):.1f}")
+    print(f"📦 Categories: {', '.join(PRODUCTS.keys())}")
+
+if __name__ == "__main__":
+    # Anchor the output to this script's location (<project>/data/test_datasets)
+    # so the command works from any working directory, not only from scripts/.
+    from pathlib import Path
+
+    _target = Path(__file__).resolve().parent.parent / "data" / "test_datasets" / "ecommerce_products.csv"
+    _target.parent.mkdir(parents=True, exist_ok=True)
+    output_file = str(_target)
+    generate_dataset(output_file, num_products_per_category=10)
+
diff --git a/sample_solutions/HybridSearch/scripts/prepare_product_dataset.py b/sample_solutions/HybridSearch/scripts/prepare_product_dataset.py
new file mode 100644
index 00000000..5c5a97a5
--- /dev/null
+++ b/sample_solutions/HybridSearch/scripts/prepare_product_dataset.py
@@ -0,0 +1,186 @@
+"""
+Prepare Product Dataset
+Download and prepare Amazon products dataset from HuggingFace
+"""
+
+import logging
+import pandas as pd
+import json
+from pathlib import Path
+from typing import List, Dict
+import sys
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Add parent directory to path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+try:
+ from datasets import load_dataset
+except ImportError:
+ logger.error("datasets library not found. Install with: pip install datasets")
+ sys.exit(1)
+
+
+def download_dataset():
+    """Fetch the ``ckandemir/amazon-products`` train split from HuggingFace.
+
+    Returns:
+        The object returned by ``load_dataset`` for the train split.
+
+    Raises:
+        Exception: Any error from the download is logged and re-raised.
+    """
+    logger.info("Downloading Amazon products dataset from HuggingFace...")
+    try:
+        products = load_dataset("ckandemir/amazon-products", split="train", revision="main")  # nosec B615
+        logger.info(f"Downloaded dataset with {len(products)} products")
+        return products
+    except Exception as exc:
+        logger.error(f"Error downloading dataset: {exc}")
+        raise
+
+
+def _first_truthy(product: Dict, *keys: str, default=None):
+    """Return the first truthy value among *keys*, else the last key's value (or *default*)."""
+    value = default
+    for key in keys:
+        value = product.get(key, default)
+        if value:
+            return value
+    return value
+
+
+def clean_product(product: Dict) -> Dict:
+    """
+    Clean and normalize a product
+
+    Field names are looked up under several aliases. The capitalised names
+    ('Product Name', 'Description', 'Category', 'Selling Price', 'Image')
+    are the columns download_amazon_dataset.py reads from the same
+    HuggingFace dataset; the lower-case aliases are kept for other sources.
+
+    Args:
+        product: Raw product dictionary
+
+    Returns:
+        Cleaned product dictionary with keys name, description, category,
+        price (float or None), rating (0-5 float or None), review_count
+        (int or None), image_url, brand and id. A missing id is replaced by
+        a random ``prod_<12 hex chars>`` value; an empty name becomes
+        "Product <id>" and an empty description falls back to the name.
+    """
+    import uuid
+
+    cleaned = {}
+
+    # Map fields; the dataset's own column names are tried first
+    cleaned['name'] = _first_truthy(product, 'Product Name', 'title', 'name', 'product_name', default='')
+    cleaned['description'] = _first_truthy(product, 'Description', 'description', 'desc', 'details', default='')
+    cleaned['category'] = _first_truthy(product, 'Category', 'category', 'categories', default='')
+    cleaned['price'] = _first_truthy(product, 'Selling Price', 'price', 'cost', 'list_price')
+    cleaned['rating'] = _first_truthy(product, 'rating', 'stars', 'avg_rating')
+    cleaned['review_count'] = _first_truthy(product, 'review_count', 'reviews', 'num_reviews')
+    cleaned['image_url'] = _first_truthy(product, 'Image', 'image_url', 'image', 'img')
+    cleaned['brand'] = _first_truthy(product, 'brand', 'manufacturer')
+
+    # Generate ID if missing
+    existing_id = product.get('id') or product.get('product_id')
+    cleaned['id'] = existing_id if existing_id else f"prod_{uuid.uuid4().hex[:12]}"
+
+    # Clean price
+    if cleaned['price']:
+        try:
+            if isinstance(cleaned['price'], str):
+                # Remove currency symbols
+                price_str = cleaned['price'].replace('$', '').replace(',', '').strip()
+                cleaned['price'] = float(price_str) if price_str else None
+            else:
+                cleaned['price'] = float(cleaned['price'])
+        except (ValueError, TypeError):
+            # Unparsable text (e.g. a price range) is treated as "no price".
+            cleaned['price'] = None
+
+    # Clean rating (normalize to 0-5)
+    if cleaned['rating']:
+        try:
+            rating = float(cleaned['rating'])
+            if rating > 5:
+                rating = rating / 2.0  # Assume out of 10
+            cleaned['rating'] = rating if 0 <= rating <= 5 else None
+        except (ValueError, TypeError):
+            cleaned['rating'] = None
+
+    # Clean review count
+    if cleaned['review_count']:
+        try:
+            cleaned['review_count'] = int(cleaned['review_count'])
+        except (ValueError, TypeError):
+            cleaned['review_count'] = None
+
+    # Ensure name is not empty
+    if not cleaned['name']:
+        cleaned['name'] = f"Product {cleaned['id']}"
+
+    # Ensure description is not empty (use name as fallback)
+    if not cleaned['description']:
+        cleaned['description'] = cleaned['name']
+
+    return cleaned
+
+
+def create_test_subsets(dataset, output_dir: Path):
+    """
+    Write fixed-size CSV samples plus the full cleaned dataset
+
+    Every item is passed through ``clean_product``; items with a name and a
+    description are kept. ``test_100.csv``, ``test_1000.csv`` and
+    ``test_10000.csv`` are written when enough products exist (a warning is
+    logged otherwise), and ``full_dataset.csv`` is written when at least one
+    product was kept.
+
+    Args:
+        dataset: Iterable of raw product dictionaries
+        output_dir: Output directory (created if missing)
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    logger.info("Converting dataset to list...")
+    candidates = (clean_product(raw) for raw in dataset)
+    products = [entry for entry in candidates if entry['name'] and entry['description']]
+    logger.info(f"Cleaned {len(products)} valid products")
+
+    # (file name, required number of products), smallest first
+    targets = (
+        ('test_100.csv', 100),
+        ('test_1000.csv', 1000),
+        ('test_10000.csv', 10000),
+    )
+    for filename, count in targets:
+        if len(products) < count:
+            logger.warning(f"Not enough products for {filename} (have {len(products)}, need {count})")
+            continue
+
+        subset = products[:count]
+        pd.DataFrame(subset).to_csv(output_dir / filename, index=False)
+        logger.info(f"Created {filename} with {len(subset)} products")
+
+    # The full dataset is written whenever anything survived cleaning
+    if products:
+        pd.DataFrame(products).to_csv(output_dir / "full_dataset.csv", index=False)
+        logger.info(f"Created full_dataset.csv with {len(products)} products")
+
+
+def main():
+    """Download the dataset and write the test CSVs under <project>/data/test_datasets.
+
+    The project root is taken to be the parent of this script's directory.
+    Any failure is logged with its traceback and the process exits with
+    status 1.
+    """
+    output_dir = Path(__file__).parent.parent / "data" / "test_datasets"
+    logger.info(f"Output directory: {output_dir}")
+
+    try:
+        create_test_subsets(download_dataset(), output_dir)
+
+        logger.info("Dataset preparation complete!")
+        logger.info(f"Test datasets saved to: {output_dir}")
+    except Exception as exc:
+        logger.error(f"Error preparing dataset: {exc}", exc_info=True)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+ main()
+
diff --git a/sample_solutions/HybridSearch/scripts/test_product_upload.py b/sample_solutions/HybridSearch/scripts/test_product_upload.py
new file mode 100644
index 00000000..88be9bcb
--- /dev/null
+++ b/sample_solutions/HybridSearch/scripts/test_product_upload.py
@@ -0,0 +1,70 @@
+import asyncio
+import httpx
+import json
+import os
+
+# Configuration
+API_URL = "http://localhost:8004" # Ingestion service
+
+async def verify_upload_fix():
+    """Check that a CSV with standard headers is ingested without a mapping step.
+
+    Clears the product catalog, uploads a one-row CSV to the ingestion
+    service, verifies the upload is accepted (HTTP 202) with status
+    "processing" and no confirmation required, then polls the job status
+    once per second for up to 10 attempts. Results are printed; the function
+    returns None on every path. The temporary CSV is removed on every exit
+    path.
+    """
+    csv_path = "test_products.csv"
+
+    async with httpx.AsyncClient(timeout=60.0) as client:
+        print("1. Clearing product catalog...")
+        response = await client.delete(f"{API_URL}/api/v1/products/clear")
+        if response.status_code != 200:
+            print(f"Failed to clear products: {response.text}")
+            return
+        print("Products cleared.")
+
+        print("\n2. Uploading Product Catalog (should auto-process)...")
+        try:
+            # Create a dummy CSV file with standard headers
+            with open(csv_path, "w") as f:
+                f.write("id,name,description,category,price,rating,review_count,image_url,brand\n")
+                f.write("1,Test Product,A test product.,Test Category,10.00,4.5,10,http://example.com/image.jpg,Test Brand\n")
+
+            # Keep the upload handle in a `with` so it is closed even when
+            # the request fails.
+            with open(csv_path, "rb") as upload:
+                files = {'file': ('test_products.csv', upload, 'text/csv')}
+                response = await client.post(f"{API_URL}/api/v1/products/upload", files=files)
+
+            if response.status_code != 202:
+                print(f"Failed to upload products: {response.text}")
+                return
+
+            data = response.json()
+            job_id = data['job_id']
+            status = data['status']
+            requires_confirmation = data['requires_confirmation']
+
+            print(f"Upload response status: {status}")
+            print(f"Requires confirmation: {requires_confirmation}")
+
+            if requires_confirmation:
+                print("FAILURE: Upload still requires confirmation for standard headers!")
+                return
+
+            if status != "processing":
+                print(f"FAILURE: Status should be 'processing', got '{status}'")
+                return
+
+            print(f"Job started: {job_id}")
+
+            # Poll for completion
+            print("Waiting for processing...")
+            for _ in range(10):
+                await asyncio.sleep(1)
+                response = await client.get(f"{API_URL}/api/v1/products/status/{job_id}")
+                job_status = response.json()
+                print(f"Job status: {job_status['status']} ({job_status['products_processed']}/{job_status['products_total']})")
+
+                if job_status['status'] == 'complete':
+                    print("SUCCESS: Product upload auto-processed successfully!")
+                    break
+                if job_status['status'] == 'error':
+                    print(f"FAILURE: Job failed with error: {job_status['errors']}")
+                    break
+            else:
+                print("FAILURE: Timeout waiting for processing")
+        finally:
+            # Cleanup runs on early returns and exceptions too, so the
+            # temporary CSV never lingers in the working directory.
+            if os.path.exists(csv_path):
+                os.remove(csv_path)
+
+if __name__ == "__main__":
+ asyncio.run(verify_upload_fix())
diff --git a/sample_solutions/HybridSearch/scripts/verify_separation.py b/sample_solutions/HybridSearch/scripts/verify_separation.py
new file mode 100644
index 00000000..c0ba6068
--- /dev/null
+++ b/sample_solutions/HybridSearch/scripts/verify_separation.py
@@ -0,0 +1,125 @@
+import asyncio
+import httpx
+import json
+import os
+
+# Configuration
+API_URL = "http://localhost:8004" # Ingestion service
+RETRIEVAL_URL = "http://localhost:8002" # Retrieval service
+
+async def verify_separation():
+    """Verify that document and product indexes do not leak into each other.
+
+    Steps: clear all indexes, upload a small text document, upload a two-row
+    product CSV and confirm its field mapping, reload the retrieval service,
+    then check that a document search for "shoes" returns no product text
+    and a product search for "license" returns no document text. Progress
+    and the verdict are printed; the function returns None on every path.
+    Both temporary files are removed on every exit path.
+    """
+    doc_path = "drivers_license.txt"
+    csv_path = "products.csv"
+
+    async with httpx.AsyncClient(timeout=60.0) as client:
+        print("1. Clearing all indexes...")
+        response = await client.delete(f"{API_URL}/api/v1/documents/clear-all")
+        if response.status_code != 200:
+            print(f"Failed to clear indexes: {response.text}")
+            return
+        print("Indexes cleared.")
+
+        try:
+            print("\n2. Uploading Document (California Drivers License)...")
+            # Create a dummy plain-text document
+            with open(doc_path, "w") as f:
+                f.write("The California Drivers License Handbook covers rules of the road, traffic signs, and safe driving practices.")
+
+            # `with` closes the upload handle even when the request fails.
+            with open(doc_path, "rb") as upload:
+                files = {'file': ('drivers_license.txt', upload, 'text/plain')}
+                response = await client.post(f"{API_URL}/api/v1/documents/upload", files=files)
+            if response.status_code != 202:
+                print(f"Failed to upload document: {response.text}")
+                return
+            doc_id = response.json()['document_id']
+            print(f"Document uploaded: {doc_id}")
+
+            # Wait for processing (fixed delay; job status is not polled)
+            print("Waiting for document processing...")
+            await asyncio.sleep(5)
+
+            print("\n3. Uploading Product Catalog (Shoes)...")
+            # Create a dummy CSV file
+            with open(csv_path, "w") as f:
+                f.write("id,name,description,category,price\n")
+                f.write("1,Running Shoes,High performance running shoes for athletes.,Footwear,99.99\n")
+                f.write("2,Hiking Boots,Durable boots for rough terrain.,Footwear,129.99\n")
+
+            with open(csv_path, "rb") as upload:
+                files = {'file': ('products.csv', upload, 'text/csv')}
+                response = await client.post(f"{API_URL}/api/v1/products/upload", files=files)
+            if response.status_code != 202:
+                print(f"Failed to upload products: {response.text}")
+                return
+            job_data = response.json()
+            job_id = job_data['job_id']
+            print(f"Product upload job started: {job_id}")
+
+            # Confirm mapping
+            mapping = {
+                "name": "Product Catalog",
+                "id_field": "id",
+                "name_field": "name",
+                "description_field": "description",
+                "category_field": "category",
+                "price_field": "price"
+            }
+
+            response = await client.post(
+                f"{API_URL}/api/v1/products/confirm",
+                data={
+                    "job_id": job_id,
+                    "field_mapping": json.dumps(mapping)
+                }
+            )
+            if response.status_code != 202:
+                print(f"Failed to confirm mapping: {response.text}")
+                return
+            print("Product mapping confirmed.")
+
+            # Wait for processing (fixed delay; job status is not polled)
+            print("Waiting for product processing...")
+            await asyncio.sleep(5)
+
+            # Reload indexes; the checks below are meaningless if this fails
+            print("\n4. Reloading indexes...")
+            response = await client.post(f"{RETRIEVAL_URL}/api/v1/reload")
+            if response.status_code != 200:
+                print(f"Reload failed: {response.status_code} - {response.text}")
+                return
+
+            print("\n5. Verifying Document Search (Query: 'shoes')...")
+            # Should NOT find products
+            response = await client.post(
+                f"{RETRIEVAL_URL}/api/v1/retrieve/hybrid",
+                json={
+                    "query": "shoes",
+                    "top_k_candidates": 10,
+                    "top_k_fusion": 5,
+                    "top_k_final": 5
+                }
+            )
+            if response.status_code != 200:
+                print(f"Document search failed: {response.status_code} - {response.text}")
+                return
+            results = response.json()['results']
+            print(f"Found {len(results)} results.")
+            for res in results:
+                print(f" - {res.get('text', '')[:50]}... (Source: {res.get('metadata', {}).get('filename', 'Unknown')})")
+                if "Running Shoes" in res.get('text', ''):
+                    print("FAILURE: Product found in document search!")
+                    return
+
+            print("\n6. Verifying Product Search (Query: 'license')...")
+            # Should NOT find documents
+            response = await client.post(
+                f"{RETRIEVAL_URL}/api/v1/search/products",
+                json={
+                    "query_text": "license",
+                    "top_k": 5
+                }
+            )
+            if response.status_code != 200:
+                print(f"Product search failed: {response.status_code} - {response.text}")
+                return
+            results = response.json()['results']
+            print(f"Found {len(results)} results.")
+            for res in results:
+                print(f" - {res.get('name', '')}: {res.get('description', '')[:50]}...")
+                if "California Drivers License" in res.get('description', ''):
+                    print("FAILURE: Document found in product search!")
+                    return
+
+            print("\nSUCCESS: Contexts are properly separated!")
+        finally:
+            # Cleanup runs on early returns and exceptions too.
+            for path in (doc_path, csv_path):
+                if os.path.exists(path):
+                    os.remove(path)
+
+if __name__ == "__main__":
+ asyncio.run(verify_separation())
diff --git a/sample_solutions/HybridSearch/scripts/verify_setup.sh b/sample_solutions/HybridSearch/scripts/verify_setup.sh
new file mode 100755
index 00000000..d6e76967
--- /dev/null
+++ b/sample_solutions/HybridSearch/scripts/verify_setup.sh
@@ -0,0 +1,140 @@
+#!/bin/bash
+
+# Hybrid Search RAG - Setup Verification Script
+# This script verifies that the project structure is complete
+#
+# Run it from the project root: every path checked below is relative to the
+# current working directory.
+#
+# Exit status: 0 when every checked directory/file exists, 1 otherwise.
+# A missing or unconfigured .env only produces a warning.
+
+# NOTE: errexit (`set -e`) is intentionally NOT enabled. check_dir/check_file
+# return 1 for a missing path, and errexit would terminate the script at the
+# first miss instead of printing the complete report.
+
+echo "======================================"
+echo "Hybrid Search RAG - Setup Verification"
+echo "======================================"
+echo ""
+
+# Color codes
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+# Number of missing paths recorded by check_dir/check_file
+MISSING_COUNT=0
+
+# Function to check if directory exists
+# $1 - directory path; returns 0 if present, 1 (and counts a miss) otherwise
+check_dir() {
+    if [ -d "$1" ]; then
+        echo -e "${GREEN}✓${NC} Directory exists: $1"
+        return 0
+    else
+        echo -e "${RED}✗${NC} Directory missing: $1"
+        MISSING_COUNT=$((MISSING_COUNT + 1))
+        return 1
+    fi
+}
+
+# Function to check if file exists
+# $1 - file path; returns 0 if present, 1 (and counts a miss) otherwise
+check_file() {
+    if [ -f "$1" ]; then
+        echo -e "${GREEN}✓${NC} File exists: $1"
+        return 0
+    else
+        echo -e "${RED}✗${NC} File missing: $1"
+        MISSING_COUNT=$((MISSING_COUNT + 1))
+        return 1
+    fi
+}
+
+echo "Checking project structure..."
+echo ""
+
+# Check main directories
+echo "Main Directories:"
+check_dir "api"
+check_dir "ui"
+check_dir "data"
+check_dir "tests"
+check_dir "scripts"
+echo ""
+
+# Check API services
+echo "API Services:"
+check_dir "api/gateway"
+check_dir "api/embedding"
+check_dir "api/retrieval"
+check_dir "api/llm"
+check_dir "api/ingestion"
+echo ""
+
+# Check configuration files
+echo "Configuration Files:"
+check_file "env.example"
+check_file "docker-compose.yml"
+check_file ".gitignore"
+check_file "README.md"
+check_file "IMPLEMENTATION_PLAN.md"
+check_file "SETUP_SUMMARY.md"
+check_file "architecture.md"
+echo ""
+
+# Check requirements files
+echo "Requirements Files:"
+check_file "api/gateway/requirements.txt"
+check_file "api/embedding/requirements.txt"
+check_file "api/retrieval/requirements.txt"
+check_file "api/llm/requirements.txt"
+check_file "api/ingestion/requirements.txt"
+check_file "ui/requirements.txt"
+echo ""
+
+# Check subdirectories
+echo "Service Subdirectories:"
+check_dir "api/gateway/routers"
+check_dir "api/gateway/services"
+check_dir "api/retrieval/services"
+check_dir "api/llm/models"
+check_dir "api/llm/prompts"
+check_dir "api/ingestion/services"
+check_dir "ui/pages"
+check_dir "ui/components"
+echo ""
+
+# Check data directories
+echo "Data Directories:"
+check_dir "data/documents"
+check_dir "data/indexes"
+check_file "data/documents/.gitkeep"
+check_file "data/indexes/.gitkeep"
+echo ""
+
+# Check for .env file (warnings only; never counted as a missing path)
+echo "Environment Configuration:"
+if [ -f ".env" ]; then
+    echo -e "${GREEN}✓${NC} .env file exists"
+
+    # Check if OpenAI API key is set
+    if grep -q "OPENAI_API_KEY=sk-" .env 2>/dev/null; then
+        echo -e "${GREEN}✓${NC} OpenAI API key is configured"
+    elif grep -q "OPENAI_API_KEY=your-openai-api-key-here" .env 2>/dev/null; then
+        echo -e "${YELLOW}!${NC} OpenAI API key needs to be updated"
+    else
+        echo -e "${YELLOW}!${NC} OpenAI API key not found in .env"
+    fi
+else
+    echo -e "${YELLOW}!${NC} .env file not found (copy from env.example)"
+fi
+echo ""
+
+# Summary
+echo "======================================"
+echo "Verification Complete!"
+if [ "$MISSING_COUNT" -gt 0 ]; then
+    echo -e "${RED}✗${NC} $MISSING_COUNT required path(s) missing"
+else
+    echo -e "${GREEN}✓${NC} All required paths are present"
+fi
+echo "======================================"
+echo ""
+echo "Next Steps:"
+echo "1. Copy env.example to .env: cp env.example .env"
+echo "2. Add your OpenAI API key to .env"
+echo "3. Review SETUP_SUMMARY.md for implementation roadmap"
+echo "4. Start implementing services in this order:"
+echo " a. Embedding Service"
+echo " b. LLM Service"
+echo " c. Document Ingestion Service"
+echo " d. Retrieval Service"
+echo " e. Gateway Service"
+echo " f. UI Service"
+echo ""
+echo "See SETUP_SUMMARY.md for detailed implementation guide"
+echo ""
+
+# A non-zero exit lets callers (CI, Makefiles) detect an incomplete layout
+if [ "$MISSING_COUNT" -gt 0 ]; then
+    exit 1
+fi
+
diff --git a/sample_solutions/HybridSearch/ui/.streamlit/config.toml b/sample_solutions/HybridSearch/ui/.streamlit/config.toml
new file mode 100644
index 00000000..42f97628
--- /dev/null
+++ b/sample_solutions/HybridSearch/ui/.streamlit/config.toml
@@ -0,0 +1,15 @@
+# Streamlit configuration for the Hybrid Search UI.
+[server]
+# Same port the UI Dockerfile exposes and health-checks.
+port = 8501
+headless = true
+# NOTE(review): Streamlit's configuration docs describe enableCORS=false as
+# being overridden when enableXsrfProtection=true - confirm which of the two
+# settings is actually intended here.
+enableCORS = false
+enableXsrfProtection = true
+
+[browser]
+gatherUsageStats = false
+
+# Colours and font applied to the UI theme.
+[theme]
+primaryColor = "#667eea"
+backgroundColor = "#ffffff"
+secondaryBackgroundColor = "#f0f2f6"
+textColor = "#262730"
+font = "sans serif"
diff --git a/sample_solutions/HybridSearch/ui/Dockerfile b/sample_solutions/HybridSearch/ui/Dockerfile
new file mode 100644
index 00000000..bdcabb20
--- /dev/null
+++ b/sample_solutions/HybridSearch/ui/Dockerfile
@@ -0,0 +1,25 @@
+FROM python:3.9-slim
+
+WORKDIR /app
+
+# Install dependencies first so this layer stays cached when only app code changes
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy application and create non-root user
+COPY config.py .
+COPY app.py .
+RUN useradd -m -u 1000 appuser && \
+    chown -R appuser:appuser /app
+USER appuser
+
+# Expose Streamlit port
+EXPOSE 8501
+
+# Health check
+# Probes with the Python interpreter already in the image: nothing in this
+# Dockerfile installs curl, so a curl-based probe would fail with
+# "command not found" and mark the container unhealthy. urlopen raises on
+# connection errors and on HTTP error statuses, which yields a non-zero exit.
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8501/_stcore/health', timeout=5)" || exit 1
+
+# Run Streamlit
+CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
+
diff --git a/sample_solutions/HybridSearch/ui/app.py b/sample_solutions/HybridSearch/ui/app.py
new file mode 100644
index 00000000..c98d8842
--- /dev/null
+++ b/sample_solutions/HybridSearch/ui/app.py
@@ -0,0 +1,1610 @@
+"""
+Streamlit UI for InsightMapper Lite - Hybrid Search RAG Application
+Simplified Chat Interface with Document Upload
+"""
+import streamlit as st
+import os
+import httpx
+import logging
+import re
+import time
+from typing import Dict, Any, List, Optional
+from datetime import datetime
+import json
+from config import settings
+from streamlit_keycloak import login
+
+# Configure logging
+# The level is looked up by attribute name on the logging module, so
+# settings.log_level must match a logging attribute exactly (e.g. "INFO");
+# any other spelling raises AttributeError at import time.
+logging.basicConfig(
+ level=getattr(logging, settings.log_level),
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+# Page configuration
+# Wide layout with the sidebar collapsed on first load.
+st.set_page_config(
+ page_title="RAG Chatbot",
+ page_icon="💬",
+ layout="wide",
+ initial_sidebar_state="collapsed"
+)
+
+# Custom CSS for simplified chat interface
+# NOTE(review): the markup payload below is empty in this view - presumably
+# a style block belongs here; confirm against the original file.
+st.markdown("""
+
+""", unsafe_allow_html=True)
+
+
+class UIService:
+    """
+    Handle UI operations and API calls.
+
+    Manages communication with the backend services (Gateway, Ingestion, Retrieval)
+    via HTTP requests. Handles authentication, file uploads, query submission,
+    and status polling.
+
+    Methods that call a backend catch their own failures and report them through
+    the return value (usually ``{"error": True, "message": ...}``) instead of
+    raising, so callers only need to inspect the result.
+    """
+
+    def __init__(self):
+        """
+        Initialize UI Service.
+
+        Sets up API endpoints from environment variables and initializes the HTTP client.
+        """
+        gateway_host = os.getenv("GATEWAY_SERVICE_URL", settings.gateway_service_url)
+        ingestion_host = os.getenv("INGESTION_SERVICE_URL", "http://localhost:8004")
+        retrieval_host = os.getenv("RETRIEVAL_SERVICE_URL", "http://localhost:8002")
+
+        self.gateway_url = self._with_scheme(gateway_host)
+        self.ingestion_url = self._with_scheme(ingestion_host)
+        self.retrieval_url = self._with_scheme(retrieval_host)
+        # Fixed address; unlike the other services it is not read from the environment.
+        self.llm_url = "http://localhost:8003"
+        self.client = httpx.Client(timeout=60.0)
+        self.token = None
+
+    @staticmethod
+    def _with_scheme(host: str) -> str:
+        """Return *host* unchanged if it starts with "http", else prefix "http://"."""
+        return host if host.startswith("http") else f"http://{host}"
+
+    def _request_json(self, method: str, url: str, failure_label: str, **request_kwargs) -> Dict[str, Any]:
+        """
+        Send a request with the shared client and return the decoded JSON body.
+
+        Args:
+            method (str): HTTP method name, e.g. "GET" or "POST".
+            url (str): Absolute URL to call.
+            failure_label (str): Prefix for the error log line when the call fails.
+            **request_kwargs: Forwarded to ``httpx.Client.request`` (data, files, json, ...).
+
+        Returns:
+            Dict[str, Any]: Parsed response, or ``{"error": True, "message": ...}``
+            on any failure (connection, non-2xx status, invalid JSON).
+        """
+        try:
+            response = self.client.request(method, url, **request_kwargs)
+            response.raise_for_status()
+            return response.json()
+        except Exception as e:
+            logger.error(f"{failure_label}: {e}")
+            return {"error": True, "message": str(e)}
+
+    def set_token(self, token: str):
+        """
+        Set authentication token for client headers.
+
+        Args:
+            token (str): JWT access token.
+        """
+        self.token = token
+        self.client.headers.update({"Authorization": f"Bearer {token}"})
+
+    def check_health(self) -> Dict[str, Any]:
+        """
+        Check health of all services.
+
+        Returns:
+            Dict[str, Any]: Health status of backend services, or
+            ``{"status": "error", "message": ...}`` when the gateway call fails.
+        """
+        try:
+            response = self.client.get(f"{self.gateway_url}/api/v1/health/services")
+            response.raise_for_status()
+            return response.json()
+        except Exception as e:
+            logger.error(f"Health check failed: {e}")
+            return {"status": "error", "message": str(e)}
+
+    def submit_query(self, query: str, include_debug: bool = False) -> Dict[str, Any]:
+        """
+        Submit a query to the RAG system.
+
+        Args:
+            query (str): The user's question.
+            include_debug (bool): Whether to request debug info in the response.
+
+        Returns:
+            Dict[str, Any]: Normalized response with answer, citations, and metadata,
+            or an error dict (``"error": True``) when the request fails.
+        """
+        try:
+            payload = {
+                "query": query,
+                "include_debug_info": include_debug
+            }
+            response = self.client.post(
+                f"{self.gateway_url}/api/v1/query",
+                json=payload
+            )
+            response.raise_for_status()
+            data = response.json()
+
+            # Normalize response fields to match UI expectations; each field
+            # accepts either of two backend key spellings.
+            return {
+                "answer": data.get("answer", ""),
+                "citations": data.get("citations", []),
+                "query_type": data.get("query_complexity", data.get("query_type", "unknown")),
+                "model_used": data.get("llm_model", data.get("model_used", "unknown")),
+                "response_time_ms": data.get("processing_time_ms", data.get("response_time_ms", 0)),
+                "debug_info": data.get("debug_info"),
+                "retrieval_results_count": data.get("retrieval_results_count", 0)
+            }
+        except httpx.HTTPStatusError as e:
+            logger.error(f"Query failed with status {e.response.status_code}: {e}")
+            return {
+                "error": True,
+                "message": f"Server error: {e.response.status_code}",
+                "detail": e.response.text
+            }
+        except Exception as e:
+            logger.error(f"Query failed: {e}")
+            return {"error": True, "message": str(e)}
+
+    def upload_document(self, file_data: bytes, filename: str) -> Dict[str, Any]:
+        """
+        Upload a document for indexing.
+
+        Uses a dedicated short-lived client so the timeout can depend on the
+        payload size (120s above 10 MiB, otherwise 60s).
+
+        Args:
+            file_data (bytes): Raw file content.
+            filename (str): Name of the file.
+
+        Returns:
+            Dict[str, Any]: Upload result containing document_id or error info.
+        """
+        upload_url = f"{self.ingestion_url}/api/v1/documents/upload"
+        try:
+            logger.info(f"Uploading {filename} ({len(file_data)} bytes) to {self.ingestion_url}")
+
+            files = {"file": (filename, file_data, "application/octet-stream")}
+
+            # Use longer timeout for large files
+            timeout = 120.0 if len(file_data) > 10 * 1024 * 1024 else 60.0
+            # The one-off client does not inherit headers set on self.client.
+            headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
+
+            with httpx.Client(timeout=timeout) as client:
+                response = client.post(upload_url, files=files, headers=headers)
+                logger.info(f"Upload response status: {response.status_code}")
+
+                # Only 200 and 202 count as success; everything else is
+                # reported to the caller with a truncated response body.
+                if response.status_code not in (200, 202):
+                    error_text = response.text[:500] if response.text else "No error details"
+                    logger.error(f"Upload failed: {response.status_code} - {error_text}")
+                    return {
+                        "error": True,
+                        "message": f"Server error '{response.status_code} {response.reason_phrase}' for url '{upload_url}'",
+                        "detail": error_text
+                    }
+
+                return response.json()
+
+        except httpx.ConnectError as e:
+            logger.error(f"Connection error to {self.ingestion_url}: {e}")
+            return {
+                "error": True,
+                "message": f"Cannot connect to ingestion service at {self.ingestion_url}. Is the service running?",
+                "detail": str(e)
+            }
+        except httpx.TimeoutException as e:
+            logger.error(f"Timeout uploading to {self.ingestion_url}: {e}")
+            return {
+                "error": True,
+                "message": "Upload timeout. The file may be too large or the service is slow.",
+                "detail": str(e)
+            }
+        except Exception as e:
+            logger.error(f"Document upload failed: {e}", exc_info=True)
+            return {
+                "error": True,
+                "message": f"Upload failed: {str(e)}",
+                "detail": str(e)
+            }
+
+    def get_document_status(self, doc_id: str) -> Dict[str, Any]:
+        """
+        Get status of an uploaded document.
+
+        Args:
+            doc_id (str): Document ID.
+
+        Returns:
+            Dict[str, Any]: Status info (processing_status, chunk_count, etc.),
+            or an error dict when the call fails.
+        """
+        try:
+            response = self.client.get(
+                f"{self.ingestion_url}/api/v1/documents/{doc_id}/status"
+            )
+            response.raise_for_status()
+            return response.json()
+        except httpx.ConnectError as e:
+            logger.error(f"Connection error to {self.ingestion_url}: {e}")
+            return {"error": True, "message": f"Cannot connect to ingestion service: {str(e)}"}
+        except Exception as e:
+            logger.error(f"Status check failed: {e}")
+            return {"error": True, "message": str(e)}
+
+    def poll_document_status(self, doc_id: str, max_wait_seconds: int = 120) -> Dict[str, Any]:
+        """
+        Poll document status until completion or timeout.
+
+        Args:
+            doc_id (str): Document ID.
+            max_wait_seconds (int): Maximum seconds to wait.
+
+        Returns:
+            Dict[str, Any]: Final status ("completed"/"failed"), the first error
+            dict returned by a status check, or a timeout error.
+        """
+        start_time = time.time()
+        while (time.time() - start_time) < max_wait_seconds:
+            status = self.get_document_status(doc_id)
+            if "error" in status:
+                return status
+
+            if status.get("processing_status", "") in ("completed", "failed"):
+                return status
+
+            time.sleep(2)  # Poll every 2 seconds
+
+        return {"error": True, "message": "Timeout waiting for document processing"}
+
+    def clear_all_indexes(self) -> Dict[str, Any]:
+        """
+        Clear all vector indexes and metadata.
+
+        Returns:
+            Dict[str, Any]: Operation result.
+        """
+        try:
+            # Clear indexes in ingestion service
+            response = self.client.delete(f"{self.ingestion_url}/api/v1/documents/clear-all")
+            response.raise_for_status()
+            logger.info("Successfully cleared all indexes and metadata")
+            return response.json()
+        except Exception as e:
+            logger.error(f"Failed to clear indexes: {e}")
+            return {"error": True, "message": str(e)}
+
+    def generate_document_summary(self, filename: str, preview_text: str = "") -> Dict[str, Any]:
+        """
+        Generate a summary of the uploaded document.
+
+        Asks the RAG pipeline for a summary. Falls back to a static confirmation
+        message whenever the query fails: submit_query reports failures as an
+        error dict rather than raising, so that case is checked explicitly.
+
+        Args:
+            filename (str): Name of the file (used in the fallback message).
+            preview_text (str): Optional text content to aid summarization;
+                only its first 1000 characters are sent.
+
+        Returns:
+            Dict[str, Any]: Response containing the summary in "answer".
+        """
+        fallback = {
+            "answer": f"Document '{filename}' has been successfully uploaded and indexed. You can now start asking questions about it!",
+            "citations": []
+        }
+        try:
+            if preview_text:
+                # If we have preview text, use it as context
+                prompt = f"Based on this document, provide a brief summary: {preview_text[:1000]}"
+            else:
+                # Otherwise just make a general query
+                prompt = "What is this document about? Provide a brief overview."
+            response = self.submit_query(prompt, include_debug=False)
+        except Exception as e:
+            logger.error(f"Failed to generate summary: {e}")
+            return fallback
+
+        if "error" in response:
+            logger.warning(f"Summary query failed, using default welcome text: {response.get('message')}")
+            return fallback
+        return response
+
+    # Product Catalog Methods
+    def get_system_mode(self) -> str:
+        """
+        Get current system mode.
+
+        Returns:
+            str: 'document' or 'product'; 'document' when the call fails or the
+            response carries no "mode" key.
+        """
+        try:
+            response = self.client.get(f"{self.ingestion_url}/api/v1/products/mode")
+            response.raise_for_status()
+            return response.json().get("mode", "document")
+        except Exception as e:
+            logger.error(f"Failed to get system mode: {e}")
+            return "document"
+
+    def set_system_mode(self, mode: str) -> Dict[str, Any]:
+        """
+        Switch system mode between document and product.
+
+        Args:
+            mode (str): Target mode ('document' or 'product').
+
+        Returns:
+            Dict[str, Any]: Operation result.
+        """
+        return self._request_json(
+            "POST",
+            f"{self.ingestion_url}/api/v1/products/mode",
+            "Failed to set system mode",
+            data={"mode": mode}
+        )
+
+    def upload_product_catalog(self, file_data: bytes, filename: str) -> Dict[str, Any]:
+        """
+        Upload product catalog CSV/JSON file.
+
+        Args:
+            file_data (bytes): Raw file content.
+            filename (str): Name of the file.
+
+        Returns:
+            Dict[str, Any]: Job info including job_id.
+        """
+        return self._request_json(
+            "POST",
+            f"{self.ingestion_url}/api/v1/products/upload",
+            "Failed to upload product catalog",
+            files={"file": (filename, file_data)}
+        )
+
+    def confirm_product_mapping(self, job_id: str, catalog_name: str, field_mapping: Dict) -> Dict[str, Any]:
+        """
+        Confirm product field mapping and start processing.
+
+        Args:
+            job_id (str): Ingestion job ID.
+            catalog_name (str): Name for the catalog.
+            field_mapping (Dict): Mapping of file columns to standard product fields;
+                sent as a JSON-encoded form field.
+
+        Returns:
+            Dict[str, Any]: Confirmation result.
+        """
+        return self._request_json(
+            "POST",
+            f"{self.ingestion_url}/api/v1/products/confirm",
+            "Failed to confirm product mapping",
+            data={
+                "job_id": job_id,
+                "catalog_name": catalog_name,
+                "field_mapping": json.dumps(field_mapping)
+            }
+        )
+
+    def get_product_ingestion_status(self, job_id: str) -> Dict[str, Any]:
+        """
+        Get product ingestion job status.
+
+        Args:
+            job_id (str): Job ID.
+
+        Returns:
+            Dict[str, Any]: Job status.
+        """
+        return self._request_json(
+            "GET",
+            f"{self.ingestion_url}/api/v1/products/status/{job_id}",
+            "Failed to get ingestion status"
+        )
+
+    def get_catalog_info(self) -> Dict[str, Any]:
+        """
+        Get current catalog information.
+
+        Returns:
+            Dict[str, Any]: Catalog statistics (product count, categories), or
+            ``{"loaded": False, "message": ...}`` when the call fails.
+        """
+        try:
+            response = self.client.get(f"{self.ingestion_url}/api/v1/products/catalog/info")
+            response.raise_for_status()
+            return response.json()
+        except Exception as e:
+            logger.error(f"Failed to get catalog info: {e}")
+            return {"loaded": False, "message": str(e)}
+
+    def get_all_products(self, limit: int = 100) -> List[Dict[str, Any]]:
+        """
+        Get all products from catalog.
+
+        Implemented as a search for the generic term "product", so the result
+        is whatever the gateway search returns for that query.
+
+        Args:
+            limit (int): Maximum number of products to return.
+
+        Returns:
+            List[Dict[str, Any]]: List of product dictionaries; empty on failure.
+        """
+        try:
+            # Use a generic query to get all products
+            response = self.client.post(
+                f"{self.gateway_url}/api/v1/search",
+                json={"query": "product", "limit": limit}
+            )
+            response.raise_for_status()
+            return response.json().get("results", [])
+        except Exception as e:
+            logger.error(f"Failed to get products: {e}")
+            return []
+
+    def search_products(self, query: str, filters: Optional[Dict] = None, limit: int = 100) -> Dict[str, Any]:
+        """
+        Search products using natural language query.
+
+        Args:
+            query (str): Search query.
+            filters (Optional[Dict]): Filters to apply; omitted from the payload when empty.
+            limit (int): Max results.
+
+        Returns:
+            Dict[str, Any]: Search results and interpreted filters; on failure an
+            error dict that also carries an empty "results" list.
+        """
+        try:
+            payload = {
+                "query": query,
+                "limit": limit
+            }
+            if filters:
+                payload["filters"] = filters
+
+            response = self.client.post(
+                f"{self.gateway_url}/api/v1/search",
+                json=payload
+            )
+            response.raise_for_status()
+            return response.json()
+        except Exception as e:
+            logger.error(f"Product search failed: {e}")
+            return {"error": True, "message": str(e), "results": []}
+
+    def clear_product_catalog(self) -> Dict[str, Any]:
+        """
+        Clear all products from catalog.
+
+        Returns:
+            Dict[str, Any]: Operation result.
+        """
+        return self._request_json(
+            "DELETE",
+            f"{self.ingestion_url}/api/v1/products/clear",
+            "Failed to clear product catalog"
+        )
+
+    def reload_retrieval_indexes(self) -> Dict[str, Any]:
+        """
+        Reload retrieval indexes.
+
+        Posts to the retrieval service's reload endpoint.
+
+        Returns:
+            Dict[str, Any]: Operation result.
+        """
+        logger.info(f"Reloading indexes at {self.retrieval_url}")
+        return self._request_json(
+            "POST",
+            f"{self.retrieval_url}/api/v1/reload",
+            "Failed to reload indexes"
+        )
+
+
+def initialize_session_state():
+    """
+    Seed Streamlit session state with defaults.
+
+    Only keys that are not present yet are written, so values carried over
+    from earlier reruns are left untouched. Covers chat history, the shared
+    UIService instance, document status and product catalog state.
+    """
+    # Zero-argument factories rather than plain values: mutable defaults get a
+    # fresh object, and UIService is only constructed when it is really missing.
+    default_factories = {
+        "chat_history": list,
+        "ui_service": UIService,
+        "current_document": lambda: None,
+        "document_ready": lambda: False,
+        "active_citations": dict,
+        "upload_status": lambda: None,
+        # Product catalog state
+        "system_mode": lambda: "document",
+        "catalog_loaded": lambda: False,
+        "catalog_info": lambda: None,
+        "product_upload_job": lambda: None,
+        "all_products": list,
+        "filtered_products": list,
+        "search_query": lambda: "",
+        "applied_filters": dict,
+    }
+    for state_key, make_default in default_factories.items():
+        if state_key not in st.session_state:
+            st.session_state[state_key] = make_default()
+
+
+def process_citations_in_text(answer: str, citations: List[Dict]) -> str:
+    """
+    Rewrite citation markers found in an answer.
+
+    Recognised markers are [Page X], [Page X-Y], [Doc, Page X] and [X]. Each
+    match is re-emitted as its captured text wrapped in square brackets.
+    NOTE(review): as visible here the replacement carries no styling markup,
+    so the text comes back unchanged - presumably badge markup was intended;
+    confirm against the original file.
+
+    Args:
+        answer (str): Text response from LLM.
+        citations (List[Dict]): List of citation objects (not read by this function).
+
+    Returns:
+        str: Answer text with every recognised marker substituted.
+    """
+    marker_re = re.compile(r'\[(Page \d+(?:-\d+)?|[^\]]+, Page \d+|\d+)\]')
+    return marker_re.sub(lambda found: f'[{found.group(1)}]', answer)
+
+
+def render_header():
+ """
+ Render page header with mode switcher.
+
+ Displays title and buttons to switch between 'RAG Chatbot' (Document)
+ and 'Product Catalog Search' modes.
+
+ Clicking a switch button calls ui_service.set_system_mode(); session state
+ is updated only when the returned dict has no truthy "error" entry.
+ """
+ # Two columns in a 4:1 ratio: title on the left, mode switch on the right
+ col1, col2 = st.columns([4, 1])
+ with col1:
+ if st.session_state.system_mode == "product":
+ st.markdown('🛍️ Product Catalog Search
', unsafe_allow_html=True)
+ st.markdown('', unsafe_allow_html=True)
+ else:
+ st.markdown('💬 RAG Chatbot
', unsafe_allow_html=True)
+ st.markdown('', unsafe_allow_html=True)
+ with col2:
+ # Mode switcher
+ # NOTE(review): no message is shown when set_system_mode() reports an
+ # error; whether st.rerun() still runs in that case cannot be told from
+ # this view (indentation lost) - confirm.
+ current_mode = st.session_state.system_mode
+ if current_mode == "document":
+ if st.button("🛍️ Switch to Products", use_container_width=True):
+ result = st.session_state.ui_service.set_system_mode("product")
+ if not result.get("error"):
+ st.session_state.system_mode = "product"
+ # Re-check catalog status instead of setting to False
+ st.session_state.catalog_info = None
+ st.session_state.all_products = []
+ st.session_state.filtered_products = []
+ st.rerun()
+ else:
+ if st.button("📄 Switch to Documents", use_container_width=True):
+ result = st.session_state.ui_service.set_system_mode("document")
+ if not result.get("error"):
+ st.session_state.system_mode = "document"
+ # Cached product lists are dropped; catalog_info is left as is here
+ st.session_state.all_products = []
+ st.session_state.filtered_products = []
+ st.rerun()
+
+
+def render_upload_panel():
+ """
+ Render left panel with document upload.
+
+ Handles file upload widget, status display, and processing feedback loop.
+
+ On upload the existing indexes are cleared first, the file is sent to the
+ ingestion service, and its status is polled up to 60 times. When processing
+ completes, the document info is stored in session state, the chat history is
+ replaced by a single welcome message and the script is rerun.
+ """
+ st.markdown('', unsafe_allow_html=True)
+
+ # Section header
+ st.markdown('', unsafe_allow_html=True)
+ st.markdown('
Upload a PDF to start asking questions
', unsafe_allow_html=True)
+
+ # Show upload status
+ if not st.session_state.current_document:
+ st.markdown(
+ '
⚠️ No document uploaded
',
+ unsafe_allow_html=True
+ )
+ else:
+ doc_name = st.session_state.current_document.get("filename", "Unknown")
+ chunk_count = st.session_state.current_document.get("chunk_count", 0)
+ st.markdown(
+ f'
✅ {doc_name}
'
+ f'{chunk_count} chunks indexed
',
+ unsafe_allow_html=True
+ )
+
+ # Upload interface
+ st.markdown('''
+
+
📤
+
Drop your PDF here
+
or
+
+ ''', unsafe_allow_html=True)
+
+ # NOTE(review): the uploader accepts pdf, docx and txt, while the captions
+ # around it mention PDF only.
+ uploaded_file = st.file_uploader(
+ "Choose a file",
+ type=["pdf", "docx", "txt"],
+ help="Supported formats: PDF, DOCX, TXT (max 100MB per file)",
+ label_visibility="collapsed",
+ key="file_uploader"
+ )
+
+ if uploaded_file:
+ st.info(f"📄 {uploaded_file.name} ({uploaded_file.size / 1024:.1f} KB)")
+
+ upload_button = st.button(
+ "🚀 Upload",
+ type="primary",
+ use_container_width=True,
+ disabled=(uploaded_file is None)
+ )
+
+ if upload_button and uploaded_file is not None:
+ # Create placeholder for status updates
+ status_placeholder = st.empty()
+ progress_bar = st.progress(0)
+
+ # Clear existing indexes silently (single document mode)
+ # This ensures we always start fresh with just one document
+ # The result of clear_all_indexes() is not inspected; the upload goes ahead either way.
+ status_placeholder.info("🗑️ Clearing previous data...")
+ progress_bar.progress(10)
+ st.session_state.ui_service.clear_all_indexes()
+
+ # Upload document
+ status_placeholder.info("⬆️ Uploading document...")
+ progress_bar.progress(20)
+
+ result = st.session_state.ui_service.upload_document(
+ uploaded_file.read(),
+ uploaded_file.name
+ )
+
+ if "error" in result:
+ status_placeholder.error(f"❌ Upload failed: {result['message']}")
+ progress_bar.empty()
+ else:
+ # The upload response may carry the id under either key
+ doc_id = result.get("document_id", result.get("doc_id"))
+
+ # Poll for processing status
+ status_placeholder.info("🔄 Processing document...")
+ progress_bar.progress(40)
+
+ # At most 60 status checks; presumably each non-terminal pass sleeps 2s
+ # (see the end of the loop), i.e. roughly two minutes in total.
+ max_attempts = 60
+ attempt = 0
+ while attempt < max_attempts:
+ status_info = st.session_state.ui_service.get_document_status(doc_id)
+
+ if "error" in status_info:
+ status_placeholder.error(f"❌ Status check failed: {status_info['message']}")
+ break
+
+ processing_status = status_info.get("processing_status", "unknown")
+ chunk_count = status_info.get("chunk_count", 0)
+
+ if processing_status == "completed":
+ progress_bar.progress(100)
+ status_placeholder.success(f"✅ Document processed! ({chunk_count} chunks indexed)")
+
+ # Reload retrieval service to pick up new indexes
+ # NOTE(review): this posts to a hard-coded localhost URL instead of
+ # ui_service.retrieval_url, and reload_retrieval_indexes() is called
+ # again further down, so the reload appears to be requested twice.
+ try:
+ logger.info("Reloading retrieval service with new document data")
+ retrieval_url = "http://localhost:8002"
+ reload_response = st.session_state.ui_service.client.post(f"{retrieval_url}/api/v1/reload")
+ reload_response.raise_for_status()
+ logger.info("Successfully reloaded retrieval service after document upload")
+ except Exception as reload_error:
+ logger.warning(f"Failed to reload retrieval service: {reload_error}")
+
+ # Store document info
+ st.session_state.current_document = {
+ "doc_id": doc_id,
+ "filename": uploaded_file.name,
+ "timestamp": datetime.now().isoformat(),
+ "chunk_count": chunk_count
+ }
+ st.session_state.document_ready = True
+
+ # Reload retrieval indexes so the new document can be found
+ with st.spinner("Reloading search indexes..."):
+ st.session_state.ui_service.reload_retrieval_indexes()
+
+ # Generate summary and add as first message
+ # Only the filename is passed; no preview text is supplied.
+ with st.spinner("Generating document summary..."):
+ summary_response = st.session_state.ui_service.generate_document_summary(
+ uploaded_file.name
+ )
+
+ # Clear chat and add welcome message
+ st.session_state.chat_history = []
+
+ welcome_message = {
+ "type": "assistant",
+ "response": summary_response,
+ "timestamp": datetime.now().strftime("%I:%M %p"),
+ "id": "welcome_message",
+ "is_welcome": True
+ }
+ st.session_state.chat_history.append(welcome_message)
+
+ # Brief pause before the status widgets are removed and the script reruns
+ time.sleep(1)
+ status_placeholder.empty()
+ progress_bar.empty()
+ st.rerun()
+ break
+
+ elif processing_status == "failed":
+ error_msg = status_info.get("error_message", "Unknown error")
+ status_placeholder.error(f"❌ Processing failed: {error_msg}")
+ progress_bar.empty()
+ break
+
+ elif processing_status == "processing":
+ # Progress advances from 40 and is capped at 90 while waiting
+ progress = min(40 + (attempt * 50 // max_attempts), 90)
+ progress_bar.progress(progress)
+ status_placeholder.info(f"🔄 Processing document... ({attempt * 2}s)")
+
+ time.sleep(2)
+ attempt += 1
+
+ # Reached only when the loop used up every attempt without a break
+ if attempt >= max_attempts:
+ status_placeholder.warning("⏱️ Processing is taking longer than expected.")
+ progress_bar.empty()
+
+ # Instructions
+ st.markdown('''
+
+
Instructions:
+
+ - Upload a PDF document (max 100MB)
+ - Wait for processing to complete
+ - Start asking questions in the chat
+ - Get intelligent answers based on your document
+
+
+ ''', unsafe_allow_html=True)
+
+ st.markdown('
', unsafe_allow_html=True) # Close upload-panel
+
+
+def render_chat_panel():
+ """
+ Render right panel with chat interface.
+
+ Displays chat history, empty state (if no doc), and chat input.
+ Handles message submission and response rendering.
+
+ A submitted question is appended to st.session_state.chat_history together
+ with the result of ui_service.submit_query(), then the script is rerun.
+ """
+ st.markdown('', unsafe_allow_html=True)
+
+ # Section header
+ st.markdown('', unsafe_allow_html=True)
+ st.markdown('
Upload a document to start chatting
', unsafe_allow_html=True)
+
+ # document_ready is set by the upload panel once processing has completed
+ if not st.session_state.document_ready:
+ # Empty state
+ st.markdown('''
+
+
🤖
+
No Document Loaded
+
Upload a PDF document on the left to start asking questions and get intelligent answers powered by AI
+
+ ''', unsafe_allow_html=True)
+ else:
+
+ # Chat messages container
+ chat_container = st.container()
+
+ with chat_container:
+ if st.session_state.chat_history:
+ for message in st.session_state.chat_history:
+ render_chat_message(message)
+
+ # Chat input at bottom
+ st.markdown("---")
+
+ col1, col2 = st.columns([5, 1])
+
+ # NOTE(review): the placeholder still reads "Upload a document first..."
+ # although this input presumably shows only once a document is ready - confirm.
+ with col1:
+ query = st.text_input(
+ "Type your question...",
+ placeholder="Upload a document first...",
+ key="chat_input",
+ label_visibility="collapsed"
+ )
+
+ with col2:
+ submit_button = st.button("📤 Send", type="primary", use_container_width=True)
+
+ # Help text
+ # NOTE(review): the caption mentions Enter, but the code below only reacts
+ # to submit_button being truthy - verify the Enter key really submits.
+ st.caption("Press Enter to send • The AI will answer based on your uploaded document")
+
+ # Process query
+ if submit_button and query.strip():
+ # Add user message
+ user_message = {
+ "type": "user",
+ "content": query,
+ "timestamp": datetime.now().strftime("%I:%M %p"),
+ "id": f"user_{len(st.session_state.chat_history)}"
+ }
+ st.session_state.chat_history.append(user_message)
+
+ # Get response
+ with st.spinner("🤔 Thinking..."):
+ response = st.session_state.ui_service.submit_query(query, include_debug=False)
+
+ # Add assistant message
+ # The response dict is stored as returned, including error dicts;
+ # render_chat_message decides how to display it.
+ assistant_message = {
+ "type": "assistant",
+ "response": response,
+ "timestamp": datetime.now().strftime("%I:%M %p"),
+ "id": f"assistant_{len(st.session_state.chat_history)}"
+ }
+ st.session_state.chat_history.append(assistant_message)
+
+ st.rerun()
+
+ elif submit_button:
+ st.warning("⚠️ Please enter a question")
+
+ st.markdown('
', unsafe_allow_html=True) # Close chat-panel
+
+
+def render_chat_message(message: Dict[str, Any]):
+ """
+ Render a single chat message (user or assistant).
+
+ User messages show message["content"]. Assistant messages show the "answer"
+ of message["response"], or an error box when that dict has an "error" key.
+ Messages flagged "is_welcome" are rendered as a document summary.
+
+ Args:
+ message (Dict[str, Any]): Message object containing type, content/response, etc.
+ """
+ message_type = message.get("type", "assistant")
+
+ if message_type == "user":
+ # User message bubble
+ # NOTE(review): the content is interpolated without escaping into text
+ # rendered with unsafe_allow_html=True - verify user input cannot inject markup.
+ st.markdown(
+ f'{message["content"]}
',
+ unsafe_allow_html=True
+ )
+ else:
+ # Assistant message
+ response = message.get("response", {})
+
+ # Error dicts are shown as an error box and nothing else is rendered
+ if "error" in response:
+ st.error(f"❌ {response.get('message', 'An error occurred')}")
+ return
+
+ # Get answer and process citations
+ answer = response.get("answer", "No answer generated")
+ citations = response.get("citations", [])
+
+ # Check if this is the welcome message
+ is_welcome = message.get("is_welcome", False)
+
+ if is_welcome:
+ # Format welcome message differently
+ welcome_text = f"""
+
+
📄 Document Summary
+
{answer}
+
+ 💡 Let me know how I can help you with this document!
+
+
+ """
+ st.markdown(welcome_text, unsafe_allow_html=True)
+ else:
+ # Regular message with citations
+ if citations:
+ processed_answer = process_citations_in_text(answer, citations)
+ else:
+ processed_answer = answer
+
+ st.markdown(f'{processed_answer}
', unsafe_allow_html=True)
+
+ # Show sources if available
+ # The expander label counts every citation, but only the first five are listed.
+ if citations:
+ with st.expander(f"📚 View {len(citations)} Source(s)", expanded=False):
+ for i, citation in enumerate(citations[:5], 1):
+ # doc_id is read here but not used in the lines below
+ doc_id = citation.get('document_id', 'N/A')
+ page_num = citation.get('page_number', 'N/A')
+ snippet = citation.get('relevant_text_snippet', '')
+
+ st.markdown(f"**[{i}]** Page {page_num}")
+ if snippet:
+ # Snippets longer than 200 characters are truncated with "..."
+ st.text(snippet[:200] + "..." if len(snippet) > 200 else snippet)
+ st.markdown("---")
+
+
+
+
+def render_catalog_sidebar():
+ """
+ Render compact sidebar for catalog management.
+
+ Displays catalog status, clear button, and file uploader for new catalogs.
+ """
+ with st.sidebar:
+ st.markdown("### 📦 Catalog Management")
+
+ # Check catalog status
+ if st.session_state.catalog_info is None:
+ catalog_info = st.session_state.ui_service.get_catalog_info()
+ st.session_state.catalog_info = catalog_info
+ st.session_state.catalog_loaded = catalog_info.get("loaded", False)
+
+ if st.session_state.catalog_loaded:
+ info = st.session_state.catalog_info
+ st.success(f"✅ {info.get('product_count', 0)} Products Loaded")
+ st.caption(f"Categories: {len(info.get('categories', []))}")
+
+ if st.button("🗑️ Clear Catalog", use_container_width=True):
+ result = st.session_state.ui_service.clear_product_catalog()
+ if not result.get("error"):
+ st.session_state.catalog_loaded = False
+ st.session_state.catalog_info = None
+ st.session_state.all_products = []
+ st.session_state.filtered_products = []
+ st.rerun()
+ else:
+ st.warning("⚠️ No catalog loaded")
+
+ # File upload
+ st.markdown("---")
+ st.markdown("**Upload New Catalog**")
+ uploaded_file = st.file_uploader(
+ "Select File",
+ type=["csv", "json", "xlsx"],
+ help="Upload CSV/JSON with products"
+ )
+
+ if uploaded_file is not None:
+ if st.button("📤 Upload", type="primary", use_container_width=True):
+ with st.spinner("Processing..."):
+ file_data = uploaded_file.read()
+ result = st.session_state.ui_service.upload_product_catalog(
+ file_data, uploaded_file.name
+ )
+
+ if result.get("error"):
+ st.error(f"Upload failed")
+ else:
+ job_id = result.get("job_id")
+
+ # Auto-confirm mapping
+ if result.get("requires_confirmation"):
+ suggested = result.get("suggested_mapping", {})
+ st.session_state.ui_service.confirm_product_mapping(
+ job_id, "Products", suggested
+ )
+
+ # Monitor processing
+ progress_bar = st.progress(0)
+ max_attempts = 30
+
+ for attempt in range(max_attempts):
+ status = st.session_state.ui_service.get_product_ingestion_status(job_id)
+ if status.get("status") == "complete":
+ progress_bar.progress(100)
+ st.session_state.catalog_loaded = True
+ st.session_state.catalog_info = None
+ st.session_state.all_products = []
+
+ # Reload retrieval indexes
+ with st.spinner("Reloading search indexes..."):
+ st.session_state.ui_service.reload_retrieval_indexes()
+
+ time.sleep(1)
+ st.rerun()
+ break
+ elif status.get("status") == "failed":
+ st.error("Processing failed")
+ break
+ else:
+ progress = min(10 + (attempt * 80 // max_attempts), 90)
+ progress_bar.progress(progress)
+ time.sleep(2)
+
+
+def render_ecommerce_store():
+ """
+ Render main e-commerce product display.
+
+ When no catalog is loaded, an empty-state block is shown and the function
+ returns. Otherwise the product list is fetched once from
+ ui_service.get_all_products (limit=100) and cached in
+ st.session_state.all_products / filtered_products, the search bar is
+ rendered, and the selected products are passed to render_product_grid.
+ """
+
+ # Empty state: nothing to browse until a catalog has been uploaded.
+ if not st.session_state.catalog_loaded:
+ st.markdown('''
+
+
🛍️
+
No Products Available
+
Upload a product catalog from the sidebar to get started
+
+ ''', unsafe_allow_html=True)
+ return
+
+ # Load all products if not already loaded
+ # An empty all_products list is also treated as "not loaded", so an empty
+ # catalog is re-requested on every run.
+ if not st.session_state.all_products:
+ with st.spinner("Loading products..."):
+ products = st.session_state.ui_service.get_all_products(limit=100)
+ st.session_state.all_products = products
+ st.session_state.filtered_products = products
+
+ # Search and filter bar
+ render_search_bar()
+
+ # Display products
+ # NOTE(review): an empty filtered_products list is falsy, so this falls
+ # back to all_products. If filtered_products can be an empty search result,
+ # the full catalog is shown instead and the "no products found" message
+ # below is only reached when all_products is empty as well — confirm intent.
+ products_to_show = st.session_state.filtered_products if st.session_state.filtered_products else st.session_state.all_products
+
+ if not products_to_show:
+ st.info("🔍 No products found matching your search. Try different keywords or filters.")
+ return
+
+ # Results header
+ st.markdown(f'', unsafe_allow_html=True)
+
+ # Product grid
+ render_product_grid(products_to_show)
+
+
+def render_search_bar():
+ """
+ Render search bar with filters.
+
+ Draws a text input with Search and Clear buttons in a 6:1:1 column layout.
+ A non-blank query submitted via Search is stored in
+ st.session_state.search_query and sent to ui_service.search_products
+ (limit=100); the response's "results" and the "extracted_filters" found
+ under "query_interpretation" are stored as filtered_products and
+ applied_filters (presumably both only when the response has no truthy
+ "error" — verify the nesting). Clear restores filtered_products to
+ all_products and empties the query and filters. Finally, any active query
+ and filters are displayed.
+ """
+ st.markdown('', unsafe_allow_html=True)
+
+ # Search input
+ col1, col2, col3 = st.columns([6, 1, 1])
+
+ with col1:
+ # The label is required by Streamlit but hidden; the placeholder acts as the prompt.
+ query = st.text_input(
+ "Search",
+ placeholder="Search for products...",
+ key="search_input",
+ label_visibility="collapsed"
+ )
+
+ with col2:
+ search_clicked = st.button("🔍 Search", type="primary", use_container_width=True)
+
+ with col3:
+ clear_clicked = st.button("Clear", use_container_width=True)
+
+ # Process search
+ # Blank or whitespace-only queries are ignored. The query is recorded
+ # before the request is made, so it is kept even if the search fails.
+ if search_clicked and query.strip():
+ st.session_state.search_query = query
+ with st.spinner("Searching..."):
+ results = st.session_state.ui_service.search_products(query, limit=100)
+ if not results.get("error"):
+ st.session_state.filtered_products = results.get("results", [])
+ st.session_state.applied_filters = results.get("query_interpretation", {}).get("extracted_filters", {})
+ st.rerun()
+
+ # Clear search
+ if clear_clicked:
+ st.session_state.search_query = ""
+ st.session_state.filtered_products = st.session_state.all_products
+ st.session_state.applied_filters = {}
+ st.rerun()
+
+ # Show active filters
+ if st.session_state.search_query or st.session_state.applied_filters:
+ st.markdown('
', unsafe_allow_html=True)
+
+ if st.session_state.search_query:
+ st.markdown(f'🔍 "{st.session_state.search_query}"', unsafe_allow_html=True)
+
+ # Each filter is shown only when its value is truthy, so a value of 0
+ # or an empty category list is not displayed.
+ filters = st.session_state.applied_filters
+ if filters.get("price_max"):
+ st.markdown(f'💰 Under ${filters["price_max"]}', unsafe_allow_html=True)
+ if filters.get("price_min"):
+ st.markdown(f'💰 Over ${filters["price_min"]}', unsafe_allow_html=True)
+ if filters.get("rating_min"):
+ st.markdown(f'⭐ {filters["rating_min"]}+ stars', unsafe_allow_html=True)
+ if filters.get("categories"):
+ for cat in filters["categories"]:
+ st.markdown(f'📂 {cat}', unsafe_allow_html=True)
+
+ st.markdown('
', unsafe_allow_html=True)
+
+ st.markdown('
', unsafe_allow_html=True)
+
+
+def render_product_grid(products: List[Dict[str, Any]]):
+ """
+ Render products in a responsive grid.
+
+ Args:
+ products (List[Dict[str, Any]]): List of product dictionaries to display.
+ """
+ # Display 4 products per row
+ cols_per_row = 4
+
+ for i in range(0, len(products), cols_per_row):
+ cols = st.columns(cols_per_row)
+ for j, col in enumerate(cols):
+ if i + j < len(products):
+ with col:
+ render_product_card(products[i + j])
+
+
+def render_product_card(product: Dict[str, Any]):
+ """
+ Render a single product card with image.
+
+ Reads name, price, rating, review_count and category from the product, and
+ takes image_url and brand from the top level or, failing that, from the
+ nested "metadata" dict. The card is assembled as one HTML string and
+ emitted with st.markdown(..., unsafe_allow_html=True).
+
+ Args:
+     product (Dict[str, Any]): Product data.
+ """
+ # Extract product data
+ name = product.get("name", "Unknown Product")
+ price = product.get("price")
+ rating = product.get("rating")
+ review_count = product.get("review_count", 0)
+ category = product.get("category", "")
+ # Top-level value wins; "metadata" is only consulted when it is missing or empty.
+ image_url = product.get("image_url") or product.get("metadata", {}).get("image_url", "")
+ brand = product.get("brand") or product.get("metadata", {}).get("brand", "")
+
+ # Fallback image if none provided
+ if not image_url:
+ image_url = "https://via.placeholder.com/400x400/e5e7eb/6b7280?text=No+Image"
+
+ # Rating stars
+ # The rating is truncated to whole stars; a missing or zero rating shows
+ # five empty stars. NOTE(review): ratings above 5 are not clamped, so more
+ # than five filled stars would be drawn.
+ stars_filled = int(rating) if rating else 0
+ stars_empty = 5 - stars_filled
+ stars_html = "★" * stars_filled + "☆" * stars_empty
+
+ # Product card HTML
+ # NOTE(review): price has no default above, and the template formats it
+ # with ":.2f" (review_count with ":,"); a product whose price is None would
+ # raise a TypeError here — confirm the backend always supplies both.
+ card_html = f"""
+
+

+
+
{name}
+
+ {stars_html}
+ ({review_count:,})
+
+
${price:.2f}
+
{category}
+
+
+ """
+
+ st.markdown(card_html, unsafe_allow_html=True)
+
+
+def main():
+ """
+ Main application entry point.
+
+ Initializes session state, fetches the system mode once per session, draws
+ the header, and then renders one of two layouts: the catalog sidebar plus
+ store when the mode is "product", or the upload and chat panels side by
+ side for any other mode.
+ """
+ # Keycloak Login
+ # Settings come from the environment; KEYCLOAK_URL falls back to BASE_URL
+ # and then to a localhost default. The login call that would consume this
+ # dict is commented out below, so it is currently unused in this function.
+ keycloak_config = {
+ "url": os.getenv("KEYCLOAK_URL", os.getenv("BASE_URL", "http://localhost:8080")),
+ "realm": os.getenv("KEYCLOAK_REALM", "master"),
+ "client_id": os.getenv("KEYCLOAK_CLIENT_ID", "api")
+ }
+
+ # keycloak = login(
+ # url=keycloak_config["url"],
+ # realm=keycloak_config["realm"],
+ # client_id=keycloak_config["client_id"],
+ # init_options={'checkLoginIframe': False}
+ # )
+
+ # if not keycloak.authenticated:
+ # st.warning("Please login to access the system.")
+ # st.stop()
+
+ initialize_session_state()
+
+ # Note: Keycloak authentication is handled at the service level (embedding, llm, etc.)
+ # The UI communicates with services through the gateway without needing to pass tokens
+
+ # Get current system mode
+ # The flag ensures the mode is requested from the backend only on the
+ # first run of a session; later reruns reuse the stored value.
+ if "system_mode_initialized" not in st.session_state:
+ current_mode = st.session_state.ui_service.get_system_mode()
+ st.session_state.system_mode = current_mode
+ st.session_state.system_mode_initialized = True
+
+ render_header()
+
+ st.markdown('', unsafe_allow_html=True)
+
+ if st.session_state.system_mode == "product":
+ # E-commerce mode: sidebar + full-width store
+ render_catalog_sidebar()
+ render_ecommerce_store()
+ else:
+ # Document mode: two-column layout
+ # The chat column is given twice the width of the upload column.
+ col1, col2 = st.columns([1, 2], gap="medium")
+ with col1:
+ render_upload_panel()
+ with col2:
+ render_chat_panel()
+
+
+# Run the app only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+ main()
+
diff --git a/sample_solutions/HybridSearch/ui/config.py b/sample_solutions/HybridSearch/ui/config.py
new file mode 100644
index 00000000..7670dedf
--- /dev/null
+++ b/sample_solutions/HybridSearch/ui/config.py
@@ -0,0 +1,37 @@
+"""
+Configuration for UI Service
+"""
+from pathlib import Path
+
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class Settings(BaseSettings):
+ # Service URLs (use localhost for local dev, gateway for Docker)
+ gateway_service_url: str = "http://localhost:8000"
+
+ # UI Configuration
+ ui_title: str = "InsightMapper Lite"
+ ui_page_icon: str = "📚"
+ ui_layout: str = "wide"
+
+ # Feature flags
+ enable_debug_mode: bool = True
+ enable_document_upload: bool = True
+ enable_query_history: bool = True
+
+ # Display settings
+ max_results_display: int = 5
+ show_confidence_scores: bool = True
+ show_source_preview: bool = True
+
+ # Logging
+ log_level: str = "INFO"
+
+ class Config:
+ env_file = Path(__file__).parent.parent / ".env"
+ case_sensitive = False
+ extra = "ignore"
+
+
+settings = Settings()
+
diff --git a/sample_solutions/HybridSearch/ui/public/citations.png b/sample_solutions/HybridSearch/ui/public/citations.png
new file mode 100644
index 00000000..9f20587e
Binary files /dev/null and b/sample_solutions/HybridSearch/ui/public/citations.png differ
diff --git a/sample_solutions/HybridSearch/ui/public/product_catalog.png b/sample_solutions/HybridSearch/ui/public/product_catalog.png
new file mode 100644
index 00000000..74274f52
Binary files /dev/null and b/sample_solutions/HybridSearch/ui/public/product_catalog.png differ
diff --git a/sample_solutions/HybridSearch/ui/public/rag_chatbot.png b/sample_solutions/HybridSearch/ui/public/rag_chatbot.png
new file mode 100644
index 00000000..1f926b02
Binary files /dev/null and b/sample_solutions/HybridSearch/ui/public/rag_chatbot.png differ
diff --git a/sample_solutions/HybridSearch/ui/requirements.txt b/sample_solutions/HybridSearch/ui/requirements.txt
new file mode 100644
index 00000000..a17ef62a
--- /dev/null
+++ b/sample_solutions/HybridSearch/ui/requirements.txt
@@ -0,0 +1,9 @@
+streamlit==1.29.0
+requests==2.31.0
+httpx==0.28.1
+pandas==2.1.4
+plotly==5.18.0
+pydantic==2.5.0
+pydantic-settings==2.1.0
+python-dotenv==1.0.0
+streamlit-keycloak==1.1.1