diff --git a/Dockerfile.server b/Dockerfile.server
index 4d4679ebc..4d159cd21 100644
--- a/Dockerfile.server
+++ b/Dockerfile.server
@@ -14,6 +14,7 @@ COPY examples/ ./examples/
 COPY chat-ui/ ./chat-ui/
 COPY scripts/provider-list.yaml ./scripts/
 COPY scripts/filter_guardrails.py ./scripts/
+COPY scripts/setup_otel.py ./scripts/
 COPY scripts/entrypoint.sh ./scripts/
 RUN chmod +x ./scripts/entrypoint.sh
 
@@ -24,7 +25,11 @@ ENV POETRY_VIRTUALENVS_IN_PROJECT=1 \
     POETRY_NO_INTERACTION=1
 
 RUN poetry install --no-ansi --extras="sdd jailbreak openai nvidia tracing" && \
-    poetry run pip install "spacy>=3.4.4,<4.0.0" && \
+    poetry run pip install \
+        "spacy>=3.4.4,<4.0.0" \
+        "opentelemetry-api>=1.27.0,<2.0.0" \
+        "opentelemetry-sdk>=1.27.0,<2.0.0" \
+        "opentelemetry-exporter-otlp>=1.27.0,<2.0.0" && \
     poetry run python -m spacy download en_core_web_lg
 
 FROM registry.access.redhat.com/ubi9/python-312
diff --git a/docs/user-guides/tracing/jaegar-tracing.md b/docs/user-guides/tracing/jaegar-tracing.md
new file mode 100644
index 000000000..0fbd981f1
--- /dev/null
+++ b/docs/user-guides/tracing/jaegar-tracing.md
@@ -0,0 +1,31 @@
+# Local testing
+
+1. Create shared network
+```
+docker network create nemo-otel
+```
+
+2. Start Jaeger
+```
+docker run -d --name jaeger --network nemo-otel \
+  -p 16686:16686 -p 4317:4317 -p 4318:4318 \
+  jaegertracing/all-in-one:1.62.0
+```
+
+3. Build NeMo Guardrails
+```bash
+docker buildx build -f Dockerfile.server -t nemo-guardrails-server:latest .
+```
+
+4. Start NeMo Guardrails with OpenTelemetry
+
+**Note**: OpenTelemetry setup is now built into the container with `setup_otel.py` in `/app/scripts`. You only need to provide the Guardrails configuration and set `ENABLE_OTEL=true`; tracing stays disabled without it. Run the command below from the repository root. The OTLP endpoint uses the `jaeger` container name because `localhost` inside the Guardrails container does not reach Jaeger.
+
+```bash
+docker run --rm --name nemo-guardrails --network nemo-otel \
+  -p 8000:8000 \
+  -v "$(pwd)/tests/test_configs/otel:/app/config/otel" \
+  -e "ENABLE_OTEL=true" \
+  -e "OTEL_EXPORTER_OTLP_ENDPOINT=http://jaeger:4317" \
+  nemo-guardrails-server:latest otel
+```
diff --git a/poetry.lock b/poetry.lock
index 2222572f7..4eca9db29 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
 
 [[package]]
 name = "accessible-pygments"
diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh
index a4fdd9e69..d8c752b5b 100644
--- a/scripts/entrypoint.sh
+++ b/scripts/entrypoint.sh
@@ -19,9 +19,57 @@ if [[ ! -f "$CONFIG_DIR/rails.co" ]]; then
     exit 1
 fi
 
+# Setup OpenTelemetry
+ENABLE_OTEL=$(echo "$ENABLE_OTEL" | tr '[:upper:]' '[:lower:]')
+if [[ "$ENABLE_OTEL" == "true" ]]; then
+    echo "🔧 Configuring OpenTelemetry:"
+    export OTEL_SERVICE_NAME="${OTEL_SERVICE_NAME:-guardrails}"
+    export OTEL_SERVICE_VERSION="${OTEL_SERVICE_VERSION:-1.0.0}"
+    export OTEL_ENVIRONMENT="${OTEL_ENVIRONMENT:-production}"
+    export OTEL_ENABLE_CONSOLE="${OTEL_ENABLE_CONSOLE:-false}"
+    export OTEL_EXPORTER_OTLP_INSECURE="${OTEL_EXPORTER_OTLP_INSECURE:-true}"
+    export OTEL_EXPORTER_OTLP_ENDPOINT="${OTEL_EXPORTER_OTLP_ENDPOINT:-http://jaeger:4317}"
+
+else
+    echo "📝 OpenTelemetry disabled - starting without tracing"
+fi
+
+echo ""
 echo "✅ Configuration validated. Starting server..."
-exec /app/.venv/bin/nemoguardrails server \
-    --config "/app/config" \
-    --port "$PORT" \
-    --default-config-id "$CONFIG_ID" \
-    --disable-chat-ui
\ No newline at end of file
+
+# NOTE: the inline Python is wrapped in shell double quotes, so it must only use single-quoted string literals.
+exec /app/.venv/bin/python3 -c "
+import os
+import sys
+
+# Setup OpenTelemetry if enabled
+if os.getenv('ENABLE_OTEL', '').lower() == 'true':
+    print('🔍 Setting up OpenTelemetry within server process...')
+    try:
+        import logging
+        logging.basicConfig(level=logging.INFO)
+        sys.path.insert(0, '/app/scripts')
+        from setup_otel import setup_opentelemetry
+        if setup_opentelemetry():
+            print('✅ OpenTelemetry configured successfully. Starting server...')
+        else:
+            print('⚠️ OpenTelemetry enabled but no exporter was configured. Starting server without trace export.')
+    except (ImportError, ModuleNotFoundError) as e:
+        print(f'❌ OpenTelemetry dependencies missing: {e}. Starting server without tracing.')
+    except Exception as e:
+        print(f'❌ OpenTelemetry setup failed: {e}. Starting server without tracing.')
+        import traceback
+        traceback.print_exc()
+
+# Start the NeMo Guardrails server
+import uvicorn
+from nemoguardrails.server import api
+
+# Set up the server configuration
+api.app.rails_config_path = '/app/config'
+api.app.disable_chat_ui = True
+api.set_default_config_id('${CONFIG_ID}')
+
+# Start the server using uvicorn
+uvicorn.run(api.app, port=${PORT}, log_level='info', host='0.0.0.0')
+"
diff --git a/scripts/setup_otel.py b/scripts/setup_otel.py
new file mode 100644
index 000000000..51fb53665
--- /dev/null
+++ b/scripts/setup_otel.py
@@ -0,0 +1,89 @@
+import logging
+import os
+
+from opentelemetry import trace
+from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
+from opentelemetry.sdk.resources import Resource
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
+
+logger = logging.getLogger(__name__)
+
+
+def setup_opentelemetry():
+    """Configure the global OpenTelemetry tracer provider from environment variables.
+
+    Reads OTEL_SERVICE_NAME, OTEL_SERVICE_VERSION, OTEL_ENVIRONMENT,
+    OTEL_EXPORTER_OTLP_ENDPOINT, OTEL_EXPORTER_OTLP_INSECURE and
+    OTEL_ENABLE_CONSOLE. Failures are logged and reported through the
+    return value instead of being raised.
+
+    Returns:
+        bool: True if at least one span exporter (OTLP and/or console) was
+        registered, False otherwise.
+    """
+    try:
+        # Get configuration from environment variables
+        service_name = os.getenv("OTEL_SERVICE_NAME", "guardrails")
+        service_version = os.getenv("OTEL_SERVICE_VERSION", "1.0.0")
+        environment = os.getenv("OTEL_ENVIRONMENT", "production")
+        otlp_endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
+        enable_console = os.getenv("OTEL_ENABLE_CONSOLE", "false").lower() == "true"
+
+        # Configure resource (metadata about the service)
+        resource = Resource.create(
+            {
+                "service.name": service_name,
+                "service.version": service_version,
+                "deployment.environment": environment,
+                "service.namespace": "nemo-guardrails",
+            }
+        )
+
+        # Set up the tracer provider
+        tracer_provider = TracerProvider(resource=resource)
+        trace.set_tracer_provider(tracer_provider)
+
+        exporters_configured = []
+
+        # Configure OTLP exporter if endpoint is provided
+        if otlp_endpoint:
+            logger.info(f"📝 Configuring OTLP exporter for endpoint: {otlp_endpoint}")
+            try:
+                otlp_exporter = OTLPSpanExporter(
+                    endpoint=otlp_endpoint,
+                    insecure=os.getenv("OTEL_EXPORTER_OTLP_INSECURE", "true").lower()
+                    == "true",
+                )
+                tracer_provider.add_span_processor(BatchSpanProcessor(otlp_exporter))
+                exporters_configured.append("OTLP")
+                logger.info("✅ OTLP exporter configured successfully")
+            except Exception as e:
+                logger.error(f"❌ Failed to configure OTLP exporter: {e}")
+                logger.debug("OTLP exporter error details:", exc_info=True)
+
+        # Configure console exporter for debugging
+        if enable_console:
+            logger.info("📝 Enabling console exporter for debugging")
+            try:
+                console_exporter = ConsoleSpanExporter()
+                tracer_provider.add_span_processor(BatchSpanProcessor(console_exporter))
+                exporters_configured.append("Console")
+                logger.info("✅ Console exporter configured successfully")
+            except Exception as e:
+                logger.error(f"❌ Failed to configure console exporter: {e}")
+                logger.debug("Console exporter error details:", exc_info=True)
+
+        if exporters_configured:
+            logger.info(
+                f"✅ OpenTelemetry configured with {', '.join(exporters_configured)} exporter(s)"
+            )
+            return True
+        else:
+            logger.warning("⚠️ No exporters configured. Tracing will not be exported.")
+            return False
+
+    except Exception as e:
+        logger.error(f"❌ OpenTelemetry setup failed with unexpected error: {e}")
+        logger.debug("OpenTelemetry setup error details:", exc_info=True)
+        return False