19 changes: 19 additions & 0 deletions ChatQnA/README.md
@@ -344,3 +344,22 @@ OPEA microservice deployment can easily be monitored through Grafana dashboards

![chatqna dashboards](./assets/img/chatqna_dashboards.png)
![tgi dashboard](./assets/img/tgi_dashboard.png)

## Tracing Services with OpenTelemetry and Jaeger

> NOTE: This feature currently has limited support; only LLM inference serving with TGI on Gaudi is enabled.

OPEA microservices and TGI/TEI serving can easily be traced through Jaeger dashboards in conjunction with the OpenTelemetry Tracing feature. Follow the [README](https://github.com/opea-project/GenAIComps/tree/main/comps/cores/telemetry#tracing) to trace additional functions if needed.

Tracing data is exported to http://{EXTERNAL_IP}:4318/v1/traces, the OTLP HTTP endpoint exposed by Jaeger.
Users can obtain the external IP with the command below.

```bash
ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+'
```
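
With the external IP in hand, export the tracing endpoints before bringing up the stack, exactly as `set_env.sh` does in this change:

```bash
# IP of the host running Jaeger, derived from the default route
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
# OTLP gRPC endpoint used by the TGI/TEI serving containers
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
# OTLP HTTP endpoint used by the OPEA microservices
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
```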

For TGI serving on Gaudi, users can see traces from different services such as opea, TEI, and TGI.
![Jaeger dashboard listing the opea, TEI, and TGI services](https://github.com/user-attachments/assets/6126fa70-e830-4780-bd3f-83cb6eff064e)

Here is a screenshot of a single trace of a TGI serving request.
![Trace of a TGI serving request](https://github.com/user-attachments/assets/3a7c51c6-f422-41eb-8e82-c3df52cd48b8)
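
The Jaeger UI itself is exposed on port 16686 by the `jaeger` service in `compose.yaml`. As a quick sanity check (a minimal sketch assuming the default all-in-one query API), you can list the services that have reported traces:

```bash
# Open the UI in a browser at http://$JAEGER_IP:16686, or query the API directly:
curl -s http://$JAEGER_IP:16686/api/services
```
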
24 changes: 21 additions & 3 deletions ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -25,6 +25,7 @@ services:
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
TELEMETRY_ENDPOINT: ${TELEMETRY_ENDPOINT}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-gaudi-server
@@ -37,7 +38,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -55,6 +56,7 @@ services:
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
TELEMETRY_ENDPOINT: ${TELEMETRY_ENDPOINT}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/tei-gaudi:1.5.0
@@ -76,7 +78,7 @@ services:
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
container_name: tgi-gaudi-server
@@ -101,7 +103,22 @@ services:
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096
command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096 --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
jaeger:
image: jaegertracing/all-in-one:latest
container_name: jaeger
ports:
- "16686:16686"
- "4317:4317"
- "4318:4318"
- "9411:9411"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
COLLECTOR_ZIPKIN_HOST_PORT: 9411
restart: unless-stopped
chatqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-gaudi-backend-server
@@ -127,6 +144,7 @@ services:
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
- TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
ipc: host
restart: always
chatqna-gaudi-ui-server:
4 changes: 4 additions & 0 deletions ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh
@@ -14,3 +14,7 @@ export INDEX_NAME="rag-redis"
# Set it as a non-null string, such as true, to enable the logging facility;
# otherwise, keep it as "" to disable it.
export LOGFLAG=""
# Set OpenTelemetry Tracing Endpoint
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
3 changes: 3 additions & 0 deletions ChatQnA/tests/test_compose_on_gaudi.sh
@@ -36,6 +36,9 @@ function start_services() {
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces

# Start Docker Containers
sed -i "s|container_name: chatqna-gaudi-backend-server|container_name: chatqna-gaudi-backend-server\n volumes:\n - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml