-
Notifications
You must be signed in to change notification settings - Fork 4
Description
Hi, I've been trying out Yeti in docker (I've not tried a non-docker install). Each time I can get it running fine, but after turning on various feeds, over a few hours or a couple of days, the system becomes unresponsive, resulting in a reboot or rebuild to try some other optimization ideas. Mostly, I've been using a 6 CPU/16GB RAM Ubuntu 24 Server VM with 120GB of disk, so no storage issue.
I've tried varying amounts of feeds, from few to many, and end up with the same result of the system becoming unresponsive.
Are there recommendations for how to keep this stable?
I've added on some monitoring to help troubleshoot. The biggest consumer is arangodb, which makes sense. You can see from the screenshot how the memory continues to climb and swap starts to get heavily used.
These are some of the settings that I used to help stabilize it, but it's not working as well as I hoped. I've also tried, as you can see here, pulling the latest arangodb for use. So I figured it's time to ask. Thanks in advance for consideration here.
This is my env and docker compose file:
BASE=/opt/yeti
YETI_CONF=/opt/yeti/yeti.conf
ENVFILE=/opt/yeti/prod.env
COMPOSE=/opt/yeti/docker-compose.yml
FRONTCONF=/opt/yeti/00-frontend.conf
EXPORTS_DIR=/opt/yeti/exports
TEMPLATES_DIR=/opt/yeti/templates
LOG_DIR=/opt/yeti/logs
BLOOM_DIR=/opt/yeti/bloomfilters
BACKUP_DIR=/opt/yeti/backups
CACHE_DIR=/opt/yeti/cache
ARANGO_VOL=yeti-db
ARANGO_TAG=3.12.5.2
REDIS_TAG=8.2.2
YETI_IMAGE=yetiplatform/yeti:latest
FRONTEND_IMAGE=yetiplatform/yeti-frontend:latest
BLOOM_IMAGE=yetiplatform/bloomcheck:dev
#### Memory sizing
ROCKSDB_BLOCK_CACHE_MB=2048
ROCKSDB_WRITE_BUF_TOTAL_MB=1024
ARANGO_QUERY_MEM_MB=512
ARANGO_QUERY_MEM_GLOBAL_MB=2048
ARANGO_CACHE_MB=512
JS_V8_MAX_HEAP_MB=384
#### Celery sizing
FEEDS_CONCURRENCY=2
EVENTS_CONCURRENCY=2
PREFETCH=1
=====================
name: yeti
services:
frontend:
container_name: yeti-frontend
image: ${FRONTEND_IMAGE}
environment:
NGINX_ENTRYPOINT_QUIET_LOGS: "1"
depends_on:
api:
condition: service_healthy
ports:
- "9000:80"
volumes:
- ${FRONTCONF}:/etc/nginx/conf.d/default.conf:ro
logging:
driver: "local"
options: { max-size: "10m", max-file: "5" }
api:
container_name: yeti-api
image: ${YETI_IMAGE}
env_file:
- ${ENVFILE}
command: ["webserver"]
healthcheck:
test: ["CMD-SHELL", "curl -fsS http://127.0.0.1:8000/docs >/dev/null || exit 1"]
interval: 10s
timeout: 5s
retries: 5
start_period: 20s
ports:
- "8000:8000"
depends_on:
redis: { condition: service_healthy }
arangodb: { condition: service_started }
volumes:
- ${EXPORTS_DIR}:/opt/yeti/exports
- ${YETI_CONF}:/app/yeti.conf
- ${TEMPLATES_DIR}:/opt/yeti/templates:ro
- ${BASE}/dfiq:/dfiq:ro
- ${LOG_DIR}:/opt/yeti/logs
- ${BASE}/sitecustomize.py:/app/sitecustomize.py:ro
logging:
driver: "local"
options: { max-size: "50m", max-file: "5" }
restart: unless-stopped
stop_grace_period: 30s
tasks:
container_name: yeti-tasks
image: ${YETI_IMAGE}
env_file:
- ${ENVFILE}
environment:
HOME: /app
XDG_CACHE_HOME: /app/.cache
UV_CACHE_DIR: /app/.cache/uv
command: >
uv run celery -A core.taskscheduler worker -E
--loglevel=INFO -P threads
--concurrency=${FEEDS_CONCURRENCY}
--prefetch-multiplier=${PREFETCH}
-Ofair
depends_on:
api: { condition: service_healthy }
redis: { condition: service_healthy }
arangodb: { condition: service_started }
volumes:
- ${EXPORTS_DIR}:/opt/yeti/exports
- ${YETI_CONF}:/app/yeti.conf
- ${TEMPLATES_DIR}:/opt/yeti/templates:ro
- ${BASE}/dfiq:/dfiq:ro
- ${LOG_DIR}:/var/log/yeti
- ${CACHE_DIR}:/app/.cache
- ${BASE}/sitecustomize.py:/app/sitecustomize.py:ro
logging: { driver: "local" }
restart: unless-stopped
events-tasks:
container_name: yeti-events-tasks
image: ${YETI_IMAGE}
env_file:
- ${ENVFILE}
environment:
HOME: /app
XDG_CACHE_HOME: /app/.cache
UV_CACHE_DIR: /app/.cache/uv
command: >
uv run python -m core.events.consumers events
--concurrency ${EVENTS_CONCURRENCY}
depends_on:
api: { condition: service_healthy }
redis: { condition: service_healthy }
arangodb: { condition: service_started }
volumes:
- ${EXPORTS_DIR}:/opt/yeti/exports
- ${YETI_CONF}:/app/yeti.conf
- ${TEMPLATES_DIR}:/opt/yeti/templates:ro
- ${BASE}/dfiq:/dfiq:ro
- ${LOG_DIR}:/var/log/yeti
- ${CACHE_DIR}:/app/.cache
- ${BASE}/sitecustomize.py:/app/sitecustomize.py:ro
logging: { driver: "local" }
restart: unless-stopped
tasks-beat:
container_name: yeti-tasks-beat
image: ${YETI_IMAGE}
env_file:
- ${ENVFILE}
environment:
HOME: /app
XDG_CACHE_HOME: /app/.cache
UV_CACHE_DIR: /app/.cache/uv
command: ['tasks-beat']
depends_on:
api: { condition: service_healthy }
redis: { condition: service_healthy }
arangodb: { condition: service_started }
volumes:
- ${YETI_CONF}:/app/yeti.conf
- ${CACHE_DIR}:/app/.cache
- ${BASE}/sitecustomize.py:/app/sitecustomize.py:ro
restart: unless-stopped
redis:
container_name: yeti-redis
image: redis:${REDIS_TAG}
command:
- redis-server
- --protected-mode
- "no"
- --appendonly
- "no"
- --save
- ""
- --maxmemory
- "1536mb"
- --maxmemory-policy
- allkeys-lru
- --tcp-backlog
- "4096"
healthcheck:
test: ["CMD-SHELL", "redis-cli ping | grep -q PONG"]
interval: 5s
timeout: 3s
retries: 10
logging:
driver: "local"
options: { max-size: "10m", max-file: "3" }
arangodb:
container_name: yeti-arangodb
image: arangodb:${ARANGO_TAG}
command:
- arangod
- --rocksdb.block-cache-size=${ROCKSDB_BLOCK_CACHE_MB}M
- --rocksdb.total-write-buffer-size=${ROCKSDB_WRITE_BUF_TOTAL_MB}M
- --rocksdb.cache-index-and-filter-blocks=true
- --cache.size=${ARANGO_CACHE_MB}M
- --query.memory-limit=${ARANGO_QUERY_MEM_MB}M
- --query.global-memory-limit=${ARANGO_QUERY_MEM_GLOBAL_MB}M
- --javascript.v8-contexts=2
- --javascript.v8-max-heap=${JS_V8_MAX_HEAP_MB}
environment:
- ARANGO_NO_AUTH=1
- ARANGODB_OVERRIDE_DETECTED_TOTAL_MEMORY=8GB
ulimits:
nofile: 1048576
volumes:
- arangodb_data:/var/lib/arangodb3
restart: unless-stopped
logging:
driver: "local"
options: { max-size: "50m", max-file: "5" }
networks:
default:
aliases: [arangodb, yeti-arangodb]
networks:
default:
name: yeti_network
volumes:
arangodb_data:
name: ${ARANGO_VOL}
driver: local