Skip to content

Stability issues #27

@kev365

Description

@kev365

Hi, I've been trying out Yeti in Docker (I've not tried a non-docker install). Each time I can get it running fine, but after turning on various feeds, over a few hours or a couple of days, the system becomes unresponsive, resulting in a reboot or rebuild to try some other optimization ideas. Mostly, I've been using a 6 CPU/16GB RAM Ubuntu 24 Server VM with a 120GB disk, so there's no storage issue.

I've tried varying amounts of feeds, from few to many, and end up with the same result of the system becoming unresponsive.

Are there recommendations for how to keep this stable?

I've added on some monitoring to help troubleshoot. The biggest consumer is arangodb, which makes sense. You can see from the screenshot how the memory continues to climb and swap starts to get heavily used.

Image

These are some of the settings that I used to help stabilize it, but it's not working as well as I hoped. I've also tried, as you can see here, pulling the latest arangodb image. So I figured it's time to ask. Thanks in advance for your consideration.

This is my env and docker compose file:

# --- Host paths bind-mounted into the containers (see docker-compose.yml) ---
BASE=/opt/yeti
YETI_CONF=/opt/yeti/yeti.conf
ENVFILE=/opt/yeti/prod.env
COMPOSE=/opt/yeti/docker-compose.yml
FRONTCONF=/opt/yeti/00-frontend.conf
EXPORTS_DIR=/opt/yeti/exports
TEMPLATES_DIR=/opt/yeti/templates
LOG_DIR=/opt/yeti/logs
BLOOM_DIR=/opt/yeti/bloomfilters
BACKUP_DIR=/opt/yeti/backups
CACHE_DIR=/opt/yeti/cache

# --- Image tags and the named volume used for the ArangoDB data directory ---
ARANGO_VOL=yeti-db
ARANGO_TAG=3.12.5.2
REDIS_TAG=8.2.2
YETI_IMAGE=yetiplatform/yeti:latest
FRONTEND_IMAGE=yetiplatform/yeti-frontend:latest
BLOOM_IMAGE=yetiplatform/bloomcheck:dev

#### Memory sizing
# All values are megabytes; they feed the arangod command-line flags in the
# compose file (block cache, write buffers, query limits, V8 heap).
ROCKSDB_BLOCK_CACHE_MB=2048
ROCKSDB_WRITE_BUF_TOTAL_MB=1024
ARANGO_QUERY_MEM_MB=512
ARANGO_QUERY_MEM_GLOBAL_MB=2048
ARANGO_CACHE_MB=512
JS_V8_MAX_HEAP_MB=384

#### Celery sizing
# Thread counts / prefetch multiplier for the tasks and events-tasks services.
FEEDS_CONCURRENCY=2
EVENTS_CONCURRENCY=2
PREFETCH=1

=====================
name: yeti
services:
  frontend:
    # Nginx frontend serving the UI and proxying to the API on port 8000.
    container_name: yeti-frontend
    image: ${FRONTEND_IMAGE}
    environment:
      NGINX_ENTRYPOINT_QUIET_LOGS: "1"
    depends_on:
      api:
        condition: service_healthy
    ports:
      - "9000:80"
    volumes:
      - ${FRONTCONF}:/etc/nginx/conf.d/default.conf:ro
    logging:
      driver: "local"
      options:
        max-size: "10m"
        max-file: "5"
    # Every other long-running service in this stack restarts automatically;
    # without this, the UI stays down after a crash or host OOM kill.
    restart: unless-stopped

  api:
    # Yeti API server; the frontend and all workers gate on its healthcheck.
    container_name: yeti-api
    image: ${YETI_IMAGE}
    restart: unless-stopped
    stop_grace_period: 30s
    env_file:
      - ${ENVFILE}
    command: ["webserver"]
    depends_on:
      redis:
        condition: service_healthy
      arangodb:
        condition: service_started
    healthcheck:
      # /docs answers once the web app is actually serving requests.
      test: ["CMD-SHELL", "curl -fsS http://127.0.0.1:8000/docs >/dev/null || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 20s
    ports:
      - "8000:8000"
    volumes:
      - ${EXPORTS_DIR}:/opt/yeti/exports
      - ${YETI_CONF}:/app/yeti.conf
      - ${TEMPLATES_DIR}:/opt/yeti/templates:ro
      - ${BASE}/dfiq:/dfiq:ro
      - ${LOG_DIR}:/opt/yeti/logs
      - ${BASE}/sitecustomize.py:/app/sitecustomize.py:ro
    logging:
      driver: "local"
      options:
        max-size: "50m"
        max-file: "5"

  tasks:
    # Celery worker that runs feed tasks (thread pool, fair task dispatch).
    container_name: yeti-tasks
    image: ${YETI_IMAGE}
    restart: unless-stopped
    env_file:
      - ${ENVFILE}
    environment:
      # Keep uv/celery caches inside the bind-mounted /app/.cache directory.
      HOME: /app
      XDG_CACHE_HOME: /app/.cache
      UV_CACHE_DIR: /app/.cache/uv
    command: >
      uv run celery -A core.taskscheduler worker -E
      --loglevel=INFO -P threads
      --concurrency=${FEEDS_CONCURRENCY}
      --prefetch-multiplier=${PREFETCH}
      -Ofair
    depends_on:
      api:
        condition: service_healthy
      redis:
        condition: service_healthy
      arangodb:
        condition: service_started
    volumes:
      - ${EXPORTS_DIR}:/opt/yeti/exports
      - ${YETI_CONF}:/app/yeti.conf
      - ${TEMPLATES_DIR}:/opt/yeti/templates:ro
      - ${BASE}/dfiq:/dfiq:ro
      - ${LOG_DIR}:/var/log/yeti
      - ${CACHE_DIR}:/app/.cache
      - ${BASE}/sitecustomize.py:/app/sitecustomize.py:ro
    logging:
      driver: "local"

  events-tasks:
    # Dedicated consumer process for the Yeti events stream.
    container_name: yeti-events-tasks
    image: ${YETI_IMAGE}
    restart: unless-stopped
    env_file:
      - ${ENVFILE}
    environment:
      # Keep uv caches inside the bind-mounted /app/.cache directory.
      HOME: /app
      XDG_CACHE_HOME: /app/.cache
      UV_CACHE_DIR: /app/.cache/uv
    command: >
      uv run python -m core.events.consumers events
      --concurrency ${EVENTS_CONCURRENCY}
    depends_on:
      api:
        condition: service_healthy
      redis:
        condition: service_healthy
      arangodb:
        condition: service_started
    volumes:
      - ${EXPORTS_DIR}:/opt/yeti/exports
      - ${YETI_CONF}:/app/yeti.conf
      - ${TEMPLATES_DIR}:/opt/yeti/templates:ro
      - ${BASE}/dfiq:/dfiq:ro
      - ${LOG_DIR}:/var/log/yeti
      - ${CACHE_DIR}:/app/.cache
      - ${BASE}/sitecustomize.py:/app/sitecustomize.py:ro
    logging:
      driver: "local"

  tasks-beat:
    # Celery beat scheduler that enqueues periodic feed/analytics tasks.
    container_name: yeti-tasks-beat
    image: ${YETI_IMAGE}
    env_file:
      - ${ENVFILE}
    environment:
      HOME: /app
      XDG_CACHE_HOME: /app/.cache
      UV_CACHE_DIR: /app/.cache/uv
    command: ['tasks-beat']
    depends_on:
      api: { condition: service_healthy }
      redis: { condition: service_healthy }
      arangodb: { condition: service_started }
    volumes:
      - ${YETI_CONF}:/app/yeti.conf
      - ${CACHE_DIR}:/app/.cache
      - ${BASE}/sitecustomize.py:/app/sitecustomize.py:ro
    # This was the only service without a logging section, so it fell back to
    # Docker's default json-file driver (unbounded growth). Use the same
    # size-capped "local" driver as the sibling services.
    logging:
      driver: "local"
    restart: unless-stopped

  redis:
    container_name: yeti-redis
    image: redis:${REDIS_TAG}
    # In-memory only (AOF and RDB snapshots disabled) with a hard 1536mb cap,
    # so Redis cannot add to the host-level memory pressure.
    # NOTE(review): with allkeys-lru, Redis may evict *any* key under memory
    # pressure — including Celery broker/queue keys, silently dropping tasks.
    # Consider noeviction (Celery's recommendation for brokers) or volatile-lru.
    command:
      - redis-server
      - --protected-mode
      - "no"
      - --appendonly
      - "no"
      - --save
      - ""
      - --maxmemory
      - "1536mb"
      - --maxmemory-policy
      - allkeys-lru
      - --tcp-backlog
      - "4096"
    healthcheck:
      test: ["CMD-SHELL", "redis-cli ping | grep -q PONG"]
      interval: 5s
      timeout: 3s
      retries: 10
    # All workers and the API gate on this healthcheck, but this was the only
    # backing service without a restart policy — bring it back automatically.
    restart: unless-stopped
    logging:
      driver: "local"
      options:
        max-size: "10m"
        max-file: "3"

  arangodb:
    container_name: yeti-arangodb
    image: arangodb:${ARANGO_TAG}
    command:
      - arangod
      # RocksDB and query memory caps come from the env file (values in MB).
      - --rocksdb.block-cache-size=${ROCKSDB_BLOCK_CACHE_MB}M
      - --rocksdb.total-write-buffer-size=${ROCKSDB_WRITE_BUF_TOTAL_MB}M
      - --rocksdb.cache-index-and-filter-blocks=true
      - --cache.size=${ARANGO_CACHE_MB}M
      - --query.memory-limit=${ARANGO_QUERY_MEM_MB}M
      - --query.global-memory-limit=${ARANGO_QUERY_MEM_GLOBAL_MB}M
      - --javascript.v8-contexts=2
      - --javascript.v8-max-heap=${JS_V8_MAX_HEAP_MB}
    environment:
      # NOTE(review): authentication is fully disabled — acceptable only while
      # the database port stays unpublished on the compose-internal network.
      - ARANGO_NO_AUTH=1
      # Make Arango's auto-sizing assume 8GB instead of the VM's full 16GB.
      - ARANGODB_OVERRIDE_DETECTED_TOTAL_MEMORY=8GB
    # Without a healthcheck, dependents can only use service_started and may
    # race a database that is still recovering/compacting. With this check in
    # place, dependent services can be upgraded to condition: service_healthy.
    # NOTE(review): confirm arangosh connects credential-less under
    # ARANGO_NO_AUTH on this image version.
    healthcheck:
      test: ["CMD-SHELL", "arangosh --server.endpoint tcp://127.0.0.1:8529 --javascript.execute-string 'db._version()' >/dev/null 2>&1 || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 10
      start_period: 30s
    # NOTE(review): for the memory-growth symptom in this issue, also consider
    # a container-level cap (mem_limit/deploy.resources.limits.memory) so the
    # container is restarted instead of the whole host thrashing into swap.
    ulimits:
      nofile: 1048576
    volumes:
      - arangodb_data:/var/lib/arangodb3
    restart: unless-stopped
    logging:
      driver: "local"
      options:
        max-size: "50m"
        max-file: "5"
    networks:
      default:
        aliases: [arangodb, yeti-arangodb]

# Single shared network; services resolve each other by service name/alias.
networks:
  default:
    name: yeti_network

volumes:
  # Named volume backing the ArangoDB data directory; survives container
  # rebuilds, so the database persists across `docker compose up` cycles.
  arangodb_data:
    name: ${ARANGO_VOL}
    driver: local

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions