Skip to content

Commit 776dcec

Browse files
authored
Disable outlines cache by default (#14837)
1 parent ccf02fc commit 776dcec

File tree

2 files changed

+16
-1
lines changed

2 files changed

+16
-1
lines changed

vllm/envs.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,7 @@
9595
VLLM_DP_MASTER_IP: str = ""
9696
VLLM_DP_MASTER_PORT: int = 0
9797
VLLM_MARLIN_USE_ATOMIC_ADD: bool = False
98+
VLLM_V0_USE_OUTLINES_CACHE: bool = False
9899

99100

100101
def get_default_cache_root():
@@ -623,6 +624,12 @@ def maybe_convert_int(value: Optional[str]) -> Optional[int]:
623624
# Whether to use atomicAdd reduce in gptq/awq marlin kernel.
624625
"VLLM_MARLIN_USE_ATOMIC_ADD":
625626
lambda: os.environ.get("VLLM_MARLIN_USE_ATOMIC_ADD", "0") == "1",
627+
628+
# Whether to turn on the outlines cache for V0
629+
# This cache is unbounded and on disk, so it's not safe to use in
630+
# an environment with potentially malicious users.
631+
"VLLM_V0_USE_OUTLINES_CACHE":
632+
lambda: os.environ.get("VLLM_V0_USE_OUTLINES_CACHE", "0") == "1",
626633
}
627634

628635
# end-env-vars-definition

vllm/model_executor/guided_decoding/outlines_logits_processors.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,20 +24,28 @@
2424
import numpy as np
2525
import torch
2626
from outlines import grammars
27-
from outlines.caching import cache
27+
from outlines.caching import cache, disable_cache
2828
from outlines.fsm.guide import (CFGGuide, CFGState, Generate, Guide,
2929
RegexGuide, Write)
3030
from outlines.fsm.parsing import PartialLark
3131
from outlines_core.fsm.json_schema import build_regex_from_schema
3232
from pydantic import BaseModel
3333
from transformers import PreTrainedTokenizerBase
3434

35+
import vllm.envs as envs
3536
from vllm.logger import init_logger
3637
from vllm.model_executor.guided_decoding.reasoner import Reasoner
3738
from vllm.platforms import current_platform
3839

3940
logger = init_logger(__name__)
4041

42+
if envs.VLLM_V0_USE_OUTLINES_CACHE:
43+
logger.warning("Enabling outlines cache. This is an unbounded on-disk "
44+
"cache. It may consume a lot of disk space and should "
45+
"not be used with untrusted clients.")
46+
else:
47+
disable_cache()
48+
4149

4250
class BaseLogitsProcessor:
4351

0 commit comments

Comments
 (0)