
Commit 6bccc70: Formatting

1 parent 5335704

4 files changed: +24, -9 lines

backends/exllamav2/model.py

Lines changed: 2 additions & 1 deletion

@@ -270,7 +270,8 @@ async def create(cls, model_directory: pathlib.Path, hf_model: HFModel, **kwargs
             self.config.max_seq_len = self.adjust_max_seq_len(user_max_seq_len)
         else:
             self.config.max_seq_len = unwrap(
-                user_max_seq_len, min(hf_model.hf_config.get_max_position_embeddings(), 4096)
+                user_max_seq_len,
+                min(hf_model.hf_config.get_max_position_embeddings(), 4096),
             )
         self.cache_size = self.config.max_seq_len
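
For context, the rewrapped call keeps the same fallback: an explicit user max_seq_len wins, otherwise the model's reported context length is capped at 4096. A minimal runnable sketch, assuming unwrap is the usual value-or-default helper (the real one lives in the repo's common utilities and may differ in detail):

# Hypothetical stand-in for the repo's unwrap helper (assumption, not its code).
def unwrap(value, default):
    return value if value is not None else default

def resolve_max_seq_len(user_max_seq_len, model_max_position_embeddings):
    # User setting wins; otherwise cap the model's reported context at 4096.
    return unwrap(user_max_seq_len, min(model_max_position_embeddings, 4096))

assert resolve_max_seq_len(None, 32768) == 4096    # large model context, capped
assert resolve_max_seq_len(None, 2048) == 2048     # small context kept as-is
assert resolve_max_seq_len(16384, 32768) == 16384  # explicit user value wins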

backends/exllamav3/model.py

Lines changed: 14 additions & 6 deletions

@@ -225,27 +225,35 @@ async def create(cls, model_directory: pathlib.Path, hf_model: HFModel, **kwargs

         # Determine max_seq_len and cache_size
         max_seq_len_user = kwargs.get("max_seq_len")
-        max_seq_len_model = self.hf_model.hf_config.get_max_position_embeddings(default = None)
+        max_seq_len_model = self.hf_model.hf_config.get_max_position_embeddings(
+            default=None
+        )
         max_seq_len_default = 8192

         if max_seq_len_model and not max_seq_len_user:
-            logger.info(f'Using default max_seq_len from model: {max_seq_len_model} tokens.')
+            logger.info(
+                f"Using default max_seq_len from model: {max_seq_len_model} tokens."
+            )
             max_seq_len = max_seq_len_model
         elif max_seq_len_user:
-            logger.info(f'Using configured max_seq_len: {max_seq_len_user} tokens.')
+            logger.info(f"Using configured max_seq_len: {max_seq_len_user} tokens.")
             max_seq_len = max_seq_len_user
         else:
-            logger.warning(f"max_seq_len is undefined. Defaulting to {max_seq_len_default} tokens.")
+            logger.warning(
+                f"max_seq_len is undefined. Defaulting to {max_seq_len_default} tokens."
+            )
             max_seq_len = max_seq_len_default

         cache_size_user = kwargs.get("cache_size")
         cache_size_default = 8192

         if cache_size_user:
-            logger.info(f'Using configured cache_size: {cache_size_user} tokens.')
+            logger.info(f"Using configured cache_size: {cache_size_user} tokens.")
             cache_size = cache_size_user
         else:
-            logger.warning(f"cache_size is undefined. Defaulting to {cache_size_default} tokens.")
+            logger.warning(
+                f"cache_size is undefined. Defaulting to {cache_size_default} tokens."
+            )
             cache_size = cache_size_default

         if max_seq_len < cache_size:
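
These hunks only rewrap long lines; the selection order is unchanged: a user-configured value beats the value read from the model config, which beats the hard default of 8192. A self-contained sketch of that precedence, with illustrative names rather than the module's actual API:

def resolve_length(user_value, model_value, default=8192):
    # Mirrors the branch order above: model value only when the user set nothing,
    # explicit user value next, hard default last.
    if model_value and not user_value:
        return model_value
    elif user_value:
        return user_value
    else:
        return default

assert resolve_length(None, 40960) == 40960  # model config wins when user is silent
assert resolve_length(4096, 40960) == 4096   # user configuration wins
assert resolve_length(None, None) == 8192    # neither set: fall back to the default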

common/transformers_utils.py

Lines changed: 5 additions & 1 deletion

@@ -83,13 +83,17 @@ def eos_tokens(self):
         return []

     def get_max_position_embeddings(self, default: int | None = 4096) -> int:
-        if self.text_config is not None and self.text_config.max_position_embeddings is not None:
+        if (
+            self.text_config is not None
+            and self.text_config.max_position_embeddings is not None
+        ):
             return self.text_config.max_position_embeddings
         elif self.max_position_embeddings is not None:
             return self.max_position_embeddings
         else:
             return default

+
 class TokenizerConfig(BaseModel):
     """
     An abridged version of HuggingFace's tokenizer config.
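
The wrapped condition preserves the lookup order: a nested text_config (as in multimodal configs) takes priority, then the top-level field, then the supplied default. A runnable sketch under that reading, using SimpleNamespace stand-ins instead of the repo's Pydantic models:

from types import SimpleNamespace

def get_max_position_embeddings(config, default=4096):
    # Nested text_config first, then the top-level field, then the default.
    if (
        config.text_config is not None
        and config.text_config.max_position_embeddings is not None
    ):
        return config.text_config.max_position_embeddings
    elif config.max_position_embeddings is not None:
        return config.max_position_embeddings
    else:
        return default

multimodal = SimpleNamespace(
    text_config=SimpleNamespace(max_position_embeddings=32768),
    max_position_embeddings=None,
)
plain = SimpleNamespace(text_config=None, max_position_embeddings=8192)
empty = SimpleNamespace(text_config=None, max_position_embeddings=None)

assert get_max_position_embeddings(multimodal) == 32768
assert get_max_position_embeddings(plain) == 8192
assert get_max_position_embeddings(empty) == 4096
# default=None is how the exllamav3 backend calls it above.
assert get_max_position_embeddings(empty, default=None) is None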

endpoints/OAI/utils/chat_completion.py

Lines changed: 3 additions & 1 deletion

@@ -43,7 +43,9 @@ def _extract_think_content(text: str) -> tuple[Optional[str], Optional[str]]:
         return None, text
     elif model.container.reasoning_start_token in text:
         start_reasoning = text.split(model.container.reasoning_start_token)[1]
-        reasoning_content = start_reasoning.split(model.container.reasoning_end_token)[0]
+        reasoning_content = start_reasoning.split(model.container.reasoning_end_token)[
+            0
+        ]
         content = start_reasoning.split(model.container.reasoning_end_token)[1]
         return reasoning_content.strip(), content.strip()
     else:
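
The split itself is unchanged: everything between the reasoning start and end tokens becomes reasoning_content, and what follows the end token becomes content. A standalone sketch, using "<think>"/"</think>" as assumed placeholder tokens (the real values come from model.container at runtime):

def extract_think_content(text, start_token="<think>", end_token="</think>"):
    # Placeholder tokens are assumptions; the endpoint reads them from the model container.
    if start_token not in text:
        return None, text
    after_start = text.split(start_token)[1]
    reasoning_content = after_start.split(end_token)[0]
    content = after_start.split(end_token)[1]
    return reasoning_content.strip(), content.strip()

reasoning, answer = extract_think_content("<think>plan the steps</think>The answer is 42.")
assert reasoning == "plan the steps"
assert answer == "The answer is 42."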

0 commit comments