1 parent 720ccc2 commit f2d7eaa
1 file changed
optimum/habana/transformers/models/llama/modeling_llama.py
@@ -119,7 +119,7 @@ def __init__(
         self.rope_type = "default"
         self.max_seq_len_cached = config.max_position_embeddings
         # Truncate the cached max sequence length to 8k to limit cached register buffer size
-        if config.max_position_embeddings >= 8192:
+        if config.max_position_embeddings > 8192 and self.rope_type == "llama3":
             self.max_seq_len_cached = 8192
         self.original_max_seq_len = config.max_position_embeddings
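
The effect of the new guard, as a minimal standalone sketch: the 8k truncation of the cached rope buffer now applies only when the model uses llama3-style rope scaling and its configured context actually exceeds 8192, so default-rope models keep their full-length cache. The Config dataclass and compute_cached_max_seq_len helper below are hypothetical stand-ins for the real LlamaRotaryEmbedding attributes, used here only to illustrate the condition.

from dataclasses import dataclass


@dataclass
class Config:
    # Stand-in for the relevant fields of transformers' LlamaConfig;
    # in the real code rope_type is derived from config.rope_scaling.
    max_position_embeddings: int
    rope_type: str = "default"


def compute_cached_max_seq_len(config: Config) -> int:
    """Return the sequence length used to size the cached rope buffers."""
    max_seq_len_cached = config.max_position_embeddings
    # After the change: truncate to 8k only for llama3-style rope,
    # and only when the configured context actually exceeds 8192.
    if config.max_position_embeddings > 8192 and config.rope_type == "llama3":
        max_seq_len_cached = 8192
    return max_seq_len_cached


# Before the change, any model with max_position_embeddings >= 8192 was
# truncated; now a default-rope model keeps its full cache:
assert compute_cached_max_seq_len(Config(131072, "llama3")) == 8192
assert compute_cached_max_seq_len(Config(131072, "default")) == 131072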