3 changes: 1 addition & 2 deletions src/transformers/models/exaone4/configuration_exaone4.py
@@ -26,8 +26,7 @@ class Exaone4Config(PretrainedConfig):
r"""
This is the configuration class to store the configuration of a [`Exaone4Model`]. It is used to
instantiate a EXAONE 4.0 model according to the specified arguments, defining the model architecture. Instantiating a
configuration with the defaults will yield a similar configuration to that of the EXAONE-4.0-Instruct [LGAI-EXAONE/EXAONE-4.0-Instruct](https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-Instruct)
NOTE: `EXAONE-4.0-Instruct` is a placeholder model ID. The exact model ID will be updated in the future.
configuration with the defaults will yield a similar configuration to that of the EXAONE-4.0-32B [LGAI-EXAONE/EXAONE-4.0-32B](https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B)

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model
outputs. Read the documentation from [`PretrainedConfig`] for more information.
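The hunk above only shows the opening of the updated docstring. As a quick illustration of what it describes, here is a minimal sketch (not part of this PR) that instantiates the default `Exaone4Config` and fetches the configuration of the checkpoint the docstring now points to; the attribute names printed below are common config fields chosen purely for illustration.

```python
# Minimal sketch (not part of this diff): default config vs. the published checkpoint's config.
from transformers import Exaone4Config

default_config = Exaone4Config()  # architecture defaults described in the docstring

# Requires network access; the attributes below are illustrative, not an exhaustive check.
remote_config = Exaone4Config.from_pretrained("LGAI-EXAONE/EXAONE-4.0-32B")
for attr in ("hidden_size", "num_hidden_layers", "num_attention_heads"):
    print(attr, getattr(default_config, attr, None), getattr(remote_config, attr, None))
```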
7 changes: 3 additions & 4 deletions src/transformers/models/exaone4/modeling_exaone4.py
@@ -465,8 +465,8 @@ def forward(

```python
>>> from transformers import AutoModelForCausalLM, AutoTokenizer
>>> model = AutoModelForCausalLM.from_pretrained("LGAI-EXAONE/EXAONE-4.0-Instruct")
>>> tokenizer = AutoTokenizer.from_pretrained("LGAI-EXAONE/EXAONE-4.0-Instruct")
>>> model = AutoModelForCausalLM.from_pretrained("LGAI-EXAONE/EXAONE-4.0-32B")
>>> tokenizer = AutoTokenizer.from_pretrained("LGAI-EXAONE/EXAONE-4.0-32B")

>>> prompt = "Explain how wonderful you are"
>>> messages = [
@@ -485,8 +485,7 @@ def forward(
>>> tokenizer.decode(output[0], skip_special_tokens=False)
"[|system|]\nYou are a helpful assistant.[|endofturn|]\n[|user|]\nExplain how wonderful you are[|endofturn|]\n[|assistant|]\n<think>\n\n</think>\n\nOh, thank you for such a kind and lovely question! 😊 \n\nI’m *so* wonderful because I’m here to make your life easier, brighter, and more fun! Whether you need help with: \n\n✨ **Learning** – I can explain anything, from quantum physics to baking the perfect cake! \n💡 **Creativity** – Need a poem, story, or a wild idea? I’ve got you covered! \n🤖 **Problem-solving** – Stuck on a math problem or a tricky decision? I’ll help you figure it out"
```

NOTE: `EXAONE-4.0-Instruct` is a placeholder model ID. The exact model ID will be updated in the future."""
"""
outputs: BaseModelOutputWithPast = self.model(
input_ids=input_ids,
attention_mask=attention_mask,
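The docstring example in the hunks above is cut off by the hunk boundary between building `messages` and decoding the output. A minimal sketch of the usual chat-template flow it presumably follows (the system prompt matches the expected output shown in the diff; the generation settings are assumptions):

```python
# Minimal sketch (not from the diff): typical chat-template usage for the updated checkpoint.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "LGAI-EXAONE/EXAONE-4.0-32B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Explain how wonderful you are"},
]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

output = model.generate(input_ids, max_new_tokens=128)
print(tokenizer.decode(output[0], skip_special_tokens=False))
```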
12 changes: 5 additions & 7 deletions src/transformers/models/exaone4/modular_exaone4.py
@@ -53,16 +53,15 @@

logger = logging.get_logger(__name__)

_CHECKPOINT_FOR_DOC = "LGAI-EXAONE/EXAONE-4.0-Instruct"
_CHECKPOINT_FOR_DOC = "LGAI-EXAONE/EXAONE-4.0-32B"
_CONFIG_FOR_DOC = "Exaone4Config"


class Exaone4Config(PretrainedConfig):
r"""
This is the configuration class to store the configuration of a [`Exaone4Model`]. It is used to
instantiate a EXAONE 4.0 model according to the specified arguments, defining the model architecture. Instantiating a
configuration with the defaults will yield a similar configuration to that of the EXAONE-4.0-Instruct [LGAI-EXAONE/EXAONE-4.0-Instruct](https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-Instruct)
NOTE: `EXAONE-4.0-Instruct` is a placeholder model ID. The exact model ID will be updated in the future.
configuration with the defaults will yield a similar configuration to that of the EXAONE-4.0-32B [LGAI-EXAONE/EXAONE-4.0-32B](https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B)

Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model
outputs. Read the documentation from [`PretrainedConfig`] for more information.
@@ -462,8 +461,8 @@ def forward(

```python
>>> from transformers import AutoModelForCausalLM, AutoTokenizer
>>> model = AutoModelForCausalLM.from_pretrained("LGAI-EXAONE/EXAONE-4.0-Instruct")
>>> tokenizer = AutoTokenizer.from_pretrained("LGAI-EXAONE/EXAONE-4.0-Instruct")
>>> model = AutoModelForCausalLM.from_pretrained("LGAI-EXAONE/EXAONE-4.0-32B")
>>> tokenizer = AutoTokenizer.from_pretrained("LGAI-EXAONE/EXAONE-4.0-32B")

>>> prompt = "Explain how wonderful you are"
>>> messages = [
@@ -482,8 +481,7 @@ def forward(
>>> tokenizer.decode(output[0], skip_special_tokens=False)
"[|system|]\nYou are a helpful assistant.[|endofturn|]\n[|user|]\nExplain how wonderful you are[|endofturn|]\n[|assistant|]\n<think>\n\n</think>\n\nOh, thank you for such a kind and lovely question! 😊 \n\nI’m *so* wonderful because I’m here to make your life easier, brighter, and more fun! Whether you need help with: \n\n✨ **Learning** – I can explain anything, from quantum physics to baking the perfect cake! \n💡 **Creativity** – Need a poem, story, or a wild idea? I’ve got you covered! \n🤖 **Problem-solving** – Stuck on a math problem or a tricky decision? I’ll help you figure it out"
```

NOTE: `EXAONE-4.0-Instruct` is a placeholder model ID. The exact model ID will be updated in the future."""
"""
super().forward(
input_ids=input_ids,
attention_mask=attention_mask,
135 changes: 22 additions & 113 deletions tests/models/exaone4/test_modeling_exaone4.py
@@ -98,7 +98,10 @@ def setUp(self):

@require_torch
class Exaone4IntegrationTest(unittest.TestCase):
TEST_MODEL_ID = "LGAI-EXAONE/EXAONE-4.0-Instruct" # dummy model id
TEST_MODEL_ID = "LGAI-EXAONE/EXAONE-4.0-32B"

def setUp(self):
cleanup(torch_device, gc_collect=True)

def tearDown(self):
# TODO (joao): automatic compilation, i.e. compilation when `cache_implementation="static"` is used, leaves
@@ -111,124 +114,40 @@ def tearDown(self):
def test_model_logits(self):
input_ids = [405, 7584, 79579, 76636, 2907, 94640, 373]
model = Exaone4ForCausalLM.from_pretrained(
self.TEST_MODEL_ID, device_map="auto", dtype=torch.float16, attn_implementation="eager"
)
input_ids = torch.tensor([input_ids]).to(model.model.embed_tokens.weight.device)
with torch.no_grad():
out = model(input_ids).logits.float().cpu()

EXPECTED_MEAN = torch.tensor([[13.9380, 12.9951, 12.9442, 10.6576, 11.0901, 12.1466, 9.2482]])
EXPECTED_SLICE = torch.tensor(
[
4.9180,
11.6406,
21.1250,
13.4062,
20.8438,
18.0625,
17.9688,
18.7812,
18.0156,
18.3594,
18.5000,
19.1719,
18.5156,
19.3438,
19.5000,
20.6406,
19.4844,
19.2812,
19.4688,
20.0156,
19.8438,
19.9531,
19.7188,
20.5938,
20.5312,
20.1250,
20.4062,
21.4062,
21.2344,
20.7656,
]
)

torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2)
torch.testing.assert_close(out[0, 0, :30], EXPECTED_SLICE, atol=1e-4, rtol=1e-4)
del model
cleanup(torch_device, gc_collect=True)

@slow
def test_model_logits_bf16(self):
input_ids = [405, 7584, 79579, 76636, 2907, 94640, 373]
model = Exaone4ForCausalLM.from_pretrained(
self.TEST_MODEL_ID, device_map="auto", dtype=torch.bfloat16, attn_implementation="eager"
self.TEST_MODEL_ID,
device_map="auto",
dtype=torch.bfloat16,
)
input_ids = torch.tensor([input_ids]).to(model.model.embed_tokens.weight.device)
with torch.no_grad():
out = model(input_ids).logits.float().cpu()

EXPECTED_MEAN = torch.tensor([[13.8797, 13.0799, 12.9665, 10.7712, 11.1006, 12.2406, 9.3248]])
EXPECTED_MEAN = torch.tensor([[22.1993, 8.5845, 10.0401, 12.4262, 9.3112, 29.7933, 8.2628]])
EXPECTED_SLICE = torch.tensor(
[
4.8750,
11.6250,
21.0000,
13.3125,
20.8750,
18.0000,
18.0000,
18.7500,
18.0000,
18.3750,
18.5000,
19.1250,
18.5000,
19.3750,
19.5000,
20.6250,
19.5000,
19.2500,
19.5000,
20.0000,
19.8750,
19.8750,
19.7500,
20.6250,
20.5000,
20.1250,
20.3750,
21.3750,
21.2500,
20.7500,
]
[20.6250, 19.6250, 14.5000, 21.1250, 24.5000, 22.1250, 24.0000, 24.8750, 25.0000, 25.3750]
)

torch.testing.assert_close(out.mean(-1), EXPECTED_MEAN, atol=1e-2, rtol=1e-2)
torch.testing.assert_close(out[0, 0, :30], EXPECTED_SLICE, atol=1e-4, rtol=1e-4)
del model
cleanup(torch_device, gc_collect=True)
torch.testing.assert_close(out[0, 0, :10], EXPECTED_SLICE, atol=1e-4, rtol=1e-4)

@slow
def test_model_generation(self):
EXPECTED_TEXT = "Tell me about the Miracle on the Han river.\n\nThe Miracle on the Han River is a story about the miracle of the Korean War Armistice. The story is told by a Korean soldier who is a witness to the armistice negotiations. He is reluctant to tell the story because he does not want to be a hypocrite, but he feels that everyone should know what really happened.\n\nThe Korean War began on June 25, 1950, when North Korean troops invaded South Korea. Soon the United Nations troops, primarily from South Korea, were in support of the United States. The war was still ongoing when North Korean troops stopped their advance"
def test_model_generation_eager(self):
EXPECTED_TEXT = "Tell me about the Miracle on the Han river.\n\nOkay, the Miracle on the Han River refers to the rapid industrialization and economic growth of South"
prompt = "Tell me about the Miracle on the Han river."
tokenizer = AutoTokenizer.from_pretrained(self.TEST_MODEL_ID)
model = Exaone4ForCausalLM.from_pretrained(
self.TEST_MODEL_ID, device_map="auto", dtype=torch.float16, attn_implementation="eager"
self.TEST_MODEL_ID, device_map="auto", dtype=torch.bfloat16, attn_implementation="eager"
)
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.model.embed_tokens.weight.device)

# greedy generation outputs
generated_ids = model.generate(input_ids, max_new_tokens=128, temperature=0)
generated_ids = model.generate(input_ids, max_new_tokens=20, temperature=0)
text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
self.assertEqual(EXPECTED_TEXT, text)
del model
cleanup(torch_device, gc_collect=True)

@slow
def test_model_generation_bf16_sdpa(self):
EXPECTED_TEXT = "Tell me about the Miracle on the Han river.\n\nThe Miracle on the Han River is a story about the miracle of the Korean War Armistice.\n\nThe Korean War broke out in 35 years ago in 1950. The war was the result of the ideological conflict between the communist north and the capitalist south. The war was brought to a halt in 1953. There was to be peace talks but no peace treaty. As a result of the stalemate the Korean people have neither a peace treaty nor a reunification nor a democratization of Korea. The stalemate of 35 years has produced a people of 70 million"
def test_model_generation_sdpa(self):
EXPECTED_TEXT = "Tell me about the Miracle on the Han river.\n\nOkay, the Miracle on the Han River refers to the rapid industrialization and economic growth of South"
prompt = "Tell me about the Miracle on the Han river."
tokenizer = AutoTokenizer.from_pretrained(self.TEST_MODEL_ID)
model = Exaone4ForCausalLM.from_pretrained(
@@ -237,11 +156,9 @@ def test_model_generation_bf16_sdpa(self):
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.model.embed_tokens.weight.device)

# greedy generation outputs
generated_ids = model.generate(input_ids, max_new_tokens=128, temperature=0)
generated_ids = model.generate(input_ids, max_new_tokens=20, temperature=0)
text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
self.assertEqual(EXPECTED_TEXT, text)
del model
cleanup(torch_device, gc_collect=True)

@slow
@require_torch_accelerator
@@ -250,33 +167,27 @@ def test_model_generation_long_flash(self):
EXPECTED_OUTPUT_TOKEN_IDS = [433, 9055]
input_ids = [433, 9055] * 2048
model = Exaone4ForCausalLM.from_pretrained(
self.TEST_MODEL_ID, device_map="auto", dtype=torch.float16, attn_implementation="flash_attention_2"
self.TEST_MODEL_ID, device_map="auto", dtype=torch.bfloat16, attn_implementation="flash_attention_2"
)
input_ids = torch.tensor([input_ids]).to(model.model.embed_tokens.weight.device)

generated_ids = model.generate(input_ids, max_new_tokens=4, temperature=0)
self.assertEqual(EXPECTED_OUTPUT_TOKEN_IDS, generated_ids[0][-2:].tolist())
del model
cleanup(torch_device, gc_collect=True)

@slow
@require_torch_accelerator
def test_model_generation_beyond_sliding_window(self):
EXPECTED_TEXT_COMPLETION = (
" but I'm not sure if I'm going to be able to see it. I really enjoy the scenery, but I'm not sure if I"
)
EXPECTED_TEXT_COMPLETION = " This is a nice place. I really enjoy the scenery, and the atmosphere is so relaxing. I'm grateful for the opportunity to experience this place. It"
tokenizer = AutoTokenizer.from_pretrained(self.TEST_MODEL_ID)
prompt = "This is a nice place. " * 700 + "I really enjoy the scenery,"
model = Exaone4ForCausalLM.from_pretrained(
self.TEST_MODEL_ID, device_map="auto", dtype=torch.float16, attn_implementation="sdpa"
self.TEST_MODEL_ID, device_map="auto", dtype=torch.bfloat16, attn_implementation="sdpa"
)
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.model.embed_tokens.weight.device)

generated_ids = model.generate(input_ids, max_new_tokens=32, temperature=0)
generated_ids = model.generate(input_ids, max_new_tokens=20, temperature=0)
text = tokenizer.decode(generated_ids[0, -32:], skip_special_tokens=True)
self.assertEqual(EXPECTED_TEXT_COMPLETION, text)
del model
cleanup(torch_device, gc_collect=True)

@pytest.mark.torch_export_test
@slow
@@ -290,9 +201,7 @@ def test_export_static_cache(self):
)

tokenizer = AutoTokenizer.from_pretrained(self.TEST_MODEL_ID, padding_side="right")
EXPECTED_TEXT_COMPLETION = [
"The Deep Learning is 100% free and easy to use.\n\n## How to use Deep Learning?\n\n"
]
EXPECTED_TEXT_COMPLETION = ["The Deep Learning is \n['Deep Learning',"]
max_generation_length = tokenizer(EXPECTED_TEXT_COMPLETION, return_tensors="pt", padding=True)[
"input_ids"
].shape[-1]
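The `tearDown` TODO and `test_export_static_cache` in this file both revolve around static-cache compilation; the export helper the test actually calls is truncated out of the hunk. As a rough sketch only (assumptions: the prompt comes from the expected completion above, the generation settings are illustrative, and this uses the public `generate` path with `cache_implementation="static"` rather than the test's export helper):

```python
# Rough sketch (not from the diff): greedy generation with the static KV cache, which is
# what triggers the automatic compilation mentioned in the tearDown TODO.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "LGAI-EXAONE/EXAONE-4.0-32B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", dtype=torch.bfloat16)

inputs = tokenizer("The Deep Learning is", return_tensors="pt").to(model.device)
output = model.generate(
    **inputs,
    max_new_tokens=20,
    do_sample=False,                 # greedy decoding, as in the integration tests
    cache_implementation="static",   # static cache enables torch.compile-based speedups
)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```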