From d32969d346d3f3e1584b720f3672f47c438faab1 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Mon, 19 Jan 2026 10:34:42 -0600 Subject: [PATCH 1/3] initial commit for branch --- convert_hf_to_gguf.py | 6 +++++- convert_hf_to_gguf_update.py | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 464ecbaab9..c716402677 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -1255,6 +1255,9 @@ def get_vocab_base_pre(self, tokenizer) -> str: if chkhsh == "6c81ce329e0802883b22eabab0d3fa48357337ef1ecb45443828bf1f6254833f": # ref: https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B res = "exaone-moe" + if chkhsh == "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267": + # ref: https://huggingface.co/zai-org/GLM-4.7-Flash + res = "glm-4.7-flash" if res is None: logger.warning("\n") @@ -7458,7 +7461,8 @@ def prepare_tensors(self): "DeepseekV3ForCausalLM", "KimiVLForConditionalGeneration", "YoutuForCausalLM", - "YoutuVLForConditionalGeneration" + "YoutuVLForConditionalGeneration", + "Glm4MoeLiteForCausalLM" ) class DeepseekV2Model(TextModel): model_arch = gguf.MODEL_ARCH.DEEPSEEK2 diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py index aa9843ea17..460198be69 100755 --- a/convert_hf_to_gguf_update.py +++ b/convert_hf_to_gguf_update.py @@ -148,6 +148,7 @@ class TOKENIZER_TYPE(IntEnum): {"name": "youtu", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tencent/Youtu-LLM-2B", }, {"name": "solar-open", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/upstage/Solar-Open-100B", }, {"name": "exaone-moe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B", }, + {"name": "glm-4.7-flash", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/zai-org/GLM-4.7-Flash", }, ] # some models are known to be broken upstream, so we will skip them as exceptions From 38e4882469a57eab0d022b1ae1de7bdb33eec4be Mon Sep 17 00:00:00 2001 From: ddh0 Date: Mon, 19 Jan 2026 11:01:07 -0600 Subject: [PATCH 2/3] add glm-4.7-flash, move tokenizer hash --- convert_hf_to_gguf_update.py | 3 +-- src/llama-vocab.cpp | 4 ++++ src/llama-vocab.h | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py index 460198be69..1a8fbd5bb3 100755 --- a/convert_hf_to_gguf_update.py +++ b/convert_hf_to_gguf_update.py @@ -148,7 +148,6 @@ class TOKENIZER_TYPE(IntEnum): {"name": "youtu", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tencent/Youtu-LLM-2B", }, {"name": "solar-open", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/upstage/Solar-Open-100B", }, {"name": "exaone-moe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/K-EXAONE-236B-A23B", }, - {"name": "glm-4.7-flash", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/zai-org/GLM-4.7-Flash", }, ] # some models are known to be broken upstream, so we will skip them as exceptions @@ -171,9 +170,9 @@ class TOKENIZER_TYPE(IntEnum): {"name": "grok-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/alvarobartt/grok-2-tokenizer", "chkhsh": "66b8d4e19ab16c3bfd89bce5d785fb7e0155e8648708a1f42077cb9fe002c273"}, # jina-v2-de variants {"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/aari1995/German_Semantic_V3", "chkhsh": "b3d1dd861f1d4c5c0d2569ce36baf3f90fe8a102db3de50dd71ff860d91be3df"}, + {"name": "glm-4.7-flash", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/zai-org/GLM-4.7-Flash", "chkhsh": "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267"}, ] - def download_file_with_auth(url, token, save_path): headers = {"Authorization": f"Bearer {token}"} if token else None response = sess.get(url, headers=headers) diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index a23950d007..f332dbe7e5 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -2041,6 +2041,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { tokenizer_pre == "solar-open") { pre_type = LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN; clean_spaces = false; + } else if ( + tokenizer_pre == "glm-4.7-flash") { + pre_type = LLAMA_VOCAB_PRE_TYPE_GLM_4_7_FLASH; + clean_spaces = false; } else { throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str())); } diff --git a/src/llama-vocab.h b/src/llama-vocab.h index 28c3a82b91..20f94102a0 100644 --- a/src/llama-vocab.h +++ b/src/llama-vocab.h @@ -54,6 +54,7 @@ enum llama_vocab_pre_type { LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN = 43, LLAMA_VOCAB_PRE_TYPE_YOUTU = 44, LLAMA_VOCAB_PRE_TYPE_EXAONE_MOE = 45, + LLAMA_VOCAB_PRE_TYPE_GLM_4_7_FLASH = 46, }; struct LLM_KV; From eb630d4a408cd585c65428da2ba93760c35091c5 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Mon, 19 Jan 2026 11:22:17 -0600 Subject: [PATCH 3/3] use `glm4` pretok --- convert_hf_to_gguf.py | 2 +- convert_hf_to_gguf_update.py | 2 +- src/llama-vocab.cpp | 4 ---- src/llama-vocab.h | 1 - 4 files changed, 2 insertions(+), 7 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index c716402677..894839aa95 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -1257,7 +1257,7 @@ def get_vocab_base_pre(self, tokenizer) -> str: res = "exaone-moe" if chkhsh == "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267": # ref: https://huggingface.co/zai-org/GLM-4.7-Flash - res = "glm-4.7-flash" + res = "glm4" if res is None: logger.warning("\n") diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py index 1a8fbd5bb3..2d3883fb40 100755 --- a/convert_hf_to_gguf_update.py +++ b/convert_hf_to_gguf_update.py @@ -170,7 +170,7 @@ class TOKENIZER_TYPE(IntEnum): {"name": "grok-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/alvarobartt/grok-2-tokenizer", "chkhsh": "66b8d4e19ab16c3bfd89bce5d785fb7e0155e8648708a1f42077cb9fe002c273"}, # jina-v2-de variants {"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/aari1995/German_Semantic_V3", "chkhsh": "b3d1dd861f1d4c5c0d2569ce36baf3f90fe8a102db3de50dd71ff860d91be3df"}, - {"name": "glm-4.7-flash", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/zai-org/GLM-4.7-Flash", "chkhsh": "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267"}, + {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/zai-org/GLM-4.7-Flash", "chkhsh": "cdf5f35325780597efd76153d4d1c16778f766173908894c04afc20108536267"}, ] def download_file_with_auth(url, token, save_path): diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index f332dbe7e5..a23950d007 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -2041,10 +2041,6 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { tokenizer_pre == "solar-open") { pre_type = LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN; clean_spaces = false; - } else if ( - tokenizer_pre == "glm-4.7-flash") { - pre_type = LLAMA_VOCAB_PRE_TYPE_GLM_4_7_FLASH; - clean_spaces = false; } else { throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str())); } diff --git a/src/llama-vocab.h b/src/llama-vocab.h index 20f94102a0..28c3a82b91 100644 --- a/src/llama-vocab.h +++ b/src/llama-vocab.h @@ -54,7 +54,6 @@ enum llama_vocab_pre_type { LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN = 43, LLAMA_VOCAB_PRE_TYPE_YOUTU = 44, LLAMA_VOCAB_PRE_TYPE_EXAONE_MOE = 45, - LLAMA_VOCAB_PRE_TYPE_GLM_4_7_FLASH = 46, }; struct LLM_KV;