Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
209 changes: 209 additions & 0 deletions convert_imatrix_gguf_to_dat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
from __future__ import annotations

import os
import sys
import logging
import argparse

from pathlib import Path
from dataclasses import dataclass

import numpy as np
import numpy.typing as npt

if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf


# Module-level logger used by the reader, the writer and the command-line entry point.
logger = logging.getLogger("gguf-to-imatrix")


def _key_names(attr: str, fallback: str) -> set[str]:
    """Return every spelling under which a GGUF metadata key may appear.

    The literal ``fallback`` is always part of the result. If
    ``gguf.Keys.IMatrix`` exposes ``attr``, its value is included as well;
    an ``AttributeError`` raised while looking it up is ignored so the
    fallback alone is used.
    """
    try:
        canonical = getattr(gguf.Keys.IMatrix, attr)
    except AttributeError:
        # The available gguf module does not define this key constant.
        return {fallback}
    return {fallback, canonical}


# Accepted names for each imatrix metadata key: the literal spelling plus, when
# it can be looked up, the value of the matching gguf.Keys.IMatrix attribute.
CHUNK_COUNT_KEYS = _key_names('CHUNK_COUNT', 'imatrix.chunk_count')
CHUNK_SIZE_KEYS = _key_names('CHUNK_SIZE', 'imatrix.chunk_size')
DATASET_KEYS = _key_names('DATASETS', 'imatrix.datasets')


@dataclass
class IMatrixEntry:
    """Imatrix data held for a single tensor name.

    ``values`` carries the tensor read from a ``.sums`` / ``.in_sum2`` GGUF
    tensor and ``counts`` the one read from the matching ``.counts`` tensor.
    """

    values: npt.NDArray[np.float32]
    counts: npt.NDArray[np.float32]


class IMatrixDatWriter:
    """Writes the old binary imatrix .dat format.

    Layout produced by :meth:`write` (integers are int32 and values are
    float32, both in the byte order NumPy uses natively on this machine):

    - number of entries
    - for every entry: name length, UTF-8 name bytes, ``ncall``, ``nval``,
      then ``nval`` values
    - ``chunk_count``
    - dataset length followed by the UTF-8 dataset bytes
    """

    def __init__(self, outfile: Path):
        self.outfile = outfile
        self.chunk_size: int = 512
        self.chunk_count: int = 0
        self.dataset: str = ""
        self.entries: dict[str, IMatrixEntry] = {}

    @staticmethod
    def _write_i32(f, value: int) -> None:
        """Write ``value`` to ``f`` as a single int32."""
        np.array([value], dtype=np.int32).tofile(f)

    def write(self) -> None:
        """Serialize all entries to ``self.outfile``.

        Each entry's values are divided by ``chunk_size`` before being
        written, and ``ncall`` is the first count divided by ``chunk_size``,
        truncated to an integer (0 when the entry has no counts).

        Raises:
            ValueError: if ``chunk_size`` is 0 (it is used as a divisor).
        """
        if self.chunk_size == 0:
            raise ValueError("chunk_size is 0, cannot write imatrix")

        with open(self.outfile, "wb") as f:
            self._write_i32(f, len(self.entries))

            for name, entry in self.entries.items():
                name_bytes = name.encode("utf-8")
                self._write_i32(f, len(name_bytes))
                f.write(name_bytes)

                # Flatten first: tofile() emits every element, so the header
                # must carry the total element count. len() of a 2-D array
                # would only report the size of its first axis.
                values = np.asarray(entry.values, dtype=np.float32).ravel()
                counts = np.asarray(entry.counts).ravel()

                ncall = int(counts[0] / self.chunk_size) if counts.size else 0
                nval = int(values.size)
                self._write_i32(f, ncall)
                self._write_i32(f, nval)

                (values / np.float32(self.chunk_size)).astype(np.float32).tofile(f)

                logger.debug("  %s: ncall=%d, nval=%d", name, ncall, nval)

            self._write_i32(f, self.chunk_count)

            dataset_bytes = self.dataset.encode("utf-8")
            self._write_i32(f, len(dataset_bytes))
            if dataset_bytes:
                f.write(dataset_bytes)


class GGUFIMatrixReader:
    """Reads imatrix data from a GGUF file.

    After construction the instance exposes:

    - ``chunk_count``, ``chunk_size``, ``dataset``: values of the matching
      metadata keys, or the defaults ``0``, ``512`` and ``""`` when a key is
      absent.
    - ``entries``: one ``IMatrixEntry`` per tensor whose name ends in one of
      ``SUMS_SUFFIXES``, keyed by the tensor name with that suffix removed.
    """

    SUMS_SUFFIXES = (".sums", ".in_sum2")
    COUNTS_SUFFIX = ".counts"

    def __init__(self, gguf_path: Path):
        reader = gguf.GGUFReader(gguf_path)

        self.chunk_count: int = 0
        self.chunk_size: int = 512
        self.dataset: str = ""
        self.entries: dict[str, IMatrixEntry] = {}

        self._read_metadata(reader)
        tensor_map = self._read_tensors(reader)
        self._pair_tensors(tensor_map)

        logger.info("Loaded %d imatrix entries from GGUF", len(self.entries))

        if not self.entries:
            self._log_no_entries(tensor_map)

    def _read_metadata(self, reader) -> None:
        """Copy chunk count, chunk size and dataset name out of the KV metadata."""
        for field in reader.fields.values():
            key = field.name
            if not (key in CHUNK_COUNT_KEYS or key in CHUNK_SIZE_KEYS or key in DATASET_KEYS):
                continue

            if len(field.data) == 0:
                # Nothing to index (presumably an empty array value); without
                # this guard field.data[0] below would raise IndexError.
                logger.warning("Metadata key %r has no value, keeping default", key)
                continue

            payload = field.parts[field.data[0]]
            if key in CHUNK_COUNT_KEYS:
                self.chunk_count = int(payload[0])
            elif key in CHUNK_SIZE_KEYS:
                self.chunk_size = int(payload[0])
            else:
                self.dataset = bytes(payload).decode("utf-8")

    @staticmethod
    def _read_tensors(reader) -> dict[str, npt.NDArray[np.float32]]:
        """Return a float32 copy of every tensor in the file, keyed by name."""
        tensor_map: dict[str, npt.NDArray[np.float32]] = {}
        for tensor in reader.tensors:
            tensor_map[tensor.name] = np.array(tensor.data, dtype=np.float32)
            logger.debug("  Tensor: %s shape=%s", tensor.name, tensor_map[tensor.name].shape)
        return tensor_map

    def _pair_tensors(self, tensor_map: dict[str, npt.NDArray[np.float32]]) -> None:
        """Fill ``self.entries`` by matching each sums tensor with its counts tensor."""
        sums_tensors: dict[str, npt.NDArray[np.float32]] = {}
        counts_tensors: dict[str, npt.NDArray[np.float32]] = {}

        for tname, tdata in tensor_map.items():
            # A sums suffix takes precedence over the counts suffix.
            for suffix in self.SUMS_SUFFIXES:
                if tname.endswith(suffix):
                    sums_tensors[tname[:-len(suffix)]] = tdata
                    break
            else:
                if tname.endswith(self.COUNTS_SUFFIX):
                    counts_tensors[tname[:-len(self.COUNTS_SUFFIX)]] = tdata

        for name, sums in sums_tensors.items():
            counts = counts_tensors.get(name)
            if counts is None:
                logger.warning("No counts tensor for %r, assuming 0", name)
                counts = np.array([0.0], dtype=np.float32)
            self.entries[name] = IMatrixEntry(values=sums, counts=counts)

    def _log_no_entries(self, tensor_map: dict[str, npt.NDArray[np.float32]]) -> None:
        """Log diagnostics explaining why no sums/counts pair was recognized."""
        logger.error("No imatrix tensor pairs found!")
        logger.error(
            "Expected pairs like '<name>%s' + '<name>%s'",
            self.SUMS_SUFFIXES[0], self.COUNTS_SUFFIX
        )
        if tensor_map:
            logger.error("Tensors actually present in the file (%d):", len(tensor_map))
            for n in sorted(tensor_map):
                logger.error("  %s", n)
        else:
            logger.error("The file contains no tensors at all.")
        logger.error(
            "This file may not be a GGUF imatrix, or it may use a "
            "naming convention this script doesn't recognize yet."
        )

    def to_writer(self, outfile: Path) -> IMatrixDatWriter:
        """Build an ``IMatrixDatWriter`` for ``outfile`` carrying this reader's data.

        The writer receives the same ``entries`` dict object, not a copy.
        """
        writer = IMatrixDatWriter(outfile)
        writer.chunk_count = self.chunk_count
        writer.chunk_size = self.chunk_size
        writer.dataset = self.dataset
        writer.entries = self.entries
        return writer


def parse_args():
    """Parse the command line and return the resulting argparse namespace.

    The namespace carries ``imatrix`` (input path), ``outfile`` (output path
    or ``None`` when not given) and ``verbose`` (bool).
    """
    cli = argparse.ArgumentParser(description="Convert a GGUF imatrix file to the old imatrix.dat format")
    cli.add_argument("--outfile", type=Path, help="path to write to; default: based on input.")
    cli.add_argument("--verbose", action="store_true", help="increase output verbosity")
    cli.add_argument("imatrix", type=Path, help="path to a GGUF imatrix file")
    return cli.parse_args()


if __name__ == "__main__":
    # Script entry point: parse arguments, settle the output path, convert.
    args = parse_args()
    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=log_level)

    if args.outfile is None:
        # No explicit output: swap a ".gguf" suffix for ".dat", otherwise
        # append ".dat" to the full input name.
        source: Path = args.imatrix
        args.outfile = (
            source.with_suffix(".dat")
            if source.suffix == ".gguf"
            else Path(str(source) + ".dat")
        )

        # Only the derived default is protected against overwriting.
        if args.outfile.exists():
            logger.error(
                "Default output already exists, use --outfile to overwrite: %s",
                args.outfile
            )
            sys.exit(1)

    reader = GGUFIMatrixReader(args.imatrix)
    if not reader.entries:
        logger.error("Nothing to write (no entries). Re-run with --verbose for details.")
        sys.exit(1)

    writer = reader.to_writer(args.outfile)
    writer.write()

    logger.info("Wrote %d entries to %s", len(writer.entries), args.outfile)
6 changes: 6 additions & 0 deletions gguf-py/gguf/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,11 @@ class Adapter:
TYPE = "adapter.type"
LORA_ALPHA = "adapter.lora.alpha"

class IMatrix:
CHUNK_COUNT = "imatrix.chunk_count"
CHUNK_SIZE = "imatrix.chunk_size"
DATASETS = "imatrix.datasets"

#
# recommended mapping of model tensor names for storage in gguf
#
Expand All @@ -194,6 +199,7 @@ class Adapter:
class GGUFType:
MODEL = "model"
ADAPTER = "adapter"
IMATRIX = "imatrix"


class MODEL_ARCH(IntEnum):
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@
-r ./requirements/requirements-convert_llama_ggml_to_gguf.txt
-r ./requirements/requirements-convert_lora_to_gguf.txt
-r ./requirements/requirements-tool_bench.txt
-r ./requirements/requirements-convert_imatrix_gguf_to_dat.txt
1 change: 1 addition & 0 deletions requirements/requirements-convert_imatrix_gguf_to_dat.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
numpy~=1.26.4