#
# Usage:
#
#  python3 models/convert-h5-to-ggml.py
#
# This script is similar to "convert-pt-to-ggml.py"
#
@@ -40,15 +40,17 @@ def bytes_to_unicode():
4040 cs = [chr (n ) for n in cs ]
4141 return dict (zip (bs , cs ))
4242
# Command-line arguments:
#   num_parts  - number of pytorch checkpoint shards (0 for a single-file model)
#   model_name - HF model id or local path, e.g. 'bigscience/bloomz-560m'
#   dir-output - directory the ggml file is written to
#   use-f32    - optional flag: store tensors as float32 instead of float16
if len(sys.argv) < 4:
    print("Usage: python convert-hf-to-ggml.py num_parts model_name dir-output [use-f32]")
    print("  num_parts: number of pytorch parts, use 0 if not a multipart model. example: 9")
    print("  model_name: name of the model to convert. Example: 'bigscience/bloomz-560m'")
    print("  dir-output: directory where the output file will be written")
    print("  use-f32: if present, use float32 instead of float16")
    sys.exit(1)

num_parts = int(sys.argv[1])
model_name = sys.argv[2]
dir_out = sys.argv[3]

# make sure the output directory exists
os.makedirs(dir_out, exist_ok=True)
# map from ftype to string
ftype_str = ["f32", "f16"]

# output precision: f16 by default; any 5th argument switches to f32
ftype = 1
if len(sys.argv) > 4:
    ftype = 0

# NOTE(review): trust_remote_code executes model-repo code; only use with
# trusted model sources.
tokenizer = AutoTokenizer.from_pretrained(model_name)
config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
hparams = config.to_dict()
print("* Loading model from: ", model_name)

# output file name, e.g. <dir_out>/ggml-model-bloomz-560m-f16.bin
fname_out = dir_out + f"/ggml-model-{model_name.split('/')[-1]}-{ftype_str[ftype]}.bin"
fout = open(fname_out, "wb")
@@ -90,32 +89,31 @@ def bytes_to_unicode():
9089 text = bytearray ([byte_decoder [c ] for c in reverse_vocab [i ]])
9190 fout .write (struct .pack ("i" , len (text )))
9291 fout .write (text )
93-
94- list_vars = model .state_dict ()
95- for name in list_vars .keys ():
96- src = name
97- data = list_vars [src ].squeeze ().numpy ()
98- data = data .astype (np .float32 )
99-
100- n_dims = len (data .shape )
101- print (name , n_dims , data .shape )
102-
103- # default type is fp32
104- ftype_cur = 0
105- if ftype == 1 and n_dims > 1 :
106- print (" Converting to float16" )
107- data = data .astype (np .float16 )
108- ftype_cur = 1
109-
110- # header
111- str = name .encode ('utf-8' )
112- fout .write (struct .pack ("iii" , n_dims , len (str ), ftype_cur ))
113- for i in range (n_dims ):
114- fout .write (struct .pack ("i" , data .shape [n_dims - 1 - i ]))
115- fout .write (str )
116-
117- # data
118- data .tofile (fout )
# Determine the checkpoint shard file names: a single-file model is just
# 'pytorch_model.bin'; a multipart model uses the HF sharded naming scheme
# 'pytorch_model-00001-of-000NN.bin'.
if num_parts == 0:
    partnames = ('pytorch_model.bin',)
else:
    partnames = (f'pytorch_model-{n:05}-of-{num_parts:05}.bin' for n in range(1, num_parts + 1))

# Load one shard at a time (instead of the whole model) to keep peak memory low,
# and stream each tensor into the ggml output file.
for partname in partnames:
    filename = f'{model_name}/{partname}'
    print(f'\n* Loading part: {partname}')
    model = torch.load(filename, map_location='cpu')
    for name in model.keys():
        data = model[name].squeeze()
        n_dims = len(data.shape)

        # default type is fp32; convert matrices (n_dims > 1) to fp16 when requested
        ftype_cur = 1 if ftype == 1 and n_dims > 1 else 0
        data = data.to(dtype=torch.float16 if ftype_cur == 1 else torch.float32).numpy()
        print('  |', name, data.shape, '->', data.dtype)

        # header: n_dims, name length, ftype, then dims in reverse order, then name
        # (renamed from 'str' to avoid shadowing the builtin)
        encoded_name = name.encode('utf-8')
        fout.write(struct.pack("iii", n_dims, len(encoded_name), ftype_cur))
        for i in range(n_dims):
            fout.write(struct.pack("i", data.shape[n_dims - 1 - i]))
        fout.write(encoded_name)

        # data
        data.tofile(fout)
# flush and close the ggml output file
fout.close()