From 4324a5fe207542746586332e998036a2706c4d6b Mon Sep 17 00:00:00 2001 From: Mohammad Ali Sadraei Date: Tue, 13 Feb 2024 13:44:32 +0330 Subject: [PATCH] An auto conversion for torch if the dataset format is uint16 or uint32 --- src/datasets/formatting/torch_formatter.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/datasets/formatting/torch_formatter.py b/src/datasets/formatting/torch_formatter.py index a4e899ac43b..cf287e67eb8 100644 --- a/src/datasets/formatting/torch_formatter.py +++ b/src/datasets/formatting/torch_formatter.py @@ -58,6 +58,12 @@ def _tensorize(self, value): if isinstance(value, (np.number, np.ndarray)) and np.issubdtype(value.dtype, np.integer): default_dtype = {"dtype": torch.int64} + + # Convert dtype to np.int64 if it's either np.uint16 or np.uint32 to ensure compatibility. + # np.uint64 is excluded from this conversion as there is no compatible PyTorch dtype that can handle it without loss. + if value.dtype in [np.uint16, np.uint32]: + value = value.astype(np.int64) + elif isinstance(value, (np.number, np.ndarray)) and np.issubdtype(value.dtype, np.floating): default_dtype = {"dtype": torch.float32} elif config.PIL_AVAILABLE and "PIL" in sys.modules: