update docs

lhoestq · lhoestq · commit 5ebad86723a9 · 2022-10-05T15:04:04.000+02:00
diff --git a/docs/source/use_with_pytorch.mdx b/docs/source/use_with_pytorch.mdx
@@ -86,20 +86,36 @@ To get a single tensor, you must explicitly use the [`Array`] feature type and s
 {'data': tensor([0, 0, 1])}
 ```
 
-However, since it's not possible to convert text data to PyTorch tensors, you can't format a `string` column to PyTorch.
-Instead, you can explicitly format certain columns and leave the other columns unformatted:
+String and binary objects are left unchanged, since PyTorch tensors can only hold numerical data.
+
+The [`Image`] and [`Audio`] feature types are also supported:
 
 ```py
->>> from datasets import Dataset, Features 
->>> text = ["foo", "bar"]
->>> data = [0, 1] 
->>> ds = Dataset.from_dict({"text": text, "data": data})  
->>> ds = ds.with_format("torch", columns=["data"], output_all_columns=True) 
->>> ds[:2]                                                                                                                                                     
-{'data': tensor([0, 1]), 'text': ['foo', 'bar']}
+>>> from datasets import Dataset, Features, Audio, Image
+>>> data = ["path/to/image.png"]
+>>> features = Features({"data": Image()})
+>>> ds = Dataset.from_dict({"data": data}, features=features) 
+>>> ds = ds.with_format("torch")  
+>>> ds[0]
+{'data': tensor([[[255, 215, 106, 255],
+         [255, 215, 106, 255],
+         ...,
+         [255, 255, 255, 255],
+         [255, 255, 255, 255]]], dtype=torch.uint8)}
 ```
 
-The [`Image`] and [`Audio`] feature types are not supported yet.
+```py
+>>> from datasets import Dataset, Features, Audio, Image
+>>> data = ["path/to/audio.wav"]
+>>> features = Features({"data": Audio()})
+>>> ds = Dataset.from_dict({"data": data}, features=features) 
+>>> ds = ds.with_format("torch")  
+>>> ds[0]["data"]["array"]
+tensor([ 6.1035e-05,  1.5259e-05,  1.6785e-04,  ..., -1.5259e-05,
+        -1.5259e-05,  1.5259e-05])
+>>> ds[0]["data"]["sampling_rate"]
+tensor(44100)
+```
 
 ## Data loading
 
diff --git a/docs/source/use_with_tensorflow.mdx b/docs/source/use_with_tensorflow.mdx
@@ -89,7 +89,7 @@ To get a single tensor, you must explicitly use the Array feature type and speci
 {'data': <tf.Tensor: shape=(3,), dtype=int64, numpy=array([0, 0, 1])>
 ```
 
-Strings are also supported:
+Strings and binary objects are also supported:
 
 ```py
 >>> from datasets import Dataset, Features 
@@ -111,7 +111,38 @@ You can also explicitly format certain columns and leave the other columns unfor
  'text': ['foo', 'bar']}
 ```
 
-The [`Image`] and [`Audio`] feature types are not supported yet.
+String and binary objects are converted to tensors as well, since TensorFlow has a native string type (`tf.string`).
+
+The [`Image`] and [`Audio`] feature types are also supported:
+
+```py
+>>> from datasets import Dataset, Features, Audio, Image
+>>> data = ["path/to/image.png"]
+>>> features = Features({"data": Image()})
+>>> ds = Dataset.from_dict({"data": data}, features=features) 
+>>> ds = ds.with_format("tf")  
+>>> ds[0]
+{'data': <tf.Tensor: shape=(215, 1200, 4), dtype=uint8, numpy=
+ array([[[255, 215, 106, 255],
+         [255, 215, 106, 255],
+         ...,
+         [255, 255, 255, 255],
+         [255, 255, 255, 255]]], dtype=uint8)>}
+```
+
+```py
+>>> from datasets import Dataset, Features, Audio, Image
+>>> data = ["path/to/audio.wav"]
+>>> features = Features({"data": Audio()})
+>>> ds = Dataset.from_dict({"data": data}, features=features) 
+>>> ds = ds.with_format("tf")  
+>>> ds[0]["data"]["array"]
+<tf.Tensor: shape=(202311,), dtype=float32, numpy=
+array([ 6.1035156e-05,  1.5258789e-05,  1.6784668e-04, ...,
+       -1.5258789e-05, -1.5258789e-05,  1.5258789e-05], dtype=float32)>
+>>> ds[0]["data"]["sampling_rate"]
+<tf.Tensor: shape=(), dtype=int32, numpy=44100>
+```
 
 ## Data loading