src/datasets/packaged_modules/text: 1 file changed, +4 -1 lines changed

@@ -19,6 +19,7 @@ class TextConfig(datasets.BuilderConfig):
19 19
20 20       features: Optional[datasets.Features] = None
21 21       encoding: str = "utf-8"
   22 +     errors: str = "strict"
22 23       chunksize: int = 10 << 20  # 10MB
23 24       keep_linebreaks: bool = False
24 25       sample_by: str = "line"
@@ -70,7 +71,9 @@ def _generate_tables(self, files):
70 71           pa_table_names = list(self.config.features) if self.config.features is not None else ["text"]
71 72           for file_idx, file in enumerate(itertools.chain.from_iterable(files)):
72 73               # open in text mode, by default translates universal newlines ("\n", "\r\n" and "\r") into "\n"
73    -             with open(file, encoding=self.config.encoding) as f:
   74 +             with open(file,
   75 +                       encoding=self.config.encoding,
   76 +                       errors=self.config.errors) as f:
74 77                   if self.config.sample_by == "line":
75 78                       batch_idx = 0
76 79                       while True:
You can’t perform that action at this time.
0 commit comments