Skip to content

Commit 108caa9

Browse files
ChengYehLi
authored and committed
add quoting=csv.QUOTE_NONE
1 parent c232e63 commit 108caa9

2 files changed

Lines changed: 6 additions & 2 deletions

File tree

libmultilabel/linear/preprocessor.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,8 @@
1212
from sklearn.feature_extraction.text import TfidfVectorizer
1313
from sklearn.preprocessing import MultiLabelBinarizer
1414

15+
import csv
16+
1517
__all__ = ['Preprocessor']
1618

1719

@@ -141,7 +143,7 @@ def _generate_label_mapping(self, labels, classes=None):
141143
def read_libmultilabel_format(path: str) -> 'dict[str,list[str]]':
142144
data = pd.read_csv(path, sep='\t', header=None,
143145
dtype=str,
144-
on_bad_lines='skip').fillna('')
146+
on_bad_lines='skip', quoting=csv.QUOTE_NONE).fillna('')
145147
if data.shape[1] == 2:
146148
data.columns = ['label', 'text']
147149
data = data.reset_index()

libmultilabel/nn/data_utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
from torchtext.vocab import build_vocab_from_iterator, pretrained_aliases
1414
from tqdm import tqdm
1515

16+
import csv
17+
1618
transformers.logging.set_verbosity_error()
1719
warnings.simplefilter(action='ignore', category=FutureWarning)
1820

@@ -136,7 +138,7 @@ def _load_raw_data(path, is_test=False, tokenize_text=True, remove_no_label_data
136138
"""
137139
logging.info(f'Load data from {path}.')
138140
data = pd.read_csv(path, sep='\t', header=None,
139-
error_bad_lines=False, warn_bad_lines=True).fillna('')
141+
error_bad_lines=False, warn_bad_lines=True, quoting=csv.QUOTE_NONE).fillna('')
140142
if data.shape[1] == 2:
141143
data.columns = ['label', 'text']
142144
data = data.reset_index()

0 commit comments

Comments
 (0)