Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion datasets/xtreme/dataset_infos.json

Large diffs are not rendered by default.

Binary file added datasets/xtreme/dummy/PAN-X.ace/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.af/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.als/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.am/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.an/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ang/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ar/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.arc/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.arz/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.as/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ast/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ay/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.az/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ba/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.bar/1.1.0/dummy_data.zip
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.be/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.bg/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.bh/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.bn/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.bo/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.br/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.bs/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ca/1.1.0/dummy_data.zip
Binary file not shown.
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.cdo/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ce/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ceb/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ckb/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.co/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.crh/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.cs/1.1.0/dummy_data.zip
Binary file not shown.
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.cv/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.cy/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.da/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.de/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.diq/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.dv/1.1.0/dummy_data.zip
Binary file not shown.
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.eml/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.en/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.eo/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.es/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.et/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.eu/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ext/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.fa/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.fi/1.1.0/dummy_data.zip
Binary file not shown.
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.fo/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.fr/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.frr/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.fur/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.fy/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ga/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.gan/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.gd/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.gl/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.gn/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.gu/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.hak/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.he/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.hi/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.hr/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.hsb/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.hu/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.hy/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ia/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.id/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ig/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ilo/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.io/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.is/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.it/1.1.0/dummy_data.zip
Binary file not shown.
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.jbo/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.jv/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ka/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.kk/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.km/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.kn/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ko/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ksh/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ku/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ky/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.la/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.lb/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.li/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.lij/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.lmo/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ln/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.lt/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.lv/1.1.0/dummy_data.zip
Binary file not shown.
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.mg/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.mhr/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.mi/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.min/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.mk/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ml/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.mn/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.mr/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ms/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.mt/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.mwl/1.1.0/dummy_data.zip
Binary file not shown.
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.mzn/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.nap/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.nds/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ne/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.nl/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.nn/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.no/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.nov/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.oc/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.or/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.os/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.pa/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.pdc/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.pl/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.pms/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.pnb/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ps/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.pt/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.qu/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.rm/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ro/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ru/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.rw/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.sa/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.sah/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.scn/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.sco/1.1.0/dummy_data.zip
Binary file not shown.
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.sh/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.si/1.1.0/dummy_data.zip
Binary file not shown.
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.sk/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.sl/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.so/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.sq/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.sr/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.su/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.sv/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.sw/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.szl/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ta/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.te/1.1.0/dummy_data.zip
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.tk/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.tl/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.tr/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.tt/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ug/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.uk/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.ur/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.uz/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.vec/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.vep/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.vi/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.vls/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.vo/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.wa/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.war/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.wuu/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.xmf/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.yi/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.yo/1.1.0/dummy_data.zip
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.zea/1.1.0/dummy_data.zip
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added datasets/xtreme/dummy/PAN-X.zh/1.1.0/dummy_data.zip
Binary file not shown.
39 changes: 9 additions & 30 deletions datasets/xtreme/xtreme.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@
"yo",
"zh",
]
_PAN_X_FOLDER = "AmazonPhotos.zip"

_NAMES = ["XNLI", "tydiqa", "SQuAD"]
for lang in _PAN_X_LANG:
_NAMES.append("PAN-X.{}".format(lang))
Expand Down Expand Up @@ -373,7 +373,7 @@
"tatoeba": "https://github.com/facebookresearch/LASER/raw/master/data/tatoeba/v1",
"udpos": "https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/11234/1-3105/ud-treebanks-v2.5.tgz",
"SQuAD": "https://rajpurkar.github.io/SQuAD-explorer/dataset/",
"PAN-X": "",
"PAN-X": "https://www.dropbox.com/s/12h3qqog6q4bjve/panx_dataset.tar?dl=1",
}

_URLS = {
Expand All @@ -386,7 +386,7 @@
"tatoeba": "https://github.com/facebookresearch/LASER/blob/master/data/tatoeba/v1/README.md",
"udpos": "https://universaldependencies.org/",
"SQuAD": "https://rajpurkar.github.io/SQuAD-explorer/",
"PAN-X": "",
"PAN-X": "https://github.com/afshinrahimi/mmner",
}


Expand Down Expand Up @@ -427,16 +427,6 @@ class Xtreme(datasets.GeneratorBasedBuilder):
for name in _NAMES
]

@property
def manual_download_instructions(self):
if self.config.name.startswith("PAN-X"):
return """\
You need to manually download the AmazonPhotos.zip file on Amazon Cloud Drive
(https://www.amazon.com/clouddrive/share/d3KGCRCIYwhKJF0H3eWA26hjg2ZCRhjpEQtDL70FSBN). The folder containing the saved file
can be used to load the dataset via `datasets.load_dataset("xtreme", data_dir="<path/to/folder>").
"""
return None

def _info(self):
# TODO(xtreme): Specifies the datasets.DatasetInfo object
features = {text_feature: datasets.Value("string") for text_feature in self.config.text_features.keys()}
Expand Down Expand Up @@ -704,6 +694,7 @@ def _split_generators(self, dl_manager):
]
else:
return [
# We exclude Arabic-NYUAD, which does not contain any words, only _
datasets.SplitGenerator(
name=datasets.Split.VALIDATION,
# These kwargs will be passed to _generate_examples
Expand All @@ -714,7 +705,6 @@ def _split_generators(self, dl_manager):
for file in sorted(os.listdir(folder))
if "NYUAD" not in folder and "dev" in file and file.endswith(".conllu")
]
# we exclude Arabic NYUAD, which does not contain any words, only _
},
),
datasets.SplitGenerator(
Expand Down Expand Up @@ -759,26 +749,15 @@ def _split_generators(self, dl_manager):
]

if self.config.name.startswith("PAN-X"):
path_to_manual_folder = os.path.abspath(os.path.expanduser(dl_manager.manual_dir))
panx_path = os.path.join(path_to_manual_folder, _PAN_X_FOLDER)
if not os.path.exists(panx_path):
raise FileNotFoundError(
"{} does not exist. Make sure you insert a manual dir via `datasets.load_dataset('xtreme', data_dir=...)` that includes {}. Manual download instructions: {}".format(
panx_path, _PAN_X_FOLDER, self.manual_download_instructions
)
)

panx_dl_dir = dl_manager.extract(panx_path)
panx_dl_dir = dl_manager.download_and_extract(self.config.data_url)
lang = self.config.name.split(".")[1]
lang_folder = dl_manager.extract(os.path.join(panx_dl_dir, "panx_dataset", lang + ".tar.gz"))
lang_folder = dl_manager.extract(os.path.join(panx_dl_dir, lang + ".tar.gz"))

return [
datasets.SplitGenerator(
name=datasets.Split.VALIDATION,
# These kwargs will be passed to _generate_examples
gen_kwargs={
"filepath": os.path.join(lang_folder, "dev")
# we exclude Arabic NYUAD, which does not contain any words, only _
},
gen_kwargs={"filepath": os.path.join(lang_folder, "dev")},
),
datasets.SplitGenerator(
name=datasets.Split.TEST,
Expand Down Expand Up @@ -922,7 +901,7 @@ def _generate_examples(self, filepath):
with open(file, encoding="utf-8") as f:
data = csv.reader(f, delimiter="\t", quoting=csv.QUOTE_NONE)
for id_row, row in enumerate(data):
if len(row) >= 10 and row[1] != "_":
if len(row) >= 10 and row[1] != "_" and row[3] != "_":
yield str(id_file) + "_" + str(id_row), {"token": row[1], "pos_tag": row[3]}
if self.config.name.startswith("PAN-X"):
guid_index = 1
Expand Down