Skip to content

Commit acad411

Browse files
authored
Merge pull request #2049 from PierreMesure/no_ascii
Add force_ascii=False when generating JSONL
2 parents 750db5c + 25b4e61 commit acad411

File tree

3 files changed

+57
-6
lines changed

3 files changed

+57
-6
lines changed

giskard/rag/report.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -156,18 +156,24 @@ def save(self, folder_path: str):
156156

157157
report_details = {"recommendation": self._recommendation}
158158
with open(path / "report_details.json", "w", encoding="utf-8") as f:
159-
json.dump(report_details, f)
159+
json.dump(report_details, f, ensure_ascii=False)
160160

161-
self._knowledge_base._knowledge_base_df.to_json(path / "knowledge_base.jsonl", orient="records", lines=True)
161+
self._knowledge_base._knowledge_base_df.to_json(
162+
path / "knowledge_base.jsonl", orient="records", lines=True, force_ascii=False
163+
)
162164
with open(path / "knowledge_base_meta.json", "w", encoding="utf-8") as f:
163-
json.dump(self._knowledge_base.get_savable_data(), f)
165+
json.dump(self._knowledge_base.get_savable_data(), f, ensure_ascii=False)
164166

165167
with open(path / "agent_answer.json", "w", encoding="utf-8") as f:
166-
json.dump([{"message": output.message, "documents": output.documents} for output in self._model_outputs], f)
168+
json.dump(
169+
[{"message": output.message, "documents": output.documents} for output in self._model_outputs],
170+
f,
171+
ensure_ascii=False,
172+
)
167173

168174
if self._metrics_results is not None:
169175
with open(path / "metrics_results.json", "w", encoding="utf-8") as f:
170-
json.dump(self._metrics_results, f)
176+
json.dump(self._metrics_results, f, ensure_ascii=False)
171177

172178
@classmethod
173179
def load(

giskard/rag/testset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def save(self, path):
9696
path : str
9797
The path to the output JSONL file.
9898
"""
99-
self._dataframe.reset_index().to_json(path, orient="records", lines=True)
99+
self._dataframe.reset_index().to_json(path, orient="records", lines=True, force_ascii=False)
100100

101101
@classmethod
102102
def load(cls, path):

tests/rag/test_qa_testset.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,37 @@ def make_testset_samples():
8787
]
8888

8989

90+
def make_swedish_testset_samples():
    """Build two question samples whose text and metadata contain non-ASCII Swedish characters."""
    camembert_metadata = {
        "question_type": "enkel",
        "color": "blå",
        "topic": "Ost_1",
        "seed_document_id": "1",
    }
    scamorza_metadata = {
        "question_type": "enkel",
        "color": "röd",
        "topic": "Ost_1",
        "seed_document_id": "2",
    }

    camembert_sample = QuestionSample(
        id="1",
        question="Vilken mjölk används för att göra Camembert?",
        reference_answer="Komjölk används för att göra Camembert.",
        reference_context="Camembert är en fuktig, mjuk, krämig, ytmognad ost av komjölk.",
        conversation_history=[],
        metadata=camembert_metadata,
    )
    scamorza_sample = QuestionSample(
        id="2",
        question="Varifrån kommer Scamorza?",
        reference_answer="Scamorza kommer från södra Italien.",
        reference_context="Scamorza är en ost av komjölk från södra Italien.",
        conversation_history=[],
        metadata=scamorza_metadata,
    )

    return [camembert_sample, scamorza_sample]
119+
120+
90121
def test_qa_testset_creation():
91122
question_samples = make_testset_samples()
92123
testset = QATestset(question_samples)
@@ -146,6 +177,20 @@ def test_qa_testset_saving_loading(tmp_path):
146177
)
147178

148179

180+
def test_qa_testset_saving_loading_swedish(tmp_path):
    """Round-trip a testset with Swedish (non-ASCII) content through save/load.

    Verifies that the saved JSONL file holds the characters unescaped and that
    every row's metadata is identical after loading.
    """
    testset = QATestset(make_swedish_testset_samples())
    path = tmp_path / "testset.jsonl"
    testset.save(path)

    # A round trip alone also succeeds with \uXXXX escapes, so inspect the raw
    # file to confirm the non-ASCII text was written as-is.
    assert "mjölk" in path.read_text(encoding="utf-8")

    loaded_testset = QATestset.load(path)

    original_metadata = list(testset._dataframe["metadata"])
    loaded_metadata = list(loaded_testset._dataframe["metadata"])

    # zip() stops at the shorter input; without this check a load that drops
    # rows would let the comparison below pass vacuously.
    assert len(loaded_metadata) == len(original_metadata)
    assert all(original == loaded for original, loaded in zip(original_metadata, loaded_metadata))
192+
193+
149194
def test_metadata_value_retrieval():
150195
testset = QATestset(make_testset_samples())
151196

0 commit comments

Comments
 (0)