1818
1919"""
2020Convenience methods for calculating a number of similarity error
21- measures between a reference and hypothesis sentence.
22- These measures are
23- commonly used to measure the performance for an automatic speech recognition
24- (ASR) system.
21+ measures between one or more reference and hypothesis sentences.
22+ These measures are commonly used to measure the performance of
23+ an automatic speech recognition (ASR) system.
2524
2625The following measures are implemented:
2726
4241[jiwer.CharacterOutput][process.CharacterOutput]
4342classes.
4443"""
45- import warnings
46-
47- from typing import List , Union , Dict , Any
44+ from typing import List , Union
4845
4946from jiwer import transforms as tr
5047from jiwer .transformations import wer_default , cer_default
5653 "wil" ,
5754 "wip" ,
5855 "cer" ,
59- "compute_measures" ,
6056]
6157
6258########################################################################################
@@ -68,8 +64,6 @@ def wer(
6864 hypothesis : Union [str , List [str ]] = None ,
6965 reference_transform : Union [tr .Compose , tr .AbstractTransform ] = wer_default ,
7066 hypothesis_transform : Union [tr .Compose , tr .AbstractTransform ] = wer_default ,
71- truth : Union [str , List [str ]] = None ,
72- truth_transform : Union [tr .Compose , tr .AbstractTransform ] = None ,
7367) -> float :
7468 """
7569 Calculate the word error rate (WER) between one or more reference and
@@ -80,37 +74,15 @@ def wer(
8074 hypothesis: The hypothesis sentence(s)
8175 reference_transform: The transformation(s) to apply to the reference string(s)
8276 hypothesis_transform: The transformation(s) to apply to the hypothesis string(s)
83- truth: Deprecated, renamed to `reference`
84- truth_transform: Deprecated, renamed to `reference_transform`
85-
86- Deprecated:
87- Arguments `truth` and `truth_transform` have been renamed to respectively
88- `reference` and `reference_transform`. Therefore, the keyword arguments
89- `truth` and `truth_transform` will be removed in the next release.
90- At the same time, `reference` and `reference_transform` will lose their
91- default value.
9277
9378 Returns:
9479 (float): The word error rate of the given reference and
9580 hypothesis sentence(s).
9681 """
97- (
98- reference ,
99- hypothesis ,
100- reference_transform ,
101- hypothesis_transform ,
102- ) = _deprecate_truth (
103- reference = reference ,
104- hypothesis = hypothesis ,
105- truth = truth ,
106- reference_transform = reference_transform ,
107- truth_transform = truth_transform ,
108- hypothesis_transform = hypothesis_transform ,
109- )
110-
11182 output = process_words (
11283 reference , hypothesis , reference_transform , hypothesis_transform
11384 )
85+
11486 return output .wer
11587
11688
@@ -119,8 +91,6 @@ def mer(
11991 hypothesis : Union [str , List [str ]] = None ,
12092 reference_transform : Union [tr .Compose , tr .AbstractTransform ] = wer_default ,
12193 hypothesis_transform : Union [tr .Compose , tr .AbstractTransform ] = wer_default ,
122- truth : Union [str , List [str ]] = None ,
123- truth_transform : Union [tr .Compose , tr .AbstractTransform ] = None ,
12494) -> float :
12595 """
12696 Calculate the match error rate (MER) between one or more reference and
@@ -131,34 +101,11 @@ def mer(
131101 hypothesis: The hypothesis sentence(s)
132102 reference_transform: The transformation(s) to apply to the reference string(s)
133103 hypothesis_transform: The transformation(s) to apply to the hypothesis string(s)
134- truth: Deprecated, renamed to `reference`
135- truth_transform: Deprecated, renamed to `reference_transform`
136-
137- Deprecated:
138- Arguments `truth` and `truth_transform` have been renamed to respectively
139- `reference` and `reference_transform`. Therefore, the keyword arguments
140- `truth` and `truth_transform` will be removed in the next release.
141- At the same time, `reference` and `reference_transform` will lose their
142- default value.
143104
144105 Returns:
145106 (float): The match error rate of the given reference and
146107 hypothesis sentence(s).
147108 """
148- (
149- reference ,
150- hypothesis ,
151- reference_transform ,
152- hypothesis_transform ,
153- ) = _deprecate_truth (
154- reference = reference ,
155- hypothesis = hypothesis ,
156- truth = truth ,
157- reference_transform = reference_transform ,
158- truth_transform = truth_transform ,
159- hypothesis_transform = hypothesis_transform ,
160- )
161-
162109 output = process_words (
163110 reference , hypothesis , reference_transform , hypothesis_transform
164111 )
@@ -171,8 +118,6 @@ def wip(
171118 hypothesis : Union [str , List [str ]] = None ,
172119 reference_transform : Union [tr .Compose , tr .AbstractTransform ] = wer_default ,
173120 hypothesis_transform : Union [tr .Compose , tr .AbstractTransform ] = wer_default ,
174- truth : Union [str , List [str ]] = None ,
175- truth_transform : Union [tr .Compose , tr .AbstractTransform ] = None ,
176121) -> float :
177122 """
178123 Calculate the word information preserved (WIP) between one or more reference and
@@ -183,34 +128,11 @@ def wip(
183128 hypothesis: The hypothesis sentence(s)
184129 reference_transform: The transformation(s) to apply to the reference string(s)
185130 hypothesis_transform: The transformation(s) to apply to the hypothesis string(s)
186- truth: Deprecated, renamed to `reference`
187- truth_transform: Deprecated, renamed to `reference_transform`
188-
189- Deprecated:
190- Arguments `truth` and `truth_transform` have been renamed to respectively
191- `reference` and `reference_transform`. Therefore, the keyword arguments
192- `truth` and `truth_transform` will be removed in the next release.
193- At the same time, `reference` and `reference_transform` will lose their
194- default value.
195131
196132 Returns:
197133 (float): The word information preserved of the given reference and
198134 hypothesis sentence(s).
199135 """
200- (
201- reference ,
202- hypothesis ,
203- reference_transform ,
204- hypothesis_transform ,
205- ) = _deprecate_truth (
206- reference = reference ,
207- hypothesis = hypothesis ,
208- truth = truth ,
209- reference_transform = reference_transform ,
210- truth_transform = truth_transform ,
211- hypothesis_transform = hypothesis_transform ,
212- )
213-
214136 output = process_words (
215137 reference , hypothesis , reference_transform , hypothesis_transform
216138 )
@@ -223,8 +145,6 @@ def wil(
223145 hypothesis : Union [str , List [str ]] = None ,
224146 reference_transform : Union [tr .Compose , tr .AbstractTransform ] = wer_default ,
225147 hypothesis_transform : Union [tr .Compose , tr .AbstractTransform ] = wer_default ,
226- truth : Union [str , List [str ]] = None ,
227- truth_transform : Union [tr .Compose , tr .AbstractTransform ] = None ,
228148) -> float :
229149 """
230150 Calculate the word information lost (WIL) between one or more reference and
@@ -235,96 +155,18 @@ def wil(
235155 hypothesis: The hypothesis sentence(s)
236156 reference_transform: The transformation(s) to apply to the reference string(s)
237157 hypothesis_transform: The transformation(s) to apply to the hypothesis string(s)
238- truth: Deprecated, renamed to `reference`
239- truth_transform: Deprecated, renamed to `reference_transform`
240-
241- Deprecated:
242- Arguments `truth` and `truth_transform` have been renamed to respectively
243- `reference` and `reference_transform`. Therefore, the keyword arguments
244- `truth` and `truth_transform` will be removed in the next release.
245- At the same time, `reference` and `reference_transform` will lose their
246- default value.
247158
248159 Returns:
249160 (float): The word information lost of the given reference and
250161 hypothesis sentence(s).
251162 """
252- (
253- reference ,
254- hypothesis ,
255- reference_transform ,
256- hypothesis_transform ,
257- ) = _deprecate_truth (
258- reference = reference ,
259- hypothesis = hypothesis ,
260- truth = truth ,
261- reference_transform = reference_transform ,
262- truth_transform = truth_transform ,
263- hypothesis_transform = hypothesis_transform ,
264- )
265-
266163 output = process_words (
267164 reference , hypothesis , reference_transform , hypothesis_transform
268165 )
269166
270167 return output .wil
271168
272169
273- ########################################################################################
274- # deprecated method 'compute_measures'
275-
276-
def compute_measures(
    truth: Union[str, List[str]],
    hypothesis: Union[str, List[str]],
    truth_transform: Union[tr.Compose, tr.AbstractTransform] = wer_default,
    hypothesis_transform: Union[tr.Compose, tr.AbstractTransform] = wer_default,
) -> Dict[str, Any]:
    """
    Efficiently computes all measures using only one function call.

    Deprecated:
        Deprecated method. Superseded by [jiwer.process_words][process.process_words].
        This method will be removed on next release.

    Args:
        truth: The reference sentence(s)
        hypothesis: The hypothesis sentence(s)
        truth_transform: The transformation(s) to apply to the reference string(s)
        hypothesis_transform: The transformation(s) to apply to the hypothesis string(s)

    Returns:
        (dict): A dictionary containing key-value pairs for all measures. The keys
            are `wer`, `mer`, `wil`, `wip`, `hits`, `substitutions`, `deletions`,
            `insertions`, `ops`, `truth` and `hypothesis`.

    """
    # stacklevel=2 attributes the warning to the code calling compute_measures().
    # Without it the warning is attributed to this module, and Python's default
    # filters then hide the DeprecationWarning from the user who has to migrate.
    warnings.warn(
        "jiwer.compute_measures() is deprecated. Please use jiwer.process_words().",
        DeprecationWarning,
        stacklevel=2,
    )

    output = process_words(
        reference=truth,
        hypothesis=hypothesis,
        reference_transform=truth_transform,
        hypothesis_transform=hypothesis_transform,
    )

    # The legacy key names ("ops", "truth") are kept as-is; they map onto the
    # `alignments` and `references` attributes of the process_words() output.
    return {
        "wer": output.wer,
        "mer": output.mer,
        "wil": output.wil,
        "wip": output.wip,
        "hits": output.hits,
        "substitutions": output.substitutions,
        "deletions": output.deletions,
        "insertions": output.insertions,
        "ops": output.alignments,
        "truth": output.references,
        "hypothesis": output.hypotheses,
    }
326-
327-
328170########################################################################################
329171# Implementation of character-error-rate, exposed publicly
330172
@@ -334,10 +176,7 @@ def cer(
334176 hypothesis : Union [str , List [str ]] = None ,
335177 reference_transform : Union [tr .Compose , tr .AbstractTransform ] = cer_default ,
336178 hypothesis_transform : Union [tr .Compose , tr .AbstractTransform ] = cer_default ,
337- return_dict : bool = False ,
338- truth : Union [str , List [str ]] = None ,
339- truth_transform : Union [tr .Compose , tr .AbstractTransform ] = None ,
340- ) -> Union [float , Dict [str , Any ]]:
179+ ) -> float :
341180 """
342181 Calculate the character error rate (CER) between one or more reference and
343182 hypothesis sentences.
@@ -347,91 +186,13 @@ def cer(
347186 hypothesis: The hypothesis sentence(s)
348187 reference_transform: The transformation(s) to apply to the reference string(s)
349188 hypothesis_transform: The transformation(s) to apply to the hypothesis string(s)
350- return_dict: Deprecated option to return the more results in a dict instead of
351- returning only the cer as a single float value
352- truth: Deprecated, renamed to `reference`
353- truth_transform: Deprecated, renamed to `reference_transform`
354-
355- Deprecated:
356- Argument `return_dict` will be deprecated. Please use
357- [jiwer.process_characters][process.process_characters] instead.
358-
359- Arguments `truth` and `truth_transform` have been renamed to respectively
360- `reference` and `reference_transform`. Therefore, the keyword arguments
361- `truth` and `truth_transform` will be removed in the next release.
362- At the same time, `reference` and `reference_transform` will lose their
363- default value.
364189
365190 Returns:
366191 (float): The character error rate of the given reference and hypothesis
367192 sentence(s).
368193 """
369- (
370- reference ,
371- hypothesis ,
372- reference_transform ,
373- hypothesis_transform ,
374- ) = _deprecate_truth (
375- reference = reference ,
376- hypothesis = hypothesis ,
377- truth = truth ,
378- reference_transform = reference_transform ,
379- truth_transform = truth_transform ,
380- hypothesis_transform = hypothesis_transform ,
381- )
382-
383194 output = process_characters (
384195 reference , hypothesis , reference_transform , hypothesis_transform
385196 )
386197
387- if return_dict :
388- warnings .warn (
389- DeprecationWarning (
390- "`return_dict` is deprecated, "
391- "please use jiwer.process_characters() instead."
392- )
393- )
394- return {
395- "cer" : output .cer ,
396- "hits" : output .hits ,
397- "substitutions" : output .substitutions ,
398- "deletions" : output .deletions ,
399- "insertions" : output .insertions ,
400- }
401- else :
402- return output .cer
403-
404-
def _deprecate_truth(
    reference: Union[str, List[str]],
    hypothesis: Union[str, List[str]],
    truth: Union[str, List[str]],
    reference_transform: "Union[tr.Compose, tr.AbstractTransform]",
    hypothesis_transform: "Union[tr.Compose, tr.AbstractTransform]",
    truth_transform: "Union[tr.Compose, tr.AbstractTransform]",
):
    """
    Map the deprecated `truth` / `truth_transform` keyword arguments onto
    `reference` / `reference_transform`.

    Args:
        reference: The reference sentence(s), or `None` when not given
        hypothesis: The hypothesis sentence(s), or `None` when not given
        truth: Deprecated alias of `reference`, or `None` when not given
        reference_transform: The transformation(s) for the reference string(s)
        hypothesis_transform: The transformation(s) for the hypothesis string(s)
        truth_transform: Deprecated alias of `reference_transform`, or `None`

    Returns:
        (tuple): `(reference, hypothesis, reference_transform, hypothesis_transform)`
            with the deprecated aliases substituted where they were given.

    Raises:
        ValueError: If both `reference` and `truth` are given, or if, after
            substitution, `reference` or `hypothesis` is still `None`.
    """
    # This helper sits one frame below the public measure function, so
    # stacklevel=3 attributes the warnings to the user's call site. With the
    # default stacklevel they are attributed to this module, and Python's default
    # filters then hide the DeprecationWarning from the user.
    if truth is not None:
        warnings.warn(
            "keyword argument `truth` is deprecated, please use `reference`.",
            DeprecationWarning,
            stacklevel=3,
        )
        if reference is not None:
            raise ValueError("cannot give `reference` and `truth`")
        reference = truth
    if truth_transform is not None:
        warnings.warn(
            "keyword argument `truth_transform` is deprecated, "
            "please use `reference_transform`.",
            DeprecationWarning,
            stacklevel=3,
        )
        # The deprecated alias takes precedence over `reference_transform`.
        reference_transform = truth_transform

    if reference is None or hypothesis is None:
        raise ValueError(
            "detected default values for reference or hypothesis arguments, "
            "please provide actual string or list of strings"
        )

    return reference, hypothesis, reference_transform, hypothesis_transform
198+ return output .cer
0 commit comments