@@ -84,7 +84,7 @@ cpdef cnp.ndarray calculations(object reference, object hypothesis):
8484 ldm[i, j] = best
8585
8686 ld = ldm[m, n]
87- wer = (< double > ld) / m
87+ wer = (< double > ld) / m if m > 0 else 0.0
8888
8989 insertions, deletions, substitutions = 0 , 0 , 0
9090 inserted_words, deleted_words, substituted_words = [], [], []
@@ -125,7 +125,7 @@ cdef cnp.ndarray _metrics_batch(list references, list hypotheses):
125125 [wer, ld, m, insertions, deletions, substitutions, inserted_words, deleted_words, substituted_words]
126126 """
127127 cdef Py_ssize_t n = len (references)
128- cdef Py_ssize_t idx, j
128+ cdef Py_ssize_t idx
129129
130130 # Rows output, dtype=object because cols 6-8 are lists
131131 cdef cnp.ndarray out = np.empty((n, 9 ), dtype = object )
@@ -138,8 +138,7 @@ cdef cnp.ndarray _metrics_batch(list references, list hypotheses):
138138 if isinstance (r, np.ndarray) and r.ndim == 0 :
139139 r = r.item()
140140
141- for j in range (9 ):
142- out[idx, j] = r[j]
141+ out[idx, :] = r
143142
144143 return out
145144
@@ -155,3 +154,117 @@ cpdef object metrics(object reference, object hypothesis):
155154 if isinstance (reference, (list , np.ndarray)) and isinstance (hypothesis, (list , np.ndarray)):
156155 return _metrics_batch(list (reference), list (hypothesis))
157156 return calculations(reference, hypothesis)
157+
158+
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef cnp.ndarray calculations_fast(object reference, object hypothesis):
    """
    Count-only WER/Levenshtein computation for a single pair of strings.

    Both inputs are split on whitespace and aligned word by word with a
    Levenshtein distance matrix. The backtrace only tallies the edit
    operations; it does not collect the words that were inserted, deleted
    or substituted.

    Returns a (6,) float64 array:
        [wer, ld, m, insertions, deletions, substitutions]
    where m is the number of reference words and wer is ld / m
    (0.0 when the reference contains no words).
    """
    cdef list ref_words = reference.split()
    cdef list hyp_words = hypothesis.split()

    cdef Py_ssize_t m = len(ref_words)
    cdef Py_ssize_t n = len(hyp_words)
    cdef Py_ssize_t row, col

    cdef int ld
    cdef int n_ins = 0
    cdef int n_del = 0
    cdef int n_sub = 0
    cdef double wer

    cdef int via_del, via_ins, via_sub, cheapest
    cdef bint interior
    cdef object ref_word

    # (m+1) x (n+1) table; np.empty is safe because every cell is written
    # (borders below, interior in the fill loop) before it is read.
    cdef int[:, :] dist = np.empty((m + 1, n + 1), dtype=np.int32)

    # Borders: turning a prefix into the empty sequence costs its length.
    for row in range(m + 1):
        dist[row, 0] = <int>row
    for col in range(n + 1):
        dist[0, col] = <int>col

    # Interior: cheapest of deletion, insertion and (mis)match.
    for row in range(1, m + 1):
        ref_word = ref_words[row - 1]
        for col in range(1, n + 1):
            via_del = dist[row - 1, col] + 1
            via_ins = dist[row, col - 1] + 1
            if ref_word == hyp_words[col - 1]:
                via_sub = dist[row - 1, col - 1]
            else:
                via_sub = dist[row - 1, col - 1] + 1

            cheapest = via_sub
            if via_ins < cheapest:
                cheapest = via_ins
            if via_del < cheapest:
                cheapest = via_del

            dist[row, col] = cheapest

    ld = dist[m, n]
    if m > 0:
        wer = (<double>ld) / m
    else:
        wer = 0.0

    # Walk back from the bottom-right corner, tallying each edit operation.
    # Priority on ties: match, substitution, insertion, deletion.
    row = m
    col = n
    while row > 0 or col > 0:
        # The diagonal neighbour only exists away from the top/left borders;
        # the guard also keeps indices non-negative (wraparound is disabled).
        interior = row > 0 and col > 0
        if interior and ref_words[row - 1] == hyp_words[col - 1]:
            row -= 1
            col -= 1
        elif interior and dist[row, col] == dist[row - 1, col - 1] + 1:
            n_sub += 1
            row -= 1
            col -= 1
        elif col > 0 and dist[row, col] == dist[row, col - 1] + 1:
            n_ins += 1
            col -= 1
        elif row > 0 and dist[row, col] == dist[row - 1, col] + 1:
            n_del += 1
            row -= 1

    return np.array(
        [wer, <double>ld, <double>m,
         <double>n_ins, <double>n_del, <double>n_sub],
        dtype=np.float64
    )
236+
237+
@cython.boundscheck(False)
@cython.wraparound(False)
cdef cnp.ndarray _metrics_batch_fast(list references, list hypotheses):
    """
    Fast batch processing without word tracking.

    Scores references[k] against hypotheses[k] for every k by calling
    calculations_fast on each pair.

    Returns (n, 6) float64 array where each row contains:
    [wer, ld, m, insertions, deletions, substitutions]

    Raises:
        ValueError: if references and hypotheses differ in length.
    """
    cdef Py_ssize_t n = len(references)
    cdef Py_ssize_t idx
    cdef cnp.ndarray out
    cdef cnp.ndarray r

    # Bounds checking is disabled for this function, so indexing a shorter
    # `hypotheses` list would read past its end instead of raising
    # IndexError, and a longer one would be silently truncated. Reject
    # mismatched inputs before touching either list.
    if len(hypotheses) != n:
        raise ValueError(
            "references and hypotheses must have the same length "
            "(got %d and %d)" % (n, len(hypotheses))
        )

    out = np.empty((n, 6), dtype=np.float64)

    for idx in range(n):
        r = calculations_fast(references[idx], hypotheses[idx])
        # Copy the (6,) result into row idx of the output.
        out[idx, :] = r

    return out
258+
259+
cpdef object metrics_fast(object reference, object hypothesis):
    """
    Entry point for the count-only (no word tracking) metrics.

    Dispatch:
    - both arguments are a list or NumPy array: each is converted to a list
      and the pairs are scored by _metrics_batch_fast, giving an (n, 6)
      float64 array with one row per pair
    - anything else: the arguments are passed straight to calculations_fast,
      giving a (6,) float64 array

    Columns are [wer, ld, m, insertions, deletions, substitutions].
    """
    cdef bint batched = (
        isinstance(reference, (list, np.ndarray))
        and isinstance(hypothesis, (list, np.ndarray))
    )
    if not batched:
        return calculations_fast(reference, hypothesis)
    return _metrics_batch_fast(list(reference), list(hypothesis))
0 commit comments