Skip to content

Commit 1888d51

Browse files
committed
2 parents 1a974fe + 39af819 commit 1888d51

18 files changed

Lines changed: 554 additions & 21 deletions

CHANGELOG.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2323

2424
---
2525

26+
## [0.2.0] - TBD
27+
28+
### Added
29+
- Implemented `weighted_wer` functionality to calculate Weighted Word Error Rate with customizable weights for insertion, deletion, and substitution errors.
30+
- Introduced Python API alias `wwer` for convenience alongside `weighted_wer`.
31+
- Created `utils.rs` module containing the shared `extract_string_list` utility function, marked as `pub` and `#[inline]` for performance.
32+
- Added detailed Rust doc comments and Python docstrings, including usage examples and parameter descriptions.
33+
- Introduced `test_weighted_wer.py` with comprehensive unit tests for the `weighted_wer` function.
34+
- Verified correct handling of default and custom weight configurations.
35+
- Added tests for edge cases including zero weights, empty inputs, mismatched lengths, and invalid input types.
36+
- Added a benchmark script that compares execution speed between `werx.wer` (standard WER) and `werx.weighted_wer` (weighted WER).
37+
- Added a `weighted_wer_results.py` script to visualize weighted WER results and validate benchmark outputs.
38+
39+
### Changed
40+
- Extracted the `extract_string_list` utility function from `wer.rs` and moved it to a new `utils.rs` module for shared usage.
41+
- Marked `extract_string_list` as `pub` and `#[inline]` for performance and cross-module access.
42+
- Updated `wer.rs` to import `extract_string_list` from `utils.rs`.
43+
- Updated Python `__init__.py` to expose the new `weighted_wer` and `wwer` functions.
44+
45+
---
46+
2647
## [0.1.3] - 2025-05-11
2748

2849
### Added

README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,29 @@ print(wer)
101101
0.2
102102
```
103103

104+
#### 3. Weighted Word Error Rate Calculation (Custom Weights)
105+
106+
*Python Code:*
107+
```python
108+
ref = ['i love cold pizza', 'the sugar bear character was popular']
109+
hyp = ['i love pizza', 'the sugar bare character was popular']
110+
111+
# Apply lower weight to insertions and deletions, standard weight for substitutions
112+
wer = werx.weighted_wer(
113+
ref,
114+
hyp,
115+
insertion_weight=0.5,
116+
deletion_weight=0.5,
117+
substitution_weight=1.0
118+
)
119+
print(wer)
120+
```
121+
122+
*Results Output:*
123+
```
124+
0.15
125+
```
126+
104127
<br/>
105128

106129
## 📄 License
File renamed without changes.
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
from datasets import load_dataset
2+
import werx
3+
import werpy
4+
import timeit
5+
6+
# Load the consolidated CSV from the Hugging Face Hub
7+
dataset = load_dataset(
8+
"analyticsinmotion/librispeech-eval",
9+
data_files="all_splits.csv",
10+
split="train"
11+
)
12+
13+
# Specify which split and model/version to evaluate
14+
split = "test-clean"
15+
model_name = "whisper-base"
16+
model_version = "v20240930"
17+
18+
# Filter references and hypotheses for the chosen split/model/version
19+
filtered = dataset.filter(
20+
lambda x: x["split"] == split and
21+
x["model_name"] == model_name and
22+
x["model_version"] == model_version
23+
)
24+
25+
filtered = list(filtered)
26+
#references = [row["reference"] for row in filtered]
27+
#hypotheses = [row["hypothesis"] for row in filtered]
28+
references = [werpy.normalize(row["reference"]) for row in filtered]
29+
hypotheses = [werpy.normalize(row["hypothesis"]) for row in filtered]
30+
31+
# --- WER tools ---
32+
tools = {
33+
"WERX (Standard)": lambda r, h: werx.wer(r, h),
34+
"WERX (Weighted)": lambda r, h: werx.weighted_wer(
35+
r, h, insertion_weight=2.0, deletion_weight=2.0, substitution_weight=1.0
36+
),
37+
}
38+
39+
# --- Run + time each tool using timeit ---
40+
results = []
41+
n_repeats = 10 # Number of repeats for timeit
42+
43+
for name, func in tools.items():
44+
def stmt():
45+
return func(references, hypotheses)
46+
total_time = timeit.timeit(stmt, number=n_repeats)
47+
avg_time = total_time / n_repeats
48+
wer = func(references, hypotheses)
49+
results.append((name, wer, avg_time))
50+
51+
# --- Sort by fastest execution time ---
52+
results.sort(key=lambda x: x[2])
53+
54+
# --- Print CLI-friendly table ---
55+
print("\nWERX Benchmark: Standard vs Weighted WER (Ordered by Speed)\n")
56+
print(f"{'Method':<18} {'WER':<8} {'WER (%)':<10} {'Time (s)':<12}")
57+
print("-" * 60)
58+
for name, wer, t in results:
59+
print(f"{name:<18} {wer:.4f} {wer*100:6.2f}% {t:.6f}")

benchmarks/weighted_wer_results.py

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
from werx import weighted_wer, wwer
2+
3+
# Manual result script for werx.weighted_wer: each section prints the value
# returned for one scenario and, where the expected value is known, prints it
# next to the result so the two can be compared by eye.

# ----------------------------------------------------------------------
# Test 1: Alias Consistency (wwer must give the same result as weighted_wer)
# ----------------------------------------------------------------------
ref = ["i love cold pizza"]
hyp = ["i love pizza"]
test1 = weighted_wer(ref, hyp, insertion_weight=0.5, deletion_weight=0.5, substitution_weight=1.0)
test1_alias = wwer(ref, hyp, insertion_weight=0.5, deletion_weight=0.5, substitution_weight=1.0)
print("Test 1 Result:", test1, "| alias result:", test1_alias, "| match:", test1 == test1_alias)

# ----------------------------------------------------------------------
# Test 2: Basic Weighted WER with Reduced Insertion and Deletion Weights
# ----------------------------------------------------------------------
ref = ["i love cold pizza"]
hyp = ["i love pizza"]
# 1 deletion, deletion weight = 0.5 → weighted cost = 0.5 / 4 = 0.125
expected_result = 0.125
test2 = weighted_wer(ref, hyp, insertion_weight=0.5, deletion_weight=0.5, substitution_weight=1.0)
print("Test 2 Result:", test2, "| expected:", expected_result)

# ----------------------------------------------------------------------
# Test 3: Two-Sentence Input with Increased Substitution Weight
# ----------------------------------------------------------------------
ref = ["i love cold pizza", "the sugar bear character was popular"]
hyp = ["i love pizza", "the sugar bare character was popular"]
# 1 deletion, 1 substitution; deletion_weight = 0.5, substitution_weight = 2.0
expected_result = (0.5 + 2.0) / (4 + 6)  # Total words = 10
test3 = weighted_wer(ref, hyp, insertion_weight=0.5, deletion_weight=0.5, substitution_weight=2.0)
print("Test 3 Result:", test3, "| expected:", expected_result)

# ----------------------------------------------------------------------
# Test 4: Edge Case with Zero Weights (Should Return 0 Regardless of Errors)
# ----------------------------------------------------------------------
ref = ["i love cold pizza"]
hyp = ["i love pizza"]
expected_result = 0.0
test4 = weighted_wer(ref, hyp, insertion_weight=0.0, deletion_weight=0.0, substitution_weight=0.0)
print("Test 4 Result:", test4, "| expected:", expected_result)

# ----------------------------------------------------------------------
# Test 5: All Weights Set to 1 (Equivalent to Standard WER)
# ----------------------------------------------------------------------
ref = ["i love cold pizza"]
hyp = ["i love pizza"]
expected_result = 0.25  # 1 deletion / 4 words
test5 = weighted_wer(ref, hyp)
print("Test 5 Result:", test5, "| expected:", expected_result)

# ----------------------------------------------------------------------
# Test 6: Invalid Input Types Should Raise Exceptions
# ----------------------------------------------------------------------
# The broad `except Exception` in the error tests is deliberate: this script
# only reports whatever the extension raises, it does not pin the type.
try:
    ref = [1, 2, 3]
    hyp = [2, 3, 4]
    test6 = weighted_wer(ref, hyp)
except Exception as e:
    print("Test 6 Result: Exception Raised -", e)
else:
    print("Test 6 Result: No exception raised, returned", test6)

# ----------------------------------------------------------------------
# Test 7: Edge Case with Empty Input Strings — Should Return WER of 0.0
# ----------------------------------------------------------------------
ref = [""]
hyp = [""]
expected_result = 0.0
test7 = weighted_wer(ref, hyp)
print("Test 7 Result:", test7, "| expected:", expected_result)

# ----------------------------------------------------------------------
# Test 8: Mismatched Reference and Hypothesis Lengths
# ----------------------------------------------------------------------
try:
    ref = ["hello world", "another sentence"]
    hyp = ["hello world"]
    test8 = weighted_wer(ref, hyp)
except Exception as e:
    print("Test 8 Result: Exception Raised -", e)
else:
    print("Test 8 Result: No exception raised, returned", test8)

# ----------------------------------------------------------------------
# Test 9: None Inputs Should Raise an Exception
# ----------------------------------------------------------------------
try:
    ref = None
    hyp = ["hello world"]
    test9a = weighted_wer(ref, hyp)
except Exception as e:
    print("Test 9a Result: Exception Raised -", e)
else:
    print("Test 9a Result: No exception raised, returned", test9a)

try:
    ref = ["hello world"]
    hyp = None
    test9b = weighted_wer(ref, hyp)
except Exception as e:
    print("Test 9b Result: Exception Raised -", e)
else:
    print("Test 9b Result: No exception raised, returned", test9b)


# ----------------------------------------------------------------------
# Test 10: werpy comparison
# ----------------------------------------------------------------------
# NOTE(review): werpy is not called here; presumably this value is compared
# by hand against werpy's output for the same input — confirm.
ref = ['it was beautiful and sunny today']
hyp = ['it was a beautiful and sunny day']
werp = weighted_wer(ref, hyp, insertion_weight=0.5, deletion_weight=0.5, substitution_weight=1)
print("Test 10 Result:", werp)


# ----------------------------------------------------------------------
# Test 11: Heavy Substitution Impact
# ----------------------------------------------------------------------
ref = ["the quick brown fox jumps over the lazy dog multiple times in the evening"]
hyp = ["the fast brown cat leaps across the sleepy dog several times at night"]
# Expect higher impact from substitutions due to synonym replacements
test11 = weighted_wer(ref, hyp, insertion_weight=1.0, deletion_weight=1.0, substitution_weight=3.0)
print("Test 11 (High Substitution Weight) Result:", test11)


# ----------------------------------------------------------------------
# Test 12: Same Input Under Three Different Weight Configurations
# ----------------------------------------------------------------------
ref = ["i love cold pizza", "the sugar bear character was popular"]
hyp = ["i love pizza", "the sugar bare character was popular"]
# 1 deletion, 1 substitution; only the weights differ between 12a/12b/12c
test12a = weighted_wer(ref, hyp, insertion_weight=.5, deletion_weight=.5, substitution_weight=2.0)
test12b = weighted_wer(ref, hyp, insertion_weight=1, deletion_weight=1, substitution_weight=1.0)
test12c = weighted_wer(ref, hyp, insertion_weight=2, deletion_weight=2, substitution_weight=1.0)
print("Test 12a Result:", test12a)
print("Test 12b Result:", test12b)
print("Test 12c Result:", test12c)

# ----------------------------------------------------------------------
# Test 13: Example in weighted_wer.py
# ----------------------------------------------------------------------
ref = ['it was beautiful and sunny today', 'tomorrow may not be as nice']
hyp = ['it was a beautiful and sunny day', 'tomorrow may not be as nice']
test13 = weighted_wer(ref, hyp, insertion_weight=0.5, deletion_weight=0.5, substitution_weight=1.0)
print("Test 13 Result:", test13)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "werx"
3-
version = "0.1.3"
3+
version = "0.2.0"
44
description = "A high-performance Python package for calculating Word Error Rate (WER), powered by Rust."
55
readme = "README.md"
66
authors = [

src/werx/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
__version__ = "0.1.3"
1+
__version__ = "0.2.0"
22
from .wer import wer
3+
from .weighted_wer import weighted_wer, wwer
34

4-
__all__ = ["wer"]
5+
__all__ = ["wer", "weighted_wer", "wwer"]

src/werx/weighted_wer.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
from .werx import weighted_wer as _weighted_wer
2+
3+
def weighted_wer(
    ref: str | list[str],
    hyp: str | list[str],
    insertion_weight: float = 1.0,
    deletion_weight: float = 1.0,
    substitution_weight: float = 1.0,
) -> float:
    """
    Calculate the Weighted Word Error Rate (WER) of a hypothesis against a reference.

    All arguments are forwarded positionally, in the order given here, to the
    `weighted_wer` function imported from `.werx`.

    Parameters:
    ----------
    ref : str | list[str]
        Reference text(s): either one string or a list of sentences.
    hyp : str | list[str]
        Hypothesis text(s): either one string or a list of sentences.
    insertion_weight : float, default=1.0
        Weight applied to insertion errors.
    deletion_weight : float, default=1.0
        Weight applied to deletion errors.
    substitution_weight : float, default=1.0
        Weight applied to substitution errors.

    Returns:
    -------
    float
        The Weighted Word Error Rate (WER) score.

    Example:
    --------
    >>> ref = ['it was beautiful and sunny today', 'tomorrow may not be as nice']
    >>> hyp = ['it was a beautiful and sunny day', 'tomorrow may not be as nice']
    >>> weighted_wer(ref, hyp, insertion_weight=0.5, deletion_weight=0.5, substitution_weight=1.0)
    0.125
    """
    weights = (insertion_weight, deletion_weight, substitution_weight)
    return _weighted_wer(ref, hyp, *weights)


# Short alias for weighted_wer
wwer = weighted_wer

0 commit comments

Comments
 (0)