analyticsinmotion
diff --git a/‎CHANGELOG.md‎
Lines changed: 21 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 23 additions & 0 deletions b/‎README.md‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎benchmarks/benchmarks_memory.py‎ ‎benchmarks/memory_comparison_synthetic.py‎benchmarks/benchmarks_memory.py renamed to benchmarks/memory_comparison_synthetic.py b/‎benchmarks/benchmarks_memory.py‎ ‎benchmarks/memory_comparison_synthetic.py‎benchmarks/benchmarks_memory.py renamed to benchmarks/memory_comparison_synthetic.py
diff --git a/‎benchmarks/benchmarks_librispeech.py‎ ‎…nchmarks/speed_comparison_librispeech.py‎benchmarks/benchmarks_librispeech.py renamed to benchmarks/speed_comparison_librispeech.py b/‎benchmarks/benchmarks_librispeech.py‎ ‎…nchmarks/speed_comparison_librispeech.py‎benchmarks/benchmarks_librispeech.py renamed to benchmarks/speed_comparison_librispeech.py
diff --git a/‎benchmarks/benchmarks_speed.py‎ ‎benchmarks/speed_comparison_synthetic.py‎benchmarks/benchmarks_speed.py renamed to benchmarks/speed_comparison_synthetic.py b/‎benchmarks/benchmarks_speed.py‎ ‎benchmarks/speed_comparison_synthetic.py‎benchmarks/benchmarks_speed.py renamed to benchmarks/speed_comparison_synthetic.py
diff --git a/‎benchmarks/speed_comparison_werx_modules.py‎
Lines changed: 59 additions & 0 deletions b/‎benchmarks/speed_comparison_werx_modules.py‎
Lines changed: 59 additions & 0 deletions
diff --git a/‎benchmarks/weighted_wer_results.py‎
Lines changed: 132 additions & 0 deletions b/‎benchmarks/weighted_wer_results.py‎
Lines changed: 132 additions & 0 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion b/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/werx/__init__.py‎
Lines changed: 3 additions & 2 deletions b/‎src/werx/__init__.py‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎src/werx/weighted_wer.py‎
Lines changed: 41 additions & 0 deletions b/‎src/werx/weighted_wer.py‎
Lines changed: 41 additions & 0 deletions
@@ -23,6 +23,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ---
 
+## [0.2.0] - TBD
+
+### Added
+- Implemented `weighted_wer` functionality to calculate Weighted Word Error Rate with customizable weights for insertion, deletion, and substitution errors.
+- Introduced Python API alias `wwer` for convenience alongside `weighted_wer`.
+- Created `utils.rs` module containing the shared `extract_string_list` utility function, marked as `pub` and `#[inline]` for performance.
+- Added detailed Rust doc comments and Python docstrings, including usage examples and parameter descriptions.
+- Introduced `test_weighted_wer.py` with comprehensive unit tests for the `weighted_wer` function.
+- Verified correct handling of default and custom weight configurations.
+- Added tests for edge cases including zero weights, empty inputs, mismatched lengths, and invalid input types.
+- Added `speed_comparison_werx_modules.py` benchmark script to compare execution speed between `werx.wer` (standard WER) and `werx.weighted_wer` (weighted WER).
+- Added `weighted_wer_results.py` script to print weighted WER results across a range of inputs and weight configurations for manual validation.
+
+### Changed
+- Extracted the `extract_string_list` utility function from `wer.rs` and moved it to a new `utils.rs` module for shared usage.
+- Marked `extract_string_list` as `pub` and `#[inline]` for performance and cross-module access.
+- Updated `wer.rs` to import `extract_string_list` from `utils.rs`.
+- Updated Python `__init__.py` to expose the new `weighted_wer` and `wwer` functions.
+
+---
+
 ## [0.1.3] - 2025-05-11
 
 ### Added
 
@@ -101,6 +101,29 @@ print(wer)
 0.2
 ```
 
+#### 3. Weighted Word Error Rate Calculation (Custom Weights)
+
+*Python Code:*
+```python
+ref = ['i love cold pizza', 'the sugar bear character was popular']
+hyp = ['i love pizza', 'the sugar bare character was popular']
+
+# Apply lower weight to insertions and deletions, standard weight for substitutions
+wer = werx.weighted_wer(
+    ref, 
+    hyp, 
+    insertion_weight=0.5, 
+    deletion_weight=0.5, 
+    substitution_weight=1.0
+)
+print(wer)
+```
+
+*Results Output:*
+```
+0.15
+```
+
 <br/>
 
 ## 📄 License
 
@@ -0,0 +1,59 @@
+from datasets import load_dataset
+import werx
+import werpy
+import timeit
+
+# Load the consolidated CSV from the Hugging Face Hub
+dataset = load_dataset(
+    "analyticsinmotion/librispeech-eval",
+    data_files="all_splits.csv",
+    split="train"
+)
+
+# Specify which split and model/version to evaluate
+split = "test-clean"
+model_name = "whisper-base"
+model_version = "v20240930"
+
+# Filter references and hypotheses for the chosen split/model/version
+filtered = dataset.filter(
+    lambda x: x["split"] == split and
+              x["model_name"] == model_name and
+              x["model_version"] == model_version
+)
+
+filtered = list(filtered)
+# Un-normalized variant (disabled); the active lines below apply werpy.normalize first.
+#references = [row["reference"] for row in filtered]
+#hypotheses = [row["hypothesis"] for row in filtered]
+references = [werpy.normalize(row["reference"]) for row in filtered]
+hypotheses = [werpy.normalize(row["hypothesis"]) for row in filtered]
+
+# --- WER tools ---
+tools = {
+    "WERX (Standard)": lambda r, h: werx.wer(r, h),
+    "WERX (Weighted)": lambda r, h: werx.weighted_wer(
+        r, h, insertion_weight=2.0, deletion_weight=2.0, substitution_weight=1.0
+    ),
+}
+
+# --- Run + time each tool using timeit ---
+results = []
+n_repeats = 10  # Number of repeats for timeit
+
+for name, func in tools.items():
+    def stmt():
+        return func(references, hypotheses)
+    total_time = timeit.timeit(stmt, number=n_repeats)
+    avg_time = total_time / n_repeats
+    wer = func(references, hypotheses)
+    results.append((name, wer, avg_time))
+
+# --- Sort by fastest execution time ---
+results.sort(key=lambda x: x[2])
+
+# --- Print CLI-friendly table ---
+print("\nWERX Benchmark: Standard vs Weighted WER (Ordered by Speed)\n")
+print(f"{'Method':<18} {'WER':<8} {'WER (%)':<10} {'Time (s)':<12}")
+print("-" * 60)
+for name, wer, t in results:
+    print(f"{name:<18} {wer:.4f}   {wer*100:6.2f}%   {t:.6f}")
@@ -0,0 +1,132 @@
+from werx import weighted_wer
+
+# ----------------------------------------------------------------------
+# Test 1: Basic Call with Custom Weights (the `wwer` alias is not exercised here)
+# ----------------------------------------------------------------------
+ref = ["i love cold pizza"]
+hyp = ["i love pizza"]
+test1 = weighted_wer(ref, hyp, insertion_weight=0.5, deletion_weight=0.5, substitution_weight=1.0)
+print("Test 1 Result:", test1)
+
+# ----------------------------------------------------------------------
+# Test 2: Basic Weighted WER with Reduced Insertion and Deletion Weights
+# ----------------------------------------------------------------------
+ref = ["i love cold pizza"]
+hyp = ["i love pizza"]
+# 1 deletion, deletion weight = 0.5 → weighted cost = 0.5 / 4 = 0.125
+expected_result = 0.125
+test2 = weighted_wer(ref, hyp, insertion_weight=0.5, deletion_weight=0.5, substitution_weight=1.0)
+print("Test 2 Result:", test2)
+
+# ----------------------------------------------------------------------
+# Test 3: Two-Sentence Input with Increased Substitution Weight
+# ----------------------------------------------------------------------
+ref = ["i love cold pizza", "the sugar bear character was popular"]
+hyp = ["i love pizza", "the sugar bare character was popular"]
+# 1 deletion, 1 substitution; deletion_weight = 0.5, substitution_weight = 2.0
+expected_result = (0.5 + 2.0) / (4 + 6)  # Total words = 10
+test3 = weighted_wer(ref, hyp, insertion_weight=0.5, deletion_weight=0.5, substitution_weight=2.0)
+print("Test 3 Result:", test3)
+
+# ----------------------------------------------------------------------
+# Test 4: Edge Case with Zero Weights (Should Return 0 Regardless of Errors)
+# ----------------------------------------------------------------------
+ref = ["i love cold pizza"]
+hyp = ["i love pizza"]
+test4 = weighted_wer(ref, hyp, insertion_weight=0.0, deletion_weight=0.0, substitution_weight=0.0)
+print("Test 4 Result:", test4)
+
+# ----------------------------------------------------------------------
+# Test 5: All Weights Set to 1 (Equivalent to Standard WER)
+# ----------------------------------------------------------------------
+ref = ["i love cold pizza"]
+hyp = ["i love pizza"]
+expected_result = 0.25  # 1 deletion / 4 words
+test5 = weighted_wer(ref, hyp)
+print("Test 5 Result:", test5)
+
+# ----------------------------------------------------------------------
+# Test 6: Invalid Input Types Should Raise Exceptions
+# ----------------------------------------------------------------------
+try:
+    ref = [1, 2, 3]
+    hyp = [2, 3, 4]
+    test6 = weighted_wer(ref, hyp)
+except Exception as e:
+    print("Test 6 Result: Exception Raised -", e)
+
+# ----------------------------------------------------------------------
+# Test 7: Edge Case with Empty Input Strings — Should Return WER of 0.0
+# ----------------------------------------------------------------------
+ref = [""]
+hyp = [""]
+test7 = weighted_wer(ref, hyp)
+print("Test 7 Result:", test7)
+
+# ----------------------------------------------------------------------
+# Test 8: Mismatched Reference and Hypothesis Lengths
+# ----------------------------------------------------------------------
+try:
+    ref = ["hello world", "another sentence"]
+    hyp = ["hello world"]
+    test8 = weighted_wer(ref, hyp)
+except Exception as e:
+    print("Test 8 Result: Exception Raised -", e)
+
+# ----------------------------------------------------------------------
+# Test 9: None Inputs Should Raise an Exception
+# ----------------------------------------------------------------------
+try:
+    ref = None
+    hyp = ["hello world"]
+    test9a = weighted_wer(ref, hyp)
+except Exception as e:
+    print("Test 9a Result: Exception Raised -", e)
+
+try:
+    ref = ["hello world"]
+    hyp = None
+    test9b = weighted_wer(ref, hyp)
+except Exception as e:
+    print("Test 9b Result: Exception Raised -", e)
+
+
+# ----------------------------------------------------------------------
+# Test 10: werpy Comparison Input (werpy itself is not called in this script)
+# ----------------------------------------------------------------------
+ref = ['it was beautiful and sunny today']
+hyp = ['it was a beautiful and sunny day']
+werp = weighted_wer(ref, hyp, insertion_weight=0.5, deletion_weight=0.5, substitution_weight=1)
+print("Test 10 Result:", werp)
+
+
+# ----------------------------------------------------------------------
+# Test 11: Heavy Substitution Impact
+# ----------------------------------------------------------------------
+ref = ["the quick brown fox jumps over the lazy dog multiple times in the evening"]
+hyp = ["the fast brown cat leaps across the sleepy dog several times at night"]
+# Expect higher impact from substitutions due to synonym replacements
+test11 = weighted_wer(ref, hyp, insertion_weight=1.0, deletion_weight=1.0, substitution_weight=3.0)
+print("Test 11 (High Substitution Weight) Result:", test11)
+
+
+# ----------------------------------------------------------------------
+# Test 12: Same Errors Under Three Different Weight Configurations
+# ----------------------------------------------------------------------
+ref = ["i love cold pizza", "the sugar bear character was popular"]
+hyp = ["i love pizza", "the sugar bare character was popular"]
+# 1 deletion, 1 substitution;
+test12a = weighted_wer(ref, hyp, insertion_weight=.5, deletion_weight=.5, substitution_weight=2.0)
+test12b = weighted_wer(ref, hyp, insertion_weight=1, deletion_weight=1, substitution_weight=1.0)
+test12c = weighted_wer(ref, hyp, insertion_weight=2, deletion_weight=2, substitution_weight=1.0)
+print("Test 12a Result:", test12a)
+print("Test 12b Result:", test12b)
+print("Test 12c Result:", test12c)
+
+# ----------------------------------------------------------------------
+# Test 13: Example in weighted_wer.py
+# ----------------------------------------------------------------------
+ref = ['it was beautiful and sunny today', 'tomorrow may not be as nice']
+hyp = ['it was a beautiful and sunny day', 'tomorrow may not be as nice']
+test13 = weighted_wer(ref, hyp, insertion_weight=0.5, deletion_weight=0.5, substitution_weight=1.0)
+print("Test 13 Result:", test13)
@@ -1,6 +1,6 @@
 [project]
 name = "werx"
-version = "0.1.3"
+version = "0.2.0"
 description = "A high-performance Python package for calculating Word Error Rate (WER), powered by Rust."
 readme = "README.md"
 authors = [
 
@@ -1,4 +1,5 @@
-__version__ = "0.1.3"
+__version__ = "0.2.0"
 from .wer import wer
+from .weighted_wer import weighted_wer, wwer
 
-__all__ = ["wer"]
+__all__ = ["wer", "weighted_wer", "wwer"]
@@ -0,0 +1,41 @@
+from .werx import weighted_wer as _weighted_wer
+
+def weighted_wer(
+    ref: str | list[str],
+    hyp: str | list[str],
+    insertion_weight: float = 1.0,
+    deletion_weight: float = 1.0,
+    substitution_weight: float = 1.0
+) -> float:
+    """
+    Compute the Weighted Word Error Rate (WER).
+
+    This is a thin Python wrapper: every argument is forwarded positionally
+    and unchanged to the ``weighted_wer`` function imported from the
+    ``.werx`` module. No validation or conversion is performed here.
+
+    Parameters
+    ----------
+    ref : str | list[str]
+        Reference text(s). Can be a single string or a list of sentences.
+    hyp : str | list[str]
+        Hypothesis text(s). Can be a single string or a list of sentences.
+    insertion_weight : float, default=1.0
+        Weight assigned to insertion errors.
+    deletion_weight : float, default=1.0
+        Weight assigned to deletion errors.
+    substitution_weight : float, default=1.0
+        Weight assigned to substitution errors.
+
+    Returns
+    -------
+    float
+        Weighted Word Error Rate (WER) score.
+
+    Notes
+    -----
+    Any exception raised by the underlying implementation propagates to the
+    caller unchanged; this wrapper does not catch anything.
+
+    Examples
+    --------
+    >>> ref = ['it was beautiful and sunny today', 'tomorrow may not be as nice']
+    >>> hyp = ['it was a beautiful and sunny day', 'tomorrow may not be as nice']
+    >>> weighted_wer(ref, hyp, insertion_weight=0.5, deletion_weight=0.5, substitution_weight=1.0)
+    0.125
+    """
+    # Weights are passed positionally in the order insertion, deletion,
+    # substitution.
+    return _weighted_wer(ref, hyp, insertion_weight, deletion_weight, substitution_weight)
+
+# Alias
+wwer = weighted_wer