# werx/benchmarks/speed_comparison_synthetic_data.py (analyticsinmotion/werx, main branch)
# Benchmark script: compares word error rate (WER) timings across several Python packages.
import time
import timeit
import werpy
import werx
import jiwer
import pywer
# from torchmetrics.text import WordErrorRate  # TODO: Uncomment once torchmetrics supports Python 3.14
import evaluate  # Import the evaluate package
# import universal_edit_distance as ued  # TODO: Uncomment once universal_edit_distance supports Python 3.14

# --- Test Data (Repeated 10,000 times) ---
# Ten reference sentences with mixed case and punctuation; the first entry
# also carries irregular leading, trailing and inner whitespace. The list is
# repeated 10,000 times, giving 100,000 entries.
reference_translation = [
    '     It is consumed domestically           and exported to other countries.     ',
    'The Sugar Bear character was popular enough to have occasional premium toys.',
    'It is one of the most watched television networks in the country.',
    'It could be carried and prepared by the individual soldier.',
    'He was executed in a Lubyanka prison cellar.',
    'Rufino Street in Makati, right inside the Makati Central Business District.',
    'Its estuary is considered to have abnormally low rates of dissolved oxygen.',
    'He later cited his first wife Anita as the inspiration for the song.',
    'Gadya is the nearest rural locality.',
    'Taxes are a tool in the adjustment of the economy.'
] * 10000

# Ten hypothesis sentences, index-aligned with the references above: lowercase,
# each ending in a trailing space, several containing word-level errors
# (e.g. "bare" for "Bear"). Repeated 10,000 times to match the reference list.
hypothesis_translation = [
    'it is consumed domestically and exported to other countries ',
    'the sugar bare character was popular enough to have occasional premium toys ',
    'it is one of the most watched television networks in the country ',
    'it could be carried and prepared by the individual soldier ',
    'he was executed in alabianca prison seller ',
    'rofino street in mccauti right inside the macasi central business district ',
    "it's estiary is considered to have a normally low rates of dissolved oxygen ",
    'he later sighted his first wife anita as the inspiration for the song ',
    'gadia is the nearest rural locality ',
    'taxes are a tool in the adjustment of the economy '
] * 10000

# Normalize using werpy (as common preprocessing)
# Done once, up front, so the same preprocessed `ref` / `hyp` lists are handed
# to every package that is timed further down in this script.
ref = werpy.normalize(reference_translation)
hyp = werpy.normalize(hypothesis_translation)

# --- Function Definitions ---
def wer_werpy(ref, hyp):
    """Compute the word error rate of ``hyp`` against ``ref`` with werpy.

    Both arguments are forwarded positionally, unchanged, to ``werpy.wer``
    and its result is returned as-is.
    """
    score = werpy.wer(ref, hyp)
    return score

def wer_werx(ref, hyp):
    """Compute the word error rate of ``hyp`` against ``ref`` with werx.

    Both arguments are forwarded positionally, unchanged, to ``werx.wer``
    and its result is returned as-is.
    """
    score = werx.wer(ref, hyp)
    return score

def wer_jiwer(ref, hyp):
    """Compute the word error rate of ``hyp`` against ``ref`` with jiwer.

    Both arguments are forwarded positionally, unchanged, to ``jiwer.wer``
    and its result is returned as-is.
    """
    score = jiwer.wer(ref, hyp)
    return score

def wer_pywer(ref, hyp):
    """Compute the word error rate of ``hyp`` against ``ref`` with pywer.

    Both arguments are forwarded positionally, unchanged, to ``pywer.wer``
    and its result is returned as-is.
    """
    score = pywer.wer(ref, hyp)
    return score

# TODO: Uncomment once torchmetrics supports Python 3.14
# def wer_torchmetrics(ref, hyp):
#     metric = WordErrorRate()
#     score = metric(ref, hyp)
#     return score.item()

# Load the metric once, at definition time, so the (potentially slow)
# ``evaluate.load`` call is not counted inside every timed WER computation.
# A failed load is remembered instead of raised here, so it surfaces when
# wer_evaluate() is called and the caller can handle it per package.
try:
    _EVALUATE_WER_METRIC = evaluate.load("wer")
    _EVALUATE_LOAD_ERROR = None
except Exception as exc:  # deferred: re-raised from wer_evaluate()
    _EVALUATE_WER_METRIC = None
    _EVALUATE_LOAD_ERROR = exc


def wer_evaluate(ref, hyp):
    """Compute the word error rate of ``hyp`` against ``ref`` with evaluate.

    Uses the pre-loaded ``evaluate`` "wer" metric, so only the ``compute``
    call is executed (and therefore timed) per invocation.

    Args:
        ref: Reference texts, passed as ``references``.
        hyp: Hypothesis texts, passed as ``predictions``.

    Returns:
        Whatever ``metric.compute`` returns for the "wer" metric.

    Raises:
        RuntimeError: If the "wer" metric could not be loaded; the original
            load error is attached as the cause.
    """
    if _EVALUATE_WER_METRIC is None:
        raise RuntimeError(
            f"could not load the evaluate 'wer' metric: {_EVALUATE_LOAD_ERROR}"
        ) from _EVALUATE_LOAD_ERROR
    return _EVALUATE_WER_METRIC.compute(references=ref, predictions=hyp)

# TODO: Uncomment once universal_edit_distance supports Python 3.14
# def wer_ued(ref, hyp):
#     return ued.word_error_rate(ref, hyp)

# Dispatch table: display name of each package -> its WER wrapper function.
# The benchmark loops iterate this dict, so packages are timed in the order
# listed here.
package_funcs = {
    "werpy": wer_werpy,
    "werx": wer_werx,
    "jiwer": wer_jiwer,
    "pywer": wer_pywer,
    # "torchmetrics": wer_torchmetrics, # TODO: Uncomment once torchmetrics supports Python 3.14
    "evaluate": wer_evaluate,  # Hugging Face evaluate "wer" metric
    # "ued": wer_ued, # TODO: Uncomment once universal_edit_distance supports Python 3.14
}

# --- Benchmarks ---
# results_perf:   package name -> (seconds, WER), or (None, None) on failure
# results_timeit: package name -> seconds, or None on failure
results_perf, results_timeit = {}, {}

print(f"Running benchmark with {len(ref)} examples...\n")

# Pass 1: time one call of each wrapper with time.perf_counter().
print("⏱ perf_counter() timings:")
for pkg, run_wer in package_funcs.items():
    try:
        began = time.perf_counter()
        score = run_wer(ref, hyp)
        elapsed = time.perf_counter() - began
        results_perf[pkg] = (elapsed, score)
        print(f"{pkg}.wer took: {elapsed:.4f} seconds, WER: {score:.4f}")
    except Exception as err:
        # Report and carry on so one failing package does not abort the run.
        print(f"{pkg} failed during perf_counter timing: {err}")
        results_perf[pkg] = (None, None)

# Pass 2: measure each wrapper again through the timeit module, executing
# the call exactly once per package.
print("\n📊 timeit comparisons:")
for pkg, run_wer in package_funcs.items():
    try:
        timer = timeit.Timer(lambda: run_wer(ref, hyp))
        seconds = timer.timeit(number=1)
        results_timeit[pkg] = seconds
        print(f"{pkg}.wer took: {seconds:.4f} seconds")
    except Exception as err:
        # Record the failure as None so the summary can skip this package.
        print(f"{pkg} failed during timeit timing: {err}")
        results_timeit[pkg] = None

# --- Relative Speed Summary ---
# Express every successful timeit result as a multiple of the werx timing.
print("\n📈 Relative performance (based on timeit):")
baseline_name = "werx"
baseline_time = results_timeit.get(baseline_name)

if baseline_time is None:
    # Without a baseline measurement there is nothing to compare against.
    print(f"Cannot calculate ratios: baseline package {baseline_name} failed.")
else:
    for pkg, seconds in results_timeit.items():
        if seconds is None:
            # This package failed during the timeit pass; skip it.
            continue
        if pkg == baseline_name:
            print(f"{pkg}: baseline")
            continue
        ratio = seconds / baseline_time
        if ratio > 1:
            print(f"{pkg}: ⚠️  {ratio:.2f}× slower than {baseline_name}")
        else:
            print(f"{pkg}: ✅ {1 / ratio:.2f}× faster than {baseline_name}")