|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +from concurrent.futures import ThreadPoolExecutor, as_completed |
| 4 | +from os import pardir, path |
| 5 | + |
| 6 | +from charset_normalizer.api import from_path |
| 7 | + |
# Resolve this module's real location first, then step one level up from its
# folder. The ".." component is appended as-is (the path is not normalised).
_HERE = path.dirname(path.realpath(__file__))
DIR_PATH = path.join(_HERE, pardir)


# One entry per sample file: (file name, expected encoding, expected language).
_THREAD_CASES = [
    ("sample-arabic-1.txt", "cp1256", "Arabic"),
    ("sample-french-1.txt", "cp1252", "French"),
    ("sample-chinese.txt", "big5", "Chinese"),
]
| 17 | + |
| 18 | + |
def _detect(case: tuple[str, str, str]) -> tuple[str, str, str, str | None, str | None]:
    """Run detection on one sample file and pair the outcome with expectations.

    Args:
        case: ``(file_name, expected_encoding, expected_language)``; the file
            is looked up in the ``data`` folder under ``DIR_PATH``.

    Returns:
        ``(file_name, expected_encoding, expected_language, detected_encoding,
        detected_language)``. Both detected fields are ``None`` when the
        detection result, or its best match, is falsy.
    """
    file_name, expected_enc, expected_lang = case

    sample_path = path.join(DIR_PATH, "data", file_name)
    matches = from_path(sample_path)

    # Only ask for the best candidate when the result is truthy.
    top = matches.best() if matches else None

    if top:
        got_enc, got_lang = top.encoding, top.language
    else:
        got_enc = got_lang = None

    return (file_name, expected_enc, expected_lang, got_enc, got_lang)
| 30 | + |
| 31 | + |
class TestThreadSafety:
    """Checks detection results when several files are processed on a thread pool."""

    def test_concurrent_detection(self) -> None:
        """Detect every entry of ``_THREAD_CASES`` on a three-worker pool.

        Each task's detected encoding and language must equal the expected
        values listed for its file; a mismatch fails the test with a message
        naming the file.
        """
        with ThreadPoolExecutor(max_workers=3) as executor:
            # Submit everything up front so the tasks can overlap.
            pending = [executor.submit(_detect, entry) for entry in _THREAD_CASES]

            # Check results in completion order; ``result()`` re-raises any
            # exception raised inside the worker.
            for finished in as_completed(pending):
                outcome = finished.result()
                file_name, expected_enc, expected_lang, got_enc, got_lang = outcome
                assert got_enc == expected_enc, (
                    f"{file_name}: expected encoding {expected_enc}, got {got_enc}"
                )
                assert got_lang == expected_lang, (
                    f"{file_name}: expected language {expected_lang}, got {got_lang}"
                )

    def test_concurrent_detection_repeated(self) -> None:
        """Invoke ``test_concurrent_detection`` five times in a row.

        Repeating the run gives intermittent failures more chances to appear.
        """
        remaining = 5
        while remaining:
            self.test_concurrent_detection()
            remaining -= 1
0 commit comments