patch doc, fix torch_deepcopy for empty DynamicCache and transformers==5.1.0 (#408)

xadupre · web-flow · commit b99ccbf97b49 · 2026-02-07T12:06:00.000+01:00
* patch doc

* update CI for 5.1.0

* fix version

* fix dynamiccache

* fix unt

* fix for 5.2.0

* fix ut

* fix documentation
diff --git a/.github/workflows/check-release.yml b/.github/workflows/check-release.yml
@@ -16,7 +16,7 @@ jobs:
       matrix:
         os: [ubuntu-latest, macOS-latest, windows-latest]
         python: ['3.13']
-        transformers: ['5.0', 'main']
+        transformers: ['5.1.0', 'main']
         torch: ['2.10', 'main']
 
     steps:
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -17,7 +17,7 @@ jobs:
       matrix:
         os: [ubuntu-latest]
         python: ['3.10', '3.11', '3.12', '3.13']
-        transformers: ['4.48.3', '4.51.3', '4.55.4', '4.57.6', '5.0', 'main']
+        transformers: ['4.48.3', '4.51.3', '4.55.4', '4.57.6', '5.1.0', 'main']
         torch: ['2.10', 'main']
         exclude:
           # 3.10 - torch
@@ -29,7 +29,7 @@ jobs:
           - python: '3.10'
             transformers: '4.57.6'
           - python: '3.10'
-            transformers: '5.0'
+            transformers: '5.1.0'
           - python: '3.10'
             transformers: 'main'
           # 3.11 - torch
@@ -41,7 +41,7 @@ jobs:
           - python: '3.11'
             transformers: '4.57.6'
           - python: '3.11'
-            transformers: '5.0'
+            transformers: '5.1.0'
           - python: '3.11'
             transformers: 'main'
           # 3.13 - torch
diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
@@ -4,6 +4,8 @@ Change Logs
 0.9.1
 +++++
 
+* :pr:`408`: fix torch_deepcopy for empty DynamicCache and transformers==5.1.0, 5.2.0 (see https://github.com/huggingface/transformers/pull/43765/)
+
 0.9.0
 +++++
 
diff --git a/_doc/status/patches_diff.rst b/_doc/status/patches_diff.rst
@@ -61,11 +61,21 @@ Those two versions leads to the following list of patches.
         patch_details=details,
     ):
         pass
+    done = set()
     for patch in details.patched:
-        print(f"* {patch.family} - {getattr(patch.function_to_patch, '__name__', patch.function_to_patch)}")
+        if patch.function_to_patch == patch.patch:
+            continue
+        if patch.refid in done:
+            continue
+        done.add(patch.refid)
+        print(f"* :ref:`{patch.refid}`")
     print()
     print()
+    done = set()
     for patch in details.patched:
+        if patch.refid in done:
+            continue
+        done.add(patch.refid)
         if patch.function_to_patch == patch.patch:
             continue
         rst = patch.format_diff(format="rst")
diff --git a/_scripts/test_backend_onnxruntime.py b/_scripts/test_backend_onnxruntime.py
@@ -141,7 +141,7 @@ def run_node(cls, node, inputs, device=None, outputs_info=None, **kwargs):
 
 backend_test.exclude("(test_adagrad|test_adam|test_add_uint8)")
 
-if pv.Version(onnxruntime.__version__) <= pv.Version("1.24"):
+if pv.Version(onnxruntime.__version__) <= pv.Version("1.25"):
     backend_test.exclude("(test_attention_4d_with|test_attention_4d_gqa)")
 
 
diff --git a/_unittests/ut_reference/test_backend_onnxruntime_evaluator.py b/_unittests/ut_reference/test_backend_onnxruntime_evaluator.py
@@ -299,7 +299,7 @@ def run_node(cls, node, inputs, device=None, outputs_info=None, **kwargs):
     )
 
 
-if pv.Version(onnxruntime.__version__) <= pv.Version("1.24"):
+if pv.Version(onnxruntime.__version__) <= pv.Version("1.25"):
     backend_test.exclude("(test_attention_4d_with|test_attention_4d_gqa)")
 
 # import all test cases at global scope to make them visible to python.unittest
diff --git a/_unittests/ut_tasks/test_tasks.py b/_unittests/ut_tasks/test_tasks.py
@@ -266,7 +266,7 @@ def test_falcon_mamba_dev(self):
         model(**inputs)
         model(**data["inputs2"])
         self.assertIn((data["size"], data["n_weights"]), [(274958336, 68739584)])
-        if not has_transformers("5.0.99"):
+        if not has_transformers("5.2.99"):
             raise unittest.SkipTest("The model has control flow.")
         with torch_export_patches(patch_transformers=True, verbose=10, stop_if_static=1):
             torch.export.export(
diff --git a/_unittests/ut_tasks/test_tasks_image_text_to_text.py b/_unittests/ut_tasks/test_tasks_image_text_to_text.py
@@ -15,7 +15,7 @@
 
 class TestTasksImageTextToText(ExtTestCase):
     @hide_stdout()
-    @requires_transformers("5.0.99")
+    @requires_transformers("5.2.99")
     @requires_torch("2.7.99")
     def test_image_text_to_text_idefics(self):
         mid = "HuggingFaceM4/tiny-random-idefics"
@@ -32,7 +32,7 @@ def test_image_text_to_text_idefics(self):
             self.assertEqualAny(expected, ep.module()(**inputs), atol=1)
 
     @hide_stdout()
-    @requires_transformers("5.0.99")
+    @requires_transformers("5.2.99")
     @requires_torch("2.7.99")
     def test_image_text_to_text_tiny_gemma3(self):
         """
@@ -88,7 +88,7 @@ def test_image_text_to_text_gemma3_4b_it(self):
             self.assertEqualAny(expected, ep.module()(**inputs))
 
     @hide_stdout()
-    @requires_transformers("5.0.99")
+    @requires_transformers("5.2.99")
     @requires_torch("2.7.99")
     def test_image_text_to_text_zai_glm(self):
         """
diff --git a/_unittests/ut_torch_export_patches/test_patch_transformers.py b/_unittests/ut_torch_export_patches/test_patch_transformers.py
@@ -703,7 +703,7 @@ def test_plug_multi_head_attention_qwen25_packed_float16(self):
         self.assertEqualArray(results.eager_outputs[0], results.onnx_outputs[0], atol=0.01)
         self.assertLess(results.diffs[0]["abs"], 0.01)
 
-    @requires_onnxruntime("1.24")
+    @requires_onnxruntime("1.25")
     @unittest.skipIf(not patch_qwen2_5, "Qwen25 not part of this transformers")
     def test_plug_multi_head_attention_qwen25_loopmha_float16(self):
         from onnx_diagnostic.torch_export_patches.patches._patch_transformers_qwen2_5 import (
@@ -738,7 +738,7 @@ def test_plug_multi_head_attention_qwen25_loopmha_float16(self):
         self.assertEqualArray(results.eager_outputs[0], results.onnx_outputs[0], atol=0.01)
         self.assertLess(results.diffs[0]["abs"], 0.01)
 
-    @requires_onnxruntime("1.24")
+    @requires_onnxruntime("1.25")
     @unittest.skipIf(not patch_qwen2_5, "Qwen25 not part of this transformers")
     def test_plug_multi_head_attention_qwen25_loopmha_float32(self):
         from onnx_diagnostic.torch_export_patches.patches._patch_transformers_qwen2_5 import (
@@ -773,7 +773,7 @@ def test_plug_multi_head_attention_qwen25_loopmha_float32(self):
         self.assertEqualArray(results.eager_outputs[0], results.onnx_outputs[0], atol=1e-5)
         self.assertLess(results.diffs[0]["abs"], 1e-5)
 
-    @requires_onnxruntime("1.24")
+    @requires_onnxruntime("1.25")
     @unittest.skipIf(not patch_qwen2_5, "Qwen25 not part of this transformers")
     def test_plug_multi_head_attention_qwen25_loopa24_float16(self):
         from onnx_diagnostic.torch_export_patches.patches._patch_transformers_qwen2_5 import (
@@ -801,7 +801,7 @@ def test_plug_multi_head_attention_qwen25_loopa24_float16(self):
         self.assertEqualArray(results.eager_outputs[0], results.onnx_outputs[0], atol=0.005)
         self.assertLess(results.diffs[0]["abs"], 0.005)
 
-    @requires_onnxruntime("1.24")
+    @requires_onnxruntime("1.25")
     @unittest.skipIf(not patch_qwen2_5, "Qwen25 not part of this transformers")
     def test_plug_multi_head_attention_qwen25_loopa24_float32(self):
         from onnx_diagnostic.torch_export_patches.patches._patch_transformers_qwen2_5 import (
diff --git a/_unittests/ut_torch_onnx/test_discrepancies.py b/_unittests/ut_torch_onnx/test_discrepancies.py
@@ -46,7 +46,7 @@ def qwen_sdpa_attention(
             return attn_output
 
         for model_name in ["attention_loopa24.onnx", "attention_loopmha.onnx"]:
-            if model_name == "attention_loopa24.onnx" and not has_onnxruntime("1.24"):
+            if model_name == "attention_loopa24.onnx" and not has_onnxruntime("1.25"):
                 # not available
                 continue
             with self.subTest(model=model_name):
diff --git a/onnx_diagnostic/helpers/torch_helper.py b/onnx_diagnostic/helpers/torch_helper.py
@@ -850,6 +850,15 @@ def torch_deepcopy(value: Any) -> Any:
     if value.__class__.__name__ == "DynamicCache":
         from .cache_helper import CacheKeyValue
 
+        if (
+            hasattr(value, "layers")
+            and len(value.layers) == 1
+            and value.layers[0].keys is None
+        ):
+            import transformers
+
+            return transformers.cache_utils.DynamicCache(None)
+
         ca = CacheKeyValue(value)
         pairs = list(zip(ca.key_cache, ca.value_cache))
         assert not hasattr(value, "layers") or len(value.layers) == len(pairs), (
diff --git a/onnx_diagnostic/torch_export_patches/onnx_export_errors.py b/onnx_diagnostic/torch_export_patches/onnx_export_errors.py
@@ -71,10 +71,10 @@ def patch_module_or_classes(
     if isinstance(mod, list):
         to_patch = mod
         name = "list"
-        list_name = "auto/list"
+        list_name = "_PATCHED_list"
     else:
         name, to_patch = get_patches(mod, verbose)
-        list_name = f"auto/{mod.__name__.split('.')[-1]}"
+        list_name = f"_PATCHED_{mod.__name__.split('.')[-1]}"
 
     res = {}
     for cls in to_patch:
diff --git a/onnx_diagnostic/torch_export_patches/patch_details.py b/onnx_diagnostic/torch_export_patches/patch_details.py
@@ -117,6 +117,18 @@ def make_diff(self) -> str:
     def function_name(cls, f: Callable) -> str:
         return f.__qualname__
 
+    @property
+    def refid(self) -> str:
+        kind = self.family or ""
+        patch_name = (
+            self.function_name(self.patch)
+            .replace(".", "-")
+            .replace("/", "-")
+            .replace(">", "")
+            .replace("<", "")
+        )
+        return f"patch-{kind}-{patch_name}"
+
     def format_diff(self, format: str = "raw") -> str:
         """
         Format a diff between two function as a string.
@@ -149,11 +161,15 @@ def format_diff(self, format: str = "raw") -> str:
             else self.function_name(self.function_to_patch)
         )
         patch_name = self.function_name(self.patch)
+        kind = kind.replace("_PATCHED_", "")
         title = f"{kind}{function_to_pach_name} -> {patch_name}"
         if format == "raw":
             return f"{title}\n{diff}"
 
         rows = [
+            "",
+            f".. _{self.refid}:",
+            "",
             title,
             "=" * len(title),
             "",
diff --git a/onnx_diagnostic/torch_export_patches/patches/_patch_transformers_output_capturing.py b/onnx_diagnostic/torch_export_patches/patches/_patch_transformers_output_capturing.py
@@ -0,0 +1,27 @@
+try:
+    import transformers.utils.output_capturing  # noqa: F401
+
+    patch_output_capturing = True
+except ImportError:
+    patch_output_capturing = False
+
+
+if patch_output_capturing:
+    # Introduced in 5.2.0
+    # https://github.com/huggingface/transformers/pull/43765/
+    # changes#diff-b5f9fdbe43ffd89fbdf2b246dc78dd32aa4bdb587e7a53e4dad37b7efd79ab0a
+    import torch
+    import transformers
+    from transformers.utils.import_utils import is_torchdynamo_compiling
+
+    class patched_CompileableContextVar:
+        _PATCHES_ = ["set"]
+        _PATCHED_CLASS_ = transformers.utils.output_capturing.CompileableContextVar
+
+        def set(self, value):
+            if is_torchdynamo_compiling() and not torch.compiler.is_exporting():
+                self.global_var = value
+                self.compiling = True
+                return None
+            else:
+                return self.context_var.set(value)
diff --git a/onnx_diagnostic/torch_export_patches/patches/patch_transformers.py b/onnx_diagnostic/torch_export_patches/patches/patch_transformers.py
@@ -42,6 +42,12 @@
         patched_sdpa_mask_recent_torch,
     )
 
+from ._patch_transformers_output_capturing import patch_output_capturing
+
+if patch_output_capturing:
+    from ._patch_transformers_output_capturing import patched_CompileableContextVar
+
+
 # transformers models dependent patches
 
 if _has_transformers("4.51"):

-Original file line number
+Diff line change
 0.9.1
 +++++
 +* :pr:`408`: fix torch_deepcopy for empty DynamicCache and transformers==5.1.0, 5.2.0 (see https://github.com/huggingface/transformers/pull/43765/)
++
 0.9.0
 +++++
Original file line number	Diff line number	Diff line change
`@@ -299,7 +299,7 @@ def run_node(cls, node, inputs, device=None, outputs_info=None, **kwargs):`
`299`	`299`	`)`
`300`	`300`
`301`	`301`
`302`		`-if pv.Version(onnxruntime.__version__) <= pv.Version("1.24"):`
	`302`	`+if pv.Version(onnxruntime.__version__) <= pv.Version("1.25"):`
`303`	`303`	`backend_test.exclude("(test_attention_4d_with\|test_attention_4d_gqa)")`
`304`	`304`
`305`	`305`	`# import all test cases at global scope to make them visible to python.unittest`