Skip to content

Commit 483a407

Browse files
Mitix-EPI, Alexandre JUAN, and claude
authored and committed
Fix cache verify incorrectly reporting folders as missing files (#3707)
The `hf cache verify` command was incorrectly reporting directories (like `1_Pooling`, `onnx`) as missing files when using `--fail-on-missing-files`.

Root cause: `list_repo_tree` returns both `RepoFile` and `RepoFolder` entries, but `collect_local_files` only collects files. This mismatch caused folders to appear in `missing_paths`.

Fix: Filter out `RepoFolder` entries in `verify_repo_checksums`, only keeping `RepoFile` entries for comparison.

Fixes #3706

Co-authored-by: Alexandre JUAN <a.juan@namirial.com>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent cd2a551 commit 483a407

3 files changed

Lines changed: 42 additions & 8 deletions

File tree

src/huggingface_hub/hf_api.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3361,12 +3361,13 @@ def verify_repo_checksums(
33613361
)
33623362
local_by_path = collect_local_files(root)
33633363

3364-
# get remote entries
3365-
remote_by_path: dict[str, Union[RepoFile, RepoFolder]] = {}
3364+
# get remote entries (only files, not folders)
3365+
remote_by_path: dict[str, RepoFile] = {}
33663366
for entry in self.list_repo_tree(
33673367
repo_id=repo_id, recursive=True, revision=remote_revision, repo_type=repo_type, token=token
33683368
):
3369-
remote_by_path[entry.path] = entry
3369+
if isinstance(entry, RepoFile):
3370+
remote_by_path[entry.path] = entry
33703371

33713372
return verify_maps(
33723373
remote_by_path=remote_by_path,

src/huggingface_hub/utils/_verification.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,15 @@
11
import re
22
from dataclasses import dataclass
33
from pathlib import Path
4-
from typing import TYPE_CHECKING, Literal, Optional, TypedDict, Union
4+
from typing import TYPE_CHECKING, Literal, Optional, TypedDict
55

66
from .. import constants
77
from ..file_download import repo_folder_name
88
from .sha import git_hash, sha_fileobj
99

1010

1111
if TYPE_CHECKING:
12-
from ..hf_api import RepoFile, RepoFolder
12+
from ..hf_api import RepoFile
1313

1414
# using fullmatch for clarity and strictness
1515
_REGEX_COMMIT_HASH = re.compile(r"^[0-9a-f]{40}$")
@@ -91,7 +91,7 @@ def compute_file_hash(path: Path, algorithm: HashAlgo) -> str:
9191

9292
def verify_maps(
9393
*,
94-
remote_by_path: dict[str, Union["RepoFile", "RepoFolder"]],
94+
remote_by_path: dict[str, "RepoFile"],
9595
local_by_path: dict[str, Path],
9696
revision: str,
9797
verified_path: Path,

tests/test_verification.py

Lines changed: 35 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
import pytest
77

88
import huggingface_hub.utils._verification as verification_module
9-
from huggingface_hub.hf_api import HfApi
9+
from huggingface_hub.hf_api import HfApi, RepoFile, RepoFolder
1010
from huggingface_hub.utils._verification import (
1111
HashAlgo,
1212
collect_local_files,
@@ -147,9 +147,42 @@ def test_api_verify_repo_checksums_cache_mode(tmp_path: Path) -> None:
147147
with patch.object(
148148
HfApi,
149149
"list_repo_tree",
150-
return_value=[SimpleNamespace(path="file.txt", blob_id=git_hash(content), lfs=None)],
150+
return_value=[RepoFile(path="file.txt", oid=git_hash(content), size=len(content), lfs=None)],
151151
):
152152
res = HfApi().verify_repo_checksums(
153153
repo_id="user/model", repo_type="model", revision=commit, cache_dir=cache_dir, token=None
154154
)
155155
assert res.revision == commit and res.checked_count == 1 and not res.mismatches
156+
157+
158+
def test_api_verify_repo_checksums_ignores_folders(tmp_path: Path) -> None:
159+
"""Test that folders returned by list_repo_tree are ignored and not reported as missing files.
160+
161+
Regression test for https://github.com/huggingface/huggingface_hub/issues/3706
162+
"""
163+
cache_dir = tmp_path
164+
commit = "c" * 40
165+
storage = cache_dir / "models--user--model"
166+
snapshot = storage / "snapshots" / commit
167+
snapshot.mkdir(parents=True)
168+
169+
# Create files inside a nested folder structure
170+
content = b"file content"
171+
_write(snapshot / "subdir" / "file.txt", content)
172+
173+
# Mock list_repo_tree to return both a folder and a file (as the real API does)
174+
def mock_list_repo_tree(*args, **kwargs):
175+
# The real API returns RepoFolder for directories and RepoFile for files
176+
yield RepoFolder(path="subdir", oid="tree-oid-123")
177+
yield RepoFile(path="subdir/file.txt", oid=git_hash(content), size=len(content), lfs=None)
178+
179+
with patch.object(HfApi, "list_repo_tree", mock_list_repo_tree):
180+
res = HfApi().verify_repo_checksums(
181+
repo_id="user/model", repo_type="model", revision=commit, cache_dir=cache_dir, token=None
182+
)
183+
# The folder should NOT be in missing_paths
184+
assert "subdir" not in res.missing_paths
185+
assert res.checked_count == 1
186+
assert res.mismatches == []
187+
assert res.missing_paths == []
188+
assert res.extra_paths == []

0 commit comments

Comments
 (0)