Skip to content

Commit fa3f508

Browse files
committed
fix(diff): do not show missing cache entries on diff result
The directory entry might show up in "Not in cache" when the second rev is a workspace revision. But files inside missing cache entries won't be shown in any of the states unless we know the oid of the entries from the index on both sides of the `diff`. On top of #10844. Closes #7661.
1 parent 29386a0 commit fa3f508

2 files changed

Lines changed: 79 additions & 39 deletions

File tree

dvc/repo/diff.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ def meta_cmp_key(meta):
4545
with_renames=True,
4646
meta_cmp_key=meta_cmp_key,
4747
roots=data_keys,
48+
# we need to get unknown and unchanged entries to tell whether
49+
# the object is missing from the cache or not.
50+
with_unknown=True,
51+
with_unchanged=with_missing,
4852
):
4953
if (change.old and change.old.isdir and not change.old.hash_info) or (
5054
change.new and change.new.isdir and not change.new.hash_info

tests/func/test_diff.py

Lines changed: 75 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import os
33

44
import pytest
5-
from funcy import first
65

76
from dvc.exceptions import DvcException
87
from dvc.utils.fs import remove
@@ -96,6 +95,81 @@ def test_no_cache_entry(tmp_dir, scm, dvc):
9695
}
9796

9897

98+
def test_diff_no_cache(tmp_dir, scm, dvc):
99+
(stage,) = tmp_dir.dvc_gen({"dir": {"file": "file content"}}, commit="first")
100+
scm.tag("v1")
101+
dvc.cache.local.clear()
102+
old_digest = stage.outs[0].hash_info.value
103+
dir_path = os.path.join("dir", "")
104+
105+
default_result = {
106+
"added": [],
107+
"deleted": [],
108+
"modified": [],
109+
"renamed": [],
110+
"not in cache": [],
111+
}
112+
113+
assert dvc.diff("v1") == default_result | {
114+
"not in cache": [{"path": dir_path, "hash": old_digest}],
115+
}
116+
assert dvc.diff("HEAD", "v1") == {}
117+
assert dvc.diff("v1", "HEAD") == {}
118+
119+
(stage,) = tmp_dir.dvc_gen(
120+
{"dir": {"file": "modified file content"}}, commit="first"
121+
)
122+
scm.tag("v2")
123+
new_digest = stage.outs[0].hash_info.value
124+
125+
assert dvc.diff("v2") == {}
126+
assert dvc.diff("v1") == default_result | {
127+
"modified": [
128+
{"path": dir_path, "hash": {"old": old_digest, "new": new_digest}}
129+
],
130+
"not in cache": [{"path": dir_path, "hash": old_digest}],
131+
}
132+
assert dvc.diff("v1", "v2") == default_result | {
133+
"modified": [
134+
{"path": dir_path, "hash": {"old": old_digest, "new": new_digest}}
135+
],
136+
}
137+
138+
remove(dvc.cache.local.path)
139+
# drop the cache so that we can test as if we don't know what entries are
140+
# in the missing cache entry.
141+
dvc.drop_data_index()
142+
143+
assert dvc.diff("v2") == default_result | {
144+
"not in cache": [{"path": dir_path, "hash": new_digest}],
145+
}
146+
assert dvc.diff("v1") == default_result | {
147+
"modified": [
148+
{"path": dir_path, "hash": {"old": old_digest, "new": new_digest}}
149+
],
150+
"not in cache": [{"path": dir_path, "hash": old_digest}],
151+
}
152+
assert dvc.diff("v2", "v1") == default_result | {
153+
"modified": [
154+
{"path": dir_path, "hash": {"old": new_digest, "new": old_digest}}
155+
],
156+
}
157+
assert dvc.diff("v1", "v2") == default_result | {
158+
"modified": [
159+
{"path": dir_path, "hash": {"old": old_digest, "new": new_digest}}
160+
],
161+
}
162+
assert dvc.diff() == default_result | {
163+
"not in cache": [{"path": dir_path, "hash": new_digest}],
164+
}
165+
166+
remove(str(tmp_dir / "dir"))
167+
assert dvc.diff() == default_result | {
168+
"deleted": [{"path": dir_path, "hash": new_digest}],
169+
"not in cache": [{"path": dir_path, "hash": new_digest}],
170+
}
171+
172+
99173
@pytest.mark.parametrize("delete_data", [True, False])
100174
def test_deleted(tmp_dir, scm, dvc, delete_data):
101175
tmp_dir.dvc_gen("file", "text", commit="add file")
@@ -256,44 +330,6 @@ def test_directories(tmp_dir, scm, dvc):
256330
}
257331

258332

259-
def test_diff_no_cache(tmp_dir, scm, dvc):
260-
tmp_dir.dvc_gen({"dir": {"file": "file content"}}, commit="first")
261-
scm.tag("v1")
262-
263-
tmp_dir.dvc_gen({"dir": {"file": "modified file content"}}, commit="second")
264-
scm.tag("v2")
265-
266-
remove(dvc.cache.local.path)
267-
268-
# invalidate_dir_info to force cache loading
269-
dvc.cache.local._dir_info = {}
270-
271-
diff = dvc.diff("v1", "v2")
272-
assert diff["added"] == []
273-
assert diff["deleted"] == []
274-
assert first(diff["modified"])["path"] == os.path.join("dir", "")
275-
assert diff["not in cache"] == []
276-
277-
(tmp_dir / "dir" / "file").unlink()
278-
remove(str(tmp_dir / "dir"))
279-
diff = dvc.diff()
280-
assert diff["added"] == []
281-
assert diff["deleted"] == [
282-
{
283-
"path": os.path.join("dir", ""),
284-
"hash": "f0f7a307d223921557c929f944bf5303.dir",
285-
}
286-
]
287-
assert diff["renamed"] == []
288-
assert diff["modified"] == []
289-
assert diff["not in cache"] == [
290-
{
291-
"path": os.path.join("dir", ""),
292-
"hash": "f0f7a307d223921557c929f944bf5303.dir",
293-
}
294-
]
295-
296-
297333
def test_diff_dirty(tmp_dir, scm, dvc):
298334
tmp_dir.dvc_gen(
299335
{"file": "file_content", "dir": {"dir_file1": "dir file content"}},

0 commit comments

Comments
 (0)