Skip to content

Commit ca74549

Browse files
authored
move: support moving output from import/import-url stages (#10889)
1 parent 8912ca7 commit ca74549

3 files changed

Lines changed: 192 additions & 10 deletions

File tree

dvc/output.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -997,8 +997,9 @@ def move(self, out):
997997
self.save()
998998
self.commit()
999999

1000-
if self.protocol == "local" and self.use_scm_ignore:
1001-
self.repo.scm_context.ignore(self.fspath)
1000+
# should already be ignored in self.save()
1001+
# if self.protocol == "local" and self.use_scm_ignore:
1002+
# self.repo.scm_context.ignore(self.fspath)
10021003

10031004
def transfer(
10041005
self, source, odb=None, jobs=None, update=False, no_progress_bar=False

dvc/repo/move.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,10 @@ def move(self, from_path, to_path):
3232
It only works with outputs generated by `add` or `import`,
3333
also known as data sources.
3434
"""
35-
from dvc import output
35+
from dvc import dependency, output
3636
from dvc.dvcfile import DVC_FILE_SUFFIX
3737
from dvc.stage import Stage
38+
from dvc_objects.fs.local import LocalFileSystem
3839

3940
from_out = output.loads_from(Stage(self), [from_path])[0]
4041
assert from_out.protocol == "local"
@@ -45,6 +46,7 @@ def move(self, from_path, to_path):
4546
assert len(outs) == 1
4647
out = outs[0]
4748
stage = out.stage
49+
deps = stage.deps
4850

4951
if not stage.is_data_source:
5052
raise MoveNotDataSourceError(stage.addressing)
@@ -64,15 +66,28 @@ def move(self, from_path, to_path):
6466
wdir=new_wdir,
6567
outs=[to_path],
6668
meta=stage.meta,
69+
frozen=stage.frozen,
70+
always_changed=stage.always_changed,
71+
desc=stage.desc,
6772
)
6873

6974
os.unlink(stage.path)
7075
stage = new_stage
7176
else:
7277
to_path = os.path.relpath(to_path, stage.wdir)
7378

74-
to_out = output.loads_from(stage, [to_path], out.use_cache, out.metric)[0]
79+
def with_dep_path_adjusted(dep: dependency.Dependency):
80+
d = dep.dumpd()
81+
if isinstance(dep.fs, LocalFileSystem) and not os.path.isabs(dep.def_path):
82+
path = os.path.relpath(os.path.abspath(dep.def_path), stage.wdir)
83+
return d | {"path": path}
84+
return d
7585

76-
out.move(to_out)
77-
stage.save()
86+
stage.outs = output.loadd_from(stage, [out.dumpd() | {"path": to_path}])
87+
stage.deps = dependency.loadd_from(
88+
stage, [with_dep_path_adjusted(dep) for dep in deps]
89+
)
90+
91+
out.move(stage.outs[0])
92+
stage.md5 = stage.compute_md5()
7893
stage.dump()

tests/func/test_move.py

Lines changed: 170 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,14 @@
88
from dvc.exceptions import MoveNotDataSourceError, OutputNotFoundError
99

1010

11-
def test_move(tmp_dir, dvc):
12-
tmp_dir.dvc_gen("foo", "foo")
13-
dvc.move("foo", "foo1")
11+
def test_move(tmp_dir, dvc, scm):
12+
tmp_dir.dvc_gen("foo", "bar")
13+
dvc.move("foo", "bar")
1414

1515
assert not (tmp_dir / "foo").is_file()
16-
assert (tmp_dir / "foo1").is_file()
16+
assert (tmp_dir / "bar").is_file()
17+
# should only have the new path in the .gitignore, and only once
18+
assert (tmp_dir / ".gitignore").read_text().splitlines() == ["/bar"]
1719

1820

1921
def test_move_non_existent_file(dvc):
@@ -195,3 +197,167 @@ def test_move_meta(tmp_dir, dvc):
195197
custom_key: 42
196198
"""
197199
)
200+
201+
202+
def test_import(tmp_dir, dvc, scm):
203+
tmp_dir.dvc_gen("foo", "foo", commit="add foo")
204+
imp_stage = dvc.imp(os.curdir, "foo", "foo_imported")
205+
206+
dvc.move("foo_imported", "foo_moved")
207+
208+
(stage,) = dvc.stage.collect("foo_moved.dvc")
209+
assert imp_stage.md5 != stage.md5
210+
res = (tmp_dir / "foo_moved.dvc").read_text()
211+
assert res == textwrap.dedent(
212+
f"""\
213+
md5: {stage.md5}
214+
frozen: true
215+
deps:
216+
- path: foo
217+
repo:
218+
url: {os.curdir}
219+
rev_lock: {scm.get_rev()}
220+
outs:
221+
- md5: acbd18db4cc2f85cedef654fccc4a4d8
222+
size: 3
223+
hash: md5
224+
path: foo_moved
225+
"""
226+
)
227+
228+
229+
@pytest.mark.parametrize(
230+
"path_func",
231+
[pytest.param(os.path.abspath, id="abs"), pytest.param(os.path.relpath, id="rel")],
232+
)
233+
def test_import_url_in_repo(tmp_dir, dvc, path_func):
234+
tmp_dir.gen("foo", "foo")
235+
imp_stage = dvc.imp_url(path_func(tmp_dir / "foo"), "foo_imported")
236+
(tmp_dir / "data").mkdir()
237+
238+
dvc.move("foo_imported", os.path.join("data", "foo_moved"))
239+
240+
(stage,) = dvc.stage.collect(os.path.join("data", "foo_moved.dvc"))
241+
assert imp_stage.md5 != stage.md5
242+
res = (tmp_dir / "data" / "foo_moved.dvc").read_text()
243+
assert res == textwrap.dedent(
244+
f"""\
245+
md5: {stage.md5}
246+
frozen: true
247+
deps:
248+
- md5: acbd18db4cc2f85cedef654fccc4a4d8
249+
size: 3
250+
hash: md5
251+
path: ../foo
252+
outs:
253+
- md5: acbd18db4cc2f85cedef654fccc4a4d8
254+
size: 3
255+
hash: md5
256+
path: foo_moved
257+
"""
258+
)
259+
260+
261+
@pytest.mark.parametrize(
262+
"path_func",
263+
[pytest.param(os.path.abspath, id="abs"), pytest.param(os.path.relpath, id="rel")],
264+
)
265+
def test_import_url_out_of_repo(tmp_dir, dvc, scm, path_func, make_tmp_dir):
266+
external = make_tmp_dir("external")
267+
external.gen("foo", "foo")
268+
269+
imp_stage = dvc.imp_url(path_func(external / "foo"), "foo_imported")
270+
271+
data_dir = tmp_dir / "data"
272+
data_dir.mkdir()
273+
274+
new_path = data_dir / "foo_moved"
275+
new_dvcfile = new_path.with_suffix(".dvc")
276+
dvc.move("foo_imported", os.fspath(new_path))
277+
278+
(stage,) = dvc.stage.collect(os.fspath(new_dvcfile))
279+
assert imp_stage.md5 != stage.md5
280+
281+
with data_dir.chdir():
282+
expected_path = path_func(external / "foo")
283+
284+
assert new_dvcfile.parse() == {
285+
"md5": stage.md5,
286+
"frozen": True,
287+
"deps": [
288+
{
289+
"md5": "acbd18db4cc2f85cedef654fccc4a4d8",
290+
"size": 3,
291+
"hash": "md5",
292+
"path": expected_path,
293+
}
294+
],
295+
"outs": [
296+
{
297+
"md5": "acbd18db4cc2f85cedef654fccc4a4d8",
298+
"size": 3,
299+
"hash": "md5",
300+
"path": "foo_moved",
301+
}
302+
],
303+
}
304+
305+
306+
@pytest.mark.parametrize(
307+
"path_func",
308+
[pytest.param(os.path.abspath, id="abs"), pytest.param(os.path.relpath, id="rel")],
309+
)
310+
def test_all_metadata_are_preserved(tmp_dir, dvc, make_tmp_dir, path_func):
311+
external = make_tmp_dir("external")
312+
external.gen("foo", "foo")
313+
314+
contents = {
315+
"md5": "bad", # placeholder, does not matter for the test
316+
"frozen": True,
317+
"desc": "this is a stage description",
318+
"always_changed": True,
319+
"meta": {"custom_key": 42},
320+
"deps": [
321+
{
322+
"md5": "acbd18db4cc2f85cedef654fccc4a4d8",
323+
"size": 3,
324+
"hash": "md5",
325+
"path": path_func(external / "foo"),
326+
}
327+
],
328+
"outs": [
329+
{
330+
"md5": "acbd18db4cc2f85cedef654fccc4a4d8",
331+
"path": "foo_imported",
332+
"persist": True,
333+
"hash": "md5",
334+
"size": 3,
335+
"desc": "this is a description",
336+
"type": "model",
337+
"labels": ["label1", "label2"],
338+
"meta": {"custom_key": 42},
339+
"cache": False,
340+
"remote": "myremote",
341+
"push": False,
342+
}
343+
],
344+
}
345+
(tmp_dir / "foo_imported.dvc").dump(contents)
346+
(tmp_dir / "foo_imported").write_text("foo")
347+
348+
data_dir = tmp_dir / "data"
349+
data_dir.mkdir()
350+
351+
new_path = data_dir / "foo_moved"
352+
new_dvcfile = new_path.with_suffix(".dvc")
353+
dvc.move("foo_imported", os.fspath(new_path))
354+
355+
(stage,) = dvc.stage.collect(os.fspath(new_dvcfile))
356+
357+
with data_dir.chdir():
358+
expected_path = path_func(external / "foo")
359+
360+
contents["outs"][0] |= {"path": "foo_moved"}
361+
contents["deps"][0] |= {"path": expected_path}
362+
contents |= {"md5": stage.md5}
363+
assert new_dvcfile.parse() == contents

0 commit comments

Comments
 (0)