Skip to content

Commit aa23be2

Browse files
committed
move: support moving output from import/import-url stages
1 parent 8912ca7 commit aa23be2

3 files changed

Lines changed: 186 additions & 6 deletions

File tree

dvc/output.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -997,8 +997,9 @@ def move(self, out):
997997
self.save()
998998
self.commit()
999999

1000-
if self.protocol == "local" and self.use_scm_ignore:
1001-
self.repo.scm_context.ignore(self.fspath)
1000+
# should already be ignored in self.save()
1001+
# if self.protocol == "local" and self.use_scm_ignore:
1002+
# self.repo.scm_context.ignore(self.fspath)
10021003

10031004
def transfer(
10041005
self, source, odb=None, jobs=None, update=False, no_progress_bar=False

dvc/repo/move.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,10 @@ def move(self, from_path, to_path):
3232
It only works with outputs generated by `add` or `import`,
3333
also known as data sources.
3434
"""
35-
from dvc import output
35+
from dvc import dependency, output
3636
from dvc.dvcfile import DVC_FILE_SUFFIX
3737
from dvc.stage import Stage
38+
from dvc_objects.fs.local import LocalFileSystem
3839

3940
from_out = output.loads_from(Stage(self), [from_path])[0]
4041
assert from_out.protocol == "local"
@@ -45,6 +46,7 @@ def move(self, from_path, to_path):
4546
assert len(outs) == 1
4647
out = outs[0]
4748
stage = out.stage
49+
deps = stage.deps
4850

4951
if not stage.is_data_source:
5052
raise MoveNotDataSourceError(stage.addressing)
@@ -64,15 +66,28 @@ def move(self, from_path, to_path):
6466
wdir=new_wdir,
6567
outs=[to_path],
6668
meta=stage.meta,
69+
frozen=stage.frozen,
70+
always_changed=stage.always_changed,
71+
desc=stage.desc,
6772
)
6873

6974
os.unlink(stage.path)
7075
stage = new_stage
7176
else:
7277
to_path = os.path.relpath(to_path, stage.wdir)
7378

74-
to_out = output.loads_from(stage, [to_path], out.use_cache, out.metric)[0]
79+
def with_dep_path_adjusted(dep: dependency.Dependency):
    """Return ``dep.dumpd()`` with a relative local path rebased onto ``stage.wdir``.

    Dependencies that are not on a ``LocalFileSystem``, or whose ``def_path``
    is already absolute, are returned exactly as dumped.

    NOTE(review): ``os.path.abspath`` resolves ``def_path`` against the
    process's current working directory; presumably that coincides with the
    directory ``def_path`` was originally relative to -- confirm.
    """
    dumped = dep.dumpd()

    # Guard clause: only relative paths on the local filesystem need rebasing.
    is_local = isinstance(dep.fs, LocalFileSystem)
    if not is_local or os.path.isabs(dep.def_path):
        return dumped

    rebased = os.path.relpath(os.path.abspath(dep.def_path), stage.wdir)
    return dumped | {"path": rebased}
7585

76-
out.move(to_out)
77-
stage.save()
86+
stage.outs = output.loadd_from(stage, [out.dumpd() | {"path": to_path}])
87+
stage.deps = dependency.loadd_from(
88+
stage, [with_dep_path_adjusted(dep) for dep in deps]
89+
)
90+
91+
out.move(stage.outs[0])
92+
stage.md5 = stage.compute_md5()
7893
stage.dump()

tests/func/test_move.py

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,3 +195,167 @@ def test_move_meta(tmp_dir, dvc):
195195
custom_key: 42
196196
"""
197197
)
198+
199+
200+
def test_import(tmp_dir, dvc, scm):
    """Move the output of a ``dvc.imp`` stage and check the rewritten .dvc file.

    The moved stage must get a new md5, and its file must still list the repo
    dependency (url and rev_lock), the ``frozen`` flag and the renamed output.
    """
    tmp_dir.dvc_gen("foo", "foo", commit="add foo")
    original = dvc.imp(os.curdir, "foo", "foo_imported")

    dvc.move("foo_imported", "foo_moved")

    (moved,) = dvc.stage.collect("foo_moved.dvc")
    # The output path is part of the stage definition, so the checksum changes.
    assert original.md5 != moved.md5

    actual = (tmp_dir / "foo_moved.dvc").read_text()
    expected = textwrap.dedent(
        f"""\
        md5: {moved.md5}
        frozen: true
        deps:
        - path: foo
          repo:
            url: {os.curdir}
            rev_lock: {scm.get_rev()}
        outs:
        - md5: acbd18db4cc2f85cedef654fccc4a4d8
          size: 3
          hash: md5
          path: foo_moved
        """
    )
    assert actual == expected
225+
226+
227+
@pytest.mark.parametrize(
    "path_func",
    [
        pytest.param(os.path.abspath, id="abs"),
        pytest.param(os.path.relpath, id="rel"),
    ],
)
def test_import_url_in_repo(tmp_dir, dvc, path_func):
    """Move an ``imp_url`` output whose source file lives inside the repo.

    The output is moved into ``data/``; the test expects the new .dvc file to
    record the dependency as ``../foo`` for both the absolute and the
    relative form of the source path.
    """
    tmp_dir.gen("foo", "foo")
    source = path_func(tmp_dir / "foo")
    original = dvc.imp_url(source, "foo_imported")
    (tmp_dir / "data").mkdir()

    destination = os.path.join("data", "foo_moved")
    dvc.move("foo_imported", destination)

    moved_dvcfile = os.path.join("data", "foo_moved.dvc")
    (moved,) = dvc.stage.collect(moved_dvcfile)
    assert original.md5 != moved.md5

    actual = (tmp_dir / "data" / "foo_moved.dvc").read_text()
    expected = textwrap.dedent(
        f"""\
        md5: {moved.md5}
        frozen: true
        deps:
        - md5: acbd18db4cc2f85cedef654fccc4a4d8
          size: 3
          hash: md5
          path: ../foo
        outs:
        - md5: acbd18db4cc2f85cedef654fccc4a4d8
          size: 3
          hash: md5
          path: foo_moved
        """
    )
    assert actual == expected
257+
258+
259+
@pytest.mark.parametrize(
    "path_func",
    [
        pytest.param(os.path.abspath, id="abs"),
        pytest.param(os.path.relpath, id="rel"),
    ],
)
def test_import_url_out_of_repo(tmp_dir, dvc, scm, path_func, make_tmp_dir):
    """Move an ``imp_url`` output whose source file lives outside the repo.

    After the move into ``data/``, the dependency path in the new .dvc file
    must equal ``path_func(external / "foo")`` evaluated from inside
    ``data/`` -- i.e. a relative source path is re-expressed relative to the
    new location.
    """
    external = make_tmp_dir("external")
    external.gen("foo", "foo")

    original = dvc.imp_url(path_func(external / "foo"), "foo_imported")

    data_dir = tmp_dir / "data"
    data_dir.mkdir()

    moved_out = data_dir / "foo_moved"
    moved_dvcfile = moved_out.with_suffix(".dvc")
    dvc.move("foo_imported", os.fspath(moved_out))

    (moved,) = dvc.stage.collect(os.fspath(moved_dvcfile))
    assert original.md5 != moved.md5

    # Compute the expected dependency path as seen from the new stage's dir.
    with data_dir.chdir():
        expected_path = path_func(external / "foo")

    parsed = moved_dvcfile.parse()

    # Dependency and output describe the same 3-byte file, so they share
    # every field except the path.
    foo_info = {
        "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
        "size": 3,
        "hash": "md5",
    }
    expected = {
        "md5": moved.md5,
        "frozen": True,
        "deps": [foo_info | {"path": expected_path}],
        "outs": [foo_info | {"path": "foo_moved"}],
    }
    assert parsed == expected
302+
303+
304+
@pytest.mark.parametrize(
    "path_func",
    [
        pytest.param(os.path.abspath, id="abs"),
        pytest.param(os.path.relpath, id="rel"),
    ],
)
def test_all_metadata_are_preserved(tmp_dir, dvc, make_tmp_dir, path_func):
    """Check that a move keeps every stage-level and output-level field.

    A .dvc file is written by hand with stage fields (frozen, desc,
    always_changed, meta) and output fields (persist, desc, type, labels,
    meta, cache, remote, push). After the move, the parsed file must match
    the original apart from the output path, the dependency path and the
    stage md5.
    """
    external = make_tmp_dir("external")
    external.gen("foo", "foo")

    dep_entry = {
        "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
        "size": 3,
        "hash": "md5",
        "path": path_func(external / "foo"),
    }
    out_entry = {
        "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
        "path": "foo_imported",
        "persist": True,
        "hash": "md5",
        "size": 3,
        "desc": "this is a description",
        "type": "model",
        "labels": ["label1", "label2"],
        "meta": {"custom_key": 42},
        "cache": False,
        "remote": "myremote",
        "push": False,
    }
    contents = {
        # Placeholder checksum; its value does not matter for this test.
        "md5": "bad",
        "frozen": True,
        "desc": "this is a stage description",
        "always_changed": True,
        "meta": {"custom_key": 42},
        "deps": [dep_entry],
        "outs": [out_entry],
    }
    (tmp_dir / "foo_imported.dvc").dump(contents)
    (tmp_dir / "foo_imported").write_text("foo")

    data_dir = tmp_dir / "data"
    data_dir.mkdir()

    moved_out = data_dir / "foo_moved"
    moved_dvcfile = moved_out.with_suffix(".dvc")
    dvc.move("foo_imported", os.fspath(moved_out))

    (moved,) = dvc.stage.collect(os.fspath(moved_dvcfile))

    # Compute the expected dependency path as seen from the new stage's dir.
    with data_dir.chdir():
        expected_path = path_func(external / "foo")

    # Only these three values are allowed to differ after the move.
    contents["outs"][0]["path"] = "foo_moved"
    contents["deps"][0]["path"] = expected_path
    contents["md5"] = moved.md5
    assert moved_dvcfile.parse() == contents

0 commit comments

Comments
 (0)