@@ -145,7 +145,7 @@ for block_dims in ((TILE_DIM, TILE_DIM), (TILE_DIM * TILE_DIM, 1), (1, TILE_DIM
145145 (" transpose" , simple_transpose_kernel! (backend, block_dims)),
146146 )
147147 NVTX. @range " Simple $name $block_dims " let
148- input = rand ! (allocate (backend, T, N, N))
148+ input = copyto ! (allocate (backend, T, N, N), rand ( T, N, N))
149149 output = similar (input)
150150
151151 # compile kernel
@@ -165,7 +165,7 @@ for (name, kernel) in (
165165 )
166166 for bank in (true , false )
167167 NVTX. @range " Localmem $name ($TILE_DIM , $TILE_DIM ) bank=$bank " let
168- input = rand ! (allocate (backend, T, N, N))
168+ input = copyto ! (allocate (backend, T, N, N), rand ( T, N, N))
169169 output = similar (input)
170170
171171 # compile kernel
@@ -185,7 +185,7 @@ for (name, kernel) in (
185185 )
186186 for bank in (true , false )
187187 NVTX. @range " Localmem + multiple elements $name ($TILE_DIM , $BLOCK_ROWS ) bank=$bank " let
188- input = rand ! (allocate (backend, T, N, N))
188+ input = copyto ! (allocate (backend, T, N, N), rand ( T, N, N))
189189 output = similar (input)
190190
191191 # We want a number of blocks equivalent to (TILE_DIM, TILE_DIM)
0 commit comments