Skip to content

Commit ec429a2

Browse files
committed
Fixup
1 parent 8c38a76 commit ec429a2

1 file changed

Lines changed: 20 additions & 6 deletions

File tree

test/intrinsics.jl

Lines changed: 20 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -44,23 +44,36 @@ end
4444
# Do NOT use this kernel as an example for your code.
4545
# It was written assuming one workgroup of size 32 and
4646
# is only valid for those
47-
function shfl_down_test_kernel(a, b)
47+
function shfl_down_test_kernel(a, b, ::Val{N}) where N
4848
# This is not valid
49-
idx = KI.get_local_id().x
49+
idx = KI.get_sub_group_local_id()
5050

51-
temp = KI.localmemory(eltype(b), 32)
51+
temp = KI.localmemory(eltype(b), N)
5252
temp[idx] = a[idx]
5353

5454
KI.barrier()
5555

5656
if idx == 1
5757
value = temp[idx]
5858

59+
if KI.get_sub_group_size() > 32
60+
value = value + KI.shfl_down(value, 32)
61+
KI.sub_group_barrier()
62+
end
5963
value = value + KI.shfl_down(value, 16)
64+
KI.sub_group_barrier()
65+
6066
value = value + KI.shfl_down(value, 8)
67+
KI.sub_group_barrier()
68+
6169
value = value + KI.shfl_down(value, 4)
70+
KI.sub_group_barrier()
71+
6272
value = value + KI.shfl_down(value, 2)
73+
KI.sub_group_barrier()
74+
6375
value = value + KI.shfl_down(value, 1)
76+
KI.sub_group_barrier()
6477

6578
b[idx] = value
6679
end
@@ -201,13 +214,14 @@ function intrinsics_testsuite(backend, AT)
201214
end
202215
end
203216
@testset "shfl_down(::$T)" for T in KI.shfl_down_types(backend())
204-
a = zeros(T, 32)
217+
N = KI.sub_group_size(backend())
218+
a = zeros(T, N)
205219
rand!(a, (1:4))
206220

207221
dev_a = AT(a)
208-
dev_b = AT(zeros(T, 32))
222+
dev_b = AT(zeros(T, N))
209223

210-
KI.@kernel backend() workgroupsize=32 shfl_down_test_kernel(dev_a, dev_b)
224+
KI.@kernel backend() workgroupsize=N shfl_down_test_kernel(dev_a, dev_b, Val(N))
211225

212226
b = Array(dev_b)
213227
@test sum(a) ≈ b[1]

0 commit comments

Comments
 (0)