|
44 | 44 | # Do NOT use this kernel as an example for your code. |
45 | 45 | # It was written assuming one workgroup of size 32 and |
46 | 46 | # is only valid for those |
47 | | -function shfl_down_test_kernel(a, b) |
| 47 | +function shfl_down_test_kernel(a, b, ::Val{N}) where N |
48 | 48 | # This is not valid |
49 | | - idx = KI.get_local_id().x |
| 49 | + idx = KI.get_sub_group_local_id() |
50 | 50 |
|
51 | | - temp = KI.localmemory(eltype(b), 32) |
| 51 | + temp = KI.localmemory(eltype(b), N) |
52 | 52 | temp[idx] = a[idx] |
53 | 53 |
|
54 | 54 | KI.barrier() |
55 | 55 |
|
56 | 56 | if idx == 1 |
57 | 57 | value = temp[idx] |
58 | 58 |
|
| 59 | + if KI.get_sub_group_size() > 32 |
| 60 | + value = value + KI.shfl_down(value, 32) |
| 61 | + KI.sub_group_barrier() |
| 62 | + end |
59 | 63 | value = value + KI.shfl_down(value, 16) |
| 64 | + KI.sub_group_barrier() |
| 65 | + |
60 | 66 | value = value + KI.shfl_down(value, 8) |
| 67 | + KI.sub_group_barrier() |
| 68 | + |
61 | 69 | value = value + KI.shfl_down(value, 4) |
| 70 | + KI.sub_group_barrier() |
| 71 | + |
62 | 72 | value = value + KI.shfl_down(value, 2) |
| 73 | + KI.sub_group_barrier() |
| 74 | + |
63 | 75 | value = value + KI.shfl_down(value, 1) |
| 76 | + KI.sub_group_barrier() |
64 | 77 |
|
65 | 78 | b[idx] = value |
66 | 79 | end |
@@ -201,13 +214,14 @@ function intrinsics_testsuite(backend, AT) |
201 | 214 | end |
202 | 215 | end |
203 | 216 | @testset "shfl_down(::$T)" for T in KI.shfl_down_types(backend()) |
204 | | - a = zeros(T, 32) |
| 217 | + N = KI.sub_group_size(backend()) |
| 218 | + a = zeros(T, N) |
205 | 219 | rand!(a, (1:4)) |
206 | 220 |
|
207 | 221 | dev_a = AT(a) |
208 | | - dev_b = AT(zeros(T, 32)) |
| 222 | + dev_b = AT(zeros(T, N)) |
209 | 223 |
|
210 | | - KI.@kernel backend() workgroupsize=32 shfl_down_test_kernel(dev_a, dev_b) |
| 224 | + KI.@kernel backend() workgroupsize=N shfl_down_test_kernel(dev_a, dev_b, Val(N)) |
211 | 225 |
|
212 | 226 | b = Array(dev_b) |
213 | 227 | @test sum(a) ≈ b[1] |
|
0 commit comments