File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 4545# It was written assuming one workgroup of size 32 and
4646# is only valid for those
"""
    shfl_down_test_kernel(a, b, ::Val{N}) where {N}

Test kernel that exercises `KI.shfl_down` by accumulating values across a sub-group.

Each work-item loads `a[idx]`, where `idx` is its sub-group-local id, and then
repeatedly adds the result of `KI.shfl_down(val, offset)` for `offset = 1, 2, 4, ...`
while `offset < N`. After a sub-group barrier, only the work-item with `idx == 1`
writes its accumulated value to `b[idx]`; every other entry of `b` is left untouched.

# Arguments
- `a`: input array, indexed by the sub-group-local id.
- `b`: output array; only `b[1]` is written.
- `::Val{N}`: upper bound for the shuffle offsets.

NOTE(review): presumably `KI.shfl_down(val, offset)` returns the value held by the
work-item `offset` positions further along in the sub-group, in which case `b[1]`
ends up as the sum of `a[1:N]` when `N` is a power of two equal to the sub-group
size. Confirm against the `KI` documentation.
"""
function shfl_down_test_kernel(a, b, ::Val{N}) where {N}
    idx = KI.get_sub_group_local_id()

    val = a[idx]

    # Offsets double each round (1, 2, 4, ...) until they reach N.
    # The UInt32 literal is kept as written in the change under review;
    # presumably `KI.shfl_down` expects a 32-bit offset -- TODO confirm.
    offset = 0x00000001
    while offset < N
        val += KI.shfl_down(val, offset)
        offset <<= 1
    end

    KI.sub_group_barrier()

    # Only the first work-item of the sub-group publishes a result.
    if idx == 1
        b[idx] = val
    end
    return
end
@@ -215,8 +198,9 @@ function intrinsics_testsuite(backend, AT)
215198 end
216199 @testset " shfl_down(::$T )" for T in KI. shfl_down_types (backend ())
217200 N = KI. sub_group_size (backend ())
218- a = zeros (T, N)
219- rand! (a, (1 : 4 ))
201+ a = ones (T, N)
202+ # a = zeros(T, N)
203+ # rand!(a, (1:4))
220204
221205 dev_a = AT (a)
222206 dev_b = AT (zeros (T, N))
You can’t perform that action at this time.
0 commit comments