Skip to content

Commit 6f4a517

Browse files
committed
Allow setting sub-group size
1 parent 49c63a2 commit 6f4a517

2 files changed

Lines changed: 6 additions & 3 deletions

File tree

src/pocl/compiler/compilation.jl

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,14 +67,17 @@ function compiler_config(dev::cl.Device; kwargs...)
6767
end
6868
return config
6969
end
70-
@noinline function _compiler_config(dev; kernel = true, name = nothing, always_inline = false, kwargs...)
70+
@noinline function _compiler_config(dev; kernel = true, name = nothing, always_inline = false, sub_group_size = 32, kwargs...)
7171
supports_fp16 = "cl_khr_fp16" in dev.extensions
7272
supports_fp64 = "cl_khr_fp64" in dev.extensions
7373

74+
if sub_group_size ∉ dev.sub_group_sizes
75+
@error("$sub_group_size is not a valid sub-group size for this device.")
76+
end
7477

7578
# create GPUCompiler objects
7679
target = SPIRVCompilerTarget(; supports_fp16, supports_fp64, kwargs...)
77-
params = OpenCLCompilerParams(; sub_group_size=32)
80+
params = OpenCLCompilerParams(; sub_group_size)
7881
return CompilerConfig(target, params; kernel, name, always_inline)
7982
end
8083

src/pocl/compiler/execution.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ export @opencl, clfunction, clconvert
44
## high-level @opencl interface
55

66
const MACRO_KWARGS = [:launch]
7-
const COMPILER_KWARGS = [:kernel, :name, :always_inline]
7+
const COMPILER_KWARGS = [:kernel, :name, :always_inline, :sub_group_size]
88
const LAUNCH_KWARGS = [:global_size, :local_size, :queue]
99

1010
macro opencl(ex...)

0 commit comments

Comments
 (0)