11# # gpucompiler interface
22
"""
    OpenCLCompilerParams(; sub_group_size = 32)

Compiler parameters attached to OpenCL compiler jobs.

# Fields
- `sub_group_size::Int`: sub-group size requested for compiled kernels; it is read by
  `GPUCompiler.finish_module!` and written to the entry function's
  `intel_reqd_sub_group_size` metadata. Defaults to 32, the same default used by
  `_compiler_config`, so that `OpenCLCompilerParams()` keeps working as it did before
  the field was introduced.
"""
Base.@kwdef struct OpenCLCompilerParams <: AbstractCompilerParams
    sub_group_size::Int = 32
end
6+
# Aliases for the `CompilerConfig` / `CompilerJob` types parametrised by
# `SPIRVCompilerTarget` and `OpenCLCompilerParams`; `OpenCLCompilerJob` is the type the
# methods below dispatch on.
47const OpenCLCompilerConfig = CompilerConfig{SPIRVCompilerTarget, OpenCLCompilerParams}
58const OpenCLCompilerJob = CompilerJob{SPIRVCompilerTarget, OpenCLCompilerParams}
69
@@ -19,7 +22,21 @@ GPUCompiler.isintrinsic(job::OpenCLCompilerJob, fn::String) =
1922 in (fn, known_intrinsics) ||
2023 contains (fn, " __spirv_" )
2124
"""
    GPUCompiler.finish_module!(job::OpenCLCompilerJob, mod::LLVM.Module, entry::LLVM.Function)

Finish `mod` for an OpenCL job.

First runs the `finish_module!` method selected for the signature
`(CompilerJob{SPIRVCompilerTarget}, LLVM.Module, LLVM.Function)`, then tags the entry
function it returns with `intel_reqd_sub_group_size` metadata holding the job's
`sub_group_size` as an `Int32` constant. Returns that entry function.
"""
function GPUCompiler.finish_module!(
        @nospecialize(job::OpenCLCompilerJob),
        mod::LLVM.Module, entry::LLVM.Function
    )
    # `invoke` selects the method for the less specific SPIR-V job signature rather than
    # dispatching back to this method.
    spirv_signature = Tuple{CompilerJob{SPIRVCompilerTarget}, LLVM.Module, LLVM.Function}
    entry = invoke(GPUCompiler.finish_module!, spirv_signature, job, mod, entry)

    # Set the subgroup size
    requested_size = Int32(job.config.params.sub_group_size)
    metadata(entry)["intel_reqd_sub_group_size"] = MDNode([ConstantInt(requested_size)])

    return entry
end
2340# # compiler implementation (cache, configure, compile, and link)
2441
2542# cache of compilation caches, per context
@@ -45,14 +62,17 @@ function compiler_config(dev::cl.Device; kwargs...)
4562 end
4663 return config
4764end
# Build the compiler configuration for device `dev`.
#
# Keywords:
# - `kernel`, `name`, `always_inline`: forwarded to `CompilerConfig`.
# - `sub_group_size`: stored in `OpenCLCompilerParams`; must be an element of
#   `dev.sub_group_sizes`.
# - `kwargs...`: forwarded to `SPIRVCompilerTarget`.
#
# Throws an `ArgumentError` when `sub_group_size` is not listed in `dev.sub_group_sizes`.
@noinline function _compiler_config(
        dev; kernel = true, name = nothing, always_inline = false,
        sub_group_size = 32, kwargs...
    )
    supports_fp16 = "cl_khr_fp16" in dev.extensions
    supports_fp64 = "cl_khr_fp64" in dev.extensions

    # Reject unsupported sizes instead of only logging: `@error` emits a log record and
    # lets execution continue, which would build a config carrying the invalid size.
    if sub_group_size ∉ dev.sub_group_sizes
        throw(
            ArgumentError(
                "$sub_group_size is not a valid sub-group size for this device."
            )
        )
    end

    # create GPUCompiler objects
    target = SPIRVCompilerTarget(; supports_fp16, supports_fp64, kwargs...)
    params = OpenCLCompilerParams(; sub_group_size)
    return CompilerConfig(target, params; kernel, name, always_inline)
end
5878
0 commit comments