From a349b8f8dc0c0db13becbd3aa1f67e8fc527b782 Mon Sep 17 00:00:00 2001 From: sungwoochoi Date: Fri, 17 Apr 2026 16:37:02 +0900 Subject: [PATCH 1/3] separating GPU module from CPU module Optimization --- codon/cir/llvm/gpu.cpp | 7 +++++-- codon/cir/llvm/gpu.h | 4 +++- codon/cir/llvm/optimize.cpp | 7 ++++++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/codon/cir/llvm/gpu.cpp b/codon/cir/llvm/gpu.cpp index 51ebc0ccb..e516b5a20 100644 --- a/codon/cir/llvm/gpu.cpp +++ b/codon/cir/llvm/gpu.cpp @@ -945,8 +945,7 @@ void patchPTXVar(llvm::Module *M, llvm::GlobalValue *ptxVar, } } // namespace -void applyGPUTransformations(llvm::Module *M, const std::string &ptxFilename) { - llvm::LLVMContext &context = M->getContext(); +std::unique_ptr prepareGPUmodule(llvm::Module *M){ std::unique_ptr clone = llvm::CloneModule(*M); clone->setTargetTriple(llvm::Triple::normalize(GPU_TRIPLE)); clone->setDataLayout(GPU_DL); @@ -954,7 +953,11 @@ void applyGPUTransformations(llvm::Module *M, const std::string &ptxFilename) { clone->addModuleFlag(llvm::Module::ModFlagBehavior::Override, "nvvm-reflect-ftz", 1); } + return clone; +} +void applyGPUTransformations(llvm::Module *M, std::unique_ptr clone, const std::string &ptxFilename) { + llvm::LLVMContext &context = M->getContext(); llvm::NamedMDNode *nvvmAnno = clone->getOrInsertNamedMetadata("nvvm.annotations"); std::vector kernelCandidates; std::vector kernels; diff --git a/codon/cir/llvm/gpu.h b/codon/cir/llvm/gpu.h index 0a9c0734c..880e2fe4f 100644 --- a/codon/cir/llvm/gpu.h +++ b/codon/cir/llvm/gpu.h @@ -15,7 +15,9 @@ namespace ir { /// annotation) /// @param ptxFilename Filename for output PTX code; empty to use filename based on /// module -void applyGPUTransformations(llvm::Module *module, const std::string &ptxFilename = ""); + +std::unique_ptr prepareGPUmodule(llvm::Module *module); +void applyGPUTransformations(llvm::Module *module, std::unique_ptr clone, const std::string &ptxFilename = ""); } // namespace ir } //
namespace codon diff --git a/codon/cir/llvm/optimize.cpp b/codon/cir/llvm/optimize.cpp index c0224f75e..b96a66d76 100644 --- a/codon/cir/llvm/optimize.cpp +++ b/codon/cir/llvm/optimize.cpp @@ -1074,6 +1074,11 @@ void verify(llvm::Module *module) { void optimize(llvm::Module *module, bool debug, bool jit, PluginManager *plugins) { verify(module); + std::unique_ptr GPUmodule; + { + TIME("preparing/gpu"); + GPUmodule = prepareGPUmodule(module); + } { TIME("llvm/opt1"); runLLVMOptimizationPasses(module, debug, jit, plugins); @@ -1084,7 +1089,7 @@ void optimize(llvm::Module *module, bool debug, bool jit, PluginManager *plugins } { TIME("llvm/gpu"); - applyGPUTransformations(module); + applyGPUTransformations(module, std::move(GPUmodule)); } verify(module); } From fe5136cad724f158bdf9d0c0cd92c6ba92c37137 Mon Sep 17 00:00:00 2001 From: sungwoochoi Date: Mon, 20 Apr 2026 09:17:40 +0900 Subject: [PATCH 2/3] apply optimization for gpu module --- codon/cir/llvm/optimize.cpp | 68 +++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 26 deletions(-) diff --git a/codon/cir/llvm/optimize.cpp b/codon/cir/llvm/optimize.cpp index b96a66d76..79372daef 100644 --- a/codon/cir/llvm/optimize.cpp +++ b/codon/cir/llvm/optimize.cpp @@ -995,28 +995,9 @@ llvm::cl::opt llvm::cl::desc("Disable architecture-specific optimizations"), llvm::cl::init(false)); -void runLLVMOptimizationPasses(llvm::Module *module, bool debug, bool jit, - PluginManager *plugins) { - applyDebugTransformations(module, debug, jit); - applyFastMathTransformations(module); - - llvm::LoopAnalysisManager lam; - llvm::FunctionAnalysisManager fam; - llvm::CGSCCAnalysisManager cgam; - llvm::ModuleAnalysisManager mam; - auto machine = getTargetMachine(module, /*setFunctionAttributes=*/true); - llvm::PassBuilder pb(machine.get()); - - llvm::Triple moduleTriple(module->getTargetTriple()); - llvm::TargetLibraryInfoImpl tlii(moduleTriple); - fam.registerPass([&] { return llvm::TargetLibraryAnalysis(tlii); }); - - 
pb.registerModuleAnalyses(mam); - pb.registerCGSCCAnalyses(cgam); - pb.registerFunctionAnalyses(fam); - pb.registerLoopAnalyses(lam); - pb.crossRegisterProxies(lam, fam, cgam, mam); - +void registerCodonLLVMOptimizationPasses(llvm::PassBuilder &pb, bool debug, + PluginManager *plugins, bool includeNative, + bool includePlugins) { pb.registerLateLoopOptimizationsEPCallback( [&](llvm::LoopPassManager &pm, llvm::OptimizationLevel opt) { if (opt.isOptimizingForSpeed()) @@ -1035,14 +1016,43 @@ void runLLVMOptimizationPasses(llvm::Module *module, bool debug, bool jit, } }); - if (!DisableNative) + if (!DisableNative && includeNative) addNativeLLVMPasses(&pb); - if (plugins) { + if (includePlugins && plugins) { for (auto *plugin : *plugins) { plugin->dsl->addLLVMPasses(&pb, debug); } } +} + +void runLLVMOptimizationPasses(llvm::Module *module, bool debug, bool jit, + PluginManager *plugins, bool includeNative, + bool includePlugins) { + applyDebugTransformations(module, debug, jit); + applyFastMathTransformations(module); + + llvm::LoopAnalysisManager lam; + llvm::FunctionAnalysisManager fam; + llvm::CGSCCAnalysisManager cgam; + llvm::ModuleAnalysisManager mam; + auto machine = + includeNative ? 
getTargetMachine(module, /*setFunctionAttributes=*/true) + : std::unique_ptr(); + llvm::PassBuilder pb(machine.get()); + + llvm::Triple moduleTriple(module->getTargetTriple()); + llvm::TargetLibraryInfoImpl tlii(moduleTriple); + fam.registerPass([&] { return llvm::TargetLibraryAnalysis(tlii); }); + + pb.registerModuleAnalyses(mam); + pb.registerCGSCCAnalyses(cgam); + pb.registerFunctionAnalyses(fam); + pb.registerLoopAnalyses(lam); + pb.crossRegisterProxies(lam, fam, cgam, mam); + + registerCodonLLVMOptimizationPasses(pb, debug, plugins, includeNative, + includePlugins); if (debug) { llvm::ModulePassManager mpm = @@ -1081,11 +1091,17 @@ void optimize(llvm::Module *module, bool debug, bool jit, PluginManager *plugins } { TIME("llvm/opt1"); - runLLVMOptimizationPasses(module, debug, jit, plugins); + runLLVMOptimizationPasses(module, debug, jit, plugins, true, true); } if (!debug) { TIME("llvm/opt2"); - runLLVMOptimizationPasses(module, debug, jit, plugins); + runLLVMOptimizationPasses(module, debug, jit, plugins, true, true); + } + { + TIME("llvm/gpuopt"); + runLLVMOptimizationPasses(GPUmodule.get(), debug, jit, plugins, + /*includeNative=*/false, + /*includePlugins=*/false); } { TIME("llvm/gpu"); From 3f112b26f87030c504240ada3a31c6619952e1e8 Mon Sep 17 00:00:00 2001 From: sungwoochoi Date: Mon, 20 Apr 2026 10:49:23 +0900 Subject: [PATCH 3/3] apply opt twice --- codon/cir/llvm/optimize.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/codon/cir/llvm/optimize.cpp b/codon/cir/llvm/optimize.cpp index 79372daef..ebdb5c688 100644 --- a/codon/cir/llvm/optimize.cpp +++ b/codon/cir/llvm/optimize.cpp @@ -1098,7 +1098,13 @@ void optimize(llvm::Module *module, bool debug, bool jit, PluginManager *plugins runLLVMOptimizationPasses(module, debug, jit, plugins, true, true); } { - TIME("llvm/gpuopt"); + TIME("llvm/gpuopt1"); + runLLVMOptimizationPasses(GPUmodule.get(), debug, jit, plugins, + /*includeNative=*/false, + /*includePlugins=*/false); + } + { + 
TIME("llvm/gpuopt2"); runLLVMOptimizationPasses(GPUmodule.get(), debug, jit, plugins, /*includeNative=*/false, /*includePlugins=*/false);