From b94fcb4aa33b7a3fb174f387920c47c544d9a42b Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 18 Jul 2024 12:34:58 +0200 Subject: [PATCH 01/17] Implement syevd_batch and heevd_batch --- dpnp/backend/extensions/lapack/CMakeLists.txt | 2 + .../extensions/lapack/common_helpers.hpp | 9 + .../extensions/lapack/evd_batch_common.hpp | 174 +++++++++++++++ .../backend/extensions/lapack/heevd_batch.cpp | 207 ++++++++++++++++++ .../backend/extensions/lapack/heevd_batch.hpp | 38 ++++ dpnp/backend/extensions/lapack/lapack_py.cpp | 5 + .../backend/extensions/lapack/syevd_batch.cpp | 206 +++++++++++++++++ .../backend/extensions/lapack/syevd_batch.hpp | 38 ++++ dpnp/linalg/dpnp_utils_linalg.py | 103 ++++----- 9 files changed, 725 insertions(+), 57 deletions(-) create mode 100644 dpnp/backend/extensions/lapack/evd_batch_common.hpp create mode 100644 dpnp/backend/extensions/lapack/heevd_batch.cpp create mode 100644 dpnp/backend/extensions/lapack/heevd_batch.hpp create mode 100644 dpnp/backend/extensions/lapack/syevd_batch.cpp create mode 100644 dpnp/backend/extensions/lapack/syevd_batch.hpp diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt index f21f61c84dfd..03fceb05482e 100644 --- a/dpnp/backend/extensions/lapack/CMakeLists.txt +++ b/dpnp/backend/extensions/lapack/CMakeLists.txt @@ -36,11 +36,13 @@ set(_module_src ${CMAKE_CURRENT_SOURCE_DIR}/getri_batch.cpp ${CMAKE_CURRENT_SOURCE_DIR}/getrs.cpp ${CMAKE_CURRENT_SOURCE_DIR}/heevd.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/heevd_batch.cpp ${CMAKE_CURRENT_SOURCE_DIR}/orgqr.cpp ${CMAKE_CURRENT_SOURCE_DIR}/orgqr_batch.cpp ${CMAKE_CURRENT_SOURCE_DIR}/potrf.cpp ${CMAKE_CURRENT_SOURCE_DIR}/potrf_batch.cpp ${CMAKE_CURRENT_SOURCE_DIR}/syevd.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/syevd_batch.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ungqr.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ungqr_batch.cpp ) diff --git a/dpnp/backend/extensions/lapack/common_helpers.hpp b/dpnp/backend/extensions/lapack/common_helpers.hpp index 0c818e74664f..ce4c416f27fe 100644 --- a/dpnp/backend/extensions/lapack/common_helpers.hpp +++ b/dpnp/backend/extensions/lapack/common_helpers.hpp @@ -24,6 +24,7 @@ //***************************************************************************** #pragma once +#include #include #include @@ -40,4 +41,12 @@ struct value_type_of> { using type = T; }; + +// Rounds up the number `value` to the nearest multiple of `mult`. +template +intT round_up_mult(intT value, intT mult) +{ + intT q = (value + (mult - 1)) / mult; + return q * mult; +} } // namespace dpnp::extensions::lapack::helper diff --git a/dpnp/backend/extensions/lapack/evd_batch_common.hpp b/dpnp/backend/extensions/lapack/evd_batch_common.hpp new file mode 100644 index 000000000000..0de1b2fd9796 --- /dev/null +++ b/dpnp/backend/extensions/lapack/evd_batch_common.hpp @@ -0,0 +1,174 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/output_validation.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +#include "types_matrix.hpp" + +namespace dpnp::extensions::lapack::evd +{ +typedef sycl::event (*evd_batch_impl_fn_ptr_t)( + sycl::queue &, + const oneapi::mkl::job, + const oneapi::mkl::uplo, + const std::int64_t, + const std::int64_t, + char *, + char *, + const std::vector &); + +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +namespace py = pybind11; + +template +std::pair + evd_batch_func(sycl::queue &exec_q, + const std::int8_t jobz, + const std::int8_t upper_lower, + dpctl::tensor::usm_ndarray &eig_vecs, + dpctl::tensor::usm_ndarray &eig_vals, + const std::vector &depends, + const dispatchT &evd_batch_dispatch_table) +{ + const int eig_vecs_nd = eig_vecs.get_ndim(); + const int eig_vals_nd = eig_vals.get_ndim(); + + if (eig_vecs_nd != 3) { + throw py::value_error("Unexpected ndim=" + std::to_string(eig_vecs_nd) + + " of an output array with eigenvectors"); + } + else if (eig_vals_nd != 2) { + throw py::value_error("Unexpected ndim=" + std::to_string(eig_vals_nd) + + " of an output array with eigenvalues"); + } + + const py::ssize_t *eig_vecs_shape = eig_vecs.get_shape_raw(); + const py::ssize_t *eig_vals_shape = eig_vals.get_shape_raw(); + + if (eig_vecs_shape[0] != eig_vecs_shape[1]) { + throw py::value_error("The batch shape of 'eig_vecs' must be square."); + } + else if (eig_vecs_shape[2] != eig_vals_shape[0] || + eig_vecs_shape[0] != eig_vals_shape[1]) + { + throw py::value_error( + "The shape of 'eig_vals' must be (batch_size, n), " + "where batch_size = " + + std::to_string(eig_vecs_shape[0]) + + " and n = " + std::to_string(eig_vecs_shape[1])); + } + + size_t src_nelems(1); + + for (int i = 0; i < eig_vecs_nd; ++i) { + src_nelems *= static_cast(eig_vecs_shape[i]); + } + + if (src_nelems == 0) { + // nothing to do + return std::make_pair(sycl::event(), sycl::event()); + } + + dpctl::tensor::validation::CheckWritable::throw_if_not_writable(eig_vecs); + dpctl::tensor::validation::CheckWritable::throw_if_not_writable(eig_vals); + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible(exec_q, {eig_vecs, eig_vals})) { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(eig_vecs, eig_vals)) { + throw py::value_error("Arrays with eigenvectors and eigenvalues are " + "overlapping segments of memory"); + } + + bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous(); + bool is_eig_vals_c_contig = eig_vals.is_c_contiguous(); + if (!is_eig_vecs_f_contig) { + throw py::value_error( + "An array with input matrix / output eigenvectors " + "must be F-contiguous"); + } + else if (!is_eig_vals_c_contig) { + throw py::value_error( + "An array with output eigenvalues must be C-contiguous"); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int eig_vecs_type_id = + array_types.typenum_to_lookup_id(eig_vecs.get_typenum()); + int eig_vals_type_id = + array_types.typenum_to_lookup_id(eig_vals.get_typenum()); + + evd_batch_impl_fn_ptr_t evd_batch_fn = + evd_batch_dispatch_table[eig_vecs_type_id][eig_vals_type_id]; + if (evd_batch_fn == nullptr) { + throw py::value_error( + "Types of input vectors and result array are mismatched."); + } + + char *eig_vecs_data = eig_vecs.get_data(); + char *eig_vals_data = eig_vals.get_data(); + + const std::int64_t batch_size = eig_vecs_shape[2]; + const std::int64_t n = eig_vecs_shape[1]; + + const oneapi::mkl::job jobz_val = static_cast(jobz); + const oneapi::mkl::uplo uplo_val = + static_cast(upper_lower); + + sycl::event evd_batch_ev = + evd_batch_fn(exec_q, jobz_val, uplo_val, batch_size, n, eig_vecs_data, + eig_vals_data, depends); + + sycl::event ht_ev = dpctl::utils::keep_args_alive( + exec_q, {eig_vecs, eig_vals}, {evd_batch_ev}); + + return std::make_pair(ht_ev, evd_batch_ev); +} + +template + typename factoryT> +void init_evd_batch_dispatch_table( + dispatchT evd_batch_dispatch_table[][dpctl_td_ns::num_types]) +{ + dpctl_td_ns::DispatchTableBuilder + contig; + contig.populate_dispatch_table(evd_batch_dispatch_table); +} +} // namespace dpnp::extensions::lapack::evd diff --git a/dpnp/backend/extensions/lapack/heevd_batch.cpp b/dpnp/backend/extensions/lapack/heevd_batch.cpp new file mode 100644 index 000000000000..d447f27b5812 --- /dev/null +++ b/dpnp/backend/extensions/lapack/heevd_batch.cpp @@ -0,0 +1,207 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include "heevd_batch.hpp" +#include "common_helpers.hpp" + +namespace dpnp::extensions::lapack +{ +namespace mkl_lapack = oneapi::mkl::lapack; +namespace type_utils = dpctl::tensor::type_utils; + +template +static sycl::event heevd_batch_impl(sycl::queue &exec_q, + const oneapi::mkl::job jobz, + const oneapi::mkl::uplo upper_lower, + const std::int64_t batch_size, + const std::int64_t n, + char *in_a, + char *out_w, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + type_utils::validate_type_for_device(exec_q); + + T *a = reinterpret_cast(in_a); + RealT *w = reinterpret_cast(out_w); + + std::int64_t a_size = n * n; + std::int64_t w_size = n; + + const std::int64_t lda = std::max(1UL, n); + + // Get the number of independent linear streams + std::int64_t n_linear_streams = + (batch_size > 16) ? 4 : ((batch_size > 4 ? 2 : 1)); + + const std::int64_t scratchpad_size = + mkl_lapack::heevd_scratchpad_size(exec_q, jobz, upper_lower, n, lda); + T *scratchpad = nullptr; + + // Get padding size to ensure memory allocations are aligned to 256 bytes + // for better performance + std::int64_t padding = 256 / sizeof(T); + + if (scratchpad_size > 0) { + // Calculate the total scratchpad memory size needed for all linear + // streams with proper alignment + size_t alloc_scratch_size = + helper::round_up_mult(n_linear_streams * scratchpad_size, padding); + + // Allocate memory for the total scratchpad + scratchpad = sycl::malloc_device(alloc_scratch_size, exec_q); + if (!scratchpad) + throw std::runtime_error("Device allocation for scratchpad failed"); + } + + // Computation events to manage dependencies for each linear stream + std::vector> comp_evs(n_linear_streams, depends); + + std::stringstream error_msg; + std::int64_t info = 0; + + // Release GIL to avoid serialization of host task + // submissions to the same queue in OneMKL + py::gil_scoped_release release; + + for (std::int64_t batch_id = 0; batch_id < batch_size; ++batch_id) { + T *a_batch = a + batch_id * a_size; + RealT *w_batch = w + batch_id * w_size; + + std::int64_t stream_id = (batch_id % n_linear_streams); + + T *current_scratch_heevd = nullptr; + if (scratchpad != nullptr) { + current_scratch_heevd = scratchpad + stream_id * scratchpad_size; + } + + // Get the event dependencies for the current stream + const auto ¤t_dep = comp_evs[stream_id]; + + sycl::event heevd_event; + try { + heevd_event = mkl_lapack::heevd( + exec_q, + jobz, // 'jobz == job::vec' means eigenvalues and eigenvectors + // are computed. + upper_lower, // 'upper_lower == job::upper' means the upper + // triangular part of A, or the lower triangular + // otherwise + n, // The order of the matrix A (0 <= n) + a_batch, // Pointer to the square A (n x n) + // If 'jobz == job::vec', then on exit it will + // contain the eigenvectors of A + lda, // The leading dimension of A, must be at least max(1, n) + w_batch, // Pointer to array of size at least n, it will contain + // the eigenvalues of A in ascending order + current_scratch_heevd, // Pointer to scratchpad memory to be + // used by MKL routine for storing + // intermediate results + scratchpad_size, current_dep); + } catch (mkl_lapack::exception const &e) { + error_msg << "Unexpected MKL exception caught during heevd() " + "call:\nreason: " + << e.what() << "\ninfo: " << e.info(); + info = e.info(); + } catch (sycl::exception const &e) { + error_msg + << "Unexpected SYCL exception caught during heevd() call:\n" + << e.what(); + info = -1; + } + + // Update the event dependencies for the current stream + comp_evs[stream_id] = {heevd_event}; + } + + if (info != 0) // an unexpected error occurs + { + if (scratchpad != nullptr) { + sycl::free(scratchpad, exec_q); + } + throw std::runtime_error(error_msg.str()); + } + + sycl::event ht_ev = exec_q.submit([&](sycl::handler &cgh) { + for (const auto &ev : comp_evs) { + cgh.depends_on(ev); + } + auto ctx = exec_q.get_context(); + cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); + }); + + return ht_ev; +} + +template +struct HeevdBatchContigFactory +{ + fnT get() + { + if constexpr (types::HeevdTypePairSupportFactory::is_defined) + { + return heevd_batch_impl; + } + else { + return nullptr; + } + } +}; + +using evd::evd_batch_impl_fn_ptr_t; + +void init_heevd_batch(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + static evd_batch_impl_fn_ptr_t + heevd_batch_dispatch_table[dpctl_td_ns::num_types] + [dpctl_td_ns::num_types]; + + { + evd::init_evd_batch_dispatch_table( + heevd_batch_dispatch_table); + + auto heevd_batch_pyapi = + [&](sycl::queue &exec_q, const std::int8_t jobz, + const std::int8_t upper_lower, arrayT &eig_vecs, + arrayT &eig_vals, const event_vecT &depends = {}) { + return evd::evd_batch_func(exec_q, jobz, upper_lower, eig_vecs, + eig_vals, depends, + heevd_batch_dispatch_table); + }; + m.def( + "_heevd_batch", heevd_batch_pyapi, + "Call `heevd` from OneMKL LAPACK library in a loop to return " + "the eigenvalues and eigenvectors of a batch of complex Hermitian " + "matrices", + py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"), + py::arg("eig_vecs"), py::arg("eig_vals"), + py::arg("depends") = py::list()); + } +} +} // namespace dpnp::extensions::lapack diff --git a/dpnp/backend/extensions/lapack/heevd_batch.hpp b/dpnp/backend/extensions/lapack/heevd_batch.hpp new file mode 100644 index 000000000000..3a3fa4bab991 --- /dev/null +++ b/dpnp/backend/extensions/lapack/heevd_batch.hpp @@ -0,0 +1,38 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include + +#include "evd_batch_common.hpp" + +namespace py = pybind11; + +namespace dpnp::extensions::lapack +{ +void init_heevd_batch(py::module_ m); +} // namespace dpnp::extensions::lapack diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp index b854a3136a9c..7afa67e84eca 100644 --- a/dpnp/backend/extensions/lapack/lapack_py.cpp +++ b/dpnp/backend/extensions/lapack/lapack_py.cpp @@ -37,10 +37,12 @@ #include "getri.hpp" #include "getrs.hpp" #include "heevd.hpp" +#include "heevd_batch.hpp" #include "linalg_exceptions.hpp" #include "orgqr.hpp" #include "potrf.hpp" #include "syevd.hpp" +#include "syevd_batch.hpp" #include "ungqr.hpp" namespace lapack_ext = dpnp::extensions::lapack; @@ -83,6 +85,9 @@ PYBIND11_MODULE(_lapack_impl, m) lapack_ext::init_heevd(m); lapack_ext::init_syevd(m); + lapack_ext::init_heevd_batch(m); + lapack_ext::init_syevd_batch(m); + m.def("_geqrf_batch", &lapack_ext::geqrf_batch, "Call `geqrf_batch` from OneMKL LAPACK library to return " "the QR factorization of a batch general matrix ", diff --git a/dpnp/backend/extensions/lapack/syevd_batch.cpp b/dpnp/backend/extensions/lapack/syevd_batch.cpp new file mode 100644 index 000000000000..843c129241c9 --- /dev/null +++ b/dpnp/backend/extensions/lapack/syevd_batch.cpp @@ -0,0 +1,206 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include "syevd_batch.hpp" +#include "common_helpers.hpp" + +namespace dpnp::extensions::lapack +{ +namespace mkl_lapack = oneapi::mkl::lapack; +namespace type_utils = dpctl::tensor::type_utils; + +template +static sycl::event syevd_batch_impl(sycl::queue &exec_q, + const oneapi::mkl::job jobz, + const oneapi::mkl::uplo upper_lower, + const std::int64_t batch_size, + const std::int64_t n, + char *in_a, + char *out_w, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + type_utils::validate_type_for_device(exec_q); + + T *a = reinterpret_cast(in_a); + RealT *w = reinterpret_cast(out_w); + + std::int64_t a_size = n * n; + std::int64_t w_size = n; + + const std::int64_t lda = std::max(1UL, n); + + // Get the number of independent linear streams + std::int64_t n_linear_streams = + (batch_size > 16) ? 4 : ((batch_size > 4 ? 2 : 1)); + + const std::int64_t scratchpad_size = + mkl_lapack::syevd_scratchpad_size(exec_q, jobz, upper_lower, n, lda); + T *scratchpad = nullptr; + + // Get padding size to ensure memory allocations are aligned to 256 bytes + // for better performance + std::int64_t padding = 256 / sizeof(T); + + if (scratchpad_size > 0) { + // Calculate the total scratchpad memory size needed for all linear + // streams with proper alignment + size_t alloc_scratch_size = + helper::round_up_mult(n_linear_streams * scratchpad_size, padding); + + // Allocate memory for the total scratchpad + scratchpad = sycl::malloc_device(alloc_scratch_size, exec_q); + if (!scratchpad) + throw std::runtime_error("Device allocation for scratchpad failed"); + } + + // Computation events to manage dependencies for each linear stream + std::vector> comp_evs(n_linear_streams, depends); + + std::stringstream error_msg; + std::int64_t info = 0; + + // Release GIL to avoid serialization of host task + // submissions to the same queue in OneMKL + py::gil_scoped_release release; + + for (std::int64_t batch_id = 0; batch_id < batch_size; ++batch_id) { + T *a_batch = a + batch_id * a_size; + RealT *w_batch = w + batch_id * w_size; + + std::int64_t stream_id = (batch_id % n_linear_streams); + + T *current_scratch_syevd = nullptr; + if (scratchpad != nullptr) { + current_scratch_syevd = scratchpad + stream_id * scratchpad_size; + } + + // Get the event dependencies for the current stream + const auto ¤t_dep = comp_evs[stream_id]; + + sycl::event syevd_event; + try { + syevd_event = mkl_lapack::syevd( + exec_q, + jobz, // 'jobz == job::vec' means eigenvalues and eigenvectors + // are computed. + upper_lower, // 'upper_lower == job::upper' means the upper + // triangular part of A, or the lower triangular + // otherwise + n, // The order of the matrix A (0 <= n) + a_batch, // Pointer to the square A (n x n) + // If 'jobz == job::vec', then on exit it will + // contain the eigenvectors of A + lda, // The leading dimension of A, must be at least max(1, n) + w_batch, // Pointer to array of size at least n, it will contain + // the eigenvalues of A in ascending order + current_scratch_syevd, // Pointer to scratchpad memory to be + // used by MKL routine for storing + // intermediate results + scratchpad_size, current_dep); + } catch (mkl_lapack::exception const &e) { + error_msg << "Unexpected MKL exception caught during syevd() " + "call:\nreason: " + << e.what() << "\ninfo: " << e.info(); + info = e.info(); + } catch (sycl::exception const &e) { + error_msg + << "Unexpected SYCL exception caught during syevd() call:\n" + << e.what(); + info = -1; + } + + // Update the event dependencies for the current stream + comp_evs[stream_id] = {syevd_event}; + } + + if (info != 0) // an unexpected error occurs + { + if (scratchpad != nullptr) { + sycl::free(scratchpad, exec_q); + } + throw std::runtime_error(error_msg.str()); + } + + sycl::event ht_ev = exec_q.submit([&](sycl::handler &cgh) { + for (const auto &ev : comp_evs) { + cgh.depends_on(ev); + } + auto ctx = exec_q.get_context(); + cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); + }); + + return ht_ev; +} + +template +struct SyevdBatchContigFactory +{ + fnT get() + { + if constexpr (types::SyevdTypePairSupportFactory::is_defined) + { + return syevd_batch_impl; + } + else { + return nullptr; + } + } +}; + +using evd::evd_batch_impl_fn_ptr_t; + +void init_syevd_batch(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + static evd_batch_impl_fn_ptr_t + syevd_batch_dispatch_table[dpctl_td_ns::num_types] + [dpctl_td_ns::num_types]; + + { + evd::init_evd_batch_dispatch_table( + syevd_batch_dispatch_table); + + auto syevd_batch_pyapi = + [&](sycl::queue &exec_q, const std::int8_t jobz, + const std::int8_t upper_lower, arrayT &eig_vecs, + arrayT &eig_vals, const event_vecT &depends = {}) { + return evd::evd_batch_func(exec_q, jobz, upper_lower, eig_vecs, + eig_vals, depends, + syevd_batch_dispatch_table); + }; + m.def("_syevd_batch", syevd_batch_pyapi, + "Call `syevd` from OneMKL LAPACK library in a loop to return " + "the eigenvalues and eigenvectors of a batch of real symmetric " + "matrices", + py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"), + py::arg("eig_vecs"), py::arg("eig_vals"), + py::arg("depends") = py::list()); + } +} +} // namespace dpnp::extensions::lapack diff --git a/dpnp/backend/extensions/lapack/syevd_batch.hpp b/dpnp/backend/extensions/lapack/syevd_batch.hpp new file mode 100644 index 000000000000..04e1a3c035fc --- /dev/null +++ b/dpnp/backend/extensions/lapack/syevd_batch.hpp @@ -0,0 +1,38 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include + +#include "evd_batch_common.hpp" + +namespace py = pybind11; + +namespace dpnp::extensions::lapack +{ +void init_syevd_batch(py::module_ m); +} // namespace dpnp::extensions::lapack diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 10e93db2e1a6..2ed24ff3251e 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -96,22 +96,6 @@ def _batched_eigh(a, UPLO, eigen_mode, w_type, v_type): """ - is_cpu_device = a.sycl_device.has_aspect_cpu - orig_shape = a.shape - # get 3d input array by reshape - a = dpnp.reshape(a, (-1, orig_shape[-2], orig_shape[-1])) - a_usm_arr = dpnp.get_usm_ndarray(a) - - # allocate a memory for dpnp array of eigenvalues - w = dpnp.empty_like( - a, - shape=orig_shape[:-1], - dtype=w_type, - ) - w_orig_shape = w.shape - # get 2d dpnp array with eigenvalues by reshape - w = w.reshape(-1, w_orig_shape[-1]) - # `eigen_mode` can be either "N" or "V", specifying the computation mode # for OneMKL LAPACK `syevd` and `heevd` routines. # "V" (default) means both eigenvectors and eigenvalues will be calculated @@ -119,62 +103,67 @@ def _batched_eigh(a, UPLO, eigen_mode, w_type, v_type): jobz = _jobz[eigen_mode] uplo = _upper_lower[UPLO] - # Get LAPACK function (_syevd for real or _heevd for complex data types) + # Get LAPACK function (_syevd_batch for real or _heevd_batch + # for complex data types) # to compute all eigenvalues and, optionally, all eigenvectors lapack_func = ( - "_heevd" if dpnp.issubdtype(v_type, dpnp.complexfloating) else "_syevd" + "_heevd_batch" + if dpnp.issubdtype(v_type, dpnp.complexfloating) + else "_syevd_batch" ) a_sycl_queue = a.sycl_queue - a_order = "C" if a.flags.c_contiguous else "F" + + a_orig_shape = a.shape + a_orig_order = "C" if a.flags.c_contiguous else "F" + # get 3d input array by reshape + a = dpnp.reshape(a, (-1, a_orig_shape[-2], a_orig_shape[-1])) _manager = dpu.SequentialOrderManager[a_sycl_queue] + dep_evs = _manager.submitted_events - # need to loop over the 1st dimension to get eigenvalues and - # eigenvectors of 3d matrix A - batch_size = a.shape[0] - eig_vecs = [None] * batch_size - for i in range(batch_size): - # oneMKL LAPACK assumes fortran-like array as input, so - # allocate a memory with 'F' order for dpnp array of eigenvectors - eig_vecs[i] = dpnp.empty_like(a[i], order="F", dtype=v_type) + # Reorder the elements by moving the last two axes of `a` to the front + # to match fortran-like array order which is assumed by syevd/heevd. + a = dpnp.moveaxis(a, (-2, -1), (0, 1)) + a_usm_arr = dpnp.get_usm_ndarray(a) - # use DPCTL tensor function to fill the array of eigenvectors with - # content of input array - ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=a_usm_arr[i], - dst=eig_vecs[i].get_array(), - sycl_queue=a_sycl_queue, - depends=_manager.submitted_events, - ) - _manager.add_event_pair(ht_ev, copy_ev) + a_copy = dpnp.empty_like(a, dtype=v_type, order="F") + ht_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, + dst=a_copy.get_array(), + sycl_queue=a_sycl_queue, + depends=dep_evs, + ) + _manager.add_event_pair(ht_ev, a_copy_ev) - # TODO: Remove this w/a when MKLD-17201 is solved. - # Waiting for a host task executing an OneMKL LAPACK syevd/heevd call - # on CPU causes deadlock due to serialization of all host tasks - # in the queue. - # We need to wait for each host tasks before calling _seyvd and _heevd - # to avoid deadlock. - if is_cpu_device: - dpnp.synchronize_array_data(a) + # allocate a memory for dpnp array of eigenvalues + w = dpnp.empty_like(a, shape=a_orig_shape[:-1], dtype=w_type) + w_orig_shape = w.shape - # call LAPACK extension function to get eigenvalues and - # eigenvectors of a portion of matrix A - ht_ev, lapack_ev = getattr(li, lapack_func)( - a_sycl_queue, - jobz, - uplo, - eig_vecs[i].get_array(), - w[i].get_array(), - depends=[copy_ev], - ) - _manager.add_event_pair(ht_ev, lapack_ev) + # get 2d dpnp array with eigenvalues by reshape + w = w.reshape(-1, w_orig_shape[-1]) + + ht_ev, evd_batch_ev = getattr(li, lapack_func)( + a_sycl_queue, + jobz, + uplo, + a_copy.get_array(), + w.get_array(), + depends=[a_copy_ev], + ) + + _manager.add_event_pair(ht_ev, evd_batch_ev) w = w.reshape(w_orig_shape) if eigen_mode == "V": - # combine the list of eigenvectors into a single array - v = dpnp.array(eig_vecs, order=a_order).reshape(orig_shape) + # syevd/heevd call overwtires `a` in Fortran order, reorder the axes + # to match C order by moving the last axis to the front and + # reshape it back to the original shape of `a`. + v = dpnp.moveaxis(a_copy, -1, 0).reshape(a_orig_shape) + # Convert to contiguous to align with NumPy + if a_orig_order == "C": + v = dpnp.ascontiguousarray(v) return w, v return w From 2da4e5992bae3053c91e4b99b1e2bbe0f9b6699a Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 24 Jul 2024 11:31:57 +0200 Subject: [PATCH 02/17] Move include dpctl type_utils header to sourse files --- dpnp/backend/extensions/lapack/evd_batch_common.hpp | 1 - dpnp/backend/extensions/lapack/evd_common.hpp | 1 - dpnp/backend/extensions/lapack/heevd.cpp | 3 +++ dpnp/backend/extensions/lapack/heevd_batch.cpp | 3 +++ dpnp/backend/extensions/lapack/syevd.cpp | 3 +++ dpnp/backend/extensions/lapack/syevd_batch.cpp | 3 +++ 6 files changed, 12 insertions(+), 2 deletions(-) diff --git a/dpnp/backend/extensions/lapack/evd_batch_common.hpp b/dpnp/backend/extensions/lapack/evd_batch_common.hpp index 0de1b2fd9796..c22f0bb1c953 100644 --- a/dpnp/backend/extensions/lapack/evd_batch_common.hpp +++ b/dpnp/backend/extensions/lapack/evd_batch_common.hpp @@ -32,7 +32,6 @@ #include "utils/memory_overlap.hpp" #include "utils/output_validation.hpp" #include "utils/type_dispatch.hpp" -#include "utils/type_utils.hpp" #include "types_matrix.hpp" diff --git a/dpnp/backend/extensions/lapack/evd_common.hpp b/dpnp/backend/extensions/lapack/evd_common.hpp index 9ddb20fb8faa..b15deeaeaa0b 100644 --- a/dpnp/backend/extensions/lapack/evd_common.hpp +++ b/dpnp/backend/extensions/lapack/evd_common.hpp @@ -32,7 +32,6 @@ #include "utils/memory_overlap.hpp" #include "utils/output_validation.hpp" #include "utils/type_dispatch.hpp" -#include "utils/type_utils.hpp" #include "types_matrix.hpp" diff --git a/dpnp/backend/extensions/lapack/heevd.cpp b/dpnp/backend/extensions/lapack/heevd.cpp index 285926bb5986..6d246819f0d6 100644 --- a/dpnp/backend/extensions/lapack/heevd.cpp +++ b/dpnp/backend/extensions/lapack/heevd.cpp @@ -25,6 +25,9 @@ #include "heevd.hpp" +// dpctl tensor headers +#include "utils/type_utils.hpp" + namespace dpnp::extensions::lapack { namespace mkl_lapack = oneapi::mkl::lapack; diff --git a/dpnp/backend/extensions/lapack/heevd_batch.cpp b/dpnp/backend/extensions/lapack/heevd_batch.cpp index d447f27b5812..7d4f0d5cb017 100644 --- a/dpnp/backend/extensions/lapack/heevd_batch.cpp +++ b/dpnp/backend/extensions/lapack/heevd_batch.cpp @@ -26,6 +26,9 @@ #include "heevd_batch.hpp" #include "common_helpers.hpp" +// dpctl tensor headers +#include "utils/type_utils.hpp" + namespace dpnp::extensions::lapack { namespace mkl_lapack = oneapi::mkl::lapack; diff --git a/dpnp/backend/extensions/lapack/syevd.cpp b/dpnp/backend/extensions/lapack/syevd.cpp index e30ecf0208d5..527013cf0e62 100644 --- a/dpnp/backend/extensions/lapack/syevd.cpp +++ b/dpnp/backend/extensions/lapack/syevd.cpp @@ -25,6 +25,9 @@ #include "syevd.hpp" +// dpctl tensor headers +#include "utils/type_utils.hpp" + namespace dpnp::extensions::lapack { namespace mkl_lapack = oneapi::mkl::lapack; diff --git a/dpnp/backend/extensions/lapack/syevd_batch.cpp b/dpnp/backend/extensions/lapack/syevd_batch.cpp index 843c129241c9..bdbd2b7f16bc 100644 --- a/dpnp/backend/extensions/lapack/syevd_batch.cpp +++ b/dpnp/backend/extensions/lapack/syevd_batch.cpp @@ -26,6 +26,9 @@ #include "syevd_batch.hpp" #include "common_helpers.hpp" +// dpctl tensor headers +#include "utils/type_utils.hpp" + namespace dpnp::extensions::lapack { namespace mkl_lapack = oneapi::mkl::lapack; From 88249e4245e9e31e46f530ed35ed0138dd131e38 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 24 Jul 2024 11:39:31 +0200 Subject: [PATCH 03/17] Add memory alocation check for scratchpad --- dpnp/backend/extensions/lapack/heevd.cpp | 7 +++++-- dpnp/backend/extensions/lapack/syevd.cpp | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/dpnp/backend/extensions/lapack/heevd.cpp b/dpnp/backend/extensions/lapack/heevd.cpp index 6d246819f0d6..9169196de9fa 100644 --- a/dpnp/backend/extensions/lapack/heevd.cpp +++ b/dpnp/backend/extensions/lapack/heevd.cpp @@ -54,13 +54,16 @@ static sycl::event heevd_impl(sycl::queue &exec_q, mkl_lapack::heevd_scratchpad_size(exec_q, jobz, upper_lower, n, lda); T *scratchpad = nullptr; + // Allocate memory for the scratchpad + scratchpad = sycl::malloc_device(scratchpad_size, exec_q); + if (!scratchpad) + throw std::runtime_error("Device allocation for scratchpad failed"); + std::stringstream error_msg; std::int64_t info = 0; sycl::event heevd_event; try { - scratchpad = sycl::malloc_device(scratchpad_size, exec_q); - heevd_event = mkl_lapack::heevd( exec_q, jobz, // 'jobz == job::vec' means eigenvalues and eigenvectors are diff --git a/dpnp/backend/extensions/lapack/syevd.cpp b/dpnp/backend/extensions/lapack/syevd.cpp index 527013cf0e62..f1413f758050 100644 --- a/dpnp/backend/extensions/lapack/syevd.cpp +++ b/dpnp/backend/extensions/lapack/syevd.cpp @@ -54,13 +54,16 @@ static sycl::event syevd_impl(sycl::queue &exec_q, mkl_lapack::syevd_scratchpad_size(exec_q, jobz, upper_lower, n, lda); T *scratchpad = nullptr; + // Allocate memory for the scratchpad + scratchpad = sycl::malloc_device(scratchpad_size, exec_q); + if (!scratchpad) + throw std::runtime_error("Device allocation for scratchpad failed"); + std::stringstream error_msg; std::int64_t info = 0; sycl::event syevd_event; try { - scratchpad = sycl::malloc_device(scratchpad_size, exec_q); - syevd_event = mkl_lapack::syevd( exec_q, jobz, // 'jobz == job::vec' means eigenvalues and eigenvectors are From d222704bf1eec3a6682006d9494886bd8b810256 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 24 Jul 2024 12:23:30 +0200 Subject: [PATCH 04/17] Add more checks for scratchpad_size --- dpnp/backend/extensions/lapack/heevd.cpp | 9 ++++++++- dpnp/backend/extensions/lapack/heevd_batch.cpp | 6 ++++++ dpnp/backend/extensions/lapack/syevd.cpp | 9 ++++++++- dpnp/backend/extensions/lapack/syevd_batch.cpp | 6 ++++++ 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/dpnp/backend/extensions/lapack/heevd.cpp b/dpnp/backend/extensions/lapack/heevd.cpp index 9169196de9fa..57e0df30f556 100644 --- a/dpnp/backend/extensions/lapack/heevd.cpp +++ b/dpnp/backend/extensions/lapack/heevd.cpp @@ -52,8 +52,15 @@ static sycl::event heevd_impl(sycl::queue &exec_q, const std::int64_t lda = std::max(1UL, n); const std::int64_t scratchpad_size = mkl_lapack::heevd_scratchpad_size(exec_q, jobz, upper_lower, n, lda); - T *scratchpad = nullptr; + if (scratchpad_size <= 0) { + throw std::runtime_error( + "Invalid scratchpad size: must be greater than zero." + "Calculated scratchpad size: " + + std::to_string(scratchpad_size)); + } + + T *scratchpad = nullptr; // Allocate memory for the scratchpad scratchpad = sycl::malloc_device(scratchpad_size, exec_q); if (!scratchpad) diff --git a/dpnp/backend/extensions/lapack/heevd_batch.cpp b/dpnp/backend/extensions/lapack/heevd_batch.cpp index 7d4f0d5cb017..78b64ec0a9dd 100644 --- a/dpnp/backend/extensions/lapack/heevd_batch.cpp +++ b/dpnp/backend/extensions/lapack/heevd_batch.cpp @@ -78,6 +78,12 @@ static sycl::event heevd_batch_impl(sycl::queue &exec_q, if (!scratchpad) throw std::runtime_error("Device allocation for scratchpad failed"); } + else { + throw std::runtime_error( + "Invalid scratchpad size: must be greater than zero." + "Calculated scratchpad size: " + + std::to_string(scratchpad_size)); + } // Computation events to manage dependencies for each linear stream std::vector> comp_evs(n_linear_streams, depends); diff --git a/dpnp/backend/extensions/lapack/syevd.cpp b/dpnp/backend/extensions/lapack/syevd.cpp index f1413f758050..e317877c4b9f 100644 --- a/dpnp/backend/extensions/lapack/syevd.cpp +++ b/dpnp/backend/extensions/lapack/syevd.cpp @@ -52,8 +52,15 @@ static sycl::event syevd_impl(sycl::queue &exec_q, const std::int64_t lda = std::max(1UL, n); const std::int64_t scratchpad_size = mkl_lapack::syevd_scratchpad_size(exec_q, jobz, upper_lower, n, lda); - T *scratchpad = nullptr; + if (scratchpad_size <= 0) { + throw std::runtime_error( + "Invalid scratchpad size: must be greater than zero." + "Calculated scratchpad size: " + + std::to_string(scratchpad_size)); + } + + T *scratchpad = nullptr; // Allocate memory for the scratchpad scratchpad = sycl::malloc_device(scratchpad_size, exec_q); if (!scratchpad) diff --git a/dpnp/backend/extensions/lapack/syevd_batch.cpp b/dpnp/backend/extensions/lapack/syevd_batch.cpp index bdbd2b7f16bc..0ae6a67884c7 100644 --- a/dpnp/backend/extensions/lapack/syevd_batch.cpp +++ b/dpnp/backend/extensions/lapack/syevd_batch.cpp @@ -78,6 +78,12 @@ static sycl::event syevd_batch_impl(sycl::queue &exec_q, if (!scratchpad) throw std::runtime_error("Device allocation for scratchpad failed"); } + else { + throw std::runtime_error( + "Invalid scratchpad size: must be greater than zero." + "Calculated scratchpad size: " + + std::to_string(scratchpad_size)); + } // Computation events to manage dependencies for each linear stream std::vector> comp_evs(n_linear_streams, depends); From 73265c5c751d49244c3eb4395ffc24e10e6a262e Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 24 Jul 2024 12:40:57 +0200 Subject: [PATCH 05/17] Move includes --- dpnp/backend/extensions/lapack/heevd.cpp | 3 +++ dpnp/backend/extensions/lapack/heevd.hpp | 3 --- dpnp/backend/extensions/lapack/heevd_batch.cpp | 5 ++++- dpnp/backend/extensions/lapack/heevd_batch.hpp | 3 --- dpnp/backend/extensions/lapack/syevd.cpp | 3 +++ dpnp/backend/extensions/lapack/syevd.hpp | 3 --- dpnp/backend/extensions/lapack/syevd_batch.cpp | 5 ++++- dpnp/backend/extensions/lapack/syevd_batch.hpp | 3 --- 8 files changed, 14 insertions(+), 14 deletions(-) diff --git a/dpnp/backend/extensions/lapack/heevd.cpp b/dpnp/backend/extensions/lapack/heevd.cpp index 57e0df30f556..ce2b49aedff0 100644 --- a/dpnp/backend/extensions/lapack/heevd.cpp +++ b/dpnp/backend/extensions/lapack/heevd.cpp @@ -23,6 +23,9 @@ // THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************** +#include + +#include "evd_common.hpp" #include "heevd.hpp" // dpctl tensor headers diff --git a/dpnp/backend/extensions/lapack/heevd.hpp b/dpnp/backend/extensions/lapack/heevd.hpp index 34f1702af00b..25ed7f0a7a34 100644 --- a/dpnp/backend/extensions/lapack/heevd.hpp +++ b/dpnp/backend/extensions/lapack/heevd.hpp @@ -26,9 +26,6 @@ #pragma once #include -#include - -#include "evd_common.hpp" namespace py = pybind11; diff --git a/dpnp/backend/extensions/lapack/heevd_batch.cpp b/dpnp/backend/extensions/lapack/heevd_batch.cpp index 78b64ec0a9dd..905434ed5c71 100644 --- a/dpnp/backend/extensions/lapack/heevd_batch.cpp +++ b/dpnp/backend/extensions/lapack/heevd_batch.cpp @@ -23,8 +23,11 @@ // THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************** -#include "heevd_batch.hpp" +#include + #include "common_helpers.hpp" +#include "evd_batch_common.hpp" +#include "heevd_batch.hpp" // dpctl tensor headers #include "utils/type_utils.hpp" diff --git a/dpnp/backend/extensions/lapack/heevd_batch.hpp b/dpnp/backend/extensions/lapack/heevd_batch.hpp index 3a3fa4bab991..668c3e6175c6 100644 --- a/dpnp/backend/extensions/lapack/heevd_batch.hpp +++ b/dpnp/backend/extensions/lapack/heevd_batch.hpp @@ -26,9 +26,6 @@ #pragma once #include -#include - -#include "evd_batch_common.hpp" namespace py = pybind11; diff --git a/dpnp/backend/extensions/lapack/syevd.cpp b/dpnp/backend/extensions/lapack/syevd.cpp index e317877c4b9f..9b589bbbf49c 100644 --- a/dpnp/backend/extensions/lapack/syevd.cpp +++ b/dpnp/backend/extensions/lapack/syevd.cpp @@ -23,6 +23,9 @@ // THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************** +#include + +#include "evd_common.hpp" #include "syevd.hpp" // dpctl tensor headers diff --git a/dpnp/backend/extensions/lapack/syevd.hpp b/dpnp/backend/extensions/lapack/syevd.hpp index 1fb2409834f3..9bbb515ffd4d 100644 --- a/dpnp/backend/extensions/lapack/syevd.hpp +++ b/dpnp/backend/extensions/lapack/syevd.hpp @@ -26,9 +26,6 @@ #pragma once #include -#include - -#include "evd_common.hpp" namespace py = pybind11; diff --git a/dpnp/backend/extensions/lapack/syevd_batch.cpp b/dpnp/backend/extensions/lapack/syevd_batch.cpp index 0ae6a67884c7..963d94b9c3e8 100644 --- a/dpnp/backend/extensions/lapack/syevd_batch.cpp +++ b/dpnp/backend/extensions/lapack/syevd_batch.cpp @@ -23,8 +23,11 @@ // THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************** -#include "syevd_batch.hpp" +#include + #include "common_helpers.hpp" +#include "evd_batch_common.hpp" +#include "syevd_batch.hpp" // dpctl tensor headers #include "utils/type_utils.hpp" diff --git a/dpnp/backend/extensions/lapack/syevd_batch.hpp b/dpnp/backend/extensions/lapack/syevd_batch.hpp index 04e1a3c035fc..62d6fbb2e664 100644 --- a/dpnp/backend/extensions/lapack/syevd_batch.hpp +++ b/dpnp/backend/extensions/lapack/syevd_batch.hpp @@ -26,9 +26,6 @@ #pragma once #include -#include - -#include "evd_batch_common.hpp" namespace py = pybind11; From 5eaaba479a179f7401e88398309216f9257158a1 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 24 Jul 2024 13:24:04 +0200 Subject: [PATCH 06/17] Allocate memory for w with expected shape --- dpnp/linalg/dpnp_utils_linalg.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 4b4791227ba4..ea42f521aea1 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -118,6 +118,7 @@ def _batched_eigh(a, UPLO, eigen_mode, w_type, v_type): a_orig_order = "C" if a.flags.c_contiguous else "F" # get 3d input array by reshape a = dpnp.reshape(a, (-1, a_orig_shape[-2], a_orig_shape[-1])) + a_new_shape = a.shape _manager = dpu.SequentialOrderManager[a_sycl_queue] dep_evs = _manager.submitted_events @@ -136,12 +137,9 @@ def _batched_eigh(a, UPLO, eigen_mode, w_type, v_type): ) _manager.add_event_pair(ht_ev, a_copy_ev) - # allocate a memory for dpnp array of eigenvalues - w = dpnp.empty_like(a, shape=a_orig_shape[:-1], dtype=w_type) - w_orig_shape = w.shape - - # get 2d dpnp array with eigenvalues by reshape - w = w.reshape(-1, w_orig_shape[-1]) + w_orig_shape = a_orig_shape[:-1] + # allocate a memory for 2d dpnp array of eigenvalues + w = dpnp.empty_like(a, shape=a_new_shape[:-1], dtype=w_type) ht_ev, evd_batch_ev = getattr(li, lapack_func)( a_sycl_queue, From fca94705e459a3b0ad22a2bd86e1fc36d5dd31b0 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 24 Jul 2024 13:35:53 +0200 Subject: [PATCH 07/17] Applied review comments --- dpnp/backend/extensions/lapack/evd_batch_common.hpp | 8 ++++---- dpnp/backend/extensions/lapack/evd_common.hpp | 8 ++++---- dpnp/backend/extensions/lapack/heevd_batch.cpp | 10 +++++----- dpnp/backend/extensions/lapack/syevd_batch.cpp | 10 +++++----- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/dpnp/backend/extensions/lapack/evd_batch_common.hpp b/dpnp/backend/extensions/lapack/evd_batch_common.hpp index c22f0bb1c953..00b1eed01ccd 100644 --- a/dpnp/backend/extensions/lapack/evd_batch_common.hpp +++ b/dpnp/backend/extensions/lapack/evd_batch_common.hpp @@ -114,8 +114,8 @@ std::pair "overlapping segments of memory"); } - bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous(); - bool is_eig_vals_c_contig = eig_vals.is_c_contiguous(); + const bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous(); + const bool is_eig_vals_c_contig = eig_vals.is_c_contiguous(); if (!is_eig_vecs_f_contig) { throw py::value_error( "An array with input matrix / output eigenvectors " @@ -127,9 +127,9 @@ std::pair } auto array_types = dpctl_td_ns::usm_ndarray_types(); - int eig_vecs_type_id = + const int eig_vecs_type_id = array_types.typenum_to_lookup_id(eig_vecs.get_typenum()); - int eig_vals_type_id = + const int eig_vals_type_id = array_types.typenum_to_lookup_id(eig_vals.get_typenum()); evd_batch_impl_fn_ptr_t evd_batch_fn = diff --git a/dpnp/backend/extensions/lapack/evd_common.hpp b/dpnp/backend/extensions/lapack/evd_common.hpp index b15deeaeaa0b..53b9b2a07847 100644 --- a/dpnp/backend/extensions/lapack/evd_common.hpp +++ b/dpnp/backend/extensions/lapack/evd_common.hpp @@ -108,8 +108,8 @@ std::pair "overlapping segments of memory"); } - bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous(); - bool is_eig_vals_c_contig = eig_vals.is_c_contiguous(); + const bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous(); + const bool is_eig_vals_c_contig = eig_vals.is_c_contiguous(); if (!is_eig_vecs_f_contig) { throw py::value_error( "An array with input matrix / output eigenvectors " @@ -121,9 +121,9 @@ std::pair } auto array_types = dpctl_td_ns::usm_ndarray_types(); - int eig_vecs_type_id = + const int eig_vecs_type_id = array_types.typenum_to_lookup_id(eig_vecs.get_typenum()); - int eig_vals_type_id = + const int eig_vals_type_id = array_types.typenum_to_lookup_id(eig_vals.get_typenum()); evd_impl_fn_ptr_t evd_fn = diff --git a/dpnp/backend/extensions/lapack/heevd_batch.cpp b/dpnp/backend/extensions/lapack/heevd_batch.cpp index 905434ed5c71..c3b02182d891 100644 --- a/dpnp/backend/extensions/lapack/heevd_batch.cpp +++ b/dpnp/backend/extensions/lapack/heevd_batch.cpp @@ -53,13 +53,13 @@ static sycl::event heevd_batch_impl(sycl::queue &exec_q, T *a = reinterpret_cast(in_a); RealT *w = reinterpret_cast(out_w); - std::int64_t a_size = n * n; - std::int64_t w_size = n; + const std::int64_t a_size = n * n; + const std::int64_t w_size = n; const std::int64_t lda = std::max(1UL, n); // Get the number of independent linear streams - std::int64_t n_linear_streams = + const std::int64_t n_linear_streams = (batch_size > 16) ? 4 : ((batch_size > 4 ? 2 : 1)); const std::int64_t scratchpad_size = @@ -68,12 +68,12 @@ static sycl::event heevd_batch_impl(sycl::queue &exec_q, // Get padding size to ensure memory allocations are aligned to 256 bytes // for better performance - std::int64_t padding = 256 / sizeof(T); + const std::int64_t padding = 256 / sizeof(T); if (scratchpad_size > 0) { // Calculate the total scratchpad memory size needed for all linear // streams with proper alignment - size_t alloc_scratch_size = + const size_t alloc_scratch_size = helper::round_up_mult(n_linear_streams * scratchpad_size, padding); // Allocate memory for the total scratchpad diff --git a/dpnp/backend/extensions/lapack/syevd_batch.cpp b/dpnp/backend/extensions/lapack/syevd_batch.cpp index 963d94b9c3e8..b0efd3764a49 100644 --- a/dpnp/backend/extensions/lapack/syevd_batch.cpp +++ b/dpnp/backend/extensions/lapack/syevd_batch.cpp @@ -53,13 +53,13 @@ static sycl::event syevd_batch_impl(sycl::queue &exec_q, T *a = reinterpret_cast(in_a); RealT *w = reinterpret_cast(out_w); - std::int64_t a_size = n * n; - std::int64_t w_size = n; + const std::int64_t a_size = n * n; + const std::int64_t w_size = n; const std::int64_t lda = std::max(1UL, n); // Get the number of independent linear streams - std::int64_t n_linear_streams = + const std::int64_t n_linear_streams = (batch_size > 16) ? 4 : ((batch_size > 4 ? 2 : 1)); const std::int64_t scratchpad_size = @@ -68,12 +68,12 @@ static sycl::event syevd_batch_impl(sycl::queue &exec_q, // Get padding size to ensure memory allocations are aligned to 256 bytes // for better performance - std::int64_t padding = 256 / sizeof(T); + const std::int64_t padding = 256 / sizeof(T); if (scratchpad_size > 0) { // Calculate the total scratchpad memory size needed for all linear // streams with proper alignment - size_t alloc_scratch_size = + const size_t alloc_scratch_size = helper::round_up_mult(n_linear_streams * scratchpad_size, padding); // Allocate memory for the total scratchpad From e52305604a0241fe5310d5e129b1c7bd910c2722 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 24 Jul 2024 16:06:50 +0200 Subject: [PATCH 08/17] Add common_evd_checks to reduce dublicate code --- .../extensions/lapack/evd_batch_common.hpp | 53 ++--------- dpnp/backend/extensions/lapack/evd_common.hpp | 51 ++-------- .../extensions/lapack/evd_common_utils.hpp | 92 +++++++++++++++++++ 3 files changed, 109 insertions(+), 87 deletions(-) create mode 100644 dpnp/backend/extensions/lapack/evd_common_utils.hpp diff --git a/dpnp/backend/extensions/lapack/evd_batch_common.hpp b/dpnp/backend/extensions/lapack/evd_batch_common.hpp index 00b1eed01ccd..f8948d1c21c0 100644 --- a/dpnp/backend/extensions/lapack/evd_batch_common.hpp +++ b/dpnp/backend/extensions/lapack/evd_batch_common.hpp @@ -29,10 +29,9 @@ #include // dpctl tensor headers -#include "utils/memory_overlap.hpp" -#include "utils/output_validation.hpp" #include "utils/type_dispatch.hpp" +#include "evd_common_utils.hpp" #include "types_matrix.hpp" namespace dpnp::extensions::lapack::evd @@ -61,25 +60,18 @@ std::pair const dispatchT &evd_batch_dispatch_table) { const int eig_vecs_nd = eig_vecs.get_ndim(); - const int eig_vals_nd = eig_vals.get_ndim(); - - if (eig_vecs_nd != 3) { - throw py::value_error("Unexpected ndim=" + std::to_string(eig_vecs_nd) + - " of an output array with eigenvectors"); - } - else if (eig_vals_nd != 2) { - throw py::value_error("Unexpected ndim=" + std::to_string(eig_vals_nd) + - " of an output array with eigenvalues"); - } const py::ssize_t *eig_vecs_shape = eig_vecs.get_shape_raw(); const py::ssize_t *eig_vals_shape = eig_vals.get_shape_raw(); - if (eig_vecs_shape[0] != eig_vecs_shape[1]) { - throw py::value_error("The batch shape of 'eig_vecs' must be square."); - } - else if (eig_vecs_shape[2] != eig_vals_shape[0] || - eig_vecs_shape[0] != eig_vals_shape[1]) + const int expected_eig_vecs_nd = 3; + const int expected_eig_vals_nd = 2; + + common_evd_checks(exec_q, eig_vecs, eig_vals, eig_vecs_shape, + expected_eig_vecs_nd, expected_eig_vals_nd); + + if (eig_vecs_shape[2] != eig_vals_shape[0] || + eig_vecs_shape[0] != eig_vals_shape[1]) { throw py::value_error( "The shape of 'eig_vals' must be (batch_size, n), " @@ -99,33 +91,6 @@ std::pair return std::make_pair(sycl::event(), sycl::event()); } - dpctl::tensor::validation::CheckWritable::throw_if_not_writable(eig_vecs); - dpctl::tensor::validation::CheckWritable::throw_if_not_writable(eig_vals); - - // check compatibility of execution queue and allocation queue - if (!dpctl::utils::queues_are_compatible(exec_q, {eig_vecs, eig_vals})) { - throw py::value_error( - "Execution queue is not compatible with allocation queues"); - } - - auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); - if (overlap(eig_vecs, eig_vals)) { - throw py::value_error("Arrays with eigenvectors and eigenvalues are " - "overlapping segments of memory"); - } - - const bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous(); - const bool is_eig_vals_c_contig = eig_vals.is_c_contiguous(); - if (!is_eig_vecs_f_contig) { - throw py::value_error( - "An array with input matrix / output eigenvectors " - "must be F-contiguous"); - } - else if (!is_eig_vals_c_contig) { - throw py::value_error( - "An array with output eigenvalues must be C-contiguous"); - } - auto array_types = dpctl_td_ns::usm_ndarray_types(); const int eig_vecs_type_id = array_types.typenum_to_lookup_id(eig_vecs.get_typenum()); diff --git a/dpnp/backend/extensions/lapack/evd_common.hpp b/dpnp/backend/extensions/lapack/evd_common.hpp index 53b9b2a07847..1945181f2d4c 100644 --- a/dpnp/backend/extensions/lapack/evd_common.hpp +++ b/dpnp/backend/extensions/lapack/evd_common.hpp @@ -29,10 +29,9 @@ #include // dpctl tensor headers -#include "utils/memory_overlap.hpp" -#include "utils/output_validation.hpp" #include "utils/type_dispatch.hpp" +#include "evd_common_utils.hpp" #include "types_matrix.hpp" namespace dpnp::extensions::lapack::evd @@ -60,24 +59,17 @@ std::pair const dispatchT &evd_dispatch_table) { const int eig_vecs_nd = eig_vecs.get_ndim(); - const int eig_vals_nd = eig_vals.get_ndim(); - - if (eig_vecs_nd != 2) { - throw py::value_error("Unexpected ndim=" + std::to_string(eig_vecs_nd) + - " of an output array with eigenvectors"); - } - else if (eig_vals_nd != 1) { - throw py::value_error("Unexpected ndim=" + std::to_string(eig_vals_nd) + - " of an output array with eigenvalues"); - } const py::ssize_t *eig_vecs_shape = eig_vecs.get_shape_raw(); const py::ssize_t *eig_vals_shape = eig_vals.get_shape_raw(); - if (eig_vecs_shape[0] != eig_vecs_shape[1]) { - throw py::value_error("Output array with eigenvectors with be square"); - } - else if (eig_vecs_shape[0] != eig_vals_shape[0]) { + const int expected_eig_vecs_nd = 2; + const int expected_eig_vals_nd = 1; + + common_evd_checks(exec_q, eig_vecs, eig_vals, eig_vecs_shape, + expected_eig_vecs_nd, expected_eig_vals_nd); + + if (eig_vecs_shape[0] != eig_vals_shape[0]) { throw py::value_error( "Eigenvectors and eigenvalues have different shapes"); } @@ -93,33 +85,6 @@ std::pair return std::make_pair(sycl::event(), sycl::event()); } - dpctl::tensor::validation::CheckWritable::throw_if_not_writable(eig_vecs); - dpctl::tensor::validation::CheckWritable::throw_if_not_writable(eig_vals); - - // check compatibility of execution queue and allocation queue - if (!dpctl::utils::queues_are_compatible(exec_q, {eig_vecs, eig_vals})) { - throw py::value_error( - "Execution queue is not compatible with allocation queues"); - } - - auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); - if (overlap(eig_vecs, eig_vals)) { - throw py::value_error("Arrays with eigenvectors and eigenvalues are " - "overlapping segments of memory"); - } - - const bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous(); - const bool is_eig_vals_c_contig = eig_vals.is_c_contiguous(); - if (!is_eig_vecs_f_contig) { - throw py::value_error( - "An array with input matrix / output eigenvectors " - "must be F-contiguous"); - } - else if (!is_eig_vals_c_contig) { - throw py::value_error( - "An array with output eigenvalues must be C-contiguous"); - } - auto array_types = dpctl_td_ns::usm_ndarray_types(); const int eig_vecs_type_id = array_types.typenum_to_lookup_id(eig_vecs.get_typenum()); diff --git a/dpnp/backend/extensions/lapack/evd_common_utils.hpp b/dpnp/backend/extensions/lapack/evd_common_utils.hpp new file mode 100644 index 000000000000..0f9bdd1927e3 --- /dev/null +++ b/dpnp/backend/extensions/lapack/evd_common_utils.hpp @@ -0,0 +1,92 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/output_validation.hpp" + +namespace dpnp::extensions::lapack::evd +{ +namespace py = pybind11; + +inline void common_evd_checks(sycl::queue &exec_q, + dpctl::tensor::usm_ndarray &eig_vecs, + dpctl::tensor::usm_ndarray &eig_vals, + const py::ssize_t *eig_vecs_shape, + const int expected_eig_vecs_nd, + const int expected_eig_vals_nd) +{ + const int eig_vecs_nd = eig_vecs.get_ndim(); + const int eig_vals_nd = eig_vals.get_ndim(); + + if (eig_vecs_nd != expected_eig_vecs_nd) { + throw py::value_error("The output eigenvectors array has ndim=" + + std::to_string(eig_vecs_nd) + ", but a " + + std::to_string(expected_eig_vecs_nd) + + "-dimensional array is expected."); + } + else if (eig_vals_nd != expected_eig_vals_nd) { + throw py::value_error("The output eigenvalues array has ndim=" + + std::to_string(eig_vals_nd) + ", but a " + + std::to_string(expected_eig_vals_nd) + + "-dimensional array is expected."); + } + + if (eig_vecs_shape[0] != eig_vecs_shape[1]) { + throw py::value_error("Output array with eigenvectors must be square"); + } + + dpctl::tensor::validation::CheckWritable::throw_if_not_writable(eig_vecs); + dpctl::tensor::validation::CheckWritable::throw_if_not_writable(eig_vals); + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible(exec_q, {eig_vecs, eig_vals})) { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(eig_vecs, eig_vals)) { + throw py::value_error("Arrays with eigenvectors and eigenvalues are " + "overlapping segments of memory"); + } + + const bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous(); + const bool is_eig_vals_c_contig = eig_vals.is_c_contiguous(); + if (!is_eig_vecs_f_contig) { + throw py::value_error( + "An array with input matrix / output eigenvectors " + "must be F-contiguous"); + } + else if (!is_eig_vals_c_contig) { + throw py::value_error( + "An array with output eigenvalues must be C-contiguous"); + } +} +} // namespace dpnp::extensions::lapack::evd From a08036919c3f2c9e93e7f9f8efbcbd094fecc105 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 24 Jul 2024 17:02:35 +0200 Subject: [PATCH 09/17] Remove host_task_events from syevd and heevd --- dpnp/backend/extensions/lapack/evd_common.hpp | 10 ++++------ dpnp/backend/extensions/lapack/heevd.cpp | 6 ++---- dpnp/backend/extensions/lapack/syevd.cpp | 6 ++---- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/dpnp/backend/extensions/lapack/evd_common.hpp b/dpnp/backend/extensions/lapack/evd_common.hpp index 1945181f2d4c..a7c39fb48244 100644 --- a/dpnp/backend/extensions/lapack/evd_common.hpp +++ b/dpnp/backend/extensions/lapack/evd_common.hpp @@ -42,7 +42,6 @@ typedef sycl::event (*evd_impl_fn_ptr_t)(sycl::queue &, const std::int64_t, char *, char *, - std::vector &, const std::vector &); namespace dpctl_td_ns = dpctl::tensor::type_dispatch; @@ -106,14 +105,13 @@ std::pair const oneapi::mkl::uplo uplo_val = static_cast(upper_lower); - std::vector host_task_events; sycl::event evd_ev = evd_fn(exec_q, jobz_val, uplo_val, n, eig_vecs_data, - eig_vals_data, host_task_events, depends); + eig_vals_data, depends); - sycl::event args_ev = dpctl::utils::keep_args_alive( - exec_q, {eig_vecs, eig_vals}, host_task_events); + sycl::event ht_ev = + dpctl::utils::keep_args_alive(exec_q, {eig_vecs, eig_vals}, {evd_ev}); - return std::make_pair(args_ev, evd_ev); + return std::make_pair(ht_ev, evd_ev); } template &host_task_events, const std::vector &depends) { type_utils::validate_type_for_device(exec_q); @@ -110,14 +109,13 @@ static sycl::event heevd_impl(sycl::queue &exec_q, throw std::runtime_error(error_msg.str()); } - sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { + sycl::event ht_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(heevd_event); auto ctx = exec_q.get_context(); cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); }); - host_task_events.push_back(clean_up_event); - return heevd_event; + return ht_ev; } template diff --git a/dpnp/backend/extensions/lapack/syevd.cpp b/dpnp/backend/extensions/lapack/syevd.cpp index 9b589bbbf49c..bbc975b43d0f 100644 --- a/dpnp/backend/extensions/lapack/syevd.cpp +++ b/dpnp/backend/extensions/lapack/syevd.cpp @@ -43,7 +43,6 @@ static sycl::event syevd_impl(sycl::queue &exec_q, const std::int64_t n, char *in_a, char *out_w, - std::vector &host_task_events, const std::vector &depends) { type_utils::validate_type_for_device(exec_q); @@ -110,14 +109,13 @@ static sycl::event syevd_impl(sycl::queue &exec_q, throw std::runtime_error(error_msg.str()); } - sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { + sycl::event ht_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(syevd_event); auto ctx = exec_q.get_context(); cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); }); - host_task_events.push_back(clean_up_event); - return syevd_event; + return ht_ev; } template From cca72b8d03c1ca4663ef069cd31ba7d26e6a547f Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 25 Jul 2024 13:27:41 +0200 Subject: [PATCH 10/17] Applied review comments --- dpnp/backend/extensions/lapack/evd_batch_common.hpp | 4 ++-- dpnp/backend/extensions/lapack/evd_common.hpp | 4 ++-- dpnp/backend/extensions/lapack/heevd_batch.cpp | 5 +---- dpnp/backend/extensions/lapack/syevd_batch.cpp | 5 +---- dpnp/linalg/dpnp_utils_linalg.py | 6 +++--- 5 files changed, 9 insertions(+), 15 deletions(-) diff --git a/dpnp/backend/extensions/lapack/evd_batch_common.hpp b/dpnp/backend/extensions/lapack/evd_batch_common.hpp index f8948d1c21c0..71f00528c2b0 100644 --- a/dpnp/backend/extensions/lapack/evd_batch_common.hpp +++ b/dpnp/backend/extensions/lapack/evd_batch_common.hpp @@ -64,8 +64,8 @@ std::pair const py::ssize_t *eig_vecs_shape = eig_vecs.get_shape_raw(); const py::ssize_t *eig_vals_shape = eig_vals.get_shape_raw(); - const int expected_eig_vecs_nd = 3; - const int expected_eig_vals_nd = 2; + constexpr int expected_eig_vecs_nd = 3; + constexpr int expected_eig_vals_nd = 2; common_evd_checks(exec_q, eig_vecs, eig_vals, eig_vecs_shape, expected_eig_vecs_nd, expected_eig_vals_nd); diff --git a/dpnp/backend/extensions/lapack/evd_common.hpp b/dpnp/backend/extensions/lapack/evd_common.hpp index a7c39fb48244..cd1ee44047ef 100644 --- a/dpnp/backend/extensions/lapack/evd_common.hpp +++ b/dpnp/backend/extensions/lapack/evd_common.hpp @@ -62,8 +62,8 @@ std::pair const py::ssize_t *eig_vecs_shape = eig_vecs.get_shape_raw(); const py::ssize_t *eig_vals_shape = eig_vals.get_shape_raw(); - const int expected_eig_vecs_nd = 2; - const int expected_eig_vals_nd = 1; + constexpr int expected_eig_vecs_nd = 2; + constexpr int expected_eig_vals_nd = 1; common_evd_checks(exec_q, eig_vecs, eig_vals, eig_vecs_shape, expected_eig_vecs_nd, expected_eig_vals_nd); diff --git a/dpnp/backend/extensions/lapack/heevd_batch.cpp b/dpnp/backend/extensions/lapack/heevd_batch.cpp index c3b02182d891..0cb7066c74ab 100644 --- a/dpnp/backend/extensions/lapack/heevd_batch.cpp +++ b/dpnp/backend/extensions/lapack/heevd_batch.cpp @@ -104,10 +104,7 @@ static sycl::event heevd_batch_impl(sycl::queue &exec_q, std::int64_t stream_id = (batch_id % n_linear_streams); - T *current_scratch_heevd = nullptr; - if (scratchpad != nullptr) { - current_scratch_heevd = scratchpad + stream_id * scratchpad_size; - } + T *current_scratch_heevd = scratchpad + stream_id * scratchpad_size; // Get the event dependencies for the current stream const auto ¤t_dep = comp_evs[stream_id]; diff --git a/dpnp/backend/extensions/lapack/syevd_batch.cpp b/dpnp/backend/extensions/lapack/syevd_batch.cpp index b0efd3764a49..0177ef1a1b89 100644 --- a/dpnp/backend/extensions/lapack/syevd_batch.cpp +++ b/dpnp/backend/extensions/lapack/syevd_batch.cpp @@ -104,10 +104,7 @@ static sycl::event syevd_batch_impl(sycl::queue &exec_q, std::int64_t stream_id = (batch_id % n_linear_streams); - T *current_scratch_syevd = nullptr; - if (scratchpad != nullptr) { - current_scratch_syevd = scratchpad + stream_id * scratchpad_size; - } + T *current_scratch_syevd = scratchpad + stream_id * scratchpad_size; // Get the event dependencies for the current stream const auto ¤t_dep = comp_evs[stream_id]; diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index ea42f521aea1..8634e8fcba6a 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -120,14 +120,14 @@ def _batched_eigh(a, UPLO, eigen_mode, w_type, v_type): a = dpnp.reshape(a, (-1, a_orig_shape[-2], a_orig_shape[-1])) a_new_shape = a.shape - _manager = dpu.SequentialOrderManager[a_sycl_queue] - dep_evs = _manager.submitted_events - # Reorder the elements by moving the last two axes of `a` to the front # to match fortran-like array order which is assumed by syevd/heevd. a = dpnp.moveaxis(a, (-2, -1), (0, 1)) a_usm_arr = dpnp.get_usm_ndarray(a) + _manager = dpu.SequentialOrderManager[a_sycl_queue] + dep_evs = _manager.submitted_events + a_copy = dpnp.empty_like(a, dtype=v_type, order="F") ht_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=a_usm_arr, From 19c82fb18bee269157612a566211d0888d93ef5e Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 25 Jul 2024 14:19:59 +0200 Subject: [PATCH 11/17] Use init_evd_dispatch_table instead of init_evd_batch_dispatch_table --- dpnp/backend/extensions/lapack/evd_batch_common.hpp | 12 ------------ dpnp/backend/extensions/lapack/heevd_batch.cpp | 5 +++-- dpnp/backend/extensions/lapack/syevd_batch.cpp | 5 +++-- 3 files changed, 6 insertions(+), 16 deletions(-) diff --git a/dpnp/backend/extensions/lapack/evd_batch_common.hpp b/dpnp/backend/extensions/lapack/evd_batch_common.hpp index 71f00528c2b0..2c18e527783c 100644 --- a/dpnp/backend/extensions/lapack/evd_batch_common.hpp +++ b/dpnp/backend/extensions/lapack/evd_batch_common.hpp @@ -123,16 +123,4 @@ std::pair return std::make_pair(ht_ev, evd_batch_ev); } - -template - typename factoryT> -void init_evd_batch_dispatch_table( - dispatchT evd_batch_dispatch_table[][dpctl_td_ns::num_types]) -{ - dpctl_td_ns::DispatchTableBuilder - contig; - contig.populate_dispatch_table(evd_batch_dispatch_table); -} } // namespace dpnp::extensions::lapack::evd diff --git a/dpnp/backend/extensions/lapack/heevd_batch.cpp b/dpnp/backend/extensions/lapack/heevd_batch.cpp index 0cb7066c74ab..bc788ecad812 100644 --- a/dpnp/backend/extensions/lapack/heevd_batch.cpp +++ b/dpnp/backend/extensions/lapack/heevd_batch.cpp @@ -27,6 +27,7 @@ #include "common_helpers.hpp" #include "evd_batch_common.hpp" +#include "evd_common.hpp" #include "heevd_batch.hpp" // dpctl tensor headers @@ -191,8 +192,8 @@ void init_heevd_batch(py::module_ m) [dpctl_td_ns::num_types]; { - evd::init_evd_batch_dispatch_table( + evd::init_evd_dispatch_table( heevd_batch_dispatch_table); auto heevd_batch_pyapi = diff --git a/dpnp/backend/extensions/lapack/syevd_batch.cpp b/dpnp/backend/extensions/lapack/syevd_batch.cpp index 0177ef1a1b89..211a84fae510 100644 --- a/dpnp/backend/extensions/lapack/syevd_batch.cpp +++ b/dpnp/backend/extensions/lapack/syevd_batch.cpp @@ -27,6 +27,7 @@ #include "common_helpers.hpp" #include "evd_batch_common.hpp" +#include "evd_common.hpp" #include "syevd_batch.hpp" // dpctl tensor headers @@ -191,8 +192,8 @@ void init_syevd_batch(py::module_ m) [dpctl_td_ns::num_types]; { - evd::init_evd_batch_dispatch_table( + evd::init_evd_dispatch_table( syevd_batch_dispatch_table); auto syevd_batch_pyapi = From d4ce5159df964d836cb76a83449343972bbec7a8 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 25 Jul 2024 14:29:47 +0200 Subject: [PATCH 12/17] Move init_evd_dispatch_table to evd_common_utils.hpp --- dpnp/backend/extensions/lapack/evd_common.hpp | 12 ------------ .../backend/extensions/lapack/evd_common_utils.hpp | 14 ++++++++++++++ dpnp/backend/extensions/lapack/heevd_batch.cpp | 1 - dpnp/backend/extensions/lapack/syevd_batch.cpp | 1 - 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/dpnp/backend/extensions/lapack/evd_common.hpp b/dpnp/backend/extensions/lapack/evd_common.hpp index cd1ee44047ef..1ed517665a19 100644 --- a/dpnp/backend/extensions/lapack/evd_common.hpp +++ b/dpnp/backend/extensions/lapack/evd_common.hpp @@ -113,16 +113,4 @@ std::pair return std::make_pair(ht_ev, evd_ev); } - -template - typename factoryT> -void init_evd_dispatch_table( - dispatchT evd_dispatch_table[][dpctl_td_ns::num_types]) -{ - dpctl_td_ns::DispatchTableBuilder - contig; - contig.populate_dispatch_table(evd_dispatch_table); -} } // namespace dpnp::extensions::lapack::evd diff --git a/dpnp/backend/extensions/lapack/evd_common_utils.hpp b/dpnp/backend/extensions/lapack/evd_common_utils.hpp index 0f9bdd1927e3..1da74bc4ac53 100644 --- a/dpnp/backend/extensions/lapack/evd_common_utils.hpp +++ b/dpnp/backend/extensions/lapack/evd_common_utils.hpp @@ -30,11 +30,25 @@ // dpctl tensor headers #include "utils/memory_overlap.hpp" #include "utils/output_validation.hpp" +#include "utils/type_dispatch.hpp" namespace dpnp::extensions::lapack::evd { +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; namespace py = pybind11; +template + typename factoryT> +void init_evd_dispatch_table( + dispatchT evd_dispatch_table[][dpctl_td_ns::num_types]) +{ + dpctl_td_ns::DispatchTableBuilder + contig; + contig.populate_dispatch_table(evd_dispatch_table); +} + inline void common_evd_checks(sycl::queue &exec_q, dpctl::tensor::usm_ndarray &eig_vecs, dpctl::tensor::usm_ndarray &eig_vals, diff --git a/dpnp/backend/extensions/lapack/heevd_batch.cpp b/dpnp/backend/extensions/lapack/heevd_batch.cpp index bc788ecad812..23b08db9265d 100644 --- a/dpnp/backend/extensions/lapack/heevd_batch.cpp +++ b/dpnp/backend/extensions/lapack/heevd_batch.cpp @@ -27,7 +27,6 @@ #include "common_helpers.hpp" #include "evd_batch_common.hpp" -#include "evd_common.hpp" #include "heevd_batch.hpp" // dpctl tensor headers diff --git a/dpnp/backend/extensions/lapack/syevd_batch.cpp b/dpnp/backend/extensions/lapack/syevd_batch.cpp index 211a84fae510..de75d58b756e 100644 --- a/dpnp/backend/extensions/lapack/syevd_batch.cpp +++ b/dpnp/backend/extensions/lapack/syevd_batch.cpp @@ -27,7 +27,6 @@ #include "common_helpers.hpp" #include "evd_batch_common.hpp" -#include "evd_common.hpp" #include "syevd_batch.hpp" // dpctl tensor headers From 5932918f01d00456993fcf407d90dea758fa2813 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 25 Jul 2024 15:11:36 +0200 Subject: [PATCH 13/17] Add helper function check_zeros_shape --- dpnp/backend/extensions/lapack/common_helpers.hpp | 14 ++++++++++++++ .../backend/extensions/lapack/evd_batch_common.hpp | 11 ++++------- dpnp/backend/extensions/lapack/evd_common.hpp | 11 ++++------- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/dpnp/backend/extensions/lapack/common_helpers.hpp b/dpnp/backend/extensions/lapack/common_helpers.hpp index ce4c416f27fe..88ff8772d1b1 100644 --- a/dpnp/backend/extensions/lapack/common_helpers.hpp +++ b/dpnp/backend/extensions/lapack/common_helpers.hpp @@ -26,10 +26,13 @@ #pragma once #include #include +#include #include namespace dpnp::extensions::lapack::helper { +namespace py = pybind11; + template struct value_type_of { @@ -49,4 +52,15 @@ intT round_up_mult(intT value, intT mult) intT q = (value + (mult - 1)) / mult; return q * mult; } + +// Checks if the shape array has any non-zero dimension. +inline bool check_zeros_shape(int ndim, const py::ssize_t *shape) +{ + size_t src_nelems(1); + + for (int i = 0; i < ndim; ++i) { + src_nelems *= static_cast(shape[i]); + } + return src_nelems == 0; +} } // namespace dpnp::extensions::lapack::helper diff --git a/dpnp/backend/extensions/lapack/evd_batch_common.hpp b/dpnp/backend/extensions/lapack/evd_batch_common.hpp index 2c18e527783c..c37ffea341d2 100644 --- a/dpnp/backend/extensions/lapack/evd_batch_common.hpp +++ b/dpnp/backend/extensions/lapack/evd_batch_common.hpp @@ -31,11 +31,14 @@ // dpctl tensor headers #include "utils/type_dispatch.hpp" +#include "common_helpers.hpp" #include "evd_common_utils.hpp" #include "types_matrix.hpp" namespace dpnp::extensions::lapack::evd { +using dpnp::extensions::lapack::helper::check_zeros_shape; + typedef sycl::event (*evd_batch_impl_fn_ptr_t)( sycl::queue &, const oneapi::mkl::job, @@ -80,13 +83,7 @@ std::pair " and n = " + std::to_string(eig_vecs_shape[1])); } - size_t src_nelems(1); - - for (int i = 0; i < eig_vecs_nd; ++i) { - src_nelems *= static_cast(eig_vecs_shape[i]); - } - - if (src_nelems == 0) { + if (check_zeros_shape(eig_vecs_nd, eig_vecs_shape)) { // nothing to do return std::make_pair(sycl::event(), sycl::event()); } diff --git a/dpnp/backend/extensions/lapack/evd_common.hpp b/dpnp/backend/extensions/lapack/evd_common.hpp index 1ed517665a19..cdccb3e79d3d 100644 --- a/dpnp/backend/extensions/lapack/evd_common.hpp +++ b/dpnp/backend/extensions/lapack/evd_common.hpp @@ -31,11 +31,14 @@ // dpctl tensor headers #include "utils/type_dispatch.hpp" +#include "common_helpers.hpp" #include "evd_common_utils.hpp" #include "types_matrix.hpp" namespace dpnp::extensions::lapack::evd { +using dpnp::extensions::lapack::helper::check_zeros_shape; + typedef sycl::event (*evd_impl_fn_ptr_t)(sycl::queue &, const oneapi::mkl::job, const oneapi::mkl::uplo, @@ -73,13 +76,7 @@ std::pair "Eigenvectors and eigenvalues have different shapes"); } - size_t src_nelems(1); - - for (int i = 0; i < eig_vecs_nd; ++i) { - src_nelems *= static_cast(eig_vecs_shape[i]); - } - - if (src_nelems == 0) { + if (check_zeros_shape(eig_vecs_nd, eig_vecs_shape)) { // nothing to do return std::make_pair(sycl::event(), sycl::event()); } From 48b2d63c27ec915448d7eae7d190d9bd7fcba6fc Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 25 Jul 2024 15:39:08 +0200 Subject: [PATCH 14/17] Implement alloc_scratchpad function to evd_batch_common.hpp --- .../extensions/lapack/evd_batch_common.hpp | 34 +++++++++++++++++-- .../backend/extensions/lapack/heevd_batch.cpp | 26 ++------------ .../backend/extensions/lapack/syevd_batch.cpp | 26 ++------------ 3 files changed, 37 insertions(+), 49 deletions(-) diff --git a/dpnp/backend/extensions/lapack/evd_batch_common.hpp b/dpnp/backend/extensions/lapack/evd_batch_common.hpp index c37ffea341d2..2bee52240657 100644 --- a/dpnp/backend/extensions/lapack/evd_batch_common.hpp +++ b/dpnp/backend/extensions/lapack/evd_batch_common.hpp @@ -37,8 +37,6 @@ namespace dpnp::extensions::lapack::evd { -using dpnp::extensions::lapack::helper::check_zeros_shape; - typedef sycl::event (*evd_batch_impl_fn_ptr_t)( sycl::queue &, const oneapi::mkl::job, @@ -83,7 +81,7 @@ std::pair " and n = " + std::to_string(eig_vecs_shape[1])); } - if (check_zeros_shape(eig_vecs_nd, eig_vecs_shape)) { + if (helper::check_zeros_shape(eig_vecs_nd, eig_vecs_shape)) { // nothing to do return std::make_pair(sycl::event(), sycl::event()); } @@ -120,4 +118,34 @@ std::pair return std::make_pair(ht_ev, evd_batch_ev); } + +template +T *alloc_scratchpad(std::int64_t scratchpad_size, + std::int64_t n_linear_streams, + sycl::queue &exec_q) +{ + // Get padding size to ensure memory allocations are aligned to 256 bytes + // for better performance + const std::int64_t padding = 256 / sizeof(T); + + if (scratchpad_size <= 0) { + throw std::runtime_error( + "Invalid scratchpad size: must be greater than zero." + " Calculated scratchpad size: " + + std::to_string(scratchpad_size)); + } + + // Calculate the total scratchpad memory size needed for all linear + // streams with proper alignment + const size_t alloc_scratch_size = + helper::round_up_mult(n_linear_streams * scratchpad_size, padding); + + // Allocate memory for the total scratchpad + T *scratchpad = sycl::malloc_device(alloc_scratch_size, exec_q); + if (!scratchpad) { + throw std::runtime_error("Device allocation for scratchpad failed"); + } + + return scratchpad; +} } // namespace dpnp::extensions::lapack::evd diff --git a/dpnp/backend/extensions/lapack/heevd_batch.cpp b/dpnp/backend/extensions/lapack/heevd_batch.cpp index 23b08db9265d..bc21b3da35d5 100644 --- a/dpnp/backend/extensions/lapack/heevd_batch.cpp +++ b/dpnp/backend/extensions/lapack/heevd_batch.cpp @@ -64,29 +64,9 @@ static sycl::event heevd_batch_impl(sycl::queue &exec_q, const std::int64_t scratchpad_size = mkl_lapack::heevd_scratchpad_size(exec_q, jobz, upper_lower, n, lda); - T *scratchpad = nullptr; - - // Get padding size to ensure memory allocations are aligned to 256 bytes - // for better performance - const std::int64_t padding = 256 / sizeof(T); - - if (scratchpad_size > 0) { - // Calculate the total scratchpad memory size needed for all linear - // streams with proper alignment - const size_t alloc_scratch_size = - helper::round_up_mult(n_linear_streams * scratchpad_size, padding); - - // Allocate memory for the total scratchpad - scratchpad = sycl::malloc_device(alloc_scratch_size, exec_q); - if (!scratchpad) - throw std::runtime_error("Device allocation for scratchpad failed"); - } - else { - throw std::runtime_error( - "Invalid scratchpad size: must be greater than zero." - "Calculated scratchpad size: " + - std::to_string(scratchpad_size)); - } + + T *scratchpad = + evd::alloc_scratchpad(scratchpad_size, n_linear_streams, exec_q); // Computation events to manage dependencies for each linear stream std::vector> comp_evs(n_linear_streams, depends); diff --git a/dpnp/backend/extensions/lapack/syevd_batch.cpp b/dpnp/backend/extensions/lapack/syevd_batch.cpp index de75d58b756e..d2b87fc260cc 100644 --- a/dpnp/backend/extensions/lapack/syevd_batch.cpp +++ b/dpnp/backend/extensions/lapack/syevd_batch.cpp @@ -64,29 +64,9 @@ static sycl::event syevd_batch_impl(sycl::queue &exec_q, const std::int64_t scratchpad_size = mkl_lapack::syevd_scratchpad_size(exec_q, jobz, upper_lower, n, lda); - T *scratchpad = nullptr; - - // Get padding size to ensure memory allocations are aligned to 256 bytes - // for better performance - const std::int64_t padding = 256 / sizeof(T); - - if (scratchpad_size > 0) { - // Calculate the total scratchpad memory size needed for all linear - // streams with proper alignment - const size_t alloc_scratch_size = - helper::round_up_mult(n_linear_streams * scratchpad_size, padding); - - // Allocate memory for the total scratchpad - scratchpad = sycl::malloc_device(alloc_scratch_size, exec_q); - if (!scratchpad) - throw std::runtime_error("Device allocation for scratchpad failed"); - } - else { - throw std::runtime_error( - "Invalid scratchpad size: must be greater than zero." - "Calculated scratchpad size: " + - std::to_string(scratchpad_size)); - } + + T *scratchpad = + evd::alloc_scratchpad(scratchpad_size, n_linear_streams, exec_q); // Computation events to manage dependencies for each linear stream std::vector> comp_evs(n_linear_streams, depends); From e974eff463ed0354aa6f730d41e5bb684b20abf6 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 26 Jul 2024 11:01:02 +0200 Subject: [PATCH 15/17] Make round_up_mult as inline --- dpnp/backend/extensions/lapack/common_helpers.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/backend/extensions/lapack/common_helpers.hpp b/dpnp/backend/extensions/lapack/common_helpers.hpp index 88ff8772d1b1..df2b87028e48 100644 --- a/dpnp/backend/extensions/lapack/common_helpers.hpp +++ b/dpnp/backend/extensions/lapack/common_helpers.hpp @@ -47,7 +47,7 @@ struct value_type_of> // Rounds up the number `value` to the nearest multiple of `mult`. template -intT round_up_mult(intT value, intT mult) +inline intT round_up_mult(intT value, intT mult) { intT q = (value + (mult - 1)) / mult; return q * mult; From 21dc0b0acc176aa42dc6bb84b388fed0b35b8244 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 26 Jul 2024 11:23:12 +0200 Subject: [PATCH 16/17] Add comment for check_zeros_shape --- dpnp/backend/extensions/lapack/evd_batch_common.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dpnp/backend/extensions/lapack/evd_batch_common.hpp b/dpnp/backend/extensions/lapack/evd_batch_common.hpp index 2bee52240657..4e8ee4921bc7 100644 --- a/dpnp/backend/extensions/lapack/evd_batch_common.hpp +++ b/dpnp/backend/extensions/lapack/evd_batch_common.hpp @@ -81,6 +81,7 @@ std::pair " and n = " + std::to_string(eig_vecs_shape[1])); } + // Ensure `batch_size` and `n` are non-zero, otherwise return empty events if (helper::check_zeros_shape(eig_vecs_nd, eig_vecs_shape)) { // nothing to do return std::make_pair(sycl::event(), sycl::event()); From 894ac54843c86c506aceaabcc12e6f7d532ad62e Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 26 Jul 2024 11:38:12 +0200 Subject: [PATCH 17/17] Make alloc_scratchpad as inline --- dpnp/backend/extensions/lapack/evd_batch_common.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dpnp/backend/extensions/lapack/evd_batch_common.hpp b/dpnp/backend/extensions/lapack/evd_batch_common.hpp index 4e8ee4921bc7..5a7d985ff821 100644 --- a/dpnp/backend/extensions/lapack/evd_batch_common.hpp +++ b/dpnp/backend/extensions/lapack/evd_batch_common.hpp @@ -121,9 +121,9 @@ std::pair } template -T *alloc_scratchpad(std::int64_t scratchpad_size, - std::int64_t n_linear_streams, - sycl::queue &exec_q) +inline T *alloc_scratchpad(std::int64_t scratchpad_size, + std::int64_t n_linear_streams, + sycl::queue &exec_q) { // Get padding size to ensure memory allocations are aligned to 256 bytes // for better performance