From b94fcb4aa33b7a3fb174f387920c47c544d9a42b Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 18 Jul 2024 12:34:58 +0200
Subject: [PATCH 01/17] Implement syevd_batch and heevd_batch

---
 dpnp/backend/extensions/lapack/CMakeLists.txt |   2 +
 .../extensions/lapack/common_helpers.hpp      |   9 +
 .../extensions/lapack/evd_batch_common.hpp    | 174 +++++++++++++++
 .../backend/extensions/lapack/heevd_batch.cpp | 207 ++++++++++++++++++
 .../backend/extensions/lapack/heevd_batch.hpp |  38 ++++
 dpnp/backend/extensions/lapack/lapack_py.cpp  |   5 +
 .../backend/extensions/lapack/syevd_batch.cpp | 206 +++++++++++++++++
 .../backend/extensions/lapack/syevd_batch.hpp |  38 ++++
 dpnp/linalg/dpnp_utils_linalg.py              | 103 ++++-----
 9 files changed, 725 insertions(+), 57 deletions(-)
 create mode 100644 dpnp/backend/extensions/lapack/evd_batch_common.hpp
 create mode 100644 dpnp/backend/extensions/lapack/heevd_batch.cpp
 create mode 100644 dpnp/backend/extensions/lapack/heevd_batch.hpp
 create mode 100644 dpnp/backend/extensions/lapack/syevd_batch.cpp
 create mode 100644 dpnp/backend/extensions/lapack/syevd_batch.hpp
diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt
index f21f61c84dfd..03fceb05482e 100644
--- a/dpnp/backend/extensions/lapack/CMakeLists.txt
+++ b/dpnp/backend/extensions/lapack/CMakeLists.txt
@@ -36,11 +36,13 @@ set(_module_src
     ${CMAKE_CURRENT_SOURCE_DIR}/getri_batch.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/getrs.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/heevd.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/heevd_batch.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/orgqr.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/orgqr_batch.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/potrf.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/potrf_batch.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/syevd.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/syevd_batch.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/ungqr.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/ungqr_batch.cpp
 )
diff --git a/dpnp/backend/extensions/lapack/common_helpers.hpp b/dpnp/backend/extensions/lapack/common_helpers.hpp
index 0c818e74664f..ce4c416f27fe 100644
--- a/dpnp/backend/extensions/lapack/common_helpers.hpp
+++ b/dpnp/backend/extensions/lapack/common_helpers.hpp
@@ -24,6 +24,7 @@
 //*****************************************************************************
 
 #pragma once
+#include <complex>
 #include <cstring>
 #include <stdexcept>
 
@@ -40,4 +41,12 @@ struct value_type_of<std::complex<T>>
 {
     using type = T;
 };
+
+// Rounds up the number `value` to the nearest multiple of `mult`.
+template <typename intT>
+intT round_up_mult(intT value, intT mult)
+{
+    intT q = (value + (mult - 1)) / mult;
+    return q * mult;
+}
 } // namespace dpnp::extensions::lapack::helper
diff --git a/dpnp/backend/extensions/lapack/evd_batch_common.hpp b/dpnp/backend/extensions/lapack/evd_batch_common.hpp
new file mode 100644
index 000000000000..0de1b2fd9796
--- /dev/null
+++ b/dpnp/backend/extensions/lapack/evd_batch_common.hpp
@@ -0,0 +1,174 @@
+//*****************************************************************************
+// Copyright (c) 2024, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#pragma once
+
+#include <oneapi/mkl.hpp>
+#include <pybind11/pybind11.h>
+
+// dpctl tensor headers
+#include "utils/memory_overlap.hpp"
+#include "utils/output_validation.hpp"
+#include "utils/type_dispatch.hpp"
+#include "utils/type_utils.hpp"
+
+#include "types_matrix.hpp"
+
+namespace dpnp::extensions::lapack::evd
+{
+typedef sycl::event (*evd_batch_impl_fn_ptr_t)(
+    sycl::queue &,
+    const oneapi::mkl::job,
+    const oneapi::mkl::uplo,
+    const std::int64_t,
+    const std::int64_t,
+    char *,
+    char *,
+    const std::vector<sycl::event> &);
+
+namespace dpctl_td_ns = dpctl::tensor::type_dispatch;
+namespace py = pybind11;
+
+template <typename dispatchT>
+std::pair<sycl::event, sycl::event>
+    evd_batch_func(sycl::queue &exec_q,
+                   const std::int8_t jobz,
+                   const std::int8_t upper_lower,
+                   dpctl::tensor::usm_ndarray &eig_vecs,
+                   dpctl::tensor::usm_ndarray &eig_vals,
+                   const std::vector<sycl::event> &depends,
+                   const dispatchT &evd_batch_dispatch_table)
+{
+    const int eig_vecs_nd = eig_vecs.get_ndim();
+    const int eig_vals_nd = eig_vals.get_ndim();
+
+    if (eig_vecs_nd != 3) {
+        throw py::value_error("Unexpected ndim=" + std::to_string(eig_vecs_nd) +
+                              " of an output array with eigenvectors");
+    }
+    else if (eig_vals_nd != 2) {
+        throw py::value_error("Unexpected ndim=" + std::to_string(eig_vals_nd) +
+                              " of an output array with eigenvalues");
+    }
+
+    const py::ssize_t *eig_vecs_shape = eig_vecs.get_shape_raw();
+    const py::ssize_t *eig_vals_shape = eig_vals.get_shape_raw();
+
+    if (eig_vecs_shape[0] != eig_vecs_shape[1]) {
+        throw py::value_error("The batch shape of 'eig_vecs' must be square.");
+    }
+    else if (eig_vecs_shape[2] != eig_vals_shape[0] ||
+             eig_vecs_shape[0] != eig_vals_shape[1])
+    {
+        throw py::value_error(
+            "The shape of 'eig_vals' must be (batch_size, n), "
+            "where batch_size = " +
+            std::to_string(eig_vecs_shape[0]) +
+            " and n = " + std::to_string(eig_vecs_shape[1]));
+    }
+
+    size_t src_nelems(1);
+
+    for (int i = 0; i < eig_vecs_nd; ++i) {
+        src_nelems *= static_cast<size_t>(eig_vecs_shape[i]);
+    }
+
+    if (src_nelems == 0) {
+        // nothing to do
+        return std::make_pair(sycl::event(), sycl::event());
+    }
+
+    dpctl::tensor::validation::CheckWritable::throw_if_not_writable(eig_vecs);
+    dpctl::tensor::validation::CheckWritable::throw_if_not_writable(eig_vals);
+
+    // check compatibility of execution queue and allocation queue
+    if (!dpctl::utils::queues_are_compatible(exec_q, {eig_vecs, eig_vals})) {
+        throw py::value_error(
+            "Execution queue is not compatible with allocation queues");
+    }
+
+    auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
+    if (overlap(eig_vecs, eig_vals)) {
+        throw py::value_error("Arrays with eigenvectors and eigenvalues are "
+                              "overlapping segments of memory");
+    }
+
+    bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous();
+    bool is_eig_vals_c_contig = eig_vals.is_c_contiguous();
+    if (!is_eig_vecs_f_contig) {
+        throw py::value_error(
+            "An array with input matrix / output eigenvectors "
+            "must be F-contiguous");
+    }
+    else if (!is_eig_vals_c_contig) {
+        throw py::value_error(
+            "An array with output eigenvalues must be C-contiguous");
+    }
+
+    auto array_types = dpctl_td_ns::usm_ndarray_types();
+    int eig_vecs_type_id =
+        array_types.typenum_to_lookup_id(eig_vecs.get_typenum());
+    int eig_vals_type_id =
+        array_types.typenum_to_lookup_id(eig_vals.get_typenum());
+
+    evd_batch_impl_fn_ptr_t evd_batch_fn =
+        evd_batch_dispatch_table[eig_vecs_type_id][eig_vals_type_id];
+    if (evd_batch_fn == nullptr) {
+        throw py::value_error(
+            "Types of input vectors and result array are mismatched.");
+    }
+
+    char *eig_vecs_data = eig_vecs.get_data();
+    char *eig_vals_data = eig_vals.get_data();
+
+    const std::int64_t batch_size = eig_vecs_shape[2];
+    const std::int64_t n = eig_vecs_shape[1];
+
+    const oneapi::mkl::job jobz_val = static_cast<oneapi::mkl::job>(jobz);
+    const oneapi::mkl::uplo uplo_val =
+        static_cast<oneapi::mkl::uplo>(upper_lower);
+
+    sycl::event evd_batch_ev =
+        evd_batch_fn(exec_q, jobz_val, uplo_val, batch_size, n, eig_vecs_data,
+                     eig_vals_data, depends);
+
+    sycl::event ht_ev = dpctl::utils::keep_args_alive(
+        exec_q, {eig_vecs, eig_vals}, {evd_batch_ev});
+
+    return std::make_pair(ht_ev, evd_batch_ev);
+}
+
+template <typename dispatchT,
+          template <typename fnT, typename T, typename RealT>
+          typename factoryT>
+void init_evd_batch_dispatch_table(
+    dispatchT evd_batch_dispatch_table[][dpctl_td_ns::num_types])
+{
+    dpctl_td_ns::DispatchTableBuilder<dispatchT, factoryT,
+                                      dpctl_td_ns::num_types>
+        contig;
+    contig.populate_dispatch_table(evd_batch_dispatch_table);
+}
+} // namespace dpnp::extensions::lapack::evd
diff --git a/dpnp/backend/extensions/lapack/heevd_batch.cpp b/dpnp/backend/extensions/lapack/heevd_batch.cpp
new file mode 100644
index 000000000000..d447f27b5812
--- /dev/null
+++ b/dpnp/backend/extensions/lapack/heevd_batch.cpp
@@ -0,0 +1,207 @@
+//*****************************************************************************
+// Copyright (c) 2024, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#include "heevd_batch.hpp"
+#include "common_helpers.hpp"
+
+namespace dpnp::extensions::lapack
+{
+namespace mkl_lapack = oneapi::mkl::lapack;
+namespace type_utils = dpctl::tensor::type_utils;
+
+template <typename T, typename RealT>
+static sycl::event heevd_batch_impl(sycl::queue &exec_q,
+                                    const oneapi::mkl::job jobz,
+                                    const oneapi::mkl::uplo upper_lower,
+                                    const std::int64_t batch_size,
+                                    const std::int64_t n,
+                                    char *in_a,
+                                    char *out_w,
+                                    const std::vector<sycl::event> &depends)
+{
+    type_utils::validate_type_for_device<T>(exec_q);
+    type_utils::validate_type_for_device<RealT>(exec_q);
+
+    T *a = reinterpret_cast<T *>(in_a);
+    RealT *w = reinterpret_cast<RealT *>(out_w);
+
+    std::int64_t a_size = n * n;
+    std::int64_t w_size = n;
+
+    const std::int64_t lda = std::max<size_t>(1UL, n);
+
+    // Get the number of independent linear streams
+    std::int64_t n_linear_streams =
+        (batch_size > 16) ? 4 : ((batch_size > 4 ? 2 : 1));
+
+    const std::int64_t scratchpad_size =
+        mkl_lapack::heevd_scratchpad_size<T>(exec_q, jobz, upper_lower, n, lda);
+    T *scratchpad = nullptr;
+
+    // Get padding size to ensure memory allocations are aligned to 256 bytes
+    // for better performance
+    std::int64_t padding = 256 / sizeof(T);
+
+    if (scratchpad_size > 0) {
+        // Calculate the total scratchpad memory size needed for all linear
+        // streams with proper alignment
+        size_t alloc_scratch_size =
+            helper::round_up_mult(n_linear_streams * scratchpad_size, padding);
+
+        // Allocate memory for the total scratchpad
+        scratchpad = sycl::malloc_device<T>(alloc_scratch_size, exec_q);
+        if (!scratchpad)
+            throw std::runtime_error("Device allocation for scratchpad failed");
+    }
+
+    // Computation events to manage dependencies for each linear stream
+    std::vector<std::vector<sycl::event>> comp_evs(n_linear_streams, depends);
+
+    std::stringstream error_msg;
+    std::int64_t info = 0;
+
+    // Release GIL to avoid serialization of host task
+    // submissions to the same queue in OneMKL
+    py::gil_scoped_release release;
+
+    for (std::int64_t batch_id = 0; batch_id < batch_size; ++batch_id) {
+        T *a_batch = a + batch_id * a_size;
+        RealT *w_batch = w + batch_id * w_size;
+
+        std::int64_t stream_id = (batch_id % n_linear_streams);
+
+        T *current_scratch_heevd = nullptr;
+        if (scratchpad != nullptr) {
+            current_scratch_heevd = scratchpad + stream_id * scratchpad_size;
+        }
+
+        // Get the event dependencies for the current stream
+        const auto &current_dep = comp_evs[stream_id];
+
+        sycl::event heevd_event;
+        try {
+            heevd_event = mkl_lapack::heevd(
+                exec_q,
+                jobz, // 'jobz == job::vec' means eigenvalues and eigenvectors
+                      // are computed.
+                upper_lower, // 'upper_lower == job::upper' means the upper
+                             // triangular part of A, or the lower triangular
+                             // otherwise
+                n,           // The order of the matrix A (0 <= n)
+                a_batch,     // Pointer to the square A (n x n)
+                             // If 'jobz == job::vec', then on exit it will
+                             // contain the eigenvectors of A
+                lda, // The leading dimension of A, must be at least max(1, n)
+                w_batch, // Pointer to array of size at least n, it will contain
+                         // the eigenvalues of A in ascending order
+                current_scratch_heevd, // Pointer to scratchpad memory to be
+                                       // used by MKL routine for storing
+                                       // intermediate results
+                scratchpad_size, current_dep);
+        } catch (mkl_lapack::exception const &e) {
+            error_msg << "Unexpected MKL exception caught during heevd() "
+                         "call:\nreason: "
+                      << e.what() << "\ninfo: " << e.info();
+            info = e.info();
+        } catch (sycl::exception const &e) {
+            error_msg
+                << "Unexpected SYCL exception caught during heevd() call:\n"
+                << e.what();
+            info = -1;
+        }
+
+        // Update the event dependencies for the current stream
+        comp_evs[stream_id] = {heevd_event};
+    }
+
+    if (info != 0) // an unexpected error occurs
+    {
+        if (scratchpad != nullptr) {
+            sycl::free(scratchpad, exec_q);
+        }
+        throw std::runtime_error(error_msg.str());
+    }
+
+    sycl::event ht_ev = exec_q.submit([&](sycl::handler &cgh) {
+        for (const auto &ev : comp_evs) {
+            cgh.depends_on(ev);
+        }
+        auto ctx = exec_q.get_context();
+        cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); });
+    });
+
+    return ht_ev;
+}
+
+template <typename fnT, typename T, typename RealT>
+struct HeevdBatchContigFactory
+{
+    fnT get()
+    {
+        if constexpr (types::HeevdTypePairSupportFactory<T, RealT>::is_defined)
+        {
+            return heevd_batch_impl<T, RealT>;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+using evd::evd_batch_impl_fn_ptr_t;
+
+void init_heevd_batch(py::module_ m)
+{
+    using arrayT = dpctl::tensor::usm_ndarray;
+    using event_vecT = std::vector<sycl::event>;
+
+    static evd_batch_impl_fn_ptr_t
+        heevd_batch_dispatch_table[dpctl_td_ns::num_types]
+                                  [dpctl_td_ns::num_types];
+
+    {
+        evd::init_evd_batch_dispatch_table<evd_batch_impl_fn_ptr_t,
+                                           HeevdBatchContigFactory>(
+            heevd_batch_dispatch_table);
+
+        auto heevd_batch_pyapi =
+            [&](sycl::queue &exec_q, const std::int8_t jobz,
+                const std::int8_t upper_lower, arrayT &eig_vecs,
+                arrayT &eig_vals, const event_vecT &depends = {}) {
+                return evd::evd_batch_func(exec_q, jobz, upper_lower, eig_vecs,
+                                           eig_vals, depends,
+                                           heevd_batch_dispatch_table);
+            };
+        m.def(
+            "_heevd_batch", heevd_batch_pyapi,
+            "Call `heevd` from OneMKL LAPACK library in a loop to return "
+            "the eigenvalues and eigenvectors of a batch of complex Hermitian "
+            "matrices",
+            py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"),
+            py::arg("eig_vecs"), py::arg("eig_vals"),
+            py::arg("depends") = py::list());
+    }
+}
+} // namespace dpnp::extensions::lapack
diff --git a/dpnp/backend/extensions/lapack/heevd_batch.hpp b/dpnp/backend/extensions/lapack/heevd_batch.hpp
new file mode 100644
index 000000000000..3a3fa4bab991
--- /dev/null
+++ b/dpnp/backend/extensions/lapack/heevd_batch.hpp
@@ -0,0 +1,38 @@
+//*****************************************************************************
+// Copyright (c) 2024, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#pragma once
+
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "evd_batch_common.hpp"
+
+namespace py = pybind11;
+
+namespace dpnp::extensions::lapack
+{
+void init_heevd_batch(py::module_ m);
+} // namespace dpnp::extensions::lapack
diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp
index b854a3136a9c..7afa67e84eca 100644
--- a/dpnp/backend/extensions/lapack/lapack_py.cpp
+++ b/dpnp/backend/extensions/lapack/lapack_py.cpp
@@ -37,10 +37,12 @@
 #include "getri.hpp"
 #include "getrs.hpp"
 #include "heevd.hpp"
+#include "heevd_batch.hpp"
 #include "linalg_exceptions.hpp"
 #include "orgqr.hpp"
 #include "potrf.hpp"
 #include "syevd.hpp"
+#include "syevd_batch.hpp"
 #include "ungqr.hpp"
 
 namespace lapack_ext = dpnp::extensions::lapack;
@@ -83,6 +85,9 @@ PYBIND11_MODULE(_lapack_impl, m)
     lapack_ext::init_heevd(m);
     lapack_ext::init_syevd(m);
 
+    lapack_ext::init_heevd_batch(m);
+    lapack_ext::init_syevd_batch(m);
+
     m.def("_geqrf_batch", &lapack_ext::geqrf_batch,
           "Call `geqrf_batch` from OneMKL LAPACK library to return "
           "the QR factorization of a batch general matrix ",
diff --git a/dpnp/backend/extensions/lapack/syevd_batch.cpp b/dpnp/backend/extensions/lapack/syevd_batch.cpp
new file mode 100644
index 000000000000..843c129241c9
--- /dev/null
+++ b/dpnp/backend/extensions/lapack/syevd_batch.cpp
@@ -0,0 +1,206 @@
+//*****************************************************************************
+// Copyright (c) 2024, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#include "syevd_batch.hpp"
+#include "common_helpers.hpp"
+
+namespace dpnp::extensions::lapack
+{
+namespace mkl_lapack = oneapi::mkl::lapack;
+namespace type_utils = dpctl::tensor::type_utils;
+
+template <typename T, typename RealT>
+static sycl::event syevd_batch_impl(sycl::queue &exec_q,
+                                    const oneapi::mkl::job jobz,
+                                    const oneapi::mkl::uplo upper_lower,
+                                    const std::int64_t batch_size,
+                                    const std::int64_t n,
+                                    char *in_a,
+                                    char *out_w,
+                                    const std::vector<sycl::event> &depends)
+{
+    type_utils::validate_type_for_device<T>(exec_q);
+    type_utils::validate_type_for_device<RealT>(exec_q);
+
+    T *a = reinterpret_cast<T *>(in_a);
+    RealT *w = reinterpret_cast<RealT *>(out_w);
+
+    std::int64_t a_size = n * n;
+    std::int64_t w_size = n;
+
+    const std::int64_t lda = std::max<size_t>(1UL, n);
+
+    // Get the number of independent linear streams
+    std::int64_t n_linear_streams =
+        (batch_size > 16) ? 4 : ((batch_size > 4 ? 2 : 1));
+
+    const std::int64_t scratchpad_size =
+        mkl_lapack::syevd_scratchpad_size<T>(exec_q, jobz, upper_lower, n, lda);
+    T *scratchpad = nullptr;
+
+    // Get padding size to ensure memory allocations are aligned to 256 bytes
+    // for better performance
+    std::int64_t padding = 256 / sizeof(T);
+
+    if (scratchpad_size > 0) {
+        // Calculate the total scratchpad memory size needed for all linear
+        // streams with proper alignment
+        size_t alloc_scratch_size =
+            helper::round_up_mult(n_linear_streams * scratchpad_size, padding);
+
+        // Allocate memory for the total scratchpad
+        scratchpad = sycl::malloc_device<T>(alloc_scratch_size, exec_q);
+        if (!scratchpad)
+            throw std::runtime_error("Device allocation for scratchpad failed");
+    }
+
+    // Computation events to manage dependencies for each linear stream
+    std::vector<std::vector<sycl::event>> comp_evs(n_linear_streams, depends);
+
+    std::stringstream error_msg;
+    std::int64_t info = 0;
+
+    // Release GIL to avoid serialization of host task
+    // submissions to the same queue in OneMKL
+    py::gil_scoped_release release;
+
+    for (std::int64_t batch_id = 0; batch_id < batch_size; ++batch_id) {
+        T *a_batch = a + batch_id * a_size;
+        RealT *w_batch = w + batch_id * w_size;
+
+        std::int64_t stream_id = (batch_id % n_linear_streams);
+
+        T *current_scratch_syevd = nullptr;
+        if (scratchpad != nullptr) {
+            current_scratch_syevd = scratchpad + stream_id * scratchpad_size;
+        }
+
+        // Get the event dependencies for the current stream
+        const auto &current_dep = comp_evs[stream_id];
+
+        sycl::event syevd_event;
+        try {
+            syevd_event = mkl_lapack::syevd(
+                exec_q,
+                jobz, // 'jobz == job::vec' means eigenvalues and eigenvectors
+                      // are computed.
+                upper_lower, // 'upper_lower == job::upper' means the upper
+                             // triangular part of A, or the lower triangular
+                             // otherwise
+                n,           // The order of the matrix A (0 <= n)
+                a_batch,     // Pointer to the square A (n x n)
+                             // If 'jobz == job::vec', then on exit it will
+                             // contain the eigenvectors of A
+                lda, // The leading dimension of A, must be at least max(1, n)
+                w_batch, // Pointer to array of size at least n, it will contain
+                         // the eigenvalues of A in ascending order
+                current_scratch_syevd, // Pointer to scratchpad memory to be
+                                       // used by MKL routine for storing
+                                       // intermediate results
+                scratchpad_size, current_dep);
+        } catch (mkl_lapack::exception const &e) {
+            error_msg << "Unexpected MKL exception caught during syevd() "
+                         "call:\nreason: "
+                      << e.what() << "\ninfo: " << e.info();
+            info = e.info();
+        } catch (sycl::exception const &e) {
+            error_msg
+                << "Unexpected SYCL exception caught during syevd() call:\n"
+                << e.what();
+            info = -1;
+        }
+
+        // Update the event dependencies for the current stream
+        comp_evs[stream_id] = {syevd_event};
+    }
+
+    if (info != 0) // an unexpected error occurs
+    {
+        if (scratchpad != nullptr) {
+            sycl::free(scratchpad, exec_q);
+        }
+        throw std::runtime_error(error_msg.str());
+    }
+
+    sycl::event ht_ev = exec_q.submit([&](sycl::handler &cgh) {
+        for (const auto &ev : comp_evs) {
+            cgh.depends_on(ev);
+        }
+        auto ctx = exec_q.get_context();
+        cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); });
+    });
+
+    return ht_ev;
+}
+
+template <typename fnT, typename T, typename RealT>
+struct SyevdBatchContigFactory
+{
+    fnT get()
+    {
+        if constexpr (types::SyevdTypePairSupportFactory<T, RealT>::is_defined)
+        {
+            return syevd_batch_impl<T, RealT>;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+using evd::evd_batch_impl_fn_ptr_t;
+
+void init_syevd_batch(py::module_ m)
+{
+    using arrayT = dpctl::tensor::usm_ndarray;
+    using event_vecT = std::vector<sycl::event>;
+
+    static evd_batch_impl_fn_ptr_t
+        syevd_batch_dispatch_table[dpctl_td_ns::num_types]
+                                  [dpctl_td_ns::num_types];
+
+    {
+        evd::init_evd_batch_dispatch_table<evd_batch_impl_fn_ptr_t,
+                                           SyevdBatchContigFactory>(
+            syevd_batch_dispatch_table);
+
+        auto syevd_batch_pyapi =
+            [&](sycl::queue &exec_q, const std::int8_t jobz,
+                const std::int8_t upper_lower, arrayT &eig_vecs,
+                arrayT &eig_vals, const event_vecT &depends = {}) {
+                return evd::evd_batch_func(exec_q, jobz, upper_lower, eig_vecs,
+                                           eig_vals, depends,
+                                           syevd_batch_dispatch_table);
+            };
+        m.def("_syevd_batch", syevd_batch_pyapi,
+              "Call `syevd` from OneMKL LAPACK library in a loop to return "
+              "the eigenvalues and eigenvectors of a batch of real symmetric "
+              "matrices",
+              py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"),
+              py::arg("eig_vecs"), py::arg("eig_vals"),
+              py::arg("depends") = py::list());
+    }
+}
+} // namespace dpnp::extensions::lapack
diff --git a/dpnp/backend/extensions/lapack/syevd_batch.hpp b/dpnp/backend/extensions/lapack/syevd_batch.hpp
new file mode 100644
index 000000000000..04e1a3c035fc
--- /dev/null
+++ b/dpnp/backend/extensions/lapack/syevd_batch.hpp
@@ -0,0 +1,38 @@
+//*****************************************************************************
+// Copyright (c) 2024, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#pragma once
+
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "evd_batch_common.hpp"
+
+namespace py = pybind11;
+
+namespace dpnp::extensions::lapack
+{
+void init_syevd_batch(py::module_ m);
+} // namespace dpnp::extensions::lapack
diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py
index 10e93db2e1a6..2ed24ff3251e 100644
--- a/dpnp/linalg/dpnp_utils_linalg.py
+++ b/dpnp/linalg/dpnp_utils_linalg.py
@@ -96,22 +96,6 @@ def _batched_eigh(a, UPLO, eigen_mode, w_type, v_type):
 
     """
 
-    is_cpu_device = a.sycl_device.has_aspect_cpu
-    orig_shape = a.shape
-    # get 3d input array by reshape
-    a = dpnp.reshape(a, (-1, orig_shape[-2], orig_shape[-1]))
-    a_usm_arr = dpnp.get_usm_ndarray(a)
-
-    # allocate a memory for dpnp array of eigenvalues
-    w = dpnp.empty_like(
-        a,
-        shape=orig_shape[:-1],
-        dtype=w_type,
-    )
-    w_orig_shape = w.shape
-    # get 2d dpnp array with eigenvalues by reshape
-    w = w.reshape(-1, w_orig_shape[-1])
-
     # `eigen_mode` can be either "N" or "V", specifying the computation mode
     # for OneMKL LAPACK `syevd` and `heevd` routines.
     # "V" (default) means both eigenvectors and eigenvalues will be calculated
@@ -119,62 +103,67 @@ def _batched_eigh(a, UPLO, eigen_mode, w_type, v_type):
     jobz = _jobz[eigen_mode]
     uplo = _upper_lower[UPLO]
 
-    # Get LAPACK function (_syevd for real or _heevd for complex data types)
+    # Get LAPACK function (_syevd_batch for real or _heevd_batch
+    # for complex data types)
     # to compute all eigenvalues and, optionally, all eigenvectors
     lapack_func = (
-        "_heevd" if dpnp.issubdtype(v_type, dpnp.complexfloating) else "_syevd"
+        "_heevd_batch"
+        if dpnp.issubdtype(v_type, dpnp.complexfloating)
+        else "_syevd_batch"
     )
 
     a_sycl_queue = a.sycl_queue
-    a_order = "C" if a.flags.c_contiguous else "F"
+
+    a_orig_shape = a.shape
+    a_orig_order = "C" if a.flags.c_contiguous else "F"
+    # get 3d input array by reshape
+    a = dpnp.reshape(a, (-1, a_orig_shape[-2], a_orig_shape[-1]))
 
     _manager = dpu.SequentialOrderManager[a_sycl_queue]
+    dep_evs = _manager.submitted_events
 
-    # need to loop over the 1st dimension to get eigenvalues and
-    # eigenvectors of 3d matrix A
-    batch_size = a.shape[0]
-    eig_vecs = [None] * batch_size
-    for i in range(batch_size):
-        # oneMKL LAPACK assumes fortran-like array as input, so
-        # allocate a memory with 'F' order for dpnp array of eigenvectors
-        eig_vecs[i] = dpnp.empty_like(a[i], order="F", dtype=v_type)
+    # Reorder the elements by moving the last two axes of `a` to the front
+    # to match fortran-like array order which is assumed by syevd/heevd.
+    a = dpnp.moveaxis(a, (-2, -1), (0, 1))
+    a_usm_arr = dpnp.get_usm_ndarray(a)
 
-        # use DPCTL tensor function to fill the array of eigenvectors with
-        # content of input array
-        ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray(
-            src=a_usm_arr[i],
-            dst=eig_vecs[i].get_array(),
-            sycl_queue=a_sycl_queue,
-            depends=_manager.submitted_events,
-        )
-        _manager.add_event_pair(ht_ev, copy_ev)
+    a_copy = dpnp.empty_like(a, dtype=v_type, order="F")
+    ht_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray(
+        src=a_usm_arr,
+        dst=a_copy.get_array(),
+        sycl_queue=a_sycl_queue,
+        depends=dep_evs,
+    )
+    _manager.add_event_pair(ht_ev, a_copy_ev)
 
-        # TODO: Remove this w/a when MKLD-17201 is solved.
-        # Waiting for a host task executing an OneMKL LAPACK syevd/heevd call
-        # on CPU causes deadlock due to serialization of all host tasks
-        # in the queue.
-        # We need to wait for each host tasks before calling _seyvd and _heevd
-        # to avoid deadlock.
-        if is_cpu_device:
-            dpnp.synchronize_array_data(a)
+    # allocate a memory for dpnp array of eigenvalues
+    w = dpnp.empty_like(a, shape=a_orig_shape[:-1], dtype=w_type)
+    w_orig_shape = w.shape
 
-        # call LAPACK extension function to get eigenvalues and
-        # eigenvectors of a portion of matrix A
-        ht_ev, lapack_ev = getattr(li, lapack_func)(
-            a_sycl_queue,
-            jobz,
-            uplo,
-            eig_vecs[i].get_array(),
-            w[i].get_array(),
-            depends=[copy_ev],
-        )
-        _manager.add_event_pair(ht_ev, lapack_ev)
+    # get 2d dpnp array with eigenvalues by reshape
+    w = w.reshape(-1, w_orig_shape[-1])
+
+    ht_ev, evd_batch_ev = getattr(li, lapack_func)(
+        a_sycl_queue,
+        jobz,
+        uplo,
+        a_copy.get_array(),
+        w.get_array(),
+        depends=[a_copy_ev],
+    )
+
+    _manager.add_event_pair(ht_ev, evd_batch_ev)
 
     w = w.reshape(w_orig_shape)
 
     if eigen_mode == "V":
-        # combine the list of eigenvectors into a single array
-        v = dpnp.array(eig_vecs, order=a_order).reshape(orig_shape)
+        # syevd/heevd call overwtires `a` in Fortran order, reorder the axes
+        # to match C order by moving the last axis to the front and
+        # reshape it back to the original shape of `a`.
+        v = dpnp.moveaxis(a_copy, -1, 0).reshape(a_orig_shape)
+        # Convert to contiguous to align with NumPy
+        if a_orig_order == "C":
+            v = dpnp.ascontiguousarray(v)
         return w, v
     return w
 

From 2da4e5992bae3053c91e4b99b1e2bbe0f9b6699a Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 24 Jul 2024 11:31:57 +0200
Subject: [PATCH 02/17] Move include dpctl type_utils header to sourse files

---
 dpnp/backend/extensions/lapack/evd_batch_common.hpp | 1 -
 dpnp/backend/extensions/lapack/evd_common.hpp       | 1 -
 dpnp/backend/extensions/lapack/heevd.cpp            | 3 +++
 dpnp/backend/extensions/lapack/heevd_batch.cpp      | 3 +++
 dpnp/backend/extensions/lapack/syevd.cpp            | 3 +++
 dpnp/backend/extensions/lapack/syevd_batch.cpp      | 3 +++
 6 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/dpnp/backend/extensions/lapack/evd_batch_common.hpp b/dpnp/backend/extensions/lapack/evd_batch_common.hpp
index 0de1b2fd9796..c22f0bb1c953 100644
--- a/dpnp/backend/extensions/lapack/evd_batch_common.hpp
+++ b/dpnp/backend/extensions/lapack/evd_batch_common.hpp
@@ -32,7 +32,6 @@
 #include "utils/memory_overlap.hpp"
 #include "utils/output_validation.hpp"
 #include "utils/type_dispatch.hpp"
-#include "utils/type_utils.hpp"
 
 #include "types_matrix.hpp"
 
diff --git a/dpnp/backend/extensions/lapack/evd_common.hpp b/dpnp/backend/extensions/lapack/evd_common.hpp
index 9ddb20fb8faa..b15deeaeaa0b 100644
--- a/dpnp/backend/extensions/lapack/evd_common.hpp
+++ b/dpnp/backend/extensions/lapack/evd_common.hpp
@@ -32,7 +32,6 @@
 #include "utils/memory_overlap.hpp"
 #include "utils/output_validation.hpp"
 #include "utils/type_dispatch.hpp"
-#include "utils/type_utils.hpp"
 
 #include "types_matrix.hpp"
 
diff --git a/dpnp/backend/extensions/lapack/heevd.cpp b/dpnp/backend/extensions/lapack/heevd.cpp
index 285926bb5986..6d246819f0d6 100644
--- a/dpnp/backend/extensions/lapack/heevd.cpp
+++ b/dpnp/backend/extensions/lapack/heevd.cpp
@@ -25,6 +25,9 @@
 
 #include "heevd.hpp"
 
+// dpctl tensor headers
+#include "utils/type_utils.hpp"
+
 namespace dpnp::extensions::lapack
 {
 namespace mkl_lapack = oneapi::mkl::lapack;
diff --git a/dpnp/backend/extensions/lapack/heevd_batch.cpp b/dpnp/backend/extensions/lapack/heevd_batch.cpp
index d447f27b5812..7d4f0d5cb017 100644
--- a/dpnp/backend/extensions/lapack/heevd_batch.cpp
+++ b/dpnp/backend/extensions/lapack/heevd_batch.cpp
@@ -26,6 +26,9 @@
 #include "heevd_batch.hpp"
 #include "common_helpers.hpp"
 
+// dpctl tensor headers
+#include "utils/type_utils.hpp"
+
 namespace dpnp::extensions::lapack
 {
 namespace mkl_lapack = oneapi::mkl::lapack;
diff --git a/dpnp/backend/extensions/lapack/syevd.cpp b/dpnp/backend/extensions/lapack/syevd.cpp
index e30ecf0208d5..527013cf0e62 100644
--- a/dpnp/backend/extensions/lapack/syevd.cpp
+++ b/dpnp/backend/extensions/lapack/syevd.cpp
@@ -25,6 +25,9 @@
 
 #include "syevd.hpp"
 
+// dpctl tensor headers
+#include "utils/type_utils.hpp"
+
 namespace dpnp::extensions::lapack
 {
 namespace mkl_lapack = oneapi::mkl::lapack;
diff --git a/dpnp/backend/extensions/lapack/syevd_batch.cpp b/dpnp/backend/extensions/lapack/syevd_batch.cpp
index 843c129241c9..bdbd2b7f16bc 100644
--- a/dpnp/backend/extensions/lapack/syevd_batch.cpp
+++ b/dpnp/backend/extensions/lapack/syevd_batch.cpp
@@ -26,6 +26,9 @@
 #include "syevd_batch.hpp"
 #include "common_helpers.hpp"
 
+// dpctl tensor headers
+#include "utils/type_utils.hpp"
+
 namespace dpnp::extensions::lapack
 {
 namespace mkl_lapack = oneapi::mkl::lapack;

From 88249e4245e9e31e46f530ed35ed0138dd131e38 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 24 Jul 2024 11:39:31 +0200
Subject: [PATCH 03/17] Add memory alocation check for scratchpad

---
 dpnp/backend/extensions/lapack/heevd.cpp | 7 +++++--
 dpnp/backend/extensions/lapack/syevd.cpp | 7 +++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/dpnp/backend/extensions/lapack/heevd.cpp b/dpnp/backend/extensions/lapack/heevd.cpp
index 6d246819f0d6..9169196de9fa 100644
--- a/dpnp/backend/extensions/lapack/heevd.cpp
+++ b/dpnp/backend/extensions/lapack/heevd.cpp
@@ -54,13 +54,16 @@ static sycl::event heevd_impl(sycl::queue &exec_q,
         mkl_lapack::heevd_scratchpad_size<T>(exec_q, jobz, upper_lower, n, lda);
     T *scratchpad = nullptr;
 
+    // Allocate memory for the scratchpad
+    scratchpad = sycl::malloc_device<T>(scratchpad_size, exec_q);
+    if (!scratchpad)
+        throw std::runtime_error("Device allocation for scratchpad failed");
+
     std::stringstream error_msg;
     std::int64_t info = 0;
 
     sycl::event heevd_event;
     try {
-        scratchpad = sycl::malloc_device<T>(scratchpad_size, exec_q);
-
         heevd_event = mkl_lapack::heevd(
             exec_q,
             jobz, // 'jobz == job::vec' means eigenvalues and eigenvectors are
diff --git a/dpnp/backend/extensions/lapack/syevd.cpp b/dpnp/backend/extensions/lapack/syevd.cpp
index 527013cf0e62..f1413f758050 100644
--- a/dpnp/backend/extensions/lapack/syevd.cpp
+++ b/dpnp/backend/extensions/lapack/syevd.cpp
@@ -54,13 +54,16 @@ static sycl::event syevd_impl(sycl::queue &exec_q,
         mkl_lapack::syevd_scratchpad_size<T>(exec_q, jobz, upper_lower, n, lda);
     T *scratchpad = nullptr;
 
+    // Allocate memory for the scratchpad
+    scratchpad = sycl::malloc_device<T>(scratchpad_size, exec_q);
+    if (!scratchpad)
+        throw std::runtime_error("Device allocation for scratchpad failed");
+
     std::stringstream error_msg;
     std::int64_t info = 0;
 
     sycl::event syevd_event;
     try {
-        scratchpad = sycl::malloc_device<T>(scratchpad_size, exec_q);
-
         syevd_event = mkl_lapack::syevd(
             exec_q,
             jobz, // 'jobz == job::vec' means eigenvalues and eigenvectors are

From d222704bf1eec3a6682006d9494886bd8b810256 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 24 Jul 2024 12:23:30 +0200
Subject: [PATCH 04/17] Add more checks for scratchpad_size

---
 dpnp/backend/extensions/lapack/heevd.cpp       | 9 ++++++++-
 dpnp/backend/extensions/lapack/heevd_batch.cpp | 6 ++++++
 dpnp/backend/extensions/lapack/syevd.cpp       | 9 ++++++++-
 dpnp/backend/extensions/lapack/syevd_batch.cpp | 6 ++++++
 4 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/dpnp/backend/extensions/lapack/heevd.cpp b/dpnp/backend/extensions/lapack/heevd.cpp
index 9169196de9fa..57e0df30f556 100644
--- a/dpnp/backend/extensions/lapack/heevd.cpp
+++ b/dpnp/backend/extensions/lapack/heevd.cpp
@@ -52,8 +52,15 @@ static sycl::event heevd_impl(sycl::queue &exec_q,
     const std::int64_t lda = std::max<size_t>(1UL, n);
     const std::int64_t scratchpad_size =
         mkl_lapack::heevd_scratchpad_size<T>(exec_q, jobz, upper_lower, n, lda);
-    T *scratchpad = nullptr;
 
+    if (scratchpad_size <= 0) {
+        throw std::runtime_error(
+            "Invalid scratchpad size: must be greater than zero."
+            "Calculated scratchpad size: " +
+            std::to_string(scratchpad_size));
+    }
+
+    T *scratchpad = nullptr;
     // Allocate memory for the scratchpad
     scratchpad = sycl::malloc_device<T>(scratchpad_size, exec_q);
     if (!scratchpad)
diff --git a/dpnp/backend/extensions/lapack/heevd_batch.cpp b/dpnp/backend/extensions/lapack/heevd_batch.cpp
index 7d4f0d5cb017..78b64ec0a9dd 100644
--- a/dpnp/backend/extensions/lapack/heevd_batch.cpp
+++ b/dpnp/backend/extensions/lapack/heevd_batch.cpp
@@ -78,6 +78,12 @@ static sycl::event heevd_batch_impl(sycl::queue &exec_q,
         if (!scratchpad)
             throw std::runtime_error("Device allocation for scratchpad failed");
     }
+    else {
+        throw std::runtime_error(
+            "Invalid scratchpad size: must be greater than zero."
+            "Calculated scratchpad size: " +
+            std::to_string(scratchpad_size));
+    }
 
     // Computation events to manage dependencies for each linear stream
     std::vector<std::vector<sycl::event>> comp_evs(n_linear_streams, depends);
diff --git a/dpnp/backend/extensions/lapack/syevd.cpp b/dpnp/backend/extensions/lapack/syevd.cpp
index f1413f758050..e317877c4b9f 100644
--- a/dpnp/backend/extensions/lapack/syevd.cpp
+++ b/dpnp/backend/extensions/lapack/syevd.cpp
@@ -52,8 +52,15 @@ static sycl::event syevd_impl(sycl::queue &exec_q,
     const std::int64_t lda = std::max<size_t>(1UL, n);
     const std::int64_t scratchpad_size =
         mkl_lapack::syevd_scratchpad_size<T>(exec_q, jobz, upper_lower, n, lda);
-    T *scratchpad = nullptr;
 
+    if (scratchpad_size <= 0) {
+        throw std::runtime_error(
+            "Invalid scratchpad size: must be greater than zero."
+            "Calculated scratchpad size: " +
+            std::to_string(scratchpad_size));
+    }
+
+    T *scratchpad = nullptr;
     // Allocate memory for the scratchpad
     scratchpad = sycl::malloc_device<T>(scratchpad_size, exec_q);
     if (!scratchpad)
diff --git a/dpnp/backend/extensions/lapack/syevd_batch.cpp b/dpnp/backend/extensions/lapack/syevd_batch.cpp
index bdbd2b7f16bc..0ae6a67884c7 100644
--- a/dpnp/backend/extensions/lapack/syevd_batch.cpp
+++ b/dpnp/backend/extensions/lapack/syevd_batch.cpp
@@ -78,6 +78,12 @@ static sycl::event syevd_batch_impl(sycl::queue &exec_q,
         if (!scratchpad)
             throw std::runtime_error("Device allocation for scratchpad failed");
     }
+    else {
+        throw std::runtime_error(
+            "Invalid scratchpad size: must be greater than zero."
+            "Calculated scratchpad size: " +
+            std::to_string(scratchpad_size));
+    }
 
     // Computation events to manage dependencies for each linear stream
     std::vector<std::vector<sycl::event>> comp_evs(n_linear_streams, depends);

From 73265c5c751d49244c3eb4395ffc24e10e6a262e Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 24 Jul 2024 12:40:57 +0200
Subject: [PATCH 05/17] Move includes

---
 dpnp/backend/extensions/lapack/heevd.cpp       | 3 +++
 dpnp/backend/extensions/lapack/heevd.hpp       | 3 ---
 dpnp/backend/extensions/lapack/heevd_batch.cpp | 5 ++++-
 dpnp/backend/extensions/lapack/heevd_batch.hpp | 3 ---
 dpnp/backend/extensions/lapack/syevd.cpp       | 3 +++
 dpnp/backend/extensions/lapack/syevd.hpp       | 3 ---
 dpnp/backend/extensions/lapack/syevd_batch.cpp | 5 ++++-
 dpnp/backend/extensions/lapack/syevd_batch.hpp | 3 ---
 8 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/dpnp/backend/extensions/lapack/heevd.cpp b/dpnp/backend/extensions/lapack/heevd.cpp
index 57e0df30f556..ce2b49aedff0 100644
--- a/dpnp/backend/extensions/lapack/heevd.cpp
+++ b/dpnp/backend/extensions/lapack/heevd.cpp
@@ -23,6 +23,9 @@
 // THE POSSIBILITY OF SUCH DAMAGE.
 //*****************************************************************************
 
+#include <pybind11/stl.h>
+
+#include "evd_common.hpp"
 #include "heevd.hpp"
 
 // dpctl tensor headers
diff --git a/dpnp/backend/extensions/lapack/heevd.hpp b/dpnp/backend/extensions/lapack/heevd.hpp
index 34f1702af00b..25ed7f0a7a34 100644
--- a/dpnp/backend/extensions/lapack/heevd.hpp
+++ b/dpnp/backend/extensions/lapack/heevd.hpp
@@ -26,9 +26,6 @@
 #pragma once
 
 #include <pybind11/pybind11.h>
-#include <pybind11/stl.h>
-
-#include "evd_common.hpp"
 
 namespace py = pybind11;
 
diff --git a/dpnp/backend/extensions/lapack/heevd_batch.cpp b/dpnp/backend/extensions/lapack/heevd_batch.cpp
index 78b64ec0a9dd..905434ed5c71 100644
--- a/dpnp/backend/extensions/lapack/heevd_batch.cpp
+++ b/dpnp/backend/extensions/lapack/heevd_batch.cpp
@@ -23,8 +23,11 @@
 // THE POSSIBILITY OF SUCH DAMAGE.
 //*****************************************************************************
 
-#include "heevd_batch.hpp"
+#include <pybind11/stl.h>
+
 #include "common_helpers.hpp"
+#include "evd_batch_common.hpp"
+#include "heevd_batch.hpp"
 
 // dpctl tensor headers
 #include "utils/type_utils.hpp"
diff --git a/dpnp/backend/extensions/lapack/heevd_batch.hpp b/dpnp/backend/extensions/lapack/heevd_batch.hpp
index 3a3fa4bab991..668c3e6175c6 100644
--- a/dpnp/backend/extensions/lapack/heevd_batch.hpp
+++ b/dpnp/backend/extensions/lapack/heevd_batch.hpp
@@ -26,9 +26,6 @@
 #pragma once
 
 #include <pybind11/pybind11.h>
-#include <pybind11/stl.h>
-
-#include "evd_batch_common.hpp"
 
 namespace py = pybind11;
 
diff --git a/dpnp/backend/extensions/lapack/syevd.cpp b/dpnp/backend/extensions/lapack/syevd.cpp
index e317877c4b9f..9b589bbbf49c 100644
--- a/dpnp/backend/extensions/lapack/syevd.cpp
+++ b/dpnp/backend/extensions/lapack/syevd.cpp
@@ -23,6 +23,9 @@
 // THE POSSIBILITY OF SUCH DAMAGE.
 //*****************************************************************************
 
+#include <pybind11/stl.h>
+
+#include "evd_common.hpp"
 #include "syevd.hpp"
 
 // dpctl tensor headers
diff --git a/dpnp/backend/extensions/lapack/syevd.hpp b/dpnp/backend/extensions/lapack/syevd.hpp
index 1fb2409834f3..9bbb515ffd4d 100644
--- a/dpnp/backend/extensions/lapack/syevd.hpp
+++ b/dpnp/backend/extensions/lapack/syevd.hpp
@@ -26,9 +26,6 @@
 #pragma once
 
 #include <pybind11/pybind11.h>
-#include <pybind11/stl.h>
-
-#include "evd_common.hpp"
 
 namespace py = pybind11;
 
diff --git a/dpnp/backend/extensions/lapack/syevd_batch.cpp b/dpnp/backend/extensions/lapack/syevd_batch.cpp
index 0ae6a67884c7..963d94b9c3e8 100644
--- a/dpnp/backend/extensions/lapack/syevd_batch.cpp
+++ b/dpnp/backend/extensions/lapack/syevd_batch.cpp
@@ -23,8 +23,11 @@
 // THE POSSIBILITY OF SUCH DAMAGE.
 //*****************************************************************************
 
-#include "syevd_batch.hpp"
+#include <pybind11/stl.h>
+
 #include "common_helpers.hpp"
+#include "evd_batch_common.hpp"
+#include "syevd_batch.hpp"
 
 // dpctl tensor headers
 #include "utils/type_utils.hpp"
diff --git a/dpnp/backend/extensions/lapack/syevd_batch.hpp b/dpnp/backend/extensions/lapack/syevd_batch.hpp
index 04e1a3c035fc..62d6fbb2e664 100644
--- a/dpnp/backend/extensions/lapack/syevd_batch.hpp
+++ b/dpnp/backend/extensions/lapack/syevd_batch.hpp
@@ -26,9 +26,6 @@
 #pragma once
 
 #include <pybind11/pybind11.h>
-#include <pybind11/stl.h>
-
-#include "evd_batch_common.hpp"
 
 namespace py = pybind11;
 

From 5eaaba479a179f7401e88398309216f9257158a1 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 24 Jul 2024 13:24:04 +0200
Subject: [PATCH 06/17] Allocate memory for w with expected shape

---
 dpnp/linalg/dpnp_utils_linalg.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py
index 4b4791227ba4..ea42f521aea1 100644
--- a/dpnp/linalg/dpnp_utils_linalg.py
+++ b/dpnp/linalg/dpnp_utils_linalg.py
@@ -118,6 +118,7 @@ def _batched_eigh(a, UPLO, eigen_mode, w_type, v_type):
     a_orig_order = "C" if a.flags.c_contiguous else "F"
     # get 3d input array by reshape
     a = dpnp.reshape(a, (-1, a_orig_shape[-2], a_orig_shape[-1]))
+    a_new_shape = a.shape
 
     _manager = dpu.SequentialOrderManager[a_sycl_queue]
     dep_evs = _manager.submitted_events
@@ -136,12 +137,9 @@ def _batched_eigh(a, UPLO, eigen_mode, w_type, v_type):
     )
     _manager.add_event_pair(ht_ev, a_copy_ev)
 
-    # allocate a memory for dpnp array of eigenvalues
-    w = dpnp.empty_like(a, shape=a_orig_shape[:-1], dtype=w_type)
-    w_orig_shape = w.shape
-
-    # get 2d dpnp array with eigenvalues by reshape
-    w = w.reshape(-1, w_orig_shape[-1])
+    w_orig_shape = a_orig_shape[:-1]
+    # allocate a memory for 2d dpnp array of eigenvalues
+    w = dpnp.empty_like(a, shape=a_new_shape[:-1], dtype=w_type)
 
     ht_ev, evd_batch_ev = getattr(li, lapack_func)(
         a_sycl_queue,

From fca94705e459a3b0ad22a2bd86e1fc36d5dd31b0 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 24 Jul 2024 13:35:53 +0200
Subject: [PATCH 07/17] Applied review comments

---
 dpnp/backend/extensions/lapack/evd_batch_common.hpp |  8 ++++----
 dpnp/backend/extensions/lapack/evd_common.hpp       |  8 ++++----
 dpnp/backend/extensions/lapack/heevd_batch.cpp      | 10 +++++-----
 dpnp/backend/extensions/lapack/syevd_batch.cpp      | 10 +++++-----
 4 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/dpnp/backend/extensions/lapack/evd_batch_common.hpp b/dpnp/backend/extensions/lapack/evd_batch_common.hpp
index c22f0bb1c953..00b1eed01ccd 100644
--- a/dpnp/backend/extensions/lapack/evd_batch_common.hpp
+++ b/dpnp/backend/extensions/lapack/evd_batch_common.hpp
@@ -114,8 +114,8 @@ std::pair<sycl::event, sycl::event>
                               "overlapping segments of memory");
     }
 
-    bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous();
-    bool is_eig_vals_c_contig = eig_vals.is_c_contiguous();
+    const bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous();
+    const bool is_eig_vals_c_contig = eig_vals.is_c_contiguous();
     if (!is_eig_vecs_f_contig) {
         throw py::value_error(
             "An array with input matrix / output eigenvectors "
@@ -127,9 +127,9 @@ std::pair<sycl::event, sycl::event>
     }
 
     auto array_types = dpctl_td_ns::usm_ndarray_types();
-    int eig_vecs_type_id =
+    const int eig_vecs_type_id =
         array_types.typenum_to_lookup_id(eig_vecs.get_typenum());
-    int eig_vals_type_id =
+    const int eig_vals_type_id =
         array_types.typenum_to_lookup_id(eig_vals.get_typenum());
 
     evd_batch_impl_fn_ptr_t evd_batch_fn =
diff --git a/dpnp/backend/extensions/lapack/evd_common.hpp b/dpnp/backend/extensions/lapack/evd_common.hpp
index b15deeaeaa0b..53b9b2a07847 100644
--- a/dpnp/backend/extensions/lapack/evd_common.hpp
+++ b/dpnp/backend/extensions/lapack/evd_common.hpp
@@ -108,8 +108,8 @@ std::pair<sycl::event, sycl::event>
                               "overlapping segments of memory");
     }
 
-    bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous();
-    bool is_eig_vals_c_contig = eig_vals.is_c_contiguous();
+    const bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous();
+    const bool is_eig_vals_c_contig = eig_vals.is_c_contiguous();
     if (!is_eig_vecs_f_contig) {
         throw py::value_error(
             "An array with input matrix / output eigenvectors "
@@ -121,9 +121,9 @@ std::pair<sycl::event, sycl::event>
     }
 
     auto array_types = dpctl_td_ns::usm_ndarray_types();
-    int eig_vecs_type_id =
+    const int eig_vecs_type_id =
         array_types.typenum_to_lookup_id(eig_vecs.get_typenum());
-    int eig_vals_type_id =
+    const int eig_vals_type_id =
         array_types.typenum_to_lookup_id(eig_vals.get_typenum());
 
     evd_impl_fn_ptr_t evd_fn =
diff --git a/dpnp/backend/extensions/lapack/heevd_batch.cpp b/dpnp/backend/extensions/lapack/heevd_batch.cpp
index 905434ed5c71..c3b02182d891 100644
--- a/dpnp/backend/extensions/lapack/heevd_batch.cpp
+++ b/dpnp/backend/extensions/lapack/heevd_batch.cpp
@@ -53,13 +53,13 @@ static sycl::event heevd_batch_impl(sycl::queue &exec_q,
     T *a = reinterpret_cast<T *>(in_a);
     RealT *w = reinterpret_cast<RealT *>(out_w);
 
-    std::int64_t a_size = n * n;
-    std::int64_t w_size = n;
+    const std::int64_t a_size = n * n;
+    const std::int64_t w_size = n;
 
     const std::int64_t lda = std::max<size_t>(1UL, n);
 
     // Get the number of independent linear streams
-    std::int64_t n_linear_streams =
+    const std::int64_t n_linear_streams =
         (batch_size > 16) ? 4 : ((batch_size > 4 ? 2 : 1));
 
     const std::int64_t scratchpad_size =
@@ -68,12 +68,12 @@ static sycl::event heevd_batch_impl(sycl::queue &exec_q,
 
     // Get padding size to ensure memory allocations are aligned to 256 bytes
     // for better performance
-    std::int64_t padding = 256 / sizeof(T);
+    const std::int64_t padding = 256 / sizeof(T);
 
     if (scratchpad_size > 0) {
         // Calculate the total scratchpad memory size needed for all linear
         // streams with proper alignment
-        size_t alloc_scratch_size =
+        const size_t alloc_scratch_size =
             helper::round_up_mult(n_linear_streams * scratchpad_size, padding);
 
         // Allocate memory for the total scratchpad
diff --git a/dpnp/backend/extensions/lapack/syevd_batch.cpp b/dpnp/backend/extensions/lapack/syevd_batch.cpp
index 963d94b9c3e8..b0efd3764a49 100644
--- a/dpnp/backend/extensions/lapack/syevd_batch.cpp
+++ b/dpnp/backend/extensions/lapack/syevd_batch.cpp
@@ -53,13 +53,13 @@ static sycl::event syevd_batch_impl(sycl::queue &exec_q,
     T *a = reinterpret_cast<T *>(in_a);
     RealT *w = reinterpret_cast<RealT *>(out_w);
 
-    std::int64_t a_size = n * n;
-    std::int64_t w_size = n;
+    const std::int64_t a_size = n * n;
+    const std::int64_t w_size = n;
 
     const std::int64_t lda = std::max<size_t>(1UL, n);
 
     // Get the number of independent linear streams
-    std::int64_t n_linear_streams =
+    const std::int64_t n_linear_streams =
         (batch_size > 16) ? 4 : ((batch_size > 4 ? 2 : 1));
 
     const std::int64_t scratchpad_size =
@@ -68,12 +68,12 @@ static sycl::event syevd_batch_impl(sycl::queue &exec_q,
 
     // Get padding size to ensure memory allocations are aligned to 256 bytes
     // for better performance
-    std::int64_t padding = 256 / sizeof(T);
+    const std::int64_t padding = 256 / sizeof(T);
 
     if (scratchpad_size > 0) {
         // Calculate the total scratchpad memory size needed for all linear
         // streams with proper alignment
-        size_t alloc_scratch_size =
+        const size_t alloc_scratch_size =
             helper::round_up_mult(n_linear_streams * scratchpad_size, padding);
 
         // Allocate memory for the total scratchpad

From e52305604a0241fe5310d5e129b1c7bd910c2722 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 24 Jul 2024 16:06:50 +0200
Subject: [PATCH 08/17] Add common_evd_checks to reduce dublicate code

---
 .../extensions/lapack/evd_batch_common.hpp    | 53 ++---------
 dpnp/backend/extensions/lapack/evd_common.hpp | 51 ++--------
 .../extensions/lapack/evd_common_utils.hpp    | 92 +++++++++++++++++++
 3 files changed, 109 insertions(+), 87 deletions(-)
 create mode 100644 dpnp/backend/extensions/lapack/evd_common_utils.hpp

diff --git a/dpnp/backend/extensions/lapack/evd_batch_common.hpp b/dpnp/backend/extensions/lapack/evd_batch_common.hpp
index 00b1eed01ccd..f8948d1c21c0 100644
--- a/dpnp/backend/extensions/lapack/evd_batch_common.hpp
+++ b/dpnp/backend/extensions/lapack/evd_batch_common.hpp
@@ -29,10 +29,9 @@
 #include <pybind11/pybind11.h>
 
 // dpctl tensor headers
-#include "utils/memory_overlap.hpp"
-#include "utils/output_validation.hpp"
 #include "utils/type_dispatch.hpp"
 
+#include "evd_common_utils.hpp"
 #include "types_matrix.hpp"
 
 namespace dpnp::extensions::lapack::evd
@@ -61,25 +60,18 @@ std::pair<sycl::event, sycl::event>
                    const dispatchT &evd_batch_dispatch_table)
 {
     const int eig_vecs_nd = eig_vecs.get_ndim();
-    const int eig_vals_nd = eig_vals.get_ndim();
-
-    if (eig_vecs_nd != 3) {
-        throw py::value_error("Unexpected ndim=" + std::to_string(eig_vecs_nd) +
-                              " of an output array with eigenvectors");
-    }
-    else if (eig_vals_nd != 2) {
-        throw py::value_error("Unexpected ndim=" + std::to_string(eig_vals_nd) +
-                              " of an output array with eigenvalues");
-    }
 
     const py::ssize_t *eig_vecs_shape = eig_vecs.get_shape_raw();
     const py::ssize_t *eig_vals_shape = eig_vals.get_shape_raw();
 
-    if (eig_vecs_shape[0] != eig_vecs_shape[1]) {
-        throw py::value_error("The batch shape of 'eig_vecs' must be square.");
-    }
-    else if (eig_vecs_shape[2] != eig_vals_shape[0] ||
-             eig_vecs_shape[0] != eig_vals_shape[1])
+    const int expected_eig_vecs_nd = 3;
+    const int expected_eig_vals_nd = 2;
+
+    common_evd_checks(exec_q, eig_vecs, eig_vals, eig_vecs_shape,
+                      expected_eig_vecs_nd, expected_eig_vals_nd);
+
+    if (eig_vecs_shape[2] != eig_vals_shape[0] ||
+        eig_vecs_shape[0] != eig_vals_shape[1])
     {
         throw py::value_error(
             "The shape of 'eig_vals' must be (batch_size, n), "
@@ -99,33 +91,6 @@ std::pair<sycl::event, sycl::event>
         return std::make_pair(sycl::event(), sycl::event());
     }
 
-    dpctl::tensor::validation::CheckWritable::throw_if_not_writable(eig_vecs);
-    dpctl::tensor::validation::CheckWritable::throw_if_not_writable(eig_vals);
-
-    // check compatibility of execution queue and allocation queue
-    if (!dpctl::utils::queues_are_compatible(exec_q, {eig_vecs, eig_vals})) {
-        throw py::value_error(
-            "Execution queue is not compatible with allocation queues");
-    }
-
-    auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
-    if (overlap(eig_vecs, eig_vals)) {
-        throw py::value_error("Arrays with eigenvectors and eigenvalues are "
-                              "overlapping segments of memory");
-    }
-
-    const bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous();
-    const bool is_eig_vals_c_contig = eig_vals.is_c_contiguous();
-    if (!is_eig_vecs_f_contig) {
-        throw py::value_error(
-            "An array with input matrix / output eigenvectors "
-            "must be F-contiguous");
-    }
-    else if (!is_eig_vals_c_contig) {
-        throw py::value_error(
-            "An array with output eigenvalues must be C-contiguous");
-    }
-
     auto array_types = dpctl_td_ns::usm_ndarray_types();
     const int eig_vecs_type_id =
         array_types.typenum_to_lookup_id(eig_vecs.get_typenum());
diff --git a/dpnp/backend/extensions/lapack/evd_common.hpp b/dpnp/backend/extensions/lapack/evd_common.hpp
index 53b9b2a07847..1945181f2d4c 100644
--- a/dpnp/backend/extensions/lapack/evd_common.hpp
+++ b/dpnp/backend/extensions/lapack/evd_common.hpp
@@ -29,10 +29,9 @@
 #include <pybind11/pybind11.h>
 
 // dpctl tensor headers
-#include "utils/memory_overlap.hpp"
-#include "utils/output_validation.hpp"
 #include "utils/type_dispatch.hpp"
 
+#include "evd_common_utils.hpp"
 #include "types_matrix.hpp"
 
 namespace dpnp::extensions::lapack::evd
@@ -60,24 +59,17 @@ std::pair<sycl::event, sycl::event>
              const dispatchT &evd_dispatch_table)
 {
     const int eig_vecs_nd = eig_vecs.get_ndim();
-    const int eig_vals_nd = eig_vals.get_ndim();
-
-    if (eig_vecs_nd != 2) {
-        throw py::value_error("Unexpected ndim=" + std::to_string(eig_vecs_nd) +
-                              " of an output array with eigenvectors");
-    }
-    else if (eig_vals_nd != 1) {
-        throw py::value_error("Unexpected ndim=" + std::to_string(eig_vals_nd) +
-                              " of an output array with eigenvalues");
-    }
 
     const py::ssize_t *eig_vecs_shape = eig_vecs.get_shape_raw();
     const py::ssize_t *eig_vals_shape = eig_vals.get_shape_raw();
 
-    if (eig_vecs_shape[0] != eig_vecs_shape[1]) {
-        throw py::value_error("Output array with eigenvectors with be square");
-    }
-    else if (eig_vecs_shape[0] != eig_vals_shape[0]) {
+    const int expected_eig_vecs_nd = 2;
+    const int expected_eig_vals_nd = 1;
+
+    common_evd_checks(exec_q, eig_vecs, eig_vals, eig_vecs_shape,
+                      expected_eig_vecs_nd, expected_eig_vals_nd);
+
+    if (eig_vecs_shape[0] != eig_vals_shape[0]) {
         throw py::value_error(
             "Eigenvectors and eigenvalues have different shapes");
     }
@@ -93,33 +85,6 @@ std::pair<sycl::event, sycl::event>
         return std::make_pair(sycl::event(), sycl::event());
     }
 
-    dpctl::tensor::validation::CheckWritable::throw_if_not_writable(eig_vecs);
-    dpctl::tensor::validation::CheckWritable::throw_if_not_writable(eig_vals);
-
-    // check compatibility of execution queue and allocation queue
-    if (!dpctl::utils::queues_are_compatible(exec_q, {eig_vecs, eig_vals})) {
-        throw py::value_error(
-            "Execution queue is not compatible with allocation queues");
-    }
-
-    auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
-    if (overlap(eig_vecs, eig_vals)) {
-        throw py::value_error("Arrays with eigenvectors and eigenvalues are "
-                              "overlapping segments of memory");
-    }
-
-    const bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous();
-    const bool is_eig_vals_c_contig = eig_vals.is_c_contiguous();
-    if (!is_eig_vecs_f_contig) {
-        throw py::value_error(
-            "An array with input matrix / output eigenvectors "
-            "must be F-contiguous");
-    }
-    else if (!is_eig_vals_c_contig) {
-        throw py::value_error(
-            "An array with output eigenvalues must be C-contiguous");
-    }
-
     auto array_types = dpctl_td_ns::usm_ndarray_types();
     const int eig_vecs_type_id =
         array_types.typenum_to_lookup_id(eig_vecs.get_typenum());
diff --git a/dpnp/backend/extensions/lapack/evd_common_utils.hpp b/dpnp/backend/extensions/lapack/evd_common_utils.hpp
new file mode 100644
index 000000000000..0f9bdd1927e3
--- /dev/null
+++ b/dpnp/backend/extensions/lapack/evd_common_utils.hpp
@@ -0,0 +1,92 @@
+//*****************************************************************************
+// Copyright (c) 2024, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#pragma once
+
+#include <pybind11/pybind11.h>
+
+// dpctl tensor headers
+#include "utils/memory_overlap.hpp"
+#include "utils/output_validation.hpp"
+
+namespace dpnp::extensions::lapack::evd
+{
+namespace py = pybind11;
+
+inline void common_evd_checks(sycl::queue &exec_q,
+                              dpctl::tensor::usm_ndarray &eig_vecs,
+                              dpctl::tensor::usm_ndarray &eig_vals,
+                              const py::ssize_t *eig_vecs_shape,
+                              const int expected_eig_vecs_nd,
+                              const int expected_eig_vals_nd)
+{
+    const int eig_vecs_nd = eig_vecs.get_ndim();
+    const int eig_vals_nd = eig_vals.get_ndim();
+
+    if (eig_vecs_nd != expected_eig_vecs_nd) {
+        throw py::value_error("The output eigenvectors array has ndim=" +
+                              std::to_string(eig_vecs_nd) + ", but a " +
+                              std::to_string(expected_eig_vecs_nd) +
+                              "-dimensional array is expected.");
+    }
+    else if (eig_vals_nd != expected_eig_vals_nd) {
+        throw py::value_error("The output eigenvalues array has ndim=" +
+                              std::to_string(eig_vals_nd) + ", but a " +
+                              std::to_string(expected_eig_vals_nd) +
+                              "-dimensional array is expected.");
+    }
+
+    if (eig_vecs_shape[0] != eig_vecs_shape[1]) {
+        throw py::value_error("Output array with eigenvectors must be square");
+    }
+
+    dpctl::tensor::validation::CheckWritable::throw_if_not_writable(eig_vecs);
+    dpctl::tensor::validation::CheckWritable::throw_if_not_writable(eig_vals);
+
+    // check compatibility of execution queue and allocation queue
+    if (!dpctl::utils::queues_are_compatible(exec_q, {eig_vecs, eig_vals})) {
+        throw py::value_error(
+            "Execution queue is not compatible with allocation queues");
+    }
+
+    auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
+    if (overlap(eig_vecs, eig_vals)) {
+        throw py::value_error("Arrays with eigenvectors and eigenvalues are "
+                              "overlapping segments of memory");
+    }
+
+    const bool is_eig_vecs_f_contig = eig_vecs.is_f_contiguous();
+    const bool is_eig_vals_c_contig = eig_vals.is_c_contiguous();
+    if (!is_eig_vecs_f_contig) {
+        throw py::value_error(
+            "An array with input matrix / output eigenvectors "
+            "must be F-contiguous");
+    }
+    else if (!is_eig_vals_c_contig) {
+        throw py::value_error(
+            "An array with output eigenvalues must be C-contiguous");
+    }
+}
+} // namespace dpnp::extensions::lapack::evd

From a08036919c3f2c9e93e7f9f8efbcbd094fecc105 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 24 Jul 2024 17:02:35 +0200
Subject: [PATCH 09/17] Remove host_task_events from syevd and heevd

---
 dpnp/backend/extensions/lapack/evd_common.hpp | 10 ++++------
 dpnp/backend/extensions/lapack/heevd.cpp      |  6 ++----
 dpnp/backend/extensions/lapack/syevd.cpp      |  6 ++----
 3 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/dpnp/backend/extensions/lapack/evd_common.hpp b/dpnp/backend/extensions/lapack/evd_common.hpp
index 1945181f2d4c..a7c39fb48244 100644
--- a/dpnp/backend/extensions/lapack/evd_common.hpp
+++ b/dpnp/backend/extensions/lapack/evd_common.hpp
@@ -42,7 +42,6 @@ typedef sycl::event (*evd_impl_fn_ptr_t)(sycl::queue &,
                                          const std::int64_t,
                                          char *,
                                          char *,
-                                         std::vector<sycl::event> &,
                                          const std::vector<sycl::event> &);
 
 namespace dpctl_td_ns = dpctl::tensor::type_dispatch;
@@ -106,14 +105,13 @@ std::pair<sycl::event, sycl::event>
     const oneapi::mkl::uplo uplo_val =
         static_cast<oneapi::mkl::uplo>(upper_lower);
 
-    std::vector<sycl::event> host_task_events;
     sycl::event evd_ev = evd_fn(exec_q, jobz_val, uplo_val, n, eig_vecs_data,
-                                eig_vals_data, host_task_events, depends);
+                                eig_vals_data, depends);
 
-    sycl::event args_ev = dpctl::utils::keep_args_alive(
-        exec_q, {eig_vecs, eig_vals}, host_task_events);
+    sycl::event ht_ev =
+        dpctl::utils::keep_args_alive(exec_q, {eig_vecs, eig_vals}, {evd_ev});
 
-    return std::make_pair(args_ev, evd_ev);
+    return std::make_pair(ht_ev, evd_ev);
 }
 
 template <typename dispatchT,
diff --git a/dpnp/backend/extensions/lapack/heevd.cpp b/dpnp/backend/extensions/lapack/heevd.cpp
index ce2b49aedff0..a623a0dd182c 100644
--- a/dpnp/backend/extensions/lapack/heevd.cpp
+++ b/dpnp/backend/extensions/lapack/heevd.cpp
@@ -43,7 +43,6 @@ static sycl::event heevd_impl(sycl::queue &exec_q,
                               const std::int64_t n,
                               char *in_a,
                               char *out_w,
-                              std::vector<sycl::event> &host_task_events,
                               const std::vector<sycl::event> &depends)
 {
     type_utils::validate_type_for_device<T>(exec_q);
@@ -110,14 +109,13 @@ static sycl::event heevd_impl(sycl::queue &exec_q,
         throw std::runtime_error(error_msg.str());
     }
 
-    sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) {
+    sycl::event ht_ev = exec_q.submit([&](sycl::handler &cgh) {
         cgh.depends_on(heevd_event);
         auto ctx = exec_q.get_context();
         cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); });
     });
-    host_task_events.push_back(clean_up_event);
 
-    return heevd_event;
+    return ht_ev;
 }
 
 template <typename fnT, typename T, typename RealT>
diff --git a/dpnp/backend/extensions/lapack/syevd.cpp b/dpnp/backend/extensions/lapack/syevd.cpp
index 9b589bbbf49c..bbc975b43d0f 100644
--- a/dpnp/backend/extensions/lapack/syevd.cpp
+++ b/dpnp/backend/extensions/lapack/syevd.cpp
@@ -43,7 +43,6 @@ static sycl::event syevd_impl(sycl::queue &exec_q,
                               const std::int64_t n,
                               char *in_a,
                               char *out_w,
-                              std::vector<sycl::event> &host_task_events,
                               const std::vector<sycl::event> &depends)
 {
     type_utils::validate_type_for_device<T>(exec_q);
@@ -110,14 +109,13 @@ static sycl::event syevd_impl(sycl::queue &exec_q,
         throw std::runtime_error(error_msg.str());
     }
 
-    sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) {
+    sycl::event ht_ev = exec_q.submit([&](sycl::handler &cgh) {
         cgh.depends_on(syevd_event);
         auto ctx = exec_q.get_context();
         cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); });
     });
 
-    host_task_events.push_back(clean_up_event);
-    return syevd_event;
+    return ht_ev;
 }
 
 template <typename fnT, typename T, typename RealT>

From cca72b8d03c1ca4663ef069cd31ba7d26e6a547f Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 25 Jul 2024 13:27:41 +0200
Subject: [PATCH 10/17] Applied review comments

---
 dpnp/backend/extensions/lapack/evd_batch_common.hpp | 4 ++--
 dpnp/backend/extensions/lapack/evd_common.hpp       | 4 ++--
 dpnp/backend/extensions/lapack/heevd_batch.cpp      | 5 +----
 dpnp/backend/extensions/lapack/syevd_batch.cpp      | 5 +----
 dpnp/linalg/dpnp_utils_linalg.py                    | 6 +++---
 5 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/dpnp/backend/extensions/lapack/evd_batch_common.hpp b/dpnp/backend/extensions/lapack/evd_batch_common.hpp
index f8948d1c21c0..71f00528c2b0 100644
--- a/dpnp/backend/extensions/lapack/evd_batch_common.hpp
+++ b/dpnp/backend/extensions/lapack/evd_batch_common.hpp
@@ -64,8 +64,8 @@ std::pair<sycl::event, sycl::event>
     const py::ssize_t *eig_vecs_shape = eig_vecs.get_shape_raw();
     const py::ssize_t *eig_vals_shape = eig_vals.get_shape_raw();
 
-    const int expected_eig_vecs_nd = 3;
-    const int expected_eig_vals_nd = 2;
+    constexpr int expected_eig_vecs_nd = 3;
+    constexpr int expected_eig_vals_nd = 2;
 
     common_evd_checks(exec_q, eig_vecs, eig_vals, eig_vecs_shape,
                       expected_eig_vecs_nd, expected_eig_vals_nd);
diff --git a/dpnp/backend/extensions/lapack/evd_common.hpp b/dpnp/backend/extensions/lapack/evd_common.hpp
index a7c39fb48244..cd1ee44047ef 100644
--- a/dpnp/backend/extensions/lapack/evd_common.hpp
+++ b/dpnp/backend/extensions/lapack/evd_common.hpp
@@ -62,8 +62,8 @@ std::pair<sycl::event, sycl::event>
     const py::ssize_t *eig_vecs_shape = eig_vecs.get_shape_raw();
     const py::ssize_t *eig_vals_shape = eig_vals.get_shape_raw();
 
-    const int expected_eig_vecs_nd = 2;
-    const int expected_eig_vals_nd = 1;
+    constexpr int expected_eig_vecs_nd = 2;
+    constexpr int expected_eig_vals_nd = 1;
 
     common_evd_checks(exec_q, eig_vecs, eig_vals, eig_vecs_shape,
                       expected_eig_vecs_nd, expected_eig_vals_nd);
diff --git a/dpnp/backend/extensions/lapack/heevd_batch.cpp b/dpnp/backend/extensions/lapack/heevd_batch.cpp
index c3b02182d891..0cb7066c74ab 100644
--- a/dpnp/backend/extensions/lapack/heevd_batch.cpp
+++ b/dpnp/backend/extensions/lapack/heevd_batch.cpp
@@ -104,10 +104,7 @@ static sycl::event heevd_batch_impl(sycl::queue &exec_q,
 
         std::int64_t stream_id = (batch_id % n_linear_streams);
 
-        T *current_scratch_heevd = nullptr;
-        if (scratchpad != nullptr) {
-            current_scratch_heevd = scratchpad + stream_id * scratchpad_size;
-        }
+        T *current_scratch_heevd = scratchpad + stream_id * scratchpad_size;
 
         // Get the event dependencies for the current stream
         const auto &current_dep = comp_evs[stream_id];
diff --git a/dpnp/backend/extensions/lapack/syevd_batch.cpp b/dpnp/backend/extensions/lapack/syevd_batch.cpp
index b0efd3764a49..0177ef1a1b89 100644
--- a/dpnp/backend/extensions/lapack/syevd_batch.cpp
+++ b/dpnp/backend/extensions/lapack/syevd_batch.cpp
@@ -104,10 +104,7 @@ static sycl::event syevd_batch_impl(sycl::queue &exec_q,
 
         std::int64_t stream_id = (batch_id % n_linear_streams);
 
-        T *current_scratch_syevd = nullptr;
-        if (scratchpad != nullptr) {
-            current_scratch_syevd = scratchpad + stream_id * scratchpad_size;
-        }
+        T *current_scratch_syevd = scratchpad + stream_id * scratchpad_size;
 
         // Get the event dependencies for the current stream
         const auto &current_dep = comp_evs[stream_id];
diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py
index ea42f521aea1..8634e8fcba6a 100644
--- a/dpnp/linalg/dpnp_utils_linalg.py
+++ b/dpnp/linalg/dpnp_utils_linalg.py
@@ -120,14 +120,14 @@ def _batched_eigh(a, UPLO, eigen_mode, w_type, v_type):
     a = dpnp.reshape(a, (-1, a_orig_shape[-2], a_orig_shape[-1]))
     a_new_shape = a.shape
 
-    _manager = dpu.SequentialOrderManager[a_sycl_queue]
-    dep_evs = _manager.submitted_events
-
     # Reorder the elements by moving the last two axes of `a` to the front
     # to match fortran-like array order which is assumed by syevd/heevd.
     a = dpnp.moveaxis(a, (-2, -1), (0, 1))
     a_usm_arr = dpnp.get_usm_ndarray(a)
 
+    _manager = dpu.SequentialOrderManager[a_sycl_queue]
+    dep_evs = _manager.submitted_events
+
     a_copy = dpnp.empty_like(a, dtype=v_type, order="F")
     ht_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray(
         src=a_usm_arr,

From 19c82fb18bee269157612a566211d0888d93ef5e Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 25 Jul 2024 14:19:59 +0200
Subject: [PATCH 11/17] Use init_evd_dispatch_table instead of
 init_evd_batch_dispatch_table

---
 dpnp/backend/extensions/lapack/evd_batch_common.hpp | 12 ------------
 dpnp/backend/extensions/lapack/heevd_batch.cpp      |  5 +++--
 dpnp/backend/extensions/lapack/syevd_batch.cpp      |  5 +++--
 3 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/dpnp/backend/extensions/lapack/evd_batch_common.hpp b/dpnp/backend/extensions/lapack/evd_batch_common.hpp
index 71f00528c2b0..2c18e527783c 100644
--- a/dpnp/backend/extensions/lapack/evd_batch_common.hpp
+++ b/dpnp/backend/extensions/lapack/evd_batch_common.hpp
@@ -123,16 +123,4 @@ std::pair<sycl::event, sycl::event>
 
     return std::make_pair(ht_ev, evd_batch_ev);
 }
-
-template <typename dispatchT,
-          template <typename fnT, typename T, typename RealT>
-          typename factoryT>
-void init_evd_batch_dispatch_table(
-    dispatchT evd_batch_dispatch_table[][dpctl_td_ns::num_types])
-{
-    dpctl_td_ns::DispatchTableBuilder<dispatchT, factoryT,
-                                      dpctl_td_ns::num_types>
-        contig;
-    contig.populate_dispatch_table(evd_batch_dispatch_table);
-}
 } // namespace dpnp::extensions::lapack::evd
diff --git a/dpnp/backend/extensions/lapack/heevd_batch.cpp b/dpnp/backend/extensions/lapack/heevd_batch.cpp
index 0cb7066c74ab..bc788ecad812 100644
--- a/dpnp/backend/extensions/lapack/heevd_batch.cpp
+++ b/dpnp/backend/extensions/lapack/heevd_batch.cpp
@@ -27,6 +27,7 @@
 
 #include "common_helpers.hpp"
 #include "evd_batch_common.hpp"
+#include "evd_common.hpp"
 #include "heevd_batch.hpp"
 
 // dpctl tensor headers
@@ -191,8 +192,8 @@ void init_heevd_batch(py::module_ m)
                                   [dpctl_td_ns::num_types];
 
     {
-        evd::init_evd_batch_dispatch_table<evd_batch_impl_fn_ptr_t,
-                                           HeevdBatchContigFactory>(
+        evd::init_evd_dispatch_table<evd_batch_impl_fn_ptr_t,
+                                     HeevdBatchContigFactory>(
             heevd_batch_dispatch_table);
 
         auto heevd_batch_pyapi =
diff --git a/dpnp/backend/extensions/lapack/syevd_batch.cpp b/dpnp/backend/extensions/lapack/syevd_batch.cpp
index 0177ef1a1b89..211a84fae510 100644
--- a/dpnp/backend/extensions/lapack/syevd_batch.cpp
+++ b/dpnp/backend/extensions/lapack/syevd_batch.cpp
@@ -27,6 +27,7 @@
 
 #include "common_helpers.hpp"
 #include "evd_batch_common.hpp"
+#include "evd_common.hpp"
 #include "syevd_batch.hpp"
 
 // dpctl tensor headers
@@ -191,8 +192,8 @@ void init_syevd_batch(py::module_ m)
                                   [dpctl_td_ns::num_types];
 
     {
-        evd::init_evd_batch_dispatch_table<evd_batch_impl_fn_ptr_t,
-                                           SyevdBatchContigFactory>(
+        evd::init_evd_dispatch_table<evd_batch_impl_fn_ptr_t,
+                                     SyevdBatchContigFactory>(
             syevd_batch_dispatch_table);
 
         auto syevd_batch_pyapi =

From d4ce5159df964d836cb76a83449343972bbec7a8 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 25 Jul 2024 14:29:47 +0200
Subject: [PATCH 12/17] Move init_evd_dispatch_table to evd_common_utils.hpp

---
 dpnp/backend/extensions/lapack/evd_common.hpp      | 12 ------------
 .../backend/extensions/lapack/evd_common_utils.hpp | 14 ++++++++++++++
 dpnp/backend/extensions/lapack/heevd_batch.cpp     |  1 -
 dpnp/backend/extensions/lapack/syevd_batch.cpp     |  1 -
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/dpnp/backend/extensions/lapack/evd_common.hpp b/dpnp/backend/extensions/lapack/evd_common.hpp
index cd1ee44047ef..1ed517665a19 100644
--- a/dpnp/backend/extensions/lapack/evd_common.hpp
+++ b/dpnp/backend/extensions/lapack/evd_common.hpp
@@ -113,16 +113,4 @@ std::pair<sycl::event, sycl::event>
 
     return std::make_pair(ht_ev, evd_ev);
 }
-
-template <typename dispatchT,
-          template <typename fnT, typename T, typename RealT>
-          typename factoryT>
-void init_evd_dispatch_table(
-    dispatchT evd_dispatch_table[][dpctl_td_ns::num_types])
-{
-    dpctl_td_ns::DispatchTableBuilder<dispatchT, factoryT,
-                                      dpctl_td_ns::num_types>
-        contig;
-    contig.populate_dispatch_table(evd_dispatch_table);
-}
 } // namespace dpnp::extensions::lapack::evd
diff --git a/dpnp/backend/extensions/lapack/evd_common_utils.hpp b/dpnp/backend/extensions/lapack/evd_common_utils.hpp
index 0f9bdd1927e3..1da74bc4ac53 100644
--- a/dpnp/backend/extensions/lapack/evd_common_utils.hpp
+++ b/dpnp/backend/extensions/lapack/evd_common_utils.hpp
@@ -30,11 +30,25 @@
 // dpctl tensor headers
 #include "utils/memory_overlap.hpp"
 #include "utils/output_validation.hpp"
+#include "utils/type_dispatch.hpp"
 
 namespace dpnp::extensions::lapack::evd
 {
+namespace dpctl_td_ns = dpctl::tensor::type_dispatch;
 namespace py = pybind11;
 
+template <typename dispatchT,
+          template <typename fnT, typename T, typename RealT>
+          typename factoryT>
+void init_evd_dispatch_table(
+    dispatchT evd_dispatch_table[][dpctl_td_ns::num_types])
+{
+    dpctl_td_ns::DispatchTableBuilder<dispatchT, factoryT,
+                                      dpctl_td_ns::num_types>
+        contig;
+    contig.populate_dispatch_table(evd_dispatch_table);
+}
+
 inline void common_evd_checks(sycl::queue &exec_q,
                               dpctl::tensor::usm_ndarray &eig_vecs,
                               dpctl::tensor::usm_ndarray &eig_vals,
diff --git a/dpnp/backend/extensions/lapack/heevd_batch.cpp b/dpnp/backend/extensions/lapack/heevd_batch.cpp
index bc788ecad812..23b08db9265d 100644
--- a/dpnp/backend/extensions/lapack/heevd_batch.cpp
+++ b/dpnp/backend/extensions/lapack/heevd_batch.cpp
@@ -27,7 +27,6 @@
 
 #include "common_helpers.hpp"
 #include "evd_batch_common.hpp"
-#include "evd_common.hpp"
 #include "heevd_batch.hpp"
 
 // dpctl tensor headers
diff --git a/dpnp/backend/extensions/lapack/syevd_batch.cpp b/dpnp/backend/extensions/lapack/syevd_batch.cpp
index 211a84fae510..de75d58b756e 100644
--- a/dpnp/backend/extensions/lapack/syevd_batch.cpp
+++ b/dpnp/backend/extensions/lapack/syevd_batch.cpp
@@ -27,7 +27,6 @@
 
 #include "common_helpers.hpp"
 #include "evd_batch_common.hpp"
-#include "evd_common.hpp"
 #include "syevd_batch.hpp"
 
 // dpctl tensor headers

From 5932918f01d00456993fcf407d90dea758fa2813 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 25 Jul 2024 15:11:36 +0200
Subject: [PATCH 13/17] Add helper function check_zeros_shape

---
 dpnp/backend/extensions/lapack/common_helpers.hpp  | 14 ++++++++++++++
 .../backend/extensions/lapack/evd_batch_common.hpp | 11 ++++-------
 dpnp/backend/extensions/lapack/evd_common.hpp      | 11 ++++-------
 3 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/dpnp/backend/extensions/lapack/common_helpers.hpp b/dpnp/backend/extensions/lapack/common_helpers.hpp
index ce4c416f27fe..88ff8772d1b1 100644
--- a/dpnp/backend/extensions/lapack/common_helpers.hpp
+++ b/dpnp/backend/extensions/lapack/common_helpers.hpp
@@ -26,10 +26,13 @@
 #pragma once
 #include <complex>
 #include <cstring>
+#include <pybind11/pybind11.h>
 #include <stdexcept>
 
 namespace dpnp::extensions::lapack::helper
 {
+namespace py = pybind11;
+
 template <typename T>
 struct value_type_of
 {
@@ -49,4 +52,15 @@ intT round_up_mult(intT value, intT mult)
     intT q = (value + (mult - 1)) / mult;
     return q * mult;
 }
+
+// Checks if the shape array has any non-zero dimension.
+inline bool check_zeros_shape(int ndim, const py::ssize_t *shape)
+{
+    size_t src_nelems(1);
+
+    for (int i = 0; i < ndim; ++i) {
+        src_nelems *= static_cast<size_t>(shape[i]);
+    }
+    return src_nelems == 0;
+}
 } // namespace dpnp::extensions::lapack::helper
diff --git a/dpnp/backend/extensions/lapack/evd_batch_common.hpp b/dpnp/backend/extensions/lapack/evd_batch_common.hpp
index 2c18e527783c..c37ffea341d2 100644
--- a/dpnp/backend/extensions/lapack/evd_batch_common.hpp
+++ b/dpnp/backend/extensions/lapack/evd_batch_common.hpp
@@ -31,11 +31,14 @@
 // dpctl tensor headers
 #include "utils/type_dispatch.hpp"
 
+#include "common_helpers.hpp"
 #include "evd_common_utils.hpp"
 #include "types_matrix.hpp"
 
 namespace dpnp::extensions::lapack::evd
 {
+using dpnp::extensions::lapack::helper::check_zeros_shape;
+
 typedef sycl::event (*evd_batch_impl_fn_ptr_t)(
     sycl::queue &,
     const oneapi::mkl::job,
@@ -80,13 +83,7 @@ std::pair<sycl::event, sycl::event>
             " and n = " + std::to_string(eig_vecs_shape[1]));
     }
 
-    size_t src_nelems(1);
-
-    for (int i = 0; i < eig_vecs_nd; ++i) {
-        src_nelems *= static_cast<size_t>(eig_vecs_shape[i]);
-    }
-
-    if (src_nelems == 0) {
+    if (check_zeros_shape(eig_vecs_nd, eig_vecs_shape)) {
         // nothing to do
         return std::make_pair(sycl::event(), sycl::event());
     }
diff --git a/dpnp/backend/extensions/lapack/evd_common.hpp b/dpnp/backend/extensions/lapack/evd_common.hpp
index 1ed517665a19..cdccb3e79d3d 100644
--- a/dpnp/backend/extensions/lapack/evd_common.hpp
+++ b/dpnp/backend/extensions/lapack/evd_common.hpp
@@ -31,11 +31,14 @@
 // dpctl tensor headers
 #include "utils/type_dispatch.hpp"
 
+#include "common_helpers.hpp"
 #include "evd_common_utils.hpp"
 #include "types_matrix.hpp"
 
 namespace dpnp::extensions::lapack::evd
 {
+using dpnp::extensions::lapack::helper::check_zeros_shape;
+
 typedef sycl::event (*evd_impl_fn_ptr_t)(sycl::queue &,
                                          const oneapi::mkl::job,
                                          const oneapi::mkl::uplo,
@@ -73,13 +76,7 @@ std::pair<sycl::event, sycl::event>
             "Eigenvectors and eigenvalues have different shapes");
     }
 
-    size_t src_nelems(1);
-
-    for (int i = 0; i < eig_vecs_nd; ++i) {
-        src_nelems *= static_cast<size_t>(eig_vecs_shape[i]);
-    }
-
-    if (src_nelems == 0) {
+    if (check_zeros_shape(eig_vecs_nd, eig_vecs_shape)) {
         // nothing to do
         return std::make_pair(sycl::event(), sycl::event());
     }

From 48b2d63c27ec915448d7eae7d190d9bd7fcba6fc Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 25 Jul 2024 15:39:08 +0200
Subject: [PATCH 14/17] Implement alloc_scratchpad function to
 evd_batch_common.hpp

---
 .../extensions/lapack/evd_batch_common.hpp    | 34 +++++++++++++++++--
 .../backend/extensions/lapack/heevd_batch.cpp | 26 ++------------
 .../backend/extensions/lapack/syevd_batch.cpp | 26 ++------------
 3 files changed, 37 insertions(+), 49 deletions(-)

diff --git a/dpnp/backend/extensions/lapack/evd_batch_common.hpp b/dpnp/backend/extensions/lapack/evd_batch_common.hpp
index c37ffea341d2..2bee52240657 100644
--- a/dpnp/backend/extensions/lapack/evd_batch_common.hpp
+++ b/dpnp/backend/extensions/lapack/evd_batch_common.hpp
@@ -37,8 +37,6 @@
 
 namespace dpnp::extensions::lapack::evd
 {
-using dpnp::extensions::lapack::helper::check_zeros_shape;
-
 typedef sycl::event (*evd_batch_impl_fn_ptr_t)(
     sycl::queue &,
     const oneapi::mkl::job,
@@ -83,7 +81,7 @@ std::pair<sycl::event, sycl::event>
             " and n = " + std::to_string(eig_vecs_shape[1]));
     }
 
-    if (check_zeros_shape(eig_vecs_nd, eig_vecs_shape)) {
+    if (helper::check_zeros_shape(eig_vecs_nd, eig_vecs_shape)) {
         // nothing to do
         return std::make_pair(sycl::event(), sycl::event());
     }
@@ -120,4 +118,34 @@ std::pair<sycl::event, sycl::event>
 
     return std::make_pair(ht_ev, evd_batch_ev);
 }
+
+template <typename T>
+T *alloc_scratchpad(std::int64_t scratchpad_size,
+                    std::int64_t n_linear_streams,
+                    sycl::queue &exec_q)
+{
+    // Get padding size to ensure memory allocations are aligned to 256 bytes
+    // for better performance
+    const std::int64_t padding = 256 / sizeof(T);
+
+    if (scratchpad_size <= 0) {
+        throw std::runtime_error(
+            "Invalid scratchpad size: must be greater than zero."
+            " Calculated scratchpad size: " +
+            std::to_string(scratchpad_size));
+    }
+
+    // Calculate the total scratchpad memory size needed for all linear
+    // streams with proper alignment
+    const size_t alloc_scratch_size =
+        helper::round_up_mult(n_linear_streams * scratchpad_size, padding);
+
+    // Allocate memory for the total scratchpad
+    T *scratchpad = sycl::malloc_device<T>(alloc_scratch_size, exec_q);
+    if (!scratchpad) {
+        throw std::runtime_error("Device allocation for scratchpad failed");
+    }
+
+    return scratchpad;
+}
 } // namespace dpnp::extensions::lapack::evd
diff --git a/dpnp/backend/extensions/lapack/heevd_batch.cpp b/dpnp/backend/extensions/lapack/heevd_batch.cpp
index 23b08db9265d..bc21b3da35d5 100644
--- a/dpnp/backend/extensions/lapack/heevd_batch.cpp
+++ b/dpnp/backend/extensions/lapack/heevd_batch.cpp
@@ -64,29 +64,9 @@ static sycl::event heevd_batch_impl(sycl::queue &exec_q,
 
     const std::int64_t scratchpad_size =
         mkl_lapack::heevd_scratchpad_size<T>(exec_q, jobz, upper_lower, n, lda);
-    T *scratchpad = nullptr;
-
-    // Get padding size to ensure memory allocations are aligned to 256 bytes
-    // for better performance
-    const std::int64_t padding = 256 / sizeof(T);
-
-    if (scratchpad_size > 0) {
-        // Calculate the total scratchpad memory size needed for all linear
-        // streams with proper alignment
-        const size_t alloc_scratch_size =
-            helper::round_up_mult(n_linear_streams * scratchpad_size, padding);
-
-        // Allocate memory for the total scratchpad
-        scratchpad = sycl::malloc_device<T>(alloc_scratch_size, exec_q);
-        if (!scratchpad)
-            throw std::runtime_error("Device allocation for scratchpad failed");
-    }
-    else {
-        throw std::runtime_error(
-            "Invalid scratchpad size: must be greater than zero."
-            "Calculated scratchpad size: " +
-            std::to_string(scratchpad_size));
-    }
+
+    T *scratchpad =
+        evd::alloc_scratchpad<T>(scratchpad_size, n_linear_streams, exec_q);
 
     // Computation events to manage dependencies for each linear stream
     std::vector<std::vector<sycl::event>> comp_evs(n_linear_streams, depends);
diff --git a/dpnp/backend/extensions/lapack/syevd_batch.cpp b/dpnp/backend/extensions/lapack/syevd_batch.cpp
index de75d58b756e..d2b87fc260cc 100644
--- a/dpnp/backend/extensions/lapack/syevd_batch.cpp
+++ b/dpnp/backend/extensions/lapack/syevd_batch.cpp
@@ -64,29 +64,9 @@ static sycl::event syevd_batch_impl(sycl::queue &exec_q,
 
     const std::int64_t scratchpad_size =
         mkl_lapack::syevd_scratchpad_size<T>(exec_q, jobz, upper_lower, n, lda);
-    T *scratchpad = nullptr;
-
-    // Get padding size to ensure memory allocations are aligned to 256 bytes
-    // for better performance
-    const std::int64_t padding = 256 / sizeof(T);
-
-    if (scratchpad_size > 0) {
-        // Calculate the total scratchpad memory size needed for all linear
-        // streams with proper alignment
-        const size_t alloc_scratch_size =
-            helper::round_up_mult(n_linear_streams * scratchpad_size, padding);
-
-        // Allocate memory for the total scratchpad
-        scratchpad = sycl::malloc_device<T>(alloc_scratch_size, exec_q);
-        if (!scratchpad)
-            throw std::runtime_error("Device allocation for scratchpad failed");
-    }
-    else {
-        throw std::runtime_error(
-            "Invalid scratchpad size: must be greater than zero."
-            "Calculated scratchpad size: " +
-            std::to_string(scratchpad_size));
-    }
+
+    T *scratchpad =
+        evd::alloc_scratchpad<T>(scratchpad_size, n_linear_streams, exec_q);
 
     // Computation events to manage dependencies for each linear stream
     std::vector<std::vector<sycl::event>> comp_evs(n_linear_streams, depends);

From e974eff463ed0354aa6f730d41e5bb684b20abf6 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Fri, 26 Jul 2024 11:01:02 +0200
Subject: [PATCH 15/17] Make round_up_mult as inline

---
 dpnp/backend/extensions/lapack/common_helpers.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dpnp/backend/extensions/lapack/common_helpers.hpp b/dpnp/backend/extensions/lapack/common_helpers.hpp
index 88ff8772d1b1..df2b87028e48 100644
--- a/dpnp/backend/extensions/lapack/common_helpers.hpp
+++ b/dpnp/backend/extensions/lapack/common_helpers.hpp
@@ -47,7 +47,7 @@ struct value_type_of<std::complex<T>>
 
 // Rounds up the number `value` to the nearest multiple of `mult`.
 template <typename intT>
-intT round_up_mult(intT value, intT mult)
+inline intT round_up_mult(intT value, intT mult)
 {
     intT q = (value + (mult - 1)) / mult;
     return q * mult;

From 21dc0b0acc176aa42dc6bb84b388fed0b35b8244 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Fri, 26 Jul 2024 11:23:12 +0200
Subject: [PATCH 16/17] Add comment for check_zeros_shape

---
 dpnp/backend/extensions/lapack/evd_batch_common.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/dpnp/backend/extensions/lapack/evd_batch_common.hpp b/dpnp/backend/extensions/lapack/evd_batch_common.hpp
index 2bee52240657..4e8ee4921bc7 100644
--- a/dpnp/backend/extensions/lapack/evd_batch_common.hpp
+++ b/dpnp/backend/extensions/lapack/evd_batch_common.hpp
@@ -81,6 +81,7 @@ std::pair<sycl::event, sycl::event>
             " and n = " + std::to_string(eig_vecs_shape[1]));
     }
 
+    // Ensure `batch_size` and `n` are non-zero, otherwise return empty events
     if (helper::check_zeros_shape(eig_vecs_nd, eig_vecs_shape)) {
         // nothing to do
         return std::make_pair(sycl::event(), sycl::event());

From 894ac54843c86c506aceaabcc12e6f7d532ad62e Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Fri, 26 Jul 2024 11:38:12 +0200
Subject: [PATCH 17/17] Make alloc_scratchpad as inline

---
 dpnp/backend/extensions/lapack/evd_batch_common.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dpnp/backend/extensions/lapack/evd_batch_common.hpp b/dpnp/backend/extensions/lapack/evd_batch_common.hpp
index 4e8ee4921bc7..5a7d985ff821 100644
--- a/dpnp/backend/extensions/lapack/evd_batch_common.hpp
+++ b/dpnp/backend/extensions/lapack/evd_batch_common.hpp
@@ -121,9 +121,9 @@ std::pair<sycl::event, sycl::event>
 }
 
 template <typename T>
-T *alloc_scratchpad(std::int64_t scratchpad_size,
-                    std::int64_t n_linear_streams,
-                    sycl::queue &exec_q)
+inline T *alloc_scratchpad(std::int64_t scratchpad_size,
+                           std::int64_t n_linear_streams,
+                           sycl::queue &exec_q)
 {
     // Get padding size to ensure memory allocations are aligned to 256 bytes
     // for better performance