From 18d63631d451f7eb4b4d53b84b72ea577b61d47a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20M=C3=B6ller?= Date: Fri, 5 Dec 2025 16:20:37 +0100 Subject: [PATCH 01/12] update internal gtest to v1.17.0 (#437) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Matthias Möller Signed-off-by: softcookiepp --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4bd75fc7..1f514ec9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -202,7 +202,7 @@ endif() if(KOMPUTE_OPT_BUILD_TESTS OR KOMPUTE_OPT_ENABLE_BENCHMARK) if(KOMPUTE_OPT_USE_BUILT_IN_GOOGLE_TEST) FetchContent_Declare(googletest GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG release-1.11.0) # Source: https://github.com/google/googletest/releases + GIT_TAG v1.17.0) # Source: https://github.com/google/googletest/releases # Use a shared C runtime in case we build shared set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) From cf3363bf7122f753280be5458f4502ca86512fe5 Mon Sep 17 00:00:00 2001 From: softcookiepp Date: Sat, 3 Jan 2026 14:02:25 -0800 Subject: [PATCH 02/12] moved shader module creation to its own class in preparation for cache implementation Signed-off-by: softcookiepp --- src/Algorithm.cpp | 37 +++++-------------------- src/CMakeLists.txt | 4 ++- src/Cache.cpp | 1 + src/Shader.cpp | 28 +++++++++++++++++++ src/include/kompute/Algorithm.hpp | 4 +-- src/include/kompute/Cache.hpp | 27 +++++++++++++++++++ src/include/kompute/Shader.hpp | 45 +++++++++++++++++++++++++++++++ 7 files changed, 113 insertions(+), 33 deletions(-) create mode 100644 src/Cache.cpp create mode 100644 src/Shader.cpp create mode 100644 src/include/kompute/Cache.hpp create mode 100644 src/include/kompute/Shader.hpp diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index af12e091..0619f7b1 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -18,7 +18,7 @@ Algorithm::isInit() { return 
this->mPipeline && this->mPipelineCache && this->mPipelineLayout && this->mDescriptorPool && this->mDescriptorSet && - this->mDescriptorSetLayout && this->mShaderModule; + this->mDescriptorSetLayout && this->mModule; } void @@ -73,18 +73,8 @@ Algorithm::destroy() (vk::Optional)nullptr); this->mPipelineLayout = nullptr; } - - if (this->mFreeShaderModule && this->mShaderModule) { - KP_LOG_DEBUG("Kompute Algorithm Destroying shader module"); - if (!this->mShaderModule) { - KP_LOG_WARN("Kompute Algorithm Error requested to destroy shader " - "module but it is null"); - } - this->mDevice->destroy( - *this->mShaderModule, - (vk::Optional)nullptr); - this->mShaderModule = nullptr; - } + + this->mModule = nullptr; // We don't call freeDescriptorSet as the descriptor pool is not created // with VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT more at @@ -221,22 +211,9 @@ Algorithm::createParameters() void Algorithm::createShaderModule() { - KP_LOG_DEBUG("Kompute Algorithm createShaderModule started"); - - vk::ShaderModuleCreateInfo shaderModuleInfo(vk::ShaderModuleCreateFlags(), - sizeof(uint32_t) * - this->mSpirv.size(), - this->mSpirv.data()); - - KP_LOG_DEBUG("Kompute Algorithm Creating shader module. 
ShaderFileSize: {}", - this->mSpirv.size()); - this->mFreeShaderModule = true; - this->mShaderModule = std::make_shared(); - this->mDevice->createShaderModule( - &shaderModuleInfo, nullptr, this->mShaderModule.get()); - this->mFreeShaderModule = true; - - KP_LOG_DEBUG("Kompute Algorithm create shader module success"); + KP_LOG_DEBUG("Kompute Algorithm createShaderModule started"); + this->mModule = std::make_shared(this->mDevice, this->mSpirv); + KP_LOG_DEBUG("Kompute Algorithm create shader module success"); } void @@ -289,7 +266,7 @@ Algorithm::createPipeline() vk::PipelineShaderStageCreateInfo shaderStage( vk::PipelineShaderStageCreateFlags(), vk::ShaderStageFlagBits::eCompute, - *this->mShaderModule, + this->mModule->getShaderModule(), "main", &specializationInfo); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8805e842..6b7313f6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -19,7 +19,9 @@ add_library(kompute Algorithm.cpp Tensor.cpp Core.cpp Image.cpp - Memory.cpp) + Memory.cpp + Shader.cpp + Cache.cpp) add_library(kompute::kompute ALIAS kompute) diff --git a/src/Cache.cpp b/src/Cache.cpp new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/src/Cache.cpp @@ -0,0 +1 @@ + diff --git a/src/Shader.cpp b/src/Shader.cpp new file mode 100644 index 00000000..cfd3f3e3 --- /dev/null +++ b/src/Shader.cpp @@ -0,0 +1,28 @@ +#include "kompute/Shader.hpp" + +namespace kp { + +Module::Module(const std::shared_ptr& device, + const std::vector& spv) : + mDevice(device) +{ + KP_LOG_DEBUG("Kompute Module constructor started"); + KP_LOG_DEBUG("Kompute Module Creating shader module. 
ShaderFileSize: {}", + spv.size()); + vk::ShaderModuleCreateInfo shaderModuleInfo(vk::ShaderModuleCreateFlags(), + sizeof(uint32_t) * spv.size(), spv.data()); + this->mDevice.lock()->createShaderModule( + &shaderModuleInfo, nullptr, &(this->mShaderModule) ); + KP_LOG_DEBUG("Kompute Module constructor success"); +} + +Module::~Module() +{ + KP_LOG_DEBUG("Kompute Module destructor started"); + KP_LOG_DEBUG("Kompute Module Destroying shader module"); + if (!mDevice.expired() ) + mDevice.lock()->destroyShaderModule(mShaderModule); + KP_LOG_DEBUG("Kompute Module destructor success"); +} + +} // end namespace kp diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp index c459babf..e3fcee8f 100644 --- a/src/include/kompute/Algorithm.hpp +++ b/src/include/kompute/Algorithm.hpp @@ -10,6 +10,7 @@ #endif #include "kompute/Tensor.hpp" +#include "kompute/Shader.hpp" #include "logger/Logger.hpp" namespace kp { @@ -303,8 +304,7 @@ class Algorithm bool mFreeDescriptorPool = false; std::shared_ptr mDescriptorSet; bool mFreeDescriptorSet = false; - std::shared_ptr mShaderModule; - bool mFreeShaderModule = false; + std::shared_ptr mModule = nullptr; std::shared_ptr mPipelineLayout; bool mFreePipelineLayout = false; std::shared_ptr mPipelineCache; diff --git a/src/include/kompute/Cache.hpp b/src/include/kompute/Cache.hpp new file mode 100644 index 00000000..7e8e7555 --- /dev/null +++ b/src/include/kompute/Cache.hpp @@ -0,0 +1,27 @@ +#pragma once +#include "kompute/Core.hpp" + +namespace kp { + +typedef std::shared_ptr shader_module_ptr; +typedef std::shared_ptr pipeline_ptr; + + +class Pipeline + +class PipelineCache +{ + std::map< + // shader module, entry point, spec constant data + std::tuple>, + pipeline_ptr> + mPipelines; +public: + // gets pipeline if it exists; if not, pipeline is created and returned + pipeline_ptr getPipeline(const shader_module_ptr& shaderModule, + const std::string& entryPoint, const std::vector& specConstants); + + void clear() 
{ mPipelines.clear(); } +}; + +} // End namespace kp diff --git a/src/include/kompute/Shader.hpp b/src/include/kompute/Shader.hpp new file mode 100644 index 00000000..d6c1f804 --- /dev/null +++ b/src/include/kompute/Shader.hpp @@ -0,0 +1,45 @@ +#pragma once + +#include "kompute/Core.hpp" +#include "logger/Logger.hpp" +#include + +namespace kp { + +// forward declarations for std::shared_from_this +class Module; + +/* + * Wrapper for Vulkan's shader modules. + * The purpose of this is to manage the module lifetime, while + * building the groundwork for easily integrating things like + * SPIR-V reflection and multiple entry points in the future. + */ +class Module : public std::enable_shared_from_this +{ + // the vulkan device; not owned by this object + std::weak_ptr mDevice; + + // the shader module handle + vk::ShaderModule mShaderModule; + +public: + + /* + * Constructor accepting a device and a SPIR-V binary + */ + Module(const std::shared_ptr& device, + const std::vector& spv); + + /* + * getter for mShaderModule + */ + vk::ShaderModule& getShaderModule() { return mShaderModule; } + + /* + * Destroys the shader module properly. + */ + ~Module(); +}; + +} // End namespace kp From a9d99bdf93a115d419efba82eae92bc1a72981ba Mon Sep 17 00:00:00 2001 From: softcookiepp Date: Tue, 6 Jan 2026 13:58:32 -0800 Subject: [PATCH 03/12] Removed Cache.cpp, as it will not be used. 
Signed-off-by: softcookiepp Signed-off-by: softcookiepp --- src/CMakeLists.txt | 3 +-- src/Cache.cpp | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) delete mode 100644 src/Cache.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6b7313f6..04aa4e77 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -20,8 +20,7 @@ add_library(kompute Algorithm.cpp Core.cpp Image.cpp Memory.cpp - Shader.cpp - Cache.cpp) + Shader.cpp) add_library(kompute::kompute ALIAS kompute) diff --git a/src/Cache.cpp b/src/Cache.cpp deleted file mode 100644 index 8b137891..00000000 --- a/src/Cache.cpp +++ /dev/null @@ -1 +0,0 @@ - From 4fff1951c093b7ebd981c73b4a77f5e721af5007 Mon Sep 17 00:00:00 2001 From: softcookiepp Date: Thu, 8 Jan 2026 10:24:09 -0800 Subject: [PATCH 04/12] completely forgot to remove Cache.hpp, as it is not used Signed-off-by: softcookiepp --- src/include/kompute/Cache.hpp | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 src/include/kompute/Cache.hpp diff --git a/src/include/kompute/Cache.hpp b/src/include/kompute/Cache.hpp deleted file mode 100644 index 7e8e7555..00000000 --- a/src/include/kompute/Cache.hpp +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once -#include "kompute/Core.hpp" - -namespace kp { - -typedef std::shared_ptr shader_module_ptr; -typedef std::shared_ptr pipeline_ptr; - - -class Pipeline - -class PipelineCache -{ - std::map< - // shader module, entry point, spec constant data - std::tuple>, - pipeline_ptr> - mPipelines; -public: - // gets pipeline if it exists; if not, pipeline is created and returned - pipeline_ptr getPipeline(const shader_module_ptr& shaderModule, - const std::string& entryPoint, const std::vector& specConstants); - - void clear() { mPipelines.clear(); } -}; - -} // End namespace kp From 67ba9c350164b04426c2cbe270110fbdba31fe8a Mon Sep 17 00:00:00 2001 From: softcookiepp Date: Thu, 8 Jan 2026 10:54:56 -0800 Subject: [PATCH 05/12] as you wish Signed-off-by: softcookiepp --- 
src/Algorithm.cpp | 9 ++++---- src/Shader.cpp | 25 +++++++++++++++++----- src/include/kompute/Algorithm.hpp | 2 +- src/include/kompute/Shader.hpp | 35 +++++++++++++++---------------- 4 files changed, 43 insertions(+), 28 deletions(-) diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index 0619f7b1..33c0b7d5 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -18,7 +18,7 @@ Algorithm::isInit() { return this->mPipeline && this->mPipelineCache && this->mPipelineLayout && this->mDescriptorPool && this->mDescriptorSet && - this->mDescriptorSetLayout && this->mModule; + this->mDescriptorSetLayout && this->mShader; } void @@ -74,7 +74,8 @@ Algorithm::destroy() this->mPipelineLayout = nullptr; } - this->mModule = nullptr; + this->mShader->destroy(); + this->mShader = nullptr; // We don't call freeDescriptorSet as the descriptor pool is not created // with VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT more at @@ -212,7 +213,7 @@ void Algorithm::createShaderModule() { KP_LOG_DEBUG("Kompute Algorithm createShaderModule started"); - this->mModule = std::make_shared(this->mDevice, this->mSpirv); + this->mShader = std::make_shared(this->mDevice, this->mSpirv); KP_LOG_DEBUG("Kompute Algorithm create shader module success"); } @@ -266,7 +267,7 @@ Algorithm::createPipeline() vk::PipelineShaderStageCreateInfo shaderStage( vk::PipelineShaderStageCreateFlags(), vk::ShaderStageFlagBits::eCompute, - this->mModule->getShaderModule(), + this->mShader->getShaderModule(), "main", &specializationInfo); diff --git a/src/Shader.cpp b/src/Shader.cpp index cfd3f3e3..bfe604b0 100644 --- a/src/Shader.cpp +++ b/src/Shader.cpp @@ -2,7 +2,7 @@ namespace kp { -Module::Module(const std::shared_ptr& device, +Shader::Shader(const std::shared_ptr& device, const std::vector& spv) : mDevice(device) { @@ -11,18 +11,33 @@ Module::Module(const std::shared_ptr& device, spv.size()); vk::ShaderModuleCreateInfo shaderModuleInfo(vk::ShaderModuleCreateFlags(), sizeof(uint32_t) * spv.size(), spv.data()); - 
this->mDevice.lock()->createShaderModule( + this->mDevice->createShaderModule( &shaderModuleInfo, nullptr, &(this->mShaderModule) ); KP_LOG_DEBUG("Kompute Module constructor success"); } -Module::~Module() +const vk::ShaderModule& Shader::getShaderModule() +{ + if (this->mDestroyed) + throw std::runtime_error("Attempting to get vk::ShaderModule from destroyed kp::Shader instance"); + return this->mShaderModule; +} + +void Shader::destroy() { KP_LOG_DEBUG("Kompute Module destructor started"); KP_LOG_DEBUG("Kompute Module Destroying shader module"); - if (!mDevice.expired() ) - mDevice.lock()->destroyShaderModule(mShaderModule); + if (!this->mDestroyed) + { + this->mDestroyed = true; + this->mDevice->destroyShaderModule(this->mShaderModule); + } KP_LOG_DEBUG("Kompute Module destructor success"); } +Shader::~Shader() +{ + this->destroy(); +} + } // end namespace kp diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp index e3fcee8f..eaa437ab 100644 --- a/src/include/kompute/Algorithm.hpp +++ b/src/include/kompute/Algorithm.hpp @@ -304,7 +304,6 @@ class Algorithm bool mFreeDescriptorPool = false; std::shared_ptr mDescriptorSet; bool mFreeDescriptorSet = false; - std::shared_ptr mModule = nullptr; std::shared_ptr mPipelineLayout; bool mFreePipelineLayout = false; std::shared_ptr mPipelineCache; @@ -321,6 +320,7 @@ class Algorithm uint32_t mPushConstantsDataTypeMemorySize = 0; uint32_t mPushConstantsSize = 0; Workgroup mWorkgroup; + std::shared_ptr mShader = nullptr; // Create util functions void createShaderModule(); diff --git a/src/include/kompute/Shader.hpp b/src/include/kompute/Shader.hpp index d6c1f804..2d430391 100644 --- a/src/include/kompute/Shader.hpp +++ b/src/include/kompute/Shader.hpp @@ -7,39 +7,38 @@ namespace kp { // forward declarations for std::shared_from_this -class Module; +class Shader; /* * Wrapper for Vulkan's shader modules. 
- * The purpose of this is to manage the module lifetime, while - * building the groundwork for easily integrating things like - * SPIR-V reflection and multiple entry points in the future. */ -class Module : public std::enable_shared_from_this +class Shader { - // the vulkan device; not owned by this object - std::weak_ptr mDevice; + // not-owned resources + std::shared_ptr mDevice; - // the shader module handle + // owned resources vk::ShaderModule mShaderModule; + bool mDestroyed = false; public: - /* + /** * Constructor accepting a device and a SPIR-V binary - */ - Module(const std::shared_ptr& device, + * @param device The vk::Device for the shader module to be compiled for + * @param spv The SPIR-V binary + **/ + Shader(const std::shared_ptr& device, const std::vector& spv); - /* + /** * getter for mShaderModule - */ - vk::ShaderModule& getShaderModule() { return mShaderModule; } + **/ + const vk::ShaderModule& getShaderModule(); - /* - * Destroys the shader module properly. - */ - ~Module(); + void destroy(); + + ~Shader(); }; } // End namespace kp From ff7be3495e0790c9c600c8228942a4faae94e466 Mon Sep 17 00:00:00 2001 From: softcookiepp Date: Mon, 12 Jan 2026 09:17:39 -0800 Subject: [PATCH 06/12] Fixed incorrect kp::Manager usage in TestAsyncOperations.cpp Signed-off-by: softcookiepp --- test/TestAsyncOperations.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/TestAsyncOperations.cpp b/test/TestAsyncOperations.cpp index 92ec664b..28d05256 100644 --- a/test/TestAsyncOperations.cpp +++ b/test/TestAsyncOperations.cpp @@ -86,8 +86,8 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) std::vector> algosAsync; for (uint32_t i = 0; i < numParallel; i++) { - inputsAsyncB.push_back(mgr.tensor(data)); - algosAsync.push_back(mgr.algorithm({ inputsAsyncB[i] }, spirv)); + inputsAsyncB.push_back(mgrAsync.tensor(data)); + algosAsync.push_back(mgrAsync.algorithm({ inputsAsyncB[i] }, spirv)); } std::vector> sqs; From 
c2518a71c4ecbcd2163cce2663a05d52f588d7cb Mon Sep 17 00:00:00 2001 From: Robert Quill Date: Mon, 12 Jan 2026 13:41:35 +0000 Subject: [PATCH 07/12] Fix incorrect creation of python arrays in Tensor.data (#440) Previously the Python array would just be filled with the first element of the data repeated to fill the array. Fixes a number of the Python tests. Signed-off-by: Robert Quill Signed-off-by: softcookiepp --- python/src/main.cpp | 88 +++++++++++++++++++++++++++++++-------------- 1 file changed, 62 insertions(+), 26 deletions(-) diff --git a/python/src/main.cpp b/python/src/main.cpp index 94e4b134..8bec2970 100644 --- a/python/src/main.cpp +++ b/python/src/main.cpp @@ -163,24 +163,39 @@ PYBIND11_MODULE(kp, m) m, "Tensor", DOC(kp, Tensor)) .def( "data", - [](kp::Tensor& self) { + [](kp::Tensor& self) -> py::array { // Non-owning container exposing the underlying pointer switch (self.dataType()) { case kp::Memory::DataTypes::eFloat: - return py::array( - self.size(), self.data(), py::cast(&self)); + return py::array_t( + {static_cast(self.size())}, // shape + {sizeof(float)}, // strides + self.data(), // ptr + py::cast(&self)); // parent case kp::Memory::DataTypes::eUnsignedInt: - return py::array( - self.size(), self.data(), py::cast(&self)); + return py::array_t( + {static_cast(self.size())}, // shape + {sizeof(uint32_t)}, // strides + self.data(), // ptr + py::cast(&self)); // parent case kp::Memory::DataTypes::eInt: - return py::array( - self.size(), self.data(), py::cast(&self)); + return py::array_t( + {static_cast(self.size())}, // shape + {sizeof(int32_t)}, // strides + self.data(), // ptr + py::cast(&self)); // parent case kp::Memory::DataTypes::eDouble: - return py::array( - self.size(), self.data(), py::cast(&self)); + return py::array_t( + {static_cast(self.size())}, // shape + {sizeof(double)}, // strides + self.data(), // ptr + py::cast(&self)); // parent case kp::Memory::DataTypes::eBool: - return py::array( - self.size(), self.data(), 
py::cast(&self)); + return py::array_t( + {static_cast(self.size())}, // shape + {sizeof(bool)}, // strides + self.data(), // ptr + py::cast(&self)); // parent default: throw std::runtime_error( "Kompute Python data type not supported"); @@ -200,30 +215,51 @@ PYBIND11_MODULE(kp, m) m, "Image", DOC(kp, Image)) .def( "data", - [](kp::Image& self) { + [](kp::Image& self) -> py::array { // Non-owning container exposing the underlying pointer switch (self.dataType()) { case kp::Memory::DataTypes::eFloat: - return py::array( - self.size(), self.data(), py::cast(&self)); + return py::array_t( + {static_cast(self.size())}, // shape + {sizeof(float)}, // strides + self.data(), // ptr + py::cast(&self)); // parent case kp::Memory::DataTypes::eUnsignedInt: - return py::array( - self.size(), self.data(), py::cast(&self)); + return py::array_t( + {static_cast(self.size())}, // shape + {sizeof(uint32_t)}, // strides + self.data(), // ptr + py::cast(&self)); // parent case kp::Memory::DataTypes::eInt: - return py::array( - self.size(), self.data(), py::cast(&self)); + return py::array_t( + {static_cast(self.size())}, // shape + {sizeof(int32_t)}, // strides + self.data(), // ptr + py::cast(&self)); // parent case kp::Memory::DataTypes::eUnsignedShort: - return py::array( - self.size(), self.data(), py::cast(&self)); + return py::array_t( + {static_cast(self.size())}, // shape + {sizeof(uint16_t)}, // strides + self.data(), // ptr + py::cast(&self)); // parent case kp::Memory::DataTypes::eShort: - return py::array( - self.size(), self.data(), py::cast(&self)); + return py::array_t( + {static_cast(self.size())}, // shape + {sizeof(int16_t)}, // strides + self.data(), // ptr + py::cast(&self)); // parent case kp::Memory::DataTypes::eUnsignedChar: - return py::array( - self.size(), self.data(), py::cast(&self)); + return py::array_t( + {static_cast(self.size())}, // shape + {sizeof(uint8_t)}, // strides + self.data(), // ptr + py::cast(&self)); // parent case 
kp::Memory::DataTypes::eChar: - return py::array( - self.size(), self.data(), py::cast(&self)); + return py::array_t( + {static_cast(self.size())}, // shape + {sizeof(int8_t)}, // strides + self.data(), // ptr + py::cast(&self)); // parent default: throw std::runtime_error( "Kompute Python data type not supported"); From fdcff9f35c9dd9ef6ecbd3296369da5d2ee9ac51 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 12 Jan 2026 16:01:30 +0100 Subject: [PATCH 08/12] Updated to pybind 3.0.0 (#431) Signed-off-by: Alejandro Saucedo Signed-off-by: softcookiepp --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1f514ec9..bd72356f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -226,7 +226,7 @@ endif() if(KOMPUTE_OPT_BUILD_PYTHON) if(KOMPUTE_OPT_USE_BUILT_IN_PYBIND11) FetchContent_Declare(pybind GIT_REPOSITORY https://github.com/pybind/pybind11.git - GIT_TAG v2.9.2 + GIT_TAG v3.0.0 GIT_SHALLOW 1) # Source: https://github.com/pybind/pybind11/releases FetchContent_MakeAvailable(pybind) else() From b6b19e4b5cbeb7c9307f193ad0700bee0d3d3174 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 12 Jan 2026 17:15:39 +0100 Subject: [PATCH 09/12] Clarify status of llama.cpp in README (#446) Updated description for llama.cpp to indicate it is now decommissioned. Signed-off-by: Alejandro Saucedo Signed-off-by: softcookiepp --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4ff60ebf..9d7b4cf4 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ ## Projects using Kompute ❤️ 🤖 * [GPT4ALL](https://github.com/nomic-ai/gpt4all) ![](https://img.shields.io/github/stars/nomic-ai/gpt4all.svg?style=social) - An ecosystem of open-source on-edge large language models that run locally on your CPU and nearly any GPU. 
-* [llama.cpp](https://github.com/ggerganov/llama.cpp) ![](https://img.shields.io/github/stars/ggerganov/llama.cpp.svg?style=social) - Port of Facebook's LLaMA model in C/C++. +* [llama.cpp](https://github.com/ggerganov/llama.cpp) ![](https://img.shields.io/github/stars/ggerganov/llama.cpp.svg?style=social) - Port of Facebook's LLaMA model in C/C++ (now decommissioned). * [tpoisonooo/how-to-optimize-gemm](https://github.com/tpoisonooo/how-to-optimize-gemm) ![](https://img.shields.io/github/stars/tpoisonooo/how-to-optimize-gemm.svg?style=social) - row-major matmul optimization. * [vkJAX](https://github.com/alexander-g/vkJAX) ![](https://img.shields.io/github/stars/alexander-g/vkJAX.svg?style=social) - JAX interpreter for Vulkan. From d94ea8e7bb4ebebba1a3dca8e7884c53f6287351 Mon Sep 17 00:00:00 2001 From: softcookiepp Date: Mon, 12 Jan 2026 11:40:30 -0800 Subject: [PATCH 10/12] Tests pass now Signed-off-by: softcookiepp --- src/Algorithm.cpp | 7 +++++-- src/Manager.cpp | 4 ++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index 33c0b7d5..70db02d4 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -74,8 +74,11 @@ Algorithm::destroy() this->mPipelineLayout = nullptr; } - this->mShader->destroy(); - this->mShader = nullptr; + if (this->mShader) + { + this->mShader->destroy(); + this->mShader = nullptr; + } // We don't call freeDescriptorSet as the descriptor pool is not created // with VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT more at diff --git a/src/Manager.cpp b/src/Manager.cpp index b74dc59c..4327fab7 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -381,11 +381,11 @@ Manager::createDevice(const std::vector& familyQueueIndices, uint32_t computeQueueFamilyIndex = 0; bool computeQueueSupported = false; for (uint32_t i = 0; i < allQueueFamilyProperties.size(); i++) { - vk::QueueFamilyProperties queueFamilyProperties = + const vk::QueueFamilyProperties& queueFamilyProperties = 
allQueueFamilyProperties[i]; if (queueFamilyProperties.queueFlags & - vk::QueueFlagBits::eCompute) { + (vk::QueueFlagBits::eCompute | vk::QueueFlagBits::eTransfer) ) { computeQueueFamilyIndex = i; computeQueueSupported = true; break; From 7abb72852f56c95ffc04a69b501109fbdfc94b12 Mon Sep 17 00:00:00 2001 From: softcookiepp Date: Wed, 14 Jan 2026 11:02:55 -0800 Subject: [PATCH 11/12] removed mSpirv from kp::Algorithm Signed-off-by: softcookiepp --- src/Algorithm.cpp | 4 ++-- src/include/kompute/Algorithm.hpp | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp index 70db02d4..b8f7cbfa 100644 --- a/src/Algorithm.cpp +++ b/src/Algorithm.cpp @@ -213,10 +213,10 @@ Algorithm::createParameters() } void -Algorithm::createShaderModule() +Algorithm::createShaderModule(const std::vector& spirv) { KP_LOG_DEBUG("Kompute Algorithm createShaderModule started"); - this->mShader = std::make_shared(this->mDevice, this->mSpirv); + this->mShader = std::make_shared(this->mDevice, spirv); KP_LOG_DEBUG("Kompute Algorithm create shader module success"); } diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp index eaa437ab..6de6aa06 100644 --- a/src/include/kompute/Algorithm.hpp +++ b/src/include/kompute/Algorithm.hpp @@ -96,7 +96,6 @@ class Algorithm KP_LOG_DEBUG("Kompute Algorithm rebuild started"); this->mMemObjects = memObjects; - this->mSpirv = spirv; if (specializationConstants.size()) { if (this->mSpecializationConstantsData) { @@ -138,7 +137,7 @@ class Algorithm } this->createParameters(); - this->createShaderModule(); + this->createShaderModule(spirv); this->createPipeline(); } @@ -312,7 +311,6 @@ class Algorithm bool mFreePipeline = false; // -------------- ALWAYS OWNED RESOURCES - std::vector mSpirv; void* mSpecializationConstantsData = nullptr; uint32_t mSpecializationConstantsDataTypeMemorySize = 0; uint32_t mSpecializationConstantsSize = 0; @@ -323,7 +321,7 @@ class Algorithm 
std::shared_ptr mShader = nullptr; // Create util functions - void createShaderModule(); + void createShaderModule(const std::vector& spirv); void createPipeline(); // Parameters From f89f4930d08ba369ab3eeab40a9c3df44db9d7e0 Mon Sep 17 00:00:00 2001 From: softcookiepp Date: Thu, 15 Jan 2026 10:15:39 -0800 Subject: [PATCH 12/12] reverted a silly change made based on silly assumptions Signed-off-by: softcookiepp --- src/Manager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Manager.cpp b/src/Manager.cpp index 4327fab7..3b8f91a5 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -385,7 +385,7 @@ Manager::createDevice(const std::vector& familyQueueIndices, allQueueFamilyProperties[i]; if (queueFamilyProperties.queueFlags & - (vk::QueueFlagBits::eCompute | vk::QueueFlagBits::eTransfer) ) { + vk::QueueFlagBits::eCompute ) { computeQueueFamilyIndex = i; computeQueueSupported = true; break;