From 7969663b52d982b301d4225eaf1e8a92eb62ad6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20M=C3=B6ller?= Date: Tue, 27 Jan 2026 19:37:37 +0100 Subject: [PATCH 1/2] checks if given family queue has compute capability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Matthias Möller --- src/Manager.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/Manager.cpp b/src/Manager.cpp index b74dc59c..b5facd0c 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -398,6 +398,22 @@ Manager::createDevice(const std::vector& familyQueueIndices, this->mComputeQueueFamilyIndices.push_back(computeQueueFamilyIndex); } else { + std::vector allQueueFamilyProperties = + physicalDevice.getQueueFamilyProperties(); + for (auto queueIndexGiven : familyQueueIndices) { + if (queueIndexGiven >= allQueueFamilyProperties.size()) { + throw std::runtime_error( + "Given family queue index does not exists. Index given: " + + std::to_string(queueIndexGiven)); + } + if (!(allQueueFamilyProperties[queueIndexGiven].queueFlags & + vk::QueueFlagBits::eCompute)) { + throw std::runtime_error( + "Given family queue index does not support compute " + "operations. Index given: " + + std::to_string(queueIndexGiven)); + } + } this->mComputeQueueFamilyIndices = familyQueueIndices; } From e35633fba3a60c3273a86dd03b8be07aeb2c940f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20M=C3=B6ller?= Date: Tue, 27 Jan 2026 19:38:10 +0100 Subject: [PATCH 2/2] test will search for queues with compute capabilities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Matthias Möller --- test/TestAsyncOperations.cpp | 43 +++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/test/TestAsyncOperations.cpp b/test/TestAsyncOperations.cpp index 92ec664b..db7ff370 100644 --- a/test/TestAsyncOperations.cpp +++ b/test/TestAsyncOperations.cpp @@ -8,14 +8,32 @@ #include "kompute/logger/Logger.hpp" #include "shaders/Utils.hpp" +namespace { +std::vector +distinctFamilyQueueIndices(const vk::PhysicalDevice& device) +{ + const std::vector allQueueFamilyProperties = + device.getQueueFamilyProperties(); + std::vector distinctQueuesIndices; + + for (uint32_t i = 0; i < allQueueFamilyProperties.size(); i++) { + if (allQueueFamilyProperties[i].queueFlags & + (vk::QueueFlagBits::eCompute)) { + distinctQueuesIndices.push_back(i); + } + } + return distinctQueuesIndices; +} +} + TEST(TestAsyncOperations, TestManagerParallelExecution) { - // This test is built for NVIDIA 1650. It assumes: - // * Queue family 0 and 2 have compute capabilities + // This test assumes: + // * There are at least 2 different Queue families with compute capabilities // * GPU is able to process parallel shader code across different families - uint32_t size = 10; + constexpr uint32_t size = 10; - uint32_t numParallel = 2; + constexpr uint32_t numParallel = 2; std::string shader(R"( #version 450 @@ -79,7 +97,18 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) EXPECT_EQ(inputsSyncB[i]->vector(), resultSync); } - kp::Manager mgrAsync(0, { 0, 2 }); + constexpr uint32_t deviceId = + 0u; // device 0 exists, because "mgr" could be created already + auto queues = distinctFamilyQueueIndices( + mgr.getVkInstance()->enumeratePhysicalDevices().at(deviceId)); + if (queues.size() < numParallel) { + GTEST_SKIP() << "GPU does not support multiple compute queues. Only " + << queues.size() << " are supported. Skipping test."; + } + + queues.resize(numParallel); + + kp::Manager mgrAsync(deviceId, std::move(queues)); std::vector> inputsAsyncB; @@ -118,7 +147,9 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) } // The speedup should be at least 40% - EXPECT_LT(durationAsync, durationSync * 0.6); + EXPECT_LT(durationAsync, durationSync * 0.6) + << "There was no speedup in using multiple queues from different " + "QueueFamilies. Maybe your GPU does not support parallel execution."; } TEST(TestAsyncOperations, TestManagerAsyncExecution)