From 652ae2382ecba88ebbfe12351624ea04c541e035 Mon Sep 17 00:00:00 2001 From: ssjia Date: Tue, 3 Feb 2026 09:36:05 -0800 Subject: [PATCH 1/3] [ET-VK] Refactor Context::flush() and make ComputeGraph destructor exception-safe Refactored Context::flush() by extracting two new public functions: - wait_for_queue(): blocks until the GPU queue is idle - clear_resources(): clears command/descriptor pools and cleanup lists This allows callers to use these operations independently. ComputeGraph's destructor now uses these functions directly and wraps them in try/catch blocks to ensure it never throws exceptions, following C++ best practices for destructors. Authored with Claude. Differential Revision: [D92171362](https://our.internmc.facebook.com/intern/diff/D92171362/) ghstack-source-id: 337901647 Pull Request resolved: https://github.com/pytorch/executorch/pull/17158 --- backends/vulkan/runtime/api/Context.cpp | 10 ++++++++-- backends/vulkan/runtime/api/Context.h | 4 ++++ .../vulkan/runtime/graph/ComputeGraph.cpp | 19 ++++++++++++++----- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/backends/vulkan/runtime/api/Context.cpp b/backends/vulkan/runtime/api/Context.cpp index 326391424df..80aef97fc04 100644 --- a/backends/vulkan/runtime/api/Context.cpp +++ b/backends/vulkan/runtime/api/Context.cpp @@ -226,13 +226,14 @@ void Context::submit_cmd_to_gpu(VkFence fence_handle, const bool final_use) { } } -void Context::flush() { +void Context::wait_for_queue() { VK_CHECK(vkQueueWaitIdle(queue().handle)); +} +void Context::clear_resources() { command_pool_.flush(); descriptor_pool_.flush(); - // If there is an existing command buffer, invalidate it if (cmd_) { cmd_.invalidate(); } @@ -243,6 +244,11 @@ void Context::flush() { images_to_clear_.clear(); } +void Context::flush() { + wait_for_queue(); + clear_resources(); +} + bool available() { return context(); } diff --git a/backends/vulkan/runtime/api/Context.h b/backends/vulkan/runtime/api/Context.h index 5764cb6a894..49003435e3b 100644 --- a/backends/vulkan/runtime/api/Context.h +++ b/backends/vulkan/runtime/api/Context.h @@ -232,6 +232,10 @@ class Context final { return cmd_; } + void wait_for_queue(); + + void clear_resources(); + void flush(); #if defined(VK_KHR_pipeline_executable_properties) && \ diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp index 3a5aabb1c7d..bb2df30a174 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.cpp +++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp @@ -183,13 +183,22 @@ ComputeGraph::ComputeGraph(GraphConfig config) } ComputeGraph::~ComputeGraph() { - values_.clear(); + // Wait for all currently executing commands to complete before cleaning up. + // If wait_for_queue() throws an exception, still proceed with cleanup. + try { + context_->wait_for_queue(); + } catch (...) { + } - prepack_nodes_.clear(); - execute_nodes_.clear(); - clear_deferred_cmds(); + // Wrap in try/catch to ensure that destructor does not throw + try { + values_.clear(); - context_->flush(); + prepack_nodes_.clear(); + execute_nodes_.clear(); + clear_deferred_cmds(); + } catch (...) { + } } std::vector ComputeGraph::extract_int_or_symint_list( From 08f1b085149c10cf6bc764aae0d2ca86c0b7ce2d Mon Sep 17 00:00:00 2001 From: ssjia Date: Tue, 3 Feb 2026 09:36:09 -0800 Subject: [PATCH 2/3] [ET-VK] Fix pipeline indexing bug in batch create_pipelines The original code created pipelines for all descriptors but used a separate index counter during cache insertion that only incremented for non-cached entries. This caused incorrect pipeline handles to be associated with keys when some descriptors were already cached, and leaked handles for duplicates. Fix by filtering out already-cached descriptors upfront, ensuring 1:1 correspondence between the created pipelines array and cache insertions. Authored with Claude. Differential Revision: [D92171360](https://our.internmc.facebook.com/intern/diff/D92171360/) ghstack-source-id: 337901649 Pull Request resolved: https://github.com/pytorch/executorch/pull/17159 --- backends/vulkan/runtime/vk_api/Pipeline.cpp | 31 ++++++++++++++------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/backends/vulkan/runtime/vk_api/Pipeline.cpp b/backends/vulkan/runtime/vk_api/Pipeline.cpp index 6fa85924223..522c4b8589b 100644 --- a/backends/vulkan/runtime/vk_api/Pipeline.cpp +++ b/backends/vulkan/runtime/vk_api/Pipeline.cpp @@ -459,7 +459,22 @@ void ComputePipelineCache::create_pipelines( const std::unordered_set& descriptors) { std::lock_guard lock(cache_mutex_); - const auto num_pipelines = descriptors.size(); + // Filter out descriptors already in cache to avoid creating duplicate + // pipelines and to ensure correct indexing between created pipelines and + // cache insertion. + std::vector keys_to_create; + keys_to_create.reserve(descriptors.size()); + for (const auto& key : descriptors) { + if (cache_.find(key) == cache_.cend()) { + keys_to_create.push_back(key); + } + } + + if (keys_to_create.empty()) { + return; + } + + const auto num_pipelines = keys_to_create.size(); std::vector pipelines(num_pipelines); std::vector> map_entries; @@ -474,7 +489,7 @@ void ComputePipelineCache::create_pipelines( std::vector create_infos; create_infos.reserve(num_pipelines); - for (auto& key : descriptors) { + for (const auto& key : keys_to_create) { map_entries.push_back(key.specialization_constants.generate_map_entries()); specialization_infos.push_back(VkSpecializationInfo{ @@ -513,14 +528,10 @@ void ComputePipelineCache::create_pipelines( nullptr, pipelines.data())); - uint32_t i = 0; - for (auto& key : descriptors) { - auto it = cache_.find(key); - if (it != cache_.cend()) { - continue; - } - cache_.insert({key, ComputePipelineCache::Value(device_, pipelines[i])}); - ++i; + for (size_t i = 0; i < keys_to_create.size(); ++i) { + cache_.insert( + {keys_to_create[i], + ComputePipelineCache::Value(device_, pipelines[i])}); } } From 65bc680d41701fd1b299f3666fe5342be2a09d7a Mon Sep 17 00:00:00 2001 From: ssjia Date: Tue, 3 Feb 2026 13:09:21 -0800 Subject: [PATCH 3/3] [ET-VK] Add Vulkan 1.0 compatibility for extension feature querying Pull Request resolved: https://github.com/pytorch/executorch/pull/17160 The previous implementation used `vkGetPhysicalDeviceFeatures2` unconditionally, which is only available in Vulkan 1.1+. Devices running Vulkan 1.0 would fail to query extension features correctly. This change refactors PhysicalDevice to detect the API version at runtime and use the appropriate method: - Vulkan 1.0: Uses KHR extension functions (vkGetPhysicalDeviceFeatures2KHR) obtained via vkGetInstanceProcAddr, with graceful fallback if unavailable - Vulkan 1.1+: Uses core vkGetPhysicalDeviceFeatures2 directly Also fixes a bug where shader_int_dot_product_properties was incorrectly chained into the features linked list instead of being queried separately via vkGetPhysicalDeviceProperties2. This PR was authored with Claude. ghstack-source-id: 337979631 @exported-using-ghexport Differential Revision: [D92171361](https://our.internmc.facebook.com/intern/diff/D92171361/) --- backends/vulkan/runtime/vk_api/Adapter.cpp | 2 +- backends/vulkan/runtime/vk_api/Device.cpp | 219 ++++++++++++++++----- backends/vulkan/runtime/vk_api/Device.h | 11 +- backends/vulkan/runtime/vk_api/Runtime.cpp | 2 +- 4 files changed, 180 insertions(+), 54 deletions(-) diff --git a/backends/vulkan/runtime/vk_api/Adapter.cpp b/backends/vulkan/runtime/vk_api/Adapter.cpp index 57fb4252c74..0a5b1601dea 100644 --- a/backends/vulkan/runtime/vk_api/Adapter.cpp +++ b/backends/vulkan/runtime/vk_api/Adapter.cpp @@ -266,7 +266,7 @@ Adapter::Adapter( const uint32_t num_queues, const std::string& cache_data_path) : queue_usage_mutex_{}, - physical_device_(physical_device), + physical_device_(instance, physical_device), queues_{}, queue_usage_{}, queue_mutexes_{}, diff --git a/backends/vulkan/runtime/vk_api/Device.cpp b/backends/vulkan/runtime/vk_api/Device.cpp index 7a3a825f5ec..249038ed51e 100644 --- a/backends/vulkan/runtime/vk_api/Device.cpp +++ b/backends/vulkan/runtime/vk_api/Device.cpp @@ -21,30 +21,40 @@ namespace vkcompute { namespace vkapi { -PhysicalDevice::PhysicalDevice(VkPhysicalDevice physical_device_handle) - : handle(physical_device_handle), +PhysicalDevice::PhysicalDevice( + VkInstance instance_handle, + VkPhysicalDevice physical_device_handle) + : instance(instance_handle), + handle(physical_device_handle), properties{}, memory_properties{}, #ifdef VK_KHR_16bit_storage shader_16bit_storage{ - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES}, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES, + nullptr}, #endif /* VK_KHR_16bit_storage */ #ifdef VK_KHR_8bit_storage shader_8bit_storage{ - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES}, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES, + nullptr}, #endif /* VK_KHR_8bit_storage */ #ifdef VK_KHR_shader_float16_int8 shader_float16_int8_types{ - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR}, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR, + nullptr}, #endif /* VK_KHR_shader_float16_int8 */ #ifdef VK_KHR_shader_integer_dot_product shader_int_dot_product_features{ - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_FEATURES_KHR}, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_FEATURES_KHR, + nullptr}, shader_int_dot_product_properties{ - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_PROPERTIES_KHR}, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_PROPERTIES_KHR, + nullptr}, #endif queue_families{}, num_compute_queues(0), + api_version_major(0), + api_version_minor(0), supports_int16_shader_types(false), supports_int64_shader_types(false), supports_float64_shader_types(false), @@ -57,6 +67,9 @@ PhysicalDevice::PhysicalDevice(VkPhysicalDevice physical_device_handle) // Extract physical device properties vkGetPhysicalDeviceProperties(handle, &properties); + api_version_major = VK_VERSION_MAJOR(properties.apiVersion); + api_version_minor = VK_VERSION_MINOR(properties.apiVersion); + // Extract fields of interest has_timestamps = properties.limits.timestampComputeAndGraphics; timestamp_period = properties.limits.timestampPeriod; @@ -64,55 +77,18 @@ PhysicalDevice::PhysicalDevice(VkPhysicalDevice physical_device_handle) vkGetPhysicalDeviceMemoryProperties(handle, &memory_properties); - VkPhysicalDeviceFeatures2 features2{ - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2}; - - // Create linked list to query availability of extensions - - void* extension_list_top = nullptr; - -#ifdef VK_KHR_16bit_storage - shader_16bit_storage.pNext = extension_list_top; - extension_list_top = &shader_16bit_storage; -#endif /* VK_KHR_16bit_storage */ - -#ifdef VK_KHR_8bit_storage - shader_8bit_storage.pNext = extension_list_top; - extension_list_top = &shader_8bit_storage; -#endif /* VK_KHR_8bit_storage */ - -#ifdef VK_KHR_shader_float16_int8 - shader_float16_int8_types.pNext = extension_list_top; - extension_list_top = &shader_float16_int8_types; -#endif /* VK_KHR_shader_float16_int8 */ - -#ifdef VK_KHR_shader_integer_dot_product - shader_int_dot_product_features.pNext = extension_list_top; - extension_list_top = &shader_int_dot_product_features; - shader_int_dot_product_properties.pNext = extension_list_top; - extension_list_top = &shader_int_dot_product_properties; -#endif /* VK_KHR_shader_integer_dot_product */ - - features2.pNext = extension_list_top; - - vkGetPhysicalDeviceFeatures2(handle, &features2); - - if (features2.features.shaderInt16 == VK_TRUE) { - supports_int16_shader_types = true; - } - if (features2.features.shaderInt64 == VK_TRUE) { - supports_int64_shader_types = true; - } - if (features2.features.shaderFloat64 == VK_TRUE) { - supports_float64_shader_types = true; + if (properties.apiVersion >= VK_API_VERSION_1_1) { + query_extensions_vk_1_1(); + } else { + query_extensions_vk_1_0(); } // Check if there are any memory types have both the HOST_VISIBLE and the // DEVICE_LOCAL property flags const VkMemoryPropertyFlags unified_memory_flags = - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; for (size_t i = 0; i < memory_properties.memoryTypeCount; ++i) { - if (memory_properties.memoryTypes[i].propertyFlags | unified_memory_flags) { + if (memory_properties.memoryTypes[i].propertyFlags & unified_memory_flags) { has_unified_memory = true; break; } @@ -153,6 +129,149 @@ PhysicalDevice::PhysicalDevice(VkPhysicalDevice physical_device_handle) } } +void PhysicalDevice::query_extensions_vk_1_0() { + VkPhysicalDeviceFeatures features{}; + vkGetPhysicalDeviceFeatures(handle, &features); + + if (features.shaderInt16 == VK_TRUE) { + supports_int16_shader_types = true; + } + if (features.shaderInt64 == VK_TRUE) { + supports_int64_shader_types = true; + } + if (features.shaderFloat64 == VK_TRUE) { + supports_float64_shader_types = true; + } + + // Try to query extension features using + // VK_KHR_get_physical_device_properties2 + auto vkGetPhysicalDeviceFeatures2KHR = + (PFN_vkGetPhysicalDeviceFeatures2KHR)vkGetInstanceProcAddr( + instance, "vkGetPhysicalDeviceFeatures2KHR"); + + if (vkGetPhysicalDeviceFeatures2KHR == nullptr) { + // Manually set extension features to false if the function is not available +#ifdef VK_KHR_16bit_storage + shader_16bit_storage.storageBuffer16BitAccess = VK_FALSE; + shader_16bit_storage.uniformAndStorageBuffer16BitAccess = VK_FALSE; + shader_16bit_storage.storagePushConstant16 = VK_FALSE; + shader_16bit_storage.storageInputOutput16 = VK_FALSE; +#endif /* VK_KHR_16bit_storage */ +#ifdef VK_KHR_8bit_storage + shader_8bit_storage.storageBuffer8BitAccess = VK_FALSE; + shader_8bit_storage.uniformAndStorageBuffer8BitAccess = VK_FALSE; + shader_8bit_storage.storagePushConstant8 = VK_FALSE; +#endif /* VK_KHR_8bit_storage */ +#ifdef VK_KHR_shader_float16_int8 + shader_float16_int8_types.shaderFloat16 = VK_FALSE; + shader_float16_int8_types.shaderInt8 = VK_FALSE; +#endif /* VK_KHR_shader_float16_int8 */ +#ifdef VK_KHR_shader_integer_dot_product + shader_int_dot_product_features.shaderIntegerDotProduct = VK_FALSE; +#endif /* VK_KHR_shader_integer_dot_product */ + return; + } + + VkPhysicalDeviceFeatures2KHR features2{ + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR}; + + void* extension_list_top = nullptr; + +#ifdef VK_KHR_16bit_storage + shader_16bit_storage.pNext = extension_list_top; + extension_list_top = &shader_16bit_storage; +#endif /* VK_KHR_16bit_storage */ + +#ifdef VK_KHR_8bit_storage + shader_8bit_storage.pNext = extension_list_top; + extension_list_top = &shader_8bit_storage; +#endif /* VK_KHR_8bit_storage */ + +#ifdef VK_KHR_shader_float16_int8 + shader_float16_int8_types.pNext = extension_list_top; + extension_list_top = &shader_float16_int8_types; +#endif /* VK_KHR_shader_float16_int8 */ + +#ifdef VK_KHR_shader_integer_dot_product + shader_int_dot_product_features.pNext = extension_list_top; + extension_list_top = &shader_int_dot_product_features; +#endif /* VK_KHR_shader_integer_dot_product */ + + features2.pNext = extension_list_top; + + vkGetPhysicalDeviceFeatures2KHR(handle, &features2); + + // Query properties separately from features + auto vkGetPhysicalDeviceProperties2KHR = + (PFN_vkGetPhysicalDeviceProperties2KHR)vkGetInstanceProcAddr( + instance, "vkGetPhysicalDeviceProperties2KHR"); + + if (vkGetPhysicalDeviceProperties2KHR != nullptr) { + VkPhysicalDeviceProperties2KHR properties2{ + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR}; + +#ifdef VK_KHR_shader_integer_dot_product + shader_int_dot_product_properties.pNext = nullptr; + properties2.pNext = &shader_int_dot_product_properties; +#endif /* VK_KHR_shader_integer_dot_product */ + + vkGetPhysicalDeviceProperties2KHR(handle, &properties2); + } +} + +void PhysicalDevice::query_extensions_vk_1_1() { + VkPhysicalDeviceFeatures2 features2{ + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2}; + + // Create linked list to query availability of extensions + void* extension_list_top = nullptr; + +#ifdef VK_KHR_16bit_storage + shader_16bit_storage.pNext = extension_list_top; + extension_list_top = &shader_16bit_storage; +#endif /* VK_KHR_16bit_storage */ + +#ifdef VK_KHR_8bit_storage + shader_8bit_storage.pNext = extension_list_top; + extension_list_top = &shader_8bit_storage; +#endif /* VK_KHR_8bit_storage */ + +#ifdef VK_KHR_shader_float16_int8 + shader_float16_int8_types.pNext = extension_list_top; + extension_list_top = &shader_float16_int8_types; +#endif /* VK_KHR_shader_float16_int8 */ + +#ifdef VK_KHR_shader_integer_dot_product + shader_int_dot_product_features.pNext = extension_list_top; + extension_list_top = &shader_int_dot_product_features; +#endif /* VK_KHR_shader_integer_dot_product */ + + features2.pNext = extension_list_top; + + vkGetPhysicalDeviceFeatures2(handle, &features2); + + if (features2.features.shaderInt16 == VK_TRUE) { + supports_int16_shader_types = true; + } + if (features2.features.shaderInt64 == VK_TRUE) { + supports_int64_shader_types = true; + } + if (features2.features.shaderFloat64 == VK_TRUE) { + supports_float64_shader_types = true; + } + + // Query properties separately from features + VkPhysicalDeviceProperties2 properties2{ + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2}; + +#ifdef VK_KHR_shader_integer_dot_product + shader_int_dot_product_properties.pNext = nullptr; + properties2.pNext = &shader_int_dot_product_properties; +#endif /* VK_KHR_shader_integer_dot_product */ + + vkGetPhysicalDeviceProperties2(handle, &properties2); +} + // // DeviceHandle // diff --git a/backends/vulkan/runtime/vk_api/Device.h b/backends/vulkan/runtime/vk_api/Device.h index 917df514c4b..2bc3075ffb4 100644 --- a/backends/vulkan/runtime/vk_api/Device.h +++ b/backends/vulkan/runtime/vk_api/Device.h @@ -27,7 +27,8 @@ enum class DeviceType : uint32_t { }; struct PhysicalDevice final { - // Handle + // Handles + VkInstance instance; VkPhysicalDevice handle; // Properties obtained from Vulkan @@ -56,6 +57,8 @@ struct PhysicalDevice final { // Metadata uint32_t num_compute_queues; + uint32_t api_version_major; + uint32_t api_version_minor; bool supports_int16_shader_types; bool supports_int64_shader_types; bool supports_float64_shader_types; @@ -68,7 +71,11 @@ struct PhysicalDevice final { std::string device_name; DeviceType device_type; - explicit PhysicalDevice(VkPhysicalDevice); + explicit PhysicalDevice(VkInstance instance, VkPhysicalDevice); + + private: + void query_extensions_vk_1_0(); + void query_extensions_vk_1_1(); }; struct DeviceHandle final { diff --git a/backends/vulkan/runtime/vk_api/Runtime.cpp b/backends/vulkan/runtime/vk_api/Runtime.cpp index 8bd4f8843bf..3d3a146d80d 100644 --- a/backends/vulkan/runtime/vk_api/Runtime.cpp +++ b/backends/vulkan/runtime/vk_api/Runtime.cpp @@ -171,7 +171,7 @@ std::vector create_physical_devices( std::vector device_mappings; device_mappings.reserve(device_count); for (VkPhysicalDevice physical_device : devices) { - device_mappings.emplace_back(PhysicalDevice(physical_device), -1); + device_mappings.emplace_back(PhysicalDevice(instance, physical_device), -1); } return device_mappings;