From ac504c8845ef1109960526b674a80f00a2acd438 Mon Sep 17 00:00:00 2001
From: ssjia <ssjia@devvm26340.ftw0.facebook.com>
Date: Mon, 2 Feb 2026 09:13:57 -0800
Subject: [PATCH] [ET-VK][ez] Implement helper functions to get fastest moving
 dim

Add C++ and GLSL helpers to query the fastest moving dimension (the
dimension with stride 1 in buffer layout). This is useful for optimizing
memory access patterns in shaders, as iterating along the fastest moving
dimension maximizes cache locality.

The C++ `fastest_whcn_dim()` method accounts for block-transposed layouts by
returning `outer_packed_dim` instead of `packed_dim` when applicable. A
corresponding GLSL macro extracts this info from the hashed layout.

Differential Revision: [D92061369](https://our.internmc.facebook.com/intern/diff/D92061369/)

[ghstack-poisoned]
---
 backends/vulkan/runtime/api/containers/Tensor.h       | 10 ++++++++++
 backends/vulkan/runtime/graph/ComputeGraph.h          |  4 ++++
 backends/vulkan/runtime/graph/ops/glsl/indexing.glslh |  2 ++
 3 files changed, 16 insertions(+)

diff --git a/backends/vulkan/runtime/api/containers/Tensor.h b/backends/vulkan/runtime/api/containers/Tensor.h
index 8341097b490..bfe68cd8d60 100644
--- a/backends/vulkan/runtime/api/containers/Tensor.h
+++ b/backends/vulkan/runtime/api/containers/Tensor.h
@@ -513,6 +513,16 @@ class vTensor final {
     return packed_dim_info_.packed_dim;
   }
 
+  /*
+   * Returns the WHCN index of the fastest moving dimension (dim_order[0]).
+   * This is the dimension with stride 1 in the buffer layout.
+   * Note: dim_order_ is in NCHW order, so we convert to WHCN (3 - nchw_dim).
+   */
+  inline int32_t fastest_whcn_dim() const {
+    return packed_dim_info_.block_transposed ? packed_dim_info_.outer_packed_dim
+                                             : packed_dim_info_.packed_dim;
+  }
+
   inline const PackedDimInfo& packed_dim_info() const {
     return packed_dim_info_;
   }
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h
index 2231b78964a..a7c8cffffd1 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.h
+++ b/backends/vulkan/runtime/graph/ComputeGraph.h
@@ -456,6 +456,10 @@ class ComputeGraph final {
     return values_.at(idx).toConstTensor().packed_dim();
   }
 
+  inline int32_t fastest_whcn_dim_of(const ValueRef idx) const {
+    return values_.at(idx).toConstTensor().fastest_whcn_dim();
+  }
+
   inline const api::PackedDimInfo& packed_dim_info_of(
       const ValueRef idx) const {
     return values_.at(idx).toConstTensor().packed_dim_info();
diff --git a/backends/vulkan/runtime/graph/ops/glsl/indexing.glslh b/backends/vulkan/runtime/graph/ops/glsl/indexing.glslh
index 24f050b694e..16c4112547c 100644
--- a/backends/vulkan/runtime/graph/ops/glsl/indexing.glslh
+++ b/backends/vulkan/runtime/graph/ops/glsl/indexing.glslh
@@ -70,6 +70,8 @@ bool is_channels_last(const int hashed_layout) {
   return layout_id(hashed_layout) == CHANNELS_LAST_BUFFER_LAYOUT_ID;
 }
 
+#define get_fastest_dim(layout) ((layout) & 0xF)
+
 // Extract packed dim info from hashed_layout (bits 16-31)
 // These match the format created by create_hashed_layout() in Tensor.cpp
 #define get_packed_dim(layout) (((layout) >> 16) & 0xF)