Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 50 additions & 50 deletions backends/vulkan/runtime/graph/ops/glsl/permute_texture.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -9,97 +9,97 @@
#version 450 core

${define_required_extensions("texture3d", DTYPE)}
${define_explicit_type_extensions(DTYPE)}

#define PRECISION ${PRECISION}

#define VEC4_T ${texel_type(DTYPE)}
#define T ${buffer_scalar_type(DTYPE)}
#define VEC4_T ${texel_load_type(DTYPE, "texture3d")}
#define T ${texel_load_component_type(DTYPE, "texture3d")}

${define_active_storage_type("texture3d")}

#extension GL_EXT_control_flow_attributes : require

layout(std430) buffer;

#include "indexing_utils.h"
#include "common.glslh"
#include "indexing.glslh"

${layout_declare_tensor(B, "w", "t_out", DTYPE, "texture3d")}
${layout_declare_tensor(B, "r", "t_in", DTYPE, "texture3d")}

${layout_declare_ubo(B, "TextureMetadata", "outp")}
${layout_declare_ubo(B, "TextureMetadata", "inp")}

layout(push_constant) uniform restrict Block {
ivec4 out_sizes;
ivec4 in_sizes;
ivec4 permute_dims; // Permutation mapping: permute_dims[i] = j means output dim i comes from input dim j
ivec4 permute_dims;
};

${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 out_axis_map = unhash_axis_map(out_layout);
const lowp int out_packed_dim = unhash_packed_dim(out_layout);

${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")}
const lowp ivec4 in_axis_map = unhash_axis_map(in_layout);
const lowp int in_packed_dim = unhash_packed_dim(in_layout);
${layout_declare_spec_const(C, "int", "out_layout", "CONTIG_LAYOUT_INT")}
${layout_declare_spec_const(C, "int", "in_layout", "CONTIG_LAYOUT_INT")}
const int out_packed_dim = get_packed_dim(out_layout);
const int in_packed_dim = get_packed_dim(in_layout);

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

// Convert output tensor index to input tensor index based on permutation
// Convert output tensor index to input tensor index based on permutation.
// permute_dims[i] = j means output dim i comes from input dim j.
// We write: in_tidx[permute_dims.{x,y,z,w}] = out_tidx.{x,y,z,w}
// This uses literal component access on the push constant (safe) and dynamic
// indexing into the local in_tidx variable (also safe).
ivec4 out_tidx_to_in_tidx(const ivec4 out_tidx) {
ivec4 in_tidx;

// Apply the permutation mapping: in_tidx[permute_dims[i]] = out_tidx[i]
in_tidx[permute_dims.x] = out_tidx.x;
in_tidx[permute_dims.y] = out_tidx.y;
in_tidx[permute_dims.z] = out_tidx.z;
in_tidx[permute_dims.w] = out_tidx.w;

return in_tidx;
}

// Check if we can use the fast path where texels from the input tensor can be
// copied directly into the output tensor. This occurs when the packed dimension
// is preserved in the permutation, i.e. reading a texel from the output tensor
// produces 4 texels along the same dimension as reading a texel from the input
// tensor.
bool can_use_fast_path() {
// Fast path is possible when the packed dimension is preserved in the permutation
// This means permute_dims[out_packed_dim] == in_packed_dim
return permute_dims[out_packed_dim] == in_packed_dim;
}

void main() {
const ivec3 lpos = ivec3(gl_GlobalInvocationID);
ivec4 out_tidx = lpos_to_tidx(lpos, out_sizes, out_axis_map.w, out_packed_dim);
const ivec3 out_pos = ivec3(gl_GlobalInvocationID);

if (any(greaterThanEqual(out_tidx, out_sizes))) {
if (out_of_bounds(out_pos, outp)) {
return;
}

if (can_use_fast_path()) {
TensorIndex4D out_tidx =
texture_pos_to_tensor4d_idx_simple(outp, out_pos, out_layout);

// Check if packed dimension is preserved in the permutation. Use safe_idx
// to avoid dynamic indexing of push constant with spec-const-derived index.
const bool fast_path =
safe_idx(permute_dims, out_packed_dim) == in_packed_dim;

if (fast_path) {
// Fast path: packed dimension is preserved, so we can copy texels directly
ivec4 in_tidx = out_tidx_to_in_tidx(out_tidx);
ivec3 in_pos = tidx_to_pos(in_tidx, in_sizes, in_axis_map, in_packed_dim);
VEC4_T in_texel = VEC4_T(load_texel(t_in, in_pos));
ivec4 in_tidx_data = out_tidx_to_in_tidx(out_tidx.data);
TensorIndex4D in_tidx;
in_tidx.data = in_tidx_data;

write_texel_lpos(t_out, lpos, in_texel, out_axis_map);
}
else {
ivec3 in_pos =
tensor4d_idx_to_texel_pos_simple(inp, in_tidx, in_layout);
VEC4_T in_texel = texelFetch(t_in, in_pos, 0);

imageStore(t_out, out_pos, in_texel);
} else {
// Slow path: packed dimension is not preserved, so each element of the
// output texel may be "sourced" from a different texel in the input tensor.
// Therefore each output texel element is processed individually.
// output texel may come from a different texel in the input tensor.
VEC4_T out_texel = VEC4_T(0);

for (int texel_i = 0; texel_i < 4; ++texel_i) {
ivec4 in_tidx = out_tidx_to_in_tidx(out_tidx);
ivec3 in_pos = tidx_to_pos(in_tidx, in_sizes, in_axis_map, in_packed_dim);
int element_idx = in_tidx[in_packed_dim] % 4;
for (int comp = 0; comp < 4; comp++) {
ivec4 in_tidx_data = out_tidx_to_in_tidx(out_tidx.data);
TensorIndex4D in_tidx;
in_tidx.data = in_tidx_data;

VEC4_T in_texel = VEC4_T(load_texel(t_in, in_pos));
T selected_value = T(in_texel[element_idx]);
TextureElementIndex in_elem =
tensor4d_idx_to_texture_element_idx_simple(inp, in_tidx, in_layout);

out_texel[texel_i] = selected_value;
VEC4_T in_texel = texelFetch(t_in, in_elem.pos, 0);
out_texel[comp] = in_texel[in_elem.comp];

out_tidx[out_packed_dim]++;
out_tidx.data[out_packed_dim]++;
}

write_texel_lpos(t_out, lpos, out_texel, out_axis_map);
imageStore(t_out, out_pos, out_texel);
}
}
Loading
Loading