You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Vector shuffles (and all other vector operations) are still lowered by decomposing them into copies of 32-bit pieces, even on targets with support for v_pk_mov_b32:
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s
; Not using v_pk_mov_b32
; Loads two <4 x float> vectors and returns lanes 2-3 of the first followed by
; lanes 2-3 of the second (mask indices 6,7 address the second operand).
define <4 x float> @shuffle_v4f32_2367(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %lhs = load <4 x float>, ptr addrspace(1) %arg0
  %rhs = load <4 x float>, ptr addrspace(1) %arg1
  %result = shufflevector <4 x float> %lhs, <4 x float> %rhs, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  ret <4 x float> %result
}
; Not using v_pk_mov_b32
; Loads two <4 x double> vectors and returns lanes 2-3 of the first followed by
; lanes 2-3 of the second (mask indices 6,7 address the second operand).
define <4 x double> @shuffle_v4f64_2367(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %lhs = load <4 x double>, ptr addrspace(1) %arg0
  %rhs = load <4 x double>, ptr addrspace(1) %arg1
  %result = shufflevector <4 x double> %lhs, <4 x double> %rhs, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  ret <4 x double> %result
}
; Loads two <8 x half> vectors and returns lanes 2-5 of the first followed by
; lanes 0-3 of the second (mask indices 8-11 address the second operand).
; NOTE(review): the name says "v4f16" but the vectors are <8 x half>; the name
; is kept unchanged here.
define <8 x half> @shuffle_v4f16_2345_891011(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %lhs = load <8 x half>, ptr addrspace(1) %arg0
  %rhs = load <8 x half>, ptr addrspace(1) %arg1
  %result = shufflevector <8 x half> %lhs, <8 x half> %rhs, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11>
  ret <8 x half> %result
}
All of these cases currently generate pairs of v_mov_b32, but each pair could be compressed into a single v_pk_mov_b32 on gfx90a+.
The 2-element 32-bit shuffles could be made directly legal:
; Mask <0, 0>: both result lanes are lane 0 of the first loaded vector.
define <2 x float> @shuffle_v2f32_00(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %lhs = load <2 x float>, ptr addrspace(1) %arg0
  %rhs = load <2 x float>, ptr addrspace(1) %arg1
  %result = shufflevector <2 x float> %lhs, <2 x float> %rhs, <2 x i32> <i32 0, i32 0>
  ret <2 x float> %result
}
; Mask <0, 1>: the result is the first loaded vector, lanes in original order.
define <2 x float> @shuffle_v2f32_01(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %lhs = load <2 x float>, ptr addrspace(1) %arg0
  %rhs = load <2 x float>, ptr addrspace(1) %arg1
  %result = shufflevector <2 x float> %lhs, <2 x float> %rhs, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %result
}
; Mask <0, 2>: lane 0 of the first vector, then lane 0 of the second.
define <2 x float> @shuffle_v2f32_02(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %lhs = load <2 x float>, ptr addrspace(1) %arg0
  %rhs = load <2 x float>, ptr addrspace(1) %arg1
  %result = shufflevector <2 x float> %lhs, <2 x float> %rhs, <2 x i32> <i32 0, i32 2>
  ret <2 x float> %result
}
; Mask <0, 3>: lane 0 of the first vector, then lane 1 of the second.
define <2 x float> @shuffle_v2f32_03(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %lhs = load <2 x float>, ptr addrspace(1) %arg0
  %rhs = load <2 x float>, ptr addrspace(1) %arg1
  %result = shufflevector <2 x float> %lhs, <2 x float> %rhs, <2 x i32> <i32 0, i32 3>
  ret <2 x float> %result
}
; Mask <1, 0>: the first loaded vector with its two lanes swapped.
define <2 x float> @shuffle_v2f32_10(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %lhs = load <2 x float>, ptr addrspace(1) %arg0
  %rhs = load <2 x float>, ptr addrspace(1) %arg1
  %result = shufflevector <2 x float> %lhs, <2 x float> %rhs, <2 x i32> <i32 1, i32 0>
  ret <2 x float> %result
}
; Mask <1, 1>: both result lanes are lane 1 of the first loaded vector.
define <2 x float> @shuffle_v2f32_11(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %lhs = load <2 x float>, ptr addrspace(1) %arg0
  %rhs = load <2 x float>, ptr addrspace(1) %arg1
  %result = shufflevector <2 x float> %lhs, <2 x float> %rhs, <2 x i32> <i32 1, i32 1>
  ret <2 x float> %result
}
; Mask <1, 2>: lane 1 of the first vector, then lane 0 of the second.
define <2 x float> @shuffle_v2f32_12(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %lhs = load <2 x float>, ptr addrspace(1) %arg0
  %rhs = load <2 x float>, ptr addrspace(1) %arg1
  %result = shufflevector <2 x float> %lhs, <2 x float> %rhs, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %result
}
; Mask <1, 3>: lane 1 of the first vector, then lane 1 of the second.
; NOTE(review): the sibling functions are named after both mask indices, so
; this one was presumably meant to be "shuffle_v2f32_13"; name kept unchanged.
define <2 x float> @shuffle_v2f32_3(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %lhs = load <2 x float>, ptr addrspace(1) %arg0
  %rhs = load <2 x float>, ptr addrspace(1) %arg1
  %result = shufflevector <2 x float> %lhs, <2 x float> %rhs, <2 x i32> <i32 1, i32 3>
  ret <2 x float> %result
}
; Mask <2, 0>: lane 0 of the second vector, then lane 0 of the first.
define <2 x float> @shuffle_v2f32_20(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %lhs = load <2 x float>, ptr addrspace(1) %arg0
  %rhs = load <2 x float>, ptr addrspace(1) %arg1
  %result = shufflevector <2 x float> %lhs, <2 x float> %rhs, <2 x i32> <i32 2, i32 0>
  ret <2 x float> %result
}
; Mask <2, 1>: lane 0 of the second vector, then lane 1 of the first.
define <2 x float> @shuffle_v2f32_21(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %lhs = load <2 x float>, ptr addrspace(1) %arg0
  %rhs = load <2 x float>, ptr addrspace(1) %arg1
  %result = shufflevector <2 x float> %lhs, <2 x float> %rhs, <2 x i32> <i32 2, i32 1>
  ret <2 x float> %result
}
; Mask <2, 2>: both result lanes are lane 0 of the second loaded vector.
define <2 x float> @shuffle_v2f32_22(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %lhs = load <2 x float>, ptr addrspace(1) %arg0
  %rhs = load <2 x float>, ptr addrspace(1) %arg1
  %result = shufflevector <2 x float> %lhs, <2 x float> %rhs, <2 x i32> <i32 2, i32 2>
  ret <2 x float> %result
}
; Mask <2, 3>: the result is the second loaded vector, lanes in original order.
define <2 x float> @shuffle_v2f32_23(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %lhs = load <2 x float>, ptr addrspace(1) %arg0
  %rhs = load <2 x float>, ptr addrspace(1) %arg1
  %result = shufflevector <2 x float> %lhs, <2 x float> %rhs, <2 x i32> <i32 2, i32 3>
  ret <2 x float> %result
}
; Mask <3, 0>: lane 1 of the second vector, then lane 0 of the first.
define <2 x float> @shuffle_v2f32_30(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %lhs = load <2 x float>, ptr addrspace(1) %arg0
  %rhs = load <2 x float>, ptr addrspace(1) %arg1
  %result = shufflevector <2 x float> %lhs, <2 x float> %rhs, <2 x i32> <i32 3, i32 0>
  ret <2 x float> %result
}
; Mask <3, 1>: lane 1 of the second vector, then lane 1 of the first.
define <2 x float> @shuffle_v2f32_31(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %lhs = load <2 x float>, ptr addrspace(1) %arg0
  %rhs = load <2 x float>, ptr addrspace(1) %arg1
  %result = shufflevector <2 x float> %lhs, <2 x float> %rhs, <2 x i32> <i32 3, i32 1>
  ret <2 x float> %result
}
; Mask <3, 2>: the second loaded vector with its two lanes swapped.
define <2 x float> @shuffle_v2f32_32(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %lhs = load <2 x float>, ptr addrspace(1) %arg0
  %rhs = load <2 x float>, ptr addrspace(1) %arg1
  %result = shufflevector <2 x float> %lhs, <2 x float> %rhs, <2 x i32> <i32 3, i32 2>
  ret <2 x float> %result
}
; Mask <3, 3>: both result lanes are lane 1 of the second loaded vector.
define <2 x float> @shuffle_v2f32_33(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %lhs = load <2 x float>, ptr addrspace(1) %arg0
  %rhs = load <2 x float>, ptr addrspace(1) %arg1
  %result = shufflevector <2 x float> %lhs, <2 x float> %rhs, <2 x i32> <i32 3, i32 3>
  ret <2 x float> %result
}
The text was updated successfully, but these errors were encountered:
Vector shuffles (and all other vector operations) are still lowered by decomposing them into copies of 32-bit pieces, even on targets with support for v_pk_mov_b32:
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s
; Not using v_pk_mov_b32
; Loads two <4 x float> vectors and returns lanes 2-3 of the first followed by
; lanes 2-3 of the second (mask indices 6,7 address the second operand).
; The stray "<!-- -->" HTML comment between '@' and the function name has been
; removed so the global identifier is valid IR.
define <4 x float> @shuffle_v4f32_2367(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %val0 = load <4 x float>, ptr addrspace(1) %arg0
  %val1 = load <4 x float>, ptr addrspace(1) %arg1
  %shuffle = shufflevector <4 x float> %val0, <4 x float> %val1, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  ret <4 x float> %shuffle
}
; Not using v_pk_mov_b32
; Loads two <4 x double> vectors and returns lanes 2-3 of the first followed by
; lanes 2-3 of the second (mask indices 6,7 address the second operand).
; The stray "<!-- -->" HTML comment between '@' and the function name has been
; removed so the global identifier is valid IR.
define <4 x double> @shuffle_v4f64_2367(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %val0 = load <4 x double>, ptr addrspace(1) %arg0
  %val1 = load <4 x double>, ptr addrspace(1) %arg1
  %shuffle = shufflevector <4 x double> %val0, <4 x double> %val1, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  ret <4 x double> %shuffle
}
; Loads two <8 x half> vectors and returns lanes 2-5 of the first followed by
; lanes 0-3 of the second (mask indices 8-11 address the second operand).
; The stray "<!-- -->" HTML comment between '@' and the function name has been
; removed so the global identifier is valid IR.
; NOTE(review): the name says "v4f16" but the vectors are <8 x half>; the name
; is kept unchanged here.
define <8 x half> @shuffle_v4f16_2345_891011(ptr addrspace(1) %arg0, ptr addrspace(1) %arg1) {
  %val0 = load <8 x half>, ptr addrspace(1) %arg0
  %val1 = load <8 x half>, ptr addrspace(1) %arg1
  %shuffle = shufflevector <8 x half> %val0, <8 x half> %val1, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11>
  ret <8 x half> %shuffle
}
All of these cases currently generate pairs of v_mov_b32, but each pair could be compressed into a single v_pk_mov_b32 on gfx90a+.
Vector shuffles (and all other vector operations) are still lowered by decomposing them into copies of 32-bit pieces, even on targets with support for v_pk_mov_b32:
All of these cases currently generate pairs of v_mov_b32, but each pair could be compressed into a single v_pk_mov_b32 on gfx90a+.
The 2-element 32-bit shuffles could be made directly legal:
The text was updated successfully, but these errors were encountered: