Skip to content

Commit eacc40e

Browse files
authored
Strictly more general shuffle flattening in Hexagon. (#8979)
* Strictly more general shuffle flattening in Hexagon. Fix vdelta doing weird things when replicating don't-cares. * clang-format
1 parent e70074c commit eacc40e

2 files changed

Lines changed: 30 additions & 17 deletions

File tree

src/CodeGen_Hexagon.cpp

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1186,15 +1186,16 @@ Value *CodeGen_Hexagon::shuffle_vectors(Value *a, Value *b,
11861186
create_bitcast(a_call->getArgOperand(1), native_ty),
11871187
create_bitcast(a_call->getArgOperand(0), native_ty), indices);
11881188
} else if (ShuffleVectorInst *a_shuffle = dyn_cast<ShuffleVectorInst>(a)) {
1189-
bool is_identity = true;
1190-
for (int i = 0; i < a_elements; i++) {
1191-
int mask_i = a_shuffle->getMaskValue(i);
1192-
is_identity = is_identity && (mask_i == i || mask_i == -1);
1193-
}
1194-
if (is_identity) {
1195-
return shuffle_vectors(a_shuffle->getOperand(0),
1196-
a_shuffle->getOperand(1), indices);
1189+
std::vector<int> new_indices(indices.size());
1190+
for (size_t i = 0; i < indices.size(); i++) {
1191+
if (indices[i] != -1) {
1192+
new_indices[i] = a_shuffle->getMaskValue(indices[i]);
1193+
} else {
1194+
new_indices[i] = -1;
1195+
}
11971196
}
1197+
return shuffle_vectors(a_shuffle->getOperand(0),
1198+
a_shuffle->getOperand(1), new_indices);
11981199
}
11991200
}
12001201

@@ -1516,7 +1517,11 @@ Value *CodeGen_Hexagon::vdelta(Value *lut, const vector<int> &indices) {
15161517
vector<int> i8_indices(indices.size() * replicate);
15171518
for (size_t i = 0; i < indices.size(); i++) {
15181519
for (int j = 0; j < replicate; j++) {
1519-
i8_indices[i * replicate + j] = indices[i] * replicate + j;
1520+
if (indices[i] == -1) {
1521+
i8_indices[i * replicate + j] = -1; // Replicate the don't-care.
1522+
} else {
1523+
i8_indices[i * replicate + j] = indices[i] * replicate + j;
1524+
}
15201525
}
15211526
}
15221527
Value *result = vdelta(i8_lut, i8_indices);

test/correctness/simd_op_check_hvx.cpp

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,16 +54,24 @@ class SimdOpCheckHVX : public SimdOpCheckTest {
5454
isa_version = 62;
5555
}
5656

57+
auto valign_test_u8 = [&](int off) {
58+
return in_u8(x + off) + in_u8(x + off + 1);
59+
};
60+
61+
auto valign_test_u16 = [&](int off) {
62+
return in_u16(x + off) + in_u16(x + off + 1);
63+
};
64+
5765
// Verify that unaligned loads use the right instructions, and don't try to use
5866
// immediates of more than 3 bits.
59-
check("valign(v*,v*,#7)", hvx_width / 1, in_u8(x + 7));
60-
check("vlalign(v*,v*,#7)", hvx_width / 1, in_u8(x + hvx_width - 7));
61-
check("valign(v*,v*,r*)", hvx_width / 1, in_u8(x + 8));
62-
check("valign(v*,v*,r*)", hvx_width / 1, in_u8(x + hvx_width - 8));
63-
check("valign(v*,v*,#6)", hvx_width / 1, in_u16(x + 3));
64-
check("vlalign(v*,v*,#6)", hvx_width / 1, in_u16(x + hvx_width - 3));
65-
check("valign(v*,v*,r*)", hvx_width / 1, in_u16(x + 4));
66-
check("valign(v*,v*,r*)", hvx_width / 1, in_u16(x + hvx_width - 4));
67+
check("valign(v*,v*,#7)", hvx_width / 1, valign_test_u8(6));
68+
check("vlalign(v*,v*,#7)", hvx_width / 1, valign_test_u8(hvx_width - 7));
69+
check("valign(v*,v*,r*)", hvx_width / 1, valign_test_u8(8));
70+
check("valign(v*,v*,r*)", hvx_width / 1, valign_test_u8(hvx_width - 8));
71+
check("valign(v*,v*,#6)", hvx_width / 1, valign_test_u16(3));
72+
check("vlalign(v*,v*,#6)", hvx_width / 1, valign_test_u16(hvx_width - 3));
73+
check("valign(v*,v*,r*)", hvx_width / 1, valign_test_u16(4));
74+
check("valign(v*,v*,r*)", hvx_width / 1, valign_test_u16(hvx_width - 4));
6775

6876
check("vunpack(v*.ub)", hvx_width / 1, u16(u8_1));
6977
check("vunpack(v*.ub)", hvx_width / 1, i16(u8_1));

0 commit comments

Comments
 (0)