[SLP] Improve horizontal vectorization for non-power-of-2 number of

instructions.

If the number of instructions in a horizontal reduction list is not a power
of 2, then only the last PowerOf2Floor(NumberOfInstructions) elements are
actually vectorized, and the other instructions remain scalar. This patch
tries to vectorize the remaining elements as well.

Differential Revision: https://reviews.llvm.org/D28959

llvm-svn: 293042
This commit is contained in:
Alexey Bataev
2017-01-25 09:54:38 +00:00
parent 16f1e5f1ca
commit d28ab559a7
2 changed files with 76 additions and 100 deletions

View File

@@ -4262,7 +4262,7 @@ public:
Builder.setFastMathFlags(Unsafe);
unsigned i = 0;
for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth);
V.buildTree(VL, ReductionOps);
if (V.shouldReorder()) {
@@ -4270,7 +4270,7 @@ public:
V.buildTree(Reversed, ReductionOps);
}
if (V.isTreeTinyAndNotFullyVectorizable())
continue;
break;
V.computeMinimumValueSizes();
@@ -4296,6 +4296,8 @@ public:
ReducedSubTree, "bin.rdx");
} else
VectorizedTree = ReducedSubTree;
i += ReduxWidth;
ReduxWidth = PowerOf2Floor(NumReducedVals - i);
}
if (VectorizedTree) {