[SLP] Improve horizontal vectorization for non-power-of-2 number of
instructions. If the number of instructions in a horizontal reduction list is not a power of 2, then only the last PowerOf2Floor(NumberOfInstructions) elements are actually vectorized; the other instructions remain scalar. This patch tries to vectorize the remaining elements as well. Differential Revision: https://reviews.llvm.org/D28959 llvm-svn: 293042
This commit is contained in:
@@ -4262,7 +4262,7 @@ public:
|
||||
Builder.setFastMathFlags(Unsafe);
|
||||
unsigned i = 0;
|
||||
|
||||
for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
|
||||
while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
|
||||
auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth);
|
||||
V.buildTree(VL, ReductionOps);
|
||||
if (V.shouldReorder()) {
|
||||
@@ -4270,7 +4270,7 @@ public:
|
||||
V.buildTree(Reversed, ReductionOps);
|
||||
}
|
||||
if (V.isTreeTinyAndNotFullyVectorizable())
|
||||
continue;
|
||||
break;
|
||||
|
||||
V.computeMinimumValueSizes();
|
||||
|
||||
@@ -4296,6 +4296,8 @@ public:
|
||||
ReducedSubTree, "bin.rdx");
|
||||
} else
|
||||
VectorizedTree = ReducedSubTree;
|
||||
i += ReduxWidth;
|
||||
ReduxWidth = PowerOf2Floor(NumReducedVals - i);
|
||||
}
|
||||
|
||||
if (VectorizedTree) {
|
||||
|
||||
Reference in New Issue
Block a user