[LV] Avoid redundant operations manipulating masks

The Loop Vectorizer generates redundant operations when manipulating masks:
AND with true, OR with false, compare equal to true. Instead of relying on
a subsequent pass to clean them up, this patch avoids generating them.

Use null (no-mask) to represent all-one full masks, instead of a constant
all-one vector, following the convention of masked gathers and scatters.

This prepares for a follow-up VPlan patch in which these mask-manipulating
operations are modeled using recipes.

Differential Revision: https://reviews.llvm.org/D35725

llvm-svn: 309558
This commit is contained in:
Ayal Zaks
2017-07-31 13:21:42 +00:00
parent 058fdd3d49
commit e841b214b1
4 changed files with 57 additions and 72 deletions

View File

@@ -3045,13 +3045,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
PartPtr =
Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
Mask[Part] = reverseVector(Mask[Part]);
if (Mask[Part]) // The reverse of a null all-one mask is a null mask.
Mask[Part] = reverseVector(Mask[Part]);
}
Value *VecPtr =
Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
if (Legal->isMaskRequired(SI))
if (Legal->isMaskRequired(SI) && Mask[Part])
NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
Mask[Part]);
else
@@ -3083,12 +3084,13 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
// wide load needs to start at the last vector element.
PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
PartPtr = Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
Mask[Part] = reverseVector(Mask[Part]);
if (Mask[Part]) // The reverse of a null all-one mask is a null mask.
Mask[Part] = reverseVector(Mask[Part]);
}
Value *VecPtr =
Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
if (Legal->isMaskRequired(LI))
if (Legal->isMaskRequired(LI) && Mask[Part])
NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment, Mask[Part],
UndefValue::get(DataTy),
"wide.masked.load");
@@ -3136,10 +3138,10 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
Value *Cmp = nullptr;
if (IfPredicateInstr) {
Cmp = Cond[Part];
if (Cmp->getType()->isVectorTy())
if (!Cmp) // Block in mask is all-one.
Cmp = Builder.getTrue();
else if (Cmp->getType()->isVectorTy())
Cmp = Builder.CreateExtractElement(Cmp, Builder.getInt32(Lane));
Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp,
ConstantInt::get(Cmp->getType(), 1));
}
Instruction *Cloned = Instr->clone();
@@ -4518,24 +4520,22 @@ InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator());
assert(BI && "Unexpected terminator found");
if (BI->isConditional()) {
if (!BI->isConditional())
return EdgeMaskCache[Edge] = SrcMask;
VectorParts EdgeMask(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
auto *EdgeMaskPart = getOrCreateVectorValue(BI->getCondition(), Part);
if (BI->getSuccessor(0) != Dst)
EdgeMaskPart = Builder.CreateNot(EdgeMaskPart);
VectorParts EdgeMask(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
auto *EdgeMaskPart = getOrCreateVectorValue(BI->getCondition(), Part);
if (BI->getSuccessor(0) != Dst)
EdgeMaskPart = Builder.CreateNot(EdgeMaskPart);
if (SrcMask[Part]) // Otherwise block in-mask is all-one, no need to AND.
EdgeMaskPart = Builder.CreateAnd(EdgeMaskPart, SrcMask[Part]);
EdgeMask[Part] = EdgeMaskPart;
}
EdgeMaskCache[Edge] = EdgeMask;
return EdgeMask;
EdgeMask[Part] = EdgeMaskPart;
}
EdgeMaskCache[Edge] = SrcMask;
return SrcMask;
return EdgeMaskCache[Edge] = EdgeMask;
}
InnerLoopVectorizer::VectorParts
@@ -4547,31 +4547,32 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
if (BCEntryIt != BlockMaskCache.end())
return BCEntryIt->second;
// All-one mask is modelled as no-mask following the convention for masked
// load/store/gather/scatter. Initialize BlockMask to no-mask.
VectorParts BlockMask(UF);
for (unsigned Part = 0; Part < UF; ++Part)
BlockMask[Part] = nullptr;
// Loop incoming mask is all-one.
if (OrigLoop->getHeader() == BB) {
Value *C = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1);
if (OrigLoop->getHeader() == BB)
return BlockMaskCache[BB] = BlockMask;
// This is the block mask. We OR all incoming edges.
for (auto *Predecessor : predecessors(BB)) {
VectorParts EdgeMask = createEdgeMask(Predecessor, BB);
if (!EdgeMask[0]) // Mask of predecessor is all-one so mask of block is too.
return BlockMaskCache[BB] = EdgeMask;
if (!BlockMask[0]) { // BlockMask has its initialized nullptr value.
BlockMask = EdgeMask;
continue;
}
for (unsigned Part = 0; Part < UF; ++Part)
BlockMask[Part] = getOrCreateVectorValue(C, Part);
BlockMaskCache[BB] = BlockMask;
return BlockMask;
BlockMask[Part] = Builder.CreateOr(BlockMask[Part], EdgeMask[Part]);
}
// This is the block mask. We OR all incoming edges, and with zero.
Value *Zero = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 0);
for (unsigned Part = 0; Part < UF; ++Part)
BlockMask[Part] = getOrCreateVectorValue(Zero, Part);
// For each pred:
for (pred_iterator It = pred_begin(BB), E = pred_end(BB); It != E; ++It) {
VectorParts EM = createEdgeMask(*It, BB);
for (unsigned Part = 0; Part < UF; ++Part)
BlockMask[Part] = Builder.CreateOr(BlockMask[Part], EM[Part]);
}
BlockMaskCache[BB] = BlockMask;
return BlockMask;
return BlockMaskCache[BB] = BlockMask;
}
void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,