Adds the ability to use an epilog remainder loop during loop unrolling and makes

this the default behavior.

Patch by Evgeny Stupachenko (evstupac@gmail.com).

Differential Revision: http://reviews.llvm.org/D18158

llvm-svn: 265388
This commit is contained in:
David L Kreitzer
2016-04-05 12:19:35 +00:00
parent 849045f2aa
commit 188de5ae69
15 changed files with 501 additions and 167 deletions

View File

@@ -16,8 +16,8 @@
// case, we need to generate code to execute these 'left over' iterations.
//
// The current strategy generates an if-then-else sequence prior to the
// unrolled loop to execute the 'left over' iterations. Other strategies
// include generate a loop before or after the unrolled loop.
// unrolled loop to execute the 'left over' iterations before or after the
// unrolled loop.
//
//===----------------------------------------------------------------------===//
@@ -60,33 +60,35 @@ STATISTIC(NumRuntimeUnrolled,
/// than the unroll factor.
///
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
BasicBlock *OrigPH, BasicBlock *NewPH,
ValueToValueMapTy &VMap, DominatorTree *DT,
LoopInfo *LI, bool PreserveLCSSA) {
BasicBlock *PrologExit, BasicBlock *PreHeader,
BasicBlock *NewPreHeader, ValueToValueMapTy &VMap,
DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) {
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
// Create a PHI node for each outgoing value from the original loop
// (which means it is an outgoing value from the prolog code too).
// The new PHI node is inserted in the prolog end basic block.
// The new PHI name is added as an operand of a PHI node in either
// The new PHI node value is added as an operand of a PHI node in either
// the loop header or the loop exit block.
for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch);
SBI != SBE; ++SBI) {
for (BasicBlock::iterator BBI = (*SBI)->begin();
PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
for (BasicBlock *Succ : successors(Latch)) {
for (Instruction &BBI : *Succ) {
PHINode *PN = dyn_cast<PHINode>(&BBI);
// Exit when we passed all PHI nodes.
if (!PN)
break;
// Add a new PHI node to the prolog end block and add the
// appropriate incoming values.
PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr",
PrologEnd->getTerminator());
PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
PrologExit->getFirstNonPHI());
// Adding a value to the new PHI node from the original loop preheader.
// This is the value that skips all the prolog code.
if (L->contains(PN)) {
NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH);
NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader),
PreHeader);
} else {
NewPN->addIncoming(UndefValue::get(PN->getType()), OrigPH);
NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
}
Value *V = PN->getIncomingValueForBlock(Latch);
@@ -97,22 +99,22 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
}
// Adding a value to the new PHI node from the last prolog block
// that was created.
NewPN->addIncoming(V, LastPrologBB);
NewPN->addIncoming(V, PrologLatch);
// Update the existing PHI node operand with the value from the
// new PHI node. How this is done depends on if the existing
// PHI node is in the original loop block, or the exit block.
if (L->contains(PN)) {
PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN);
PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN);
} else {
PN->addIncoming(NewPN, PrologEnd);
PN->addIncoming(NewPN, PrologExit);
}
}
}
// Create a branch around the original loop, which is taken if there are no
// iterations remaining to be executed after running the prologue.
Instruction *InsertPt = PrologEnd->getTerminator();
Instruction *InsertPt = PrologExit->getTerminator();
IRBuilder<> B(InsertPt);
assert(Count != 0 && "nonsensical Count!");
@@ -126,25 +128,152 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
BasicBlock *Exit = L->getUniqueExitBlock();
assert(Exit && "Loop must have a single exit block only");
// Split the exit to maintain loop canonicalization guarantees
SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI,
PreserveLCSSA);
// Add the branch to the exit block (around the unrolled loop)
B.CreateCondBr(BrLoopExit, Exit, NewPH);
B.CreateCondBr(BrLoopExit, Exit, NewPreHeader);
InsertPt->eraseFromParent();
}
/// Connect the unrolling epilog code to the original loop.
/// The unrolling epilog code contains code to execute the
/// 'extra' iterations if the run-time trip count modulo the
/// unroll count is non-zero.
///
/// This function performs the following:
/// - Update PHI nodes at the unrolling loop exit and epilog loop exit
/// - Create PHI nodes at the unrolling loop exit to combine
/// values that exit the unrolling loop code and jump around it.
/// - Update PHI operands in the epilog loop by the new PHI nodes
/// - Branch around the epilog loop if extra iters (ModVal) is zero.
///
static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
BasicBlock *Exit, BasicBlock *PreHeader,
BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
ValueToValueMapTy &VMap, DominatorTree *DT,
LoopInfo *LI, bool PreserveLCSSA) {
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
// Loop structure should be the following:
//
// PreHeader
// NewPreHeader
// Header
// ...
// Latch
// NewExit (PN)
// EpilogPreHeader
// EpilogHeader
// ...
// EpilogLatch
// Exit (EpilogPN)
// Update PHI nodes at NewExit and Exit.
for (Instruction &BBI : *NewExit) {
PHINode *PN = dyn_cast<PHINode>(&BBI);
// Exit when we passed all PHI nodes.
if (!PN)
break;
// PN should be used in another PHI located in Exit block as
// Exit was split by SplitBlockPredecessors into Exit and NewExit
// Basicaly it should look like:
// NewExit:
// PN = PHI [I, Latch]
// ...
// Exit:
// EpilogPN = PHI [PN, EpilogPreHeader]
//
// There is EpilogPreHeader incoming block instead of NewExit as
// NewExit was spilt 1 more time to get EpilogPreHeader.
assert(PN->hasOneUse() && "The phi should have 1 use");
PHINode *EpilogPN = cast<PHINode> (PN->use_begin()->getUser());
assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
// Add incoming PreHeader from branch around the Loop
PN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
Value *V = PN->getIncomingValueForBlock(Latch);
Instruction *I = dyn_cast<Instruction>(V);
if (I && L->contains(I))
// If value comes from an instruction in the loop add VMap value.
V = VMap[I];
// For the instruction out of the loop, constant or undefined value
// insert value itself.
EpilogPN->addIncoming(V, EpilogLatch);
assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
"EpilogPN should have EpilogPreHeader incoming block");
// Change EpilogPreHeader incoming block to NewExit.
EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
NewExit);
// Now PHIs should look like:
// NewExit:
// PN = PHI [I, Latch], [undef, PreHeader]
// ...
// Exit:
// EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
}
// Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
// Update corresponding PHI nodes in epilog loop.
for (BasicBlock *Succ : successors(Latch)) {
// Skip this as we already updated phis in exit blocks.
if (!L->contains(Succ))
continue;
for (Instruction &BBI : *Succ) {
PHINode *PN = dyn_cast<PHINode>(&BBI);
// Exit when we passed all PHI nodes.
if (!PN)
break;
// Add new PHI nodes to the loop exit block and update epilog
// PHIs with the new PHI values.
PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
NewExit->getFirstNonPHI());
// Adding a value to the new PHI node from the unrolling loop preheader.
NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader);
// Adding a value to the new PHI node from the unrolling loop latch.
NewPN->addIncoming(PN->getIncomingValueForBlock(Latch), Latch);
// Update the existing PHI node operand with the value from the new PHI
// node. Corresponding instruction in epilog loop should be PHI.
PHINode *VPN = cast<PHINode>(VMap[&BBI]);
VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN);
}
}
Instruction *InsertPt = NewExit->getTerminator();
IRBuilder<> B(InsertPt);
Value *BrLoopExit = B.CreateIsNotNull(ModVal);
assert(Exit && "Loop must have a single exit block only");
// Split the exit to maintain loop canonicalization guarantees
SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI,
PreserveLCSSA);
// Add the branch to the exit block (around the unrolling loop)
B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
InsertPt->eraseFromParent();
}
/// Create a clone of the blocks in a loop and connect them together.
/// If UnrollProlog is true, loop structure will not be cloned, otherwise a new
/// loop will be created including all cloned blocks, and the iterator of it
/// switches to count NewIter down to 0.
/// If CreateRemainderLoop is false, loop structure will not be cloned,
/// otherwise a new loop will be created including all cloned blocks, and the
/// iterator of it switches to count NewIter down to 0.
/// The cloned blocks should be inserted between InsertTop and InsertBot.
/// If loop structure is cloned InsertTop should be new preheader, InsertBot
/// new loop exit.
///
static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
static void CloneLoopBlocks(Loop *L, Value *NewIter,
const bool CreateRemainderLoop,
const bool UseEpilogRemainder,
BasicBlock *InsertTop, BasicBlock *InsertBot,
BasicBlock *Preheader,
std::vector<BasicBlock *> &NewBlocks,
LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
LoopInfo *LI) {
BasicBlock *Preheader = L->getLoopPreheader();
StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
Function *F = Header->getParent();
@@ -152,7 +281,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
Loop *NewLoop = nullptr;
Loop *ParentLoop = L->getParentLoop();
if (!UnrollProlog) {
if (CreateRemainderLoop) {
NewLoop = new Loop();
if (ParentLoop)
ParentLoop->addChildLoop(NewLoop);
@@ -163,7 +292,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
// For each block in the original loop, create a new copy,
// and update the value map with the newly created values.
for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".prol", F);
BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
NewBlocks.push_back(NewBB);
if (NewLoop)
@@ -179,16 +308,17 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
}
if (Latch == *BB) {
// For the last block, if UnrollProlog is true, create a direct jump to
// InsertBot. If not, create a loop back to cloned head.
// For the last block, if CreateRemainderLoop is false, create a direct
// jump to InsertBot. If not, create a loop back to cloned head.
VMap.erase((*BB)->getTerminator());
BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
IRBuilder<> Builder(LatchBR);
if (UnrollProlog) {
if (!CreateRemainderLoop) {
Builder.CreateBr(InsertBot);
} else {
PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, "prol.iter",
PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
suffix + ".iter",
FirstLoopBB->getFirstNonPHI());
Value *IdxSub =
Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
@@ -207,9 +337,15 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
// cloned loop.
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
if (UnrollProlog) {
VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
if (!CreateRemainderLoop) {
if (UseEpilogRemainder) {
unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
NewPHI->setIncomingBlock(idx, InsertTop);
NewPHI->removeIncomingValue(Latch, false);
} else {
VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
}
} else {
unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
NewPHI->setIncomingBlock(idx, InsertTop);
@@ -254,7 +390,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
}
}
/// Insert code in the prolog code when unrolling a loop with a
/// Insert code in the prolog/epilog code when unrolling a loop with a
/// run-time trip-count.
///
/// This method assumes that the loop unroll factor is total number
@@ -266,6 +402,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
/// the switch instruction is generated.
///
/// ***Prolog case***
/// extraiters = tripcount % loopfactor
/// if (extraiters == 0) jump Loop:
/// else jump Prol
@@ -277,17 +414,35 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
/// ...
/// End:
///
bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
bool AllowExpensiveTripCount, LoopInfo *LI,
ScalarEvolution *SE, DominatorTree *DT,
bool PreserveLCSSA) {
// For now, only unroll loops that contain a single exit.
/// ***Epilog case***
/// extraiters = tripcount % loopfactor
/// if (extraiters == tripcount) jump LoopExit:
/// unroll_iters = tripcount - extraiters
/// Loop: LoopBody; (executes unroll_iter times);
/// unroll_iter -= 1
/// if (unroll_iter != 0) jump Loop:
/// LoopExit:
/// if (extraiters == 0) jump EpilExit:
/// Epil: LoopBody; (executes extraiters times)
/// extraiters -= 1 // Omitted if unroll factor is 2.
/// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
/// EpilExit:
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
bool AllowExpensiveTripCount,
bool UseEpilogRemainder,
LoopInfo *LI, ScalarEvolution *SE,
DominatorTree *DT, bool PreserveLCSSA) {
// for now, only unroll loops that contain a single exit
if (!L->getExitingBlock())
return false;
// Make sure the loop is in canonical form, and there is a single
// exit block only.
if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock())
if (!L->isLoopSimplifyForm())
return false;
BasicBlock *Exit = L->getUniqueExitBlock(); // successor out of loop
if (!Exit)
return false;
// Use Scalar Evolution to compute the trip count. This allows more loops to
@@ -311,8 +466,8 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
return false;
BasicBlock *Header = L->getHeader();
BasicBlock *PH = L->getLoopPreheader();
BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
BasicBlock *PreHeader = L->getLoopPreheader();
BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
const DataLayout &DL = Header->getModule()->getDataLayout();
SCEVExpander Expander(*SE, DL, "loop-unroll");
if (!AllowExpensiveTripCount &&
@@ -330,26 +485,75 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
SE->forgetLoop(ParentLoop);
BasicBlock *Latch = L->getLoopLatch();
// It helps to split the original preheader twice, one for the end of the
// prolog code and one for a new loop preheader.
BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
PreHeaderBR = cast<BranchInst>(PH->getTerminator());
// Loop structure is the following:
//
// PreHeader
// Header
// ...
// Latch
// Exit
BasicBlock *NewPreHeader;
BasicBlock *NewExit = nullptr;
BasicBlock *PrologExit = nullptr;
BasicBlock *EpilogPreHeader = nullptr;
BasicBlock *PrologPreHeader = nullptr;
if (UseEpilogRemainder) {
// If epilog remainder
// Split PreHeader to insert a branch around loop for unrolling.
NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
NewPreHeader->setName(PreHeader->getName() + ".new");
// Split Exit to create phi nodes from branch above.
SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
NewExit = SplitBlockPredecessors(Exit, Preds, ".unr-lcssa",
DT, LI, PreserveLCSSA);
// Split NewExit to insert epilog remainder loop.
EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
} else {
// If prolog remainder
// Split the original preheader twice to insert prolog remainder loop
PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
PrologPreHeader->setName(Header->getName() + ".prol.preheader");
PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
DT, LI);
PrologExit->setName(Header->getName() + ".prol.loopexit");
// Split PrologExit to get NewPreHeader.
NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
NewPreHeader->setName(PreHeader->getName() + ".new");
}
// Loop structure should be the following:
// Epilog Prolog
//
// PreHeader PreHeader
// *NewPreHeader *PrologPreHeader
// Header *PrologExit
// ... *NewPreHeader
// Latch Header
// *NewExit ...
// *EpilogPreHeader Latch
// Exit Exit
// Calculate conditions for branch around loop for unrolling
// in epilog case and around prolog remainder loop in prolog case.
// Compute the number of extra iterations required, which is:
// extra iterations = run-time trip count % (loop unroll factor + 1)
// extra iterations = run-time trip count % loop unroll factor
PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
PreHeaderBR);
Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
PreHeaderBR);
IRBuilder<> B(PreHeaderBR);
Value *ModVal;
// Calculate ModVal = (BECount + 1) % Count.
// Note that TripCount is BECount + 1.
if (isPowerOf2_32(Count)) {
// When Count is power of 2 we don't BECount for epilog case, however we'll
// need it for a branch around unrolling loop for prolog case.
ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
// 1. There are no iterations to be run in the prologue loop.
// 1. There are no iterations to be run in the prolog/epilog loop.
// OR
// 2. The addition computing TripCount overflowed.
//
@@ -371,18 +575,18 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
ConstantInt::get(BECount->getType(), Count),
"xtraiter");
}
Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod");
// Branch to either the extra iterations or the cloned/unrolled loop.
// We will fix up the true branch label when adding loop body copies.
B.CreateCondBr(BranchVal, PEnd, PEnd);
assert(PreHeaderBR->isUnconditional() &&
PreHeaderBR->getSuccessor(0) == PEnd &&
"CFG edges in Preheader are not correct");
Value *CmpOperand =
UseEpilogRemainder ? TripCount :
ConstantInt::get(TripCount->getType(), 0);
Value *BranchVal = B.CreateICmpNE(ModVal, CmpOperand, "lcmp.mod");
BasicBlock *FirstLoop = UseEpilogRemainder ? NewPreHeader : PrologPreHeader;
BasicBlock *SecondLoop = UseEpilogRemainder ? NewExit : PrologExit;
// Branch to either remainder (extra iterations) loop or unrolling loop.
B.CreateCondBr(BranchVal, FirstLoop, SecondLoop);
PreHeaderBR->eraseFromParent();
Function *F = Header->getParent();
// Get an ordered list of blocks in the loop to help with the ordering of the
// cloned blocks in the prolog code.
// cloned blocks in the prolog/epilog code
LoopBlocksDFS LoopBlocks(L);
LoopBlocks.perform(LI);
@@ -394,17 +598,38 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
std::vector<BasicBlock *> NewBlocks;
ValueToValueMapTy VMap;
bool UnrollPrologue = Count == 2;
// For unroll factor 2 remainder loop will have 1 iterations.
// Do not create 1 iteration loop.
bool CreateRemainderLoop = (Count != 2);
// Clone all the basic blocks in the loop. If Count is 2, we don't clone
// the loop, otherwise we create a cloned loop to execute the extra
// iterations. This function adds the appropriate CFG connections.
CloneLoopBlocks(L, ModVal, UnrollPrologue, PH, PEnd, NewBlocks, LoopBlocks,
VMap, LI);
BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit;
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,
InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI);
// Insert the cloned blocks into the function just before the original loop.
F->getBasicBlockList().splice(PEnd->getIterator(), F->getBasicBlockList(),
NewBlocks[0]->getIterator(), F->end());
// Insert the cloned blocks into the function.
F->getBasicBlockList().splice(InsertBot->getIterator(),
F->getBasicBlockList(),
NewBlocks[0]->getIterator(),
F->end());
// Loop structure should be the following:
// Epilog Prolog
//
// PreHeader PreHeader
// NewPreHeader PrologPreHeader
// Header PrologHeader
// ... ...
// Latch PrologLatch
// NewExit PrologExit
// EpilogPreHeader NewPreHeader
// EpilogHeader Header
// ... ...
// EpilogLatch Latch
// Exit Exit
// Rewrite the cloned instruction operands to use the values created when the
// clone is created.
@@ -415,11 +640,38 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
}
}
// Connect the prolog code to the original loop and update the
// PHI functions.
BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]);
ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, DT, LI,
PreserveLCSSA);
if (UseEpilogRemainder) {
// Connect the epilog code to the original loop and update the
// PHI functions.
ConnectEpilog(L, ModVal, NewExit, Exit, PreHeader,
EpilogPreHeader, NewPreHeader, VMap, DT, LI,
PreserveLCSSA);
// Update counter in loop for unrolling.
// I should be multiply of Count.
IRBuilder<> B2(NewPreHeader->getTerminator());
Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
B2.SetInsertPoint(LatchBR);
PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
Header->getFirstNonPHI());
Value *IdxSub =
B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
NewIdx->getName() + ".nsub");
Value *IdxCmp;
if (LatchBR->getSuccessor(0) == Header)
IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
else
IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
NewIdx->addIncoming(TestVal, NewPreHeader);
NewIdx->addIncoming(IdxSub, Latch);
LatchBR->setCondition(IdxCmp);
} else {
// Connect the prolog code to the original loop and update the
// PHI functions.
ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader,
VMap, DT, LI, PreserveLCSSA);
}
NumRuntimeUnrolled++;
return true;
}