Revert "Codegen: Tail-duplicate during placement."
This reverts commit 71c312652c10f1855b28d06697c08d47e7a243e4. llvm-svn: 283647
This commit is contained in:
@@ -186,13 +186,8 @@ BlockT *LoopBase<BlockT, LoopT>::getLoopLatch() const {
|
|||||||
template<class BlockT, class LoopT>
|
template<class BlockT, class LoopT>
|
||||||
void LoopBase<BlockT, LoopT>::
|
void LoopBase<BlockT, LoopT>::
|
||||||
addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase<BlockT, LoopT> &LIB) {
|
addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase<BlockT, LoopT> &LIB) {
|
||||||
#ifndef NDEBUG
|
assert((Blocks.empty() || LIB[getHeader()] == this) &&
|
||||||
if (!Blocks.empty()) {
|
"Incorrect LI specified for this loop!");
|
||||||
auto SameHeader = LIB[getHeader()];
|
|
||||||
assert(contains(SameHeader) && getHeader() == SameHeader->getHeader()
|
|
||||||
&& "Incorrect LI specified for this loop!");
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
assert(NewBB && "Cannot add a null basic block to the loop!");
|
assert(NewBB && "Cannot add a null basic block to the loop!");
|
||||||
assert(!LIB[NewBB] && "BasicBlock already in the loop!");
|
assert(!LIB[NewBB] && "BasicBlock already in the loop!");
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,6 @@
|
|||||||
#ifndef LLVM_CODEGEN_TAILDUPLICATOR_H
|
#ifndef LLVM_CODEGEN_TAILDUPLICATOR_H
|
||||||
#define LLVM_CODEGEN_TAILDUPLICATOR_H
|
#define LLVM_CODEGEN_TAILDUPLICATOR_H
|
||||||
|
|
||||||
#include "llvm/ADT/STLExtras.h"
|
|
||||||
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
|
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
|
||||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
#include "llvm/CodeGen/MachineModuleInfo.h"
|
||||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
@@ -38,7 +37,6 @@ class TailDuplicator {
|
|||||||
MachineRegisterInfo *MRI;
|
MachineRegisterInfo *MRI;
|
||||||
MachineFunction *MF;
|
MachineFunction *MF;
|
||||||
bool PreRegAlloc;
|
bool PreRegAlloc;
|
||||||
bool LayoutMode;
|
|
||||||
unsigned TailDupSize;
|
unsigned TailDupSize;
|
||||||
|
|
||||||
// A list of virtual registers for which to update SSA form.
|
// A list of virtual registers for which to update SSA form.
|
||||||
@@ -52,16 +50,10 @@ class TailDuplicator {
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
/// Prepare to run on a specific machine function.
|
/// Prepare to run on a specific machine function.
|
||||||
/// @param MF - Function that will be processed
|
/// @param TailDupSize - Maximum size of blocks to tail-duplicate.
|
||||||
/// @param MBPI - Branch Probability Info. Used to propagate correct
|
|
||||||
/// probabilities when modifying the CFG.
|
|
||||||
/// @param LayoutMode - When true, don't use the existing layout to make
|
|
||||||
/// decisions.
|
|
||||||
/// @param TailDupSize - Maximum size of blocks to tail-duplicate. Zero
|
|
||||||
/// default implies using the command line value TailDupSize.
|
|
||||||
void initMF(MachineFunction &MF,
|
void initMF(MachineFunction &MF,
|
||||||
const MachineBranchProbabilityInfo *MBPI,
|
const MachineBranchProbabilityInfo *MBPI,
|
||||||
bool LayoutMode, unsigned TailDupSize = 0);
|
unsigned TailDupSize = 0);
|
||||||
bool tailDuplicateBlocks();
|
bool tailDuplicateBlocks();
|
||||||
static bool isSimpleBB(MachineBasicBlock *TailBB);
|
static bool isSimpleBB(MachineBasicBlock *TailBB);
|
||||||
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB);
|
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB);
|
||||||
@@ -71,13 +63,9 @@ public:
|
|||||||
/// up.
|
/// up.
|
||||||
/// If \p DuplicatePreds is not null, it will be updated to contain the list
|
/// If \p DuplicatePreds is not null, it will be updated to contain the list
|
||||||
/// of predecessors that received a copy of \p MBB.
|
/// of predecessors that received a copy of \p MBB.
|
||||||
/// If \p RemovalCallback is non-null, it will be called before MBB is
|
|
||||||
/// deleted.
|
|
||||||
bool tailDuplicateAndUpdate(
|
bool tailDuplicateAndUpdate(
|
||||||
bool IsSimple, MachineBasicBlock *MBB,
|
bool IsSimple, MachineBasicBlock *MBB,
|
||||||
MachineBasicBlock *ForcedLayoutPred,
|
SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds = nullptr);
|
||||||
SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds = nullptr,
|
|
||||||
llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback = nullptr);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
typedef TargetInstrInfo::RegSubRegPair RegSubRegPair;
|
typedef TargetInstrInfo::RegSubRegPair RegSubRegPair;
|
||||||
@@ -101,18 +89,14 @@ private:
|
|||||||
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
|
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
|
||||||
const DenseSet<unsigned> &RegsUsedByPhi,
|
const DenseSet<unsigned> &RegsUsedByPhi,
|
||||||
SmallVectorImpl<MachineInstr *> &Copies);
|
SmallVectorImpl<MachineInstr *> &Copies);
|
||||||
bool tailDuplicate(bool IsSimple,
|
bool tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
|
||||||
MachineBasicBlock *TailBB,
|
|
||||||
MachineBasicBlock *ForcedLayoutPred,
|
|
||||||
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
|
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
|
||||||
SmallVectorImpl<MachineInstr *> &Copies);
|
SmallVectorImpl<MachineInstr *> &Copies);
|
||||||
void appendCopies(MachineBasicBlock *MBB,
|
void appendCopies(MachineBasicBlock *MBB,
|
||||||
SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos,
|
SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos,
|
||||||
SmallVectorImpl<MachineInstr *> &Copies);
|
SmallVectorImpl<MachineInstr *> &Copies);
|
||||||
|
|
||||||
void removeDeadBlock(
|
void removeDeadBlock(MachineBasicBlock *MBB);
|
||||||
MachineBasicBlock *MBB,
|
|
||||||
llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback = nullptr);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // End llvm namespace
|
} // End llvm namespace
|
||||||
|
|||||||
@@ -40,7 +40,6 @@
|
|||||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||||
#include "llvm/CodeGen/MachineLoopInfo.h"
|
#include "llvm/CodeGen/MachineLoopInfo.h"
|
||||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
#include "llvm/CodeGen/MachineModuleInfo.h"
|
||||||
#include "llvm/CodeGen/TailDuplicator.h"
|
|
||||||
#include "llvm/Support/Allocator.h"
|
#include "llvm/Support/Allocator.h"
|
||||||
#include "llvm/Support/CommandLine.h"
|
#include "llvm/Support/CommandLine.h"
|
||||||
#include "llvm/Support/Debug.h"
|
#include "llvm/Support/Debug.h"
|
||||||
@@ -122,12 +121,6 @@ static cl::opt<unsigned> MisfetchCost(
|
|||||||
static cl::opt<unsigned> JumpInstCost("jump-inst-cost",
|
static cl::opt<unsigned> JumpInstCost("jump-inst-cost",
|
||||||
cl::desc("Cost of jump instructions."),
|
cl::desc("Cost of jump instructions."),
|
||||||
cl::init(1), cl::Hidden);
|
cl::init(1), cl::Hidden);
|
||||||
static cl::opt<bool>
|
|
||||||
TailDupPlacement("tail-dup-placement",
|
|
||||||
cl::desc("Perform tail duplication during placement. "
|
|
||||||
"Creates more fallthrough opportunites in "
|
|
||||||
"outline branches."),
|
|
||||||
cl::init(true), cl::Hidden);
|
|
||||||
|
|
||||||
static cl::opt<bool>
|
static cl::opt<bool>
|
||||||
BranchFoldPlacement("branch-fold-placement",
|
BranchFoldPlacement("branch-fold-placement",
|
||||||
@@ -135,14 +128,6 @@ BranchFoldPlacement("branch-fold-placement",
|
|||||||
"Reduces code size."),
|
"Reduces code size."),
|
||||||
cl::init(true), cl::Hidden);
|
cl::init(true), cl::Hidden);
|
||||||
|
|
||||||
// Heuristic for tail duplication.
|
|
||||||
static cl::opt<unsigned> TailDuplicatePlacementThreshold(
|
|
||||||
"tail-dup-placement-threshold",
|
|
||||||
cl::desc("Instruction cutoff for tail duplication during layout. "
|
|
||||||
"Tail merging during layout is forced to have a threshold "
|
|
||||||
"that won't conflict."), cl::init(2),
|
|
||||||
cl::Hidden);
|
|
||||||
|
|
||||||
extern cl::opt<unsigned> StaticLikelyProb;
|
extern cl::opt<unsigned> StaticLikelyProb;
|
||||||
extern cl::opt<unsigned> ProfileLikelyProb;
|
extern cl::opt<unsigned> ProfileLikelyProb;
|
||||||
|
|
||||||
@@ -200,16 +185,6 @@ public:
|
|||||||
/// \brief End of blocks within the chain.
|
/// \brief End of blocks within the chain.
|
||||||
iterator end() { return Blocks.end(); }
|
iterator end() { return Blocks.end(); }
|
||||||
|
|
||||||
bool remove(MachineBasicBlock* BB) {
|
|
||||||
for(iterator i = begin(); i != end(); ++i) {
|
|
||||||
if (*i == BB) {
|
|
||||||
Blocks.erase(i);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief Merge a block chain into this one.
|
/// \brief Merge a block chain into this one.
|
||||||
///
|
///
|
||||||
/// This routine merges a block chain into this one. It takes care of forming
|
/// This routine merges a block chain into this one. It takes care of forming
|
||||||
@@ -291,13 +266,6 @@ class MachineBlockPlacement : public MachineFunctionPass {
|
|||||||
/// \brief A handle to the post dominator tree.
|
/// \brief A handle to the post dominator tree.
|
||||||
MachineDominatorTree *MDT;
|
MachineDominatorTree *MDT;
|
||||||
|
|
||||||
/// \brief Duplicator used to duplicate tails during placement.
|
|
||||||
///
|
|
||||||
/// Placement decisions can open up new tail duplication opportunities, but
|
|
||||||
/// since tail duplication affects placement decisions of later blocks, it
|
|
||||||
/// must be done inline.
|
|
||||||
TailDuplicator TailDup;
|
|
||||||
|
|
||||||
/// \brief A set of blocks that are unavoidably executed, i.e. they dominate
|
/// \brief A set of blocks that are unavoidably executed, i.e. they dominate
|
||||||
/// all terminators of the MachineFunction.
|
/// all terminators of the MachineFunction.
|
||||||
SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;
|
SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;
|
||||||
@@ -319,18 +287,8 @@ class MachineBlockPlacement : public MachineFunctionPass {
|
|||||||
/// between basic blocks.
|
/// between basic blocks.
|
||||||
DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
|
DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
|
||||||
|
|
||||||
/// Decrease the UnscheduledPredecessors count for all blocks in chain, and
|
|
||||||
/// if the count goes to 0, add them to the appropriate work list.
|
|
||||||
void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
|
void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
|
||||||
const BlockFilterSet *BlockFilter = nullptr);
|
const BlockFilterSet *BlockFilter = nullptr);
|
||||||
|
|
||||||
/// Decrease the UnscheduledPredecessors count for a single block, and
|
|
||||||
/// if the count goes to 0, add them to the appropriate work list.
|
|
||||||
void markBlockSuccessors(
|
|
||||||
BlockChain &Chain, MachineBasicBlock *BB, MachineBasicBlock *LoopHeaderBB,
|
|
||||||
const BlockFilterSet *BlockFilter = nullptr);
|
|
||||||
|
|
||||||
|
|
||||||
BranchProbability
|
BranchProbability
|
||||||
collectViableSuccessors(MachineBasicBlock *BB, BlockChain &Chain,
|
collectViableSuccessors(MachineBasicBlock *BB, BlockChain &Chain,
|
||||||
const BlockFilterSet *BlockFilter,
|
const BlockFilterSet *BlockFilter,
|
||||||
@@ -340,16 +298,6 @@ class MachineBlockPlacement : public MachineFunctionPass {
|
|||||||
const BlockFilterSet *BlockFilter,
|
const BlockFilterSet *BlockFilter,
|
||||||
BranchProbability SuccProb,
|
BranchProbability SuccProb,
|
||||||
BranchProbability HotProb);
|
BranchProbability HotProb);
|
||||||
bool repeatedlyTailDuplicateBlock(
|
|
||||||
MachineBasicBlock *BB, MachineBasicBlock *&LPred,
|
|
||||||
MachineBasicBlock *LoopHeaderBB,
|
|
||||||
BlockChain &Chain, BlockFilterSet *BlockFilter,
|
|
||||||
MachineFunction::iterator &PrevUnplacedBlockIt);
|
|
||||||
bool maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred,
|
|
||||||
const BlockChain &Chain,
|
|
||||||
BlockFilterSet *BlockFilter,
|
|
||||||
MachineFunction::iterator &PrevUnplacedBlockIt,
|
|
||||||
bool &DuplicatedToPred);
|
|
||||||
bool
|
bool
|
||||||
hasBetterLayoutPredecessor(MachineBasicBlock *BB, MachineBasicBlock *Succ,
|
hasBetterLayoutPredecessor(MachineBasicBlock *BB, MachineBasicBlock *Succ,
|
||||||
BlockChain &SuccChain, BranchProbability SuccProb,
|
BlockChain &SuccChain, BranchProbability SuccProb,
|
||||||
@@ -375,7 +323,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
|
|||||||
SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
|
SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
|
||||||
const BlockFilterSet *BlockFilter);
|
const BlockFilterSet *BlockFilter);
|
||||||
void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
|
void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
|
||||||
BlockFilterSet *BlockFilter = nullptr);
|
const BlockFilterSet *BlockFilter = nullptr);
|
||||||
MachineBasicBlock *findBestLoopTop(MachineLoop &L,
|
MachineBasicBlock *findBestLoopTop(MachineLoop &L,
|
||||||
const BlockFilterSet &LoopBlockSet);
|
const BlockFilterSet &LoopBlockSet);
|
||||||
MachineBasicBlock *findBestLoopExit(MachineLoop &L,
|
MachineBasicBlock *findBestLoopExit(MachineLoop &L,
|
||||||
@@ -440,49 +388,37 @@ static std::string getBlockName(MachineBasicBlock *BB) {
|
|||||||
/// When a chain is being merged into the "placed" chain, this routine will
|
/// When a chain is being merged into the "placed" chain, this routine will
|
||||||
/// quickly walk the successors of each block in the chain and mark them as
|
/// quickly walk the successors of each block in the chain and mark them as
|
||||||
/// having one fewer active predecessor. It also adds any successors of this
|
/// having one fewer active predecessor. It also adds any successors of this
|
||||||
/// chain which reach the zero-predecessor state to the appropriate worklist.
|
/// chain which reach the zero-predecessor state to the worklist passed in.
|
||||||
void MachineBlockPlacement::markChainSuccessors(
|
void MachineBlockPlacement::markChainSuccessors(
|
||||||
BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
|
BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
|
||||||
const BlockFilterSet *BlockFilter) {
|
const BlockFilterSet *BlockFilter) {
|
||||||
// Walk all the blocks in this chain, marking their successors as having
|
// Walk all the blocks in this chain, marking their successors as having
|
||||||
// a predecessor placed.
|
// a predecessor placed.
|
||||||
for (MachineBasicBlock *MBB : Chain) {
|
for (MachineBasicBlock *MBB : Chain) {
|
||||||
markBlockSuccessors(Chain, MBB, LoopHeaderBB, BlockFilter);
|
// Add any successors for which this is the only un-placed in-loop
|
||||||
}
|
// predecessor to the worklist as a viable candidate for CFG-neutral
|
||||||
}
|
// placement. No subsequent placement of this block will violate the CFG
|
||||||
|
// shape, so we get to use heuristics to choose a favorable placement.
|
||||||
|
for (MachineBasicBlock *Succ : MBB->successors()) {
|
||||||
|
if (BlockFilter && !BlockFilter->count(Succ))
|
||||||
|
continue;
|
||||||
|
BlockChain &SuccChain = *BlockToChain[Succ];
|
||||||
|
// Disregard edges within a fixed chain, or edges to the loop header.
|
||||||
|
if (&Chain == &SuccChain || Succ == LoopHeaderBB)
|
||||||
|
continue;
|
||||||
|
|
||||||
/// \brief Mark a single block's successors as having one fewer preds.
|
// This is a cross-chain edge that is within the loop, so decrement the
|
||||||
///
|
// loop predecessor count of the destination chain.
|
||||||
/// Under normal circumstances, this is only called by markChainSuccessors,
|
if (SuccChain.UnscheduledPredecessors == 0 ||
|
||||||
/// but if a block that was to be placed is completely tail-duplicated away,
|
--SuccChain.UnscheduledPredecessors > 0)
|
||||||
/// and was duplicated into the chain end, we need to redo markBlockSuccessors
|
continue;
|
||||||
/// for just that block.
|
|
||||||
void MachineBlockPlacement::markBlockSuccessors(
|
|
||||||
BlockChain &Chain, MachineBasicBlock *MBB, MachineBasicBlock *LoopHeaderBB,
|
|
||||||
const BlockFilterSet *BlockFilter) {
|
|
||||||
// Add any successors for which this is the only un-placed in-loop
|
|
||||||
// predecessor to the worklist as a viable candidate for CFG-neutral
|
|
||||||
// placement. No subsequent placement of this block will violate the CFG
|
|
||||||
// shape, so we get to use heuristics to choose a favorable placement.
|
|
||||||
for (MachineBasicBlock *Succ : MBB->successors()) {
|
|
||||||
if (BlockFilter && !BlockFilter->count(Succ))
|
|
||||||
continue;
|
|
||||||
BlockChain &SuccChain = *BlockToChain[Succ];
|
|
||||||
// Disregard edges within a fixed chain, or edges to the loop header.
|
|
||||||
if (&Chain == &SuccChain || Succ == LoopHeaderBB)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// This is a cross-chain edge that is within the loop, so decrement the
|
auto *MBB = *SuccChain.begin();
|
||||||
// loop predecessor count of the destination chain.
|
if (MBB->isEHPad())
|
||||||
if (SuccChain.UnscheduledPredecessors == 0 ||
|
EHPadWorkList.push_back(MBB);
|
||||||
--SuccChain.UnscheduledPredecessors > 0)
|
else
|
||||||
continue;
|
BlockWorkList.push_back(MBB);
|
||||||
|
}
|
||||||
auto *NewBB = *SuccChain.begin();
|
|
||||||
if (NewBB->isEHPad())
|
|
||||||
EHPadWorkList.push_back(NewBB);
|
|
||||||
else
|
|
||||||
BlockWorkList.push_back(NewBB);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -966,7 +902,7 @@ void MachineBlockPlacement::fillWorkLists(
|
|||||||
|
|
||||||
void MachineBlockPlacement::buildChain(
|
void MachineBlockPlacement::buildChain(
|
||||||
MachineBasicBlock *BB, BlockChain &Chain,
|
MachineBasicBlock *BB, BlockChain &Chain,
|
||||||
BlockFilterSet *BlockFilter) {
|
const BlockFilterSet *BlockFilter) {
|
||||||
assert(BB && "BB must not be null.\n");
|
assert(BB && "BB must not be null.\n");
|
||||||
assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match.\n");
|
assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match.\n");
|
||||||
MachineFunction::iterator PrevUnplacedBlockIt = F->begin();
|
MachineFunction::iterator PrevUnplacedBlockIt = F->begin();
|
||||||
@@ -1001,17 +937,6 @@ void MachineBlockPlacement::buildChain(
|
|||||||
"layout successor until the CFG reduces\n");
|
"layout successor until the CFG reduces\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Placement may have changed tail duplication opportunities.
|
|
||||||
// Check for that now.
|
|
||||||
if (TailDupPlacement && BestSucc) {
|
|
||||||
// If the chosen successor was duplicated into all its predecessors,
|
|
||||||
// don't bother laying it out, just go round the loop again with BB as
|
|
||||||
// the chain end.
|
|
||||||
if (repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
|
|
||||||
BlockFilter, PrevUnplacedBlockIt))
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Place this block, updating the datastructures to reflect its placement.
|
// Place this block, updating the datastructures to reflect its placement.
|
||||||
BlockChain &SuccChain = *BlockToChain[BestSucc];
|
BlockChain &SuccChain = *BlockToChain[BestSucc];
|
||||||
// Zero out UnscheduledPredecessors for the successor we're about to merge in case
|
// Zero out UnscheduledPredecessors for the successor we're about to merge in case
|
||||||
@@ -1793,175 +1718,6 @@ void MachineBlockPlacement::alignBlocks() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Tail duplicate \p BB into (some) predecessors if profitable, repeating if
|
|
||||||
/// it was duplicated into its chain predecessor and removed.
|
|
||||||
/// \p BB - Basic block that may be duplicated.
|
|
||||||
///
|
|
||||||
/// \p LPred - Chosen layout predecessor of \p BB.
|
|
||||||
/// Updated to be the chain end if LPred is removed.
|
|
||||||
/// \p Chain - Chain to which \p LPred belongs, and \p BB will belong.
|
|
||||||
/// \p BlockFilter - Set of blocks that belong to the loop being laid out.
|
|
||||||
/// Used to identify which blocks to update predecessor
|
|
||||||
/// counts.
|
|
||||||
/// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was
|
|
||||||
/// chosen in the given order due to unnatural CFG
|
|
||||||
/// only needed if \p BB is removed and
|
|
||||||
/// \p PrevUnplacedBlockIt pointed to \p BB.
|
|
||||||
/// @return true if \p BB was removed.
|
|
||||||
bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
|
|
||||||
MachineBasicBlock *BB, MachineBasicBlock *&LPred,
|
|
||||||
MachineBasicBlock *LoopHeaderBB,
|
|
||||||
BlockChain &Chain, BlockFilterSet *BlockFilter,
|
|
||||||
MachineFunction::iterator &PrevUnplacedBlockIt) {
|
|
||||||
bool Removed, DuplicatedToLPred;
|
|
||||||
bool DuplicatedToOriginalLPred;
|
|
||||||
Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter,
|
|
||||||
PrevUnplacedBlockIt,
|
|
||||||
DuplicatedToLPred);
|
|
||||||
if (!Removed)
|
|
||||||
return false;
|
|
||||||
DuplicatedToOriginalLPred = DuplicatedToLPred;
|
|
||||||
// Iteratively try to duplicate again. It can happen that a block that is
|
|
||||||
// duplicated into is still small enough to be duplicated again.
|
|
||||||
// No need to call markBlockSuccessors in this case, as the blocks being
|
|
||||||
// duplicated from here on are already scheduled.
|
|
||||||
// Note that DuplicatedToLPred always implies Removed.
|
|
||||||
while (DuplicatedToLPred) {
|
|
||||||
assert (Removed && "Block must have been removed to be duplicated into its "
|
|
||||||
"layout predecessor.");
|
|
||||||
MachineBasicBlock *DupBB, *DupPred;
|
|
||||||
// The removal callback causes Chain.end() to be updated when a block is
|
|
||||||
// removed. On the first pass through the loop, the chain end should be the
|
|
||||||
// same as it was on function entry. On subsequent passes, because we are
|
|
||||||
// duplicating the block at the end of the chain, if it is removed the
|
|
||||||
// chain will have shrunk by one block.
|
|
||||||
BlockChain::iterator ChainEnd = Chain.end();
|
|
||||||
DupBB = *(--ChainEnd);
|
|
||||||
// Now try to duplicate again.
|
|
||||||
if (ChainEnd == Chain.begin())
|
|
||||||
break;
|
|
||||||
DupPred = *std::prev(ChainEnd);
|
|
||||||
Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter,
|
|
||||||
PrevUnplacedBlockIt,
|
|
||||||
DuplicatedToLPred);
|
|
||||||
}
|
|
||||||
// If BB was duplicated into LPred, it is now scheduled. But because it was
|
|
||||||
// removed, markChainSuccessors won't be called for its chain. Instead we
|
|
||||||
// call markBlockSuccessors for LPred to achieve the same effect. This must go
|
|
||||||
// at the end because repeating the tail duplication can increase the number
|
|
||||||
// of unscheduled predecessors.
|
|
||||||
if (DuplicatedToOriginalLPred)
|
|
||||||
markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
|
|
||||||
|
|
||||||
LPred = *std::prev(Chain.end());
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Tail duplicate \p BB into (some) predecessors if profitable.
|
|
||||||
/// \p BB - Basic block that may be duplicated
|
|
||||||
/// \p LPred - Chosen layout predecessor of \p BB
|
|
||||||
/// \p Chain - Chain to which \p LPred belongs, and \p BB will belong.
|
|
||||||
/// \p BlockFilter - Set of blocks that belong to the loop being laid out.
|
|
||||||
/// Used to identify which blocks to update predecessor
|
|
||||||
/// counts.
|
|
||||||
/// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was
|
|
||||||
/// chosen in the given order due to unnatural CFG
|
|
||||||
/// only needed if \p BB is removed and
|
|
||||||
/// \p PrevUnplacedBlockIt pointed to \p BB.
|
|
||||||
/// \p DuplicatedToLPred - True if the block was duplicated into LPred. Will
|
|
||||||
/// only be true if the block was removed.
|
|
||||||
/// \return - True if the block was duplicated into all preds and removed.
|
|
||||||
bool MachineBlockPlacement::maybeTailDuplicateBlock(
|
|
||||||
MachineBasicBlock *BB, MachineBasicBlock *LPred,
|
|
||||||
const BlockChain &Chain, BlockFilterSet *BlockFilter,
|
|
||||||
MachineFunction::iterator &PrevUnplacedBlockIt,
|
|
||||||
bool &DuplicatedToLPred) {
|
|
||||||
|
|
||||||
DuplicatedToLPred = false;
|
|
||||||
DEBUG(dbgs() << "Redoing tail duplication for Succ#"
|
|
||||||
<< BB->getNumber() << "\n");
|
|
||||||
bool IsSimple = TailDup.isSimpleBB(BB);
|
|
||||||
// Blocks with single successors don't create additional fallthrough
|
|
||||||
// opportunities. Don't duplicate them. TODO: When conditional exits are
|
|
||||||
// analyzable, allow them to be duplicated.
|
|
||||||
if (!IsSimple && BB->succ_size() == 1)
|
|
||||||
return false;
|
|
||||||
if (!TailDup.shouldTailDuplicate(IsSimple, *BB))
|
|
||||||
return false;
|
|
||||||
// This has to be a callback because none of it can be done after
|
|
||||||
// BB is deleted.
|
|
||||||
bool Removed = false;
|
|
||||||
auto RemovalCallback =
|
|
||||||
[&](MachineBasicBlock *RemBB) {
|
|
||||||
// Signal to outer function
|
|
||||||
Removed = true;
|
|
||||||
|
|
||||||
// Conservative default.
|
|
||||||
bool InWorkList = true;
|
|
||||||
// Remove from the Chain and Chain Map
|
|
||||||
if (BlockToChain.count(RemBB)) {
|
|
||||||
BlockChain *Chain = BlockToChain[RemBB];
|
|
||||||
InWorkList = Chain->UnscheduledPredecessors == 0;
|
|
||||||
Chain->remove(RemBB);
|
|
||||||
BlockToChain.erase(RemBB);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle the unplaced block iterator
|
|
||||||
if (&(*PrevUnplacedBlockIt) == RemBB) {
|
|
||||||
PrevUnplacedBlockIt++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle the Work Lists
|
|
||||||
if (InWorkList) {
|
|
||||||
SmallVectorImpl<MachineBasicBlock *> &RemoveList = BlockWorkList;
|
|
||||||
if (RemBB->isEHPad())
|
|
||||||
RemoveList = EHPadWorkList;
|
|
||||||
RemoveList.erase(
|
|
||||||
remove_if(RemoveList,
|
|
||||||
[RemBB](MachineBasicBlock *BB) {return BB == RemBB;}),
|
|
||||||
RemoveList.end());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle the filter set
|
|
||||||
if (BlockFilter) {
|
|
||||||
BlockFilter->erase(RemBB);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove the block from loop info.
|
|
||||||
MLI->removeBlock(RemBB);
|
|
||||||
|
|
||||||
// TailDuplicator handles removing it from loops.
|
|
||||||
DEBUG(dbgs() << "TailDuplicator deleted block: "
|
|
||||||
<< getBlockName(RemBB) << "\n");
|
|
||||||
};
|
|
||||||
auto RemovalCallbackRef =
|
|
||||||
llvm::function_ref<void(MachineBasicBlock*)>(RemovalCallback);
|
|
||||||
|
|
||||||
SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
|
|
||||||
TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred,
|
|
||||||
&DuplicatedPreds, &RemovalCallbackRef);
|
|
||||||
|
|
||||||
// Update UnscheduledPredecessors to reflect tail-duplication.
|
|
||||||
DuplicatedToLPred = false;
|
|
||||||
for (MachineBasicBlock *Pred : DuplicatedPreds) {
|
|
||||||
// We're only looking for unscheduled predecessors that match the filter.
|
|
||||||
BlockChain* PredChain = BlockToChain[Pred];
|
|
||||||
if (Pred == LPred)
|
|
||||||
DuplicatedToLPred = true;
|
|
||||||
if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred))
|
|
||||||
|| PredChain == &Chain)
|
|
||||||
continue;
|
|
||||||
for (MachineBasicBlock *NewSucc : Pred->successors()) {
|
|
||||||
if (BlockFilter && !BlockFilter->count(NewSucc))
|
|
||||||
continue;
|
|
||||||
BlockChain *NewChain = BlockToChain[NewSucc];
|
|
||||||
if (NewChain != &Chain && NewChain != PredChain)
|
|
||||||
NewChain->UnscheduledPredecessors++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return Removed;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
|
bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
|
||||||
if (skipFunction(*MF.getFunction()))
|
if (skipFunction(*MF.getFunction()))
|
||||||
return false;
|
return false;
|
||||||
@@ -1978,13 +1734,6 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
TII = MF.getSubtarget().getInstrInfo();
|
TII = MF.getSubtarget().getInstrInfo();
|
||||||
TLI = MF.getSubtarget().getTargetLowering();
|
TLI = MF.getSubtarget().getTargetLowering();
|
||||||
MDT = &getAnalysis<MachineDominatorTree>();
|
MDT = &getAnalysis<MachineDominatorTree>();
|
||||||
if (TailDupPlacement) {
|
|
||||||
unsigned TailDupSize = TailDuplicatePlacementThreshold;
|
|
||||||
if (MF.getFunction()->optForSize())
|
|
||||||
TailDupSize = 1;
|
|
||||||
TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(BlockToChain.empty());
|
assert(BlockToChain.empty());
|
||||||
|
|
||||||
buildCFGChains();
|
buildCFGChains();
|
||||||
@@ -1998,7 +1747,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
BranchFoldPlacement;
|
BranchFoldPlacement;
|
||||||
// No tail merging opportunities if the block number is less than four.
|
// No tail merging opportunities if the block number is less than four.
|
||||||
if (MF.size() > 3 && EnableTailMerge) {
|
if (MF.size() > 3 && EnableTailMerge) {
|
||||||
unsigned TailMergeSize = TailDuplicatePlacementThreshold + 1;
|
// Default to the standard tail-merge-size option.
|
||||||
|
unsigned TailMergeSize = 0;
|
||||||
BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
|
BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
|
||||||
*MBPI, TailMergeSize);
|
*MBPI, TailMergeSize);
|
||||||
|
|
||||||
@@ -2007,8 +1757,6 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
/*AfterBlockPlacement=*/true)) {
|
/*AfterBlockPlacement=*/true)) {
|
||||||
// Redo the layout if tail merging creates/removes/moves blocks.
|
// Redo the layout if tail merging creates/removes/moves blocks.
|
||||||
BlockToChain.clear();
|
BlockToChain.clear();
|
||||||
// Must redo the dominator tree if blocks were changed.
|
|
||||||
MDT->runOnMachineFunction(MF);
|
|
||||||
ChainAllocator.DestroyAll();
|
ChainAllocator.DestroyAll();
|
||||||
buildCFGChains();
|
buildCFGChains();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -49,7 +49,7 @@ bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
|
|||||||
|
|
||||||
auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
|
auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
|
||||||
|
|
||||||
Duplicator.initMF(MF, MBPI, /* LayoutMode */ false);
|
Duplicator.initMF(MF, MBPI);
|
||||||
|
|
||||||
bool MadeChange = false;
|
bool MadeChange = false;
|
||||||
while (Duplicator.tailDuplicateBlocks())
|
while (Duplicator.tailDuplicateBlocks())
|
||||||
|
|||||||
@@ -20,7 +20,6 @@
|
|||||||
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
|
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
|
||||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||||
#include "llvm/CodeGen/MachineLoopInfo.h"
|
|
||||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
#include "llvm/CodeGen/MachineModuleInfo.h"
|
||||||
#include "llvm/CodeGen/Passes.h"
|
#include "llvm/CodeGen/Passes.h"
|
||||||
#include "llvm/IR/Function.h"
|
#include "llvm/IR/Function.h"
|
||||||
@@ -65,7 +64,7 @@ static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
|
|||||||
|
|
||||||
void TailDuplicator::initMF(MachineFunction &MFin,
|
void TailDuplicator::initMF(MachineFunction &MFin,
|
||||||
const MachineBranchProbabilityInfo *MBPIin,
|
const MachineBranchProbabilityInfo *MBPIin,
|
||||||
bool LayoutModeIn, unsigned TailDupSizeIn) {
|
unsigned TailDupSizeIn) {
|
||||||
MF = &MFin;
|
MF = &MFin;
|
||||||
TII = MF->getSubtarget().getInstrInfo();
|
TII = MF->getSubtarget().getInstrInfo();
|
||||||
TRI = MF->getSubtarget().getRegisterInfo();
|
TRI = MF->getSubtarget().getRegisterInfo();
|
||||||
@@ -76,7 +75,6 @@ void TailDuplicator::initMF(MachineFunction &MFin,
|
|||||||
|
|
||||||
assert(MBPI != nullptr && "Machine Branch Probability Info required");
|
assert(MBPI != nullptr && "Machine Branch Probability Info required");
|
||||||
|
|
||||||
LayoutMode = LayoutModeIn;
|
|
||||||
PreRegAlloc = MRI->isSSA();
|
PreRegAlloc = MRI->isSSA();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -129,23 +127,18 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
|
|||||||
/// Tail duplicate the block and cleanup.
|
/// Tail duplicate the block and cleanup.
|
||||||
/// \p IsSimple - return value of isSimpleBB
|
/// \p IsSimple - return value of isSimpleBB
|
||||||
/// \p MBB - block to be duplicated
|
/// \p MBB - block to be duplicated
|
||||||
/// \p ForcedLayoutPred - If non-null, treat this block as the layout
|
|
||||||
/// predecessor, instead of using the ordering in MF
|
|
||||||
/// \p DuplicatedPreds - if non-null, \p DuplicatedPreds will contain a list of
|
/// \p DuplicatedPreds - if non-null, \p DuplicatedPreds will contain a list of
|
||||||
/// all Preds that received a copy of \p MBB.
|
/// all Preds that received a copy of \p MBB.
|
||||||
/// \p RemovalCallback - if non-null, called just before MBB is deleted.
|
|
||||||
bool TailDuplicator::tailDuplicateAndUpdate(
|
bool TailDuplicator::tailDuplicateAndUpdate(
|
||||||
bool IsSimple, MachineBasicBlock *MBB,
|
bool IsSimple, MachineBasicBlock *MBB,
|
||||||
MachineBasicBlock *ForcedLayoutPred,
|
SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds) {
|
||||||
SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds,
|
|
||||||
llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback) {
|
|
||||||
// Save the successors list.
|
// Save the successors list.
|
||||||
SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(),
|
SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(),
|
||||||
MBB->succ_end());
|
MBB->succ_end());
|
||||||
|
|
||||||
SmallVector<MachineBasicBlock *, 8> TDBBs;
|
SmallVector<MachineBasicBlock *, 8> TDBBs;
|
||||||
SmallVector<MachineInstr *, 16> Copies;
|
SmallVector<MachineInstr *, 16> Copies;
|
||||||
if (!tailDuplicate(IsSimple, MBB, ForcedLayoutPred, TDBBs, Copies))
|
if (!tailDuplicate(IsSimple, MBB, TDBBs, Copies))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
++NumTails;
|
++NumTails;
|
||||||
@@ -163,7 +156,7 @@ bool TailDuplicator::tailDuplicateAndUpdate(
|
|||||||
// If it is dead, remove it.
|
// If it is dead, remove it.
|
||||||
if (isDead) {
|
if (isDead) {
|
||||||
NumTailDupRemoved += MBB->size();
|
NumTailDupRemoved += MBB->size();
|
||||||
removeDeadBlock(MBB, RemovalCallback);
|
removeDeadBlock(MBB);
|
||||||
++NumDeadBlocks;
|
++NumDeadBlocks;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -262,7 +255,7 @@ bool TailDuplicator::tailDuplicateBlocks() {
|
|||||||
if (!shouldTailDuplicate(IsSimple, *MBB))
|
if (!shouldTailDuplicate(IsSimple, *MBB))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB, nullptr);
|
MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PreRegAlloc && TailDupVerify)
|
if (PreRegAlloc && TailDupVerify)
|
||||||
@@ -521,10 +514,8 @@ void TailDuplicator::updateSuccessorsPHIs(
|
|||||||
/// Determine if it is profitable to duplicate this block.
|
/// Determine if it is profitable to duplicate this block.
|
||||||
bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
|
bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
|
||||||
MachineBasicBlock &TailBB) {
|
MachineBasicBlock &TailBB) {
|
||||||
// When doing tail-duplication during layout, the block ordering is in flux,
|
// Only duplicate blocks that end with unconditional branches.
|
||||||
// so canFallThrough returns a result based on incorrect information and
|
if (TailBB.canFallThrough())
|
||||||
// should just be ignored.
|
|
||||||
if (!LayoutMode && TailBB.canFallThrough())
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Don't try to tail-duplicate single-block loops.
|
// Don't try to tail-duplicate single-block loops.
|
||||||
@@ -744,7 +735,7 @@ bool TailDuplicator::duplicateSimpleBB(
|
|||||||
|
|
||||||
bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB,
|
bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB,
|
||||||
MachineBasicBlock *PredBB) {
|
MachineBasicBlock *PredBB) {
|
||||||
// EH edges are ignored by analyzeBranch.
|
// EH edges are ignored by AnalyzeBranch.
|
||||||
if (PredBB->succ_size() > 1)
|
if (PredBB->succ_size() > 1)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@@ -759,16 +750,7 @@ bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB,
|
|||||||
|
|
||||||
/// If it is profitable, duplicate TailBB's contents in each
|
/// If it is profitable, duplicate TailBB's contents in each
|
||||||
/// of its predecessors.
|
/// of its predecessors.
|
||||||
/// \p IsSimple result of isSimpleBB
|
|
||||||
/// \p TailBB Block to be duplicated.
|
|
||||||
/// \p ForcedLayoutPred When non-null, use this block as the layout predecessor
|
|
||||||
/// instead of the previous block in MF's order.
|
|
||||||
/// \p TDBBs A vector to keep track of all blocks tail-duplicated
|
|
||||||
/// into.
|
|
||||||
/// \p Copies A vector of copy instructions inserted. Used later to
|
|
||||||
/// walk all the inserted copies and remove redundant ones.
|
|
||||||
bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
|
bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
|
||||||
MachineBasicBlock *ForcedLayoutPred,
|
|
||||||
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
|
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
|
||||||
SmallVectorImpl<MachineInstr *> &Copies) {
|
SmallVectorImpl<MachineInstr *> &Copies) {
|
||||||
DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
|
DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
|
||||||
@@ -793,12 +775,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
// Don't duplicate into a fall-through predecessor (at least for now).
|
// Don't duplicate into a fall-through predecessor (at least for now).
|
||||||
bool IsLayoutSuccessor = false;
|
if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
|
||||||
if (ForcedLayoutPred)
|
|
||||||
IsLayoutSuccessor = (ForcedLayoutPred == PredBB);
|
|
||||||
else if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
|
|
||||||
IsLayoutSuccessor = true;
|
|
||||||
if (IsLayoutSuccessor)
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
|
DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
|
||||||
@@ -851,20 +828,16 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
|
|||||||
// If TailBB was duplicated into all its predecessors except for the prior
|
// If TailBB was duplicated into all its predecessors except for the prior
|
||||||
// block, which falls through unconditionally, move the contents of this
|
// block, which falls through unconditionally, move the contents of this
|
||||||
// block into the prior block.
|
// block into the prior block.
|
||||||
MachineBasicBlock *PrevBB = ForcedLayoutPred;
|
MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator());
|
||||||
if (!PrevBB)
|
|
||||||
PrevBB = &*std::prev(TailBB->getIterator());
|
|
||||||
MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
|
MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
|
||||||
SmallVector<MachineOperand, 4> PriorCond;
|
SmallVector<MachineOperand, 4> PriorCond;
|
||||||
// This has to check PrevBB->succ_size() because EH edges are ignored by
|
// This has to check PrevBB->succ_size() because EH edges are ignored by
|
||||||
// analyzeBranch.
|
// AnalyzeBranch.
|
||||||
if (PrevBB->succ_size() == 1 &&
|
if (PrevBB->succ_size() == 1 &&
|
||||||
// Layout preds are not always CFG preds. Check.
|
// Layout preds are not always CFG preds. Check.
|
||||||
*PrevBB->succ_begin() == TailBB &&
|
*PrevBB->succ_begin() == TailBB &&
|
||||||
!TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
|
!TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
|
||||||
PriorCond.empty() &&
|
PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
|
||||||
(!PriorTBB || PriorTBB == TailBB) &&
|
|
||||||
TailBB->pred_size() == 1 &&
|
|
||||||
!TailBB->hasAddressTaken()) {
|
!TailBB->hasAddressTaken()) {
|
||||||
DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
|
DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
|
||||||
<< "From MBB: " << *TailBB);
|
<< "From MBB: " << *TailBB);
|
||||||
@@ -891,7 +864,6 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
|
|||||||
}
|
}
|
||||||
appendCopies(PrevBB, CopyInfos, Copies);
|
appendCopies(PrevBB, CopyInfos, Copies);
|
||||||
} else {
|
} else {
|
||||||
TII->removeBranch(*PrevBB);
|
|
||||||
// No PHIs to worry about, just splice the instructions over.
|
// No PHIs to worry about, just splice the instructions over.
|
||||||
PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
|
PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
|
||||||
}
|
}
|
||||||
@@ -964,15 +936,10 @@ void TailDuplicator::appendCopies(MachineBasicBlock *MBB,
|
|||||||
|
|
||||||
/// Remove the specified dead machine basic block from the function, updating
|
/// Remove the specified dead machine basic block from the function, updating
|
||||||
/// the CFG.
|
/// the CFG.
|
||||||
void TailDuplicator::removeDeadBlock(
|
void TailDuplicator::removeDeadBlock(MachineBasicBlock *MBB) {
|
||||||
MachineBasicBlock *MBB,
|
|
||||||
llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback) {
|
|
||||||
assert(MBB->pred_empty() && "MBB must be dead!");
|
assert(MBB->pred_empty() && "MBB must be dead!");
|
||||||
DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
|
DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
|
||||||
|
|
||||||
if (RemovalCallback)
|
|
||||||
(*RemovalCallback)(MBB);
|
|
||||||
|
|
||||||
// Remove all successors.
|
// Remove all successors.
|
||||||
while (!MBB->succ_empty())
|
while (!MBB->succ_empty())
|
||||||
MBB->removeSuccessor(MBB->succ_end() - 1);
|
MBB->removeSuccessor(MBB->succ_end() - 1);
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ bb1:
|
|||||||
%tmp2 = load i16, i16* %ptr, align 2
|
%tmp2 = load i16, i16* %ptr, align 2
|
||||||
br label %bb2
|
br label %bb2
|
||||||
bb2:
|
bb2:
|
||||||
|
; CHECK: %bb2
|
||||||
; CHECK-NOT: and {{w[0-9]+}}, [[REG]], #0xffff
|
; CHECK-NOT: and {{w[0-9]+}}, [[REG]], #0xffff
|
||||||
; CHECK: cmp [[REG]], #23
|
; CHECK: cmp [[REG]], #23
|
||||||
%tmp3 = phi i16 [ 0, %entry ], [ %tmp2, %bb1 ]
|
%tmp3 = phi i16 [ 0, %entry ], [ %tmp2, %bb1 ]
|
||||||
|
|||||||
@@ -1,8 +1,4 @@
|
|||||||
; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 -tail-dup-placement=0 | FileCheck %s
|
; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 | FileCheck %s
|
||||||
; -tail-dup-placement causes tail duplication during layout. This breaks the
|
|
||||||
; assumptions of the test case as written (specifically, it creates an
|
|
||||||
; additional cmp instruction, creating a false positive), so we pass
|
|
||||||
; -tail-dup-placement=0 to restore the original behavior
|
|
||||||
|
|
||||||
; marked as external to prevent possible optimizations
|
; marked as external to prevent possible optimizations
|
||||||
@a = external global i32
|
@a = external global i32
|
||||||
|
|||||||
@@ -1,69 +0,0 @@
|
|||||||
; RUN: llc -O3 -o - -verify-machineinstrs %s | FileCheck %s
|
|
||||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
|
||||||
|
|
||||||
%struct.s1 = type { %struct.s3*, %struct.s1* }
|
|
||||||
%struct.s2 = type opaque
|
|
||||||
%struct.s3 = type { i32 }
|
|
||||||
|
|
||||||
; Function Attrs: nounwind
|
|
||||||
define internal fastcc i32 @repeated_dup_worklist(%struct.s1** %pp1, %struct.s2* %p2, i32 %state, i1 %i1_1, i32 %i32_1) unnamed_addr #0 {
|
|
||||||
entry:
|
|
||||||
br label %while.cond.outer
|
|
||||||
|
|
||||||
; The loop gets laid out:
|
|
||||||
; %while.cond.outer
|
|
||||||
; %(null)
|
|
||||||
; %(null)
|
|
||||||
; %dup2
|
|
||||||
; and then %dup1 gets chosen as the next block.
|
|
||||||
; When dup2 is duplicated into dup1, %worklist could erroneously be placed on
|
|
||||||
; the worklist, because all of its current predecessors are now scheduled.
|
|
||||||
; However, after dup2 is tail-duplicated, %worklist can't be on the worklist
|
|
||||||
; because it now has unscheduled predecessors.
|
|
||||||
; CHECK-LABEL: repeated_dup_worklist
|
|
||||||
; CHECK: // %entry
|
|
||||||
; CHECK: // %while.cond.outer
|
|
||||||
; first %(null) block
|
|
||||||
; CHECK: // in Loop:
|
|
||||||
; CHECK: ldr
|
|
||||||
; CHECK-NEXT: tbnz
|
|
||||||
; second %(null) block
|
|
||||||
; CHECK: // in Loop:
|
|
||||||
; CHECK: // %dup2
|
|
||||||
; CHECK: // %worklist
|
|
||||||
; CHECK: // %if.then96.i
|
|
||||||
while.cond.outer: ; preds = %dup1, %entry
|
|
||||||
%progress.0.ph = phi i32 [ 0, %entry ], [ %progress.1, %dup1 ]
|
|
||||||
%inc77 = add nsw i32 %progress.0.ph, 1
|
|
||||||
%cmp = icmp slt i32 %progress.0.ph, %i32_1
|
|
||||||
br i1 %cmp, label %dup2, label %dup1
|
|
||||||
|
|
||||||
dup2: ; preds = %if.then96.i, %worklist, %while.cond.outer
|
|
||||||
%progress.1.ph = phi i32 [ 0, %while.cond.outer ], [ %progress.1, %if.then96.i ], [ %progress.1, %worklist ]
|
|
||||||
%.pr = load %struct.s1*, %struct.s1** %pp1, align 8
|
|
||||||
br label %dup1
|
|
||||||
|
|
||||||
dup1: ; preds = %dup2, %while.cond.outer
|
|
||||||
%0 = phi %struct.s1* [ %.pr, %dup2 ], [ undef, %while.cond.outer ]
|
|
||||||
%progress.1 = phi i32 [ %progress.1.ph, %dup2 ], [ %inc77, %while.cond.outer ]
|
|
||||||
br i1 %i1_1, label %while.cond.outer, label %worklist
|
|
||||||
|
|
||||||
worklist: ; preds = %dup1
|
|
||||||
%snode94 = getelementptr inbounds %struct.s1, %struct.s1* %0, i64 0, i32 0
|
|
||||||
%1 = load %struct.s3*, %struct.s3** %snode94, align 8
|
|
||||||
%2 = getelementptr inbounds %struct.s3, %struct.s3* %1, i32 0, i32 0
|
|
||||||
%3 = load i32, i32* %2, align 4
|
|
||||||
%tobool95.i = icmp eq i32 %3, 0
|
|
||||||
br i1 %tobool95.i, label %if.then96.i, label %dup2
|
|
||||||
|
|
||||||
if.then96.i: ; preds = %worklist
|
|
||||||
call fastcc void @free_s3(%struct.s2* %p2, %struct.s3* %1) #1
|
|
||||||
br label %dup2
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: nounwind
|
|
||||||
declare fastcc void @free_s3(%struct.s2*, %struct.s3*) unnamed_addr #0
|
|
||||||
|
|
||||||
attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+crc,+crypto,+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
||||||
attributes #1 = { nounwind }
|
|
||||||
@@ -25,6 +25,7 @@ bb1: ; preds = %bb
|
|||||||
br label %bb2
|
br label %bb2
|
||||||
|
|
||||||
bb2: ; preds = %bb1, %entry
|
bb2: ; preds = %bb1, %entry
|
||||||
|
; CHECK: bb2
|
||||||
; CHECK: cmp [[REG]], #0
|
; CHECK: cmp [[REG]], #0
|
||||||
; CHECK: ble
|
; CHECK: ble
|
||||||
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
|
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
|
||||||
|
|||||||
@@ -1,21 +1,9 @@
|
|||||||
; RUN: llc -verify-machineinstrs < %s -march=ppc32 | FileCheck %s
|
; RUN: llc -verify-machineinstrs < %s -march=ppc32 | \
|
||||||
|
; RUN: grep "b LBB.*" | count 4
|
||||||
|
|
||||||
target datalayout = "E-p:32:32"
|
target datalayout = "E-p:32:32"
|
||||||
target triple = "powerpc-apple-darwin8.7.0"
|
target triple = "powerpc-apple-darwin8.7.0"
|
||||||
|
|
||||||
;CHECK-LABEL: foo:
|
|
||||||
; There are 4 inner loops (%bb, %bb12, %bb25, %bb38) that all exit to %cond_next48
|
|
||||||
; The last (whichever it is) should have a fallthrough exit, and the other three
|
|
||||||
; need an unconditional branch. No other block should have an unconditional
|
|
||||||
; branch to cond_next48
|
|
||||||
; One of the blocks ends up with a loop exit block that gets a tail-duplicated copy
|
|
||||||
; of %cond_next48, so there should only be two unconditional branches.
|
|
||||||
|
|
||||||
;CHECK: b LBB0_13
|
|
||||||
;CHECK: b LBB0_13
|
|
||||||
;CHECK-NOT: b LBB0_13
|
|
||||||
;CHECK: LBB0_13: ; %cond_next48
|
|
||||||
|
|
||||||
define void @foo(i32 %W, i32 %X, i32 %Y, i32 %Z) {
|
define void @foo(i32 %W, i32 %X, i32 %Y, i32 %Z) {
|
||||||
entry:
|
entry:
|
||||||
%tmp1 = and i32 %W, 1 ; <i32> [#uses=1]
|
%tmp1 = and i32 %W, 1 ; <i32> [#uses=1]
|
||||||
|
|||||||
@@ -74,24 +74,24 @@ return: ; preds = %if.end, %if.then
|
|||||||
; CHECK-DAG: std [[REGA]], [[OFF:[0-9]+]](31) # 8-byte Folded Spill
|
; CHECK-DAG: std [[REGA]], [[OFF:[0-9]+]](31) # 8-byte Folded Spill
|
||||||
; CHECK-DAG: std 1, 16([[REGA]])
|
; CHECK-DAG: std 1, 16([[REGA]])
|
||||||
; CHECK-DAG: std 2, 24([[REGA]])
|
; CHECK-DAG: std 2, 24([[REGA]])
|
||||||
; CHECK: bcl 20, 31, .LBB1_3
|
; CHECK: bcl 20, 31, .LBB1_5
|
||||||
; CHECK: li 3, 1
|
; CHECK: li 3, 1
|
||||||
; CHECK: #EH_SjLj_Setup .LBB1_3
|
; CHECK: #EH_SjLj_Setup .LBB1_5
|
||||||
; CHECK: b .LBB1_1
|
; CHECK: b .LBB1_1
|
||||||
|
|
||||||
; CHECK: .LBB1_3:
|
; CHECK: .LBB1_4:
|
||||||
; CHECK: mflr [[REGL:[0-9]+]]
|
|
||||||
; CHECK: ld [[REG2:[0-9]+]], [[OFF]](31) # 8-byte Folded Reload
|
|
||||||
; CHECK: std [[REGL]], 8([[REG2]])
|
|
||||||
; CHECK: li 3, 0
|
|
||||||
|
|
||||||
; CHECK: .LBB1_5:
|
|
||||||
|
|
||||||
; CHECK: lfd
|
; CHECK: lfd
|
||||||
; CHECK: lxvd2x
|
; CHECK: lxvd2x
|
||||||
; CHECK: ld
|
; CHECK: ld
|
||||||
; CHECK: blr
|
; CHECK: blr
|
||||||
|
|
||||||
|
; CHECK: .LBB1_5:
|
||||||
|
; CHECK: mflr [[REGL:[0-9]+]]
|
||||||
|
; CHECK: ld [[REG2:[0-9]+]], [[OFF]](31) # 8-byte Folded Reload
|
||||||
|
; CHECK: std [[REGL]], 8([[REG2]])
|
||||||
|
; CHECK: li 3, 0
|
||||||
|
|
||||||
; CHECK-NOAV: @main
|
; CHECK-NOAV: @main
|
||||||
; CHECK-NOAV-NOT: stxvd2x
|
; CHECK-NOAV-NOT: stxvd2x
|
||||||
; CHECK-NOAV: bcl
|
; CHECK-NOAV: bcl
|
||||||
|
|||||||
@@ -1,100 +0,0 @@
|
|||||||
; RUN: llc -outline-optional-branches -O2 < %s | FileCheck %s
|
|
||||||
target datalayout = "e-m:e-i64:64-n32:64"
|
|
||||||
target triple = "powerpc64le-grtev4-linux-gnu"
|
|
||||||
|
|
||||||
; Intended layout:
|
|
||||||
; The outlining flag produces the layout
|
|
||||||
; test1
|
|
||||||
; test2
|
|
||||||
; test3
|
|
||||||
; test4
|
|
||||||
; exit
|
|
||||||
; optional1
|
|
||||||
; optional2
|
|
||||||
; optional3
|
|
||||||
; optional4
|
|
||||||
; Tail duplication puts test n+1 at the end of optional n
|
|
||||||
; so optional1 includes a copy of test2 at the end, and branches
|
|
||||||
; to test3 (at the top) or falls through to optional2.
|
|
||||||
; The CHECK statements check for the whole string of tests and exit block,
|
|
||||||
; and then check that the correct test has been duplicated into the end of
|
|
||||||
; the optional blocks and that the optional blocks are in the correct order.
|
|
||||||
;CHECK-LABEL: f:
|
|
||||||
; test1 may have been merged with entry
|
|
||||||
;CHECK: mr [[TAGREG:[0-9]+]], 3
|
|
||||||
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
|
|
||||||
;CHECK-NEXT: bc 12, 1, [[OPT1LABEL:[._0-9A-Za-z]+]]
|
|
||||||
;CHECK-NEXT: [[TEST2LABEL:[._0-9A-Za-z]+]]: # %test2
|
|
||||||
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
|
|
||||||
;CHECK-NEXT: bne 0, [[OPT2LABEL:[._0-9A-Za-z]+]]
|
|
||||||
;CHECK-NEXT: [[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3
|
|
||||||
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
|
|
||||||
;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]]
|
|
||||||
;CHECK-NEXT: [[TEST4LABEL:[._0-9A-Za-z]+]]: # %test4
|
|
||||||
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
|
|
||||||
;CHECK-NEXT: bne 0, .[[OPT4LABEL:[._0-9A-Za-z]+]]
|
|
||||||
;CHECK-NEXT: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit
|
|
||||||
;CHECK: blr
|
|
||||||
;CHECK-NEXT: [[OPT1LABEL]]
|
|
||||||
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
|
|
||||||
;CHECK-NEXT: beq 0, [[TEST3LABEL]]
|
|
||||||
;CHECK-NEXT: [[OPT2LABEL]]
|
|
||||||
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
|
|
||||||
;CHECK-NEXT: beq 0, [[TEST4LABEL]]
|
|
||||||
;CHECK-NEXT: [[OPT3LABEL]]
|
|
||||||
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
|
|
||||||
;CHECK-NEXT: beq 0, [[EXITLABEL]]
|
|
||||||
;CHECK-NEXT: [[OPT4LABEL]]
|
|
||||||
;CHECK: b [[EXITLABEL]]
|
|
||||||
|
|
||||||
define void @f(i32 %tag) {
|
|
||||||
entry:
|
|
||||||
br label %test1
|
|
||||||
test1:
|
|
||||||
%tagbit1 = and i32 %tag, 1
|
|
||||||
%tagbit1eq0 = icmp eq i32 %tagbit1, 0
|
|
||||||
br i1 %tagbit1eq0, label %test2, label %optional1
|
|
||||||
optional1:
|
|
||||||
call void @a()
|
|
||||||
call void @a()
|
|
||||||
call void @a()
|
|
||||||
call void @a()
|
|
||||||
br label %test2
|
|
||||||
test2:
|
|
||||||
%tagbit2 = and i32 %tag, 2
|
|
||||||
%tagbit2eq0 = icmp eq i32 %tagbit2, 0
|
|
||||||
br i1 %tagbit2eq0, label %test3, label %optional2
|
|
||||||
optional2:
|
|
||||||
call void @b()
|
|
||||||
call void @b()
|
|
||||||
call void @b()
|
|
||||||
call void @b()
|
|
||||||
br label %test3
|
|
||||||
test3:
|
|
||||||
%tagbit3 = and i32 %tag, 4
|
|
||||||
%tagbit3eq0 = icmp eq i32 %tagbit3, 0
|
|
||||||
br i1 %tagbit3eq0, label %test4, label %optional3
|
|
||||||
optional3:
|
|
||||||
call void @c()
|
|
||||||
call void @c()
|
|
||||||
call void @c()
|
|
||||||
call void @c()
|
|
||||||
br label %test4
|
|
||||||
test4:
|
|
||||||
%tagbit4 = and i32 %tag, 8
|
|
||||||
%tagbit4eq0 = icmp eq i32 %tagbit4, 0
|
|
||||||
br i1 %tagbit4eq0, label %exit, label %optional4
|
|
||||||
optional4:
|
|
||||||
call void @d()
|
|
||||||
call void @d()
|
|
||||||
call void @d()
|
|
||||||
call void @d()
|
|
||||||
br label %exit
|
|
||||||
exit:
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
declare void @a()
|
|
||||||
declare void @b()
|
|
||||||
declare void @c()
|
|
||||||
declare void @d()
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false | FileCheck %s
|
; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false | FileCheck %s
|
||||||
; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -tail-dup-placement=0 -verify-machineinstrs -fast-isel=false | FileCheck -check-prefix=OPT %s
|
; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -verify-machineinstrs -fast-isel=false | FileCheck -check-prefix=OPT %s
|
||||||
|
|
||||||
; Test the CFG stackifier pass.
|
; Test the CFG stackifier pass.
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -tail-dup-placement=0| FileCheck %s
|
; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt | FileCheck %s
|
||||||
|
|
||||||
; Test memcpy, memmove, and memset intrinsics.
|
; Test memcpy, memmove, and memset intrinsics.
|
||||||
|
|
||||||
|
|||||||
@@ -177,12 +177,6 @@ exit:
|
|||||||
ret i32 %sum
|
ret i32 %sum
|
||||||
}
|
}
|
||||||
|
|
||||||
; Tail duplication during layout can entirely remove body0 by duplicating it
|
|
||||||
; into the entry block and into body1. This is a good thing but it isn't what
|
|
||||||
; this test is looking for. So to make the blocks longer so they don't get
|
|
||||||
; duplicated, we add some calls to dummy.
|
|
||||||
declare void @dummy()
|
|
||||||
|
|
||||||
define i32 @test_loop_rotate(i32 %i, i32* %a) {
|
define i32 @test_loop_rotate(i32 %i, i32* %a) {
|
||||||
; Check that we rotate conditional exits from the loop to the bottom of the
|
; Check that we rotate conditional exits from the loop to the bottom of the
|
||||||
; loop, eliminating unconditional branches to the top.
|
; loop, eliminating unconditional branches to the top.
|
||||||
@@ -200,8 +194,6 @@ body0:
|
|||||||
%base = phi i32 [ 0, %entry ], [ %sum, %body1 ]
|
%base = phi i32 [ 0, %entry ], [ %sum, %body1 ]
|
||||||
%next = add i32 %iv, 1
|
%next = add i32 %iv, 1
|
||||||
%exitcond = icmp eq i32 %next, %i
|
%exitcond = icmp eq i32 %next, %i
|
||||||
call void @dummy()
|
|
||||||
call void @dummy()
|
|
||||||
br i1 %exitcond, label %exit, label %body1
|
br i1 %exitcond, label %exit, label %body1
|
||||||
|
|
||||||
body1:
|
body1:
|
||||||
@@ -953,7 +945,7 @@ define void @benchmark_heapsort(i32 %n, double* nocapture %ra) {
|
|||||||
; First rotated loop top.
|
; First rotated loop top.
|
||||||
; CHECK: .p2align
|
; CHECK: .p2align
|
||||||
; CHECK: %while.end
|
; CHECK: %while.end
|
||||||
; %for.cond gets completely tail-duplicated away.
|
; CHECK: %for.cond
|
||||||
; CHECK: %if.then
|
; CHECK: %if.then
|
||||||
; CHECK: %if.else
|
; CHECK: %if.else
|
||||||
; CHECK: %if.end10
|
; CHECK: %if.end10
|
||||||
|
|||||||
@@ -105,11 +105,9 @@ define i32 @weighted_select3(i32 %a, i32 %b) {
|
|||||||
; CHECK-NEXT: testl %edi, %edi
|
; CHECK-NEXT: testl %edi, %edi
|
||||||
; CHECK-NEXT: je [[LABEL_BB6:.*]]
|
; CHECK-NEXT: je [[LABEL_BB6:.*]]
|
||||||
; CHECK: movl %edi, %eax
|
; CHECK: movl %edi, %eax
|
||||||
; CHECK-NEXT: retq
|
|
||||||
; CHECK: [[LABEL_BB6]]
|
; CHECK: [[LABEL_BB6]]
|
||||||
; CHECK-NEXT: movl %esi, %edi
|
; CHECK-NEXT: movl %esi, %edi
|
||||||
; CHECK-NEXT: movl %edi, %eax
|
; CHECK-NEXT: jmp
|
||||||
; CHECK-NEXT: retq
|
|
||||||
;
|
;
|
||||||
%cmp = icmp ne i32 %a, 0
|
%cmp = icmp ne i32 %a, 0
|
||||||
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !2
|
%sel = select i1 %cmp, i32 %a, i32 %b, !prof !2
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
; CHECK-LABEL: fmaddsubpd_loop_128:
|
; CHECK-LABEL: fmaddsubpd_loop_128:
|
||||||
; CHECK: vfmaddsub231pd %xmm1, %xmm0, %xmm2
|
; CHECK: vfmaddsub231pd %xmm1, %xmm0, %xmm2
|
||||||
; CHECK: vmovapd %xmm2, %xmm0
|
; CHECK: vmovaps %xmm2, %xmm0
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
define <2 x double> @fmaddsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
define <2 x double> @fmaddsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||||
entry:
|
entry:
|
||||||
@@ -28,7 +28,7 @@ for.end:
|
|||||||
|
|
||||||
; CHECK-LABEL: fmsubaddpd_loop_128:
|
; CHECK-LABEL: fmsubaddpd_loop_128:
|
||||||
; CHECK: vfmsubadd231pd %xmm1, %xmm0, %xmm2
|
; CHECK: vfmsubadd231pd %xmm1, %xmm0, %xmm2
|
||||||
; CHECK: vmovapd %xmm2, %xmm0
|
; CHECK: vmovaps %xmm2, %xmm0
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
define <2 x double> @fmsubaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
define <2 x double> @fmsubaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||||
entry:
|
entry:
|
||||||
@@ -54,7 +54,7 @@ for.end:
|
|||||||
|
|
||||||
; CHECK-LABEL: fmaddpd_loop_128:
|
; CHECK-LABEL: fmaddpd_loop_128:
|
||||||
; CHECK: vfmadd231pd %xmm1, %xmm0, %xmm2
|
; CHECK: vfmadd231pd %xmm1, %xmm0, %xmm2
|
||||||
; CHECK: vmovapd %xmm2, %xmm0
|
; CHECK: vmovaps %xmm2, %xmm0
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
define <2 x double> @fmaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
define <2 x double> @fmaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||||
entry:
|
entry:
|
||||||
@@ -80,7 +80,7 @@ for.end:
|
|||||||
|
|
||||||
; CHECK-LABEL: fmsubpd_loop_128:
|
; CHECK-LABEL: fmsubpd_loop_128:
|
||||||
; CHECK: vfmsub231pd %xmm1, %xmm0, %xmm2
|
; CHECK: vfmsub231pd %xmm1, %xmm0, %xmm2
|
||||||
; CHECK: vmovapd %xmm2, %xmm0
|
; CHECK: vmovaps %xmm2, %xmm0
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
define <2 x double> @fmsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
define <2 x double> @fmsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||||
entry:
|
entry:
|
||||||
@@ -106,7 +106,7 @@ for.end:
|
|||||||
|
|
||||||
; CHECK-LABEL: fnmaddpd_loop_128:
|
; CHECK-LABEL: fnmaddpd_loop_128:
|
||||||
; CHECK: vfnmadd231pd %xmm1, %xmm0, %xmm2
|
; CHECK: vfnmadd231pd %xmm1, %xmm0, %xmm2
|
||||||
; CHECK: vmovapd %xmm2, %xmm0
|
; CHECK: vmovaps %xmm2, %xmm0
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
define <2 x double> @fnmaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
define <2 x double> @fnmaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||||
entry:
|
entry:
|
||||||
@@ -132,7 +132,7 @@ for.end:
|
|||||||
|
|
||||||
; CHECK-LABEL: fnmsubpd_loop_128:
|
; CHECK-LABEL: fnmsubpd_loop_128:
|
||||||
; CHECK: vfnmsub231pd %xmm1, %xmm0, %xmm2
|
; CHECK: vfnmsub231pd %xmm1, %xmm0, %xmm2
|
||||||
; CHECK: vmovapd %xmm2, %xmm0
|
; CHECK: vmovaps %xmm2, %xmm0
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
define <2 x double> @fnmsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
define <2 x double> @fnmsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
||||||
entry:
|
entry:
|
||||||
@@ -329,7 +329,7 @@ declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x floa
|
|||||||
|
|
||||||
; CHECK-LABEL: fmaddsubpd_loop_256:
|
; CHECK-LABEL: fmaddsubpd_loop_256:
|
||||||
; CHECK: vfmaddsub231pd %ymm1, %ymm0, %ymm2
|
; CHECK: vfmaddsub231pd %ymm1, %ymm0, %ymm2
|
||||||
; CHECK: vmovapd %ymm2, %ymm0
|
; CHECK: vmovaps %ymm2, %ymm0
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
define <4 x double> @fmaddsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
|
define <4 x double> @fmaddsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
|
||||||
entry:
|
entry:
|
||||||
@@ -355,7 +355,7 @@ for.end:
|
|||||||
|
|
||||||
; CHECK-LABEL: fmsubaddpd_loop_256:
|
; CHECK-LABEL: fmsubaddpd_loop_256:
|
||||||
; CHECK: vfmsubadd231pd %ymm1, %ymm0, %ymm2
|
; CHECK: vfmsubadd231pd %ymm1, %ymm0, %ymm2
|
||||||
; CHECK: vmovapd %ymm2, %ymm0
|
; CHECK: vmovaps %ymm2, %ymm0
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
define <4 x double> @fmsubaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
|
define <4 x double> @fmsubaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
|
||||||
entry:
|
entry:
|
||||||
@@ -381,7 +381,7 @@ for.end:
|
|||||||
|
|
||||||
; CHECK-LABEL: fmaddpd_loop_256:
|
; CHECK-LABEL: fmaddpd_loop_256:
|
||||||
; CHECK: vfmadd231pd %ymm1, %ymm0, %ymm2
|
; CHECK: vfmadd231pd %ymm1, %ymm0, %ymm2
|
||||||
; CHECK: vmovapd %ymm2, %ymm0
|
; CHECK: vmovaps %ymm2, %ymm0
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
define <4 x double> @fmaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
|
define <4 x double> @fmaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
|
||||||
entry:
|
entry:
|
||||||
@@ -407,7 +407,7 @@ for.end:
|
|||||||
|
|
||||||
; CHECK-LABEL: fmsubpd_loop_256:
|
; CHECK-LABEL: fmsubpd_loop_256:
|
||||||
; CHECK: vfmsub231pd %ymm1, %ymm0, %ymm2
|
; CHECK: vfmsub231pd %ymm1, %ymm0, %ymm2
|
||||||
; CHECK: vmovapd %ymm2, %ymm0
|
; CHECK: vmovaps %ymm2, %ymm0
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
define <4 x double> @fmsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
|
define <4 x double> @fmsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
|
||||||
entry:
|
entry:
|
||||||
@@ -433,7 +433,7 @@ for.end:
|
|||||||
|
|
||||||
; CHECK-LABEL: fnmaddpd_loop_256:
|
; CHECK-LABEL: fnmaddpd_loop_256:
|
||||||
; CHECK: vfnmadd231pd %ymm1, %ymm0, %ymm2
|
; CHECK: vfnmadd231pd %ymm1, %ymm0, %ymm2
|
||||||
; CHECK: vmovapd %ymm2, %ymm0
|
; CHECK: vmovaps %ymm2, %ymm0
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
define <4 x double> @fnmaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
|
define <4 x double> @fnmaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
|
||||||
entry:
|
entry:
|
||||||
@@ -459,7 +459,7 @@ for.end:
|
|||||||
|
|
||||||
; CHECK-LABEL: fnmsubpd_loop_256:
|
; CHECK-LABEL: fnmsubpd_loop_256:
|
||||||
; CHECK: vfnmsub231pd %ymm1, %ymm0, %ymm2
|
; CHECK: vfnmsub231pd %ymm1, %ymm0, %ymm2
|
||||||
; CHECK: vmovapd %ymm2, %ymm0
|
; CHECK: vmovaps %ymm2, %ymm0
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
define <4 x double> @fnmsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
|
define <4 x double> @fnmsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
|
||||||
entry:
|
entry:
|
||||||
|
|||||||
@@ -56,11 +56,11 @@ define double @profile_metadata(double %x, double %y) {
|
|||||||
; CHECK-NEXT: ucomisd %xmm1, %xmm0
|
; CHECK-NEXT: ucomisd %xmm1, %xmm0
|
||||||
; CHECK-NEXT: jne .LBB1_1
|
; CHECK-NEXT: jne .LBB1_1
|
||||||
; CHECK-NEXT: jp .LBB1_1
|
; CHECK-NEXT: jp .LBB1_1
|
||||||
; CHECK-NEXT: # %bb2
|
; CHECK-NEXT: .LBB1_2: # %bb2
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
; CHECK-NEXT: .LBB1_1: # %bb1
|
; CHECK-NEXT: .LBB1_1: # %bb1
|
||||||
; CHECK-NEXT: addsd {{.*}}(%rip), %xmm0
|
; CHECK-NEXT: addsd {{.*}}(%rip), %xmm0
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: jmp .LBB1_2
|
||||||
|
|
||||||
entry:
|
entry:
|
||||||
%mul = fmul double %x, %y
|
%mul = fmul double %x, %y
|
||||||
|
|||||||
@@ -15,8 +15,5 @@ l2: ; preds = %l1
|
|||||||
br label %l1
|
br label %l1
|
||||||
}
|
}
|
||||||
|
|
||||||
; It is correct for either l1 or l2 to be removed.
|
; CHECK: .Ltmp0: # Address of block that was removed by CodeGen
|
||||||
; If l2 is removed, the message should be "Address of block that was removed by CodeGen"
|
|
||||||
; If l1 is removed, it should be "Block address taken."
|
|
||||||
; CHECK: .Ltmp0: # {{Address of block that was removed by CodeGen|Block address taken}}
|
|
||||||
; CHECK: .quad .Ltmp0
|
; CHECK: .quad .Ltmp0
|
||||||
|
|||||||
@@ -3,34 +3,16 @@
|
|||||||
; This testing case is reduced from 197.parser prune_match function.
|
; This testing case is reduced from 197.parser prune_match function.
|
||||||
; We make sure register copies are not generated on isupper.exit blocks.
|
; We make sure register copies are not generated on isupper.exit blocks.
|
||||||
|
|
||||||
; isupper.exit and isupper.exit223 get tail-duplicated into all their
|
; CHECK: isupper.exit
|
||||||
; predecessors.
|
|
||||||
; CHECK: cond.true.i.i
|
|
||||||
; CHECK-NEXT: in Loop
|
; CHECK-NEXT: in Loop
|
||||||
; Mem-move
|
|
||||||
; CHECK-NEXT: movl
|
|
||||||
; CHECK-NEXT: andl
|
|
||||||
; CHECK-NEXT: testl
|
; CHECK-NEXT: testl
|
||||||
; CHECK-NEXT: jne
|
; CHECK-NEXT: jne
|
||||||
; CHECK: cond.true.i.i217
|
; CHECK: isupper.exit
|
||||||
; CHECK-NEXT: in Loop
|
; CHECK-NEXT: in Loop
|
||||||
; Mem-move
|
|
||||||
; CHECK-NEXT: movl
|
|
||||||
; CHECK-NEXT: andl
|
|
||||||
; CHECK-NEXT: testl
|
; CHECK-NEXT: testl
|
||||||
; CHECK-NEXT: je
|
; CHECK-NEXT: je
|
||||||
; CHECK: cond.false.i.i
|
|
||||||
; CHECK: maskrune
|
; CHECK: maskrune
|
||||||
; CHECK-NEXT: movzbl
|
|
||||||
; CHECK-NEXT: movzbl
|
|
||||||
; CHECK-NEXT: testl
|
|
||||||
; CHECK-NEXT: je
|
|
||||||
; CHECK: cond.false.i.i219
|
|
||||||
; CHECK: maskrune
|
; CHECK: maskrune
|
||||||
; CHECK-NEXT: movzbl
|
|
||||||
; CHECK-NEXT: movzbl
|
|
||||||
; CHECK-NEXT: testl
|
|
||||||
; CHECK-NEXT: jne
|
|
||||||
|
|
||||||
%struct.List_o_links_struct = type { i32, i32, i32, %struct.List_o_links_struct* }
|
%struct.List_o_links_struct = type { i32, i32, i32, %struct.List_o_links_struct* }
|
||||||
%struct.Connector_struct = type { i16, i16, i8, i8, %struct.Connector_struct*, i8* }
|
%struct.Connector_struct = type { i16, i16, i8, i8, %struct.Connector_struct*, i8* }
|
||||||
|
|||||||
@@ -58,23 +58,21 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) {
|
|||||||
; X32-NEXT: je .LBB1_1
|
; X32-NEXT: je .LBB1_1
|
||||||
; X32-NEXT: # BB#2: # %entry
|
; X32-NEXT: # BB#2: # %entry
|
||||||
; X32-NEXT: xorps %xmm1, %xmm1
|
; X32-NEXT: xorps %xmm1, %xmm1
|
||||||
; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
|
; X32-NEXT: jmp .LBB1_3
|
||||||
; X32-NEXT: jne .LBB1_5
|
|
||||||
; X32-NEXT: jmp .LBB1_4
|
|
||||||
; X32-NEXT: .LBB1_1:
|
; X32-NEXT: .LBB1_1:
|
||||||
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||||
|
; X32-NEXT: .LBB1_3: # %entry
|
||||||
; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
|
; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
|
||||||
; X32-NEXT: je .LBB1_4
|
; X32-NEXT: je .LBB1_4
|
||||||
; X32-NEXT: .LBB1_5: # %entry
|
; X32-NEXT: # BB#5: # %entry
|
||||||
; X32-NEXT: xorps %xmm2, %xmm2
|
; X32-NEXT: xorps %xmm2, %xmm2
|
||||||
; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
|
; X32-NEXT: jmp .LBB1_6
|
||||||
; X32-NEXT: jne .LBB1_8
|
|
||||||
; X32-NEXT: jmp .LBB1_7
|
|
||||||
; X32-NEXT: .LBB1_4:
|
; X32-NEXT: .LBB1_4:
|
||||||
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||||
|
; X32-NEXT: .LBB1_6: # %entry
|
||||||
; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
|
; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
|
||||||
; X32-NEXT: je .LBB1_7
|
; X32-NEXT: je .LBB1_7
|
||||||
; X32-NEXT: .LBB1_8: # %entry
|
; X32-NEXT: # BB#8: # %entry
|
||||||
; X32-NEXT: xorps %xmm3, %xmm3
|
; X32-NEXT: xorps %xmm3, %xmm3
|
||||||
; X32-NEXT: jmp .LBB1_9
|
; X32-NEXT: jmp .LBB1_9
|
||||||
; X32-NEXT: .LBB1_7:
|
; X32-NEXT: .LBB1_7:
|
||||||
@@ -97,23 +95,21 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) {
|
|||||||
; X64-NEXT: je .LBB1_1
|
; X64-NEXT: je .LBB1_1
|
||||||
; X64-NEXT: # BB#2: # %entry
|
; X64-NEXT: # BB#2: # %entry
|
||||||
; X64-NEXT: xorps %xmm1, %xmm1
|
; X64-NEXT: xorps %xmm1, %xmm1
|
||||||
; X64-NEXT: testl %edx, %edx
|
; X64-NEXT: jmp .LBB1_3
|
||||||
; X64-NEXT: jne .LBB1_5
|
|
||||||
; X64-NEXT: jmp .LBB1_4
|
|
||||||
; X64-NEXT: .LBB1_1:
|
; X64-NEXT: .LBB1_1:
|
||||||
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||||
|
; X64-NEXT: .LBB1_3: # %entry
|
||||||
; X64-NEXT: testl %edx, %edx
|
; X64-NEXT: testl %edx, %edx
|
||||||
; X64-NEXT: je .LBB1_4
|
; X64-NEXT: je .LBB1_4
|
||||||
; X64-NEXT: .LBB1_5: # %entry
|
; X64-NEXT: # BB#5: # %entry
|
||||||
; X64-NEXT: xorps %xmm2, %xmm2
|
; X64-NEXT: xorps %xmm2, %xmm2
|
||||||
; X64-NEXT: testl %r8d, %r8d
|
; X64-NEXT: jmp .LBB1_6
|
||||||
; X64-NEXT: jne .LBB1_8
|
|
||||||
; X64-NEXT: jmp .LBB1_7
|
|
||||||
; X64-NEXT: .LBB1_4:
|
; X64-NEXT: .LBB1_4:
|
||||||
; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||||
|
; X64-NEXT: .LBB1_6: # %entry
|
||||||
; X64-NEXT: testl %r8d, %r8d
|
; X64-NEXT: testl %r8d, %r8d
|
||||||
; X64-NEXT: je .LBB1_7
|
; X64-NEXT: je .LBB1_7
|
||||||
; X64-NEXT: .LBB1_8: # %entry
|
; X64-NEXT: # BB#8: # %entry
|
||||||
; X64-NEXT: xorps %xmm3, %xmm3
|
; X64-NEXT: xorps %xmm3, %xmm3
|
||||||
; X64-NEXT: jmp .LBB1_9
|
; X64-NEXT: jmp .LBB1_9
|
||||||
; X64-NEXT: .LBB1_7:
|
; X64-NEXT: .LBB1_7:
|
||||||
|
|||||||
@@ -1,190 +0,0 @@
|
|||||||
; RUN: llc -O2 -o - %s | FileCheck %s
|
|
||||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
||||||
target triple = "x86_64-unknown-linux-gnu"
|
|
||||||
|
|
||||||
; Function Attrs: nounwind uwtable
|
|
||||||
; CHECK-LABEL: tail_dup_merge_loops
|
|
||||||
; CHECK: # %entry
|
|
||||||
; CHECK-NOT: # %{{[a-zA-Z_]+}}
|
|
||||||
; CHECK: # %inner_loop_exit
|
|
||||||
; CHECK-NOT: # %{{[a-zA-Z_]+}}
|
|
||||||
; CHECK: # %inner_loop_latch
|
|
||||||
; CHECK-NOT: # %{{[a-zA-Z_]+}}
|
|
||||||
; CHECK: # %inner_loop_test
|
|
||||||
; CHECK-NOT: # %{{[a-zA-Z_]+}}
|
|
||||||
; CHECK: # %exit
|
|
||||||
define void @tail_dup_merge_loops(i32 %a, i8* %b, i8* %c) local_unnamed_addr #0 {
|
|
||||||
entry:
|
|
||||||
%notlhs674.i = icmp eq i32 %a, 0
|
|
||||||
br label %outer_loop_top
|
|
||||||
|
|
||||||
outer_loop_top: ; preds = %inner_loop_exit, %entry
|
|
||||||
%dst.0.ph.i = phi i8* [ %b, %entry ], [ %scevgep679.i, %inner_loop_exit ]
|
|
||||||
br i1 %notlhs674.i, label %exit, label %inner_loop_preheader
|
|
||||||
|
|
||||||
inner_loop_preheader: ; preds = %outer_loop_top
|
|
||||||
br label %inner_loop_top
|
|
||||||
|
|
||||||
inner_loop_top: ; preds = %inner_loop_latch, %inner_loop_preheader
|
|
||||||
%dst.0.i = phi i8* [ %inc, %inner_loop_latch ], [ %dst.0.ph.i, %inner_loop_preheader ]
|
|
||||||
%var = load i8, i8* %dst.0.i
|
|
||||||
%tobool1.i = icmp slt i8 %var, 0
|
|
||||||
br label %inner_loop_test
|
|
||||||
|
|
||||||
inner_loop_test: ; preds = %inner_loop_top
|
|
||||||
br i1 %tobool1.i, label %inner_loop_exit, label %inner_loop_latch
|
|
||||||
|
|
||||||
inner_loop_exit: ; preds = %inner_loop_test
|
|
||||||
%scevgep.i = getelementptr i8, i8* %dst.0.i, i64 1
|
|
||||||
%scevgep679.i = getelementptr i8, i8* %scevgep.i, i64 0
|
|
||||||
br label %outer_loop_top
|
|
||||||
|
|
||||||
inner_loop_latch: ; preds = %inner_loop_test
|
|
||||||
%cmp75.i = icmp ult i8* %dst.0.i, %c
|
|
||||||
%inc = getelementptr i8, i8* %dst.0.i, i64 2
|
|
||||||
br label %inner_loop_top
|
|
||||||
|
|
||||||
exit: ; preds = %outer_loop_top
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
@.str.6 = external unnamed_addr constant [23 x i8], align 1
|
|
||||||
|
|
||||||
; There is an erroneous check in LoopBase::addBasicBlockToLoop(), where it
|
|
||||||
; assumes that the header block for a loop is unique.
|
|
||||||
; For most of compilation this assumption is true, but during layout we allow
|
|
||||||
; this assumption to be violated. The following code will trigger the bug:
|
|
||||||
|
|
||||||
; The loops in question are eventually headed by the block shared_loop_header
|
|
||||||
;
|
|
||||||
; During layout, the block labeled outer_loop_header gets tail-duplicated into
|
|
||||||
; outer_loop_latch, and into shared_preheader, and then removed. This leaves
|
|
||||||
; shared_loop_header as the header of both loops. The end result
|
|
||||||
; is that there are 2 valid loops, and that they share a header. If we re-ran
|
|
||||||
; the loop analysis, it would classify this as a single loop.
|
|
||||||
; So far this is fine as far as layout is concerned.
|
|
||||||
; After layout we tail merge blocks merge_other and merge_predecessor_split.
|
|
||||||
; We do this even though they share only a single instruction, because
|
|
||||||
; merge_predecessor_split falls through to their shared successor:
|
|
||||||
; outer_loop_latch.
|
|
||||||
; The rest of the blocks in the function are noise, unfortunately. Bugpoint
|
|
||||||
; couldn't shrink the test any further.
|
|
||||||
|
|
||||||
; CHECK-LABEL: loop_shared_header
|
|
||||||
; CHECK: # %entry
|
|
||||||
; CHECK: # %shared_preheader
|
|
||||||
; CHECK: # %shared_loop_header
|
|
||||||
; CHECK: # %inner_loop_body
|
|
||||||
; CHECK: # %merge_predecessor_split
|
|
||||||
; CHECK: # %outer_loop_latch
|
|
||||||
; CHECK: # %outer_loop_latch
|
|
||||||
; CHECK: # %cleanup
|
|
||||||
define i32 @loop_shared_header(i8* %exe, i32 %exesz, i32 %headsize, i32 %min, i32 %wwprva, i32 %e_lfanew, i8* readonly %wwp, i32 %wwpsz, i16 zeroext %sects) local_unnamed_addr #0 {
|
|
||||||
entry:
|
|
||||||
%0 = load i32, i32* undef, align 4
|
|
||||||
%mul = shl nsw i32 %0, 2
|
|
||||||
br i1 undef, label %if.end19, label %cleanup
|
|
||||||
|
|
||||||
if.end19: ; preds = %entry
|
|
||||||
%conv = zext i32 %mul to i64
|
|
||||||
%call = tail call i8* @cli_calloc(i64 %conv, i64 1)
|
|
||||||
%1 = icmp eq i32 %exesz, 0
|
|
||||||
%notrhs = icmp eq i32 %0, 0
|
|
||||||
%or.cond117.not = or i1 %1, %notrhs
|
|
||||||
%or.cond202 = or i1 %or.cond117.not, undef
|
|
||||||
%cmp35 = icmp ult i8* undef, %exe
|
|
||||||
%or.cond203 = or i1 %or.cond202, %cmp35
|
|
||||||
br i1 %or.cond203, label %cleanup, label %if.end50
|
|
||||||
|
|
||||||
if.end50: ; preds = %if.end19
|
|
||||||
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %call, i8* undef, i64 %conv, i32 1, i1 false)
|
|
||||||
%cmp1.i.i = icmp ugt i32 %mul, 3
|
|
||||||
br i1 %cmp1.i.i, label %shared_preheader, label %wunpsect.exit.thread.loopexit391
|
|
||||||
|
|
||||||
shared_preheader: ; preds = %if.end50
|
|
||||||
br label %outer_loop_header
|
|
||||||
|
|
||||||
outer_loop_header: ; preds = %outer_loop_latch, %shared_preheader
|
|
||||||
%bits.1.i = phi i8 [ 32, %shared_preheader ], [ %bits.43.i, %outer_loop_latch ]
|
|
||||||
%dst.0.ph.i = phi i8* [ undef, %shared_preheader ], [ %scevgep679.i, %outer_loop_latch ]
|
|
||||||
%2 = icmp eq i32 undef, 0
|
|
||||||
br i1 %2, label %while.cond.us1412.i, label %shared_loop_header
|
|
||||||
|
|
||||||
while.cond.us1412.i: ; preds = %outer_loop_header
|
|
||||||
%.pre.i = add i8 %bits.1.i, -1
|
|
||||||
%tobool2.us1420.i = icmp eq i8 %.pre.i, 0
|
|
||||||
%or.cond.us1421.i = or i1 undef, %tobool2.us1420.i
|
|
||||||
br i1 %or.cond.us1421.i, label %if.end41.us1436.i, label %cleanup
|
|
||||||
|
|
||||||
if.end41.us1436.i: ; preds = %while.cond.us1412.i
|
|
||||||
unreachable
|
|
||||||
|
|
||||||
shared_loop_header: ; preds = %dup_early2, %dup_early1
|
|
||||||
%dst.0.i = phi i8* [ undef, %inner_loop_body ], [ %dst.0.ph.i, %outer_loop_header ], [ undef, %dead_block ]
|
|
||||||
%cmp3.i1172.i = icmp ult i8* undef, %call
|
|
||||||
br i1 %cmp3.i1172.i, label %wunpsect.exit.thread.loopexit389, label %inner_loop_body
|
|
||||||
|
|
||||||
inner_loop_body: ; preds = %shared_loop_header
|
|
||||||
%3 = icmp slt i32 undef, 0
|
|
||||||
br i1 %3, label %if.end96.i, label %shared_loop_header
|
|
||||||
|
|
||||||
dead_block: ; preds = %inner_loop_body
|
|
||||||
%cmp75.i = icmp ult i8* %dst.0.i, undef
|
|
||||||
br label %shared_loop_header
|
|
||||||
|
|
||||||
if.end96.i: ; preds = %inner_loop_body
|
|
||||||
%cmp97.i = icmp ugt i32 undef, 2
|
|
||||||
br i1 %cmp97.i, label %if.then99.i, label %if.end287.i
|
|
||||||
|
|
||||||
if.then99.i: ; preds = %if.end96.i
|
|
||||||
tail call void (i8*, ...) @cli_dbgmsg(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.6, i64 0, i64 0), i32 undef)
|
|
||||||
br label %cleanup
|
|
||||||
|
|
||||||
if.end287.i: ; preds = %if.end96.i
|
|
||||||
%cmp291.i = icmp ne i32 undef, 1
|
|
||||||
%conv294.i = select i1 %cmp291.i, i16 4, i16 3
|
|
||||||
br i1 undef, label %if.end308.i, label %outer_loop_latch
|
|
||||||
|
|
||||||
if.end308.i: ; preds = %if.end287.i
|
|
||||||
br i1 undef, label %if.end335.i, label %merge_predecessor_split
|
|
||||||
|
|
||||||
merge_predecessor_split: ; preds = %if.end308.i
|
|
||||||
%4 = bitcast i8* undef to i32*
|
|
||||||
br label %outer_loop_latch
|
|
||||||
|
|
||||||
if.end335.i: ; preds = %if.end308.i
|
|
||||||
br i1 undef, label %outer_loop_latch, label %merge_other
|
|
||||||
|
|
||||||
merge_other: ; preds = %if.end335.i
|
|
||||||
br label %outer_loop_latch
|
|
||||||
|
|
||||||
outer_loop_latch: ; preds = %merge_other, %if.end335.i, %merge_predecessor_split, %if.end287.i
|
|
||||||
%bits.43.i = phi i8 [ undef, %if.end287.i ], [ undef, %merge_other ], [ 32, %merge_predecessor_split ], [ 0, %if.end335.i ]
|
|
||||||
%backsize.0.i = phi i16 [ %conv294.i, %if.end287.i ], [ 0, %merge_other ], [ 0, %merge_predecessor_split ], [ 0, %if.end335.i ]
|
|
||||||
%5 = add i16 %backsize.0.i, -1
|
|
||||||
%6 = zext i16 %5 to i64
|
|
||||||
%scevgep.i = getelementptr i8, i8* %dst.0.ph.i, i64 1
|
|
||||||
%scevgep679.i = getelementptr i8, i8* %scevgep.i, i64 %6
|
|
||||||
br label %outer_loop_header
|
|
||||||
|
|
||||||
wunpsect.exit.thread.loopexit389: ; preds = %shared_loop_header
|
|
||||||
unreachable
|
|
||||||
|
|
||||||
wunpsect.exit.thread.loopexit391: ; preds = %if.end50
|
|
||||||
unreachable
|
|
||||||
|
|
||||||
cleanup: ; preds = %if.then99.i, %while.cond.us1412.i, %if.end19, %entry
|
|
||||||
%retval.0 = phi i32 [ 0, %if.then99.i ], [ 1, %entry ], [ 1, %if.end19 ], [ 1, %while.cond.us1412.i ]
|
|
||||||
ret i32 %retval.0
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: nounwind
|
|
||||||
declare void @cli_dbgmsg(i8*, ...) local_unnamed_addr #0
|
|
||||||
|
|
||||||
; Function Attrs: nounwind
|
|
||||||
declare i8* @cli_calloc(i64, i64) local_unnamed_addr #0
|
|
||||||
|
|
||||||
; Function Attrs: argmemonly nounwind
|
|
||||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1
|
|
||||||
attributes #0 = { nounwind }
|
|
||||||
attributes #1 = { argmemonly nounwind }
|
|
||||||
@@ -1,53 +0,0 @@
|
|||||||
; RUN: llc -O2 -tail-dup-placement-threshold=4 -o - %s | FileCheck %s
|
|
||||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
||||||
target triple = "x86_64-unknown-linux-gnu"
|
|
||||||
|
|
||||||
; Function Attrs: uwtable
|
|
||||||
; When tail-duplicating during placement, we work backward from blocks with
|
|
||||||
; multiple successors. In this case, the block dup1 gets duplicated into dup2
|
|
||||||
; and if.then64, and then the block dup2 gets duplicated into land.lhs.true
|
|
||||||
; and if.end70
|
|
||||||
; CHECK-LABEL: repeated_tail_dup:
|
|
||||||
define void @repeated_tail_dup(i1 %a1, i1 %a2, i32* %a4, i32* %a5, i8* %a6) #0 align 2 {
|
|
||||||
entry:
|
|
||||||
br label %for.cond
|
|
||||||
|
|
||||||
; CHECK: {{^}}.[[HEADER:LBB0_[1-9]]]: # %for.cond
|
|
||||||
for.cond: ; preds = %dup1, %entry
|
|
||||||
br i1 %a1, label %land.lhs.true, label %if.end56
|
|
||||||
|
|
||||||
land.lhs.true: ; preds = %for.cond
|
|
||||||
store i32 10, i32* %a4, align 8
|
|
||||||
br label %dup2
|
|
||||||
|
|
||||||
if.end56: ; preds = %for.cond
|
|
||||||
br i1 %a2, label %if.then64, label %if.end70
|
|
||||||
|
|
||||||
if.then64: ; preds = %if.end56
|
|
||||||
store i8 1, i8* %a6, align 1
|
|
||||||
br label %dup1
|
|
||||||
|
|
||||||
; CHECK: # %if.end70
|
|
||||||
; CHECK-NEXT: # in Loop:
|
|
||||||
; CHECK-NEXT: movl $12, (%rdx)
|
|
||||||
; CHECK-NEXT: movl $2, (%rcx)
|
|
||||||
; CHECK-NEXT: testl %eax, %eax
|
|
||||||
; CHECK-NEXT: je .[[HEADER]]
|
|
||||||
if.end70: ; preds = %if.end56
|
|
||||||
store i32 12, i32* %a4, align 8
|
|
||||||
br label %dup2
|
|
||||||
|
|
||||||
dup2: ; preds = %if.end70, %land.lhs.true
|
|
||||||
store i32 2, i32* %a5, align 4
|
|
||||||
br label %dup1
|
|
||||||
|
|
||||||
dup1: ; preds = %dup2, %if.then64
|
|
||||||
%val = load i32, i32* %a4, align 8
|
|
||||||
%switch = icmp ult i32 undef, 1
|
|
||||||
br i1 %switch, label %for.cond, label %for.end
|
|
||||||
|
|
||||||
for.end: ; preds = %dup1
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
attributes #0 = { uwtable }
|
|
||||||
@@ -5,30 +5,17 @@
|
|||||||
@a = external global i16
|
@a = external global i16
|
||||||
@b = external global i32
|
@b = external global i32
|
||||||
|
|
||||||
declare void @dummy1()
|
|
||||||
declare void @dummy2()
|
|
||||||
declare void @dummy3()
|
|
||||||
|
|
||||||
; Function Attrs: nounwind
|
; Function Attrs: nounwind
|
||||||
define void @f2() {
|
define void @f2() {
|
||||||
br i1 undef, label %bb1, label %bb3
|
br i1 undef, label %bb1, label %bb3
|
||||||
|
|
||||||
bb1:
|
bb1:
|
||||||
call void @dummy1()
|
|
||||||
call void @dummy1()
|
|
||||||
call void @dummy1()
|
|
||||||
br i1 undef, label %bb2, label %bb2
|
br i1 undef, label %bb2, label %bb2
|
||||||
|
|
||||||
bb2:
|
bb2:
|
||||||
call void @dummy2()
|
|
||||||
call void @dummy2()
|
|
||||||
call void @dummy2()
|
|
||||||
br label %bb4
|
br label %bb4
|
||||||
|
|
||||||
bb3:
|
bb3:
|
||||||
call void @dummy3()
|
|
||||||
call void @dummy3()
|
|
||||||
call void @dummy3()
|
|
||||||
br label %bb2
|
br label %bb2
|
||||||
|
|
||||||
bb4:
|
bb4:
|
||||||
@@ -53,24 +40,15 @@ body: |
|
|||||||
bb.1:
|
bb.1:
|
||||||
successors: %bb.2(100)
|
successors: %bb.2(100)
|
||||||
|
|
||||||
CALL64pcrel32 @dummy1, csr_64, implicit %rsp, implicit-def %rsp
|
|
||||||
CALL64pcrel32 @dummy1, csr_64, implicit %rsp, implicit-def %rsp
|
|
||||||
CALL64pcrel32 @dummy1, csr_64, implicit %rsp, implicit-def %rsp
|
|
||||||
JNE_1 %bb.2, implicit %eflags
|
JNE_1 %bb.2, implicit %eflags
|
||||||
|
|
||||||
bb.2:
|
bb.2:
|
||||||
successors: %bb.4(100)
|
successors: %bb.4(100)
|
||||||
|
|
||||||
CALL64pcrel32 @dummy2, csr_64, implicit %rsp, implicit-def %rsp
|
|
||||||
CALL64pcrel32 @dummy2, csr_64, implicit %rsp, implicit-def %rsp
|
|
||||||
CALL64pcrel32 @dummy2, csr_64, implicit %rsp, implicit-def %rsp
|
|
||||||
JMP_1 %bb.4
|
JMP_1 %bb.4
|
||||||
|
|
||||||
bb.3:
|
bb.3:
|
||||||
successors: %bb.2(100)
|
successors: %bb.2(100)
|
||||||
CALL64pcrel32 @dummy3, csr_64, implicit %rsp, implicit-def %rsp
|
|
||||||
CALL64pcrel32 @dummy3, csr_64, implicit %rsp, implicit-def %rsp
|
|
||||||
CALL64pcrel32 @dummy3, csr_64, implicit %rsp, implicit-def %rsp
|
|
||||||
JMP_1 %bb.2
|
JMP_1 %bb.2
|
||||||
|
|
||||||
bb.4:
|
bb.4:
|
||||||
|
|||||||
Reference in New Issue
Block a user