Compare commits
17 Commits
mangling
...
globalisel
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0943946b0b | ||
|
|
7bb87d5338 | ||
|
|
a5d7fc1d10 | ||
|
|
29bf32efbb | ||
|
|
deefe3fbc9 | ||
|
|
07a5e31cb3 | ||
|
|
5c95484061 | ||
|
|
0e3fbfd1e1 | ||
|
|
0d08282310 | ||
|
|
44a81af510 | ||
|
|
35a66f965c | ||
|
|
631248dcd2 | ||
|
|
71defe40b7 | ||
|
|
3cd988914e | ||
|
|
893717446b | ||
|
|
aa7e4ba3ca | ||
|
|
af90e1975c |
@@ -44,6 +44,7 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
|
||||
<< " Val: " << Twine::utohexstr(BBInputOffset) << "\n");
|
||||
LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n",
|
||||
getBBHash(HotFuncAddress, BBInputOffset)));
|
||||
(void)HotFuncAddress;
|
||||
// In case of conflicts (same Key mapping to different Vals), the last
|
||||
// update takes precedence. Of course it is not ideal to have conflicts and
|
||||
// those happen when we have an empty BB that either contained only
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
|
||||
# RUN: llvm-dwarfdump --show-form --verbose --debug-addr %t.bolt > %t.txt
|
||||
# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt >> %t.txt
|
||||
# RUN: cat %t.txt | FileCheck --check-prefix=POSTCHECK %s
|
||||
|
||||
# This test checks that we correctly handle DW_AT_low_pc [DW_FORM_addrx] that is part of DW_TAG_label.
|
||||
|
||||
@@ -35,16 +36,14 @@
|
||||
# POSTCHECK-NEXT: DW_AT_name
|
||||
# POSTCHECK-NEXT: DW_AT_decl_file
|
||||
# POSTCHECK-NEXT: DW_AT_decl_line
|
||||
# POSTCHECK-NEXT:
|
||||
# POSTCHECK-NEXT:DW_AT_low_pc [DW_FORM_addrx] (indexed (00000002)
|
||||
# POSTCHECK-SAME: [0x[[#ADDR]]
|
||||
# POSTCHECK-SAME: 0x[[#ADDR]]
|
||||
# POSTCHECK: DW_TAG_label
|
||||
# POSTCHECK-NEXT: DW_AT_name
|
||||
# POSTCHECK-NEXT: DW_AT_decl_file
|
||||
# POSTCHECK-NEXT: DW_AT_decl_line
|
||||
# POSTCHECK-NEXT:
|
||||
# POSTCHECK-NEXT:DW_AT_low_pc [DW_FORM_addrx] (indexed (00000003)
|
||||
# POSTCHECK-SAME: [0x[[#ADDR2]]
|
||||
# POSTCHECK-SAME: 0x[[#ADDR2]]
|
||||
|
||||
# clang++ main.cpp -g -S
|
||||
# int main() {
|
||||
|
||||
@@ -3019,6 +3019,10 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
|
||||
ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE,
|
||||
/*isNamedArg*/false);
|
||||
|
||||
// Empty records are ignored for parameter passing purposes.
|
||||
if (AI.isIgnore())
|
||||
return CGF.CreateMemTemp(Ty);
|
||||
|
||||
// AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
|
||||
// in the registers. If not go to step 7.
|
||||
if (!neededInt && !neededSSE)
|
||||
|
||||
23
clang/test/CodeGenCXX/x86_64-vaarg.cpp
Normal file
23
clang/test/CodeGenCXX/x86_64-vaarg.cpp
Normal file
@@ -0,0 +1,23 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
||||
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
|
||||
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -x c -o - %s | FileCheck %s
|
||||
|
||||
typedef struct { struct {} a; } empty;
|
||||
|
||||
// CHECK-LABEL: @{{.*}}empty_record_test
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_EMPTY:%.*]], align 1
|
||||
// CHECK-NEXT: [[Z_ADDR:%.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
|
||||
// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_EMPTY]], align 1
|
||||
// CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4
|
||||
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
|
||||
// CHECK-NEXT: call void @llvm.va_start(ptr [[ARRAYDECAY]])
|
||||
// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
|
||||
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[RETVAL]], ptr align 1 [[TMP]], i64 {{.*}}, i1 false)
|
||||
// CHECK-NEXT: ret void
|
||||
empty empty_record_test(int z, ...) {
|
||||
__builtin_va_list list;
|
||||
__builtin_va_start(list, z);
|
||||
return __builtin_va_arg(list, empty);
|
||||
}
|
||||
@@ -175,7 +175,7 @@ void test_stat() {
|
||||
|
||||
s.st_dev = i;
|
||||
SAVE_ORIGINS(s)
|
||||
ret = stat("/nonexistent", &s);
|
||||
ret = stat("/nonexistent_581cb021aba7", &s);
|
||||
assert(-1 == ret);
|
||||
ASSERT_ZERO_LABEL(ret);
|
||||
ASSERT_LABEL(s.st_dev, i_label);
|
||||
|
||||
@@ -61,7 +61,8 @@ if(LLVM_LIBC_FULL_BUILD OR LLVM_LIBC_GPU_BUILD)
|
||||
endif()
|
||||
endif()
|
||||
# We will build the GPU utilities if we are not doing a runtimes build.
|
||||
if(LLVM_LIBC_GPU_BUILD AND NOT LLVM_RUNTIMES_BUILD)
|
||||
option(LIBC_BUILD_GPU_LOADER "Always build the GPU loader utilities" OFF)
|
||||
if(LIBC_BUILD_GPU_LOADER OR (LLVM_LIBC_GPU_BUILD AND NOT LLVM_RUNTIMES_BUILD))
|
||||
add_subdirectory(utils/gpu)
|
||||
endif()
|
||||
|
||||
|
||||
@@ -673,6 +673,14 @@ public:
|
||||
bool matchSDivByConst(MachineInstr &MI);
|
||||
void applySDivByConst(MachineInstr &MI);
|
||||
|
||||
/// Given a G_SDIV \p MI expressing a signed division by a pow2 constant,
|
||||
/// return expressions that implement it by shifting.
|
||||
bool matchDivByPow2(MachineInstr &MI, bool IsSigned);
|
||||
void applySDivByPow2(MachineInstr &MI);
|
||||
/// Given a G_UDIV \p MI expressing an unsigned division by a pow2 constant,
|
||||
/// return expressions that implement it by shifting.
|
||||
void applyUDivByPow2(MachineInstr &MI);
|
||||
|
||||
// G_UMULH x, (1 << c)) -> x >> (bitwidth - c)
|
||||
bool matchUMulHToLShr(MachineInstr &MI);
|
||||
void applyUMulHToLShr(MachineInstr &MI);
|
||||
|
||||
@@ -429,6 +429,7 @@ public:
|
||||
LegalizeResult lowerDIVREM(MachineInstr &MI);
|
||||
LegalizeResult lowerAbsToAddXor(MachineInstr &MI);
|
||||
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI);
|
||||
LegalizeResult lowerAbsToCNeg(MachineInstr &MI);
|
||||
LegalizeResult lowerVectorReduction(MachineInstr &MI);
|
||||
LegalizeResult lowerMemcpyInline(MachineInstr &MI);
|
||||
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
|
||||
|
||||
192
llvm/include/llvm/Passes/TargetPassRegistry.inc
Normal file
192
llvm/include/llvm/Passes/TargetPassRegistry.inc
Normal file
@@ -0,0 +1,192 @@
|
||||
//===- TargetPassRegistry.inc - Registry of passes --------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file is used as the registry of passes in registerPassBuilderCallbacks
|
||||
// Just put the following lines in the body of registerPassBuilderCallbacks:
|
||||
// #define GET_PASS_REGISTRY "<Target>PassRegistry.def"
|
||||
// #include "llvm/Passes/TargetPassRegistry.inc"
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// NOTE: NO INCLUDE GUARD DESIRED!
|
||||
|
||||
#ifdef GET_PASS_REGISTRY
|
||||
|
||||
#if !__has_include(GET_PASS_REGISTRY)
|
||||
#error "must provide <Target>PassRegistry.def"
|
||||
#endif
|
||||
|
||||
if (PopulateClassToPassNames) {
|
||||
auto *PIC = PB.getPassInstrumentationCallbacks();
|
||||
|
||||
#define ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS) \
|
||||
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
|
||||
#define ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS) \
|
||||
PIC->addClassToPassName(CLASS, NAME);
|
||||
|
||||
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
|
||||
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
|
||||
#define MODULE_PASS(NAME, CREATE_PASS) \
|
||||
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
|
||||
#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
|
||||
ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)
|
||||
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
|
||||
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
|
||||
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
|
||||
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
|
||||
#define FUNCTION_PASS(NAME, CREATE_PASS) \
|
||||
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
|
||||
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
|
||||
ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)
|
||||
#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
|
||||
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
|
||||
#define LOOP_PASS(NAME, CREATE_PASS) \
|
||||
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
|
||||
#define MACHINE_FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
|
||||
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
|
||||
#define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS) \
|
||||
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
|
||||
#define MACHINE_FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, \
|
||||
PARAMS) \
|
||||
ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)
|
||||
#include GET_PASS_REGISTRY
|
||||
#undef MODULE_ANALYSIS
|
||||
#undef MODULE_PASS
|
||||
#undef MODULE_PASS_WITH_PARAMS
|
||||
#undef FUNCTION_ANALYSIS
|
||||
#undef FUNCTION_ALIAS_ANALYSIS
|
||||
#undef FUNCTION_PASS
|
||||
#undef FUNCTION_PASS_WITH_PARAMS
|
||||
#undef LOOP_ANALYSIS
|
||||
#undef LOOP_PASS
|
||||
#undef MACHINE_FUNCTION_ANALYSIS
|
||||
#undef MACHINE_FUNCTION_PASS
|
||||
#undef MACHINE_FUNCTION_PASS_WITH_PARAMS
|
||||
#undef ADD_CLASS_PASS_TO_PASS_NAME
|
||||
#undef ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS
|
||||
}
|
||||
|
||||
#define ADD_PASS(NAME, CREATE_PASS) \
|
||||
if (Name == NAME) { \
|
||||
PM.addPass(CREATE_PASS); \
|
||||
return true; \
|
||||
}
|
||||
|
||||
#define ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \
|
||||
if (PassBuilder::checkParametrizedPassName(Name, NAME)) { \
|
||||
auto Params = PassBuilder::parsePassParameters(PARSER, Name, NAME); \
|
||||
if (!Params) \
|
||||
return false; \
|
||||
PM.addPass(CREATE_PASS(Params.get())); \
|
||||
return true; \
|
||||
}
|
||||
|
||||
PB.registerPipelineParsingCallback([=](StringRef Name, ModulePassManager &PM,
|
||||
ArrayRef<PassBuilder::PipelineElement>) {
|
||||
#define MODULE_PASS(NAME, CREATE_PASS) ADD_PASS(NAME, CREATE_PASS)
|
||||
#include GET_PASS_REGISTRY
|
||||
#undef MODULE_PASS
|
||||
return false;
|
||||
});
|
||||
|
||||
PB.registerPipelineParsingCallback([=](StringRef Name, ModulePassManager &PM,
|
||||
ArrayRef<PassBuilder::PipelineElement>) {
|
||||
#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
|
||||
ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
|
||||
#include GET_PASS_REGISTRY
|
||||
#undef MODULE_PASS_WITH_PARAMS
|
||||
return false;
|
||||
});
|
||||
|
||||
PB.registerPipelineParsingCallback([=](StringRef Name, FunctionPassManager &PM,
|
||||
ArrayRef<PassBuilder::PipelineElement>) {
|
||||
#define FUNCTION_PASS(NAME, CREATE_PASS) ADD_PASS(NAME, CREATE_PASS)
|
||||
#include GET_PASS_REGISTRY
|
||||
#undef FUNCTION_PASS
|
||||
return false;
|
||||
});
|
||||
|
||||
PB.registerPipelineParsingCallback([=](StringRef Name, FunctionPassManager &PM,
|
||||
ArrayRef<PassBuilder::PipelineElement>) {
|
||||
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
|
||||
ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
|
||||
#include GET_PASS_REGISTRY
|
||||
#undef FUNCTION_PASS_WITH_PARAMS
|
||||
return false;
|
||||
});
|
||||
|
||||
// Pipeline-parsing hook for target loop passes: every LOOP_PASS entry of the
// target's registry expands to an ADD_PASS name check against `Name`.
PB.registerPipelineParsingCallback([=](StringRef Name, LoopPassManager &PM,
                                       ArrayRef<PassBuilder::PipelineElement>) {
#define LOOP_PASS(NAME, CREATE_PASS) ADD_PASS(NAME, CREATE_PASS)
#include GET_PASS_REGISTRY
// Drop the local definition explicitly, like the sibling callbacks do, rather
// than relying on the registry file to undefine it. Otherwise a later
// inclusion of the registry would still expand LOOP_PASS to ADD_PASS.
#undef LOOP_PASS
  return false;
});
|
||||
|
||||
// Pipeline-parsing hook for target machine-function passes: every
// MACHINE_FUNCTION_PASS entry of the target's registry expands to an ADD_PASS
// name check against `Name`.
PB.registerPipelineParsingCallback([=](StringRef Name,
                                       MachineFunctionPassManager &PM,
                                       ArrayRef<PassBuilder::PipelineElement>) {
#define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS) ADD_PASS(NAME, CREATE_PASS)
#include GET_PASS_REGISTRY
// Drop the local definition explicitly, like the sibling callbacks do, rather
// than relying on the registry file to undefine it. Otherwise a later
// inclusion of the registry would still expand MACHINE_FUNCTION_PASS to
// ADD_PASS.
#undef MACHINE_FUNCTION_PASS
  return false;
});
|
||||
|
||||
// Pipeline-parsing hook for parameterized target machine-function passes.
// These passes are added to a MachineFunctionPassManager, matching the
// non-parameterized MACHINE_FUNCTION_PASS callback; an IR-level
// FunctionPassManager is the wrong manager for them.
PB.registerPipelineParsingCallback([=](StringRef Name,
                                       MachineFunctionPassManager &PM,
                                       ArrayRef<PassBuilder::PipelineElement>) {
#define MACHINE_FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER,    \
                                          PARAMS)                              \
  ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
#include GET_PASS_REGISTRY
#undef MACHINE_FUNCTION_PASS_WITH_PARAMS
  return false;
});
|
||||
|
||||
#undef ADD_PASS
|
||||
#undef ADD_PASS_WITH_PARAMS
|
||||
|
||||
PB.registerAnalysisRegistrationCallback([](ModuleAnalysisManager &AM) {
|
||||
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
|
||||
AM.registerPass([&] { return CREATE_PASS; });
|
||||
#include GET_PASS_REGISTRY
|
||||
#undef MODULE_ANALYSIS
|
||||
});
|
||||
|
||||
PB.registerAnalysisRegistrationCallback([](FunctionAnalysisManager &AM) {
|
||||
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
|
||||
AM.registerPass([&] { return CREATE_PASS; });
|
||||
#include GET_PASS_REGISTRY
|
||||
#undef FUNCTION_ANALYSIS
|
||||
});
|
||||
|
||||
PB.registerParseAACallback([](StringRef Name, AAManager &AM) {
|
||||
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
|
||||
if (Name == NAME) { \
|
||||
AM.registerFunctionAnalysis< \
|
||||
std::remove_reference_t<decltype(CREATE_PASS)>>(); \
|
||||
return true; \
|
||||
}
|
||||
#include GET_PASS_REGISTRY
|
||||
#undef FUNCTION_ALIAS_ANALYSIS
|
||||
return false;
|
||||
});
|
||||
|
||||
PB.registerAnalysisRegistrationCallback([](LoopAnalysisManager &AM) {
|
||||
#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
|
||||
AM.registerPass([&] { return CREATE_PASS; });
|
||||
#include GET_PASS_REGISTRY
|
||||
#undef LOOP_ANALYSIS
|
||||
});
|
||||
|
||||
PB.registerAnalysisRegistrationCallback([](MachineFunctionAnalysisManager &AM) {
|
||||
#define MACHINE_FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
|
||||
AM.registerPass([&] { return CREATE_PASS; });
|
||||
#include GET_PASS_REGISTRY
|
||||
#undef MACHINE_FUNCTION_ANALYSIS
|
||||
});
|
||||
|
||||
#undef GET_PASS_REGISTRY
|
||||
#endif // GET_PASS_REGISTRY
|
||||
@@ -179,6 +179,7 @@ def FmArcp : MIFlagEnum<"FmArcp">;
|
||||
def FmContract : MIFlagEnum<"FmContract">;
|
||||
def FmAfn : MIFlagEnum<"FmAfn">;
|
||||
def FmReassoc : MIFlagEnum<"FmReassoc">;
|
||||
def IsExact : MIFlagEnum<"IsExact">;
|
||||
|
||||
def MIFlags;
|
||||
// def not; -> Already defined as a SDNode
|
||||
@@ -1036,7 +1037,20 @@ def sdiv_by_const : GICombineRule<
|
||||
[{ return Helper.matchSDivByConst(*${root}); }]),
|
||||
(apply [{ Helper.applySDivByConst(*${root}); }])>;
|
||||
|
||||
def intdiv_combines : GICombineGroup<[udiv_by_const, sdiv_by_const]>;
|
||||
def sdiv_by_pow2 : GICombineRule<
|
||||
(defs root:$root),
|
||||
(match (G_SDIV $dst, $x, $y, (MIFlags (not IsExact))):$root,
|
||||
[{ return Helper.matchDivByPow2(*${root}, /*IsSigned=*/true); }]),
|
||||
(apply [{ Helper.applySDivByPow2(*${root}); }])>;
|
||||
|
||||
def udiv_by_pow2 : GICombineRule<
|
||||
(defs root:$root),
|
||||
(match (G_UDIV $dst, $x, $y, (MIFlags (not IsExact))):$root,
|
||||
[{ return Helper.matchDivByPow2(*${root}, /*IsSigned=*/false); }]),
|
||||
(apply [{ Helper.applyUDivByPow2(*${root}); }])>;
|
||||
|
||||
def intdiv_combines : GICombineGroup<[udiv_by_const, sdiv_by_const,
|
||||
sdiv_by_pow2, udiv_by_pow2]>;
|
||||
|
||||
def reassoc_ptradd : GICombineRule<
|
||||
(defs root:$root, build_fn_matchinfo:$matchinfo),
|
||||
|
||||
@@ -5270,6 +5270,144 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
|
||||
return MIB.buildMul(Ty, Res, Factor);
|
||||
}
|
||||
|
||||
/// Match a G_SDIV / G_UDIV \p MI whose divisor (operand 2) satisfies the
/// power-of-two predicate below. For a signed division (\p IsSigned) a negated
/// power of two is accepted as well. The predicate is applied through
/// matchUnaryPredicate with undefs disallowed.
bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) {
  assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
          MI.getOpcode() == TargetOpcode::G_UDIV) &&
         "Expected SDIV or UDIV");
  Register Divisor = cast<GenericMachineInstr>(MI).getReg(2);

  auto IsAcceptedDivisor = [&](const Constant *C) -> bool {
    auto *IntC = dyn_cast<ConstantInt>(C);
    if (!IntC)
      return false;
    const auto &Val = IntC->getValue();
    if (Val.isPowerOf2())
      return true;
    // Only a signed division may divide by -(2^k).
    return IsSigned && Val.isNegatedPowerOf2();
  };

  return matchUnaryPredicate(MRI, Divisor, IsAcceptedDivisor,
                             /*AllowUndefs=*/false);
}
|
||||
|
||||
/// Replace the G_SDIV \p MI, whose divisor is expected to be a (possibly
/// negated) power of two, with an equivalent shift-based sequence.
///
/// When the divisor is a constant or constant splat, the sequence is
/// specialised at compile time. Otherwise the checks are emitted as
/// instructions and the original G_SDIV is erased.
void CombinerHelper::applySDivByPow2(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
  auto &SDiv = cast<GenericMachineInstr>(MI);
  Register Dst = SDiv.getReg(0);
  Register LHS = SDiv.getReg(1);
  Register RHS = SDiv.getReg(2);
  LLT Ty = MRI.getType(Dst);
  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);

  Builder.setInstrAndDebugLoc(MI);

  // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
  // to the following version:
  //
  // %c1      = G_CTTZ %rhs
  // %inexact = G_SUB $bitwidth, %c1
  // %sign    = G_ASHR %lhs, $(bitwidth - 1)
  // %srl     = G_LSHR %sign, %inexact
  // %add     = G_ADD %lhs, %srl
  // %sra     = G_ASHR %add, %c1
  // %sra     = G_SELECT, %isoneorallones, %lhs, %sra
  // %zero    = G_CONSTANT $0
  // %neg     = G_NEG %sra
  // %isneg   = G_ICMP SLT %rhs, %zero
  // %res     = G_SELECT %isneg, %neg, %sra
  //
  // When %rhs is a constant integer, or a splat vector, we can check its value
  // at compile time such that the first two G_ICMP conditional statements, as
  // well as the corresponding non-taken branches, can be eliminated. This can
  // generate compact code even w/o any constant folding afterwards. When $rhs
  // is not a splat vector, we have to generate those checks via instructions.

  unsigned Bitwidth = Ty.getScalarSizeInBits();
  auto Zero = Builder.buildConstant(Ty, 0);

  // TODO: It is not necessary to have this specialized version. We need it *for
  // now* because the folding/combine can't handle it. Remove this large
  // conditional statement once we can properly fold the two G_ICMP.
  if (auto RHSC = getConstantOrConstantSplatVector(RHS)) {
    // Special case: (sdiv X, 1) -> X
    if (RHSC->isOne()) {
      replaceSingleDefInstWithReg(MI, LHS);
      return;
    }
    // Special Case: (sdiv X, -1) -> 0-X
    if (RHSC->isAllOnes()) {
      auto Neg = Builder.buildNeg(Ty, LHS);
      replaceSingleDefInstWithReg(MI, Neg->getOperand(0).getReg());
      return;
    }

    unsigned TrailingZeros = RHSC->countTrailingZeros();
    auto C1 = Builder.buildConstant(ShiftAmtTy, TrailingZeros);
    auto Inexact = Builder.buildConstant(ShiftAmtTy, Bitwidth - TrailingZeros);
    auto Sign = Builder.buildAShr(
        Ty, LHS, Builder.buildConstant(ShiftAmtTy, Bitwidth - 1));
    // Add (LHS < 0) ? abs2 - 1 : 0;
    auto Lshr = Builder.buildLShr(Ty, Sign, Inexact);
    auto Add = Builder.buildAdd(Ty, LHS, Lshr);
    auto Shr = Builder.buildAShr(Ty, Add, C1);

    // If dividing by a positive value, we're done. Otherwise, the result must
    // be negated.
    auto Res = RHSC->isNegative() ? Builder.buildNeg(Ty, Shr) : Shr;
    replaceSingleDefInstWithReg(MI, Res->getOperand(0).getReg());
    return;
  }

  // RHS is not a splat vector. Build the above version with instructions.
  auto Bits = Builder.buildConstant(ShiftAmtTy, Bitwidth);
  auto C1 = Builder.buildCTTZ(Ty, RHS);
  C1 = Builder.buildZExtOrTrunc(ShiftAmtTy, C1);
  auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
  auto Sign = Builder.buildAShr(
      Ty, LHS, Builder.buildConstant(ShiftAmtTy, Bitwidth - 1));

  // Add (LHS < 0) ? abs2 - 1 : 0;
  // This must be a logical shift *right*: it turns the all-ones sign mask into
  // the low-bit mask (abs2 - 1), exactly as in the constant path above.
  auto Lshr = Builder.buildLShr(Ty, Sign, Inexact);
  auto Add = Builder.buildAdd(Ty, LHS, Lshr);
  auto Shr = Builder.buildAShr(Ty, Add, C1);

  // Condition type: one bit per element for vectors, a single bit for scalars.
  LLT CCVT =
      Ty.isVector() ? LLT::vector(Ty.getElementCount(), 1) : LLT::scalar(1);

  // Special cases: (sdiv X, 1) -> X and (sdiv X, -1) -> 0-X. Both start from
  // X itself; the negation for -1 is applied by the final select below.
  auto One = Builder.buildConstant(Ty, 1);
  auto AllOnes =
      Builder.buildConstant(Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
  auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
  auto IsAllOnes =
      Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, AllOnes);
  auto IsOneOrAllOnes = Builder.buildOr(CCVT, IsOne, IsAllOnes);
  Shr = Builder.buildSelect(Ty, IsOneOrAllOnes, LHS, Shr);

  // If dividing by a positive value, we're done. Otherwise, the result must
  // be negated. The sign that matters here is the divisor's, not the
  // dividend's.
  auto Neg = Builder.buildNeg(Ty, Shr);
  auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
  Builder.buildSelect(Dst, IsNeg, Neg, Shr);

  // The select above now defines Dst, so the original G_SDIV has to go;
  // leaving it in place would give Dst two definitions.
  MI.eraseFromParent();
}
|
||||
|
||||
/// Replace the G_UDIV \p MI, whose divisor is expected to be a power of two,
/// with a logical shift right of the dividend by the divisor's number of
/// trailing zero bits.
void CombinerHelper::applyUDivByPow2(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
  auto &UDiv = cast<GenericMachineInstr>(MI);
  Register Dst = UDiv.getReg(0);
  Register LHS = UDiv.getReg(1);
  Register RHS = UDiv.getReg(2);
  LLT Ty = MRI.getType(Dst);
  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);

  Builder.setInstrAndDebugLoc(MI);

  // TODO: It is not necessary to have this specialized version. We need it *for
  // now* because the folding/combine can't handle CTTZ.
  if (auto RHSC = getConstantOrConstantSplatVector(RHS)) {
    // Constant (or splat) divisor: the shift amount is known at compile time.
    auto C1 = Builder.buildConstant(ShiftAmtTy, RHSC->countTrailingZeros());
    auto Res = Builder.buildLShr(Ty, LHS, C1);
    replaceSingleDefInstWithReg(MI, Res->getOperand(0).getReg());
    return;
  }

  // Non-splat divisor: compute the per-element shift amount with G_CTTZ.
  auto C1 = Builder.buildCTTZ(Ty, RHS);
  C1 = Builder.buildZExtOrTrunc(ShiftAmtTy, C1);
  Builder.buildLShr(Dst, LHS, C1);

  // The shift above now defines Dst, so the original G_UDIV has to go;
  // leaving it in place would give Dst two definitions.
  MI.eraseFromParent();
}
|
||||
|
||||
bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
|
||||
assert(MI.getOpcode() == TargetOpcode::G_UMULH);
|
||||
Register RHS = MI.getOperand(2).getReg();
|
||||
@@ -6945,10 +7083,6 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
|
||||
LLT DstTy = MRI.getType(Dst);
|
||||
LLT CarryTy = MRI.getType(Carry);
|
||||
|
||||
// We want do fold the [u|s]addo.
|
||||
if (!MRI.hasOneNonDBGUse(Dst))
|
||||
return false;
|
||||
|
||||
// Fold addo, if the carry is dead -> add, undef.
|
||||
if (MRI.use_nodbg_empty(Carry) &&
|
||||
isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
|
||||
@@ -6959,10 +7093,6 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// We want do fold the [u|s]addo.
|
||||
if (!MRI.hasOneNonDBGUse(Carry))
|
||||
return false;
|
||||
|
||||
// Canonicalize constant to RHS.
|
||||
if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
|
||||
if (IsSigned) {
|
||||
@@ -6994,7 +7124,7 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Fold (addo x, 0) -> x, no borrow
|
||||
// Fold (addo x, 0) -> x, no carry
|
||||
if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
|
||||
MatchInfo = [=](MachineIRBuilder &B) {
|
||||
B.buildCopy(Dst, LHS);
|
||||
|
||||
@@ -8215,9 +8215,22 @@ LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
|
||||
// %res = G_SMAX %a, %v2
|
||||
Register SrcReg = MI.getOperand(1).getReg();
|
||||
LLT Ty = MRI.getType(SrcReg);
|
||||
auto Zero = MIRBuilder.buildConstant(Ty, 0);
|
||||
auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
|
||||
MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
|
||||
MI.eraseFromParent();
|
||||
return Legalized;
|
||||
}
|
||||
|
||||
/// Lower G_ABS to a compare-and-select sequence:
///   %zero = G_CONSTANT 0
///   %neg  = G_SUB %zero, %src
///   %cmp  = G_ICMP intpred(sgt), %src, %zero
///   %res  = G_SELECT %cmp, %src, %neg
/// The original instruction is erased and Legalized is returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
  Register SrcReg = MI.getOperand(1).getReg();
  Register DestReg = MI.getOperand(0).getReg();
  // NOTE(review): the condition type is a scalar s1, so this lowering looks
  // intended for scalar sources only; confirm against callers.
  LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
  auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
  auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
  // The select is the only definition of the destination. No G_SMAX is built
  // here: it would define the same register a second time.
  auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
  MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
  MI.eraseFromParent();
  return Legalized;
}
|
||||
|
||||
20
llvm/lib/Target/AArch64/AArch64PassRegistry.def
Normal file
20
llvm/lib/Target/AArch64/AArch64PassRegistry.def
Normal file
@@ -0,0 +1,20 @@
|
||||
//===- AArch64PassRegistry.def - Registry of AArch64 passes -----*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file is used as the registry of passes that are part of the
|
||||
// AArch64 backend.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// NOTE: NO INCLUDE GUARD DESIRED!
|
||||
|
||||
#ifndef LOOP_PASS
|
||||
#define LOOP_PASS(NAME, CREATE_PASS)
|
||||
#endif
|
||||
LOOP_PASS("aarch64-lit", AArch64LoopIdiomTransformPass())
|
||||
#undef LOOP_PASS
|
||||
@@ -547,6 +547,10 @@ public:
|
||||
|
||||
void AArch64TargetMachine::registerPassBuilderCallbacks(
|
||||
PassBuilder &PB, bool PopulateClassToPassNames) {
|
||||
|
||||
#define GET_PASS_REGISTRY "AArch64PassRegistry.def"
|
||||
#include "llvm/Passes/TargetPassRegistry.inc"
|
||||
|
||||
PB.registerLateLoopOptimizationsEPCallback(
|
||||
[=](LoopPassManager &LPM, OptimizationLevel Level) {
|
||||
LPM.addPass(AArch64LoopIdiomTransformPass());
|
||||
|
||||
@@ -1012,6 +1012,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
|
||||
ABSActions
|
||||
.legalFor({s32, s64});
|
||||
ABSActions.legalFor(PackedVectorAllTypeList)
|
||||
.customIf([=](const LegalityQuery &Q) {
|
||||
// TODO: Fix suboptimal codegen for 128+ bit types.
|
||||
LLT SrcTy = Q.Types[0];
|
||||
return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
|
||||
})
|
||||
.widenScalarIf(
|
||||
[=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
|
||||
[=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
|
||||
@@ -1264,6 +1269,8 @@ bool AArch64LegalizerInfo::legalizeCustom(
|
||||
return legalizeDynStackAlloc(MI, Helper);
|
||||
case TargetOpcode::G_PREFETCH:
|
||||
return legalizePrefetch(MI, Helper);
|
||||
case TargetOpcode::G_ABS:
|
||||
return Helper.lowerAbsToCNeg(MI);
|
||||
}
|
||||
|
||||
llvm_unreachable("expected switch to return");
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
; RUN: opt -passes=print-alias-sets -S -o - < %s 2>&1 | FileCheck %s
|
||||
; RUN: opt -passes=print-alias-sets -S -o - < %s 2>&1 | FileCheck %s --implicit-check-not="Unknown instructions"
|
||||
|
||||
; CHECK: Alias sets for function 'test1':
|
||||
; CHECK: Alias Set Tracker: 2 alias sets for 2 pointer values.
|
||||
; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Memory locations: (ptr %a, LocationSize::precise(1))
|
||||
; CHECK-NOT: 1 Unknown instruction
|
||||
; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Memory locations: (ptr %b, LocationSize::precise(1))
|
||||
define void @test1(i32 %c) {
|
||||
entry:
|
||||
@@ -64,7 +63,6 @@ entry:
|
||||
; CHECK: Alias sets for function 'test5':
|
||||
; CHECK: Alias Set Tracker: 2 alias sets for 2 pointer values.
|
||||
; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Memory locations: (ptr %a, LocationSize::precise(1))
|
||||
; CHECK-NOT: 1 Unknown instruction
|
||||
; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Memory locations: (ptr %b, LocationSize::precise(1))
|
||||
define void @test5() {
|
||||
entry:
|
||||
|
||||
@@ -92,3 +92,87 @@ body: |
|
||||
$w1 = COPY %o_wide
|
||||
RET_ReallyLR implicit $w0
|
||||
...
|
||||
---
|
||||
name: add_multiuse
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $w0, $w1
|
||||
; CHECK-LABEL: name: add_multiuse
|
||||
; CHECK: liveins: $w0, $w1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||
; CHECK-NEXT: %const:_(s32) = G_CONSTANT i32 0
|
||||
; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: $w1 = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: $w2 = COPY %const(s32)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $w0
|
||||
%0:_(s32) = COPY $w0
|
||||
%const:_(s32) = G_CONSTANT i32 0
|
||||
%add:_(s32), %o:_(s1) = G_SADDO %0, %const
|
||||
%o_wide:_(s32) = G_ZEXT %o(s1)
|
||||
$w0 = COPY %add(s32)
|
||||
$w1 = COPY %add(s32)
|
||||
$w2 = COPY %o_wide
|
||||
RET_ReallyLR implicit $w0
|
||||
...
|
||||
---
|
||||
name: add_vector
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $w0, $w1
|
||||
; CHECK-LABEL: name: add_vector
|
||||
; CHECK: liveins: $w0, $w1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
|
||||
; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32), [[COPY1]](s32)
|
||||
; CHECK-NEXT: %bv1:_(<4 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32), [[COPY2]](s32), [[COPY3]](s32)
|
||||
; CHECK-NEXT: %add:_(<4 x s32>), %o:_(<4 x s1>) = G_UADDO %bv0, %bv1
|
||||
; CHECK-NEXT: %o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
|
||||
; CHECK-NEXT: $q0 = COPY %add(<4 x s32>)
|
||||
; CHECK-NEXT: $q1 = COPY %o_wide(<4 x s32>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $w0
|
||||
%0:_(s32) = COPY $w0
|
||||
%1:_(s32) = COPY $w1
|
||||
%2:_(s32) = COPY $w2
|
||||
%3:_(s32) = COPY $w3
|
||||
%bv0:_(<4 x s32>) = G_BUILD_VECTOR %0:_(s32), %1:_(s32), %0:_(s32), %1:_(s32)
|
||||
%bv1:_(<4 x s32>) = G_BUILD_VECTOR %2:_(s32), %3:_(s32), %2:_(s32), %3:_(s32)
|
||||
%add:_(<4 x s32>), %o:_(<4 x s1>) = G_UADDO %bv0, %bv1
|
||||
%o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
|
||||
$q0 = COPY %add(<4 x s32>)
|
||||
$q1 = COPY %o_wide
|
||||
RET_ReallyLR implicit $w0
|
||||
...
|
||||
---
|
||||
name: add_splat_vector
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $w0, $w1
|
||||
; CHECK-LABEL: name: add_splat_vector
|
||||
; CHECK: liveins: $w0, $w1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
|
||||
; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32), [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
|
||||
; CHECK-NEXT: %o:_(<4 x s1>) = G_BUILD_VECTOR [[C]](s1), [[C]](s1), [[C]](s1), [[C]](s1)
|
||||
; CHECK-NEXT: %o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
|
||||
; CHECK-NEXT: $q0 = COPY %bv0(<4 x s32>)
|
||||
; CHECK-NEXT: $q1 = COPY %o_wide(<4 x s32>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $w0
|
||||
%0:_(s32) = COPY $w0
|
||||
%1:_(s32) = COPY $w1
|
||||
%2:_(s32) = COPY $w2
|
||||
%3:_(s32) = COPY $w3
|
||||
%const:_(s32) = G_CONSTANT i32 0
|
||||
%bv0:_(<4 x s32>) = G_BUILD_VECTOR %0:_(s32), %1:_(s32), %0:_(s32), %1:_(s32)
|
||||
%bv1:_(<4 x s32>) = G_BUILD_VECTOR %const:_(s32), %const:_(s32), %const:_(s32), %const:_(s32)
|
||||
%add:_(<4 x s32>), %o:_(<4 x s1>) = G_SADDO %bv0, %bv1
|
||||
%o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
|
||||
$q0 = COPY %add(<4 x s32>)
|
||||
$q1 = COPY %o_wide
|
||||
RET_ReallyLR implicit $w0
|
||||
...
|
||||
|
||||
@@ -8,11 +8,12 @@ body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: abs_s32
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 31
|
||||
; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64)
|
||||
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]]
|
||||
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
|
||||
; CHECK-NEXT: $w0 = COPY [[XOR]](s32)
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY]]
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[C]]
|
||||
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[SUB]]
|
||||
; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
|
||||
;
|
||||
; CHECK-CSSC-LABEL: name: abs_s32
|
||||
; CHECK-CSSC: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||
; CHECK-CSSC-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[COPY]]
|
||||
@@ -28,11 +29,12 @@ body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: abs_s64
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
|
||||
; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s64)
|
||||
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY]], [[ASHR]]
|
||||
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ADD]], [[ASHR]]
|
||||
; CHECK-NEXT: $x0 = COPY [[XOR]](s64)
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
|
||||
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[C]], [[COPY]]
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s64), [[C]]
|
||||
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[COPY]], [[SUB]]
|
||||
; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
|
||||
;
|
||||
; CHECK-CSSC-LABEL: name: abs_s64
|
||||
; CHECK-CSSC: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
|
||||
; CHECK-CSSC-NEXT: [[ABS:%[0-9]+]]:_(s64) = G_ABS [[COPY]]
|
||||
@@ -55,6 +57,7 @@ body: |
|
||||
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<4 x s16>) = G_ABS [[COPY]]
|
||||
; CHECK-NEXT: $d0 = COPY [[ABS]](<4 x s16>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $d0
|
||||
;
|
||||
; CHECK-CSSC-LABEL: name: abs_v4s16
|
||||
; CHECK-CSSC: liveins: $d0
|
||||
; CHECK-CSSC-NEXT: {{ $}}
|
||||
@@ -82,6 +85,7 @@ body: |
|
||||
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<8 x s16>) = G_ABS [[COPY]]
|
||||
; CHECK-NEXT: $q0 = COPY [[ABS]](<8 x s16>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $q0
|
||||
;
|
||||
; CHECK-CSSC-LABEL: name: abs_v8s16
|
||||
; CHECK-CSSC: liveins: $q0
|
||||
; CHECK-CSSC-NEXT: {{ $}}
|
||||
@@ -109,6 +113,7 @@ body: |
|
||||
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<2 x s32>) = G_ABS [[COPY]]
|
||||
; CHECK-NEXT: $d0 = COPY [[ABS]](<2 x s32>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $d0
|
||||
;
|
||||
; CHECK-CSSC-LABEL: name: abs_v2s32
|
||||
; CHECK-CSSC: liveins: $d0
|
||||
; CHECK-CSSC-NEXT: {{ $}}
|
||||
@@ -136,6 +141,7 @@ body: |
|
||||
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<4 x s32>) = G_ABS [[COPY]]
|
||||
; CHECK-NEXT: $q0 = COPY [[ABS]](<4 x s32>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $q0
|
||||
;
|
||||
; CHECK-CSSC-LABEL: name: abs_v4s32
|
||||
; CHECK-CSSC: liveins: $q0
|
||||
; CHECK-CSSC-NEXT: {{ $}}
|
||||
@@ -163,6 +169,7 @@ body: |
|
||||
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<8 x s8>) = G_ABS [[COPY]]
|
||||
; CHECK-NEXT: $d0 = COPY [[ABS]](<8 x s8>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $d0
|
||||
;
|
||||
; CHECK-CSSC-LABEL: name: abs_v4s8
|
||||
; CHECK-CSSC: liveins: $d0
|
||||
; CHECK-CSSC-NEXT: {{ $}}
|
||||
@@ -190,6 +197,7 @@ body: |
|
||||
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<16 x s8>) = G_ABS [[COPY]]
|
||||
; CHECK-NEXT: $q0 = COPY [[ABS]](<16 x s8>)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $q0
|
||||
;
|
||||
; CHECK-CSSC-LABEL: name: abs_v16s8
|
||||
; CHECK-CSSC: liveins: $q0
|
||||
; CHECK-CSSC-NEXT: {{ $}}
|
||||
|
||||
@@ -15,9 +15,8 @@ define i8 @abs_i8(i8 %a){
|
||||
; CHECK-GI-LABEL: abs_i8:
|
||||
; CHECK-GI: // %bb.0: // %entry
|
||||
; CHECK-GI-NEXT: sxtb w8, w0
|
||||
; CHECK-GI-NEXT: asr w8, w8, #7
|
||||
; CHECK-GI-NEXT: add w9, w0, w8
|
||||
; CHECK-GI-NEXT: eor w0, w9, w8
|
||||
; CHECK-GI-NEXT: cmp w8, #0
|
||||
; CHECK-GI-NEXT: cneg w0, w0, le
|
||||
; CHECK-GI-NEXT: ret
|
||||
entry:
|
||||
%res = call i8 @llvm.abs.i8(i8 %a, i1 0)
|
||||
@@ -36,9 +35,8 @@ define i16 @abs_i16(i16 %a){
|
||||
; CHECK-GI-LABEL: abs_i16:
|
||||
; CHECK-GI: // %bb.0: // %entry
|
||||
; CHECK-GI-NEXT: sxth w8, w0
|
||||
; CHECK-GI-NEXT: asr w8, w8, #15
|
||||
; CHECK-GI-NEXT: add w9, w0, w8
|
||||
; CHECK-GI-NEXT: eor w0, w9, w8
|
||||
; CHECK-GI-NEXT: cmp w8, #0
|
||||
; CHECK-GI-NEXT: cneg w0, w0, le
|
||||
; CHECK-GI-NEXT: ret
|
||||
entry:
|
||||
%res = call i16 @llvm.abs.i16(i16 %a, i1 0)
|
||||
@@ -55,9 +53,8 @@ define i32 @abs_i32(i32 %a){
|
||||
;
|
||||
; CHECK-GI-LABEL: abs_i32:
|
||||
; CHECK-GI: // %bb.0: // %entry
|
||||
; CHECK-GI-NEXT: asr w8, w0, #31
|
||||
; CHECK-GI-NEXT: add w9, w0, w8
|
||||
; CHECK-GI-NEXT: eor w0, w9, w8
|
||||
; CHECK-GI-NEXT: cmp w0, #0
|
||||
; CHECK-GI-NEXT: cneg w0, w0, le
|
||||
; CHECK-GI-NEXT: ret
|
||||
entry:
|
||||
%res = call i32 @llvm.abs.i32(i32 %a, i1 0)
|
||||
@@ -74,9 +71,8 @@ define i64 @abs_i64(i64 %a){
|
||||
;
|
||||
; CHECK-GI-LABEL: abs_i64:
|
||||
; CHECK-GI: // %bb.0: // %entry
|
||||
; CHECK-GI-NEXT: asr x8, x0, #63
|
||||
; CHECK-GI-NEXT: add x9, x0, x8
|
||||
; CHECK-GI-NEXT: eor x0, x9, x8
|
||||
; CHECK-GI-NEXT: cmp x0, #0
|
||||
; CHECK-GI-NEXT: cneg x0, x0, le
|
||||
; CHECK-GI-NEXT: ret
|
||||
entry:
|
||||
%res = call i64 @llvm.abs.i64(i64 %a, i1 0)
|
||||
@@ -248,9 +244,9 @@ define <1 x i32> @abs_v1i32(<1 x i32> %a){
|
||||
; CHECK-GI-LABEL: abs_v1i32:
|
||||
; CHECK-GI: // %bb.0: // %entry
|
||||
; CHECK-GI-NEXT: fmov w8, s0
|
||||
; CHECK-GI-NEXT: asr w9, w8, #31
|
||||
; CHECK-GI-NEXT: add w8, w8, w9
|
||||
; CHECK-GI-NEXT: eor w8, w8, w9
|
||||
; CHECK-GI-NEXT: fmov w9, s0
|
||||
; CHECK-GI-NEXT: cmp w8, #0
|
||||
; CHECK-GI-NEXT: cneg w8, w9, le
|
||||
; CHECK-GI-NEXT: fmov s0, w8
|
||||
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
|
||||
; CHECK-GI-NEXT: ret
|
||||
|
||||
@@ -2643,8 +2643,7 @@ define i8 @pr60530() {
|
||||
;
|
||||
; GISEL-LABEL: pr60530:
|
||||
; GISEL: // %bb.0:
|
||||
; GISEL-NEXT: mov w8, #1 // =0x1
|
||||
; GISEL-NEXT: sbfx w0, w8, #0, #1
|
||||
; GISEL-NEXT: mov w0, #255 // =0xff
|
||||
; GISEL-NEXT: ret
|
||||
%1 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 0, i8 1)
|
||||
%2 = extractvalue { i8, i1 } %1, 1
|
||||
|
||||
@@ -64,21 +64,10 @@ entry:
|
||||
}
|
||||
|
||||
define i32 @saddo.select.i64(i32 %v1, i32 %v2, i1 %v3, i64 %v4, i64 %v5) {
|
||||
; SDAG-LABEL: saddo.select.i64:
|
||||
; SDAG: // %bb.0: // %entry
|
||||
; SDAG-NEXT: mov w0, w1
|
||||
; SDAG-NEXT: ret
|
||||
;
|
||||
; GISEL-LABEL: saddo.select.i64:
|
||||
; GISEL: // %bb.0: // %entry
|
||||
; GISEL-NEXT: mov w8, #13 // =0xd
|
||||
; GISEL-NEXT: and x9, x3, #0xc
|
||||
; GISEL-NEXT: and x8, x4, x8
|
||||
; GISEL-NEXT: cmn x9, x8
|
||||
; GISEL-NEXT: cset w8, vs
|
||||
; GISEL-NEXT: tst w8, #0x1
|
||||
; GISEL-NEXT: csel w0, w0, w1, ne
|
||||
; GISEL-NEXT: ret
|
||||
; CHECK-LABEL: saddo.select.i64:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov w0, w1
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%lhs = and i64 %v4, 12
|
||||
%rhs = and i64 %v5, 13
|
||||
@@ -89,22 +78,10 @@ entry:
|
||||
}
|
||||
|
||||
define i32 @uaddo.select.i64(i32 %v1, i32 %v2, i1 %v3, i64 %v4, i64 %v5) {
|
||||
; SDAG-LABEL: uaddo.select.i64:
|
||||
; SDAG: // %bb.0: // %entry
|
||||
; SDAG-NEXT: mov w0, w1
|
||||
; SDAG-NEXT: ret
|
||||
;
|
||||
; GISEL-LABEL: uaddo.select.i64:
|
||||
; GISEL: // %bb.0: // %entry
|
||||
; GISEL-NEXT: mov w8, #9 // =0x9
|
||||
; GISEL-NEXT: mov w9, #10 // =0xa
|
||||
; GISEL-NEXT: and x8, x3, x8
|
||||
; GISEL-NEXT: and x9, x4, x9
|
||||
; GISEL-NEXT: cmn x8, x9
|
||||
; GISEL-NEXT: cset w8, hs
|
||||
; GISEL-NEXT: tst w8, #0x1
|
||||
; GISEL-NEXT: csel w0, w0, w1, ne
|
||||
; GISEL-NEXT: ret
|
||||
; CHECK-LABEL: uaddo.select.i64:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov w0, w1
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%lhs = and i64 %v4, 9
|
||||
%rhs = and i64 %v5, 10
|
||||
|
||||
@@ -670,36 +670,19 @@ define amdgpu_kernel void @bfe_sext_in_reg_i24(ptr addrspace(1) %out, ptr addrsp
|
||||
define amdgpu_kernel void @simplify_demanded_bfe_sdiv(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
|
||||
; GFX6-LABEL: simplify_demanded_bfe_sdiv:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, 2.0
|
||||
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
|
||||
; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
|
||||
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
|
||||
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: s_load_dword s0, s[6:7], 0x0
|
||||
; GFX6-NEXT: s_mov_b32 s6, -1
|
||||
; GFX6-NEXT: s_mov_b32 s7, 0xf000
|
||||
; GFX6-NEXT: v_mul_lo_u32 v1, v0, -2
|
||||
; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
|
||||
; GFX6-NEXT: s_mov_b32 s2, -1
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: s_bfe_i32 s0, s0, 0x100001
|
||||
; GFX6-NEXT: s_ashr_i32 s2, s0, 31
|
||||
; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
|
||||
; GFX6-NEXT: s_add_i32 s0, s0, s2
|
||||
; GFX6-NEXT: s_xor_b32 s0, s0, s2
|
||||
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
|
||||
; GFX6-NEXT: v_mul_hi_u32 v0, s0, v0
|
||||
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v0
|
||||
; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0
|
||||
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s0, v1
|
||||
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 2, v1
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||||
; GFX6-NEXT: v_subrev_i32_e64 v2, s[0:1], 2, v1
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
||||
; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0
|
||||
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 2, v1
|
||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
||||
; GFX6-NEXT: v_xor_b32_e32 v0, s2, v0
|
||||
; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; GFX6-NEXT: s_bfe_i32 s3, s3, 0x100001
|
||||
; GFX6-NEXT: s_ashr_i32 s4, s3, 31
|
||||
; GFX6-NEXT: s_lshr_b32 s4, s4, 31
|
||||
; GFX6-NEXT: s_add_i32 s3, s3, s4
|
||||
; GFX6-NEXT: s_ashr_i32 s3, s3, 1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, s3
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; GFX6-NEXT: s_endpgm
|
||||
%src = load i32, ptr addrspace(1) %in, align 4
|
||||
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 1, i32 16)
|
||||
|
||||
@@ -279,125 +279,27 @@ define i32 @v_sdiv_i32_pow2k_denom(i32 %num) {
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
|
||||
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, 0x45800000
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, 0xfffff000
|
||||
; CHECK-NEXT: v_mov_b32_e32 v4, 0x1000
|
||||
; CHECK-NEXT: v_lshrrev_b32_e32 v1, 20, v1
|
||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
|
||||
; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
|
||||
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
|
||||
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
|
||||
; CHECK-NEXT: v_mul_lo_u32 v3, v2, v3
|
||||
; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
|
||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
|
||||
; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 12, v2
|
||||
; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v2
|
||||
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
|
||||
; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5]
|
||||
; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, 0x1000, v0
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
|
||||
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v2
|
||||
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
|
||||
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
|
||||
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
|
||||
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 12, v0
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = sdiv i32 %num, 4096
|
||||
ret i32 %result
|
||||
}
|
||||
|
||||
define <2 x i32> @v_sdiv_v2i32_pow2k_denom(<2 x i32> %num) {
|
||||
; GISEL-LABEL: v_sdiv_v2i32_pow2k_denom:
|
||||
; GISEL: ; %bb.0:
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
|
||||
; GISEL-NEXT: v_mov_b32_e32 v3, 0x1000
|
||||
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, 0x1000
|
||||
; GISEL-NEXT: v_mov_b32_e32 v5, 0xfffff000
|
||||
; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
|
||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
||||
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
|
||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v6
|
||||
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
|
||||
; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
|
||||
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
|
||||
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
|
||||
; GISEL-NEXT: v_mul_lo_u32 v5, v4, v5
|
||||
; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
|
||||
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
|
||||
; GISEL-NEXT: v_mul_hi_u32 v5, v0, v4
|
||||
; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4
|
||||
; GISEL-NEXT: v_lshlrev_b32_e32 v7, 12, v5
|
||||
; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v5
|
||||
; GISEL-NEXT: v_lshlrev_b32_e32 v9, 12, v4
|
||||
; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v4
|
||||
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
|
||||
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v9
|
||||
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[4:5]
|
||||
; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v0, v3
|
||||
; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v1, v3
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[6:7]
|
||||
; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, 0x1000, v1
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v7, s[4:5]
|
||||
; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[6:7]
|
||||
; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v4
|
||||
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v7, vcc
|
||||
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v8, vcc
|
||||
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
|
||||
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
|
||||
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
|
||||
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; CGP-LABEL: v_sdiv_v2i32_pow2k_denom:
|
||||
; CGP: ; %bb.0:
|
||||
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
|
||||
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, 0x45800000
|
||||
; CGP-NEXT: v_mov_b32_e32 v4, 0xfffff000
|
||||
; CGP-NEXT: v_mov_b32_e32 v5, 0x1000
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1
|
||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
||||
; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
|
||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6
|
||||
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
|
||||
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
|
||||
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
|
||||
; CGP-NEXT: v_mul_lo_u32 v4, v3, v4
|
||||
; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
|
||||
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
|
||||
; CGP-NEXT: v_mul_hi_u32 v4, v0, v3
|
||||
; CGP-NEXT: v_mul_hi_u32 v3, v1, v3
|
||||
; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v4
|
||||
; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4
|
||||
; CGP-NEXT: v_lshlrev_b32_e32 v9, 12, v3
|
||||
; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v3
|
||||
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
|
||||
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v9
|
||||
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[4:5]
|
||||
; CGP-NEXT: v_sub_i32_e32 v7, vcc, v0, v5
|
||||
; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v1, v5
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v10, s[6:7]
|
||||
; CGP-NEXT: v_subrev_i32_e32 v8, vcc, 0x1000, v1
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v7, s[4:5]
|
||||
; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v4
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[6:7]
|
||||
; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v3
|
||||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v7, vcc
|
||||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
|
||||
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
|
||||
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
|
||||
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
|
||||
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
|
||||
; CGP-NEXT: s_setpc_b64 s[30:31]
|
||||
; CHECK-LABEL: v_sdiv_v2i32_pow2k_denom:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v0
|
||||
; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1
|
||||
; CHECK-NEXT: v_lshrrev_b32_e32 v2, 20, v2
|
||||
; CHECK-NEXT: v_lshrrev_b32_e32 v3, 20, v3
|
||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
||||
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3
|
||||
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 12, v0
|
||||
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 12, v1
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = sdiv <2 x i32> %num, <i32 4096, i32 4096>
|
||||
ret <2 x i32> %result
|
||||
}
|
||||
@@ -884,3 +786,13 @@ define <2 x i32> @v_sdiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
|
||||
%result = sdiv <2 x i32> %num.mask, %den.mask
|
||||
ret <2 x i32> %result
|
||||
}
|
||||
|
||||
define i32 @v_sdiv_i32_exact(i32 %num) {
|
||||
; CHECK-LABEL: v_sdiv_i32_exact:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 12, v0
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = sdiv exact i32 %num, 4096
|
||||
ret i32 %result
|
||||
}
|
||||
|
||||
@@ -999,126 +999,11 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) {
|
||||
; CHECK-LABEL: v_sdiv_i64_pow2k_denom:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000
|
||||
; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v6, 0xfffff000
|
||||
; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
|
||||
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
|
||||
; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
|
||||
; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
|
||||
; CHECK-NEXT: v_trunc_f32_e32 v4, v3
|
||||
; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4
|
||||
; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v2
|
||||
; CHECK-NEXT: v_cvt_u32_f32_e32 v7, v4
|
||||
; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v5, 0
|
||||
; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v7, v[3:4]
|
||||
; CHECK-NEXT: v_mul_hi_u32 v8, v5, v2
|
||||
; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4]
|
||||
; CHECK-NEXT: v_mul_lo_u32 v4, v7, v2
|
||||
; CHECK-NEXT: v_mul_hi_u32 v2, v7, v2
|
||||
; CHECK-NEXT: v_mul_lo_u32 v9, v5, v3
|
||||
; CHECK-NEXT: v_mul_lo_u32 v10, v7, v3
|
||||
; CHECK-NEXT: v_mul_hi_u32 v11, v5, v3
|
||||
; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3
|
||||
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v9
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
|
||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v10, v2
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
|
||||
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
||||
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v9, v4
|
||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v11
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
|
||||
; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8
|
||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
||||
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4
|
||||
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4
|
||||
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v2
|
||||
; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v3, vcc
|
||||
; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v5, 0
|
||||
; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v7, v[3:4]
|
||||
; CHECK-NEXT: v_ashrrev_i32_e32 v6, 31, v1
|
||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v6
|
||||
; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4]
|
||||
; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v6, vcc
|
||||
; CHECK-NEXT: v_xor_b32_e32 v4, v0, v6
|
||||
; CHECK-NEXT: v_mul_lo_u32 v0, v7, v2
|
||||
; CHECK-NEXT: v_mul_lo_u32 v8, v5, v3
|
||||
; CHECK-NEXT: v_xor_b32_e32 v9, v1, v6
|
||||
; CHECK-NEXT: v_mul_hi_u32 v1, v5, v2
|
||||
; CHECK-NEXT: v_mul_hi_u32 v2, v7, v2
|
||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
|
||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
||||
; CHECK-NEXT: v_mul_lo_u32 v1, v7, v3
|
||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v8, v0
|
||||
; CHECK-NEXT: v_mul_hi_u32 v8, v5, v3
|
||||
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
|
||||
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
|
||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8
|
||||
; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3
|
||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
||||
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1
|
||||
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1
|
||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0
|
||||
; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v7, v1, vcc
|
||||
; CHECK-NEXT: v_mul_lo_u32 v2, v9, v0
|
||||
; CHECK-NEXT: v_mul_lo_u32 v3, v4, v1
|
||||
; CHECK-NEXT: v_mul_hi_u32 v7, v4, v0
|
||||
; CHECK-NEXT: v_mul_hi_u32 v0, v9, v0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v5, 0x1000
|
||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
|
||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
|
||||
; CHECK-NEXT: v_mul_lo_u32 v7, v9, v1
|
||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
|
||||
; CHECK-NEXT: v_mul_hi_u32 v3, v4, v1
|
||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
|
||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
|
||||
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v7, v3
|
||||
; CHECK-NEXT: v_add_i32_e32 v7, vcc, v0, v2
|
||||
; CHECK-NEXT: v_mul_hi_u32 v8, v9, v1
|
||||
; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v7, 0
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
|
||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
|
||||
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v2
|
||||
; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v3, v[1:2]
|
||||
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0
|
||||
; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v9, v1, vcc
|
||||
; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v9, v1
|
||||
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
|
||||
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5]
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2
|
||||
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v2, -1, v4, s[4:5]
|
||||
; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v7
|
||||
; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v3, vcc
|
||||
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
|
||||
; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v4
|
||||
; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v8, vcc
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v5, vcc
|
||||
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
|
||||
; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
|
||||
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v6
|
||||
; CHECK-NEXT: v_xor_b32_e32 v1, v1, v6
|
||||
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
|
||||
; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc
|
||||
; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v1
|
||||
; CHECK-NEXT: v_lshrrev_b32_e32 v2, 20, v2
|
||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
||||
; CHECK-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; CHECK-NEXT: v_ashr_i64 v[0:1], v[0:1], 12
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = sdiv i64 %num, 4096
|
||||
ret i64 %result
|
||||
@@ -1128,473 +1013,31 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
|
||||
; GISEL-LABEL: v_sdiv_v2i64_pow2k_denom:
|
||||
; GISEL: ; %bb.0:
|
||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, 0x1000
|
||||
; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
|
||||
; GISEL-NEXT: s_sub_u32 s6, 0, 0x1000
|
||||
; GISEL-NEXT: s_subb_u32 s7, 0, 0
|
||||
; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
|
||||
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
|
||||
; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
|
||||
; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
|
||||
; GISEL-NEXT: v_trunc_f32_e32 v7, v5
|
||||
; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7
|
||||
; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v4
|
||||
; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
|
||||
; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], s6, v6, 0
|
||||
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[5:6]
|
||||
; GISEL-NEXT: v_mul_lo_u32 v5, v7, v4
|
||||
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
|
||||
; GISEL-NEXT: v_mul_hi_u32 v9, v6, v4
|
||||
; GISEL-NEXT: v_mul_hi_u32 v4, v7, v4
|
||||
; GISEL-NEXT: v_mul_lo_u32 v10, v6, v8
|
||||
; GISEL-NEXT: v_mul_lo_u32 v11, v7, v8
|
||||
; GISEL-NEXT: v_mul_hi_u32 v12, v6, v8
|
||||
; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8
|
||||
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v5
|
||||
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
|
||||
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5
|
||||
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5
|
||||
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v6, v4
|
||||
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
|
||||
; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v7, v5, vcc
|
||||
; GISEL-NEXT: v_mov_b32_e32 v4, v9
|
||||
; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v5, v[4:5]
|
||||
; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v4, 20, v4
|
||||
; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v3
|
||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
|
||||
; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s7, v11, v[9:10]
|
||||
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc
|
||||
; GISEL-NEXT: v_xor_b32_e32 v10, v0, v4
|
||||
; GISEL-NEXT: v_mul_lo_u32 v0, v5, v8
|
||||
; GISEL-NEXT: v_mul_lo_u32 v12, v11, v9
|
||||
; GISEL-NEXT: v_xor_b32_e32 v13, v1, v4
|
||||
; GISEL-NEXT: v_mul_hi_u32 v1, v11, v8
|
||||
; GISEL-NEXT: v_mul_hi_u32 v8, v5, v8
|
||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
||||
; GISEL-NEXT: v_mul_lo_u32 v1, v5, v9
|
||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0
|
||||
; GISEL-NEXT: v_mul_hi_u32 v12, v11, v9
|
||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
|
||||
; GISEL-NEXT: v_mul_hi_u32 v9, v5, v9
|
||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
|
||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1
|
||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
|
||||
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc
|
||||
; GISEL-NEXT: v_mul_lo_u32 v8, v13, v0
|
||||
; GISEL-NEXT: v_mul_lo_u32 v9, v10, v1
|
||||
; GISEL-NEXT: v_mul_hi_u32 v11, v10, v0
|
||||
; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
|
||||
; GISEL-NEXT: v_mov_b32_e32 v5, 0x1000
|
||||
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
|
||||
; GISEL-NEXT: v_mul_lo_u32 v11, v13, v1
|
||||
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
|
||||
; GISEL-NEXT: v_mul_hi_u32 v9, v10, v1
|
||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9
|
||||
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v8
|
||||
; GISEL-NEXT: v_mul_hi_u32 v12, v13, v1
|
||||
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
|
||||
; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v8
|
||||
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v5, v12, v[1:2]
|
||||
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v0
|
||||
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9]
|
||||
; GISEL-NEXT: s_sub_u32 s6, 0, 0x1000
|
||||
; GISEL-NEXT: s_subb_u32 s7, 0, 0
|
||||
; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v13, v8, vcc
|
||||
; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v13, v8
|
||||
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
|
||||
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
|
||||
; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v8, vcc
|
||||
; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v5
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v9, s[4:5]
|
||||
; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v11
|
||||
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0
|
||||
; GISEL-NEXT: v_addc_u32_e32 v14, vcc, 0, v12, vcc
|
||||
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
|
||||
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v15, -1, v8, vcc
|
||||
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
|
||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v13
|
||||
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
|
||||
; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v14, vcc
|
||||
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v1, vcc
|
||||
; GISEL-NEXT: v_mul_lo_u32 v1, v7, v0
|
||||
; GISEL-NEXT: v_mul_lo_u32 v13, v6, v8
|
||||
; GISEL-NEXT: v_mul_hi_u32 v15, v6, v0
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v16, vcc
|
||||
; GISEL-NEXT: v_mul_hi_u32 v0, v7, v0
|
||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
||||
; GISEL-NEXT: v_mul_lo_u32 v15, v7, v8
|
||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
|
||||
; GISEL-NEXT: v_mul_hi_u32 v13, v6, v8
|
||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13
|
||||
; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8
|
||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
|
||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
|
||||
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v6, v0
|
||||
; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v7, v1, vcc
|
||||
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
|
||||
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v9, v11, v9, vcc
|
||||
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
|
||||
; GISEL-NEXT: v_xor_b32_e32 v1, v9, v4
|
||||
; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3
|
||||
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v10, v12, v14, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9
|
||||
; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc
|
||||
; GISEL-NEXT: v_xor_b32_e32 v11, v2, v9
|
||||
; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0
|
||||
; GISEL-NEXT: v_mul_lo_u32 v7, v8, v6
|
||||
; GISEL-NEXT: v_xor_b32_e32 v12, v3, v9
|
||||
; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0
|
||||
; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
|
||||
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
|
||||
; GISEL-NEXT: v_mul_lo_u32 v3, v13, v6
|
||||
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2
|
||||
; GISEL-NEXT: v_mul_hi_u32 v7, v8, v6
|
||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
|
||||
; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6
|
||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
|
||||
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2
|
||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0
|
||||
; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc
|
||||
; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0
|
||||
; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2
|
||||
; GISEL-NEXT: v_mul_hi_u32 v7, v11, v0
|
||||
; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0
|
||||
; GISEL-NEXT: v_xor_b32_e32 v8, v10, v4
|
||||
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
|
||||
; GISEL-NEXT: v_mul_lo_u32 v7, v12, v2
|
||||
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3
|
||||
; GISEL-NEXT: v_mul_hi_u32 v6, v11, v2
|
||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
|
||||
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v3
|
||||
; GISEL-NEXT: v_mul_hi_u32 v7, v12, v2
|
||||
; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0
|
||||
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v7, v0
|
||||
; GISEL-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v13, v[0:1]
|
||||
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4
|
||||
; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc
|
||||
; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
|
||||
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2
|
||||
; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
|
||||
; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3
|
||||
; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5
|
||||
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v5
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
|
||||
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4
|
||||
; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v4, -1, v6, s[4:5]
|
||||
; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v10
|
||||
; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v13, vcc
|
||||
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5
|
||||
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
|
||||
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v6
|
||||
; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc
|
||||
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v2, v6, v3, vcc
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc
|
||||
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc
|
||||
; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9
|
||||
; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9
|
||||
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9
|
||||
; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc
|
||||
; GISEL-NEXT: v_lshrrev_b32_e32 v5, 20, v5
|
||||
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5
|
||||
; GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; GISEL-NEXT: v_ashr_i64 v[0:1], v[0:1], 12
|
||||
; GISEL-NEXT: v_ashr_i64 v[2:3], v[2:3], 12
|
||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; CGP-LABEL: v_sdiv_v2i64_pow2k_denom:
|
||||
; CGP: ; %bb.0:
|
||||
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x1000
|
||||
; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
|
||||
; CGP-NEXT: v_mov_b32_e32 v6, 0xfffff000
|
||||
; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
|
||||
; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
|
||||
; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
|
||||
; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
|
||||
; CGP-NEXT: v_trunc_f32_e32 v7, v5
|
||||
; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7
|
||||
; CGP-NEXT: v_cvt_u32_f32_e32 v8, v4
|
||||
; CGP-NEXT: v_cvt_u32_f32_e32 v9, v7
|
||||
; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v8, 0
|
||||
; CGP-NEXT: v_mov_b32_e32 v7, v5
|
||||
; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[7:8]
|
||||
; CGP-NEXT: v_mul_hi_u32 v12, v9, v4
|
||||
; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], -1, v8, v[10:11]
|
||||
; CGP-NEXT: v_mul_lo_u32 v10, v9, v4
|
||||
; CGP-NEXT: v_mul_hi_u32 v11, v8, v4
|
||||
; CGP-NEXT: v_mul_lo_u32 v4, v8, v13
|
||||
; CGP-NEXT: v_mul_lo_u32 v7, v9, v13
|
||||
; CGP-NEXT: v_mul_hi_u32 v14, v8, v13
|
||||
; CGP-NEXT: v_mul_hi_u32 v13, v9, v13
|
||||
; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v4, vcc, v15, v4
|
||||
; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v14
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14
|
||||
; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v7, vcc, v14, v7
|
||||
; CGP-NEXT: v_add_i32_e32 v7, vcc, v13, v7
|
||||
; CGP-NEXT: v_add_i32_e32 v16, vcc, v8, v4
|
||||
; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
|
||||
; CGP-NEXT: v_addc_u32_e32 v17, vcc, v9, v7, vcc
|
||||
; CGP-NEXT: v_mov_b32_e32 v4, v14
|
||||
; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v1
|
||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v7
|
||||
; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], -1, v16, v[14:15]
|
||||
; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc
|
||||
; CGP-NEXT: v_xor_b32_e32 v15, v0, v7
|
||||
; CGP-NEXT: v_mul_lo_u32 v0, v17, v13
|
||||
; CGP-NEXT: v_mul_lo_u32 v4, v16, v14
|
||||
; CGP-NEXT: v_xor_b32_e32 v18, v1, v7
|
||||
; CGP-NEXT: v_mul_hi_u32 v1, v16, v13
|
||||
; CGP-NEXT: v_mul_hi_u32 v13, v17, v13
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1
|
||||
; CGP-NEXT: v_lshrrev_b32_e32 v4, 20, v4
|
||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
||||
; CGP-NEXT: v_mul_lo_u32 v1, v17, v14
|
||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v4, v0
|
||||
; CGP-NEXT: v_mul_hi_u32 v4, v16, v14
|
||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v13
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4
|
||||
; CGP-NEXT: v_mul_hi_u32 v13, v17, v14
|
||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v4, v1
|
||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v13, v1
|
||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0
|
||||
; CGP-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc
|
||||
; CGP-NEXT: v_mul_lo_u32 v13, v18, v0
|
||||
; CGP-NEXT: v_mul_lo_u32 v14, v15, v1
|
||||
; CGP-NEXT: v_mul_hi_u32 v16, v15, v0
|
||||
; CGP-NEXT: v_mul_hi_u32 v0, v18, v0
|
||||
; CGP-NEXT: v_mov_b32_e32 v4, 0x1000
|
||||
; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
|
||||
; CGP-NEXT: v_mul_lo_u32 v16, v18, v1
|
||||
; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
|
||||
; CGP-NEXT: v_mul_hi_u32 v14, v15, v1
|
||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14
|
||||
; CGP-NEXT: v_add_i32_e32 v16, vcc, v0, v13
|
||||
; CGP-NEXT: v_mul_hi_u32 v17, v18, v1
|
||||
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v16, 0
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
|
||||
; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v13
|
||||
; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v17, v[1:2]
|
||||
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v15, v0
|
||||
; CGP-NEXT: v_subb_u32_e64 v1, s[4:5], v18, v13, vcc
|
||||
; CGP-NEXT: v_sub_i32_e64 v13, s[4:5], v18, v13
|
||||
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5]
|
||||
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
|
||||
; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v13, vcc
|
||||
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
|
||||
; CGP-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v15, vcc, 1, v16
|
||||
; CGP-NEXT: v_addc_u32_e32 v18, vcc, 0, v17, vcc
|
||||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
|
||||
; CGP-NEXT: v_mov_b32_e32 v0, v5
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v14, -1, v14, s[4:5]
|
||||
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1]
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v19, 0, -1, vcc
|
||||
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13
|
||||
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], -1, v8, v[0:1]
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v5, -1, v19, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v15
|
||||
; CGP-NEXT: v_mul_lo_u32 v19, v8, v0
|
||||
; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v18, vcc
|
||||
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v5, v15, v1, vcc
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v13, v18, v13, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v19
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
||||
; CGP-NEXT: v_mul_lo_u32 v11, v9, v0
|
||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1
|
||||
; CGP-NEXT: v_mul_hi_u32 v10, v8, v0
|
||||
; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
|
||||
; CGP-NEXT: v_mul_hi_u32 v0, v9, v0
|
||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
|
||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10
|
||||
; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v1
|
||||
; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v0, vcc
|
||||
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0
|
||||
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
|
||||
; CGP-NEXT: v_xor_b32_e32 v11, v5, v7
|
||||
; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2]
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v10, v17, v13, vcc
|
||||
; CGP-NEXT: v_xor_b32_e32 v1, v10, v7
|
||||
; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], -1, v8, v[5:6]
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v3
|
||||
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v10
|
||||
; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc
|
||||
; CGP-NEXT: v_xor_b32_e32 v12, v2, v10
|
||||
; CGP-NEXT: v_mul_lo_u32 v2, v9, v0
|
||||
; CGP-NEXT: v_mul_lo_u32 v6, v8, v5
|
||||
; CGP-NEXT: v_xor_b32_e32 v13, v3, v10
|
||||
; CGP-NEXT: v_mul_hi_u32 v3, v8, v0
|
||||
; CGP-NEXT: v_mul_hi_u32 v0, v9, v0
|
||||
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
|
||||
; CGP-NEXT: v_mul_lo_u32 v3, v9, v5
|
||||
; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2
|
||||
; CGP-NEXT: v_mul_hi_u32 v6, v8, v5
|
||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6
|
||||
; CGP-NEXT: v_mul_hi_u32 v5, v9, v5
|
||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2
|
||||
; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2
|
||||
; CGP-NEXT: v_add_i32_e32 v3, vcc, v8, v0
|
||||
; CGP-NEXT: v_addc_u32_e32 v2, vcc, v9, v2, vcc
|
||||
; CGP-NEXT: v_mul_lo_u32 v5, v13, v3
|
||||
; CGP-NEXT: v_mul_lo_u32 v6, v12, v2
|
||||
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v11, v7
|
||||
; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v7, vcc
|
||||
; CGP-NEXT: v_mul_hi_u32 v7, v12, v3
|
||||
; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
|
||||
; CGP-NEXT: v_mul_lo_u32 v7, v13, v2
|
||||
; CGP-NEXT: v_mul_hi_u32 v3, v13, v3
|
||||
; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5
|
||||
; CGP-NEXT: v_mul_hi_u32 v6, v12, v2
|
||||
; CGP-NEXT: v_add_i32_e32 v3, vcc, v7, v3
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6
|
||||
; CGP-NEXT: v_add_i32_e32 v7, vcc, v3, v5
|
||||
; CGP-NEXT: v_mul_hi_u32 v8, v13, v2
|
||||
; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v7, 0
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5
|
||||
; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v5
|
||||
; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v8, v[3:4]
|
||||
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v12, v2
|
||||
; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
|
||||
; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5
|
||||
; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
|
||||
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
|
||||
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
|
||||
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
|
||||
; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v3, -1, v6, s[4:5]
|
||||
; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v7
|
||||
; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v8, vcc
|
||||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
|
||||
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc
|
||||
; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v6
|
||||
; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v9, vcc
|
||||
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v2, v6, v4, vcc
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v4, v9, v5, vcc
|
||||
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
|
||||
; CGP-NEXT: v_cndmask_b32_e32 v3, v8, v4, vcc
|
||||
; CGP-NEXT: v_xor_b32_e32 v2, v2, v10
|
||||
; CGP-NEXT: v_xor_b32_e32 v3, v3, v10
|
||||
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10
|
||||
; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc
|
||||
; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v3
|
||||
; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
||||
; CGP-NEXT: v_lshrrev_b32_e32 v4, 20, v4
|
||||
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4
|
||||
; CGP-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
|
||||
; CGP-NEXT: v_ashr_i64 v[0:1], v[0:1], 12
|
||||
; CGP-NEXT: v_ashr_i64 v[2:3], v[2:3], 12
|
||||
; CGP-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = sdiv <2 x i64> %num, <i64 4096, i64 4096>
|
||||
ret <2 x i64> %result
|
||||
@@ -3398,3 +2841,13 @@ define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
|
||||
%result = sdiv <2 x i64> %num.mask, %den.mask
|
||||
ret <2 x i64> %result
|
||||
}
|
||||
|
||||
define i64 @v_sdiv_i64_exact(i64 %num) {
|
||||
; CHECK-LABEL: v_sdiv_i64_exact:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_ashr_i64 v[0:1], v[0:1], 12
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%result = sdiv exact i64 %num, 4096
|
||||
ret i64 %result
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -238,7 +238,7 @@ define i128 @fptosi_f64_to_i128(double %x) {
|
||||
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
|
||||
; GISEL-NEXT: s_cbranch_execz .LBB0_4
|
||||
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
|
||||
; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xfffffbcd, v6
|
||||
; GISEL-NEXT: v_add_u32_e32 v6, 0xfffffbcd, v6
|
||||
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
|
||||
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
|
||||
@@ -612,7 +612,7 @@ define i128 @fptoui_f64_to_i128(double %x) {
|
||||
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
|
||||
; GISEL-NEXT: s_cbranch_execz .LBB1_4
|
||||
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
|
||||
; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xfffffbcd, v6
|
||||
; GISEL-NEXT: v_add_u32_e32 v6, 0xfffffbcd, v6
|
||||
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
|
||||
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
|
||||
@@ -978,7 +978,7 @@ define i128 @fptosi_f32_to_i128(float %x) {
|
||||
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
|
||||
; GISEL-NEXT: s_cbranch_execz .LBB2_4
|
||||
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
|
||||
; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xffffff6a, v6
|
||||
; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff6a, v6
|
||||
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
|
||||
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
|
||||
@@ -1338,7 +1338,7 @@ define i128 @fptoui_f32_to_i128(float %x) {
|
||||
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
|
||||
; GISEL-NEXT: s_cbranch_execz .LBB3_4
|
||||
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
|
||||
; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xffffff6a, v6
|
||||
; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff6a, v6
|
||||
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
|
||||
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
|
||||
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
|
||||
|
||||
929
llvm/test/CodeGen/X86/apx/domain-reassignment.mir
Normal file
929
llvm/test/CodeGen/X86/apx/domain-reassignment.mir
Normal file
@@ -0,0 +1,929 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -run-pass x86-domain-reassignment -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq -o - %s | FileCheck %s
|
||||
--- |
|
||||
; ModuleID = '../test/CodeGen/X86/gpr-to-mask.ll'
|
||||
source_filename = "../test/CodeGen/X86/gpr-to-mask.ll"
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-unknown"
|
||||
|
||||
define void @test_fcmp_storefloat(i1 %cond, ptr %fptr, float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) #0 {
|
||||
entry:
|
||||
br i1 %cond, label %if, label %else
|
||||
|
||||
if: ; preds = %entry
|
||||
%cmp1 = fcmp oeq float %f3, %f4
|
||||
br label %exit
|
||||
|
||||
else: ; preds = %entry
|
||||
%cmp2 = fcmp oeq float %f5, %f6
|
||||
br label %exit
|
||||
|
||||
exit: ; preds = %else, %if
|
||||
%val = phi i1 [ %cmp1, %if ], [ %cmp2, %else ]
|
||||
%selected = select i1 %val, float %f1, float %f2
|
||||
store float %selected, ptr %fptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_8bitops() #0 {
|
||||
ret void
|
||||
}
|
||||
define void @test_16bitops() #0 {
|
||||
ret void
|
||||
}
|
||||
define void @test_32bitops() #0 {
|
||||
ret void
|
||||
}
|
||||
define void @test_64bitops() #0 {
|
||||
ret void
|
||||
}
|
||||
define void @test_16bitext() #0 {
|
||||
ret void
|
||||
}
|
||||
define void @test_32bitext() #0 {
|
||||
ret void
|
||||
}
|
||||
define void @test_64bitext() #0 {
|
||||
ret void
|
||||
}
|
||||
; Note that this function need to be compiled with -global-isel
|
||||
; to obtain testable MIR
|
||||
define void @test_unused(i64 %0) #0 {
|
||||
%unused = lshr i64 %0, 7
|
||||
ret void
|
||||
}
|
||||
...
|
||||
---
|
||||
name: test_fcmp_storefloat
|
||||
alignment: 16
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: gr8, preferred-register: '' }
|
||||
- { id: 1, class: gr8, preferred-register: '' }
|
||||
- { id: 2, class: gr8, preferred-register: '' }
|
||||
- { id: 3, class: gr32, preferred-register: '' }
|
||||
- { id: 4, class: gr64, preferred-register: '' }
|
||||
- { id: 5, class: vr128x, preferred-register: '' }
|
||||
- { id: 6, class: fr32x, preferred-register: '' }
|
||||
- { id: 7, class: fr32x, preferred-register: '' }
|
||||
- { id: 8, class: fr32x, preferred-register: '' }
|
||||
- { id: 9, class: fr32x, preferred-register: '' }
|
||||
- { id: 10, class: fr32x, preferred-register: '' }
|
||||
- { id: 11, class: gr8, preferred-register: '' }
|
||||
- { id: 12, class: vk1, preferred-register: '' }
|
||||
- { id: 13, class: gr32, preferred-register: '' }
|
||||
- { id: 14, class: vk1, preferred-register: '' }
|
||||
- { id: 15, class: gr32, preferred-register: '' }
|
||||
- { id: 16, class: gr32, preferred-register: '' }
|
||||
- { id: 17, class: gr32, preferred-register: '' }
|
||||
- { id: 18, class: vk1wm, preferred-register: '' }
|
||||
- { id: 19, class: vr128x, preferred-register: '' }
|
||||
- { id: 20, class: vr128, preferred-register: '' }
|
||||
- { id: 21, class: vr128, preferred-register: '' }
|
||||
- { id: 22, class: fr32x, preferred-register: '' }
|
||||
liveins:
|
||||
- { reg: '$edi', virtual-reg: '%3' }
|
||||
- { reg: '$rsi', virtual-reg: '%4' }
|
||||
- { reg: '$xmm0', virtual-reg: '%5' }
|
||||
- { reg: '$xmm1', virtual-reg: '%6' }
|
||||
- { reg: '$xmm2', virtual-reg: '%7' }
|
||||
- { reg: '$xmm3', virtual-reg: '%8' }
|
||||
- { reg: '$xmm4', virtual-reg: '%9' }
|
||||
- { reg: '$xmm5', virtual-reg: '%10' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 0
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 0
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 4294967295
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack:
|
||||
stack:
|
||||
constants:
|
||||
body: |
|
||||
; CHECK-LABEL: name: test_fcmp_storefloat
|
||||
; CHECK: bb.0.entry:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: liveins: $edi, $rsi, $xmm0, $xmm1, $xmm2, $xmm3, $xmm4, $xmm5
|
||||
; CHECK: [[COPY:%[0-9]+]]:fr32x = COPY $xmm5
|
||||
; CHECK: [[COPY1:%[0-9]+]]:fr32x = COPY $xmm4
|
||||
; CHECK: [[COPY2:%[0-9]+]]:fr32x = COPY $xmm3
|
||||
; CHECK: [[COPY3:%[0-9]+]]:fr32x = COPY $xmm2
|
||||
; CHECK: [[COPY4:%[0-9]+]]:fr32x = COPY $xmm1
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vr128x = COPY $xmm0
|
||||
; CHECK: [[COPY6:%[0-9]+]]:gr64 = COPY $rsi
|
||||
; CHECK: [[COPY7:%[0-9]+]]:gr32 = COPY $edi
|
||||
; CHECK: [[COPY8:%[0-9]+]]:gr8 = COPY [[COPY7]].sub_8bit
|
||||
; CHECK: TEST8ri killed [[COPY8]], 1, implicit-def $eflags
|
||||
; CHECK: JCC_1 %bb.2, 4, implicit $eflags
|
||||
; CHECK: JMP_1 %bb.1
|
||||
; CHECK: bb.1.if:
|
||||
; CHECK: successors: %bb.3(0x80000000)
|
||||
; CHECK: [[VCMPSSZrri:%[0-9]+]]:vk1 = VCMPSSZrri [[COPY3]], [[COPY2]], 0
|
||||
; CHECK: [[COPY9:%[0-9]+]]:vk32 = COPY [[VCMPSSZrri]]
|
||||
; CHECK: [[COPY10:%[0-9]+]]:vk8 = COPY [[COPY9]]
|
||||
; CHECK: JMP_1 %bb.3
|
||||
; CHECK: bb.2.else:
|
||||
; CHECK: successors: %bb.3(0x80000000)
|
||||
; CHECK: [[VCMPSSZrri1:%[0-9]+]]:vk1 = VCMPSSZrri [[COPY1]], [[COPY]], 0
|
||||
; CHECK: [[COPY11:%[0-9]+]]:vk32 = COPY [[VCMPSSZrri1]]
|
||||
; CHECK: [[COPY12:%[0-9]+]]:vk8 = COPY [[COPY11]]
|
||||
; CHECK: bb.3.exit:
|
||||
; CHECK: [[PHI:%[0-9]+]]:vk8 = PHI [[COPY12]], %bb.2, [[COPY10]], %bb.1
|
||||
; CHECK: [[DEF:%[0-9]+]]:vk32 = IMPLICIT_DEF
|
||||
; CHECK: [[COPY13:%[0-9]+]]:vk32 = COPY [[PHI]]
|
||||
; CHECK: [[COPY14:%[0-9]+]]:vk1wm = COPY [[COPY13]]
|
||||
; CHECK: [[COPY15:%[0-9]+]]:vr128x = COPY [[COPY4]]
|
||||
; CHECK: [[DEF1:%[0-9]+]]:vr128 = IMPLICIT_DEF
|
||||
; CHECK: [[VMOVSSZrrk:%[0-9]+]]:vr128 = VMOVSSZrrk [[COPY15]], killed [[COPY14]], killed [[DEF1]], [[COPY5]]
|
||||
; CHECK: [[COPY16:%[0-9]+]]:fr32x = COPY [[VMOVSSZrrk]]
|
||||
; CHECK: VMOVSSZmr [[COPY6]], 1, $noreg, 0, $noreg, killed [[COPY16]] :: (store (s32) into %ir.fptr)
|
||||
; CHECK: RET 0
|
||||
bb.0.entry:
|
||||
successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
liveins: $edi, $rsi, $xmm0, $xmm1, $xmm2, $xmm3, $xmm4, $xmm5
|
||||
|
||||
%10 = COPY $xmm5
|
||||
%9 = COPY $xmm4
|
||||
%8 = COPY $xmm3
|
||||
%7 = COPY $xmm2
|
||||
%6 = COPY $xmm1
|
||||
%5 = COPY $xmm0
|
||||
%4 = COPY $rsi
|
||||
%3 = COPY $edi
|
||||
%11 = COPY %3.sub_8bit
|
||||
TEST8ri killed %11, 1, implicit-def $eflags
|
||||
JCC_1 %bb.2, 4, implicit $eflags
|
||||
JMP_1 %bb.1
|
||||
|
||||
bb.1.if:
|
||||
successors: %bb.3(0x80000000)
|
||||
|
||||
%14 = VCMPSSZrri %7, %8, 0, implicit $mxcsr
|
||||
|
||||
; check that cross domain copies are replaced with same domain copies.
|
||||
|
||||
%15 = COPY %14
|
||||
%0 = COPY %15.sub_8bit
|
||||
JMP_1 %bb.3
|
||||
|
||||
bb.2.else:
|
||||
successors: %bb.3(0x80000000)
|
||||
%12 = VCMPSSZrri %9, %10, 0, implicit $mxcsr
|
||||
|
||||
; check that cross domain copies are replaced with same domain copies.
|
||||
|
||||
%13 = COPY %12
|
||||
%1 = COPY %13.sub_8bit
|
||||
|
||||
bb.3.exit:
|
||||
|
||||
; check PHI, IMPLICIT_DEF, and INSERT_SUBREG replacers.
|
||||
|
||||
%2 = PHI %1, %bb.2, %0, %bb.1
|
||||
%17 = IMPLICIT_DEF
|
||||
%16 = INSERT_SUBREG %17, %2, %subreg.sub_8bit_hi
|
||||
%18 = COPY %16
|
||||
%19 = COPY %6
|
||||
%21 = IMPLICIT_DEF
|
||||
%20 = VMOVSSZrrk %19, killed %18, killed %21, %5
|
||||
%22 = COPY %20
|
||||
VMOVSSZmr %4, 1, $noreg, 0, $noreg, killed %22 :: (store (s32) into %ir.fptr)
|
||||
RET 0
|
||||
|
||||
...
|
||||
---
|
||||
name: test_8bitops
|
||||
alignment: 16
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: gr64, preferred-register: '' }
|
||||
- { id: 1, class: vr512, preferred-register: '' }
|
||||
- { id: 2, class: vr512, preferred-register: '' }
|
||||
- { id: 3, class: vr512, preferred-register: '' }
|
||||
- { id: 4, class: vr512, preferred-register: '' }
|
||||
- { id: 5, class: vk8, preferred-register: '' }
|
||||
- { id: 6, class: gr32, preferred-register: '' }
|
||||
- { id: 7, class: gr8, preferred-register: '' }
|
||||
- { id: 8, class: gr32, preferred-register: '' }
|
||||
- { id: 9, class: gr32, preferred-register: '' }
|
||||
- { id: 10, class: vk8wm, preferred-register: '' }
|
||||
- { id: 11, class: vr512, preferred-register: '' }
|
||||
- { id: 12, class: gr8, preferred-register: '' }
|
||||
- { id: 13, class: gr8, preferred-register: '' }
|
||||
- { id: 14, class: gr8, preferred-register: '' }
|
||||
- { id: 15, class: gr8, preferred-register: '' }
|
||||
- { id: 16, class: gr8, preferred-register: '' }
|
||||
- { id: 17, class: gr8, preferred-register: '' }
|
||||
- { id: 18, class: gr8, preferred-register: '' }
|
||||
liveins:
|
||||
- { reg: '$rdi', virtual-reg: '%0' }
|
||||
- { reg: '$zmm0', virtual-reg: '%1' }
|
||||
- { reg: '$zmm1', virtual-reg: '%2' }
|
||||
- { reg: '$zmm2', virtual-reg: '%3' }
|
||||
- { reg: '$zmm3', virtual-reg: '%4' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 0
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 0
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 4294967295
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack:
|
||||
stack:
|
||||
constants:
|
||||
body: |
|
||||
; CHECK-LABEL: name: test_8bitops
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $rdi, $zmm0, $zmm1, $zmm2, $zmm3
|
||||
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY $zmm1
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vr512 = COPY $zmm2
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vr512 = COPY $zmm3
|
||||
; CHECK: [[VCMPPDZrri:%[0-9]+]]:vk8 = VCMPPDZrri [[COPY3]], [[COPY4]], 0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vk32 = COPY [[VCMPPDZrri]]
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vk8 = COPY [[COPY5]]
|
||||
; CHECK: [[KSHIFTRBri:%[0-9]+]]:vk8 = KSHIFTRBri [[COPY6]], 2
|
||||
; CHECK: [[KSHIFTLBri:%[0-9]+]]:vk8 = KSHIFTLBri [[KSHIFTRBri]], 1
|
||||
; CHECK: [[KNOTBrr:%[0-9]+]]:vk8 = KNOTBrr [[KSHIFTLBri]]
|
||||
; CHECK: [[KORBrr:%[0-9]+]]:vk8 = KORBrr [[KNOTBrr]], [[KSHIFTRBri]]
|
||||
; CHECK: [[KANDBrr:%[0-9]+]]:vk8 = KANDBrr [[KORBrr]], [[KSHIFTLBri]]
|
||||
; CHECK: [[KXORBrr:%[0-9]+]]:vk8 = KXORBrr [[KANDBrr]], [[KSHIFTRBri]]
|
||||
; CHECK: [[KADDBrr:%[0-9]+]]:vk8 = KADDBrr [[KXORBrr]], [[KNOTBrr]]
|
||||
; CHECK: [[DEF:%[0-9]+]]:vk32 = IMPLICIT_DEF
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vk32 = COPY [[KADDBrr]]
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vk8wm = COPY [[COPY7]]
|
||||
; CHECK: [[VMOVAPDZrrk:%[0-9]+]]:vr512 = VMOVAPDZrrk [[COPY2]], killed [[COPY8]], [[COPY1]]
|
||||
; CHECK: VMOVAPDZmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVAPDZrrk]]
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: bb.2:
|
||||
; CHECK: RET 0
|
||||
bb.0:
|
||||
liveins: $rdi, $zmm0, $zmm1, $zmm2, $zmm3
|
||||
|
||||
%0 = COPY $rdi
|
||||
%1 = COPY $zmm0
|
||||
%2 = COPY $zmm1
|
||||
%3 = COPY $zmm2
|
||||
%4 = COPY $zmm3
|
||||
|
||||
%5 = VCMPPDZrri %3, %4, 0, implicit $mxcsr
|
||||
%6 = COPY %5
|
||||
%7 = COPY %6.sub_8bit
|
||||
|
||||
%12 = SHR8ri %7, 2, implicit-def dead $eflags
|
||||
%13 = SHL8ri %12, 1, implicit-def dead $eflags
|
||||
%14 = NOT8r %13
|
||||
%15 = OR8rr %14, %12, implicit-def dead $eflags
|
||||
%16 = AND8rr %15, %13, implicit-def dead $eflags
|
||||
%17 = XOR8rr %16, %12, implicit-def dead $eflags
|
||||
%18 = ADD8rr %17, %14, implicit-def dead $eflags
|
||||
|
||||
%8 = IMPLICIT_DEF
|
||||
%9 = INSERT_SUBREG %8, %18, %subreg.sub_8bit_hi
|
||||
%10 = COPY %9
|
||||
%11 = VMOVAPDZrrk %2, killed %10, %1
|
||||
VMOVAPDZmr %0, 1, $noreg, 0, $noreg, killed %11
|
||||
|
||||
; FIXME We can't replace TEST with KTEST due to flag differences
|
||||
; TEST8rr %18, %18, implicit-def $eflags
|
||||
; JCC_1 %bb.1, 4, implicit $eflags
|
||||
; JMP_1 %bb.2
|
||||
|
||||
bb.1:
|
||||
|
||||
bb.2:
|
||||
RET 0
|
||||
|
||||
...
|
||||
---
|
||||
name: test_16bitops
|
||||
alignment: 16
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: gr64, preferred-register: '' }
|
||||
- { id: 1, class: vr512, preferred-register: '' }
|
||||
- { id: 2, class: vr512, preferred-register: '' }
|
||||
- { id: 3, class: vr512, preferred-register: '' }
|
||||
- { id: 4, class: vr512, preferred-register: '' }
|
||||
- { id: 5, class: vk16, preferred-register: '' }
|
||||
- { id: 6, class: gr32, preferred-register: '' }
|
||||
- { id: 7, class: gr16, preferred-register: '' }
|
||||
- { id: 8, class: gr32, preferred-register: '' }
|
||||
- { id: 9, class: gr32, preferred-register: '' }
|
||||
- { id: 10, class: vk16wm, preferred-register: '' }
|
||||
- { id: 11, class: vr512, preferred-register: '' }
|
||||
- { id: 12, class: gr16, preferred-register: '' }
|
||||
- { id: 13, class: gr16, preferred-register: '' }
|
||||
- { id: 14, class: gr16, preferred-register: '' }
|
||||
- { id: 15, class: gr16, preferred-register: '' }
|
||||
- { id: 16, class: gr16, preferred-register: '' }
|
||||
- { id: 17, class: gr16, preferred-register: '' }
|
||||
liveins:
|
||||
- { reg: '$rdi', virtual-reg: '%0' }
|
||||
- { reg: '$zmm0', virtual-reg: '%1' }
|
||||
- { reg: '$zmm1', virtual-reg: '%2' }
|
||||
- { reg: '$zmm2', virtual-reg: '%3' }
|
||||
- { reg: '$zmm3', virtual-reg: '%4' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 0
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 0
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 4294967295
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack:
|
||||
stack:
|
||||
constants:
|
||||
body: |
|
||||
; CHECK-LABEL: name: test_16bitops
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $rdi, $zmm0, $zmm1, $zmm2, $zmm3
|
||||
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY $zmm1
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vr512 = COPY $zmm2
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vr512 = COPY $zmm3
|
||||
; CHECK: [[VCMPPSZrri:%[0-9]+]]:vk16 = VCMPPSZrri [[COPY3]], [[COPY4]], 0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vk32 = COPY [[VCMPPSZrri]]
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vk16 = COPY [[COPY5]]
|
||||
; CHECK: [[KSHIFTRWri:%[0-9]+]]:vk16 = KSHIFTRWri [[COPY6]], 2
|
||||
; CHECK: [[KSHIFTLWri:%[0-9]+]]:vk16 = KSHIFTLWri [[KSHIFTRWri]], 1
|
||||
; CHECK: [[KNOTWrr:%[0-9]+]]:vk16 = KNOTWrr [[KSHIFTLWri]]
|
||||
; CHECK: [[KORWrr:%[0-9]+]]:vk16 = KORWrr [[KNOTWrr]], [[KSHIFTRWri]]
|
||||
; CHECK: [[KANDWrr:%[0-9]+]]:vk16 = KANDWrr [[KORWrr]], [[KSHIFTLWri]]
|
||||
; CHECK: [[KXORWrr:%[0-9]+]]:vk16 = KXORWrr [[KANDWrr]], [[KSHIFTRWri]]
|
||||
; CHECK: [[DEF:%[0-9]+]]:vk32 = IMPLICIT_DEF
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vk32 = COPY [[KXORWrr]]
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vk16wm = COPY [[COPY7]]
|
||||
; CHECK: [[VMOVAPSZrrk:%[0-9]+]]:vr512 = VMOVAPSZrrk [[COPY2]], killed [[COPY8]], [[COPY1]]
|
||||
; CHECK: VMOVAPSZmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVAPSZrrk]]
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: bb.2:
|
||||
; CHECK: RET 0
|
||||
bb.0:
|
||||
liveins: $rdi, $zmm0, $zmm1, $zmm2, $zmm3
|
||||
|
||||
%0 = COPY $rdi
|
||||
%1 = COPY $zmm0
|
||||
%2 = COPY $zmm1
|
||||
%3 = COPY $zmm2
|
||||
%4 = COPY $zmm3
|
||||
|
||||
%5 = VCMPPSZrri %3, %4, 0, implicit $mxcsr
|
||||
%6 = COPY %5
|
||||
%7 = COPY %6.sub_16bit
|
||||
|
||||
%12 = SHR16ri %7, 2, implicit-def dead $eflags
|
||||
%13 = SHL16ri %12, 1, implicit-def dead $eflags
|
||||
%14 = NOT16r %13
|
||||
%15 = OR16rr %14, %12, implicit-def dead $eflags
|
||||
%16 = AND16rr %15, %13, implicit-def dead $eflags
|
||||
%17 = XOR16rr %16, %12, implicit-def dead $eflags
|
||||
|
||||
%8 = IMPLICIT_DEF
|
||||
%9 = INSERT_SUBREG %8, %17, %subreg.sub_16bit
|
||||
%10 = COPY %9
|
||||
%11 = VMOVAPSZrrk %2, killed %10, %1
|
||||
VMOVAPSZmr %0, 1, $noreg, 0, $noreg, killed %11
|
||||
|
||||
; FIXME We can't replace TEST with KTEST due to flag differences
|
||||
; FIXME TEST16rr %17, %17, implicit-def $eflags
|
||||
; FIXME JCC_1 %bb.1, 4, implicit $eflags
|
||||
; FIXME JMP_1 %bb.2
|
||||
|
||||
bb.1:
|
||||
|
||||
bb.2:
|
||||
RET 0
|
||||
|
||||
...
|
||||
---
|
||||
name: test_32bitops
|
||||
alignment: 16
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: gr64, preferred-register: '' }
|
||||
- { id: 1, class: vr512, preferred-register: '' }
|
||||
- { id: 2, class: vr512, preferred-register: '' }
|
||||
- { id: 3, class: vk32wm, preferred-register: '' }
|
||||
- { id: 4, class: vr512, preferred-register: '' }
|
||||
- { id: 5, class: gr32, preferred-register: '' }
|
||||
- { id: 6, class: gr32, preferred-register: '' }
|
||||
- { id: 7, class: gr32, preferred-register: '' }
|
||||
- { id: 8, class: gr32, preferred-register: '' }
|
||||
- { id: 9, class: gr32, preferred-register: '' }
|
||||
- { id: 10, class: gr32, preferred-register: '' }
|
||||
- { id: 11, class: gr32, preferred-register: '' }
|
||||
- { id: 12, class: gr32, preferred-register: '' }
|
||||
- { id: 13, class: gr32, preferred-register: '' }
|
||||
liveins:
|
||||
- { reg: '$rdi', virtual-reg: '%0' }
|
||||
- { reg: '$zmm0', virtual-reg: '%1' }
|
||||
- { reg: '$zmm1', virtual-reg: '%2' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 0
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 0
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 4294967295
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack:
|
||||
stack:
|
||||
constants:
|
||||
body: |
|
||||
; CHECK-LABEL: name: test_32bitops
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $rdi, $zmm0, $zmm1
|
||||
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY $zmm1
|
||||
; CHECK: [[KMOVDkm:%[0-9]+]]:vk32 = KMOVDkm [[COPY]], 1, $noreg, 0, $noreg
|
||||
; CHECK: [[KSHIFTRDri:%[0-9]+]]:vk32 = KSHIFTRDri [[KMOVDkm]], 2
|
||||
; CHECK: [[KSHIFTLDri:%[0-9]+]]:vk32 = KSHIFTLDri [[KSHIFTRDri]], 1
|
||||
; CHECK: [[KNOTDrr:%[0-9]+]]:vk32 = KNOTDrr [[KSHIFTLDri]]
|
||||
; CHECK: [[KORDrr:%[0-9]+]]:vk32 = KORDrr [[KNOTDrr]], [[KSHIFTRDri]]
|
||||
; CHECK: [[KANDDrr:%[0-9]+]]:vk32 = KANDDrr [[KORDrr]], [[KSHIFTLDri]]
|
||||
; CHECK: [[KXORDrr:%[0-9]+]]:vk32 = KXORDrr [[KANDDrr]], [[KSHIFTRDri]]
|
||||
; CHECK: [[KANDNDrr:%[0-9]+]]:vk32 = KANDNDrr [[KXORDrr]], [[KORDrr]]
|
||||
; CHECK: [[KADDDrr:%[0-9]+]]:vk32 = KADDDrr [[KANDNDrr]], [[KXORDrr]]
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vk32wm = COPY [[KADDDrr]]
|
||||
; CHECK: [[VMOVDQU16Zrrk:%[0-9]+]]:vr512 = VMOVDQU16Zrrk [[COPY2]], killed [[COPY3]], [[COPY1]]
|
||||
; CHECK: VMOVDQA32Zmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVDQU16Zrrk]]
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: bb.2:
|
||||
; CHECK: RET 0
|
||||
bb.0:
|
||||
liveins: $rdi, $zmm0, $zmm1
|
||||
|
||||
%0 = COPY $rdi
|
||||
%1 = COPY $zmm0
|
||||
%2 = COPY $zmm1
|
||||
|
||||
%5 = MOV32rm %0, 1, $noreg, 0, $noreg
|
||||
%6 = SHR32ri %5, 2, implicit-def dead $eflags
|
||||
%7 = SHL32ri %6, 1, implicit-def dead $eflags
|
||||
%8 = NOT32r %7
|
||||
%9 = OR32rr %8, %6, implicit-def dead $eflags
|
||||
%10 = AND32rr %9, %7, implicit-def dead $eflags
|
||||
%11 = XOR32rr %10, %6, implicit-def dead $eflags
|
||||
%12 = ANDN32rr %11, %9, implicit-def dead $eflags
|
||||
%13 = ADD32rr %12, %11, implicit-def dead $eflags
|
||||
|
||||
%3 = COPY %13
|
||||
%4 = VMOVDQU16Zrrk %2, killed %3, %1
|
||||
VMOVDQA32Zmr %0, 1, $noreg, 0, $noreg, killed %4
|
||||
|
||||
; FIXME We can't replace TEST with KTEST due to flag differences
|
||||
; FIXME TEST32rr %13, %13, implicit-def $eflags
|
||||
; FIXME JCC_1 %bb.1, 4, implicit $eflags
|
||||
; FIXME JMP_1 %bb.2
|
||||
|
||||
bb.1:
|
||||
|
||||
bb.2:
|
||||
RET 0
|
||||
|
||||
...
|
||||
---
|
||||
name: test_64bitops
|
||||
alignment: 16
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: gr64, preferred-register: '' }
|
||||
- { id: 1, class: vr512, preferred-register: '' }
|
||||
- { id: 2, class: vr512, preferred-register: '' }
|
||||
- { id: 3, class: vk64wm, preferred-register: '' }
|
||||
- { id: 4, class: vr512, preferred-register: '' }
|
||||
- { id: 5, class: gr64, preferred-register: '' }
|
||||
- { id: 6, class: gr64, preferred-register: '' }
|
||||
- { id: 7, class: gr64, preferred-register: '' }
|
||||
- { id: 8, class: gr64, preferred-register: '' }
|
||||
- { id: 9, class: gr64, preferred-register: '' }
|
||||
- { id: 10, class: gr64, preferred-register: '' }
|
||||
- { id: 11, class: gr64, preferred-register: '' }
|
||||
- { id: 12, class: gr64, preferred-register: '' }
|
||||
- { id: 13, class: gr64, preferred-register: '' }
|
||||
liveins:
|
||||
- { reg: '$rdi', virtual-reg: '%0' }
|
||||
- { reg: '$zmm0', virtual-reg: '%1' }
|
||||
- { reg: '$zmm1', virtual-reg: '%2' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 0
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 0
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 4294967295
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack:
|
||||
stack:
|
||||
constants:
|
||||
body: |
|
||||
; CHECK-LABEL: name: test_64bitops
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $rdi, $zmm0, $zmm1
|
||||
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY $zmm1
|
||||
; CHECK: [[KMOVQkm:%[0-9]+]]:vk64 = KMOVQkm [[COPY]], 1, $noreg, 0, $noreg
|
||||
; CHECK: [[KSHIFTRQri:%[0-9]+]]:vk64 = KSHIFTRQri [[KMOVQkm]], 2
|
||||
; CHECK: [[KSHIFTLQri:%[0-9]+]]:vk64 = KSHIFTLQri [[KSHIFTRQri]], 1
|
||||
; CHECK: [[KNOTQrr:%[0-9]+]]:vk64 = KNOTQrr [[KSHIFTLQri]]
|
||||
; CHECK: [[KORQrr:%[0-9]+]]:vk64 = KORQrr [[KNOTQrr]], [[KSHIFTRQri]]
|
||||
; CHECK: [[KANDQrr:%[0-9]+]]:vk64 = KANDQrr [[KORQrr]], [[KSHIFTLQri]]
|
||||
; CHECK: [[KXORQrr:%[0-9]+]]:vk64 = KXORQrr [[KANDQrr]], [[KSHIFTRQri]]
|
||||
; CHECK: [[KANDNQrr:%[0-9]+]]:vk64 = KANDNQrr [[KXORQrr]], [[KORQrr]]
|
||||
; CHECK: [[KADDQrr:%[0-9]+]]:vk64 = KADDQrr [[KANDNQrr]], [[KXORQrr]]
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vk64wm = COPY [[KADDQrr]]
|
||||
; CHECK: [[VMOVDQU8Zrrk:%[0-9]+]]:vr512 = VMOVDQU8Zrrk [[COPY2]], killed [[COPY3]], [[COPY1]]
|
||||
; CHECK: VMOVDQA32Zmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVDQU8Zrrk]]
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: bb.2:
|
||||
; CHECK: RET 0
|
||||
bb.0:
|
||||
liveins: $rdi, $zmm0, $zmm1
|
||||
|
||||
%0 = COPY $rdi
|
||||
%1 = COPY $zmm0
|
||||
%2 = COPY $zmm1
|
||||
|
||||
%5 = MOV64rm %0, 1, $noreg, 0, $noreg
|
||||
%6 = SHR64ri %5, 2, implicit-def dead $eflags
|
||||
%7 = SHL64ri %6, 1, implicit-def dead $eflags
|
||||
%8 = NOT64r %7
|
||||
%9 = OR64rr %8, %6, implicit-def dead $eflags
|
||||
%10 = AND64rr %9, %7, implicit-def dead $eflags
|
||||
%11 = XOR64rr %10, %6, implicit-def dead $eflags
|
||||
%12 = ANDN64rr %11, %9, implicit-def dead $eflags
|
||||
%13 = ADD64rr %12, %11, implicit-def dead $eflags
|
||||
|
||||
%3 = COPY %13
|
||||
%4 = VMOVDQU8Zrrk %2, killed %3, %1
|
||||
VMOVDQA32Zmr %0, 1, $noreg, 0, $noreg, killed %4
|
||||
|
||||
; FIXME We can't replace TEST with KTEST due to flag differences
|
||||
; FIXME TEST64rr %13, %13, implicit-def $eflags
|
||||
; FIXME JCC_1 %bb.1, 4, implicit $eflags
|
||||
; FIXME JMP_1 %bb.2
|
||||
|
||||
bb.1:
|
||||
|
||||
bb.2:
|
||||
RET 0
|
||||
|
||||
...
|
||||
---
|
||||
name: test_16bitext
|
||||
alignment: 16
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: gr64, preferred-register: '' }
|
||||
- { id: 1, class: vr512, preferred-register: '' }
|
||||
- { id: 2, class: vr512, preferred-register: '' }
|
||||
- { id: 3, class: vk16wm, preferred-register: '' }
|
||||
- { id: 4, class: vr512, preferred-register: '' }
|
||||
- { id: 5, class: gr16, preferred-register: '' }
|
||||
- { id: 6, class: gr16, preferred-register: '' }
|
||||
liveins:
|
||||
- { reg: '$rdi', virtual-reg: '%0' }
|
||||
- { reg: '$zmm0', virtual-reg: '%1' }
|
||||
- { reg: '$zmm1', virtual-reg: '%2' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 0
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 0
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 4294967295
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack:
|
||||
stack:
|
||||
constants:
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $rdi, $zmm0, $zmm1
|
||||
|
||||
; CHECK-LABEL: name: test_16bitext
|
||||
; CHECK: liveins: $rdi, $zmm0, $zmm1
|
||||
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY $zmm1
|
||||
; CHECK: [[KMOVBkm:%[0-9]+]]:vk8 = KMOVBkm [[COPY]], 1, $noreg, 0, $noreg
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vk16 = COPY [[KMOVBkm]]
|
||||
; CHECK: [[KNOTWrr:%[0-9]+]]:vk16 = KNOTWrr [[COPY3]]
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vk16wm = COPY [[KNOTWrr]]
|
||||
; CHECK: [[VMOVAPSZrrk:%[0-9]+]]:vr512 = VMOVAPSZrrk [[COPY2]], killed [[COPY4]], [[COPY1]]
|
||||
; CHECK: VMOVAPSZmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVAPSZrrk]]
|
||||
; CHECK: RET 0
|
||||
%0 = COPY $rdi
|
||||
%1 = COPY $zmm0
|
||||
%2 = COPY $zmm1
|
||||
|
||||
%5 = MOVZX16rm8 %0, 1, $noreg, 0, $noreg
|
||||
%6 = NOT16r %5
|
||||
|
||||
%3 = COPY %6
|
||||
%4 = VMOVAPSZrrk %2, killed %3, %1
|
||||
VMOVAPSZmr %0, 1, $noreg, 0, $noreg, killed %4
|
||||
RET 0
|
||||
|
||||
...
|
||||
---
|
||||
name: test_32bitext
|
||||
alignment: 16
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: gr64, preferred-register: '' }
|
||||
- { id: 1, class: vr512, preferred-register: '' }
|
||||
- { id: 2, class: vr512, preferred-register: '' }
|
||||
- { id: 3, class: vk64wm, preferred-register: '' }
|
||||
- { id: 4, class: vr512, preferred-register: '' }
|
||||
- { id: 5, class: gr32, preferred-register: '' }
|
||||
- { id: 6, class: gr32, preferred-register: '' }
|
||||
- { id: 7, class: gr32, preferred-register: '' }
|
||||
liveins:
|
||||
- { reg: '$rdi', virtual-reg: '%0' }
|
||||
- { reg: '$zmm0', virtual-reg: '%1' }
|
||||
- { reg: '$zmm1', virtual-reg: '%2' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 0
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 0
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 4294967295
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack:
|
||||
stack:
|
||||
constants:
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $rdi, $zmm0, $zmm1
|
||||
|
||||
; CHECK-LABEL: name: test_32bitext
|
||||
; CHECK: liveins: $rdi, $zmm0, $zmm1
|
||||
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY $zmm1
|
||||
; CHECK: [[KMOVBkm:%[0-9]+]]:vk8 = KMOVBkm [[COPY]], 1, $noreg, 0, $noreg
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vk32 = COPY [[KMOVBkm]]
|
||||
; CHECK: [[KMOVWkm:%[0-9]+]]:vk16 = KMOVWkm [[COPY]], 1, $noreg, 0, $noreg
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vk32 = COPY [[KMOVWkm]]
|
||||
; CHECK: [[KADDDrr:%[0-9]+]]:vk32 = KADDDrr [[COPY3]], [[COPY4]]
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vk64wm = COPY [[KADDDrr]]
|
||||
; CHECK: [[VMOVDQU16Zrrk:%[0-9]+]]:vr512 = VMOVDQU16Zrrk [[COPY2]], killed [[COPY5]], [[COPY1]]
|
||||
; CHECK: VMOVDQA32Zmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVDQU16Zrrk]]
|
||||
; CHECK: RET 0
|
||||
%0 = COPY $rdi
|
||||
%1 = COPY $zmm0
|
||||
%2 = COPY $zmm1
|
||||
|
||||
%5 = MOVZX32rm8 %0, 1, $noreg, 0, $noreg
|
||||
%6 = MOVZX32rm16 %0, 1, $noreg, 0, $noreg
|
||||
%7 = ADD32rr %5, %6, implicit-def dead $eflags
|
||||
|
||||
%3 = COPY %7
|
||||
%4 = VMOVDQU16Zrrk %2, killed %3, %1
|
||||
VMOVDQA32Zmr %0, 1, $noreg, 0, $noreg, killed %4
|
||||
RET 0
|
||||
|
||||
...
|
||||
---
|
||||
name: test_64bitext
|
||||
alignment: 16
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: gr64, preferred-register: '' }
|
||||
- { id: 1, class: vr512, preferred-register: '' }
|
||||
- { id: 2, class: vr512, preferred-register: '' }
|
||||
- { id: 3, class: vk64wm, preferred-register: '' }
|
||||
- { id: 4, class: vr512, preferred-register: '' }
|
||||
- { id: 5, class: gr64, preferred-register: '' }
|
||||
- { id: 6, class: gr64, preferred-register: '' }
|
||||
- { id: 7, class: gr64, preferred-register: '' }
|
||||
liveins:
|
||||
- { reg: '$rdi', virtual-reg: '%0' }
|
||||
- { reg: '$zmm0', virtual-reg: '%1' }
|
||||
- { reg: '$zmm1', virtual-reg: '%2' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 0
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 0
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 4294967295
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack:
|
||||
stack:
|
||||
constants:
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $rdi, $zmm0, $zmm1
|
||||
|
||||
; CHECK-LABEL: name: test_64bitext
|
||||
; CHECK: liveins: $rdi, $zmm0, $zmm1
|
||||
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY $zmm1
|
||||
; CHECK: [[KMOVBkm:%[0-9]+]]:vk8 = KMOVBkm [[COPY]], 1, $noreg, 0, $noreg
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vk64 = COPY [[KMOVBkm]]
|
||||
; CHECK: [[KMOVWkm:%[0-9]+]]:vk16 = KMOVWkm [[COPY]], 1, $noreg, 0, $noreg
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vk64 = COPY [[KMOVWkm]]
|
||||
; CHECK: [[KADDQrr:%[0-9]+]]:vk64 = KADDQrr [[COPY3]], [[COPY4]]
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vk64wm = COPY [[KADDQrr]]
|
||||
; CHECK: [[VMOVDQU8Zrrk:%[0-9]+]]:vr512 = VMOVDQU8Zrrk [[COPY2]], killed [[COPY5]], [[COPY1]]
|
||||
; CHECK: VMOVDQA32Zmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVDQU8Zrrk]]
|
||||
; CHECK: RET 0
|
||||
%0 = COPY $rdi
|
||||
%1 = COPY $zmm0
|
||||
%2 = COPY $zmm1
|
||||
|
||||
%5 = MOVZX64rm8 %0, 1, $noreg, 0, $noreg
|
||||
%6 = MOVZX64rm16 %0, 1, $noreg, 0, $noreg
|
||||
%7 = ADD64rr %5, %6, implicit-def dead $eflags
|
||||
|
||||
%3 = COPY %7
|
||||
%4 = VMOVDQU8Zrrk %2, killed %3, %1
|
||||
VMOVDQA32Zmr %0, 1, $noreg, 0, $noreg, killed %4
|
||||
RET 0
|
||||
|
||||
...
|
||||
---
|
||||
name: test_unused
|
||||
alignment: 16
|
||||
exposesReturnsTwice: false
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
selected: true
|
||||
failedISel: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
callsEHReturn: false
|
||||
callsUnwindInit: false
|
||||
hasEHCatchret: false
|
||||
hasEHScopes: false
|
||||
hasEHFunclets: false
|
||||
isOutlined: false
|
||||
debugInstrRef: false
|
||||
failsVerification: false
|
||||
tracksDebugUserValues: false
|
||||
registers:
|
||||
# Note that this test is supposed to have registers without classes
|
||||
- { id: 0, class: _, preferred-register: '' }
|
||||
- { id: 1, class: _, preferred-register: '' }
|
||||
- { id: 2, class: _, preferred-register: '' }
|
||||
liveins:
|
||||
- { reg: '$rdi', virtual-reg: '' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 0
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 1
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
functionContext: ''
|
||||
maxCallFrameSize: 4294967295
|
||||
cvBytesOfCalleeSavedRegisters: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
hasTailCall: false
|
||||
localFrameSize: 0
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack: []
|
||||
stack: []
|
||||
entry_values: []
|
||||
callSites: []
|
||||
debugValueSubstitutions: []
|
||||
constants: []
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.1 (%ir-block.1):
|
||||
liveins: $rdi
|
||||
|
||||
RET 0
|
||||
|
||||
...
|
||||
@@ -2,6 +2,9 @@
|
||||
; RUN: opt -aarch64-lit -aarch64-lit-verify -verify-dom-info -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s
|
||||
; RUN: opt -aarch64-lit -simplifycfg -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=LOOP-DEL
|
||||
; RUN: opt -aarch64-lit -mtriple aarch64-unknown-linux-gnu -S < %s | FileCheck %s --check-prefix=NO-TRANSFORM
|
||||
; RUN: opt -p aarch64-lit -aarch64-lit-verify -verify-dom-info -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s
|
||||
; RUN: opt -passes='function(loop(aarch64-lit)),simplifycfg' -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=LOOP-DEL
|
||||
; RUN: opt -p aarch64-lit -mtriple aarch64-unknown-linux-gnu -S < %s | FileCheck %s --check-prefix=NO-TRANSFORM
|
||||
|
||||
define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 %len, i32 %extra, i32 %n) {
|
||||
; CHECK-LABEL: define i32 @compare_bytes_simple(
|
||||
@@ -780,7 +783,7 @@ define i32 @compare_bytes_extra_cmp(ptr %a, ptr %b, i32 %len, i32 %n, i32 %x) {
|
||||
; CHECK-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
|
||||
; CHECK-NEXT: [[INC:%.*]] = add i32 [[LEN_ADDR]], 1
|
||||
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
|
||||
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]]
|
||||
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
|
||||
; CHECK: while.body:
|
||||
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
|
||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
|
||||
@@ -788,11 +791,14 @@ define i32 @compare_bytes_extra_cmp(ptr %a, ptr %b, i32 %len, i32 %n, i32 %x) {
|
||||
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
|
||||
; CHECK-NEXT: [[TMP46:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
|
||||
; CHECK-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP45]], [[TMP46]]
|
||||
; CHECK-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
|
||||
; CHECK-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END_LOOPEXIT]]
|
||||
; CHECK: byte.compare:
|
||||
; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT]]
|
||||
; CHECK: while.end.loopexit:
|
||||
; CHECK-NEXT: [[INC_LCSSA1:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
|
||||
; CHECK-NEXT: br label [[WHILE_END]]
|
||||
; CHECK: while.end:
|
||||
; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[X]], [[ENTRY:%.*]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
|
||||
; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[INC_LCSSA1]], [[WHILE_END_LOOPEXIT]] ]
|
||||
; CHECK-NEXT: ret i32 [[INC_LCSSA]]
|
||||
;
|
||||
; LOOP-DEL-LABEL: define i32 @compare_bytes_extra_cmp(
|
||||
@@ -884,7 +890,7 @@ define i32 @compare_bytes_extra_cmp(ptr %a, ptr %b, i32 %len, i32 %n, i32 %x) {
|
||||
; NO-TRANSFORM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[PH]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
|
||||
; NO-TRANSFORM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
|
||||
; NO-TRANSFORM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
|
||||
; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]]
|
||||
; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
|
||||
; NO-TRANSFORM: while.body:
|
||||
; NO-TRANSFORM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
|
||||
; NO-TRANSFORM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
|
||||
@@ -892,9 +898,12 @@ define i32 @compare_bytes_extra_cmp(ptr %a, ptr %b, i32 %len, i32 %n, i32 %x) {
|
||||
; NO-TRANSFORM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
|
||||
; NO-TRANSFORM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
|
||||
; NO-TRANSFORM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
|
||||
; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
|
||||
; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END_LOOPEXIT]]
|
||||
; NO-TRANSFORM: while.end.loopexit:
|
||||
; NO-TRANSFORM-NEXT: [[INC_LCSSA1:%.*]] = phi i32 [ [[INC]], [[WHILE_COND]] ], [ [[INC]], [[WHILE_BODY]] ]
|
||||
; NO-TRANSFORM-NEXT: br label [[WHILE_END]]
|
||||
; NO-TRANSFORM: while.end:
|
||||
; NO-TRANSFORM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ], [ [[X]], [[ENTRY:%.*]] ]
|
||||
; NO-TRANSFORM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[INC_LCSSA1]], [[WHILE_END_LOOPEXIT]] ]
|
||||
; NO-TRANSFORM-NEXT: ret i32 [[INC_LCSSA]]
|
||||
;
|
||||
entry:
|
||||
@@ -908,7 +917,7 @@ while.cond:
|
||||
%len.addr = phi i32 [ %len, %ph ], [ %inc, %while.body ]
|
||||
%inc = add i32 %len.addr, 1
|
||||
%cmp.not = icmp eq i32 %inc, %n
|
||||
br i1 %cmp.not, label %while.end, label %while.body
|
||||
br i1 %cmp.not, label %while.end.loopexit, label %while.body
|
||||
|
||||
while.body:
|
||||
%idxprom = zext i32 %inc to i64
|
||||
@@ -917,10 +926,14 @@ while.body:
|
||||
%arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom
|
||||
%1 = load i8, ptr %arrayidx2
|
||||
%cmp.not2 = icmp eq i8 %0, %1
|
||||
br i1 %cmp.not2, label %while.cond, label %while.end
|
||||
br i1 %cmp.not2, label %while.cond, label %while.end.loopexit
|
||||
|
||||
while.end.loopexit:
|
||||
%inc.lcssa1 = phi i32 [ %inc, %while.cond ], [ %inc, %while.body ]
|
||||
br label %while.end
|
||||
|
||||
while.end:
|
||||
%inc.lcssa = phi i32 [ %inc, %while.body ], [ %inc, %while.cond ], [ %x, %entry ]
|
||||
%inc.lcssa = phi i32 [ %x, %entry ], [ %inc.lcssa1, %while.end.loopexit ]
|
||||
ret i32 %inc.lcssa
|
||||
}
|
||||
|
||||
|
||||
@@ -11,5 +11,7 @@ add_mlir_dialect_library(MLIRXeGPUDialect
|
||||
MLIRXeGPUEnumsIncGen
|
||||
|
||||
LINK_LIBS PUBLIC
|
||||
MLIRDialectUtils
|
||||
MLIRIR
|
||||
MLIRViewLikeInterface
|
||||
)
|
||||
|
||||
@@ -285,8 +285,7 @@ module {
|
||||
%has_runtime = sparse_tensor.has_runtime_library
|
||||
scf.if %has_runtime {
|
||||
// sparse_tensor.assemble copies buffers when running with the runtime
|
||||
// library. Deallocations are needed not needed when running in codgen
|
||||
// mode.
|
||||
// library. Deallocations are not needed when running in codegen mode.
|
||||
bufferization.dealloc_tensor %s4 : tensor<10x10xf64, #SortedCOO>
|
||||
bufferization.dealloc_tensor %s5 : tensor<10x10xf64, #SortedCOOI32>
|
||||
bufferization.dealloc_tensor %csr : tensor<2x2xf64, #CSR>
|
||||
|
||||
@@ -146,8 +146,7 @@ module {
|
||||
%has_runtime = sparse_tensor.has_runtime_library
|
||||
scf.if %has_runtime {
|
||||
// sparse_tensor.assemble copies buffers when running with the runtime
|
||||
// library. Deallocations are needed not needed when running in codgen
|
||||
// mode.
|
||||
// library. Deallocations are not needed when running in codegen mode.
|
||||
bufferization.dealloc_tensor %s0 : tensor<4x3x2xf32, #CCC>
|
||||
bufferization.dealloc_tensor %s1 : tensor<4x3x2xf32, #BatchedCSR>
|
||||
bufferization.dealloc_tensor %s2 : tensor<4x3x2xf32, #CSRDense>
|
||||
|
||||
Reference in New Issue
Block a user