Compare commits
17 Commits
mangling
...
globalisel
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0943946b0b | ||
|
|
7bb87d5338 | ||
|
|
a5d7fc1d10 | ||
|
|
29bf32efbb | ||
|
|
deefe3fbc9 | ||
|
|
07a5e31cb3 | ||
|
|
5c95484061 | ||
|
|
0e3fbfd1e1 | ||
|
|
0d08282310 | ||
|
|
44a81af510 | ||
|
|
35a66f965c | ||
|
|
631248dcd2 | ||
|
|
71defe40b7 | ||
|
|
3cd988914e | ||
|
|
893717446b | ||
|
|
aa7e4ba3ca | ||
|
|
af90e1975c |
@@ -44,6 +44,7 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
|
|||||||
<< " Val: " << Twine::utohexstr(BBInputOffset) << "\n");
|
<< " Val: " << Twine::utohexstr(BBInputOffset) << "\n");
|
||||||
LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n",
|
LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n",
|
||||||
getBBHash(HotFuncAddress, BBInputOffset)));
|
getBBHash(HotFuncAddress, BBInputOffset)));
|
||||||
|
(void)HotFuncAddress;
|
||||||
// In case of conflicts (same Key mapping to different Vals), the last
|
// In case of conflicts (same Key mapping to different Vals), the last
|
||||||
// update takes precedence. Of course it is not ideal to have conflicts and
|
// update takes precedence. Of course it is not ideal to have conflicts and
|
||||||
// those happen when we have an empty BB that either contained only
|
// those happen when we have an empty BB that either contained only
|
||||||
|
|||||||
@@ -8,6 +8,7 @@
|
|||||||
|
|
||||||
# RUN: llvm-dwarfdump --show-form --verbose --debug-addr %t.bolt > %t.txt
|
# RUN: llvm-dwarfdump --show-form --verbose --debug-addr %t.bolt > %t.txt
|
||||||
# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt >> %t.txt
|
# RUN: llvm-dwarfdump --show-form --verbose --debug-info %t.bolt >> %t.txt
|
||||||
|
# RUN: cat %t.txt | FileCheck --check-prefix=POSTCHECK %s
|
||||||
|
|
||||||
# This test checks that we correctly handle DW_AT_low_pc [DW_FORM_addrx] that is part of DW_TAG_label.
|
# This test checks that we correctly handle DW_AT_low_pc [DW_FORM_addrx] that is part of DW_TAG_label.
|
||||||
|
|
||||||
@@ -35,16 +36,14 @@
|
|||||||
# POSTCHECK-NEXT: DW_AT_name
|
# POSTCHECK-NEXT: DW_AT_name
|
||||||
# POSTCHECK-NEXT: DW_AT_decl_file
|
# POSTCHECK-NEXT: DW_AT_decl_file
|
||||||
# POSTCHECK-NEXT: DW_AT_decl_line
|
# POSTCHECK-NEXT: DW_AT_decl_line
|
||||||
# POSTCHECK-NEXT:
|
|
||||||
# POSTCHECK-NEXT:DW_AT_low_pc [DW_FORM_addrx] (indexed (00000002)
|
# POSTCHECK-NEXT:DW_AT_low_pc [DW_FORM_addrx] (indexed (00000002)
|
||||||
# POSTCHECK-SAME: [0x[[#ADDR]]
|
# POSTCHECK-SAME: 0x[[#ADDR]]
|
||||||
# POSTCHECK: DW_TAG_label
|
# POSTCHECK: DW_TAG_label
|
||||||
# POSTCHECK-NEXT: DW_AT_name
|
# POSTCHECK-NEXT: DW_AT_name
|
||||||
# POSTCHECK-NEXT: DW_AT_decl_file
|
# POSTCHECK-NEXT: DW_AT_decl_file
|
||||||
# POSTCHECK-NEXT: DW_AT_decl_line
|
# POSTCHECK-NEXT: DW_AT_decl_line
|
||||||
# POSTCHECK-NEXT:
|
|
||||||
# POSTCHECK-NEXT:DW_AT_low_pc [DW_FORM_addrx] (indexed (00000003)
|
# POSTCHECK-NEXT:DW_AT_low_pc [DW_FORM_addrx] (indexed (00000003)
|
||||||
# POSTCHECK-SAME: [0x[[#ADDR2]]
|
# POSTCHECK-SAME: 0x[[#ADDR2]]
|
||||||
|
|
||||||
# clang++ main.cpp -g -S
|
# clang++ main.cpp -g -S
|
||||||
# int main() {
|
# int main() {
|
||||||
|
|||||||
@@ -3019,6 +3019,10 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
|
|||||||
ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE,
|
ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE,
|
||||||
/*isNamedArg*/false);
|
/*isNamedArg*/false);
|
||||||
|
|
||||||
|
// Empty records are ignored for parameter passing purposes.
|
||||||
|
if (AI.isIgnore())
|
||||||
|
return CGF.CreateMemTemp(Ty);
|
||||||
|
|
||||||
// AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
|
// AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
|
||||||
// in the registers. If not go to step 7.
|
// in the registers. If not go to step 7.
|
||||||
if (!neededInt && !neededSSE)
|
if (!neededInt && !neededSSE)
|
||||||
|
|||||||
23
clang/test/CodeGenCXX/x86_64-vaarg.cpp
Normal file
23
clang/test/CodeGenCXX/x86_64-vaarg.cpp
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
||||||
|
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
|
||||||
|
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -x c -o - %s | FileCheck %s
|
||||||
|
|
||||||
|
typedef struct { struct {} a; } empty;
|
||||||
|
|
||||||
|
// CHECK-LABEL: @{{.*}}empty_record_test
|
||||||
|
// CHECK-NEXT: entry:
|
||||||
|
// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_EMPTY:%.*]], align 1
|
||||||
|
// CHECK-NEXT: [[Z_ADDR:%.*]] = alloca i32, align 4
|
||||||
|
// CHECK-NEXT: [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
|
||||||
|
// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_EMPTY]], align 1
|
||||||
|
// CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4
|
||||||
|
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
|
||||||
|
// CHECK-NEXT: call void @llvm.va_start(ptr [[ARRAYDECAY]])
|
||||||
|
// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
|
||||||
|
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[RETVAL]], ptr align 1 [[TMP]], i64 {{.*}}, i1 false)
|
||||||
|
// CHECK-NEXT: ret void
|
||||||
|
empty empty_record_test(int z, ...) {
|
||||||
|
__builtin_va_list list;
|
||||||
|
__builtin_va_start(list, z);
|
||||||
|
return __builtin_va_arg(list, empty);
|
||||||
|
}
|
||||||
@@ -175,7 +175,7 @@ void test_stat() {
|
|||||||
|
|
||||||
s.st_dev = i;
|
s.st_dev = i;
|
||||||
SAVE_ORIGINS(s)
|
SAVE_ORIGINS(s)
|
||||||
ret = stat("/nonexistent", &s);
|
ret = stat("/nonexistent_581cb021aba7", &s);
|
||||||
assert(-1 == ret);
|
assert(-1 == ret);
|
||||||
ASSERT_ZERO_LABEL(ret);
|
ASSERT_ZERO_LABEL(ret);
|
||||||
ASSERT_LABEL(s.st_dev, i_label);
|
ASSERT_LABEL(s.st_dev, i_label);
|
||||||
|
|||||||
@@ -61,7 +61,8 @@ if(LLVM_LIBC_FULL_BUILD OR LLVM_LIBC_GPU_BUILD)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
# We will build the GPU utilities if we are not doing a runtimes build.
|
# We will build the GPU utilities if we are not doing a runtimes build.
|
||||||
if(LLVM_LIBC_GPU_BUILD AND NOT LLVM_RUNTIMES_BUILD)
|
option(LIBC_BUILD_GPU_LOADER "Always build the GPU loader utilities" OFF)
|
||||||
|
if(LIBC_BUILD_GPU_LOADER OR (LLVM_LIBC_GPU_BUILD AND NOT LLVM_RUNTIMES_BUILD))
|
||||||
add_subdirectory(utils/gpu)
|
add_subdirectory(utils/gpu)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|||||||
@@ -673,6 +673,14 @@ public:
|
|||||||
bool matchSDivByConst(MachineInstr &MI);
|
bool matchSDivByConst(MachineInstr &MI);
|
||||||
void applySDivByConst(MachineInstr &MI);
|
void applySDivByConst(MachineInstr &MI);
|
||||||
|
|
||||||
|
/// Given a G_SDIV \p MI expressing a signed divide by a pow2 constant,
|
||||||
|
/// return expressions that implement it by shifting.
|
||||||
|
bool matchDivByPow2(MachineInstr &MI, bool IsSigned);
|
||||||
|
void applySDivByPow2(MachineInstr &MI);
|
||||||
|
/// Given a G_UDIV \p MI expressing an unsigned divide by a pow2 constant,
|
||||||
|
/// return expressions that implement it by shifting.
|
||||||
|
void applyUDivByPow2(MachineInstr &MI);
|
||||||
|
|
||||||
// G_UMULH x, (1 << c)) -> x >> (bitwidth - c)
|
// G_UMULH x, (1 << c)) -> x >> (bitwidth - c)
|
||||||
bool matchUMulHToLShr(MachineInstr &MI);
|
bool matchUMulHToLShr(MachineInstr &MI);
|
||||||
void applyUMulHToLShr(MachineInstr &MI);
|
void applyUMulHToLShr(MachineInstr &MI);
|
||||||
|
|||||||
@@ -429,6 +429,7 @@ public:
|
|||||||
LegalizeResult lowerDIVREM(MachineInstr &MI);
|
LegalizeResult lowerDIVREM(MachineInstr &MI);
|
||||||
LegalizeResult lowerAbsToAddXor(MachineInstr &MI);
|
LegalizeResult lowerAbsToAddXor(MachineInstr &MI);
|
||||||
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI);
|
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI);
|
||||||
|
LegalizeResult lowerAbsToCNeg(MachineInstr &MI);
|
||||||
LegalizeResult lowerVectorReduction(MachineInstr &MI);
|
LegalizeResult lowerVectorReduction(MachineInstr &MI);
|
||||||
LegalizeResult lowerMemcpyInline(MachineInstr &MI);
|
LegalizeResult lowerMemcpyInline(MachineInstr &MI);
|
||||||
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
|
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
|
||||||
|
|||||||
192
llvm/include/llvm/Passes/TargetPassRegistry.inc
Normal file
192
llvm/include/llvm/Passes/TargetPassRegistry.inc
Normal file
@@ -0,0 +1,192 @@
|
|||||||
|
//===- TargetPassRegistry.inc - Registry of passes --------------*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file is used as the registry of passes in registerPassBuilderCallbacks
|
||||||
|
// Just put the following lines in the body of registerPassBuilderCallbacks:
|
||||||
|
// #define GET_PASS_REGISTRY "<Target>PassRegistry.def"
|
||||||
|
// #include "llvm/Passes/TargetPassRegistry.inc"
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// NOTE: NO INCLUDE GUARD DESIRED!
|
||||||
|
|
||||||
|
#ifdef GET_PASS_REGISTRY
|
||||||
|
|
||||||
|
#if !__has_include(GET_PASS_REGISTRY)
|
||||||
|
#error "must provide <Target>PassRegistry.def"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (PopulateClassToPassNames) {
|
||||||
|
auto *PIC = PB.getPassInstrumentationCallbacks();
|
||||||
|
|
||||||
|
#define ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS) \
|
||||||
|
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
|
||||||
|
#define ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS) \
|
||||||
|
PIC->addClassToPassName(CLASS, NAME);
|
||||||
|
|
||||||
|
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
|
||||||
|
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
|
||||||
|
#define MODULE_PASS(NAME, CREATE_PASS) \
|
||||||
|
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
|
||||||
|
#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
|
||||||
|
ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)
|
||||||
|
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
|
||||||
|
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
|
||||||
|
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
|
||||||
|
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
|
||||||
|
#define FUNCTION_PASS(NAME, CREATE_PASS) \
|
||||||
|
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
|
||||||
|
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
|
||||||
|
ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)
|
||||||
|
#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
|
||||||
|
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
|
||||||
|
#define LOOP_PASS(NAME, CREATE_PASS) \
|
||||||
|
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
|
||||||
|
#define MACHINE_FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
|
||||||
|
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
|
||||||
|
#define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS) \
|
||||||
|
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
|
||||||
|
#define MACHINE_FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, \
|
||||||
|
PARAMS) \
|
||||||
|
ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)
|
||||||
|
#include GET_PASS_REGISTRY
|
||||||
|
#undef MODULE_ANALYSIS
|
||||||
|
#undef MODULE_PASS
|
||||||
|
#undef MODULE_PASS_WITH_PARAMS
|
||||||
|
#undef FUNCTION_ANALYSIS
|
||||||
|
#undef FUNCTION_ALIAS_ANALYSIS
|
||||||
|
#undef FUNCTION_PASS
|
||||||
|
#undef FUNCTION_PASS_WITH_PARAMS
|
||||||
|
#undef LOOP_ANALYSIS
|
||||||
|
#undef LOOP_PASS
|
||||||
|
#undef MACHINE_FUNCTION_ANALYSIS
|
||||||
|
#undef MACHINE_FUNCTION_PASS
|
||||||
|
#undef MACHINE_FUNCTION_PASS_WITH_PARAMS
|
||||||
|
#undef ADD_CLASS_PASS_TO_PASS_NAME
|
||||||
|
#undef ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS
|
||||||
|
}
|
||||||
|
|
||||||
|
#define ADD_PASS(NAME, CREATE_PASS) \
|
||||||
|
if (Name == NAME) { \
|
||||||
|
PM.addPass(CREATE_PASS); \
|
||||||
|
return true; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \
|
||||||
|
if (PassBuilder::checkParametrizedPassName(Name, NAME)) { \
|
||||||
|
auto Params = PassBuilder::parsePassParameters(PARSER, Name, NAME); \
|
||||||
|
if (!Params) \
|
||||||
|
return false; \
|
||||||
|
PM.addPass(CREATE_PASS(Params.get())); \
|
||||||
|
return true; \
|
||||||
|
}
|
||||||
|
|
||||||
|
PB.registerPipelineParsingCallback([=](StringRef Name, ModulePassManager &PM,
|
||||||
|
ArrayRef<PassBuilder::PipelineElement>) {
|
||||||
|
#define MODULE_PASS(NAME, CREATE_PASS) ADD_PASS(NAME, CREATE_PASS)
|
||||||
|
#include GET_PASS_REGISTRY
|
||||||
|
#undef MODULE_PASS
|
||||||
|
return false;
|
||||||
|
});
|
||||||
|
|
||||||
|
PB.registerPipelineParsingCallback([=](StringRef Name, ModulePassManager &PM,
|
||||||
|
ArrayRef<PassBuilder::PipelineElement>) {
|
||||||
|
#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
|
||||||
|
ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
|
||||||
|
#include GET_PASS_REGISTRY
|
||||||
|
#undef MODULE_PASS_WITH_PARAMS
|
||||||
|
return false;
|
||||||
|
});
|
||||||
|
|
||||||
|
PB.registerPipelineParsingCallback([=](StringRef Name, FunctionPassManager &PM,
|
||||||
|
ArrayRef<PassBuilder::PipelineElement>) {
|
||||||
|
#define FUNCTION_PASS(NAME, CREATE_PASS) ADD_PASS(NAME, CREATE_PASS)
|
||||||
|
#include GET_PASS_REGISTRY
|
||||||
|
#undef FUNCTION_PASS
|
||||||
|
return false;
|
||||||
|
});
|
||||||
|
|
||||||
|
PB.registerPipelineParsingCallback([=](StringRef Name, FunctionPassManager &PM,
|
||||||
|
ArrayRef<PassBuilder::PipelineElement>) {
|
||||||
|
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
|
||||||
|
ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
|
||||||
|
#include GET_PASS_REGISTRY
|
||||||
|
#undef FUNCTION_PASS_WITH_PARAMS
|
||||||
|
return false;
|
||||||
|
});
|
||||||
|
|
||||||
|
PB.registerPipelineParsingCallback([=](StringRef Name, LoopPassManager &PM,
|
||||||
|
ArrayRef<PassBuilder::PipelineElement>) {
|
||||||
|
#define LOOP_PASS(NAME, CREATE_PASS) ADD_PASS(NAME, CREATE_PASS)
|
||||||
|
#include GET_PASS_REGISTRY
|
||||||
|
return false;
|
||||||
|
});
|
||||||
|
|
||||||
|
PB.registerPipelineParsingCallback([=](StringRef Name,
|
||||||
|
MachineFunctionPassManager &PM,
|
||||||
|
ArrayRef<PassBuilder::PipelineElement>) {
|
||||||
|
#define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS) ADD_PASS(NAME, CREATE_PASS)
|
||||||
|
#include GET_PASS_REGISTRY
|
||||||
|
return false;
|
||||||
|
});
|
||||||
|
|
||||||
|
PB.registerPipelineParsingCallback([=](StringRef Name, FunctionPassManager &PM,
|
||||||
|
ArrayRef<PassBuilder::PipelineElement>) {
|
||||||
|
#define MACHINE_FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, \
|
||||||
|
PARAMS) \
|
||||||
|
ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
|
||||||
|
#include GET_PASS_REGISTRY
|
||||||
|
#undef MACHINE_FUNCTION_PASS_WITH_PARAMS
|
||||||
|
return false;
|
||||||
|
});
|
||||||
|
|
||||||
|
#undef ADD_PASS
|
||||||
|
#undef ADD_PASS_WITH_PARAMS
|
||||||
|
|
||||||
|
PB.registerAnalysisRegistrationCallback([](ModuleAnalysisManager &AM) {
|
||||||
|
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
|
||||||
|
AM.registerPass([&] { return CREATE_PASS; });
|
||||||
|
#include GET_PASS_REGISTRY
|
||||||
|
#undef MODULE_ANALYSIS
|
||||||
|
});
|
||||||
|
|
||||||
|
PB.registerAnalysisRegistrationCallback([](FunctionAnalysisManager &AM) {
|
||||||
|
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
|
||||||
|
AM.registerPass([&] { return CREATE_PASS; });
|
||||||
|
#include GET_PASS_REGISTRY
|
||||||
|
#undef FUNCTION_ANALYSIS
|
||||||
|
});
|
||||||
|
|
||||||
|
PB.registerParseAACallback([](StringRef Name, AAManager &AM) {
|
||||||
|
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
|
||||||
|
if (Name == NAME) { \
|
||||||
|
AM.registerFunctionAnalysis< \
|
||||||
|
std::remove_reference_t<decltype(CREATE_PASS)>>(); \
|
||||||
|
return true; \
|
||||||
|
}
|
||||||
|
#include GET_PASS_REGISTRY
|
||||||
|
#undef FUNCTION_ALIAS_ANALYSIS
|
||||||
|
return false;
|
||||||
|
});
|
||||||
|
|
||||||
|
PB.registerAnalysisRegistrationCallback([](LoopAnalysisManager &AM) {
|
||||||
|
#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
|
||||||
|
AM.registerPass([&] { return CREATE_PASS; });
|
||||||
|
#include GET_PASS_REGISTRY
|
||||||
|
#undef LOOP_ANALYSIS
|
||||||
|
});
|
||||||
|
|
||||||
|
PB.registerAnalysisRegistrationCallback([](MachineFunctionAnalysisManager &AM) {
|
||||||
|
#define MACHINE_FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
|
||||||
|
AM.registerPass([&] { return CREATE_PASS; });
|
||||||
|
#include GET_PASS_REGISTRY
|
||||||
|
#undef MACHINE_FUNCTION_ANALYSIS
|
||||||
|
});
|
||||||
|
|
||||||
|
#undef GET_PASS_REGISTRY
|
||||||
|
#endif // GET_PASS_REGISTRY
|
||||||
@@ -179,6 +179,7 @@ def FmArcp : MIFlagEnum<"FmArcp">;
|
|||||||
def FmContract : MIFlagEnum<"FmContract">;
|
def FmContract : MIFlagEnum<"FmContract">;
|
||||||
def FmAfn : MIFlagEnum<"FmAfn">;
|
def FmAfn : MIFlagEnum<"FmAfn">;
|
||||||
def FmReassoc : MIFlagEnum<"FmReassoc">;
|
def FmReassoc : MIFlagEnum<"FmReassoc">;
|
||||||
|
def IsExact : MIFlagEnum<"IsExact">;
|
||||||
|
|
||||||
def MIFlags;
|
def MIFlags;
|
||||||
// def not; -> Already defined as a SDNode
|
// def not; -> Already defined as a SDNode
|
||||||
@@ -1036,7 +1037,20 @@ def sdiv_by_const : GICombineRule<
|
|||||||
[{ return Helper.matchSDivByConst(*${root}); }]),
|
[{ return Helper.matchSDivByConst(*${root}); }]),
|
||||||
(apply [{ Helper.applySDivByConst(*${root}); }])>;
|
(apply [{ Helper.applySDivByConst(*${root}); }])>;
|
||||||
|
|
||||||
def intdiv_combines : GICombineGroup<[udiv_by_const, sdiv_by_const]>;
|
def sdiv_by_pow2 : GICombineRule<
|
||||||
|
(defs root:$root),
|
||||||
|
(match (G_SDIV $dst, $x, $y, (MIFlags (not IsExact))):$root,
|
||||||
|
[{ return Helper.matchDivByPow2(*${root}, /*IsSigned=*/true); }]),
|
||||||
|
(apply [{ Helper.applySDivByPow2(*${root}); }])>;
|
||||||
|
|
||||||
|
def udiv_by_pow2 : GICombineRule<
|
||||||
|
(defs root:$root),
|
||||||
|
(match (G_UDIV $dst, $x, $y, (MIFlags (not IsExact))):$root,
|
||||||
|
[{ return Helper.matchDivByPow2(*${root}, /*IsSigned=*/false); }]),
|
||||||
|
(apply [{ Helper.applyUDivByPow2(*${root}); }])>;
|
||||||
|
|
||||||
|
def intdiv_combines : GICombineGroup<[udiv_by_const, sdiv_by_const,
|
||||||
|
sdiv_by_pow2, udiv_by_pow2]>;
|
||||||
|
|
||||||
def reassoc_ptradd : GICombineRule<
|
def reassoc_ptradd : GICombineRule<
|
||||||
(defs root:$root, build_fn_matchinfo:$matchinfo),
|
(defs root:$root, build_fn_matchinfo:$matchinfo),
|
||||||
|
|||||||
@@ -5270,6 +5270,144 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
|
|||||||
return MIB.buildMul(Ty, Res, Factor);
|
return MIB.buildMul(Ty, Res, Factor);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Match a G_SDIV / G_UDIV \p MI whose divisor (operand 2) satisfies the
/// power-of-two predicate for every constant that matchUnaryPredicate visits.
/// When \p IsSigned is true, negated powers of two are accepted as well.
/// Undef divisor elements are rejected.
bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) {
  assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
          MI.getOpcode() == TargetOpcode::G_UDIV) &&
         "Expected SDIV or UDIV");
  Register Divisor = cast<GenericMachineInstr>(MI).getReg(2);

  // Accept 2^k, and additionally -(2^k) for signed division.
  auto IsPow2Divisor = [IsSigned](const Constant *C) -> bool {
    auto *IntC = dyn_cast<ConstantInt>(C);
    if (!IntC)
      return false;
    const APInt &Val = IntC->getValue();
    if (Val.isPowerOf2())
      return true;
    return IsSigned && Val.isNegatedPowerOf2();
  };

  return matchUnaryPredicate(MRI, Divisor, IsPow2Divisor,
                             /*AllowUndefs=*/false);
}
|
||||||
|
|
||||||
|
/// Rewrite a G_SDIV \p MI whose divisor is a power of two (or a negated power
/// of two), as established by matchDivByPow2, into shifts, an add and, where
/// needed, a negate. \p MI is always removed: either through
/// replaceSingleDefInstWithReg or by erasing it once the replacement has been
/// built directly into its destination register.
void CombinerHelper::applySDivByPow2(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
  auto &SDiv = cast<GenericMachineInstr>(MI);
  Register Dst = SDiv.getReg(0);
  Register LHS = SDiv.getReg(1);
  Register RHS = SDiv.getReg(2);
  LLT Ty = MRI.getType(Dst);
  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);

  Builder.setInstrAndDebugLoc(MI);

  // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
  // to the following version:
  //
  // %c1 = G_CTTZ %rhs
  // %inexact = G_SUB $bitwidth, %c1
  // %sign = G_ASHR %lhs, $(bitwidth - 1)
  // %lshr = G_LSHR %sign, %inexact
  // %add = G_ADD %lhs, %lshr
  // %ashr = G_ASHR %add, %c1
  // %ashr = G_SELECT %isoneorallones, %lhs, %ashr
  // %zero = G_CONSTANT $0
  // %neg = G_NEG %ashr
  // %isneg = G_ICMP SLT %rhs, %zero
  // %res = G_SELECT %isneg, %neg, %ashr
  //
  // When %rhs is a constant integer, or a splat vector, we can check its value
  // at compile time such that the first two G_ICMP conditional statements, as
  // well as the corresponding non-taken branches, can be eliminated. This can
  // generate compact code even w/o any constant folding afterwards. When $rhs
  // is not a splat vector, we have to generate those checks via instructions.

  unsigned Bitwidth = Ty.getScalarSizeInBits();

  // TODO: It is not necessary to have this specialized version. We need it *for
  // now* because the folding/combine can't handle it. Remove this large
  // conditional statement once we can properly fold the two G_ICMP.
  if (auto RHSC = getConstantOrConstantSplatVector(RHS)) {
    // Special case: (sdiv X, 1) -> X
    if (RHSC->isOne()) {
      replaceSingleDefInstWithReg(MI, LHS);
      return;
    }
    // Special Case: (sdiv X, -1) -> 0-X
    if (RHSC->isAllOnes()) {
      auto Neg = Builder.buildNeg(Ty, LHS);
      replaceSingleDefInstWithReg(MI, Neg->getOperand(0).getReg());
      return;
    }

    unsigned TrailingZeros = RHSC->countTrailingZeros();
    auto C1 = Builder.buildConstant(ShiftAmtTy, TrailingZeros);
    auto Inexact = Builder.buildConstant(ShiftAmtTy, Bitwidth - TrailingZeros);
    auto Sign = Builder.buildAShr(
        Ty, LHS, Builder.buildConstant(ShiftAmtTy, Bitwidth - 1));
    // Add (LHS < 0) ? abs2 - 1 : 0;
    auto Lshr = Builder.buildLShr(Ty, Sign, Inexact);
    auto Add = Builder.buildAdd(Ty, LHS, Lshr);
    auto Shr = Builder.buildAShr(Ty, Add, C1);

    // If dividing by a positive value, we're done. Otherwise, the result must
    // be negated.
    auto Res = RHSC->isNegative() ? Builder.buildNeg(Ty, Shr) : Shr;
    replaceSingleDefInstWithReg(MI, Res->getOperand(0).getReg());
    return;
  }

  // RHS is not a splat vector. Build the above version with instructions.
  auto Bits = Builder.buildConstant(ShiftAmtTy, Bitwidth);
  auto C1 = Builder.buildCTTZ(Ty, RHS);
  C1 = Builder.buildZExtOrTrunc(ShiftAmtTy, C1);
  auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
  auto Sign = Builder.buildAShr(
      Ty, LHS, Builder.buildConstant(ShiftAmtTy, Bitwidth - 1));

  // Add (LHS < 0) ? abs2 - 1 : 0;
  // This must be a *logical right* shift: shifting the all-ones sign mask
  // right by (bitwidth - c1) leaves exactly the low c1 bits set, i.e.
  // abs2 - 1. A left shift would produce the wrong rounding bias.
  auto Lshr = Builder.buildLShr(Ty, Sign, Inexact);
  auto Add = Builder.buildAdd(Ty, LHS, Lshr);
  auto Shr = Builder.buildAShr(Ty, Add, C1);

  // Condition type: i1 per lane for vectors, plain i1 for scalars.
  LLT CCVT =
      Ty.isVector() ? LLT::vector(Ty.getElementCount(), 1) : LLT::scalar(1);

  // Dividing by 1 or -1 leaves the magnitude untouched; the shift amounts
  // computed above are not meaningful for those lanes, so select LHS instead.
  auto One = Builder.buildConstant(Ty, 1);
  auto AllOnes =
      Builder.buildConstant(Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
  auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
  auto IsAllOnes =
      Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, AllOnes);
  auto IsOneOrAllOnes = Builder.buildOr(CCVT, IsOne, IsAllOnes);
  Shr = Builder.buildSelect(Ty, IsOneOrAllOnes, LHS, Shr);

  // If dividing by a positive value, we're done. Otherwise, the result must
  // be negated. The decision depends on the sign of the *divisor*, mirroring
  // RHSC->isNegative() in the constant path above.
  auto Zero = Builder.buildConstant(Ty, 0);
  auto Neg = Builder.buildNeg(Ty, Shr);
  auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
  Builder.buildSelect(Dst, IsNeg, Neg, Shr);
  // The select above now defines Dst; drop the original division so the
  // register keeps a single definition.
  MI.eraseFromParent();
}
|
||||||
|
|
||||||
|
/// Rewrite a G_UDIV \p MI whose divisor is a power of two, as established by
/// matchDivByPow2, into a logical right shift of the dividend by the number of
/// trailing zero bits of the divisor. \p MI is always removed.
void CombinerHelper::applyUDivByPow2(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
  auto &UDiv = cast<GenericMachineInstr>(MI);
  Register Dst = UDiv.getReg(0);
  Register LHS = UDiv.getReg(1);
  Register RHS = UDiv.getReg(2);
  LLT Ty = MRI.getType(Dst);
  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);

  Builder.setInstrAndDebugLoc(MI);

  // TODO: It is not necessary to have this specialized version. We need it *for
  // now* because the folding/combine can't handle CTTZ.
  if (auto RHSC = getConstantOrConstantSplatVector(RHS)) {
    // Constant (or splat) divisor: the shift amount is known at compile time.
    auto C1 = Builder.buildConstant(ShiftAmtTy, RHSC->countTrailingZeros());
    auto Res = Builder.buildLShr(Ty, LHS, C1);
    replaceSingleDefInstWithReg(MI, Res->getOperand(0).getReg());
    return;
  }

  // Non-splat divisor: compute the per-lane shift amount with G_CTTZ.
  auto C1 = Builder.buildCTTZ(Ty, RHS);
  C1 = Builder.buildZExtOrTrunc(ShiftAmtTy, C1);
  Builder.buildLShr(Dst, LHS, C1);
  // The shift above now defines Dst; drop the original division so the
  // register keeps a single definition.
  MI.eraseFromParent();
}
|
||||||
|
|
||||||
bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
|
bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
|
||||||
assert(MI.getOpcode() == TargetOpcode::G_UMULH);
|
assert(MI.getOpcode() == TargetOpcode::G_UMULH);
|
||||||
Register RHS = MI.getOperand(2).getReg();
|
Register RHS = MI.getOperand(2).getReg();
|
||||||
@@ -6945,10 +7083,6 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
|
|||||||
LLT DstTy = MRI.getType(Dst);
|
LLT DstTy = MRI.getType(Dst);
|
||||||
LLT CarryTy = MRI.getType(Carry);
|
LLT CarryTy = MRI.getType(Carry);
|
||||||
|
|
||||||
// We want do fold the [u|s]addo.
|
|
||||||
if (!MRI.hasOneNonDBGUse(Dst))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// Fold addo, if the carry is dead -> add, undef.
|
// Fold addo, if the carry is dead -> add, undef.
|
||||||
if (MRI.use_nodbg_empty(Carry) &&
|
if (MRI.use_nodbg_empty(Carry) &&
|
||||||
isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
|
isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
|
||||||
@@ -6959,10 +7093,6 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We want do fold the [u|s]addo.
|
|
||||||
if (!MRI.hasOneNonDBGUse(Carry))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// Canonicalize constant to RHS.
|
// Canonicalize constant to RHS.
|
||||||
if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
|
if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
|
||||||
if (IsSigned) {
|
if (IsSigned) {
|
||||||
@@ -6994,7 +7124,7 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fold (addo x, 0) -> x, no borrow
|
// Fold (addo x, 0) -> x, no carry
|
||||||
if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
|
if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
|
||||||
MatchInfo = [=](MachineIRBuilder &B) {
|
MatchInfo = [=](MachineIRBuilder &B) {
|
||||||
B.buildCopy(Dst, LHS);
|
B.buildCopy(Dst, LHS);
|
||||||
|
|||||||
@@ -8215,9 +8215,22 @@ LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
|
|||||||
// %res = G_SMAX %a, %v2
|
// %res = G_SMAX %a, %v2
|
||||||
Register SrcReg = MI.getOperand(1).getReg();
|
Register SrcReg = MI.getOperand(1).getReg();
|
||||||
LLT Ty = MRI.getType(SrcReg);
|
LLT Ty = MRI.getType(SrcReg);
|
||||||
|
auto Zero = MIRBuilder.buildConstant(Ty, 0);
|
||||||
|
auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
|
||||||
|
MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
|
||||||
|
MI.eraseFromParent();
|
||||||
|
return Legalized;
|
||||||
|
}
|
||||||
|
|
||||||
|
LegalizerHelper::LegalizeResult
|
||||||
|
LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
|
||||||
|
Register SrcReg = MI.getOperand(1).getReg();
|
||||||
|
Register DestReg = MI.getOperand(0).getReg();
|
||||||
|
LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
|
||||||
auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
|
auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
|
||||||
auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
|
auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
|
||||||
MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
|
auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
|
||||||
|
MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
|
||||||
MI.eraseFromParent();
|
MI.eraseFromParent();
|
||||||
return Legalized;
|
return Legalized;
|
||||||
}
|
}
|
||||||
|
|||||||
20
llvm/lib/Target/AArch64/AArch64PassRegistry.def
Normal file
20
llvm/lib/Target/AArch64/AArch64PassRegistry.def
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
//===- AArch64PassRegistry.def - Registry of AArch64 passes -----*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file is used as the registry of passes that are part of the
|
||||||
|
// AArch64 backend.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// NOTE: NO INCLUDE GUARD DESIRED!
|
||||||
|
|
||||||
|
#ifndef LOOP_PASS
|
||||||
|
#define LOOP_PASS(NAME, CREATE_PASS)
|
||||||
|
#endif
|
||||||
|
LOOP_PASS("aarch64-lit", AArch64LoopIdiomTransformPass())
|
||||||
|
#undef LOOP_PASS
|
||||||
@@ -547,6 +547,10 @@ public:
|
|||||||
|
|
||||||
void AArch64TargetMachine::registerPassBuilderCallbacks(
|
void AArch64TargetMachine::registerPassBuilderCallbacks(
|
||||||
PassBuilder &PB, bool PopulateClassToPassNames) {
|
PassBuilder &PB, bool PopulateClassToPassNames) {
|
||||||
|
|
||||||
|
#define GET_PASS_REGISTRY "AArch64PassRegistry.def"
|
||||||
|
#include "llvm/Passes/TargetPassRegistry.inc"
|
||||||
|
|
||||||
PB.registerLateLoopOptimizationsEPCallback(
|
PB.registerLateLoopOptimizationsEPCallback(
|
||||||
[=](LoopPassManager &LPM, OptimizationLevel Level) {
|
[=](LoopPassManager &LPM, OptimizationLevel Level) {
|
||||||
LPM.addPass(AArch64LoopIdiomTransformPass());
|
LPM.addPass(AArch64LoopIdiomTransformPass());
|
||||||
|
|||||||
@@ -1012,6 +1012,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
|
|||||||
ABSActions
|
ABSActions
|
||||||
.legalFor({s32, s64});
|
.legalFor({s32, s64});
|
||||||
ABSActions.legalFor(PackedVectorAllTypeList)
|
ABSActions.legalFor(PackedVectorAllTypeList)
|
||||||
|
.customIf([=](const LegalityQuery &Q) {
|
||||||
|
// TODO: Fix suboptimal codegen for 128+ bit types.
|
||||||
|
LLT SrcTy = Q.Types[0];
|
||||||
|
return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
|
||||||
|
})
|
||||||
.widenScalarIf(
|
.widenScalarIf(
|
||||||
[=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
|
[=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
|
||||||
[=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
|
[=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
|
||||||
@@ -1264,6 +1269,8 @@ bool AArch64LegalizerInfo::legalizeCustom(
|
|||||||
return legalizeDynStackAlloc(MI, Helper);
|
return legalizeDynStackAlloc(MI, Helper);
|
||||||
case TargetOpcode::G_PREFETCH:
|
case TargetOpcode::G_PREFETCH:
|
||||||
return legalizePrefetch(MI, Helper);
|
return legalizePrefetch(MI, Helper);
|
||||||
|
case TargetOpcode::G_ABS:
|
||||||
|
return Helper.lowerAbsToCNeg(MI);
|
||||||
}
|
}
|
||||||
|
|
||||||
llvm_unreachable("expected switch to return");
|
llvm_unreachable("expected switch to return");
|
||||||
|
|||||||
@@ -1,9 +1,8 @@
|
|||||||
; RUN: opt -passes=print-alias-sets -S -o - < %s 2>&1 | FileCheck %s
|
; RUN: opt -passes=print-alias-sets -S -o - < %s 2>&1 | FileCheck %s --implicit-check-not="Unknown instructions"
|
||||||
|
|
||||||
; CHECK: Alias sets for function 'test1':
|
; CHECK: Alias sets for function 'test1':
|
||||||
; CHECK: Alias Set Tracker: 2 alias sets for 2 pointer values.
|
; CHECK: Alias Set Tracker: 2 alias sets for 2 pointer values.
|
||||||
; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Memory locations: (ptr %a, LocationSize::precise(1))
|
; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Memory locations: (ptr %a, LocationSize::precise(1))
|
||||||
; CHECK-NOT: 1 Unknown instruction
|
|
||||||
; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Memory locations: (ptr %b, LocationSize::precise(1))
|
; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Memory locations: (ptr %b, LocationSize::precise(1))
|
||||||
define void @test1(i32 %c) {
|
define void @test1(i32 %c) {
|
||||||
entry:
|
entry:
|
||||||
@@ -64,7 +63,6 @@ entry:
|
|||||||
; CHECK: Alias sets for function 'test5':
|
; CHECK: Alias sets for function 'test5':
|
||||||
; CHECK: Alias Set Tracker: 2 alias sets for 2 pointer values.
|
; CHECK: Alias Set Tracker: 2 alias sets for 2 pointer values.
|
||||||
; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Memory locations: (ptr %a, LocationSize::precise(1))
|
; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Memory locations: (ptr %a, LocationSize::precise(1))
|
||||||
; CHECK-NOT: 1 Unknown instruction
|
|
||||||
; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Memory locations: (ptr %b, LocationSize::precise(1))
|
; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Memory locations: (ptr %b, LocationSize::precise(1))
|
||||||
define void @test5() {
|
define void @test5() {
|
||||||
entry:
|
entry:
|
||||||
|
|||||||
@@ -92,3 +92,87 @@ body: |
|
|||||||
$w1 = COPY %o_wide
|
$w1 = COPY %o_wide
|
||||||
RET_ReallyLR implicit $w0
|
RET_ReallyLR implicit $w0
|
||||||
...
|
...
|
||||||
|
---
|
||||||
|
name: add_multiuse
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $w0, $w1
|
||||||
|
; CHECK-LABEL: name: add_multiuse
|
||||||
|
; CHECK: liveins: $w0, $w1
|
||||||
|
; CHECK-NEXT: {{ $}}
|
||||||
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||||
|
; CHECK-NEXT: %const:_(s32) = G_CONSTANT i32 0
|
||||||
|
; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
|
||||||
|
; CHECK-NEXT: $w1 = COPY [[COPY]](s32)
|
||||||
|
; CHECK-NEXT: $w2 = COPY %const(s32)
|
||||||
|
; CHECK-NEXT: RET_ReallyLR implicit $w0
|
||||||
|
%0:_(s32) = COPY $w0
|
||||||
|
%const:_(s32) = G_CONSTANT i32 0
|
||||||
|
%add:_(s32), %o:_(s1) = G_SADDO %0, %const
|
||||||
|
%o_wide:_(s32) = G_ZEXT %o(s1)
|
||||||
|
$w0 = COPY %add(s32)
|
||||||
|
$w1 = COPY %add(s32)
|
||||||
|
$w2 = COPY %o_wide
|
||||||
|
RET_ReallyLR implicit $w0
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: add_vector
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $w0, $w1
|
||||||
|
; CHECK-LABEL: name: add_vector
|
||||||
|
; CHECK: liveins: $w0, $w1
|
||||||
|
; CHECK-NEXT: {{ $}}
|
||||||
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||||
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
|
||||||
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
|
||||||
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
|
||||||
|
; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32), [[COPY1]](s32)
|
||||||
|
; CHECK-NEXT: %bv1:_(<4 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32), [[COPY2]](s32), [[COPY3]](s32)
|
||||||
|
; CHECK-NEXT: %add:_(<4 x s32>), %o:_(<4 x s1>) = G_UADDO %bv0, %bv1
|
||||||
|
; CHECK-NEXT: %o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
|
||||||
|
; CHECK-NEXT: $q0 = COPY %add(<4 x s32>)
|
||||||
|
; CHECK-NEXT: $q1 = COPY %o_wide(<4 x s32>)
|
||||||
|
; CHECK-NEXT: RET_ReallyLR implicit $w0
|
||||||
|
%0:_(s32) = COPY $w0
|
||||||
|
%1:_(s32) = COPY $w1
|
||||||
|
%2:_(s32) = COPY $w2
|
||||||
|
%3:_(s32) = COPY $w3
|
||||||
|
%bv0:_(<4 x s32>) = G_BUILD_VECTOR %0:_(s32), %1:_(s32), %0:_(s32), %1:_(s32)
|
||||||
|
%bv1:_(<4 x s32>) = G_BUILD_VECTOR %2:_(s32), %3:_(s32), %2:_(s32), %3:_(s32)
|
||||||
|
%add:_(<4 x s32>), %o:_(<4 x s1>) = G_UADDO %bv0, %bv1
|
||||||
|
%o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
|
||||||
|
$q0 = COPY %add(<4 x s32>)
|
||||||
|
$q1 = COPY %o_wide
|
||||||
|
RET_ReallyLR implicit $w0
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: add_splat_vector
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $w0, $w1
|
||||||
|
; CHECK-LABEL: name: add_splat_vector
|
||||||
|
; CHECK: liveins: $w0, $w1
|
||||||
|
; CHECK-NEXT: {{ $}}
|
||||||
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||||
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
|
||||||
|
; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32), [[COPY1]](s32)
|
||||||
|
; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
|
||||||
|
; CHECK-NEXT: %o:_(<4 x s1>) = G_BUILD_VECTOR [[C]](s1), [[C]](s1), [[C]](s1), [[C]](s1)
|
||||||
|
; CHECK-NEXT: %o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
|
||||||
|
; CHECK-NEXT: $q0 = COPY %bv0(<4 x s32>)
|
||||||
|
; CHECK-NEXT: $q1 = COPY %o_wide(<4 x s32>)
|
||||||
|
; CHECK-NEXT: RET_ReallyLR implicit $w0
|
||||||
|
%0:_(s32) = COPY $w0
|
||||||
|
%1:_(s32) = COPY $w1
|
||||||
|
%2:_(s32) = COPY $w2
|
||||||
|
%3:_(s32) = COPY $w3
|
||||||
|
%const:_(s32) = G_CONSTANT i32 0
|
||||||
|
%bv0:_(<4 x s32>) = G_BUILD_VECTOR %0:_(s32), %1:_(s32), %0:_(s32), %1:_(s32)
|
||||||
|
%bv1:_(<4 x s32>) = G_BUILD_VECTOR %const:_(s32), %const:_(s32), %const:_(s32), %const:_(s32)
|
||||||
|
%add:_(<4 x s32>), %o:_(<4 x s1>) = G_SADDO %bv0, %bv1
|
||||||
|
%o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
|
||||||
|
$q0 = COPY %add(<4 x s32>)
|
||||||
|
$q1 = COPY %o_wide
|
||||||
|
RET_ReallyLR implicit $w0
|
||||||
|
...
|
||||||
|
|||||||
@@ -8,11 +8,12 @@ body: |
|
|||||||
bb.0:
|
bb.0:
|
||||||
; CHECK-LABEL: name: abs_s32
|
; CHECK-LABEL: name: abs_s32
|
||||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 31
|
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||||
; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64)
|
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY]]
|
||||||
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]]
|
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[C]]
|
||||||
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
|
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[SUB]]
|
||||||
; CHECK-NEXT: $w0 = COPY [[XOR]](s32)
|
; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
|
||||||
|
;
|
||||||
; CHECK-CSSC-LABEL: name: abs_s32
|
; CHECK-CSSC-LABEL: name: abs_s32
|
||||||
; CHECK-CSSC: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
; CHECK-CSSC: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||||
; CHECK-CSSC-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[COPY]]
|
; CHECK-CSSC-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[COPY]]
|
||||||
@@ -28,11 +29,12 @@ body: |
|
|||||||
bb.0:
|
bb.0:
|
||||||
; CHECK-LABEL: name: abs_s64
|
; CHECK-LABEL: name: abs_s64
|
||||||
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
|
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
|
||||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
|
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
|
||||||
; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s64)
|
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[C]], [[COPY]]
|
||||||
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY]], [[ASHR]]
|
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s64), [[C]]
|
||||||
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ADD]], [[ASHR]]
|
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[COPY]], [[SUB]]
|
||||||
; CHECK-NEXT: $x0 = COPY [[XOR]](s64)
|
; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
|
||||||
|
;
|
||||||
; CHECK-CSSC-LABEL: name: abs_s64
|
; CHECK-CSSC-LABEL: name: abs_s64
|
||||||
; CHECK-CSSC: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
|
; CHECK-CSSC: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
|
||||||
; CHECK-CSSC-NEXT: [[ABS:%[0-9]+]]:_(s64) = G_ABS [[COPY]]
|
; CHECK-CSSC-NEXT: [[ABS:%[0-9]+]]:_(s64) = G_ABS [[COPY]]
|
||||||
@@ -55,6 +57,7 @@ body: |
|
|||||||
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<4 x s16>) = G_ABS [[COPY]]
|
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<4 x s16>) = G_ABS [[COPY]]
|
||||||
; CHECK-NEXT: $d0 = COPY [[ABS]](<4 x s16>)
|
; CHECK-NEXT: $d0 = COPY [[ABS]](<4 x s16>)
|
||||||
; CHECK-NEXT: RET_ReallyLR implicit $d0
|
; CHECK-NEXT: RET_ReallyLR implicit $d0
|
||||||
|
;
|
||||||
; CHECK-CSSC-LABEL: name: abs_v4s16
|
; CHECK-CSSC-LABEL: name: abs_v4s16
|
||||||
; CHECK-CSSC: liveins: $d0
|
; CHECK-CSSC: liveins: $d0
|
||||||
; CHECK-CSSC-NEXT: {{ $}}
|
; CHECK-CSSC-NEXT: {{ $}}
|
||||||
@@ -82,6 +85,7 @@ body: |
|
|||||||
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<8 x s16>) = G_ABS [[COPY]]
|
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<8 x s16>) = G_ABS [[COPY]]
|
||||||
; CHECK-NEXT: $q0 = COPY [[ABS]](<8 x s16>)
|
; CHECK-NEXT: $q0 = COPY [[ABS]](<8 x s16>)
|
||||||
; CHECK-NEXT: RET_ReallyLR implicit $q0
|
; CHECK-NEXT: RET_ReallyLR implicit $q0
|
||||||
|
;
|
||||||
; CHECK-CSSC-LABEL: name: abs_v8s16
|
; CHECK-CSSC-LABEL: name: abs_v8s16
|
||||||
; CHECK-CSSC: liveins: $q0
|
; CHECK-CSSC: liveins: $q0
|
||||||
; CHECK-CSSC-NEXT: {{ $}}
|
; CHECK-CSSC-NEXT: {{ $}}
|
||||||
@@ -109,6 +113,7 @@ body: |
|
|||||||
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<2 x s32>) = G_ABS [[COPY]]
|
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<2 x s32>) = G_ABS [[COPY]]
|
||||||
; CHECK-NEXT: $d0 = COPY [[ABS]](<2 x s32>)
|
; CHECK-NEXT: $d0 = COPY [[ABS]](<2 x s32>)
|
||||||
; CHECK-NEXT: RET_ReallyLR implicit $d0
|
; CHECK-NEXT: RET_ReallyLR implicit $d0
|
||||||
|
;
|
||||||
; CHECK-CSSC-LABEL: name: abs_v2s32
|
; CHECK-CSSC-LABEL: name: abs_v2s32
|
||||||
; CHECK-CSSC: liveins: $d0
|
; CHECK-CSSC: liveins: $d0
|
||||||
; CHECK-CSSC-NEXT: {{ $}}
|
; CHECK-CSSC-NEXT: {{ $}}
|
||||||
@@ -136,6 +141,7 @@ body: |
|
|||||||
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<4 x s32>) = G_ABS [[COPY]]
|
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<4 x s32>) = G_ABS [[COPY]]
|
||||||
; CHECK-NEXT: $q0 = COPY [[ABS]](<4 x s32>)
|
; CHECK-NEXT: $q0 = COPY [[ABS]](<4 x s32>)
|
||||||
; CHECK-NEXT: RET_ReallyLR implicit $q0
|
; CHECK-NEXT: RET_ReallyLR implicit $q0
|
||||||
|
;
|
||||||
; CHECK-CSSC-LABEL: name: abs_v4s32
|
; CHECK-CSSC-LABEL: name: abs_v4s32
|
||||||
; CHECK-CSSC: liveins: $q0
|
; CHECK-CSSC: liveins: $q0
|
||||||
; CHECK-CSSC-NEXT: {{ $}}
|
; CHECK-CSSC-NEXT: {{ $}}
|
||||||
@@ -163,6 +169,7 @@ body: |
|
|||||||
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<8 x s8>) = G_ABS [[COPY]]
|
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<8 x s8>) = G_ABS [[COPY]]
|
||||||
; CHECK-NEXT: $d0 = COPY [[ABS]](<8 x s8>)
|
; CHECK-NEXT: $d0 = COPY [[ABS]](<8 x s8>)
|
||||||
; CHECK-NEXT: RET_ReallyLR implicit $d0
|
; CHECK-NEXT: RET_ReallyLR implicit $d0
|
||||||
|
;
|
||||||
; CHECK-CSSC-LABEL: name: abs_v4s8
|
; CHECK-CSSC-LABEL: name: abs_v4s8
|
||||||
; CHECK-CSSC: liveins: $d0
|
; CHECK-CSSC: liveins: $d0
|
||||||
; CHECK-CSSC-NEXT: {{ $}}
|
; CHECK-CSSC-NEXT: {{ $}}
|
||||||
@@ -190,6 +197,7 @@ body: |
|
|||||||
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<16 x s8>) = G_ABS [[COPY]]
|
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<16 x s8>) = G_ABS [[COPY]]
|
||||||
; CHECK-NEXT: $q0 = COPY [[ABS]](<16 x s8>)
|
; CHECK-NEXT: $q0 = COPY [[ABS]](<16 x s8>)
|
||||||
; CHECK-NEXT: RET_ReallyLR implicit $q0
|
; CHECK-NEXT: RET_ReallyLR implicit $q0
|
||||||
|
;
|
||||||
; CHECK-CSSC-LABEL: name: abs_v16s8
|
; CHECK-CSSC-LABEL: name: abs_v16s8
|
||||||
; CHECK-CSSC: liveins: $q0
|
; CHECK-CSSC: liveins: $q0
|
||||||
; CHECK-CSSC-NEXT: {{ $}}
|
; CHECK-CSSC-NEXT: {{ $}}
|
||||||
|
|||||||
@@ -15,9 +15,8 @@ define i8 @abs_i8(i8 %a){
|
|||||||
; CHECK-GI-LABEL: abs_i8:
|
; CHECK-GI-LABEL: abs_i8:
|
||||||
; CHECK-GI: // %bb.0: // %entry
|
; CHECK-GI: // %bb.0: // %entry
|
||||||
; CHECK-GI-NEXT: sxtb w8, w0
|
; CHECK-GI-NEXT: sxtb w8, w0
|
||||||
; CHECK-GI-NEXT: asr w8, w8, #7
|
; CHECK-GI-NEXT: cmp w8, #0
|
||||||
; CHECK-GI-NEXT: add w9, w0, w8
|
; CHECK-GI-NEXT: cneg w0, w0, le
|
||||||
; CHECK-GI-NEXT: eor w0, w9, w8
|
|
||||||
; CHECK-GI-NEXT: ret
|
; CHECK-GI-NEXT: ret
|
||||||
entry:
|
entry:
|
||||||
%res = call i8 @llvm.abs.i8(i8 %a, i1 0)
|
%res = call i8 @llvm.abs.i8(i8 %a, i1 0)
|
||||||
@@ -36,9 +35,8 @@ define i16 @abs_i16(i16 %a){
|
|||||||
; CHECK-GI-LABEL: abs_i16:
|
; CHECK-GI-LABEL: abs_i16:
|
||||||
; CHECK-GI: // %bb.0: // %entry
|
; CHECK-GI: // %bb.0: // %entry
|
||||||
; CHECK-GI-NEXT: sxth w8, w0
|
; CHECK-GI-NEXT: sxth w8, w0
|
||||||
; CHECK-GI-NEXT: asr w8, w8, #15
|
; CHECK-GI-NEXT: cmp w8, #0
|
||||||
; CHECK-GI-NEXT: add w9, w0, w8
|
; CHECK-GI-NEXT: cneg w0, w0, le
|
||||||
; CHECK-GI-NEXT: eor w0, w9, w8
|
|
||||||
; CHECK-GI-NEXT: ret
|
; CHECK-GI-NEXT: ret
|
||||||
entry:
|
entry:
|
||||||
%res = call i16 @llvm.abs.i16(i16 %a, i1 0)
|
%res = call i16 @llvm.abs.i16(i16 %a, i1 0)
|
||||||
@@ -55,9 +53,8 @@ define i32 @abs_i32(i32 %a){
|
|||||||
;
|
;
|
||||||
; CHECK-GI-LABEL: abs_i32:
|
; CHECK-GI-LABEL: abs_i32:
|
||||||
; CHECK-GI: // %bb.0: // %entry
|
; CHECK-GI: // %bb.0: // %entry
|
||||||
; CHECK-GI-NEXT: asr w8, w0, #31
|
; CHECK-GI-NEXT: cmp w0, #0
|
||||||
; CHECK-GI-NEXT: add w9, w0, w8
|
; CHECK-GI-NEXT: cneg w0, w0, le
|
||||||
; CHECK-GI-NEXT: eor w0, w9, w8
|
|
||||||
; CHECK-GI-NEXT: ret
|
; CHECK-GI-NEXT: ret
|
||||||
entry:
|
entry:
|
||||||
%res = call i32 @llvm.abs.i32(i32 %a, i1 0)
|
%res = call i32 @llvm.abs.i32(i32 %a, i1 0)
|
||||||
@@ -74,9 +71,8 @@ define i64 @abs_i64(i64 %a){
|
|||||||
;
|
;
|
||||||
; CHECK-GI-LABEL: abs_i64:
|
; CHECK-GI-LABEL: abs_i64:
|
||||||
; CHECK-GI: // %bb.0: // %entry
|
; CHECK-GI: // %bb.0: // %entry
|
||||||
; CHECK-GI-NEXT: asr x8, x0, #63
|
; CHECK-GI-NEXT: cmp x0, #0
|
||||||
; CHECK-GI-NEXT: add x9, x0, x8
|
; CHECK-GI-NEXT: cneg x0, x0, le
|
||||||
; CHECK-GI-NEXT: eor x0, x9, x8
|
|
||||||
; CHECK-GI-NEXT: ret
|
; CHECK-GI-NEXT: ret
|
||||||
entry:
|
entry:
|
||||||
%res = call i64 @llvm.abs.i64(i64 %a, i1 0)
|
%res = call i64 @llvm.abs.i64(i64 %a, i1 0)
|
||||||
@@ -248,9 +244,9 @@ define <1 x i32> @abs_v1i32(<1 x i32> %a){
|
|||||||
; CHECK-GI-LABEL: abs_v1i32:
|
; CHECK-GI-LABEL: abs_v1i32:
|
||||||
; CHECK-GI: // %bb.0: // %entry
|
; CHECK-GI: // %bb.0: // %entry
|
||||||
; CHECK-GI-NEXT: fmov w8, s0
|
; CHECK-GI-NEXT: fmov w8, s0
|
||||||
; CHECK-GI-NEXT: asr w9, w8, #31
|
; CHECK-GI-NEXT: fmov w9, s0
|
||||||
; CHECK-GI-NEXT: add w8, w8, w9
|
; CHECK-GI-NEXT: cmp w8, #0
|
||||||
; CHECK-GI-NEXT: eor w8, w8, w9
|
; CHECK-GI-NEXT: cneg w8, w9, le
|
||||||
; CHECK-GI-NEXT: fmov s0, w8
|
; CHECK-GI-NEXT: fmov s0, w8
|
||||||
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
|
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
|
||||||
; CHECK-GI-NEXT: ret
|
; CHECK-GI-NEXT: ret
|
||||||
|
|||||||
@@ -2643,8 +2643,7 @@ define i8 @pr60530() {
|
|||||||
;
|
;
|
||||||
; GISEL-LABEL: pr60530:
|
; GISEL-LABEL: pr60530:
|
||||||
; GISEL: // %bb.0:
|
; GISEL: // %bb.0:
|
||||||
; GISEL-NEXT: mov w8, #1 // =0x1
|
; GISEL-NEXT: mov w0, #255 // =0xff
|
||||||
; GISEL-NEXT: sbfx w0, w8, #0, #1
|
|
||||||
; GISEL-NEXT: ret
|
; GISEL-NEXT: ret
|
||||||
%1 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 0, i8 1)
|
%1 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 0, i8 1)
|
||||||
%2 = extractvalue { i8, i1 } %1, 1
|
%2 = extractvalue { i8, i1 } %1, 1
|
||||||
|
|||||||
@@ -64,21 +64,10 @@ entry:
|
|||||||
}
|
}
|
||||||
|
|
||||||
define i32 @saddo.select.i64(i32 %v1, i32 %v2, i1 %v3, i64 %v4, i64 %v5) {
|
define i32 @saddo.select.i64(i32 %v1, i32 %v2, i1 %v3, i64 %v4, i64 %v5) {
|
||||||
; SDAG-LABEL: saddo.select.i64:
|
; CHECK-LABEL: saddo.select.i64:
|
||||||
; SDAG: // %bb.0: // %entry
|
; CHECK: // %bb.0: // %entry
|
||||||
; SDAG-NEXT: mov w0, w1
|
; CHECK-NEXT: mov w0, w1
|
||||||
; SDAG-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
;
|
|
||||||
; GISEL-LABEL: saddo.select.i64:
|
|
||||||
; GISEL: // %bb.0: // %entry
|
|
||||||
; GISEL-NEXT: mov w8, #13 // =0xd
|
|
||||||
; GISEL-NEXT: and x9, x3, #0xc
|
|
||||||
; GISEL-NEXT: and x8, x4, x8
|
|
||||||
; GISEL-NEXT: cmn x9, x8
|
|
||||||
; GISEL-NEXT: cset w8, vs
|
|
||||||
; GISEL-NEXT: tst w8, #0x1
|
|
||||||
; GISEL-NEXT: csel w0, w0, w1, ne
|
|
||||||
; GISEL-NEXT: ret
|
|
||||||
entry:
|
entry:
|
||||||
%lhs = and i64 %v4, 12
|
%lhs = and i64 %v4, 12
|
||||||
%rhs = and i64 %v5, 13
|
%rhs = and i64 %v5, 13
|
||||||
@@ -89,22 +78,10 @@ entry:
|
|||||||
}
|
}
|
||||||
|
|
||||||
define i32 @uaddo.select.i64(i32 %v1, i32 %v2, i1 %v3, i64 %v4, i64 %v5) {
|
define i32 @uaddo.select.i64(i32 %v1, i32 %v2, i1 %v3, i64 %v4, i64 %v5) {
|
||||||
; SDAG-LABEL: uaddo.select.i64:
|
; CHECK-LABEL: uaddo.select.i64:
|
||||||
; SDAG: // %bb.0: // %entry
|
; CHECK: // %bb.0: // %entry
|
||||||
; SDAG-NEXT: mov w0, w1
|
; CHECK-NEXT: mov w0, w1
|
||||||
; SDAG-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
;
|
|
||||||
; GISEL-LABEL: uaddo.select.i64:
|
|
||||||
; GISEL: // %bb.0: // %entry
|
|
||||||
; GISEL-NEXT: mov w8, #9 // =0x9
|
|
||||||
; GISEL-NEXT: mov w9, #10 // =0xa
|
|
||||||
; GISEL-NEXT: and x8, x3, x8
|
|
||||||
; GISEL-NEXT: and x9, x4, x9
|
|
||||||
; GISEL-NEXT: cmn x8, x9
|
|
||||||
; GISEL-NEXT: cset w8, hs
|
|
||||||
; GISEL-NEXT: tst w8, #0x1
|
|
||||||
; GISEL-NEXT: csel w0, w0, w1, ne
|
|
||||||
; GISEL-NEXT: ret
|
|
||||||
entry:
|
entry:
|
||||||
%lhs = and i64 %v4, 9
|
%lhs = and i64 %v4, 9
|
||||||
%rhs = and i64 %v5, 10
|
%rhs = and i64 %v5, 10
|
||||||
|
|||||||
@@ -670,36 +670,19 @@ define amdgpu_kernel void @bfe_sext_in_reg_i24(ptr addrspace(1) %out, ptr addrsp
|
|||||||
define amdgpu_kernel void @simplify_demanded_bfe_sdiv(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
|
define amdgpu_kernel void @simplify_demanded_bfe_sdiv(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
|
||||||
; GFX6-LABEL: simplify_demanded_bfe_sdiv:
|
; GFX6-LABEL: simplify_demanded_bfe_sdiv:
|
||||||
; GFX6: ; %bb.0:
|
; GFX6: ; %bb.0:
|
||||||
; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, 2.0
|
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
|
||||||
; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
|
|
||||||
; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
|
|
||||||
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
|
|
||||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||||
; GFX6-NEXT: s_load_dword s0, s[6:7], 0x0
|
; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0
|
||||||
; GFX6-NEXT: s_mov_b32 s6, -1
|
; GFX6-NEXT: s_mov_b32 s2, -1
|
||||||
; GFX6-NEXT: s_mov_b32 s7, 0xf000
|
|
||||||
; GFX6-NEXT: v_mul_lo_u32 v1, v0, -2
|
|
||||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||||
; GFX6-NEXT: s_bfe_i32 s0, s0, 0x100001
|
; GFX6-NEXT: s_bfe_i32 s3, s3, 0x100001
|
||||||
; GFX6-NEXT: s_ashr_i32 s2, s0, 31
|
; GFX6-NEXT: s_ashr_i32 s4, s3, 31
|
||||||
; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
|
; GFX6-NEXT: s_lshr_b32 s4, s4, 31
|
||||||
; GFX6-NEXT: s_add_i32 s0, s0, s2
|
; GFX6-NEXT: s_add_i32 s3, s3, s4
|
||||||
; GFX6-NEXT: s_xor_b32 s0, s0, s2
|
; GFX6-NEXT: s_ashr_i32 s3, s3, 1
|
||||||
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
|
; GFX6-NEXT: v_mov_b32_e32 v0, s3
|
||||||
; GFX6-NEXT: v_mul_hi_u32 v0, s0, v0
|
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||||
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v0
|
; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||||
; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0
|
|
||||||
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s0, v1
|
|
||||||
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 2, v1
|
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
|
||||||
; GFX6-NEXT: v_subrev_i32_e64 v2, s[0:1], 2, v1
|
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
|
|
||||||
; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0
|
|
||||||
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 2, v1
|
|
||||||
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
|
|
||||||
; GFX6-NEXT: v_xor_b32_e32 v0, s2, v0
|
|
||||||
; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0
|
|
||||||
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
||||||
; GFX6-NEXT: s_endpgm
|
; GFX6-NEXT: s_endpgm
|
||||||
%src = load i32, ptr addrspace(1) %in, align 4
|
%src = load i32, ptr addrspace(1) %in, align 4
|
||||||
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 1, i32 16)
|
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 1, i32 16)
|
||||||
|
|||||||
@@ -279,125 +279,27 @@ define i32 @v_sdiv_i32_pow2k_denom(i32 %num) {
|
|||||||
; CHECK: ; %bb.0:
|
; CHECK: ; %bb.0:
|
||||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
|
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
|
||||||
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, 0x45800000
|
; CHECK-NEXT: v_lshrrev_b32_e32 v1, 20, v1
|
||||||
; CHECK-NEXT: v_mov_b32_e32 v3, 0xfffff000
|
|
||||||
; CHECK-NEXT: v_mov_b32_e32 v4, 0x1000
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
|
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
|
||||||
; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
|
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 12, v0
|
||||||
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
|
|
||||||
; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
|
|
||||||
; CHECK-NEXT: v_mul_lo_u32 v3, v2, v3
|
|
||||||
; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
|
|
||||||
; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
|
|
||||||
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 12, v2
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v2
|
|
||||||
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
|
|
||||||
; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5]
|
|
||||||
; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, 0x1000, v0
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5]
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v2
|
|
||||||
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
|
|
||||||
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
|
|
||||||
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
|
|
||||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||||
%result = sdiv i32 %num, 4096
|
%result = sdiv i32 %num, 4096
|
||||||
ret i32 %result
|
ret i32 %result
|
||||||
}
|
}
|
||||||
|
|
||||||
define <2 x i32> @v_sdiv_v2i32_pow2k_denom(<2 x i32> %num) {
|
define <2 x i32> @v_sdiv_v2i32_pow2k_denom(<2 x i32> %num) {
|
||||||
; GISEL-LABEL: v_sdiv_v2i32_pow2k_denom:
|
; CHECK-LABEL: v_sdiv_v2i32_pow2k_denom:
|
||||||
; GISEL: ; %bb.0:
|
; CHECK: ; %bb.0:
|
||||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
|
; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v0
|
||||||
; GISEL-NEXT: v_mov_b32_e32 v3, 0x1000
|
; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1
|
||||||
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, 0x1000
|
; CHECK-NEXT: v_lshrrev_b32_e32 v2, 20, v2
|
||||||
; GISEL-NEXT: v_mov_b32_e32 v5, 0xfffff000
|
; CHECK-NEXT: v_lshrrev_b32_e32 v3, 20, v3
|
||||||
; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
|
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
||||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3
|
||||||
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
|
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 12, v0
|
||||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v6
|
; CHECK-NEXT: v_ashrrev_i32_e32 v1, 12, v1
|
||||||
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||||
; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
|
|
||||||
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
|
|
||||||
; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
|
|
||||||
; GISEL-NEXT: v_mul_lo_u32 v5, v4, v5
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v5, v0, v4
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4
|
|
||||||
; GISEL-NEXT: v_lshlrev_b32_e32 v7, 12, v5
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v5
|
|
||||||
; GISEL-NEXT: v_lshlrev_b32_e32 v9, 12, v4
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v4
|
|
||||||
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
|
|
||||||
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v9
|
|
||||||
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[4:5]
|
|
||||||
; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v0, v3
|
|
||||||
; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v1, v3
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[6:7]
|
|
||||||
; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, 0x1000, v1
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v7, s[4:5]
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[6:7]
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v4
|
|
||||||
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v7, vcc
|
|
||||||
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v8, vcc
|
|
||||||
; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
|
|
||||||
; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
|
|
||||||
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
|
|
||||||
; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
|
|
||||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
|
||||||
;
|
|
||||||
; CGP-LABEL: v_sdiv_v2i32_pow2k_denom:
|
|
||||||
; CGP: ; %bb.0:
|
|
||||||
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
||||||
; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
|
|
||||||
; CGP-NEXT: v_rcp_iflag_f32_e32 v3, 0x45800000
|
|
||||||
; CGP-NEXT: v_mov_b32_e32 v4, 0xfffff000
|
|
||||||
; CGP-NEXT: v_mov_b32_e32 v5, 0x1000
|
|
||||||
; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
|
||||||
; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6
|
|
||||||
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
|
|
||||||
; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
|
|
||||||
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
|
|
||||||
; CGP-NEXT: v_mul_lo_u32 v4, v3, v4
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v4, v0, v3
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v3, v1, v3
|
|
||||||
; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v4
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4
|
|
||||||
; CGP-NEXT: v_lshlrev_b32_e32 v9, 12, v3
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v3
|
|
||||||
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v7
|
|
||||||
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v9
|
|
||||||
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[4:5]
|
|
||||||
; CGP-NEXT: v_sub_i32_e32 v7, vcc, v0, v5
|
|
||||||
; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v1, v5
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v10, s[6:7]
|
|
||||||
; CGP-NEXT: v_subrev_i32_e32 v8, vcc, 0x1000, v1
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v7, s[4:5]
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v4
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[6:7]
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v3
|
|
||||||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v7, vcc
|
|
||||||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e32 v1, v3, v8, vcc
|
|
||||||
; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
|
|
||||||
; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
|
|
||||||
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
|
|
||||||
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
|
|
||||||
; CGP-NEXT: s_setpc_b64 s[30:31]
|
|
||||||
%result = sdiv <2 x i32> %num, <i32 4096, i32 4096>
|
%result = sdiv <2 x i32> %num, <i32 4096, i32 4096>
|
||||||
ret <2 x i32> %result
|
ret <2 x i32> %result
|
||||||
}
|
}
|
||||||
@@ -884,3 +786,13 @@ define <2 x i32> @v_sdiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
|
|||||||
%result = sdiv <2 x i32> %num.mask, %den.mask
|
%result = sdiv <2 x i32> %num.mask, %den.mask
|
||||||
ret <2 x i32> %result
|
ret <2 x i32> %result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define i32 @v_sdiv_i32_exact(i32 %num) {
|
||||||
|
; CHECK-LABEL: v_sdiv_i32_exact:
|
||||||
|
; CHECK: ; %bb.0:
|
||||||
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
|
; CHECK-NEXT: v_ashrrev_i32_e32 v0, 12, v0
|
||||||
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
%result = sdiv exact i32 %num, 4096
|
||||||
|
ret i32 %result
|
||||||
|
}
|
||||||
|
|||||||
@@ -999,126 +999,11 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) {
|
|||||||
; CHECK-LABEL: v_sdiv_i64_pow2k_denom:
|
; CHECK-LABEL: v_sdiv_i64_pow2k_denom:
|
||||||
; CHECK: ; %bb.0:
|
; CHECK: ; %bb.0:
|
||||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000
|
; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v1
|
||||||
; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0
|
; CHECK-NEXT: v_lshrrev_b32_e32 v2, 20, v2
|
||||||
; CHECK-NEXT: v_mov_b32_e32 v6, 0xfffff000
|
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
||||||
; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
|
; CHECK-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
||||||
; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
|
; CHECK-NEXT: v_ashr_i64 v[0:1], v[0:1], 12
|
||||||
; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
|
|
||||||
; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
|
|
||||||
; CHECK-NEXT: v_trunc_f32_e32 v4, v3
|
|
||||||
; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4
|
|
||||||
; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v2
|
|
||||||
; CHECK-NEXT: v_cvt_u32_f32_e32 v7, v4
|
|
||||||
; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v5, 0
|
|
||||||
; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v7, v[3:4]
|
|
||||||
; CHECK-NEXT: v_mul_hi_u32 v8, v5, v2
|
|
||||||
; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4]
|
|
||||||
; CHECK-NEXT: v_mul_lo_u32 v4, v7, v2
|
|
||||||
; CHECK-NEXT: v_mul_hi_u32 v2, v7, v2
|
|
||||||
; CHECK-NEXT: v_mul_lo_u32 v9, v5, v3
|
|
||||||
; CHECK-NEXT: v_mul_lo_u32 v10, v7, v3
|
|
||||||
; CHECK-NEXT: v_mul_hi_u32 v11, v5, v3
|
|
||||||
; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v9
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v10, v2
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v9, v4
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v11
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v2
|
|
||||||
; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v3, vcc
|
|
||||||
; CHECK-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v5, 0
|
|
||||||
; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v7, v[3:4]
|
|
||||||
; CHECK-NEXT: v_ashrrev_i32_e32 v6, 31, v1
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v6
|
|
||||||
; CHECK-NEXT: v_mad_u64_u32 v[3:4], s[4:5], -1, v5, v[3:4]
|
|
||||||
; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v6, vcc
|
|
||||||
; CHECK-NEXT: v_xor_b32_e32 v4, v0, v6
|
|
||||||
; CHECK-NEXT: v_mul_lo_u32 v0, v7, v2
|
|
||||||
; CHECK-NEXT: v_mul_lo_u32 v8, v5, v3
|
|
||||||
; CHECK-NEXT: v_xor_b32_e32 v9, v1, v6
|
|
||||||
; CHECK-NEXT: v_mul_hi_u32 v1, v5, v2
|
|
||||||
; CHECK-NEXT: v_mul_hi_u32 v2, v7, v2
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
|
||||||
; CHECK-NEXT: v_mul_lo_u32 v1, v7, v3
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v8, v0
|
|
||||||
; CHECK-NEXT: v_mul_hi_u32 v8, v5, v3
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8
|
|
||||||
; CHECK-NEXT: v_mul_hi_u32 v3, v7, v3
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v1, vcc, v3, v1
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0
|
|
||||||
; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v7, v1, vcc
|
|
||||||
; CHECK-NEXT: v_mul_lo_u32 v2, v9, v0
|
|
||||||
; CHECK-NEXT: v_mul_lo_u32 v3, v4, v1
|
|
||||||
; CHECK-NEXT: v_mul_hi_u32 v7, v4, v0
|
|
||||||
; CHECK-NEXT: v_mul_hi_u32 v0, v9, v0
|
|
||||||
; CHECK-NEXT: v_mov_b32_e32 v5, 0x1000
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
|
|
||||||
; CHECK-NEXT: v_mul_lo_u32 v7, v9, v1
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
|
|
||||||
; CHECK-NEXT: v_mul_hi_u32 v3, v4, v1
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v3
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v7, v3
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v7, vcc, v0, v2
|
|
||||||
; CHECK-NEXT: v_mul_hi_u32 v8, v9, v1
|
|
||||||
; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v7, 0
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v2
|
|
||||||
; CHECK-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v5, v3, v[1:2]
|
|
||||||
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0
|
|
||||||
; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v9, v1, vcc
|
|
||||||
; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v9, v1
|
|
||||||
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
|
|
||||||
; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
|
|
||||||
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5]
|
|
||||||
; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2
|
|
||||||
; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e64 v2, -1, v4, s[4:5]
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v7
|
|
||||||
; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v3, vcc
|
|
||||||
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
|
|
||||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
|
|
||||||
; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v4
|
|
||||||
; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v8, vcc
|
|
||||||
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v5, vcc
|
|
||||||
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
|
|
||||||
; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
|
|
||||||
; CHECK-NEXT: v_xor_b32_e32 v0, v0, v6
|
|
||||||
; CHECK-NEXT: v_xor_b32_e32 v1, v1, v6
|
|
||||||
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
|
|
||||||
; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc
|
|
||||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||||
%result = sdiv i64 %num, 4096
|
%result = sdiv i64 %num, 4096
|
||||||
ret i64 %result
|
ret i64 %result
|
||||||
@@ -1128,473 +1013,31 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) {
|
|||||||
; GISEL-LABEL: v_sdiv_v2i64_pow2k_denom:
|
; GISEL-LABEL: v_sdiv_v2i64_pow2k_denom:
|
||||||
; GISEL: ; %bb.0:
|
; GISEL: ; %bb.0:
|
||||||
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; GISEL-NEXT: v_cvt_f32_u32_e32 v4, 0x1000
|
|
||||||
; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
|
|
||||||
; GISEL-NEXT: s_sub_u32 s6, 0, 0x1000
|
|
||||||
; GISEL-NEXT: s_subb_u32 s7, 0, 0
|
|
||||||
; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
|
|
||||||
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
|
|
||||||
; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
|
|
||||||
; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
|
|
||||||
; GISEL-NEXT: v_trunc_f32_e32 v7, v5
|
|
||||||
; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7
|
|
||||||
; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v4
|
|
||||||
; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
|
|
||||||
; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], s6, v6, 0
|
|
||||||
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[5:6]
|
|
||||||
; GISEL-NEXT: v_mul_lo_u32 v5, v7, v4
|
|
||||||
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v9, v6, v4
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v4, v7, v4
|
|
||||||
; GISEL-NEXT: v_mul_lo_u32 v10, v6, v8
|
|
||||||
; GISEL-NEXT: v_mul_lo_u32 v11, v7, v8
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v12, v6, v8
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v5
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v6, v4
|
|
||||||
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
|
|
||||||
; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v7, v5, vcc
|
|
||||||
; GISEL-NEXT: v_mov_b32_e32 v4, v9
|
|
||||||
; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v5, v[4:5]
|
|
||||||
; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1
|
; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1
|
||||||
|
; GISEL-NEXT: v_lshrrev_b32_e32 v4, 20, v4
|
||||||
|
; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v3
|
||||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
|
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
|
||||||
; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s7, v11, v[9:10]
|
; GISEL-NEXT: v_lshrrev_b32_e32 v5, 20, v5
|
||||||
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc
|
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
||||||
; GISEL-NEXT: v_xor_b32_e32 v10, v0, v4
|
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5
|
||||||
; GISEL-NEXT: v_mul_lo_u32 v0, v5, v8
|
; GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
|
||||||
; GISEL-NEXT: v_mul_lo_u32 v12, v11, v9
|
; GISEL-NEXT: v_ashr_i64 v[0:1], v[0:1], 12
|
||||||
; GISEL-NEXT: v_xor_b32_e32 v13, v1, v4
|
; GISEL-NEXT: v_ashr_i64 v[2:3], v[2:3], 12
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v1, v11, v8
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v8, v5, v8
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_mul_lo_u32 v1, v5, v9
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v12, v11, v9
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v9, v5, v9
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
|
|
||||||
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc
|
|
||||||
; GISEL-NEXT: v_mul_lo_u32 v8, v13, v0
|
|
||||||
; GISEL-NEXT: v_mul_lo_u32 v9, v10, v1
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v11, v10, v0
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
|
|
||||||
; GISEL-NEXT: v_mov_b32_e32 v5, 0x1000
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_mul_lo_u32 v11, v13, v1
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v9, v10, v1
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v9
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v8
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v12, v13, v1
|
|
||||||
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v8
|
|
||||||
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v5, v12, v[1:2]
|
|
||||||
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v0
|
|
||||||
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9]
|
|
||||||
; GISEL-NEXT: s_sub_u32 s6, 0, 0x1000
|
|
||||||
; GISEL-NEXT: s_subb_u32 s7, 0, 0
|
|
||||||
; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v13, v8, vcc
|
|
||||||
; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v13, v8
|
|
||||||
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
|
|
||||||
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
|
|
||||||
; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v8, vcc
|
|
||||||
; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v5
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v9, s[4:5]
|
|
||||||
; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v11
|
|
||||||
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0
|
|
||||||
; GISEL-NEXT: v_addc_u32_e32 v14, vcc, 0, v12, vcc
|
|
||||||
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
|
|
||||||
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e32 v15, -1, v8, vcc
|
|
||||||
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v13
|
|
||||||
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
|
|
||||||
; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v14, vcc
|
|
||||||
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v1, vcc
|
|
||||||
; GISEL-NEXT: v_mul_lo_u32 v1, v7, v0
|
|
||||||
; GISEL-NEXT: v_mul_lo_u32 v13, v6, v8
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v15, v6, v0
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v16, vcc
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v0, v7, v0
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v15
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_mul_lo_u32 v15, v7, v8
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v13, v6, v8
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v6, v0
|
|
||||||
; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v7, v1, vcc
|
|
||||||
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
|
|
||||||
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e32 v9, v11, v9, vcc
|
|
||||||
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
|
|
||||||
; GISEL-NEXT: v_xor_b32_e32 v1, v9, v4
|
|
||||||
; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3
|
|
||||||
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e32 v10, v12, v14, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9
|
|
||||||
; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc
|
|
||||||
; GISEL-NEXT: v_xor_b32_e32 v11, v2, v9
|
|
||||||
; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0
|
|
||||||
; GISEL-NEXT: v_mul_lo_u32 v7, v8, v6
|
|
||||||
; GISEL-NEXT: v_xor_b32_e32 v12, v3, v9
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_mul_lo_u32 v3, v13, v6
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v7, v8, v6
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0
|
|
||||||
; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc
|
|
||||||
; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0
|
|
||||||
; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v7, v11, v0
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0
|
|
||||||
; GISEL-NEXT: v_xor_b32_e32 v8, v10, v4
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_mul_lo_u32 v7, v12, v2
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v6, v11, v2
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v3
|
|
||||||
; GISEL-NEXT: v_mul_hi_u32 v7, v12, v2
|
|
||||||
; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v7, v0
|
|
||||||
; GISEL-NEXT: v_mov_b32_e32 v0, v3
|
|
||||||
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v13, v[0:1]
|
|
||||||
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4
|
|
||||||
; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc
|
|
||||||
; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
|
|
||||||
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2
|
|
||||||
; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
|
|
||||||
; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3
|
|
||||||
; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
|
|
||||||
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5
|
|
||||||
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v5
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
|
|
||||||
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4
|
|
||||||
; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v4, -1, v6, s[4:5]
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v10
|
|
||||||
; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v13, vcc
|
|
||||||
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
|
|
||||||
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc
|
|
||||||
; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v6
|
|
||||||
; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc
|
|
||||||
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e32 v2, v6, v3, vcc
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc
|
|
||||||
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
|
|
||||||
; GISEL-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc
|
|
||||||
; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9
|
|
||||||
; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9
|
|
||||||
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9
|
|
||||||
; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc
|
|
||||||
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
; GISEL-NEXT: s_setpc_b64 s[30:31]
|
||||||
;
|
;
|
||||||
; CGP-LABEL: v_sdiv_v2i64_pow2k_denom:
|
; CGP-LABEL: v_sdiv_v2i64_pow2k_denom:
|
||||||
; CGP: ; %bb.0:
|
; CGP: ; %bb.0:
|
||||||
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x1000
|
; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1
|
||||||
; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
|
; CGP-NEXT: v_lshrrev_b32_e32 v4, 20, v4
|
||||||
; CGP-NEXT: v_mov_b32_e32 v6, 0xfffff000
|
|
||||||
; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
|
|
||||||
; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4
|
|
||||||
; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
|
|
||||||
; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
|
|
||||||
; CGP-NEXT: v_trunc_f32_e32 v7, v5
|
|
||||||
; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7
|
|
||||||
; CGP-NEXT: v_cvt_u32_f32_e32 v8, v4
|
|
||||||
; CGP-NEXT: v_cvt_u32_f32_e32 v9, v7
|
|
||||||
; CGP-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v8, 0
|
|
||||||
; CGP-NEXT: v_mov_b32_e32 v7, v5
|
|
||||||
; CGP-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v9, v[7:8]
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v12, v9, v4
|
|
||||||
; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], -1, v8, v[10:11]
|
|
||||||
; CGP-NEXT: v_mul_lo_u32 v10, v9, v4
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v11, v8, v4
|
|
||||||
; CGP-NEXT: v_mul_lo_u32 v4, v8, v13
|
|
||||||
; CGP-NEXT: v_mul_lo_u32 v7, v9, v13
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v14, v8, v13
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v13, v9, v13
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v4, vcc, v15, v4
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v14
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v7, vcc, v14, v7
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v7, vcc, v13, v7
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v16, vcc, v8, v4
|
|
||||||
; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v6, v16, 0
|
|
||||||
; CGP-NEXT: v_addc_u32_e32 v17, vcc, v9, v7, vcc
|
|
||||||
; CGP-NEXT: v_mov_b32_e32 v4, v14
|
|
||||||
; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v6, v17, v[4:5]
|
|
||||||
; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v1
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v7
|
|
||||||
; CGP-NEXT: v_mad_u64_u32 v[14:15], s[4:5], -1, v16, v[14:15]
|
|
||||||
; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc
|
|
||||||
; CGP-NEXT: v_xor_b32_e32 v15, v0, v7
|
|
||||||
; CGP-NEXT: v_mul_lo_u32 v0, v17, v13
|
|
||||||
; CGP-NEXT: v_mul_lo_u32 v4, v16, v14
|
|
||||||
; CGP-NEXT: v_xor_b32_e32 v18, v1, v7
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v1, v16, v13
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v13, v17, v13
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4
|
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v3
|
||||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
|
; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
; CGP-NEXT: v_lshrrev_b32_e32 v4, 20, v4
|
||||||
; CGP-NEXT: v_mul_lo_u32 v1, v17, v14
|
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4
|
||||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v4, v0
|
; CGP-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
|
||||||
; CGP-NEXT: v_mul_hi_u32 v4, v16, v14
|
; CGP-NEXT: v_ashr_i64 v[0:1], v[0:1], 12
|
||||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v13
|
; CGP-NEXT: v_ashr_i64 v[2:3], v[2:3], 12
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v13, v17, v14
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v4, v1
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v13, v1
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0
|
|
||||||
; CGP-NEXT: v_addc_u32_e32 v1, vcc, v17, v1, vcc
|
|
||||||
; CGP-NEXT: v_mul_lo_u32 v13, v18, v0
|
|
||||||
; CGP-NEXT: v_mul_lo_u32 v14, v15, v1
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v16, v15, v0
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v0, v18, v0
|
|
||||||
; CGP-NEXT: v_mov_b32_e32 v4, 0x1000
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_mul_lo_u32 v16, v18, v1
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v14, v15, v1
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v16, v0
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v16, vcc, v0, v13
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v17, v18, v1
|
|
||||||
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v16, 0
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v13
|
|
||||||
; CGP-NEXT: v_mad_u64_u32 v[13:14], s[4:5], v4, v17, v[1:2]
|
|
||||||
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v15, v0
|
|
||||||
; CGP-NEXT: v_subb_u32_e64 v1, s[4:5], v18, v13, vcc
|
|
||||||
; CGP-NEXT: v_sub_i32_e64 v13, s[4:5], v18, v13
|
|
||||||
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5]
|
|
||||||
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
|
|
||||||
; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v13, vcc
|
|
||||||
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
|
|
||||||
; CGP-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v15, vcc, 1, v16
|
|
||||||
; CGP-NEXT: v_addc_u32_e32 v18, vcc, 0, v17, vcc
|
|
||||||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
|
|
||||||
; CGP-NEXT: v_mov_b32_e32 v0, v5
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v14, -1, v14, s[4:5]
|
|
||||||
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v9, v[0:1]
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v19, 0, -1, vcc
|
|
||||||
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13
|
|
||||||
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], -1, v8, v[0:1]
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e32 v5, -1, v19, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v15
|
|
||||||
; CGP-NEXT: v_mul_lo_u32 v19, v8, v0
|
|
||||||
; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v18, vcc
|
|
||||||
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e32 v5, v15, v1, vcc
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e32 v13, v18, v13, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v19
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v11
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_mul_lo_u32 v11, v9, v0
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v10, v8, v0
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v0, v9, v0
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v1, vcc, v10, v1
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v1
|
|
||||||
; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v0, vcc
|
|
||||||
; CGP-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v8, 0
|
|
||||||
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
|
|
||||||
; CGP-NEXT: v_xor_b32_e32 v11, v5, v7
|
|
||||||
; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v6, v9, v[1:2]
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e32 v10, v17, v13, vcc
|
|
||||||
; CGP-NEXT: v_xor_b32_e32 v1, v10, v7
|
|
||||||
; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], -1, v8, v[5:6]
|
|
||||||
; CGP-NEXT: v_ashrrev_i32_e32 v10, 31, v3
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v10
|
|
||||||
; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc
|
|
||||||
; CGP-NEXT: v_xor_b32_e32 v12, v2, v10
|
|
||||||
; CGP-NEXT: v_mul_lo_u32 v2, v9, v0
|
|
||||||
; CGP-NEXT: v_mul_lo_u32 v6, v8, v5
|
|
||||||
; CGP-NEXT: v_xor_b32_e32 v13, v3, v10
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v3, v8, v0
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v0, v9, v0
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_mul_lo_u32 v3, v9, v5
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v2, vcc, v6, v2
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v6, v8, v5
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v5, v9, v5
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v3, vcc, v8, v0
|
|
||||||
; CGP-NEXT: v_addc_u32_e32 v2, vcc, v9, v2, vcc
|
|
||||||
; CGP-NEXT: v_mul_lo_u32 v5, v13, v3
|
|
||||||
; CGP-NEXT: v_mul_lo_u32 v6, v12, v2
|
|
||||||
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v11, v7
|
|
||||||
; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v7, vcc
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v7, v12, v3
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_mul_lo_u32 v7, v13, v2
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v3, v13, v3
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v6, v12, v2
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v3, vcc, v7, v3
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v7, vcc, v3, v5
|
|
||||||
; CGP-NEXT: v_mul_hi_u32 v8, v13, v2
|
|
||||||
; CGP-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v7, 0
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v5
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v5
|
|
||||||
; CGP-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v8, v[3:4]
|
|
||||||
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v12, v2
|
|
||||||
; CGP-NEXT: v_subb_u32_e64 v3, s[4:5], v13, v5, vcc
|
|
||||||
; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v13, v5
|
|
||||||
; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
|
|
||||||
; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
|
|
||||||
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
|
|
||||||
; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
|
|
||||||
; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v3, -1, v6, s[4:5]
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v7
|
|
||||||
; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v8, vcc
|
|
||||||
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
|
|
||||||
; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc
|
|
||||||
; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v6
|
|
||||||
; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v9, vcc
|
|
||||||
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e32 v2, v6, v4, vcc
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e32 v4, v9, v5, vcc
|
|
||||||
; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
|
|
||||||
; CGP-NEXT: v_cndmask_b32_e32 v3, v8, v4, vcc
|
|
||||||
; CGP-NEXT: v_xor_b32_e32 v2, v2, v10
|
|
||||||
; CGP-NEXT: v_xor_b32_e32 v3, v3, v10
|
|
||||||
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10
|
|
||||||
; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc
|
|
||||||
; CGP-NEXT: s_setpc_b64 s[30:31]
|
; CGP-NEXT: s_setpc_b64 s[30:31]
|
||||||
%result = sdiv <2 x i64> %num, <i64 4096, i64 4096>
|
%result = sdiv <2 x i64> %num, <i64 4096, i64 4096>
|
||||||
ret <2 x i64> %result
|
ret <2 x i64> %result
|
||||||
@@ -3398,3 +2841,13 @@ define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
|
|||||||
%result = sdiv <2 x i64> %num.mask, %den.mask
|
%result = sdiv <2 x i64> %num.mask, %den.mask
|
||||||
ret <2 x i64> %result
|
ret <2 x i64> %result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define i64 @v_sdiv_i64_exact(i64 %num) {
|
||||||
|
; CHECK-LABEL: v_sdiv_i64_exact:
|
||||||
|
; CHECK: ; %bb.0:
|
||||||
|
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
|
; CHECK-NEXT: v_ashr_i64 v[0:1], v[0:1], 12
|
||||||
|
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
%result = sdiv exact i64 %num, 4096
|
||||||
|
ret i64 %result
|
||||||
|
}
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -238,7 +238,7 @@ define i128 @fptosi_f64_to_i128(double %x) {
|
|||||||
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
|
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
|
||||||
; GISEL-NEXT: s_cbranch_execz .LBB0_4
|
; GISEL-NEXT: s_cbranch_execz .LBB0_4
|
||||||
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
|
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
|
||||||
; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xfffffbcd, v6
|
; GISEL-NEXT: v_add_u32_e32 v6, 0xfffffbcd, v6
|
||||||
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
|
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
|
||||||
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
|
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
|
||||||
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
|
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
|
||||||
@@ -612,7 +612,7 @@ define i128 @fptoui_f64_to_i128(double %x) {
|
|||||||
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
|
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
|
||||||
; GISEL-NEXT: s_cbranch_execz .LBB1_4
|
; GISEL-NEXT: s_cbranch_execz .LBB1_4
|
||||||
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
|
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
|
||||||
; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xfffffbcd, v6
|
; GISEL-NEXT: v_add_u32_e32 v6, 0xfffffbcd, v6
|
||||||
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
|
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
|
||||||
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
|
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
|
||||||
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
|
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
|
||||||
@@ -978,7 +978,7 @@ define i128 @fptosi_f32_to_i128(float %x) {
|
|||||||
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
|
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
|
||||||
; GISEL-NEXT: s_cbranch_execz .LBB2_4
|
; GISEL-NEXT: s_cbranch_execz .LBB2_4
|
||||||
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
|
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
|
||||||
; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xffffff6a, v6
|
; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff6a, v6
|
||||||
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
|
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
|
||||||
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
|
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
|
||||||
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
|
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
|
||||||
@@ -1338,7 +1338,7 @@ define i128 @fptoui_f32_to_i128(float %x) {
|
|||||||
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
|
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
|
||||||
; GISEL-NEXT: s_cbranch_execz .LBB3_4
|
; GISEL-NEXT: s_cbranch_execz .LBB3_4
|
||||||
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
|
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
|
||||||
; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xffffff6a, v6
|
; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff6a, v6
|
||||||
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
|
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
|
||||||
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
|
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
|
||||||
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
|
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
|
||||||
|
|||||||
929
llvm/test/CodeGen/X86/apx/domain-reassignment.mir
Normal file
929
llvm/test/CodeGen/X86/apx/domain-reassignment.mir
Normal file
@@ -0,0 +1,929 @@
|
|||||||
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||||
|
# RUN: llc -run-pass x86-domain-reassignment -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq -o - %s | FileCheck %s
|
||||||
|
--- |
|
||||||
|
; ModuleID = '../test/CodeGen/X86/gpr-to-mask.ll'
|
||||||
|
source_filename = "../test/CodeGen/X86/gpr-to-mask.ll"
|
||||||
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||||
|
target triple = "x86_64-unknown-unknown"
|
||||||
|
|
||||||
|
define void @test_fcmp_storefloat(i1 %cond, ptr %fptr, float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) #0 {
|
||||||
|
entry:
|
||||||
|
br i1 %cond, label %if, label %else
|
||||||
|
|
||||||
|
if: ; preds = %entry
|
||||||
|
%cmp1 = fcmp oeq float %f3, %f4
|
||||||
|
br label %exit
|
||||||
|
|
||||||
|
else: ; preds = %entry
|
||||||
|
%cmp2 = fcmp oeq float %f5, %f6
|
||||||
|
br label %exit
|
||||||
|
|
||||||
|
exit: ; preds = %else, %if
|
||||||
|
%val = phi i1 [ %cmp1, %if ], [ %cmp2, %else ]
|
||||||
|
%selected = select i1 %val, float %f1, float %f2
|
||||||
|
store float %selected, ptr %fptr
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @test_8bitops() #0 {
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
define void @test_16bitops() #0 {
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
define void @test_32bitops() #0 {
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
define void @test_64bitops() #0 {
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
define void @test_16bitext() #0 {
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
define void @test_32bitext() #0 {
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
define void @test_64bitext() #0 {
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
; Note that this function need to be compiled with -global-isel
|
||||||
|
; to obtain testable MIR
|
||||||
|
define void @test_unused(i64 %0) #0 {
|
||||||
|
%unused = lshr i64 %0, 7
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: test_fcmp_storefloat
|
||||||
|
alignment: 16
|
||||||
|
exposesReturnsTwice: false
|
||||||
|
legalized: false
|
||||||
|
regBankSelected: false
|
||||||
|
selected: false
|
||||||
|
tracksRegLiveness: true
|
||||||
|
registers:
|
||||||
|
- { id: 0, class: gr8, preferred-register: '' }
|
||||||
|
- { id: 1, class: gr8, preferred-register: '' }
|
||||||
|
- { id: 2, class: gr8, preferred-register: '' }
|
||||||
|
- { id: 3, class: gr32, preferred-register: '' }
|
||||||
|
- { id: 4, class: gr64, preferred-register: '' }
|
||||||
|
- { id: 5, class: vr128x, preferred-register: '' }
|
||||||
|
- { id: 6, class: fr32x, preferred-register: '' }
|
||||||
|
- { id: 7, class: fr32x, preferred-register: '' }
|
||||||
|
- { id: 8, class: fr32x, preferred-register: '' }
|
||||||
|
- { id: 9, class: fr32x, preferred-register: '' }
|
||||||
|
- { id: 10, class: fr32x, preferred-register: '' }
|
||||||
|
- { id: 11, class: gr8, preferred-register: '' }
|
||||||
|
- { id: 12, class: vk1, preferred-register: '' }
|
||||||
|
- { id: 13, class: gr32, preferred-register: '' }
|
||||||
|
- { id: 14, class: vk1, preferred-register: '' }
|
||||||
|
- { id: 15, class: gr32, preferred-register: '' }
|
||||||
|
- { id: 16, class: gr32, preferred-register: '' }
|
||||||
|
- { id: 17, class: gr32, preferred-register: '' }
|
||||||
|
- { id: 18, class: vk1wm, preferred-register: '' }
|
||||||
|
- { id: 19, class: vr128x, preferred-register: '' }
|
||||||
|
- { id: 20, class: vr128, preferred-register: '' }
|
||||||
|
- { id: 21, class: vr128, preferred-register: '' }
|
||||||
|
- { id: 22, class: fr32x, preferred-register: '' }
|
||||||
|
liveins:
|
||||||
|
- { reg: '$edi', virtual-reg: '%3' }
|
||||||
|
- { reg: '$rsi', virtual-reg: '%4' }
|
||||||
|
- { reg: '$xmm0', virtual-reg: '%5' }
|
||||||
|
- { reg: '$xmm1', virtual-reg: '%6' }
|
||||||
|
- { reg: '$xmm2', virtual-reg: '%7' }
|
||||||
|
- { reg: '$xmm3', virtual-reg: '%8' }
|
||||||
|
- { reg: '$xmm4', virtual-reg: '%9' }
|
||||||
|
- { reg: '$xmm5', virtual-reg: '%10' }
|
||||||
|
frameInfo:
|
||||||
|
isFrameAddressTaken: false
|
||||||
|
isReturnAddressTaken: false
|
||||||
|
hasStackMap: false
|
||||||
|
hasPatchPoint: false
|
||||||
|
stackSize: 0
|
||||||
|
offsetAdjustment: 0
|
||||||
|
maxAlignment: 0
|
||||||
|
adjustsStack: false
|
||||||
|
hasCalls: false
|
||||||
|
stackProtector: ''
|
||||||
|
maxCallFrameSize: 4294967295
|
||||||
|
hasOpaqueSPAdjustment: false
|
||||||
|
hasVAStart: false
|
||||||
|
hasMustTailInVarArgFunc: false
|
||||||
|
savePoint: ''
|
||||||
|
restorePoint: ''
|
||||||
|
fixedStack:
|
||||||
|
stack:
|
||||||
|
constants:
|
||||||
|
body: |
|
||||||
|
; CHECK-LABEL: name: test_fcmp_storefloat
|
||||||
|
; CHECK: bb.0.entry:
|
||||||
|
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||||
|
; CHECK: liveins: $edi, $rsi, $xmm0, $xmm1, $xmm2, $xmm3, $xmm4, $xmm5
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:fr32x = COPY $xmm5
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:fr32x = COPY $xmm4
|
||||||
|
; CHECK: [[COPY2:%[0-9]+]]:fr32x = COPY $xmm3
|
||||||
|
; CHECK: [[COPY3:%[0-9]+]]:fr32x = COPY $xmm2
|
||||||
|
; CHECK: [[COPY4:%[0-9]+]]:fr32x = COPY $xmm1
|
||||||
|
; CHECK: [[COPY5:%[0-9]+]]:vr128x = COPY $xmm0
|
||||||
|
; CHECK: [[COPY6:%[0-9]+]]:gr64 = COPY $rsi
|
||||||
|
; CHECK: [[COPY7:%[0-9]+]]:gr32 = COPY $edi
|
||||||
|
; CHECK: [[COPY8:%[0-9]+]]:gr8 = COPY [[COPY7]].sub_8bit
|
||||||
|
; CHECK: TEST8ri killed [[COPY8]], 1, implicit-def $eflags
|
||||||
|
; CHECK: JCC_1 %bb.2, 4, implicit $eflags
|
||||||
|
; CHECK: JMP_1 %bb.1
|
||||||
|
; CHECK: bb.1.if:
|
||||||
|
; CHECK: successors: %bb.3(0x80000000)
|
||||||
|
; CHECK: [[VCMPSSZrri:%[0-9]+]]:vk1 = VCMPSSZrri [[COPY3]], [[COPY2]], 0
|
||||||
|
; CHECK: [[COPY9:%[0-9]+]]:vk32 = COPY [[VCMPSSZrri]]
|
||||||
|
; CHECK: [[COPY10:%[0-9]+]]:vk8 = COPY [[COPY9]]
|
||||||
|
; CHECK: JMP_1 %bb.3
|
||||||
|
; CHECK: bb.2.else:
|
||||||
|
; CHECK: successors: %bb.3(0x80000000)
|
||||||
|
; CHECK: [[VCMPSSZrri1:%[0-9]+]]:vk1 = VCMPSSZrri [[COPY1]], [[COPY]], 0
|
||||||
|
; CHECK: [[COPY11:%[0-9]+]]:vk32 = COPY [[VCMPSSZrri1]]
|
||||||
|
; CHECK: [[COPY12:%[0-9]+]]:vk8 = COPY [[COPY11]]
|
||||||
|
; CHECK: bb.3.exit:
|
||||||
|
; CHECK: [[PHI:%[0-9]+]]:vk8 = PHI [[COPY12]], %bb.2, [[COPY10]], %bb.1
|
||||||
|
; CHECK: [[DEF:%[0-9]+]]:vk32 = IMPLICIT_DEF
|
||||||
|
; CHECK: [[COPY13:%[0-9]+]]:vk32 = COPY [[PHI]]
|
||||||
|
; CHECK: [[COPY14:%[0-9]+]]:vk1wm = COPY [[COPY13]]
|
||||||
|
; CHECK: [[COPY15:%[0-9]+]]:vr128x = COPY [[COPY4]]
|
||||||
|
; CHECK: [[DEF1:%[0-9]+]]:vr128 = IMPLICIT_DEF
|
||||||
|
; CHECK: [[VMOVSSZrrk:%[0-9]+]]:vr128 = VMOVSSZrrk [[COPY15]], killed [[COPY14]], killed [[DEF1]], [[COPY5]]
|
||||||
|
; CHECK: [[COPY16:%[0-9]+]]:fr32x = COPY [[VMOVSSZrrk]]
|
||||||
|
; CHECK: VMOVSSZmr [[COPY6]], 1, $noreg, 0, $noreg, killed [[COPY16]] :: (store (s32) into %ir.fptr)
|
||||||
|
; CHECK: RET 0
|
||||||
|
bb.0.entry:
|
||||||
|
successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||||
|
liveins: $edi, $rsi, $xmm0, $xmm1, $xmm2, $xmm3, $xmm4, $xmm5
|
||||||
|
|
||||||
|
%10 = COPY $xmm5
|
||||||
|
%9 = COPY $xmm4
|
||||||
|
%8 = COPY $xmm3
|
||||||
|
%7 = COPY $xmm2
|
||||||
|
%6 = COPY $xmm1
|
||||||
|
%5 = COPY $xmm0
|
||||||
|
%4 = COPY $rsi
|
||||||
|
%3 = COPY $edi
|
||||||
|
%11 = COPY %3.sub_8bit
|
||||||
|
TEST8ri killed %11, 1, implicit-def $eflags
|
||||||
|
JCC_1 %bb.2, 4, implicit $eflags
|
||||||
|
JMP_1 %bb.1
|
||||||
|
|
||||||
|
bb.1.if:
|
||||||
|
successors: %bb.3(0x80000000)
|
||||||
|
|
||||||
|
%14 = VCMPSSZrri %7, %8, 0, implicit $mxcsr
|
||||||
|
|
||||||
|
; check that cross domain copies are replaced with same domain copies.
|
||||||
|
|
||||||
|
%15 = COPY %14
|
||||||
|
%0 = COPY %15.sub_8bit
|
||||||
|
JMP_1 %bb.3
|
||||||
|
|
||||||
|
bb.2.else:
|
||||||
|
successors: %bb.3(0x80000000)
|
||||||
|
%12 = VCMPSSZrri %9, %10, 0, implicit $mxcsr
|
||||||
|
|
||||||
|
; check that cross domain copies are replaced with same domain copies.
|
||||||
|
|
||||||
|
%13 = COPY %12
|
||||||
|
%1 = COPY %13.sub_8bit
|
||||||
|
|
||||||
|
bb.3.exit:
|
||||||
|
|
||||||
|
; check PHI, IMPLICIT_DEF, and INSERT_SUBREG replacers.
|
||||||
|
|
||||||
|
%2 = PHI %1, %bb.2, %0, %bb.1
|
||||||
|
%17 = IMPLICIT_DEF
|
||||||
|
%16 = INSERT_SUBREG %17, %2, %subreg.sub_8bit_hi
|
||||||
|
%18 = COPY %16
|
||||||
|
%19 = COPY %6
|
||||||
|
%21 = IMPLICIT_DEF
|
||||||
|
%20 = VMOVSSZrrk %19, killed %18, killed %21, %5
|
||||||
|
%22 = COPY %20
|
||||||
|
VMOVSSZmr %4, 1, $noreg, 0, $noreg, killed %22 :: (store (s32) into %ir.fptr)
|
||||||
|
RET 0
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: test_8bitops
|
||||||
|
alignment: 16
|
||||||
|
exposesReturnsTwice: false
|
||||||
|
legalized: false
|
||||||
|
regBankSelected: false
|
||||||
|
selected: false
|
||||||
|
tracksRegLiveness: true
|
||||||
|
registers:
|
||||||
|
- { id: 0, class: gr64, preferred-register: '' }
|
||||||
|
- { id: 1, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 2, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 3, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 4, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 5, class: vk8, preferred-register: '' }
|
||||||
|
- { id: 6, class: gr32, preferred-register: '' }
|
||||||
|
- { id: 7, class: gr8, preferred-register: '' }
|
||||||
|
- { id: 8, class: gr32, preferred-register: '' }
|
||||||
|
- { id: 9, class: gr32, preferred-register: '' }
|
||||||
|
- { id: 10, class: vk8wm, preferred-register: '' }
|
||||||
|
- { id: 11, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 12, class: gr8, preferred-register: '' }
|
||||||
|
- { id: 13, class: gr8, preferred-register: '' }
|
||||||
|
- { id: 14, class: gr8, preferred-register: '' }
|
||||||
|
- { id: 15, class: gr8, preferred-register: '' }
|
||||||
|
- { id: 16, class: gr8, preferred-register: '' }
|
||||||
|
- { id: 17, class: gr8, preferred-register: '' }
|
||||||
|
- { id: 18, class: gr8, preferred-register: '' }
|
||||||
|
liveins:
|
||||||
|
- { reg: '$rdi', virtual-reg: '%0' }
|
||||||
|
- { reg: '$zmm0', virtual-reg: '%1' }
|
||||||
|
- { reg: '$zmm1', virtual-reg: '%2' }
|
||||||
|
- { reg: '$zmm2', virtual-reg: '%3' }
|
||||||
|
- { reg: '$zmm3', virtual-reg: '%4' }
|
||||||
|
frameInfo:
|
||||||
|
isFrameAddressTaken: false
|
||||||
|
isReturnAddressTaken: false
|
||||||
|
hasStackMap: false
|
||||||
|
hasPatchPoint: false
|
||||||
|
stackSize: 0
|
||||||
|
offsetAdjustment: 0
|
||||||
|
maxAlignment: 0
|
||||||
|
adjustsStack: false
|
||||||
|
hasCalls: false
|
||||||
|
stackProtector: ''
|
||||||
|
maxCallFrameSize: 4294967295
|
||||||
|
hasOpaqueSPAdjustment: false
|
||||||
|
hasVAStart: false
|
||||||
|
hasMustTailInVarArgFunc: false
|
||||||
|
savePoint: ''
|
||||||
|
restorePoint: ''
|
||||||
|
fixedStack:
|
||||||
|
stack:
|
||||||
|
constants:
|
||||||
|
body: |
|
||||||
|
; CHECK-LABEL: name: test_8bitops
|
||||||
|
; CHECK: bb.0:
|
||||||
|
; CHECK: successors: %bb.1(0x80000000)
|
||||||
|
; CHECK: liveins: $rdi, $zmm0, $zmm1, $zmm2, $zmm3
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
|
||||||
|
; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY $zmm1
|
||||||
|
; CHECK: [[COPY3:%[0-9]+]]:vr512 = COPY $zmm2
|
||||||
|
; CHECK: [[COPY4:%[0-9]+]]:vr512 = COPY $zmm3
|
||||||
|
; CHECK: [[VCMPPDZrri:%[0-9]+]]:vk8 = VCMPPDZrri [[COPY3]], [[COPY4]], 0
|
||||||
|
; CHECK: [[COPY5:%[0-9]+]]:vk32 = COPY [[VCMPPDZrri]]
|
||||||
|
; CHECK: [[COPY6:%[0-9]+]]:vk8 = COPY [[COPY5]]
|
||||||
|
; CHECK: [[KSHIFTRBri:%[0-9]+]]:vk8 = KSHIFTRBri [[COPY6]], 2
|
||||||
|
; CHECK: [[KSHIFTLBri:%[0-9]+]]:vk8 = KSHIFTLBri [[KSHIFTRBri]], 1
|
||||||
|
; CHECK: [[KNOTBrr:%[0-9]+]]:vk8 = KNOTBrr [[KSHIFTLBri]]
|
||||||
|
; CHECK: [[KORBrr:%[0-9]+]]:vk8 = KORBrr [[KNOTBrr]], [[KSHIFTRBri]]
|
||||||
|
; CHECK: [[KANDBrr:%[0-9]+]]:vk8 = KANDBrr [[KORBrr]], [[KSHIFTLBri]]
|
||||||
|
; CHECK: [[KXORBrr:%[0-9]+]]:vk8 = KXORBrr [[KANDBrr]], [[KSHIFTRBri]]
|
||||||
|
; CHECK: [[KADDBrr:%[0-9]+]]:vk8 = KADDBrr [[KXORBrr]], [[KNOTBrr]]
|
||||||
|
; CHECK: [[DEF:%[0-9]+]]:vk32 = IMPLICIT_DEF
|
||||||
|
; CHECK: [[COPY7:%[0-9]+]]:vk32 = COPY [[KADDBrr]]
|
||||||
|
; CHECK: [[COPY8:%[0-9]+]]:vk8wm = COPY [[COPY7]]
|
||||||
|
; CHECK: [[VMOVAPDZrrk:%[0-9]+]]:vr512 = VMOVAPDZrrk [[COPY2]], killed [[COPY8]], [[COPY1]]
|
||||||
|
; CHECK: VMOVAPDZmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVAPDZrrk]]
|
||||||
|
; CHECK: bb.1:
|
||||||
|
; CHECK: successors: %bb.2(0x80000000)
|
||||||
|
; CHECK: bb.2:
|
||||||
|
; CHECK: RET 0
|
||||||
|
bb.0:
|
||||||
|
liveins: $rdi, $zmm0, $zmm1, $zmm2, $zmm3
|
||||||
|
|
||||||
|
%0 = COPY $rdi
|
||||||
|
%1 = COPY $zmm0
|
||||||
|
%2 = COPY $zmm1
|
||||||
|
%3 = COPY $zmm2
|
||||||
|
%4 = COPY $zmm3
|
||||||
|
|
||||||
|
%5 = VCMPPDZrri %3, %4, 0, implicit $mxcsr
|
||||||
|
%6 = COPY %5
|
||||||
|
%7 = COPY %6.sub_8bit
|
||||||
|
|
||||||
|
%12 = SHR8ri %7, 2, implicit-def dead $eflags
|
||||||
|
%13 = SHL8ri %12, 1, implicit-def dead $eflags
|
||||||
|
%14 = NOT8r %13
|
||||||
|
%15 = OR8rr %14, %12, implicit-def dead $eflags
|
||||||
|
%16 = AND8rr %15, %13, implicit-def dead $eflags
|
||||||
|
%17 = XOR8rr %16, %12, implicit-def dead $eflags
|
||||||
|
%18 = ADD8rr %17, %14, implicit-def dead $eflags
|
||||||
|
|
||||||
|
%8 = IMPLICIT_DEF
|
||||||
|
%9 = INSERT_SUBREG %8, %18, %subreg.sub_8bit_hi
|
||||||
|
%10 = COPY %9
|
||||||
|
%11 = VMOVAPDZrrk %2, killed %10, %1
|
||||||
|
VMOVAPDZmr %0, 1, $noreg, 0, $noreg, killed %11
|
||||||
|
|
||||||
|
; FIXME We can't replace TEST with KTEST due to flag differences
|
||||||
|
; TEST8rr %18, %18, implicit-def $eflags
|
||||||
|
; JCC_1 %bb.1, 4, implicit $eflags
|
||||||
|
; JMP_1 %bb.2
|
||||||
|
|
||||||
|
bb.1:
|
||||||
|
|
||||||
|
bb.2:
|
||||||
|
RET 0
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: test_16bitops
|
||||||
|
alignment: 16
|
||||||
|
exposesReturnsTwice: false
|
||||||
|
legalized: false
|
||||||
|
regBankSelected: false
|
||||||
|
selected: false
|
||||||
|
tracksRegLiveness: true
|
||||||
|
registers:
|
||||||
|
- { id: 0, class: gr64, preferred-register: '' }
|
||||||
|
- { id: 1, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 2, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 3, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 4, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 5, class: vk16, preferred-register: '' }
|
||||||
|
- { id: 6, class: gr32, preferred-register: '' }
|
||||||
|
- { id: 7, class: gr16, preferred-register: '' }
|
||||||
|
- { id: 8, class: gr32, preferred-register: '' }
|
||||||
|
- { id: 9, class: gr32, preferred-register: '' }
|
||||||
|
- { id: 10, class: vk16wm, preferred-register: '' }
|
||||||
|
- { id: 11, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 12, class: gr16, preferred-register: '' }
|
||||||
|
- { id: 13, class: gr16, preferred-register: '' }
|
||||||
|
- { id: 14, class: gr16, preferred-register: '' }
|
||||||
|
- { id: 15, class: gr16, preferred-register: '' }
|
||||||
|
- { id: 16, class: gr16, preferred-register: '' }
|
||||||
|
- { id: 17, class: gr16, preferred-register: '' }
|
||||||
|
liveins:
|
||||||
|
- { reg: '$rdi', virtual-reg: '%0' }
|
||||||
|
- { reg: '$zmm0', virtual-reg: '%1' }
|
||||||
|
- { reg: '$zmm1', virtual-reg: '%2' }
|
||||||
|
- { reg: '$zmm2', virtual-reg: '%3' }
|
||||||
|
- { reg: '$zmm3', virtual-reg: '%4' }
|
||||||
|
frameInfo:
|
||||||
|
isFrameAddressTaken: false
|
||||||
|
isReturnAddressTaken: false
|
||||||
|
hasStackMap: false
|
||||||
|
hasPatchPoint: false
|
||||||
|
stackSize: 0
|
||||||
|
offsetAdjustment: 0
|
||||||
|
maxAlignment: 0
|
||||||
|
adjustsStack: false
|
||||||
|
hasCalls: false
|
||||||
|
stackProtector: ''
|
||||||
|
maxCallFrameSize: 4294967295
|
||||||
|
hasOpaqueSPAdjustment: false
|
||||||
|
hasVAStart: false
|
||||||
|
hasMustTailInVarArgFunc: false
|
||||||
|
savePoint: ''
|
||||||
|
restorePoint: ''
|
||||||
|
fixedStack:
|
||||||
|
stack:
|
||||||
|
constants:
|
||||||
|
body: |
|
||||||
|
; CHECK-LABEL: name: test_16bitops
|
||||||
|
; CHECK: bb.0:
|
||||||
|
; CHECK: successors: %bb.1(0x80000000)
|
||||||
|
; CHECK: liveins: $rdi, $zmm0, $zmm1, $zmm2, $zmm3
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
|
||||||
|
; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY $zmm1
|
||||||
|
; CHECK: [[COPY3:%[0-9]+]]:vr512 = COPY $zmm2
|
||||||
|
; CHECK: [[COPY4:%[0-9]+]]:vr512 = COPY $zmm3
|
||||||
|
; CHECK: [[VCMPPSZrri:%[0-9]+]]:vk16 = VCMPPSZrri [[COPY3]], [[COPY4]], 0
|
||||||
|
; CHECK: [[COPY5:%[0-9]+]]:vk32 = COPY [[VCMPPSZrri]]
|
||||||
|
; CHECK: [[COPY6:%[0-9]+]]:vk16 = COPY [[COPY5]]
|
||||||
|
; CHECK: [[KSHIFTRWri:%[0-9]+]]:vk16 = KSHIFTRWri [[COPY6]], 2
|
||||||
|
; CHECK: [[KSHIFTLWri:%[0-9]+]]:vk16 = KSHIFTLWri [[KSHIFTRWri]], 1
|
||||||
|
; CHECK: [[KNOTWrr:%[0-9]+]]:vk16 = KNOTWrr [[KSHIFTLWri]]
|
||||||
|
; CHECK: [[KORWrr:%[0-9]+]]:vk16 = KORWrr [[KNOTWrr]], [[KSHIFTRWri]]
|
||||||
|
; CHECK: [[KANDWrr:%[0-9]+]]:vk16 = KANDWrr [[KORWrr]], [[KSHIFTLWri]]
|
||||||
|
; CHECK: [[KXORWrr:%[0-9]+]]:vk16 = KXORWrr [[KANDWrr]], [[KSHIFTRWri]]
|
||||||
|
; CHECK: [[DEF:%[0-9]+]]:vk32 = IMPLICIT_DEF
|
||||||
|
; CHECK: [[COPY7:%[0-9]+]]:vk32 = COPY [[KXORWrr]]
|
||||||
|
; CHECK: [[COPY8:%[0-9]+]]:vk16wm = COPY [[COPY7]]
|
||||||
|
; CHECK: [[VMOVAPSZrrk:%[0-9]+]]:vr512 = VMOVAPSZrrk [[COPY2]], killed [[COPY8]], [[COPY1]]
|
||||||
|
; CHECK: VMOVAPSZmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVAPSZrrk]]
|
||||||
|
; CHECK: bb.1:
|
||||||
|
; CHECK: successors: %bb.2(0x80000000)
|
||||||
|
; CHECK: bb.2:
|
||||||
|
; CHECK: RET 0
|
||||||
|
bb.0:
|
||||||
|
liveins: $rdi, $zmm0, $zmm1, $zmm2, $zmm3
|
||||||
|
|
||||||
|
%0 = COPY $rdi
|
||||||
|
%1 = COPY $zmm0
|
||||||
|
%2 = COPY $zmm1
|
||||||
|
%3 = COPY $zmm2
|
||||||
|
%4 = COPY $zmm3
|
||||||
|
|
||||||
|
%5 = VCMPPSZrri %3, %4, 0, implicit $mxcsr
|
||||||
|
%6 = COPY %5
|
||||||
|
%7 = COPY %6.sub_16bit
|
||||||
|
|
||||||
|
%12 = SHR16ri %7, 2, implicit-def dead $eflags
|
||||||
|
%13 = SHL16ri %12, 1, implicit-def dead $eflags
|
||||||
|
%14 = NOT16r %13
|
||||||
|
%15 = OR16rr %14, %12, implicit-def dead $eflags
|
||||||
|
%16 = AND16rr %15, %13, implicit-def dead $eflags
|
||||||
|
%17 = XOR16rr %16, %12, implicit-def dead $eflags
|
||||||
|
|
||||||
|
%8 = IMPLICIT_DEF
|
||||||
|
%9 = INSERT_SUBREG %8, %17, %subreg.sub_16bit
|
||||||
|
%10 = COPY %9
|
||||||
|
%11 = VMOVAPSZrrk %2, killed %10, %1
|
||||||
|
VMOVAPSZmr %0, 1, $noreg, 0, $noreg, killed %11
|
||||||
|
|
||||||
|
; FIXME We can't replace TEST with KTEST due to flag differences
|
||||||
|
; FIXME TEST16rr %17, %17, implicit-def $eflags
|
||||||
|
; FIXME JCC_1 %bb.1, 4, implicit $eflags
|
||||||
|
; FIXME JMP_1 %bb.2
|
||||||
|
|
||||||
|
bb.1:
|
||||||
|
|
||||||
|
bb.2:
|
||||||
|
RET 0
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: test_32bitops
|
||||||
|
alignment: 16
|
||||||
|
exposesReturnsTwice: false
|
||||||
|
legalized: false
|
||||||
|
regBankSelected: false
|
||||||
|
selected: false
|
||||||
|
tracksRegLiveness: true
|
||||||
|
registers:
|
||||||
|
- { id: 0, class: gr64, preferred-register: '' }
|
||||||
|
- { id: 1, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 2, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 3, class: vk32wm, preferred-register: '' }
|
||||||
|
- { id: 4, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 5, class: gr32, preferred-register: '' }
|
||||||
|
- { id: 6, class: gr32, preferred-register: '' }
|
||||||
|
- { id: 7, class: gr32, preferred-register: '' }
|
||||||
|
- { id: 8, class: gr32, preferred-register: '' }
|
||||||
|
- { id: 9, class: gr32, preferred-register: '' }
|
||||||
|
- { id: 10, class: gr32, preferred-register: '' }
|
||||||
|
- { id: 11, class: gr32, preferred-register: '' }
|
||||||
|
- { id: 12, class: gr32, preferred-register: '' }
|
||||||
|
- { id: 13, class: gr32, preferred-register: '' }
|
||||||
|
liveins:
|
||||||
|
- { reg: '$rdi', virtual-reg: '%0' }
|
||||||
|
- { reg: '$zmm0', virtual-reg: '%1' }
|
||||||
|
- { reg: '$zmm1', virtual-reg: '%2' }
|
||||||
|
frameInfo:
|
||||||
|
isFrameAddressTaken: false
|
||||||
|
isReturnAddressTaken: false
|
||||||
|
hasStackMap: false
|
||||||
|
hasPatchPoint: false
|
||||||
|
stackSize: 0
|
||||||
|
offsetAdjustment: 0
|
||||||
|
maxAlignment: 0
|
||||||
|
adjustsStack: false
|
||||||
|
hasCalls: false
|
||||||
|
stackProtector: ''
|
||||||
|
maxCallFrameSize: 4294967295
|
||||||
|
hasOpaqueSPAdjustment: false
|
||||||
|
hasVAStart: false
|
||||||
|
hasMustTailInVarArgFunc: false
|
||||||
|
savePoint: ''
|
||||||
|
restorePoint: ''
|
||||||
|
fixedStack:
|
||||||
|
stack:
|
||||||
|
constants:
|
||||||
|
body: |
|
||||||
|
; CHECK-LABEL: name: test_32bitops
|
||||||
|
; CHECK: bb.0:
|
||||||
|
; CHECK: successors: %bb.1(0x80000000)
|
||||||
|
; CHECK: liveins: $rdi, $zmm0, $zmm1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
|
||||||
|
; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY $zmm1
|
||||||
|
; CHECK: [[KMOVDkm:%[0-9]+]]:vk32 = KMOVDkm [[COPY]], 1, $noreg, 0, $noreg
|
||||||
|
; CHECK: [[KSHIFTRDri:%[0-9]+]]:vk32 = KSHIFTRDri [[KMOVDkm]], 2
|
||||||
|
; CHECK: [[KSHIFTLDri:%[0-9]+]]:vk32 = KSHIFTLDri [[KSHIFTRDri]], 1
|
||||||
|
; CHECK: [[KNOTDrr:%[0-9]+]]:vk32 = KNOTDrr [[KSHIFTLDri]]
|
||||||
|
; CHECK: [[KORDrr:%[0-9]+]]:vk32 = KORDrr [[KNOTDrr]], [[KSHIFTRDri]]
|
||||||
|
; CHECK: [[KANDDrr:%[0-9]+]]:vk32 = KANDDrr [[KORDrr]], [[KSHIFTLDri]]
|
||||||
|
; CHECK: [[KXORDrr:%[0-9]+]]:vk32 = KXORDrr [[KANDDrr]], [[KSHIFTRDri]]
|
||||||
|
; CHECK: [[KANDNDrr:%[0-9]+]]:vk32 = KANDNDrr [[KXORDrr]], [[KORDrr]]
|
||||||
|
; CHECK: [[KADDDrr:%[0-9]+]]:vk32 = KADDDrr [[KANDNDrr]], [[KXORDrr]]
|
||||||
|
; CHECK: [[COPY3:%[0-9]+]]:vk32wm = COPY [[KADDDrr]]
|
||||||
|
; CHECK: [[VMOVDQU16Zrrk:%[0-9]+]]:vr512 = VMOVDQU16Zrrk [[COPY2]], killed [[COPY3]], [[COPY1]]
|
||||||
|
; CHECK: VMOVDQA32Zmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVDQU16Zrrk]]
|
||||||
|
; CHECK: bb.1:
|
||||||
|
; CHECK: successors: %bb.2(0x80000000)
|
||||||
|
; CHECK: bb.2:
|
||||||
|
; CHECK: RET 0
|
||||||
|
bb.0:
|
||||||
|
liveins: $rdi, $zmm0, $zmm1
|
||||||
|
|
||||||
|
%0 = COPY $rdi
|
||||||
|
%1 = COPY $zmm0
|
||||||
|
%2 = COPY $zmm1
|
||||||
|
|
||||||
|
%5 = MOV32rm %0, 1, $noreg, 0, $noreg
|
||||||
|
%6 = SHR32ri %5, 2, implicit-def dead $eflags
|
||||||
|
%7 = SHL32ri %6, 1, implicit-def dead $eflags
|
||||||
|
%8 = NOT32r %7
|
||||||
|
%9 = OR32rr %8, %6, implicit-def dead $eflags
|
||||||
|
%10 = AND32rr %9, %7, implicit-def dead $eflags
|
||||||
|
%11 = XOR32rr %10, %6, implicit-def dead $eflags
|
||||||
|
%12 = ANDN32rr %11, %9, implicit-def dead $eflags
|
||||||
|
%13 = ADD32rr %12, %11, implicit-def dead $eflags
|
||||||
|
|
||||||
|
%3 = COPY %13
|
||||||
|
%4 = VMOVDQU16Zrrk %2, killed %3, %1
|
||||||
|
VMOVDQA32Zmr %0, 1, $noreg, 0, $noreg, killed %4
|
||||||
|
|
||||||
|
; FIXME We can't replace TEST with KTEST due to flag differences
|
||||||
|
; FIXME TEST32rr %13, %13, implicit-def $eflags
|
||||||
|
; FIXME JCC_1 %bb.1, 4, implicit $eflags
|
||||||
|
; FIXME JMP_1 %bb.2
|
||||||
|
|
||||||
|
bb.1:
|
||||||
|
|
||||||
|
bb.2:
|
||||||
|
RET 0
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: test_64bitops
|
||||||
|
alignment: 16
|
||||||
|
exposesReturnsTwice: false
|
||||||
|
legalized: false
|
||||||
|
regBankSelected: false
|
||||||
|
selected: false
|
||||||
|
tracksRegLiveness: true
|
||||||
|
registers:
|
||||||
|
- { id: 0, class: gr64, preferred-register: '' }
|
||||||
|
- { id: 1, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 2, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 3, class: vk64wm, preferred-register: '' }
|
||||||
|
- { id: 4, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 5, class: gr64, preferred-register: '' }
|
||||||
|
- { id: 6, class: gr64, preferred-register: '' }
|
||||||
|
- { id: 7, class: gr64, preferred-register: '' }
|
||||||
|
- { id: 8, class: gr64, preferred-register: '' }
|
||||||
|
- { id: 9, class: gr64, preferred-register: '' }
|
||||||
|
- { id: 10, class: gr64, preferred-register: '' }
|
||||||
|
- { id: 11, class: gr64, preferred-register: '' }
|
||||||
|
- { id: 12, class: gr64, preferred-register: '' }
|
||||||
|
- { id: 13, class: gr64, preferred-register: '' }
|
||||||
|
liveins:
|
||||||
|
- { reg: '$rdi', virtual-reg: '%0' }
|
||||||
|
- { reg: '$zmm0', virtual-reg: '%1' }
|
||||||
|
- { reg: '$zmm1', virtual-reg: '%2' }
|
||||||
|
frameInfo:
|
||||||
|
isFrameAddressTaken: false
|
||||||
|
isReturnAddressTaken: false
|
||||||
|
hasStackMap: false
|
||||||
|
hasPatchPoint: false
|
||||||
|
stackSize: 0
|
||||||
|
offsetAdjustment: 0
|
||||||
|
maxAlignment: 0
|
||||||
|
adjustsStack: false
|
||||||
|
hasCalls: false
|
||||||
|
stackProtector: ''
|
||||||
|
maxCallFrameSize: 4294967295
|
||||||
|
hasOpaqueSPAdjustment: false
|
||||||
|
hasVAStart: false
|
||||||
|
hasMustTailInVarArgFunc: false
|
||||||
|
savePoint: ''
|
||||||
|
restorePoint: ''
|
||||||
|
fixedStack:
|
||||||
|
stack:
|
||||||
|
constants:
|
||||||
|
body: |
|
||||||
|
; CHECK-LABEL: name: test_64bitops
|
||||||
|
; CHECK: bb.0:
|
||||||
|
; CHECK: successors: %bb.1(0x80000000)
|
||||||
|
; CHECK: liveins: $rdi, $zmm0, $zmm1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
|
||||||
|
; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY $zmm1
|
||||||
|
; CHECK: [[KMOVQkm:%[0-9]+]]:vk64 = KMOVQkm [[COPY]], 1, $noreg, 0, $noreg
|
||||||
|
; CHECK: [[KSHIFTRQri:%[0-9]+]]:vk64 = KSHIFTRQri [[KMOVQkm]], 2
|
||||||
|
; CHECK: [[KSHIFTLQri:%[0-9]+]]:vk64 = KSHIFTLQri [[KSHIFTRQri]], 1
|
||||||
|
; CHECK: [[KNOTQrr:%[0-9]+]]:vk64 = KNOTQrr [[KSHIFTLQri]]
|
||||||
|
; CHECK: [[KORQrr:%[0-9]+]]:vk64 = KORQrr [[KNOTQrr]], [[KSHIFTRQri]]
|
||||||
|
; CHECK: [[KANDQrr:%[0-9]+]]:vk64 = KANDQrr [[KORQrr]], [[KSHIFTLQri]]
|
||||||
|
; CHECK: [[KXORQrr:%[0-9]+]]:vk64 = KXORQrr [[KANDQrr]], [[KSHIFTRQri]]
|
||||||
|
; CHECK: [[KANDNQrr:%[0-9]+]]:vk64 = KANDNQrr [[KXORQrr]], [[KORQrr]]
|
||||||
|
; CHECK: [[KADDQrr:%[0-9]+]]:vk64 = KADDQrr [[KANDNQrr]], [[KXORQrr]]
|
||||||
|
; CHECK: [[COPY3:%[0-9]+]]:vk64wm = COPY [[KADDQrr]]
|
||||||
|
; CHECK: [[VMOVDQU8Zrrk:%[0-9]+]]:vr512 = VMOVDQU8Zrrk [[COPY2]], killed [[COPY3]], [[COPY1]]
|
||||||
|
; CHECK: VMOVDQA32Zmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVDQU8Zrrk]]
|
||||||
|
; CHECK: bb.1:
|
||||||
|
; CHECK: successors: %bb.2(0x80000000)
|
||||||
|
; CHECK: bb.2:
|
||||||
|
; CHECK: RET 0
|
||||||
|
bb.0:
|
||||||
|
liveins: $rdi, $zmm0, $zmm1
|
||||||
|
|
||||||
|
%0 = COPY $rdi
|
||||||
|
%1 = COPY $zmm0
|
||||||
|
%2 = COPY $zmm1
|
||||||
|
|
||||||
|
%5 = MOV64rm %0, 1, $noreg, 0, $noreg
|
||||||
|
%6 = SHR64ri %5, 2, implicit-def dead $eflags
|
||||||
|
%7 = SHL64ri %6, 1, implicit-def dead $eflags
|
||||||
|
%8 = NOT64r %7
|
||||||
|
%9 = OR64rr %8, %6, implicit-def dead $eflags
|
||||||
|
%10 = AND64rr %9, %7, implicit-def dead $eflags
|
||||||
|
%11 = XOR64rr %10, %6, implicit-def dead $eflags
|
||||||
|
%12 = ANDN64rr %11, %9, implicit-def dead $eflags
|
||||||
|
%13 = ADD64rr %12, %11, implicit-def dead $eflags
|
||||||
|
|
||||||
|
%3 = COPY %13
|
||||||
|
%4 = VMOVDQU8Zrrk %2, killed %3, %1
|
||||||
|
VMOVDQA32Zmr %0, 1, $noreg, 0, $noreg, killed %4
|
||||||
|
|
||||||
|
; FIXME We can't replace TEST with KTEST due to flag differences
|
||||||
|
; FIXME TEST64rr %13, %13, implicit-def $eflags
|
||||||
|
; FIXME JCC_1 %bb.1, 4, implicit $eflags
|
||||||
|
; FIXME JMP_1 %bb.2
|
||||||
|
|
||||||
|
bb.1:
|
||||||
|
|
||||||
|
bb.2:
|
||||||
|
RET 0
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: test_16bitext
|
||||||
|
alignment: 16
|
||||||
|
exposesReturnsTwice: false
|
||||||
|
legalized: false
|
||||||
|
regBankSelected: false
|
||||||
|
selected: false
|
||||||
|
tracksRegLiveness: true
|
||||||
|
registers:
|
||||||
|
- { id: 0, class: gr64, preferred-register: '' }
|
||||||
|
- { id: 1, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 2, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 3, class: vk16wm, preferred-register: '' }
|
||||||
|
- { id: 4, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 5, class: gr16, preferred-register: '' }
|
||||||
|
- { id: 6, class: gr16, preferred-register: '' }
|
||||||
|
liveins:
|
||||||
|
- { reg: '$rdi', virtual-reg: '%0' }
|
||||||
|
- { reg: '$zmm0', virtual-reg: '%1' }
|
||||||
|
- { reg: '$zmm1', virtual-reg: '%2' }
|
||||||
|
frameInfo:
|
||||||
|
isFrameAddressTaken: false
|
||||||
|
isReturnAddressTaken: false
|
||||||
|
hasStackMap: false
|
||||||
|
hasPatchPoint: false
|
||||||
|
stackSize: 0
|
||||||
|
offsetAdjustment: 0
|
||||||
|
maxAlignment: 0
|
||||||
|
adjustsStack: false
|
||||||
|
hasCalls: false
|
||||||
|
stackProtector: ''
|
||||||
|
maxCallFrameSize: 4294967295
|
||||||
|
hasOpaqueSPAdjustment: false
|
||||||
|
hasVAStart: false
|
||||||
|
hasMustTailInVarArgFunc: false
|
||||||
|
savePoint: ''
|
||||||
|
restorePoint: ''
|
||||||
|
fixedStack:
|
||||||
|
stack:
|
||||||
|
constants:
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $rdi, $zmm0, $zmm1
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: test_16bitext
|
||||||
|
; CHECK: liveins: $rdi, $zmm0, $zmm1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
|
||||||
|
; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY $zmm1
|
||||||
|
; CHECK: [[KMOVBkm:%[0-9]+]]:vk8 = KMOVBkm [[COPY]], 1, $noreg, 0, $noreg
|
||||||
|
; CHECK: [[COPY3:%[0-9]+]]:vk16 = COPY [[KMOVBkm]]
|
||||||
|
; CHECK: [[KNOTWrr:%[0-9]+]]:vk16 = KNOTWrr [[COPY3]]
|
||||||
|
; CHECK: [[COPY4:%[0-9]+]]:vk16wm = COPY [[KNOTWrr]]
|
||||||
|
; CHECK: [[VMOVAPSZrrk:%[0-9]+]]:vr512 = VMOVAPSZrrk [[COPY2]], killed [[COPY4]], [[COPY1]]
|
||||||
|
; CHECK: VMOVAPSZmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVAPSZrrk]]
|
||||||
|
; CHECK: RET 0
|
||||||
|
%0 = COPY $rdi
|
||||||
|
%1 = COPY $zmm0
|
||||||
|
%2 = COPY $zmm1
|
||||||
|
|
||||||
|
%5 = MOVZX16rm8 %0, 1, $noreg, 0, $noreg
|
||||||
|
%6 = NOT16r %5
|
||||||
|
|
||||||
|
%3 = COPY %6
|
||||||
|
%4 = VMOVAPSZrrk %2, killed %3, %1
|
||||||
|
VMOVAPSZmr %0, 1, $noreg, 0, $noreg, killed %4
|
||||||
|
RET 0
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: test_32bitext
|
||||||
|
alignment: 16
|
||||||
|
exposesReturnsTwice: false
|
||||||
|
legalized: false
|
||||||
|
regBankSelected: false
|
||||||
|
selected: false
|
||||||
|
tracksRegLiveness: true
|
||||||
|
registers:
|
||||||
|
- { id: 0, class: gr64, preferred-register: '' }
|
||||||
|
- { id: 1, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 2, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 3, class: vk64wm, preferred-register: '' }
|
||||||
|
- { id: 4, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 5, class: gr32, preferred-register: '' }
|
||||||
|
- { id: 6, class: gr32, preferred-register: '' }
|
||||||
|
- { id: 7, class: gr32, preferred-register: '' }
|
||||||
|
liveins:
|
||||||
|
- { reg: '$rdi', virtual-reg: '%0' }
|
||||||
|
- { reg: '$zmm0', virtual-reg: '%1' }
|
||||||
|
- { reg: '$zmm1', virtual-reg: '%2' }
|
||||||
|
frameInfo:
|
||||||
|
isFrameAddressTaken: false
|
||||||
|
isReturnAddressTaken: false
|
||||||
|
hasStackMap: false
|
||||||
|
hasPatchPoint: false
|
||||||
|
stackSize: 0
|
||||||
|
offsetAdjustment: 0
|
||||||
|
maxAlignment: 0
|
||||||
|
adjustsStack: false
|
||||||
|
hasCalls: false
|
||||||
|
stackProtector: ''
|
||||||
|
maxCallFrameSize: 4294967295
|
||||||
|
hasOpaqueSPAdjustment: false
|
||||||
|
hasVAStart: false
|
||||||
|
hasMustTailInVarArgFunc: false
|
||||||
|
savePoint: ''
|
||||||
|
restorePoint: ''
|
||||||
|
fixedStack:
|
||||||
|
stack:
|
||||||
|
constants:
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $rdi, $zmm0, $zmm1
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: test_32bitext
|
||||||
|
; CHECK: liveins: $rdi, $zmm0, $zmm1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
|
||||||
|
; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY $zmm1
|
||||||
|
; CHECK: [[KMOVBkm:%[0-9]+]]:vk8 = KMOVBkm [[COPY]], 1, $noreg, 0, $noreg
|
||||||
|
; CHECK: [[COPY3:%[0-9]+]]:vk32 = COPY [[KMOVBkm]]
|
||||||
|
; CHECK: [[KMOVWkm:%[0-9]+]]:vk16 = KMOVWkm [[COPY]], 1, $noreg, 0, $noreg
|
||||||
|
; CHECK: [[COPY4:%[0-9]+]]:vk32 = COPY [[KMOVWkm]]
|
||||||
|
; CHECK: [[KADDDrr:%[0-9]+]]:vk32 = KADDDrr [[COPY3]], [[COPY4]]
|
||||||
|
; CHECK: [[COPY5:%[0-9]+]]:vk64wm = COPY [[KADDDrr]]
|
||||||
|
; CHECK: [[VMOVDQU16Zrrk:%[0-9]+]]:vr512 = VMOVDQU16Zrrk [[COPY2]], killed [[COPY5]], [[COPY1]]
|
||||||
|
; CHECK: VMOVDQA32Zmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVDQU16Zrrk]]
|
||||||
|
; CHECK: RET 0
|
||||||
|
%0 = COPY $rdi
|
||||||
|
%1 = COPY $zmm0
|
||||||
|
%2 = COPY $zmm1
|
||||||
|
|
||||||
|
%5 = MOVZX32rm8 %0, 1, $noreg, 0, $noreg
|
||||||
|
%6 = MOVZX32rm16 %0, 1, $noreg, 0, $noreg
|
||||||
|
%7 = ADD32rr %5, %6, implicit-def dead $eflags
|
||||||
|
|
||||||
|
%3 = COPY %7
|
||||||
|
%4 = VMOVDQU16Zrrk %2, killed %3, %1
|
||||||
|
VMOVDQA32Zmr %0, 1, $noreg, 0, $noreg, killed %4
|
||||||
|
RET 0
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: test_64bitext
|
||||||
|
alignment: 16
|
||||||
|
exposesReturnsTwice: false
|
||||||
|
legalized: false
|
||||||
|
regBankSelected: false
|
||||||
|
selected: false
|
||||||
|
tracksRegLiveness: true
|
||||||
|
registers:
|
||||||
|
- { id: 0, class: gr64, preferred-register: '' }
|
||||||
|
- { id: 1, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 2, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 3, class: vk64wm, preferred-register: '' }
|
||||||
|
- { id: 4, class: vr512, preferred-register: '' }
|
||||||
|
- { id: 5, class: gr64, preferred-register: '' }
|
||||||
|
- { id: 6, class: gr64, preferred-register: '' }
|
||||||
|
- { id: 7, class: gr64, preferred-register: '' }
|
||||||
|
liveins:
|
||||||
|
- { reg: '$rdi', virtual-reg: '%0' }
|
||||||
|
- { reg: '$zmm0', virtual-reg: '%1' }
|
||||||
|
- { reg: '$zmm1', virtual-reg: '%2' }
|
||||||
|
frameInfo:
|
||||||
|
isFrameAddressTaken: false
|
||||||
|
isReturnAddressTaken: false
|
||||||
|
hasStackMap: false
|
||||||
|
hasPatchPoint: false
|
||||||
|
stackSize: 0
|
||||||
|
offsetAdjustment: 0
|
||||||
|
maxAlignment: 0
|
||||||
|
adjustsStack: false
|
||||||
|
hasCalls: false
|
||||||
|
stackProtector: ''
|
||||||
|
maxCallFrameSize: 4294967295
|
||||||
|
hasOpaqueSPAdjustment: false
|
||||||
|
hasVAStart: false
|
||||||
|
hasMustTailInVarArgFunc: false
|
||||||
|
savePoint: ''
|
||||||
|
restorePoint: ''
|
||||||
|
fixedStack:
|
||||||
|
stack:
|
||||||
|
constants:
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $rdi, $zmm0, $zmm1
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: test_64bitext
|
||||||
|
; CHECK: liveins: $rdi, $zmm0, $zmm1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
|
||||||
|
; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY $zmm1
|
||||||
|
; CHECK: [[KMOVBkm:%[0-9]+]]:vk8 = KMOVBkm [[COPY]], 1, $noreg, 0, $noreg
|
||||||
|
; CHECK: [[COPY3:%[0-9]+]]:vk64 = COPY [[KMOVBkm]]
|
||||||
|
; CHECK: [[KMOVWkm:%[0-9]+]]:vk16 = KMOVWkm [[COPY]], 1, $noreg, 0, $noreg
|
||||||
|
; CHECK: [[COPY4:%[0-9]+]]:vk64 = COPY [[KMOVWkm]]
|
||||||
|
; CHECK: [[KADDQrr:%[0-9]+]]:vk64 = KADDQrr [[COPY3]], [[COPY4]]
|
||||||
|
; CHECK: [[COPY5:%[0-9]+]]:vk64wm = COPY [[KADDQrr]]
|
||||||
|
; CHECK: [[VMOVDQU8Zrrk:%[0-9]+]]:vr512 = VMOVDQU8Zrrk [[COPY2]], killed [[COPY5]], [[COPY1]]
|
||||||
|
; CHECK: VMOVDQA32Zmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVDQU8Zrrk]]
|
||||||
|
; CHECK: RET 0
|
||||||
|
%0 = COPY $rdi
|
||||||
|
%1 = COPY $zmm0
|
||||||
|
%2 = COPY $zmm1
|
||||||
|
|
||||||
|
%5 = MOVZX64rm8 %0, 1, $noreg, 0, $noreg
|
||||||
|
%6 = MOVZX64rm16 %0, 1, $noreg, 0, $noreg
|
||||||
|
%7 = ADD64rr %5, %6, implicit-def dead $eflags
|
||||||
|
|
||||||
|
%3 = COPY %7
|
||||||
|
%4 = VMOVDQU8Zrrk %2, killed %3, %1
|
||||||
|
VMOVDQA32Zmr %0, 1, $noreg, 0, $noreg, killed %4
|
||||||
|
RET 0
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: test_unused
|
||||||
|
alignment: 16
|
||||||
|
exposesReturnsTwice: false
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
selected: true
|
||||||
|
failedISel: false
|
||||||
|
tracksRegLiveness: true
|
||||||
|
hasWinCFI: false
|
||||||
|
callsEHReturn: false
|
||||||
|
callsUnwindInit: false
|
||||||
|
hasEHCatchret: false
|
||||||
|
hasEHScopes: false
|
||||||
|
hasEHFunclets: false
|
||||||
|
isOutlined: false
|
||||||
|
debugInstrRef: false
|
||||||
|
failsVerification: false
|
||||||
|
tracksDebugUserValues: false
|
||||||
|
registers:
|
||||||
|
# Note that this test is supposed to have registers without classes
|
||||||
|
- { id: 0, class: _, preferred-register: '' }
|
||||||
|
- { id: 1, class: _, preferred-register: '' }
|
||||||
|
- { id: 2, class: _, preferred-register: '' }
|
||||||
|
liveins:
|
||||||
|
- { reg: '$rdi', virtual-reg: '' }
|
||||||
|
frameInfo:
|
||||||
|
isFrameAddressTaken: false
|
||||||
|
isReturnAddressTaken: false
|
||||||
|
hasStackMap: false
|
||||||
|
hasPatchPoint: false
|
||||||
|
stackSize: 0
|
||||||
|
offsetAdjustment: 0
|
||||||
|
maxAlignment: 1
|
||||||
|
adjustsStack: false
|
||||||
|
hasCalls: false
|
||||||
|
stackProtector: ''
|
||||||
|
functionContext: ''
|
||||||
|
maxCallFrameSize: 4294967295
|
||||||
|
cvBytesOfCalleeSavedRegisters: 0
|
||||||
|
hasOpaqueSPAdjustment: false
|
||||||
|
hasVAStart: false
|
||||||
|
hasMustTailInVarArgFunc: false
|
||||||
|
hasTailCall: false
|
||||||
|
localFrameSize: 0
|
||||||
|
savePoint: ''
|
||||||
|
restorePoint: ''
|
||||||
|
fixedStack: []
|
||||||
|
stack: []
|
||||||
|
entry_values: []
|
||||||
|
callSites: []
|
||||||
|
debugValueSubstitutions: []
|
||||||
|
constants: []
|
||||||
|
machineFunctionInfo: {}
|
||||||
|
body: |
|
||||||
|
bb.1 (%ir-block.1):
|
||||||
|
liveins: $rdi
|
||||||
|
|
||||||
|
RET 0
|
||||||
|
|
||||||
|
...
|
||||||
@@ -2,6 +2,9 @@
|
|||||||
; RUN: opt -aarch64-lit -aarch64-lit-verify -verify-dom-info -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s
|
; RUN: opt -aarch64-lit -aarch64-lit-verify -verify-dom-info -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s
|
||||||
; RUN: opt -aarch64-lit -simplifycfg -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=LOOP-DEL
|
; RUN: opt -aarch64-lit -simplifycfg -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=LOOP-DEL
|
||||||
; RUN: opt -aarch64-lit -mtriple aarch64-unknown-linux-gnu -S < %s | FileCheck %s --check-prefix=NO-TRANSFORM
|
; RUN: opt -aarch64-lit -mtriple aarch64-unknown-linux-gnu -S < %s | FileCheck %s --check-prefix=NO-TRANSFORM
|
||||||
|
; RUN: opt -p aarch64-lit -aarch64-lit-verify -verify-dom-info -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s
|
||||||
|
; RUN: opt -passes='function(loop(aarch64-lit)),simplifycfg' -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S < %s | FileCheck %s --check-prefix=LOOP-DEL
|
||||||
|
; RUN: opt -p aarch64-lit -mtriple aarch64-unknown-linux-gnu -S < %s | FileCheck %s --check-prefix=NO-TRANSFORM
|
||||||
|
|
||||||
define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 %len, i32 %extra, i32 %n) {
|
define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 %len, i32 %extra, i32 %n) {
|
||||||
; CHECK-LABEL: define i32 @compare_bytes_simple(
|
; CHECK-LABEL: define i32 @compare_bytes_simple(
|
||||||
@@ -780,7 +783,7 @@ define i32 @compare_bytes_extra_cmp(ptr %a, ptr %b, i32 %len, i32 %n, i32 %x) {
|
|||||||
; CHECK-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
|
; CHECK-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[MISMATCH_END]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY:%.*]] ]
|
||||||
; CHECK-NEXT: [[INC:%.*]] = add i32 [[LEN_ADDR]], 1
|
; CHECK-NEXT: [[INC:%.*]] = add i32 [[LEN_ADDR]], 1
|
||||||
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
|
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[MISMATCH_RESULT]], [[N]]
|
||||||
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]]
|
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
|
||||||
; CHECK: while.body:
|
; CHECK: while.body:
|
||||||
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
|
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[MISMATCH_RESULT]] to i64
|
||||||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
|
||||||
@@ -788,11 +791,14 @@ define i32 @compare_bytes_extra_cmp(ptr %a, ptr %b, i32 %len, i32 %n, i32 %x) {
|
|||||||
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
|
||||||
; CHECK-NEXT: [[TMP46:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
|
; CHECK-NEXT: [[TMP46:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
|
||||||
; CHECK-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP45]], [[TMP46]]
|
; CHECK-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP45]], [[TMP46]]
|
||||||
; CHECK-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
|
; CHECK-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END_LOOPEXIT]]
|
||||||
; CHECK: byte.compare:
|
; CHECK: byte.compare:
|
||||||
|
; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT]]
|
||||||
|
; CHECK: while.end.loopexit:
|
||||||
|
; CHECK-NEXT: [[INC_LCSSA1:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
|
||||||
; CHECK-NEXT: br label [[WHILE_END]]
|
; CHECK-NEXT: br label [[WHILE_END]]
|
||||||
; CHECK: while.end:
|
; CHECK: while.end:
|
||||||
; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[MISMATCH_RESULT]], [[WHILE_BODY]] ], [ [[MISMATCH_RESULT]], [[WHILE_COND]] ], [ [[X]], [[ENTRY:%.*]] ], [ [[MISMATCH_RESULT]], [[BYTE_COMPARE]] ]
|
; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[INC_LCSSA1]], [[WHILE_END_LOOPEXIT]] ]
|
||||||
; CHECK-NEXT: ret i32 [[INC_LCSSA]]
|
; CHECK-NEXT: ret i32 [[INC_LCSSA]]
|
||||||
;
|
;
|
||||||
; LOOP-DEL-LABEL: define i32 @compare_bytes_extra_cmp(
|
; LOOP-DEL-LABEL: define i32 @compare_bytes_extra_cmp(
|
||||||
@@ -884,7 +890,7 @@ define i32 @compare_bytes_extra_cmp(ptr %a, ptr %b, i32 %len, i32 %n, i32 %x) {
|
|||||||
; NO-TRANSFORM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[PH]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
|
; NO-TRANSFORM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[PH]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ]
|
||||||
; NO-TRANSFORM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
|
; NO-TRANSFORM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1
|
||||||
; NO-TRANSFORM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
|
; NO-TRANSFORM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
|
||||||
; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]]
|
; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
|
||||||
; NO-TRANSFORM: while.body:
|
; NO-TRANSFORM: while.body:
|
||||||
; NO-TRANSFORM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
|
; NO-TRANSFORM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64
|
||||||
; NO-TRANSFORM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
|
; NO-TRANSFORM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]]
|
||||||
@@ -892,9 +898,12 @@ define i32 @compare_bytes_extra_cmp(ptr %a, ptr %b, i32 %len, i32 %n, i32 %x) {
|
|||||||
; NO-TRANSFORM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
|
; NO-TRANSFORM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]]
|
||||||
; NO-TRANSFORM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
|
; NO-TRANSFORM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
|
||||||
; NO-TRANSFORM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
|
; NO-TRANSFORM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]]
|
||||||
; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]]
|
; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END_LOOPEXIT]]
|
||||||
|
; NO-TRANSFORM: while.end.loopexit:
|
||||||
|
; NO-TRANSFORM-NEXT: [[INC_LCSSA1:%.*]] = phi i32 [ [[INC]], [[WHILE_COND]] ], [ [[INC]], [[WHILE_BODY]] ]
|
||||||
|
; NO-TRANSFORM-NEXT: br label [[WHILE_END]]
|
||||||
; NO-TRANSFORM: while.end:
|
; NO-TRANSFORM: while.end:
|
||||||
; NO-TRANSFORM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ], [ [[X]], [[ENTRY:%.*]] ]
|
; NO-TRANSFORM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[X]], [[ENTRY:%.*]] ], [ [[INC_LCSSA1]], [[WHILE_END_LOOPEXIT]] ]
|
||||||
; NO-TRANSFORM-NEXT: ret i32 [[INC_LCSSA]]
|
; NO-TRANSFORM-NEXT: ret i32 [[INC_LCSSA]]
|
||||||
;
|
;
|
||||||
entry:
|
entry:
|
||||||
@@ -908,7 +917,7 @@ while.cond:
|
|||||||
%len.addr = phi i32 [ %len, %ph ], [ %inc, %while.body ]
|
%len.addr = phi i32 [ %len, %ph ], [ %inc, %while.body ]
|
||||||
%inc = add i32 %len.addr, 1
|
%inc = add i32 %len.addr, 1
|
||||||
%cmp.not = icmp eq i32 %inc, %n
|
%cmp.not = icmp eq i32 %inc, %n
|
||||||
br i1 %cmp.not, label %while.end, label %while.body
|
br i1 %cmp.not, label %while.end.loopexit, label %while.body
|
||||||
|
|
||||||
while.body:
|
while.body:
|
||||||
%idxprom = zext i32 %inc to i64
|
%idxprom = zext i32 %inc to i64
|
||||||
@@ -917,10 +926,14 @@ while.body:
|
|||||||
%arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom
|
%arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom
|
||||||
%1 = load i8, ptr %arrayidx2
|
%1 = load i8, ptr %arrayidx2
|
||||||
%cmp.not2 = icmp eq i8 %0, %1
|
%cmp.not2 = icmp eq i8 %0, %1
|
||||||
br i1 %cmp.not2, label %while.cond, label %while.end
|
br i1 %cmp.not2, label %while.cond, label %while.end.loopexit
|
||||||
|
|
||||||
|
while.end.loopexit:
|
||||||
|
%inc.lcssa1 = phi i32 [ %inc, %while.cond ], [ %inc, %while.body ]
|
||||||
|
br label %while.end
|
||||||
|
|
||||||
while.end:
|
while.end:
|
||||||
%inc.lcssa = phi i32 [ %inc, %while.body ], [ %inc, %while.cond ], [ %x, %entry ]
|
%inc.lcssa = phi i32 [ %x, %entry ], [ %inc.lcssa1, %while.end.loopexit ]
|
||||||
ret i32 %inc.lcssa
|
ret i32 %inc.lcssa
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -11,5 +11,7 @@ add_mlir_dialect_library(MLIRXeGPUDialect
|
|||||||
MLIRXeGPUEnumsIncGen
|
MLIRXeGPUEnumsIncGen
|
||||||
|
|
||||||
LINK_LIBS PUBLIC
|
LINK_LIBS PUBLIC
|
||||||
|
MLIRDialectUtils
|
||||||
MLIRIR
|
MLIRIR
|
||||||
|
MLIRViewLikeInterface
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -285,8 +285,7 @@ module {
|
|||||||
%has_runtime = sparse_tensor.has_runtime_library
|
%has_runtime = sparse_tensor.has_runtime_library
|
||||||
scf.if %has_runtime {
|
scf.if %has_runtime {
|
||||||
// sparse_tensor.assemble copies buffers when running with the runtime
|
// sparse_tensor.assemble copies buffers when running with the runtime
|
||||||
// library. Deallocations are needed not needed when running in codgen
|
// library. Deallocations are not needed when running in codegen mode.
|
||||||
// mode.
|
|
||||||
bufferization.dealloc_tensor %s4 : tensor<10x10xf64, #SortedCOO>
|
bufferization.dealloc_tensor %s4 : tensor<10x10xf64, #SortedCOO>
|
||||||
bufferization.dealloc_tensor %s5 : tensor<10x10xf64, #SortedCOOI32>
|
bufferization.dealloc_tensor %s5 : tensor<10x10xf64, #SortedCOOI32>
|
||||||
bufferization.dealloc_tensor %csr : tensor<2x2xf64, #CSR>
|
bufferization.dealloc_tensor %csr : tensor<2x2xf64, #CSR>
|
||||||
|
|||||||
@@ -146,8 +146,7 @@ module {
|
|||||||
%has_runtime = sparse_tensor.has_runtime_library
|
%has_runtime = sparse_tensor.has_runtime_library
|
||||||
scf.if %has_runtime {
|
scf.if %has_runtime {
|
||||||
// sparse_tensor.assemble copies buffers when running with the runtime
|
// sparse_tensor.assemble copies buffers when running with the runtime
|
||||||
// library. Deallocations are needed not needed when running in codgen
|
// library. Deallocations are not needed when running in codegen mode.
|
||||||
// mode.
|
|
||||||
bufferization.dealloc_tensor %s0 : tensor<4x3x2xf32, #CCC>
|
bufferization.dealloc_tensor %s0 : tensor<4x3x2xf32, #CCC>
|
||||||
bufferization.dealloc_tensor %s1 : tensor<4x3x2xf32, #BatchedCSR>
|
bufferization.dealloc_tensor %s1 : tensor<4x3x2xf32, #BatchedCSR>
|
||||||
bufferization.dealloc_tensor %s2 : tensor<4x3x2xf32, #CSRDense>
|
bufferization.dealloc_tensor %s2 : tensor<4x3x2xf32, #CSRDense>
|
||||||
|
|||||||
Reference in New Issue
Block a user