Over the past while, I've committed a number of patches to the PowerPC back end aimed at eliminating comparison instructions. However, these patches cause some failures in proprietary source code, and the issues are not observed in SPEC or in any open-source packages I've been able to run. As a result, I'm pulling the entire series and will refactor it to:
- Have a single entry point for easy control
- Have fine-grained control over which patterns we transform

A side effect of this is that the test cases for these patches (and those modified by them) are XFAIL-ed. This is a temporary measure, as it would be counter-productive to remove or modify these test cases and then have to modify them again when the refactored patch is recommitted. The failure will be investigated in parallel with the refactoring effort, and the recommit will either include a fix for it or leave this transformation off by default until the problem is resolved.

llvm-svn: 314244
482 lines
12 KiB
LLVM
482 lines
12 KiB
LLVM
; XFAIL: *
|
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown | FileCheck %s
|
|
|
|
; Scalar i32 inputs, i1 result: (P == 0) & (Q == 0).
; The expected assembly ORs the two inputs once and then tests that single
; value for zero with cntlzw followed by a right shift by 5.
define zeroext i1 @all_bits_clear(i32 %P, i32 %Q) {
; CHECK-LABEL: all_bits_clear:
; CHECK: # BB#0:
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: srwi 3, 3, 5
; CHECK-NEXT: blr
  %p_is_zero = icmp eq i32 %P, 0
  %q_is_zero = icmp eq i32 %Q, 0
  %both_zero = and i1 %p_is_zero, %q_is_zero
  ret i1 %both_zero
}
|
|
|
|
; Scalar i32 inputs, i1 result: (P > -1) & (Q > -1), i.e. both non-negative.
; The expected assembly ORs the inputs, inverts the result with nor, and
; shifts the top bit of the word down to bit 0 (srwi by 31).
define zeroext i1 @all_sign_bits_clear(i32 %P, i32 %Q) {
; CHECK-LABEL: all_sign_bits_clear:
; CHECK: # BB#0:
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: nor 3, 3, 3
; CHECK-NEXT: srwi 3, 3, 31
; CHECK-NEXT: blr
  %p_nonneg = icmp sgt i32 %P, -1
  %q_nonneg = icmp sgt i32 %Q, -1
  %both_nonneg = and i1 %p_nonneg, %q_nonneg
  ret i1 %both_nonneg
}
|
|
|
|
; Scalar i32 inputs, i1 result: (P == -1) & (Q == -1).
; The expected assembly ANDs the inputs, XORs the result with an all-ones
; constant loaded by li, and tests that for zero with cntlzw + srwi by 5.
define zeroext i1 @all_bits_set(i32 %P, i32 %Q) {
; CHECK-LABEL: all_bits_set:
; CHECK: # BB#0:
; CHECK-NEXT: li 5, -1
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: xor 3, 3, 5
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: srwi 3, 3, 5
; CHECK-NEXT: blr
  %p_all_ones = icmp eq i32 %P, -1
  %q_all_ones = icmp eq i32 %Q, -1
  %both_all_ones = and i1 %p_all_ones, %q_all_ones
  ret i1 %both_all_ones
}
|
|
|
|
; Scalar i32 inputs, i1 result: (P < 0) & (Q < 0), i.e. both negative.
; The expected assembly ANDs the inputs and shifts the top bit of the word
; down to bit 0 (srwi by 31).
define zeroext i1 @all_sign_bits_set(i32 %P, i32 %Q) {
; CHECK-LABEL: all_sign_bits_set:
; CHECK: # BB#0:
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: srwi 3, 3, 31
; CHECK-NEXT: blr
  %p_neg = icmp slt i32 %P, 0
  %q_neg = icmp slt i32 %Q, 0
  %both_neg = and i1 %p_neg, %q_neg
  ret i1 %both_neg
}
|
|
|
|
; Scalar i32 inputs, i1 result: (P != 0) | (Q != 0).
; The expected assembly ORs the inputs, computes the is-zero bit with
; cntlzw + srwi by 5, and flips that bit with xori.
define zeroext i1 @any_bits_set(i32 %P, i32 %Q) {
; CHECK-LABEL: any_bits_set:
; CHECK: # BB#0:
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: srwi 3, 3, 5
; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: blr
  %p_nonzero = icmp ne i32 %P, 0
  %q_nonzero = icmp ne i32 %Q, 0
  %either_nonzero = or i1 %p_nonzero, %q_nonzero
  ret i1 %either_nonzero
}
|
|
|
|
; Scalar i32 inputs, i1 result: (P < 0) | (Q < 0), i.e. at least one negative.
; The expected assembly ORs the inputs and shifts the top bit of the word
; down to bit 0 (srwi by 31).
define zeroext i1 @any_sign_bits_set(i32 %P, i32 %Q) {
; CHECK-LABEL: any_sign_bits_set:
; CHECK: # BB#0:
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: srwi 3, 3, 31
; CHECK-NEXT: blr
  %p_neg = icmp slt i32 %P, 0
  %q_neg = icmp slt i32 %Q, 0
  %either_neg = or i1 %p_neg, %q_neg
  ret i1 %either_neg
}
|
|
|
|
; Scalar i32 inputs, i1 result: (P != -1) | (Q != -1).
; The expected assembly ANDs the inputs, XORs with an all-ones constant,
; computes the is-zero bit with cntlzw + srwi by 5, and flips it with xori.
define zeroext i1 @any_bits_clear(i32 %P, i32 %Q) {
; CHECK-LABEL: any_bits_clear:
; CHECK: # BB#0:
; CHECK-NEXT: li 5, -1
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: xor 3, 3, 5
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: srwi 3, 3, 5
; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: blr
  %p_not_ones = icmp ne i32 %P, -1
  %q_not_ones = icmp ne i32 %Q, -1
  %either_not_ones = or i1 %p_not_ones, %q_not_ones
  ret i1 %either_not_ones
}
|
|
|
|
; Scalar i32 inputs, i1 result: (P > -1) | (Q > -1), i.e. at least one
; non-negative. The expected assembly ANDs the inputs, inverts the result
; with nor, and shifts the top bit of the word down to bit 0 (srwi by 31).
define zeroext i1 @any_sign_bits_clear(i32 %P, i32 %Q) {
; CHECK-LABEL: any_sign_bits_clear:
; CHECK: # BB#0:
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: nor 3, 3, 3
; CHECK-NEXT: srwi 3, 3, 31
; CHECK-NEXT: blr
  %p_nonneg = icmp sgt i32 %P, -1
  %q_nonneg = icmp sgt i32 %Q, -1
  %either_nonneg = or i1 %p_nonneg, %q_nonneg
  ret i1 %either_nonneg
}
|
|
|
|
; PR3351 - (P == 0) & (Q == 0) -> (P|Q) == 0
|
|
; Branching variant on pointer inputs: returns 4 when both P and Q are null,
; otherwise 192. The expected assembly uses one record-form OR ("or.") and
; branches directly on its result instead of issuing two compares.
define i32 @all_bits_clear_branch(i32* %P, i32* %Q) {
; CHECK-LABEL: all_bits_clear_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: or. 3, 3, 4
; CHECK-NEXT: bne 0, .LBB8_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB8_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
  %p_is_null = icmp eq i32* %P, null
  %q_is_null = icmp eq i32* %Q, null
  %both_null = and i1 %p_is_null, %q_is_null
  br i1 %both_null, label %bb1, label %return

bb1:
  ret i32 4

return:
  ret i32 192
}
|
|
|
|
; Branching variant: returns 4 when both i32 inputs are greater than -1,
; otherwise 192. The expected assembly ORs the inputs and performs a single
; signed compare against 0 before the branch.
define i32 @all_sign_bits_clear_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: all_sign_bits_clear_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: cmpwi 0, 3, 0
; CHECK-NEXT: blt 0, .LBB9_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB9_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
  %p_nonneg = icmp sgt i32 %P, -1
  %q_nonneg = icmp sgt i32 %Q, -1
  %both_nonneg = and i1 %p_nonneg, %q_nonneg
  br i1 %both_nonneg, label %bb1, label %return

bb1:
  ret i32 4

return:
  ret i32 192
}
|
|
|
|
; Branching variant: returns 4 when both i32 inputs equal -1, otherwise 192.
; The expected assembly ANDs the inputs and performs a single compare
; against -1 before the branch.
define i32 @all_bits_set_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: all_bits_set_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: cmpwi 0, 3, -1
; CHECK-NEXT: bne 0, .LBB10_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB10_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
  %p_all_ones = icmp eq i32 %P, -1
  %q_all_ones = icmp eq i32 %Q, -1
  %both_all_ones = and i1 %p_all_ones, %q_all_ones
  br i1 %both_all_ones, label %bb1, label %return

bb1:
  ret i32 4

return:
  ret i32 192
}
|
|
|
|
; Branching variant: returns 4 when both i32 inputs are less than 0,
; otherwise 192. The expected assembly ANDs the inputs and performs a single
; signed compare against -1 before the branch.
define i32 @all_sign_bits_set_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: all_sign_bits_set_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: cmpwi 0, 3, -1
; CHECK-NEXT: bgt 0, .LBB11_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB11_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
  %p_neg = icmp slt i32 %P, 0
  %q_neg = icmp slt i32 %Q, 0
  %both_neg = and i1 %p_neg, %q_neg
  br i1 %both_neg, label %bb1, label %return

bb1:
  ret i32 4

return:
  ret i32 192
}
|
|
|
|
; PR3351 - (P != 0) | (Q != 0) -> (P|Q) != 0
|
|
; Branching variant on pointer inputs: returns 4 when at least one of P and Q
; is non-null, otherwise 192. The expected assembly uses one record-form OR
; ("or.") and branches directly on its result.
define i32 @any_bits_set_branch(i32* %P, i32* %Q) {
; CHECK-LABEL: any_bits_set_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: or. 3, 3, 4
; CHECK-NEXT: beq 0, .LBB12_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB12_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
  %p_nonnull = icmp ne i32* %P, null
  %q_nonnull = icmp ne i32* %Q, null
  %either_nonnull = or i1 %p_nonnull, %q_nonnull
  br i1 %either_nonnull, label %bb1, label %return

bb1:
  ret i32 4

return:
  ret i32 192
}
|
|
|
|
; Branching variant: returns 4 when at least one i32 input is less than 0,
; otherwise 192. The expected assembly ORs the inputs and performs a single
; signed compare against -1 before the branch.
define i32 @any_sign_bits_set_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: any_sign_bits_set_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: cmpwi 0, 3, -1
; CHECK-NEXT: bgt 0, .LBB13_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB13_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
  %p_neg = icmp slt i32 %P, 0
  %q_neg = icmp slt i32 %Q, 0
  %either_neg = or i1 %p_neg, %q_neg
  br i1 %either_neg, label %bb1, label %return

bb1:
  ret i32 4

return:
  ret i32 192
}
|
|
|
|
; Branching variant: returns 4 when at least one i32 input differs from -1,
; otherwise 192. The expected assembly ANDs the inputs and performs a single
; compare against -1 before the branch.
define i32 @any_bits_clear_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: any_bits_clear_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: cmpwi 0, 3, -1
; CHECK-NEXT: beq 0, .LBB14_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB14_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
  %p_not_ones = icmp ne i32 %P, -1
  %q_not_ones = icmp ne i32 %Q, -1
  %either_not_ones = or i1 %p_not_ones, %q_not_ones
  br i1 %either_not_ones, label %bb1, label %return

bb1:
  ret i32 4

return:
  ret i32 192
}
|
|
|
|
; Branching variant: returns 4 when at least one i32 input is greater than
; -1, otherwise 192. The expected assembly ANDs the inputs and performs a
; single signed compare against 0 before the branch.
define i32 @any_sign_bits_clear_branch(i32 %P, i32 %Q) {
; CHECK-LABEL: any_sign_bits_clear_branch:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: cmpwi 0, 3, 0
; CHECK-NEXT: blt 0, .LBB15_2
; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: li 3, 4
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB15_2: # %return
; CHECK-NEXT: li 3, 192
; CHECK-NEXT: blr
entry:
  %p_nonneg = icmp sgt i32 %P, -1
  %q_nonneg = icmp sgt i32 %Q, -1
  %either_nonneg = or i1 %p_nonneg, %q_nonneg
  br i1 %either_nonneg, label %bb1, label %return

bb1:
  ret i32 4

return:
  ret i32 192
}
|
|
|
|
; Vector variant on <4 x i32>, per lane: (P == 0) & (Q == 0).
; The expected assembly ORs the two vectors and issues one vcmpequw against
; a register cleared by xxlxor with itself.
define <4 x i1> @all_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: all_bits_clear_vec:
; CHECK: # BB#0:
; CHECK-NEXT: xxlxor 36, 36, 36
; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: vcmpequw 2, 2, 4
; CHECK-NEXT: blr
  %p_zero_lanes = icmp eq <4 x i32> %P, zeroinitializer
  %q_zero_lanes = icmp eq <4 x i32> %Q, zeroinitializer
  %both_zero_lanes = and <4 x i1> %p_zero_lanes, %q_zero_lanes
  ret <4 x i1> %both_zero_lanes
}
|
|
|
|
; Vector variant on <4 x i32>, per lane: (P > -1) & (Q > -1).
; The expected assembly ORs the two vectors and issues one signed
; greater-than compare (vcmpgtsw) against a splat of -1.
define <4 x i1> @all_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: all_sign_bits_clear_vec:
; CHECK: # BB#0:
; CHECK-NEXT: vspltisb 4, -1
; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: vcmpgtsw 2, 2, 4
; CHECK-NEXT: blr
  %p_nonneg_lanes = icmp sgt <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
  %q_nonneg_lanes = icmp sgt <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
  %both_nonneg_lanes = and <4 x i1> %p_nonneg_lanes, %q_nonneg_lanes
  ret <4 x i1> %both_nonneg_lanes
}
|
|
|
|
; Vector variant on <4 x i32>, per lane: (P == -1) & (Q == -1).
; The expected assembly ANDs the two vectors and issues one vcmpequw against
; a splat of -1.
define <4 x i1> @all_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: all_bits_set_vec:
; CHECK: # BB#0:
; CHECK-NEXT: vspltisb 4, -1
; CHECK-NEXT: xxland 34, 34, 35
; CHECK-NEXT: vcmpequw 2, 2, 4
; CHECK-NEXT: blr
  %p_ones_lanes = icmp eq <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
  %q_ones_lanes = icmp eq <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
  %both_ones_lanes = and <4 x i1> %p_ones_lanes, %q_ones_lanes
  ret <4 x i1> %both_ones_lanes
}
|
|
|
|
; Vector variant on <4 x i32>, per lane: (P < 0) & (Q < 0).
; The expected assembly ANDs the two vectors and issues one vcmpgtsw with a
; register cleared by xxlxor as the left-hand operand.
define <4 x i1> @all_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: all_sign_bits_set_vec:
; CHECK: # BB#0:
; CHECK-NEXT: xxlxor 36, 36, 36
; CHECK-NEXT: xxland 34, 34, 35
; CHECK-NEXT: vcmpgtsw 2, 4, 2
; CHECK-NEXT: blr
  %p_neg_lanes = icmp slt <4 x i32> %P, zeroinitializer
  %q_neg_lanes = icmp slt <4 x i32> %Q, zeroinitializer
  %both_neg_lanes = and <4 x i1> %p_neg_lanes, %q_neg_lanes
  ret <4 x i1> %both_neg_lanes
}
|
|
|
|
; Vector variant on <4 x i32>, per lane: (P != 0) | (Q != 0).
; The expected assembly ORs the two vectors, compares for equality against a
; register cleared by xxlxor, and inverts the mask with xxlnor.
define <4 x i1> @any_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: any_bits_set_vec:
; CHECK: # BB#0:
; CHECK-NEXT: xxlxor 36, 36, 36
; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: vcmpequw 2, 2, 4
; CHECK-NEXT: xxlnor 34, 34, 34
; CHECK-NEXT: blr
  %p_nonzero_lanes = icmp ne <4 x i32> %P, zeroinitializer
  %q_nonzero_lanes = icmp ne <4 x i32> %Q, zeroinitializer
  %either_nonzero_lanes = or <4 x i1> %p_nonzero_lanes, %q_nonzero_lanes
  ret <4 x i1> %either_nonzero_lanes
}
|
|
|
|
; Vector variant on <4 x i32>, per lane: (P < 0) | (Q < 0).
; The expected assembly ORs the two vectors and issues one vcmpgtsw with a
; register cleared by xxlxor as the left-hand operand.
define <4 x i1> @any_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: any_sign_bits_set_vec:
; CHECK: # BB#0:
; CHECK-NEXT: xxlxor 36, 36, 36
; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: vcmpgtsw 2, 4, 2
; CHECK-NEXT: blr
  %p_neg_lanes = icmp slt <4 x i32> %P, zeroinitializer
  %q_neg_lanes = icmp slt <4 x i32> %Q, zeroinitializer
  %either_neg_lanes = or <4 x i1> %p_neg_lanes, %q_neg_lanes
  ret <4 x i1> %either_neg_lanes
}
|
|
|
|
; Vector variant on <4 x i32>, per lane: (P != -1) | (Q != -1).
; The expected assembly ANDs the two vectors, compares for equality against
; a splat of -1, and inverts the mask with xxlnor.
define <4 x i1> @any_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: any_bits_clear_vec:
; CHECK: # BB#0:
; CHECK-NEXT: vspltisb 4, -1
; CHECK-NEXT: xxland 34, 34, 35
; CHECK-NEXT: vcmpequw 2, 2, 4
; CHECK-NEXT: xxlnor 34, 34, 34
; CHECK-NEXT: blr
  %p_not_ones_lanes = icmp ne <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
  %q_not_ones_lanes = icmp ne <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
  %either_not_ones_lanes = or <4 x i1> %p_not_ones_lanes, %q_not_ones_lanes
  ret <4 x i1> %either_not_ones_lanes
}
|
|
|
|
; Vector variant on <4 x i32>, per lane: (P > -1) | (Q > -1).
; The expected assembly ANDs the two vectors and issues one signed
; greater-than compare (vcmpgtsw) against a splat of -1.
define <4 x i1> @any_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: any_sign_bits_clear_vec:
; CHECK: # BB#0:
; CHECK-NEXT: vspltisb 4, -1
; CHECK-NEXT: xxland 34, 34, 35
; CHECK-NEXT: vcmpgtsw 2, 2, 4
; CHECK-NEXT: blr
  %p_nonneg_lanes = icmp sgt <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
  %q_nonneg_lanes = icmp sgt <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
  %either_nonneg_lanes = or <4 x i1> %p_nonneg_lanes, %q_nonneg_lanes
  ret <4 x i1> %either_nonneg_lanes
}
|
|
|
|
; Single i64 input: (x != -1) & (x != 0).
; The expected assembly adds 1 to x and performs one unsigned compare
; against 1 (cmpldi), then uses isel to pick between the constants 1 and 0.
define zeroext i1 @ne_neg1_and_ne_zero(i64 %x) {
; CHECK-LABEL: ne_neg1_and_ne_zero:
; CHECK: # BB#0:
; CHECK-NEXT: addi 3, 3, 1
; CHECK-NEXT: li 4, 0
; CHECK-NEXT: li 12, 1
; CHECK-NEXT: cmpldi 3, 1
; CHECK-NEXT: isel 3, 12, 4, 1
; CHECK-NEXT: blr
  %not_neg1 = icmp ne i64 %x, -1
  %not_zero = icmp ne i64 %x, 0
  %neither = and i1 %not_neg1, %not_zero
  ret i1 %neither
}
|
|
|
|
; PR32401 - https://bugs.llvm.org/show_bug.cgi?id=32401
|
|
|
|
; Four zero-extended i16 inputs: (a == b) & (c == d).
; The expected assembly XORs each pair, ORs the two differences together,
; and tests the combined value for zero with cntlzw + srwi by 5.
define zeroext i1 @and_eq(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d) {
; CHECK-LABEL: and_eq:
; CHECK: # BB#0:
; CHECK-NEXT: xor 5, 5, 6
; CHECK-NEXT: xor 3, 3, 4
; CHECK-NEXT: or 3, 3, 5
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: srwi 3, 3, 5
; CHECK-NEXT: blr
  %ab_eq = icmp eq i16 %a, %b
  %cd_eq = icmp eq i16 %c, %d
  %both_eq = and i1 %ab_eq, %cd_eq
  ret i1 %both_eq
}
|
|
|
|
; Four i32 inputs: (a != b) | (c != d).
; The expected assembly XORs each pair, ORs the two differences together,
; computes the is-zero bit with cntlzw + srwi by 5, and flips it with xori.
define zeroext i1 @or_ne(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: or_ne:
; CHECK: # BB#0:
; CHECK-NEXT: xor 5, 5, 6
; CHECK-NEXT: xor 3, 3, 4
; CHECK-NEXT: or 3, 3, 5
; CHECK-NEXT: cntlzw 3, 3
; CHECK-NEXT: srwi 3, 3, 5
; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: blr
  %ab_ne = icmp ne i32 %a, %b
  %cd_ne = icmp ne i32 %c, %d
  %either_ne = or i1 %ab_ne, %cd_ne
  ret i1 %either_ne
}
|
|
|
|
; This should not be transformed because vector compares + bitwise logic are faster.
|
|
|
|
; Vector form of the pairwise-equality test, per lane: (a == b) & (c == d).
; The expected assembly keeps two separate vcmpequw compares and combines
; their masks with xxland; no xor/or rewrite of the operands is expected.
define <4 x i1> @and_eq_vec(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; CHECK-LABEL: and_eq_vec:
; CHECK: # BB#0:
; CHECK-NEXT: vcmpequw 2, 2, 3
; CHECK-NEXT: vcmpequw 19, 4, 5
; CHECK-NEXT: xxland 34, 34, 51
; CHECK-NEXT: blr
  %ab_eq_lanes = icmp eq <4 x i32> %a, %b
  %cd_eq_lanes = icmp eq <4 x i32> %c, %d
  %both_eq_lanes = and <4 x i1> %ab_eq_lanes, %cd_eq_lanes
  ret <4 x i1> %both_eq_lanes
}
|
|
|