Instead of performing a vector load, a shuffle, and then an element extract, load the element directly from the address plus an offset.
pshufd $1, (%rdi), %xmm0
movd %xmm0, %eax
=>
movl 4(%rdi), %eax
llvm-svn: 51026
This commit is contained in:
@@ -607,6 +607,10 @@ public:
|
|||||||
/// isVerifiedDebugInfoDesc - Returns true if the specified SDOperand has
|
/// isVerifiedDebugInfoDesc - Returns true if the specified SDOperand has
|
||||||
/// been verified as a debug information descriptor.
|
/// been verified as a debug information descriptor.
|
||||||
bool isVerifiedDebugInfoDesc(SDOperand Op) const;
|
bool isVerifiedDebugInfoDesc(SDOperand Op) const;
|
||||||
|
|
||||||
|
/// getShuffleScalarElt - Returns the scalar element that will make up the ith
|
||||||
|
/// element of the result of the vector shuffle.
|
||||||
|
SDOperand getShuffleScalarElt(const SDNode *N, unsigned Idx);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void RemoveNodeFromCSEMaps(SDNode *N);
|
void RemoveNodeFromCSEMaps(SDNode *N);
|
||||||
|
|||||||
@@ -4682,49 +4682,82 @@ SDOperand DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
SDOperand DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
|
SDOperand DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
|
||||||
|
// (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
|
||||||
|
// (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
|
||||||
|
// (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
|
||||||
|
|
||||||
|
// Perform only after legalization to ensure build_vector / vector_shuffle
|
||||||
|
// optimizations have already been done.
|
||||||
|
if (!AfterLegalize) return SDOperand();
|
||||||
|
|
||||||
SDOperand InVec = N->getOperand(0);
|
SDOperand InVec = N->getOperand(0);
|
||||||
SDOperand EltNo = N->getOperand(1);
|
SDOperand EltNo = N->getOperand(1);
|
||||||
|
|
||||||
// (vextract (v4f32 s2v (f32 load $addr)), 0) -> (f32 load $addr)
|
|
||||||
// (vextract (v4i32 bc (v4f32 s2v (f32 load $addr))), 0) -> (i32 load $addr)
|
|
||||||
if (isa<ConstantSDNode>(EltNo)) {
|
if (isa<ConstantSDNode>(EltNo)) {
|
||||||
unsigned Elt = cast<ConstantSDNode>(EltNo)->getValue();
|
unsigned Elt = cast<ConstantSDNode>(EltNo)->getValue();
|
||||||
bool NewLoad = false;
|
bool NewLoad = false;
|
||||||
if (Elt == 0) {
|
MVT::ValueType VT = InVec.getValueType();
|
||||||
MVT::ValueType VT = InVec.getValueType();
|
MVT::ValueType EVT = MVT::getVectorElementType(VT);
|
||||||
MVT::ValueType EVT = MVT::getVectorElementType(VT);
|
MVT::ValueType LVT = EVT;
|
||||||
MVT::ValueType LVT = EVT;
|
if (InVec.getOpcode() == ISD::BIT_CONVERT) {
|
||||||
unsigned NumElts = MVT::getVectorNumElements(VT);
|
MVT::ValueType BCVT = InVec.getOperand(0).getValueType();
|
||||||
if (InVec.getOpcode() == ISD::BIT_CONVERT) {
|
if (!MVT::isVector(BCVT)
|
||||||
MVT::ValueType BCVT = InVec.getOperand(0).getValueType();
|
|| (MVT::getSizeInBits(EVT) >
|
||||||
if (!MVT::isVector(BCVT) ||
|
MVT::getSizeInBits(MVT::getVectorElementType(BCVT))))
|
||||||
NumElts != MVT::getVectorNumElements(BCVT))
|
return SDOperand();
|
||||||
return SDOperand();
|
InVec = InVec.getOperand(0);
|
||||||
InVec = InVec.getOperand(0);
|
EVT = MVT::getVectorElementType(BCVT);
|
||||||
EVT = MVT::getVectorElementType(BCVT);
|
NewLoad = true;
|
||||||
NewLoad = true;
|
}
|
||||||
}
|
|
||||||
if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
|
|
||||||
InVec.getOperand(0).getValueType() == EVT &&
|
|
||||||
ISD::isNormalLoad(InVec.getOperand(0).Val) &&
|
|
||||||
InVec.getOperand(0).hasOneUse()) {
|
|
||||||
LoadSDNode *LN0 = cast<LoadSDNode>(InVec.getOperand(0));
|
|
||||||
unsigned Align = LN0->getAlignment();
|
|
||||||
if (NewLoad) {
|
|
||||||
// Check the resultant load doesn't need a higher alignment than the
|
|
||||||
// original load.
|
|
||||||
unsigned NewAlign = TLI.getTargetMachine().getTargetData()->
|
|
||||||
getABITypeAlignment(MVT::getTypeForValueType(LVT));
|
|
||||||
if (!TLI.isOperationLegal(ISD::LOAD, LVT) || NewAlign > Align)
|
|
||||||
return SDOperand();
|
|
||||||
Align = NewAlign;
|
|
||||||
}
|
|
||||||
|
|
||||||
return DAG.getLoad(LVT, LN0->getChain(), LN0->getBasePtr(),
|
LoadSDNode *LN0 = NULL;
|
||||||
LN0->getSrcValue(), LN0->getSrcValueOffset(),
|
if (ISD::isNormalLoad(InVec.Val))
|
||||||
LN0->isVolatile(), Align);
|
LN0 = cast<LoadSDNode>(InVec);
|
||||||
|
else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
|
||||||
|
InVec.getOperand(0).getValueType() == EVT &&
|
||||||
|
ISD::isNormalLoad(InVec.getOperand(0).Val)) {
|
||||||
|
LN0 = cast<LoadSDNode>(InVec.getOperand(0));
|
||||||
|
} else if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
|
||||||
|
// (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
|
||||||
|
// =>
|
||||||
|
// (load $addr+1*size)
|
||||||
|
unsigned Idx = cast<ConstantSDNode>(InVec.getOperand(2).
|
||||||
|
getOperand(Elt))->getValue();
|
||||||
|
unsigned NumElems = InVec.getOperand(2).getNumOperands();
|
||||||
|
InVec = (Idx < NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
|
||||||
|
if (InVec.getOpcode() == ISD::BIT_CONVERT)
|
||||||
|
InVec = InVec.getOperand(0);
|
||||||
|
if (ISD::isNormalLoad(InVec.Val)) {
|
||||||
|
LN0 = cast<LoadSDNode>(InVec);
|
||||||
|
Elt = (Idx < NumElems) ? Idx : Idx - NumElems;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (!LN0 || !LN0->hasOneUse())
|
||||||
|
return SDOperand();
|
||||||
|
|
||||||
|
unsigned Align = LN0->getAlignment();
|
||||||
|
if (NewLoad) {
|
||||||
|
// Check the resultant load doesn't need a higher alignment than the
|
||||||
|
// original load.
|
||||||
|
unsigned NewAlign = TLI.getTargetMachine().getTargetData()->
|
||||||
|
getABITypeAlignment(MVT::getTypeForValueType(LVT));
|
||||||
|
if (!TLI.isOperationLegal(ISD::LOAD, LVT) || NewAlign > Align)
|
||||||
|
return SDOperand();
|
||||||
|
Align = NewAlign;
|
||||||
|
}
|
||||||
|
|
||||||
|
SDOperand NewPtr = LN0->getBasePtr();
|
||||||
|
if (Elt) {
|
||||||
|
unsigned PtrOff = MVT::getSizeInBits(LVT) * Elt / 8;
|
||||||
|
MVT::ValueType PtrType = NewPtr.getValueType();
|
||||||
|
if (TLI.isBigEndian())
|
||||||
|
PtrOff = MVT::getSizeInBits(VT) / 8 - PtrOff;
|
||||||
|
NewPtr = DAG.getNode(ISD::ADD, PtrType, NewPtr,
|
||||||
|
DAG.getConstant(PtrOff, PtrType));
|
||||||
|
}
|
||||||
|
return DAG.getLoad(LVT, LN0->getChain(), NewPtr,
|
||||||
|
LN0->getSrcValue(), LN0->getSrcValueOffset(),
|
||||||
|
LN0->isVolatile(), Align);
|
||||||
}
|
}
|
||||||
return SDOperand();
|
return SDOperand();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1838,6 +1838,28 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDOperand Op) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// getShuffleScalarElt - Returns the scalar element that will make up the ith
|
||||||
|
/// element of the result of the vector shuffle.
|
||||||
|
SDOperand SelectionDAG::getShuffleScalarElt(const SDNode *N, unsigned Idx) {
|
||||||
|
MVT::ValueType VT = N->getValueType(0);
|
||||||
|
SDOperand PermMask = N->getOperand(2);
|
||||||
|
unsigned NumElems = PermMask.getNumOperands();
|
||||||
|
SDOperand V = (Idx < NumElems) ? N->getOperand(0) : N->getOperand(1);
|
||||||
|
Idx %= NumElems;
|
||||||
|
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
|
||||||
|
return (Idx == 0)
|
||||||
|
? V.getOperand(0) : getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
|
||||||
|
}
|
||||||
|
if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
|
||||||
|
SDOperand Elt = PermMask.getOperand(Idx);
|
||||||
|
if (Elt.getOpcode() == ISD::UNDEF)
|
||||||
|
return getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
|
||||||
|
return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Elt)->getValue());
|
||||||
|
}
|
||||||
|
return SDOperand();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/// getNode - Gets or creates the specified node.
|
/// getNode - Gets or creates the specified node.
|
||||||
///
|
///
|
||||||
SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT) {
|
SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT) {
|
||||||
|
|||||||
@@ -545,35 +545,6 @@ swizzle:
|
|||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
These functions should produce the same code:
|
|
||||||
|
|
||||||
#include <emmintrin.h>
|
|
||||||
|
|
||||||
typedef long long __m128i __attribute__ ((__vector_size__ (16)));
|
|
||||||
|
|
||||||
int foo(__m128i* val) {
|
|
||||||
return __builtin_ia32_vec_ext_v4si(*val, 1);
|
|
||||||
}
|
|
||||||
int bar(__m128i* val) {
|
|
||||||
union vs {
|
|
||||||
__m128i *_v;
|
|
||||||
int* _s;
|
|
||||||
} v = {val};
|
|
||||||
return v._s[1];
|
|
||||||
}
|
|
||||||
|
|
||||||
We currently produce (with -m64):
|
|
||||||
|
|
||||||
_foo:
|
|
||||||
pshufd $1, (%rdi), %xmm0
|
|
||||||
movd %xmm0, %eax
|
|
||||||
ret
|
|
||||||
_bar:
|
|
||||||
movl 4(%rdi), %eax
|
|
||||||
ret
|
|
||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
We should materialize vector constants like "all ones" and "signbit" with
|
We should materialize vector constants like "all ones" and "signbit" with
|
||||||
code like:
|
code like:
|
||||||
|
|
||||||
|
|||||||
@@ -6182,26 +6182,6 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// getShuffleScalarElt - Returns the scalar element that will make up the ith
|
|
||||||
/// element of the result of the vector shuffle.
|
|
||||||
static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
|
|
||||||
MVT::ValueType VT = N->getValueType(0);
|
|
||||||
SDOperand PermMask = N->getOperand(2);
|
|
||||||
unsigned NumElems = PermMask.getNumOperands();
|
|
||||||
SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
|
|
||||||
i %= NumElems;
|
|
||||||
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
|
|
||||||
return (i == 0)
|
|
||||||
? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
|
|
||||||
} else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
|
|
||||||
SDOperand Idx = PermMask.getOperand(i);
|
|
||||||
if (Idx.getOpcode() == ISD::UNDEF)
|
|
||||||
return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
|
|
||||||
return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG);
|
|
||||||
}
|
|
||||||
return SDOperand();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
|
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
|
||||||
/// node is a GlobalAddress + offset.
|
/// node is a GlobalAddress + offset.
|
||||||
bool X86TargetLowering::isGAPlusOffset(SDNode *N,
|
bool X86TargetLowering::isGAPlusOffset(SDNode *N,
|
||||||
@@ -6240,7 +6220,7 @@ static bool EltsFromConsecutiveLoads(SDNode *N, SDOperand PermMask,
|
|||||||
}
|
}
|
||||||
|
|
||||||
unsigned Index = cast<ConstantSDNode>(Idx)->getValue();
|
unsigned Index = cast<ConstantSDNode>(Idx)->getValue();
|
||||||
SDOperand Elt = getShuffleScalarElt(N, Index, DAG);
|
SDOperand Elt = DAG.getShuffleScalarElt(N, Index);
|
||||||
if (!Elt.Val ||
|
if (!Elt.Val ||
|
||||||
(Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.Val)))
|
(Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.Val)))
|
||||||
return false;
|
return false;
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
; RUN: llvm-as %s -o - | llc -march=x86-64
|
; RUN: llvm-as %s -o - | llc -march=x86-64 -mattr=+sse2
|
||||||
|
|
||||||
define void @test(float* %R, <4 x float> %X) {
|
define void @test(float* %R, <4 x float> %X) nounwind {
|
||||||
%tmp = extractelement <4 x float> %X, i32 3
|
%tmp = extractelement <4 x float> %X, i32 3
|
||||||
store float %tmp, float* %R
|
store float %tmp, float* %R
|
||||||
ret void
|
ret void
|
||||||
|
|||||||
9
llvm/test/CodeGen/X86/extractelement-load.ll
Normal file
9
llvm/test/CodeGen/X86/extractelement-load.ll
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
; RUN: llvm-as %s -o - | llc -march=x86 -mattr=+sse2 -mcpu=yonah | not grep movd
|
||||||
|
; RUN: llvm-as %s -o - | llc -march=x86-64 -mattr=+sse2 -mcpu=yonah | not grep movd
|
||||||
|
|
||||||
|
define i32 @t(<2 x i64>* %val) nounwind {
|
||||||
|
%tmp2 = load <2 x i64>* %val, align 16 ; <<2 x i64>> [#uses=1]
|
||||||
|
%tmp3 = bitcast <2 x i64> %tmp2 to <4 x i32> ; <<4 x i32>> [#uses=1]
|
||||||
|
%tmp4 = extractelement <4 x i32> %tmp3, i32 2 ; <i32> [#uses=1]
|
||||||
|
ret i32 %tmp4
|
||||||
|
}
|
||||||
@@ -28,8 +28,7 @@ define <4 x float> @b(<4 x float>* %y, <4 x float> %z) nounwind {
|
|||||||
%s = insertelement <4 x float> %r, float %b, i32 3
|
%s = insertelement <4 x float> %r, float %b, i32 3
|
||||||
ret <4 x float> %s
|
ret <4 x float> %s
|
||||||
}
|
}
|
||||||
define <2 x double> @c(<2 x double>* %y)
|
define <2 x double> @c(<2 x double>* %y) nounwind {
|
||||||
{
|
|
||||||
%x = load <2 x double>* %y, align 8
|
%x = load <2 x double>* %y, align 8
|
||||||
%a = extractelement <2 x double> %x, i32 0
|
%a = extractelement <2 x double> %x, i32 0
|
||||||
%c = extractelement <2 x double> %x, i32 1
|
%c = extractelement <2 x double> %x, i32 1
|
||||||
@@ -37,8 +36,7 @@ define <2 x double> @c(<2 x double>* %y)
|
|||||||
%r = insertelement <2 x double> %p, double %a, i32 1
|
%r = insertelement <2 x double> %p, double %a, i32 1
|
||||||
ret <2 x double> %r
|
ret <2 x double> %r
|
||||||
}
|
}
|
||||||
define <2 x double> @d(<2 x double>* %y, <2 x double> %z)
|
define <2 x double> @d(<2 x double>* %y, <2 x double> %z) nounwind {
|
||||||
{
|
|
||||||
%x = load <2 x double>* %y, align 8
|
%x = load <2 x double>* %y, align 8
|
||||||
%a = extractelement <2 x double> %x, i32 1
|
%a = extractelement <2 x double> %x, i32 1
|
||||||
%c = extractelement <2 x double> %z, i32 1
|
%c = extractelement <2 x double> %z, i32 1
|
||||||
|
|||||||
@@ -1,29 +1,30 @@
|
|||||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse41 -o %t -f
|
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse41 -o %t -f
|
||||||
; RUN: grep extractps %t | count 1
|
; RUN: grep extractps %t | count 1
|
||||||
; RUN: grep pextrd %t | count 2
|
; RUN: grep pextrd %t | count 1
|
||||||
; RUN: grep pshufd %t | count 1
|
; RUN: not grep pshufd %t
|
||||||
|
; RUN: not grep movss %t
|
||||||
|
|
||||||
define void @t1(float* %R, <4 x float>* %P1) {
|
define void @t1(float* %R, <4 x float>* %P1) nounwind {
|
||||||
%X = load <4 x float>* %P1
|
%X = load <4 x float>* %P1
|
||||||
%tmp = extractelement <4 x float> %X, i32 3
|
%tmp = extractelement <4 x float> %X, i32 3
|
||||||
store float %tmp, float* %R
|
store float %tmp, float* %R
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define float @t2(<4 x float>* %P1) {
|
define float @t2(<4 x float>* %P1) nounwind {
|
||||||
%X = load <4 x float>* %P1
|
%X = load <4 x float>* %P1
|
||||||
%tmp = extractelement <4 x float> %X, i32 2
|
%tmp = extractelement <4 x float> %X, i32 2
|
||||||
ret float %tmp
|
ret float %tmp
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @t3(i32* %R, <4 x i32>* %P1) {
|
define void @t3(i32* %R, <4 x i32>* %P1) nounwind {
|
||||||
%X = load <4 x i32>* %P1
|
%X = load <4 x i32>* %P1
|
||||||
%tmp = extractelement <4 x i32> %X, i32 3
|
%tmp = extractelement <4 x i32> %X, i32 3
|
||||||
store i32 %tmp, i32* %R
|
store i32 %tmp, i32* %R
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define i32 @t4(<4 x i32>* %P1) {
|
define i32 @t4(<4 x i32>* %P1) nounwind {
|
||||||
%X = load <4 x i32>* %P1
|
%X = load <4 x i32>* %P1
|
||||||
%tmp = extractelement <4 x i32> %X, i32 3
|
%tmp = extractelement <4 x i32> %X, i32 3
|
||||||
ret i32 %tmp
|
ret i32 %tmp
|
||||||
|
|||||||
Reference in New Issue
Block a user