Bump version to 5.0.2

llvm-svn: 328729
Backporting r325647 and r325713:
2018-03-28 20:25:17 +00:00 · 2018-03-16 22:28:08 +00:00 · 2018-03-16 22:21:00 +00:00 · 2018-03-16 22:14:38 +00:00 · 2018-02-14 00:34:35 +00:00 · 2018-02-14 00:34:13 +00:00
78 changed files with 3532 additions and 72 deletions
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -280,6 +280,10 @@ def warn_target_unsupported_nanlegacy : Warning<
 def warn_target_unsupported_compact_branches : Warning<
  "ignoring '-mcompact-branches=' option because the '%0' architecture does not"
  " support it">, InGroup<UnsupportedCB>;
+def err_drv_unsupported_indirect_jump_opt : Error<
+  "'-mindirect-jump=%0' is unsupported with the '%1' architecture">;
+def err_drv_unknown_indirect_jump_opt : Error<
+  "unknown '-mindirect-jump=' option '%0'">;

 def warn_drv_unable_to_find_directory_expected : Warning<
  "unable to find %0 directory, expected to be in '%1'">,
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2016,6 +2016,9 @@ def mcheck_zero_division : Flag<["-"], "mcheck-zero-division">, Group<m_Group>;
 def mno_check_zero_division : Flag<["-"], "mno-check-zero-division">,
                              Group<m_Group>;
 def mcompact_branches_EQ : Joined<["-"], "mcompact-branches=">, Group<m_Group>;
+def mindirect_jump_EQ : Joined<["-"], "mindirect-jump=">,
+  Group<m_Group>,
+  HelpText<"Change indirect jump instructions to inhibit speculation">;
 def mdsp : Flag<["-"], "mdsp">, Group<m_Group>;
 def mno_dsp : Flag<["-"], "mno-dsp">, Group<m_Group>;
 def mdspr2 : Flag<["-"], "mdspr2">, Group<m_Group>;
@@ -2422,6 +2425,11 @@ def mhexagon_hvx_double : Flag<["-"], "mhvx-double">, Group<m_hexagon_Features_G
 def mno_hexagon_hvx_double : Flag<["-"], "mno-hvx-double">, Group<m_hexagon_Features_Group>,
    Flags<[CC1Option]>, HelpText<"Disable Hexagon Double Vector eXtensions">;

+def mretpoline : Flag<["-"], "mretpoline">, Group<m_x86_Features_Group>;
+def mno_retpoline : Flag<["-"], "mno-retpoline">, Group<m_x86_Features_Group>;
+def mretpoline_external_thunk : Flag<["-"], "mretpoline-external-thunk">, Group<m_x86_Features_Group>;
+def mno_retpoline_external_thunk : Flag<["-"], "mno-retpoline-external-thunk">, Group<m_x86_Features_Group>;
+
 // These are legacy user-facing driver-level option spellings. They are always
 // aliases for options that are spelled using the more common Unix / GNU flag
 // style of double-dash and equals-joined flags.
--- a/clang/lib/Basic/Targets.cpp
+++ b/clang/lib/Basic/Targets.cpp
@@ -2691,6 +2691,8 @@ class X86TargetInfo : public TargetInfo {
  bool HasCLWB = false;
  bool HasMOVBE = false;
  bool HasPREFETCHWT1 = false;
+  bool HasRetpoline = false;
+  bool HasRetpolineExternalThunk = false;

  /// \brief Enumeration of all of the X86 CPUs supported by Clang.
  ///
@@ -3821,6 +3823,10 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
      HasPREFETCHWT1 = true;
    } else if (Feature == "+clzero") {
      HasCLZERO = true;
+    } else if (Feature == "+retpoline") {
+      HasRetpoline = true;
+    } else if (Feature == "+retpoline-external-thunk") {
+      HasRetpolineExternalThunk = true;
    }

    X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature)
@@ -4285,6 +4291,8 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
      .Case("rdrnd", HasRDRND)
      .Case("rdseed", HasRDSEED)
      .Case("rtm", HasRTM)
+      .Case("retpoline", HasRetpoline)
+      .Case("retpoline-external-thunk", HasRetpolineExternalThunk)
      .Case("sgx", HasSGX)
      .Case("sha", HasSHA)
      .Case("sse", SSELevel >= SSE1)
@@ -8057,6 +8065,7 @@ class MipsTargetInfo : public TargetInfo {
  } DspRev;
  bool HasMSA;
  bool DisableMadd4;
+  bool UseIndirectJumpHazard;

 protected:
  bool HasFP64;
@@ -8067,7 +8076,8 @@ public:
      : TargetInfo(Triple), IsMips16(false), IsMicromips(false),
        IsNan2008(false), IsSingleFloat(false), IsNoABICalls(false),
        CanUseBSDABICalls(false), FloatABI(HardFloat), DspRev(NoDSP),
-        HasMSA(false), DisableMadd4(false), HasFP64(false) {
+        HasMSA(false), DisableMadd4(false), UseIndirectJumpHazard(false),
+        HasFP64(false) {
    TheCXXABI.set(TargetCXXABI::GenericMIPS);

    setABI((getTriple().getArch() == llvm::Triple::mips ||
@@ -8490,6 +8500,8 @@ public:
        IsNan2008 = false;
      else if (Feature == "+noabicalls")
        IsNoABICalls = true;
+      else if (Feature == "+use-indirect-jump-hazard")
+        UseIndirectJumpHazard = true;
    }

    setDataLayout();
--- a/clang/lib/Driver/ToolChains/Arch/Mips.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/Mips.cpp
@@ -302,6 +302,28 @@ void mips::getMIPSTargetFeatures(const Driver &D, const llvm::Triple &Triple,
  AddTargetFeature(Args, Features, options::OPT_mlong_calls,
                   options::OPT_mno_long_calls, "long-calls");
  AddTargetFeature(Args, Features, options::OPT_mmt, options::OPT_mno_mt,"mt");
+
+  if (Arg *A = Args.getLastArg(options::OPT_mindirect_jump_EQ)) {
+    StringRef Val = StringRef(A->getValue());
+    if (Val == "hazard") {
+      Arg *B =
+          Args.getLastArg(options::OPT_mmicromips, options::OPT_mno_micromips);
+      Arg *C = Args.getLastArg(options::OPT_mips16, options::OPT_mno_mips16);
+
+      if (B && B->getOption().matches(options::OPT_mmicromips))
+        D.Diag(diag::err_drv_unsupported_indirect_jump_opt)
+            << "hazard" << "micromips";
+      else if (C && C->getOption().matches(options::OPT_mips16))
+        D.Diag(diag::err_drv_unsupported_indirect_jump_opt)
+            << "hazard" << "mips16";
+      else if (mips::supportsIndirectJumpHazardBarrier(CPUName))
+        Features.push_back("+use-indirect-jump-hazard");
+      else
+        D.Diag(diag::err_drv_unsupported_indirect_jump_opt)
+            << "hazard" << CPUName;
+    } else
+      D.Diag(diag::err_drv_unknown_indirect_jump_opt) << Val;
+  }
 }

 mips::NanEncoding mips::getSupportedNanEncoding(StringRef &CPU) {
@@ -327,6 +349,23 @@ mips::NanEncoding mips::getSupportedNanEncoding(StringRef &CPU) {
      .Default(NanLegacy);
 }

+bool mips::supportsIndirectJumpHazardBarrier(StringRef &CPU) {
+  // Supporting the hazard barrier method of dealing with indirect
+  // jumps requires MIPSR2 support.
+  return llvm::StringSwitch<bool>(CPU)
+      .Case("mips32r2", true)
+      .Case("mips32r3", true)
+      .Case("mips32r5", true)
+      .Case("mips32r6", true)
+      .Case("mips64r2", true)
+      .Case("mips64r3", true)
+      .Case("mips64r5", true)
+      .Case("mips64r6", true)
+      .Case("octeon", true)
+      .Case("p5600", true)
+      .Default(false);
+}
+
 bool mips::hasCompactBranches(StringRef &CPU) {
  // mips32r6 and mips64r6 have compact branches.
  return llvm::StringSwitch<bool>(CPU)
--- a/clang/lib/Driver/ToolChains/Arch/Mips.h
+++ b/clang/lib/Driver/ToolChains/Arch/Mips.h
@@ -53,6 +53,7 @@ bool isFPXXDefault(const llvm::Triple &Triple, StringRef CPUName,
 bool shouldUseFPXX(const llvm::opt::ArgList &Args, const llvm::Triple &Triple,
                   StringRef CPUName, StringRef ABIName,
                   mips::FloatABI FloatABI);
+bool supportsIndirectJumpHazardBarrier(StringRef &CPU);

 } // end namespace mips
 } // end namespace target
--- a/clang/test/Driver/mips-features.c
+++ b/clang/test/Driver/mips-features.c
@@ -270,3 +270,9 @@
 // LONG-CALLS-ON: "-target-feature" "+long-calls"
 // LONG-CALLS-OFF: "-target-feature" "-long-calls"
 // LONG-CALLS-DEF-NOT: "long-calls"
+
+// -mindirect-jump=hazard
+// RUN: %clang -target mips-unknown-linux-gnu -### -c %s \
+// RUN:        -mindirect-jump=hazard 2>&1 \
+// RUN:   | FileCheck --check-prefix=INDIRECT-BH %s
+// INDIRECT-BH: "-target-feature" "+use-indirect-jump-hazard"
--- a/clang/test/Driver/x86-target-features.c
+++ b/clang/test/Driver/x86-target-features.c
@@ -84,3 +84,13 @@
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-clzero %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-CLZERO %s
 // CLZERO: "-target-feature" "+clzero"
 // NO-CLZERO: "-target-feature" "-clzero"
+
+// RUN: %clang -target i386-linux-gnu -mretpoline %s -### -o %t.o 2>&1 | FileCheck -check-prefix=RETPOLINE %s
+// RUN: %clang -target i386-linux-gnu -mno-retpoline %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-RETPOLINE %s
+// RETPOLINE: "-target-feature" "+retpoline"
+// NO-RETPOLINE: "-target-feature" "-retpoline"
+
+// RUN: %clang -target i386-linux-gnu -mretpoline -mretpoline-external-thunk %s -### -o %t.o 2>&1 | FileCheck -check-prefix=RETPOLINE-EXTERNAL-THUNK %s
+// RUN: %clang -target i386-linux-gnu -mretpoline -mno-retpoline-external-thunk %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-RETPOLINE-EXTERNAL-THUNK %s
+// RETPOLINE-EXTERNAL-THUNK: "-target-feature" "+retpoline-external-thunk"
+// NO-RETPOLINE-EXTERNAL-THUNK: "-target-feature" "-retpoline-external-thunk"
--- a/lld/ELF/Arch/Mips.cpp
+++ b/lld/ELF/Arch/Mips.cpp
@@ -203,7 +203,8 @@ template <class ELFT> void MIPS<ELFT>::writePltHeader(uint8_t *Buf) const {

  write32<E>(Buf + 16, 0x03e07825); // move  $15, $31
  write32<E>(Buf + 20, 0x0018c082); // srl   $24, $24, 2
-  write32<E>(Buf + 24, 0x0320f809); // jalr  $25
+  uint32_t JalrInst = Config->ZHazardplt ? 0x0320fc09 : 0x0320f809;
+  write32<E>(Buf + 24, JalrInst); // jalr.hb $25 or jalr $25
  write32<E>(Buf + 28, 0x2718fffe); // subu  $24, $24, 2

  uint64_t GotPlt = InX::GotPlt->getVA();
@@ -217,10 +218,14 @@ void MIPS<ELFT>::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
                          uint64_t PltEntryAddr, int32_t Index,
                          unsigned RelOff) const {
  const endianness E = ELFT::TargetEndianness;
+  uint32_t JrInst = isMipsR6<ELFT>()
+                        ? (Config->ZHazardplt ? 0x03200409 : 0x03200009)
+                        : (Config->ZHazardplt ? 0x03200408 : 0x03200008);
+
  write32<E>(Buf, 0x3c0f0000);     // lui   $15, %hi(.got.plt entry)
  write32<E>(Buf + 4, 0x8df90000); // l[wd] $25, %lo(.got.plt entry)($15)
                                   // jr    $25
-  write32<E>(Buf + 8, isMipsR6<ELFT>() ? 0x03200009 : 0x03200008);
+  write32<E>(Buf + 8, JrInst);     // jr  $25 / jr.hb $25
  write32<E>(Buf + 12, 0x25f80000); // addiu $24, $15, %lo(.got.plt entry)
  writeMipsHi16<E>(Buf, GotPltEntryAddr);
  writeMipsLo16<E>(Buf + 4, GotPltEntryAddr);
--- a/lld/ELF/Arch/X86.cpp
+++ b/lld/ELF/Arch/X86.cpp
@@ -21,7 +21,7 @@ using namespace lld;
 using namespace lld::elf;

 namespace {
-class X86 final : public TargetInfo {
+class X86 : public TargetInfo {
 public:
  X86();
  RelExpr getRelExpr(uint32_t Type, const SymbolBody &S,
@@ -358,7 +358,153 @@ void X86::relaxTlsLdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const {
  memcpy(Loc - 2, Inst, sizeof(Inst));
 }

-TargetInfo *elf::getX86TargetInfo() {
-  static X86 Target;
-  return &Target;
+namespace {
+class RetpolinePic : public X86 {
+public:
+  RetpolinePic();
+  void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override;
+  void writePltHeader(uint8_t *Buf) const override;
+  void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
+                int32_t Index, unsigned RelOff) const override;
+};
+
+class RetpolineNoPic : public X86 {
+public:
+  RetpolineNoPic();
+  void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override;
+  void writePltHeader(uint8_t *Buf) const override;
+  void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
+                int32_t Index, unsigned RelOff) const override;
+};
+} // namespace
+
+RetpolinePic::RetpolinePic() {
+  PltHeaderSize = 48;
+  PltEntrySize = 32;
+}
+
+void RetpolinePic::writeGotPlt(uint8_t *Buf, const SymbolBody &S) const {
+  write32le(Buf, S.getPltVA() + 17);
+}
+
+void RetpolinePic::writePltHeader(uint8_t *Buf) const {
+  const uint8_t Insn[] = {
+      0xff, 0xb3, 0,    0,    0,    0,          // 0:    pushl GOTPLT+4(%ebx)
+      0x50,                                     // 6:    pushl %eax
+      0x8b, 0x83, 0,    0,    0,    0,          // 7:    mov GOTPLT+8(%ebx), %eax
+      0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:    call next
+      0xf3, 0x90,                               // 12: loop: pause
+      0x0f, 0xae, 0xe8,                         // 14:   lfence
+      0xeb, 0xf9,                               // 17:   jmp loop
+      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19:   int3; .align 16
+      0x89, 0x0c, 0x24,                         // 20: next: mov %ecx, (%esp)
+      0x8b, 0x4c, 0x24, 0x04,                   // 23:   mov 0x4(%esp), %ecx
+      0x89, 0x44, 0x24, 0x04,                   // 27:   mov %eax ,0x4(%esp)
+      0x89, 0xc8,                               // 2b:   mov %ecx, %eax
+      0x59,                                     // 2d:   pop %ecx
+      0xc3,                                     // 2e:   ret
+      0xcc,                                     // 2f:   int3
+  };
+  memcpy(Buf, Insn, sizeof(Insn));
+  assert(sizeof(Insn) == TargetInfo::PltHeaderSize);
+
+  uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize();
+  uint32_t GotPlt = InX::GotPlt->getVA() - Ebx;
+  write32le(Buf + 2, GotPlt + 4);
+  write32le(Buf + 9, GotPlt + 8);
+}
+
+void RetpolinePic::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
+                            uint64_t PltEntryAddr, int32_t Index,
+                            unsigned RelOff) const {
+  const uint8_t Insn[] = {
+      0x50,                   // pushl %eax
+      0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax
+      0xe8, 0,    0, 0, 0,    // call plt+0x20
+      0xe9, 0,    0, 0, 0,    // jmp plt+0x12
+      0x68, 0,    0, 0, 0,    // pushl $reloc_offset
+      0xe9, 0,    0, 0, 0,    // jmp plt+0
+      0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+  };
+  memcpy(Buf, Insn, sizeof(Insn));
+  assert(sizeof(Insn) == TargetInfo::PltEntrySize);
+
+  uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize();
+  write32le(Buf + 3, GotPltEntryAddr - Ebx);
+  write32le(Buf + 8, -Index * PltEntrySize - PltHeaderSize - 12 + 32);
+  write32le(Buf + 13, -Index * PltEntrySize - PltHeaderSize - 17 + 18);
+  write32le(Buf + 18, RelOff);
+  write32le(Buf + 23, -Index * PltEntrySize - PltHeaderSize - 27);
+}
+
+RetpolineNoPic::RetpolineNoPic() {
+  PltHeaderSize = 48;
+  PltEntrySize = 32;
+}
+
+void RetpolineNoPic::writeGotPlt(uint8_t *Buf, const SymbolBody &S) const {
+  write32le(Buf, S.getPltVA() + 16);
+}
+
+void RetpolineNoPic::writePltHeader(uint8_t *Buf) const {
+  const uint8_t PltData[] = {
+      0xff, 0x35, 0,    0,    0,    0, // 0:    pushl GOTPLT+4
+      0x50,                            // 6:    pushl %eax
+      0xa1, 0,    0,    0,    0,       // 7:    mov GOTPLT+8, %eax
+      0xe8, 0x0f, 0x00, 0x00, 0x00,    // c:    call next
+      0xf3, 0x90,                      // 11: loop: pause
+      0x0f, 0xae, 0xe8,                // 13:   lfence
+      0xeb, 0xf9,                      // 16:   jmp loop
+      0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // 18:   int3
+      0xcc, 0xcc, 0xcc,                // 1f:   int3; .align 16
+      0x89, 0x0c, 0x24,                // 20: next: mov %ecx, (%esp)
+      0x8b, 0x4c, 0x24, 0x04,          // 23:   mov 0x4(%esp), %ecx
+      0x89, 0x44, 0x24, 0x04,          // 27:   mov %eax ,0x4(%esp)
+      0x89, 0xc8,                      // 2b:   mov %ecx, %eax
+      0x59,                            // 2d:   pop %ecx
+      0xc3,                            // 2e:   ret
+      0xcc,                            // 2f:   int3
+  };
+  memcpy(Buf, PltData, sizeof(PltData));
+  assert(sizeof(PltData) == TargetInfo::PltHeaderSize);
+
+  uint32_t GotPlt = InX::GotPlt->getVA();
+  write32le(Buf + 2, GotPlt + 4);
+  write32le(Buf + 8, GotPlt + 8);
+}
+
+void RetpolineNoPic::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
+                              uint64_t PltEntryAddr, int32_t Index,
+                              unsigned RelOff) const {
+  const uint8_t Insn[] = {
+      0x50,             // 0:  pushl %eax
+      0xa1, 0, 0, 0, 0, // 1:  mov foo_in_GOT, %eax
+      0xe8, 0, 0, 0, 0, // 6:  call plt+0x20
+      0xe9, 0, 0, 0, 0, // b:  jmp plt+0x11
+      0x68, 0, 0, 0, 0, // 10: pushl $reloc_offset
+      0xe9, 0, 0, 0, 0, // 15: jmp plt+0
+      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+  };
+  memcpy(Buf, Insn, sizeof(Insn));
+  assert(sizeof(Insn) == TargetInfo::PltEntrySize);
+
+  write32le(Buf + 2, GotPltEntryAddr);
+  write32le(Buf + 7, -Index * PltEntrySize - PltHeaderSize - 11 + 32);
+  write32le(Buf + 12, -Index * PltEntrySize - PltHeaderSize - 16 + 17);
+  write32le(Buf + 17, RelOff);
+  write32le(Buf + 22, -Index * PltEntrySize - PltHeaderSize - 26);
+}
+
+TargetInfo *elf::getX86TargetInfo() {
+  if (Config->ZRetpolineplt) {
+    if (Config->Pic) {
+      static RetpolinePic T;
+      return &T;
+    }
+    static RetpolineNoPic T;
+    return &T;
+  }
+
+  static X86 T;
+  return &T;
 }
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -23,7 +23,7 @@ using namespace lld;
 using namespace lld::elf;

 namespace {
-template <class ELFT> class X86_64 final : public TargetInfo {
+template <class ELFT> class X86_64 : public TargetInfo {
 public:
  X86_64();
  RelExpr getRelExpr(uint32_t Type, const SymbolBody &S,
@@ -462,12 +462,136 @@ void X86_64<ELFT>::relaxGot(uint8_t *Loc, uint64_t Val) const {
  write32le(Loc - 1, Val + 1);
 }

-TargetInfo *elf::getX32TargetInfo() {
-  static X86_64<ELF32LE> Target;
-  return &Target;
+namespace {
+template <class ELFT> class Retpoline : public X86_64<ELFT> {
+public:
+  Retpoline();
+  void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override;
+  void writePltHeader(uint8_t *Buf) const override;
+  void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
+                int32_t Index, unsigned RelOff) const override;
+};
+
+template <class ELFT> class RetpolineZNow : public X86_64<ELFT> {
+public:
+  RetpolineZNow();
+  void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override {}
+  void writePltHeader(uint8_t *Buf) const override;
+  void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
+                int32_t Index, unsigned RelOff) const override;
+};
+} // namespace
+
+template <class ELFT> Retpoline<ELFT>::Retpoline() {
+  TargetInfo::PltHeaderSize = 48;
+  TargetInfo::PltEntrySize = 32;
 }

-TargetInfo *elf::getX86_64TargetInfo() {
-  static X86_64<ELF64LE> Target;
-  return &Target;
+template <class ELFT>
+void Retpoline<ELFT>::writeGotPlt(uint8_t *Buf, const SymbolBody &S) const {
+  write32le(Buf, S.getPltVA() + 17);
 }
+
+template <class ELFT> void Retpoline<ELFT>::writePltHeader(uint8_t *Buf) const {
+  const uint8_t Insn[] = {
+      0xff, 0x35, 0,    0,    0,    0,          // 0:    pushq GOTPLT+8(%rip)
+      0x4c, 0x8b, 0x1d, 0,    0,    0,    0,    // 6:    mov GOTPLT+16(%rip), %r11
+      0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:    callq next
+      0xf3, 0x90,                               // 12: loop: pause
+      0x0f, 0xae, 0xe8,                         // 14:   lfence
+      0xeb, 0xf9,                               // 17:   jmp loop
+      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19:   int3; .align 16
+      0x4c, 0x89, 0x1c, 0x24,                   // 20: next: mov %r11, (%rsp)
+      0xc3,                                     // 24:   ret
+      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 25: int3; .align 16
+      0xcc, 0xcc, 0xcc, 0xcc,
+  };
+  memcpy(Buf, Insn, sizeof(Insn));
+  assert(sizeof(Insn) == TargetInfo::PltHeaderSize);
+
+  uint64_t GotPlt = InX::GotPlt->getVA();
+  uint64_t Plt = InX::Plt->getVA();
+  write32le(Buf + 2, GotPlt - Plt - 6 + 8);
+  write32le(Buf + 9, GotPlt - Plt - 13 + 16);
+}
+
+template <class ELFT>
+void Retpoline<ELFT>::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
+                               uint64_t PltEntryAddr, int32_t Index,
+                               unsigned RelOff) const {
+  const uint8_t Insn[] = {
+      0x4c, 0x8b, 0x1d, 0, 0, 0, 0, // 0:  mov foo@GOTPLT(%rip), %r11
+      0xe8, 0,    0,    0, 0,       // 7:  callq plt+0x20
+      0xe9, 0,    0,    0, 0,       // c:  jmp plt+0x12
+      0x68, 0,    0,    0, 0,       // 11: pushq <relocation index>
+      0xe9, 0,    0,    0, 0,       // 16: jmp plt+0
+      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // int3; .align 16
+  };
+  memcpy(Buf, Insn, sizeof(Insn));
+  assert(sizeof(Insn) == TargetInfo::PltEntrySize);
+
+  uint64_t Off = TargetInfo::PltHeaderSize + TargetInfo::PltEntrySize * Index;
+
+  write32le(Buf + 3, GotPltEntryAddr - PltEntryAddr - 7);
+  write32le(Buf + 8, -Off - 12 + 32);
+  write32le(Buf + 13, -Off - 17 + 18);
+  write32le(Buf + 18, Index);
+  write32le(Buf + 23, -Off - 27);
+}
+
+template <class ELFT> RetpolineZNow<ELFT>::RetpolineZNow() {
+  TargetInfo::PltHeaderSize = 32;
+  TargetInfo::PltEntrySize = 16;
+}
+
+template <class ELFT>
+void RetpolineZNow<ELFT>::writePltHeader(uint8_t *Buf) const {
+  const uint8_t Insn[] = {
+      0xe8, 0x0b, 0x00, 0x00, 0x00, // 0:    call next
+      0xf3, 0x90,                   // 5:  loop: pause
+      0x0f, 0xae, 0xe8,             // 7:    lfence
+      0xeb, 0xf9,                   // a:    jmp loop
+      0xcc, 0xcc, 0xcc, 0xcc,       // c:    int3; .align 16
+      0x4c, 0x89, 0x1c, 0x24,       // 10: next: mov %r11, (%rsp)
+      0xc3,                         // 14:   ret
+      0xcc,                         // 15: int3; .align 16
+      0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+      0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+  };
+  memcpy(Buf, Insn, sizeof(Insn));
+  assert(sizeof(Insn) == TargetInfo::PltHeaderSize);
+}
+
+template <class ELFT>
+void RetpolineZNow<ELFT>::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
+                                   uint64_t PltEntryAddr, int32_t Index,
+                                   unsigned RelOff) const {
+  const uint8_t Insn[] = {
+      0x4c, 0x8b, 0x1d, 0, 0, 0, 0, // mov foo@GOTPLT(%rip), %r11
+      0xe9, 0,    0,    0, 0,       // jmp plt+0
+      0xcc, 0xcc, 0xcc, 0xcc,       // int3; .align 16
+  };
+  memcpy(Buf, Insn, sizeof(Insn));
+  assert(sizeof(Insn) == TargetInfo::PltEntrySize);
+
+  write32le(Buf + 3, GotPltEntryAddr - PltEntryAddr - 7);
+  write32le(Buf + 8,
+            -Index * TargetInfo::PltEntrySize - TargetInfo::PltHeaderSize - 12);
+}
+
+template <class ELFT> TargetInfo *getTargetInfo() {
+  if (Config->ZRetpolineplt) {
+    if (Config->ZNow) {
+      static RetpolineZNow<ELFT> T;
+      return &T;
+    }
+    static Retpoline<ELFT> T;
+    return &T;
+  }
+
+  static X86_64<ELFT> T;
+  return &T;
+}
+
+TargetInfo *elf::getX32TargetInfo() { return getTargetInfo<ELF32LE>(); }
+TargetInfo *elf::getX86_64TargetInfo() { return getTargetInfo<ELF64LE>(); }
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -148,6 +148,7 @@ struct Configuration {
  bool WarnMissingEntry;
  bool ZCombreloc;
  bool ZExecstack;
+  bool ZHazardplt;
  bool ZNocopyreloc;
  bool ZNodelete;
  bool ZNodlopen;
@@ -156,6 +157,7 @@ struct Configuration {
  bool ZRelro;
  bool ZRodynamic;
  bool ZText;
+  bool ZRetpolineplt;
  bool ExitEarly;
  bool ZWxneeded;
  DiscardPolicy Discard;
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -682,12 +682,14 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
  Config->WarnCommon = Args.hasArg(OPT_warn_common);
  Config->ZCombreloc = !hasZOption(Args, "nocombreloc");
  Config->ZExecstack = hasZOption(Args, "execstack");
+  Config->ZHazardplt = hasZOption(Args, "hazardplt");
  Config->ZNocopyreloc = hasZOption(Args, "nocopyreloc");
  Config->ZNodelete = hasZOption(Args, "nodelete");
  Config->ZNodlopen = hasZOption(Args, "nodlopen");
  Config->ZNow = hasZOption(Args, "now");
  Config->ZOrigin = hasZOption(Args, "origin");
  Config->ZRelro = !hasZOption(Args, "norelro");
+  Config->ZRetpolineplt = hasZOption(Args, "retpolineplt");
  Config->ZRodynamic = hasZOption(Args, "rodynamic");
  Config->ZStackSize = getZOptionValue(Args, "stack-size", 0);
  Config->ZText = !hasZOption(Args, "notext");
--- a/lld/test/ELF/i386-retpoline-nopic.s
+++ b/lld/test/ELF/i386-retpoline-nopic.s
@@ -0,0 +1,65 @@
+// REQUIRES: x86
+// RUN: llvm-mc -filetype=obj -triple=i386-unknown-linux %s -o %t1.o
+// RUN: llvm-mc -filetype=obj -triple=i386-unknown-linux %p/Inputs/shared.s -o %t2.o
+// RUN: ld.lld -shared %t2.o -o %t2.so
+
+// RUN: ld.lld %t1.o %t2.so -o %t.exe -z retpolineplt
+// RUN: llvm-objdump -d -s %t.exe | FileCheck %s
+
+// CHECK:      Disassembly of section .plt:
+// CHECK-NEXT: .plt:
+// CHECK-NEXT: 11010:       ff 35 04 20 01 00       pushl   73732
+// CHECK-NEXT: 11016:       50      pushl   %eax
+// CHECK-NEXT: 11017:       a1 08 20 01 00  movl    73736, %eax
+// CHECK-NEXT: 1101c:       e8 0f 00 00 00  calll   15 <.plt+0x20>
+// CHECK-NEXT: 11021:       f3 90   pause
+// CHECK-NEXT: 11023:       0f ae e8        lfence
+// CHECK-NEXT: 11026:       eb f9   jmp     -7 <.plt+0x11>
+// CHECK-NEXT: 11028:       cc      int3
+// CHECK-NEXT: 11029:       cc      int3
+// CHECK-NEXT: 1102a:       cc      int3
+// CHECK-NEXT: 1102b:       cc      int3
+// CHECK-NEXT: 1102c:       cc      int3
+// CHECK-NEXT: 1102d:       cc      int3
+// CHECK-NEXT: 1102e:       cc      int3
+// CHECK-NEXT: 1102f:       cc      int3
+// CHECK-NEXT: 11030:       89 0c 24        movl    %ecx, (%esp)
+// CHECK-NEXT: 11033:       8b 4c 24 04     movl    4(%esp), %ecx
+// CHECK-NEXT: 11037:       89 44 24 04     movl    %eax, 4(%esp)
+// CHECK-NEXT: 1103b:       89 c8   movl    %ecx, %eax
+// CHECK-NEXT: 1103d:       59      popl    %ecx
+// CHECK-NEXT: 1103e:       c3      retl
+// CHECK-NEXT: 1103f:       cc      int3
+// CHECK-NEXT: 11040:       50      pushl   %eax
+// CHECK-NEXT: 11041:       a1 0c 20 01 00  movl    73740, %eax
+// CHECK-NEXT: 11046:       e8 e5 ff ff ff  calll   -27 <.plt+0x20>
+// CHECK-NEXT: 1104b:       e9 d1 ff ff ff  jmp     -47 <.plt+0x11>
+// CHECK-NEXT: 11050:       68 00 00 00 00  pushl   $0
+// CHECK-NEXT: 11055:       e9 b6 ff ff ff  jmp     -74 <.plt>
+// CHECK-NEXT: 1105a:       cc      int3
+// CHECK-NEXT: 1105b:       cc      int3
+// CHECK-NEXT: 1105c:       cc      int3
+// CHECK-NEXT: 1105d:       cc      int3
+// CHECK-NEXT: 1105e:       cc      int3
+// CHECK-NEXT: 1105f:       cc      int3
+// CHECK-NEXT: 11060:       50      pushl   %eax
+// CHECK-NEXT: 11061:       a1 10 20 01 00  movl    73744, %eax
+// CHECK-NEXT: 11066:       e8 c5 ff ff ff  calll   -59 <.plt+0x20>
+// CHECK-NEXT: 1106b:       e9 b1 ff ff ff  jmp     -79 <.plt+0x11>
+// CHECK-NEXT: 11070:       68 08 00 00 00  pushl   $8
+// CHECK-NEXT: 11075:       e9 96 ff ff ff  jmp     -106 <.plt>
+// CHECK-NEXT: 1107a:       cc      int3
+// CHECK-NEXT: 1107b:       cc      int3
+// CHECK-NEXT: 1107c:       cc      int3
+// CHECK-NEXT: 1107d:       cc      int3
+// CHECK-NEXT: 1107e:       cc      int3
+// CHECK-NEXT: 1107f:       cc      int3
+
+// CHECK:      Contents of section .got.plt:
+// CHECK-NEXT: 00300100 00000000 00000000 50100100
+// CHECK-NEXT: 70100100
+
+.global _start
+_start:
+  jmp bar@PLT
+  jmp zed@PLT
--- a/lld/test/ELF/i386-retpoline-pic.s
+++ b/lld/test/ELF/i386-retpoline-pic.s
@@ -0,0 +1,62 @@
+// REQUIRES: x86
+// RUN: llvm-mc -filetype=obj -triple=i386-unknown-linux -position-independent %s -o %t1.o
+// RUN: llvm-mc -filetype=obj -triple=i386-unknown-linux -position-independent %p/Inputs/shared.s -o %t2.o
+// RUN: ld.lld -shared %t2.o -o %t2.so
+
+// RUN: ld.lld %t1.o %t2.so -o %t.exe -z retpolineplt -pie
+// RUN: llvm-objdump -d -s %t.exe | FileCheck %s
+
+// CHECK:      Disassembly of section .plt:
+// CHECK-NEXT: .plt:
+// CHECK-NEXT: 1010:       ff b3 04 20 00 00       pushl   8196(%ebx)
+// CHECK-NEXT: 1016:       50      pushl   %eax
+// CHECK-NEXT: 1017:       8b 83 08 20 00 00       movl    8200(%ebx), %eax
+// CHECK-NEXT: 101d:       e8 0e 00 00 00  calll   14 <.plt+0x20>
+// CHECK-NEXT: 1022:       f3 90   pause
+// CHECK-NEXT: 1024:       0f ae e8        lfence
+// CHECK-NEXT: 1027:       eb f9   jmp     -7 <.plt+0x12>
+// CHECK-NEXT: 1029:       cc      int3
+// CHECK-NEXT: 102a:       cc      int3
+// CHECK-NEXT: 102b:       cc      int3
+// CHECK-NEXT: 102c:       cc      int3
+// CHECK-NEXT: 102d:       cc      int3
+// CHECK-NEXT: 102e:       cc      int3
+// CHECK-NEXT: 102f:       cc      int3
+// CHECK-NEXT: 1030:       89 0c 24        movl    %ecx, (%esp)
+// CHECK-NEXT: 1033:       8b 4c 24 04     movl    4(%esp), %ecx
+// CHECK-NEXT: 1037:       89 44 24 04     movl    %eax, 4(%esp)
+// CHECK-NEXT: 103b:       89 c8   movl    %ecx, %eax
+// CHECK-NEXT: 103d:       59      popl    %ecx
+// CHECK-NEXT: 103e:       c3      retl
+// CHECK-NEXT: 103f:       cc      int3
+// CHECK-NEXT: 1040:       50      pushl   %eax
+// CHECK-NEXT: 1041:       8b 83 0c 20 00 00       movl    8204(%ebx), %eax
+// CHECK-NEXT: 1047:       e8 e4 ff ff ff  calll   -28 <.plt+0x20>
+// CHECK-NEXT: 104c:       e9 d1 ff ff ff  jmp     -47 <.plt+0x12>
+// CHECK-NEXT: 1051:       68 00 00 00 00  pushl   $0
+// CHECK-NEXT: 1056:       e9 b5 ff ff ff  jmp     -75 <.plt>
+// CHECK-NEXT: 105b:       cc      int3
+// CHECK-NEXT: 105c:       cc      int3
+// CHECK-NEXT: 105d:       cc      int3
+// CHECK-NEXT: 105e:       cc      int3
+// CHECK-NEXT: 105f:       cc      int3
+// CHECK-NEXT: 1060:       50      pushl   %eax
+// CHECK-NEXT: 1061:       8b 83 10 20 00 00       movl    8208(%ebx), %eax
+// CHECK-NEXT: 1067:       e8 c4 ff ff ff  calll   -60 <.plt+0x20>
+// CHECK-NEXT: 106c:       e9 b1 ff ff ff  jmp     -79 <.plt+0x12>
+// CHECK-NEXT: 1071:       68 08 00 00 00  pushl   $8
+// CHECK-NEXT: 1076:       e9 95 ff ff ff  jmp     -107 <.plt>
+// CHECK-NEXT: 107b:       cc      int3
+// CHECK-NEXT: 107c:       cc      int3
+// CHECK-NEXT: 107d:       cc      int3
+// CHECK-NEXT: 107e:       cc      int3
+// CHECK-NEXT: 107f:       cc      int3
+
+// CHECK:      Contents of section .got.plt:
+// CHECK-NEXT: 2000 00300000 00000000 00000000 51100000
+// CHECK-NEXT: 2010 71100000
+
+.global _start
+_start:
+  jmp bar@PLT
+  jmp zed@PLT
--- a/lld/test/ELF/mips-plt-r6.s
+++ b/lld/test/ELF/mips-plt-r6.s
@@ -6,7 +6,10 @@
 # RUN:         -mcpu=mips32r6 %S/Inputs/mips-dynamic.s -o %t2.o
 # RUN: ld.lld %t2.o -shared -o %t.so
 # RUN: ld.lld %t1.o %t.so -o %t.exe
-# RUN: llvm-objdump -d %t.exe | FileCheck %s
+# RUN: llvm-objdump -d %t.exe | FileCheck %s --check-prefixes=DEFAULT,CHECK
+# RUN: ld.lld %t2.o -shared -o %t.so -z hazardplt
+# RUN: ld.lld %t1.o %t.so -o %t.exe -z hazardplt
+# RUN: llvm-objdump -d %t.exe | FileCheck %s --check-prefixes=HAZARDPLT,CHECK

 # REQUIRES: mips

@@ -24,12 +27,14 @@
 # CHECK-NEXT:   2001c:       03 1c c0 23     subu    $24, $24, $gp
 # CHECK-NEXT:   20020:       03 e0 78 25     move    $15, $ra
 # CHECK-NEXT:   20024:       00 18 c0 82     srl     $24, $24, 2
-# CHECK-NEXT:   20028:       03 20 f8 09     jalr    $25
+# DEFAULT:      20028:       03 20 f8 09     jalr    $25
+# HAZARDPLT:    20028:       03 20 fc 09     jalr.hb $25
 # CHECK-NEXT:   2002c:       27 18 ff fe     addiu   $24, $24, -2

 # CHECK-NEXT:   20030:       3c 0f 00 03     aui     $15, $zero, 3
 # CHECK-NEXT:   20034:       8d f9 00 0c     lw      $25, 12($15)
-# CHECK-NEXT:   20038:       03 20 00 09     jr      $25
+# DEFAULT:      20038:       03 20 00 09     jr      $25
+# HAZARDPLT:    20038:       03 20 04 09     jr.hb   $25
 # CHECK-NEXT:   2003c:       25 f8 00 0c     addiu   $24, $15, 12

  .text
--- a/lld/test/ELF/x86-64-retpoline-znow.s
+++ b/lld/test/ELF/x86-64-retpoline-znow.s
@@ -0,0 +1,53 @@
+// REQUIRES: x86
+// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t1.o
+// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/shared.s -o %t2.o
+// RUN: ld.lld -shared %t2.o -o %t2.so
+
+// RUN: ld.lld -shared %t1.o %t2.so -o %t.exe -z retpolineplt -z now
+// RUN: llvm-objdump -d -s %t.exe | FileCheck %s
+
+// CHECK:      Disassembly of section .plt:
+// CHECK-NEXT: .plt:
+// CHECK-NEXT: 1010:	e8 0b 00 00 00 	callq	11 <.plt+0x10>
+// CHECK-NEXT: 1015:	f3 90 	pause
+// CHECK-NEXT: 1017:	0f ae e8 	lfence
+// CHECK-NEXT: 101a:	eb f9 	jmp	-7 <.plt+0x5>
+// CHECK-NEXT: 101c:	cc 	int3
+// CHECK-NEXT: 101d:	cc 	int3
+// CHECK-NEXT: 101e:	cc 	int3
+// CHECK-NEXT: 101f:	cc 	int3
+// CHECK-NEXT: 1020:	4c 89 1c 24 	movq	%r11, (%rsp)
+// CHECK-NEXT: 1024:	c3 	retq
+// CHECK-NEXT: 1025:	cc 	int3
+// CHECK-NEXT: 1026:	cc 	int3
+// CHECK-NEXT: 1027:	cc 	int3
+// CHECK-NEXT: 1028:	cc 	int3
+// CHECK-NEXT: 1029:	cc 	int3
+// CHECK-NEXT: 102a:	cc 	int3
+// CHECK-NEXT: 102b:	cc 	int3
+// CHECK-NEXT: 102c:	cc 	int3
+// CHECK-NEXT: 102d:	cc 	int3
+// CHECK-NEXT: 102e:	cc 	int3
+// CHECK-NEXT: 102f:	cc 	int3
+// CHECK-NEXT: 1030:    4c 8b 1d b1 10 00 00 movq 4273(%rip), %r11
+// CHECK-NEXT: 1037:	e9 d4 ff ff ff 	jmp	-44 <.plt>
+// CHECK-NEXT: 103c:	cc 	int3
+// CHECK-NEXT: 103d:	cc 	int3
+// CHECK-NEXT: 103e:	cc 	int3
+// CHECK-NEXT: 103f:	cc 	int3
+// CHECK-NEXT: 1040:    4c 8b 1d a9 10 00 00 movq 4265(%rip), %r11
+// CHECK-NEXT: 1047:	e9 c4 ff ff ff 	jmp	-60 <.plt>
+// CHECK-NEXT: 104c:	cc 	int3
+// CHECK-NEXT: 104d:	cc 	int3
+// CHECK-NEXT: 104e:	cc 	int3
+// CHECK-NEXT: 104f:	cc 	int3
+
+// CHECK:      Contents of section .got.plt:
+// CHECK-NEXT: 20d0 00200000 00000000 00000000 00000000
+// CHECK-NEXT: 20e0 00000000 00000000 00000000 00000000
+// CHECK-NEXT: 20f0 00000000 00000000
+
+.global _start
+_start:
+  jmp bar@PLT
+  jmp zed@PLT
--- a/lld/test/ELF/x86-64-retpoline.s
+++ b/lld/test/ELF/x86-64-retpoline.s
@@ -0,0 +1,66 @@
+// REQUIRES: x86
+// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t1.o
+// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/shared.s -o %t2.o
+// RUN: ld.lld -shared %t2.o -o %t2.so
+
+// RUN: ld.lld -shared %t1.o %t2.so -o %t.exe -z retpolineplt
+// RUN: llvm-objdump -d -s %t.exe | FileCheck %s
+
+// CHECK:      Disassembly of section .plt:
+// CHECK-NEXT: .plt:
+// CHECK-NEXT: 1010:       ff 35 f2 0f 00 00       pushq   4082(%rip)
+// CHECK-NEXT: 1016:       4c 8b 1d f3 0f 00 00    movq    4083(%rip), %r11
+// CHECK-NEXT: 101d:       e8 0e 00 00 00  callq   14 <.plt+0x20>
+// CHECK-NEXT: 1022:       f3 90   pause
+// CHECK-NEXT: 1024:       0f ae e8        lfence
+// CHECK-NEXT: 1027:       eb f9   jmp     -7 <.plt+0x12>
+// CHECK-NEXT: 1029:       cc      int3
+// CHECK-NEXT: 102a:       cc      int3
+// CHECK-NEXT: 102b:       cc      int3
+// CHECK-NEXT: 102c:       cc      int3
+// CHECK-NEXT: 102d:       cc      int3
+// CHECK-NEXT: 102e:       cc      int3
+// CHECK-NEXT: 102f:       cc      int3
+// CHECK-NEXT: 1030:       4c 89 1c 24     movq    %r11, (%rsp)
+// CHECK-NEXT: 1034:       c3      retq
+// CHECK-NEXT: 1035:       cc      int3
+// CHECK-NEXT: 1036:       cc      int3
+// CHECK-NEXT: 1037:       cc      int3
+// CHECK-NEXT: 1038:       cc      int3
+// CHECK-NEXT: 1039:       cc      int3
+// CHECK-NEXT: 103a:       cc      int3
+// CHECK-NEXT: 103b:       cc      int3
+// CHECK-NEXT: 103c:       cc      int3
+// CHECK-NEXT: 103d:       cc      int3
+// CHECK-NEXT: 103e:       cc      int3
+// CHECK-NEXT: 103f:       cc      int3
+// CHECK-NEXT: 1040:       4c 8b 1d d1 0f 00 00    movq    4049(%rip), %r11
+// CHECK-NEXT: 1047:       e8 e4 ff ff ff  callq   -28 <.plt+0x20>
+// CHECK-NEXT: 104c:       e9 d1 ff ff ff  jmp     -47 <.plt+0x12>
+// CHECK-NEXT: 1051:       68 00 00 00 00  pushq   $0
+// CHECK-NEXT: 1056:       e9 b5 ff ff ff  jmp     -75 <.plt>
+// CHECK-NEXT: 105b:       cc      int3
+// CHECK-NEXT: 105c:       cc      int3
+// CHECK-NEXT: 105d:       cc      int3
+// CHECK-NEXT: 105e:       cc      int3
+// CHECK-NEXT: 105f:       cc      int3
+// CHECK-NEXT: 1060:       4c 8b 1d b9 0f 00 00    movq    4025(%rip), %r11
+// CHECK-NEXT: 1067:       e8 c4 ff ff ff  callq   -60 <.plt+0x20>
+// CHECK-NEXT: 106c:       e9 b1 ff ff ff  jmp     -79 <.plt+0x12>
+// CHECK-NEXT: 1071:       68 01 00 00 00  pushq   $1
+// CHECK-NEXT: 1076:       e9 95 ff ff ff  jmp     -107 <.plt>
+// CHECK-NEXT: 107b:       cc      int3
+// CHECK-NEXT: 107c:       cc      int3
+// CHECK-NEXT: 107d:       cc      int3
+// CHECK-NEXT: 107e:       cc      int3
+// CHECK-NEXT: 107f:       cc      int3
+
+// CHECK:      Contents of section .got.plt:
+// CHECK-NEXT: 2000 00300000 00000000 00000000 00000000
+// CHECK-NEXT: 2010 00000000 00000000 51100000 00000000
+// CHECK-NEXT: 2020 71100000 00000000
+
+.global _start
+_start:
+  jmp bar@PLT
+  jmp zed@PLT
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -26,7 +26,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR)
  set(LLVM_VERSION_MINOR 0)
 endif()
 if(NOT DEFINED LLVM_VERSION_PATCH)
-  set(LLVM_VERSION_PATCH 1)
+  set(LLVM_VERSION_PATCH 2)
 endif()
 if(NOT DEFINED LLVM_VERSION_SUFFIX)
  set(LLVM_VERSION_SUFFIX "")
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -420,6 +420,9 @@ namespace llvm {
  /// shuffles.
  FunctionPass *createExpandReductionsPass();

+  // This pass expands indirectbr instructions.
+  FunctionPass *createIndirectBrExpandPass();
+
 } // End llvm namespace

 #endif
--- a/llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -406,6 +406,13 @@ protected:
  /// immediately before machine code is emitted.
  virtual void addPreEmitPass() { }

+  /// Targets may add passes immediately before machine code is emitted in this
+  /// callback. This is called even later than `addPreEmitPass`.
+  // FIXME: Rename `addPreEmitPass` to something more sensible given its actual
+  // position and remove the `2` suffix here as this callback is what
+  // `addPreEmitPass` *should* be but in reality isn't.
+  virtual void addPreEmitPass2() {}
+
  /// Utilities for targets to add passes to the pass manager.
  ///

--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -157,6 +157,7 @@ void initializeIVUsersWrapperPassPass(PassRegistry&);
 void initializeIfConverterPass(PassRegistry&);
 void initializeImplicitNullChecksPass(PassRegistry&);
 void initializeIndVarSimplifyLegacyPassPass(PassRegistry&);
+void initializeIndirectBrExpandPassPass(PassRegistry&);
 void initializeInductiveRangeCheckEliminationPass(PassRegistry&);
 void initializeInferAddressSpacesPass(PassRegistry&);
 void initializeInferFunctionAttrsLegacyPassPass(PassRegistry&);
--- a/llvm/include/llvm/Target/TargetLowering.h
+++ b/llvm/include/llvm/Target/TargetLowering.h
@@ -799,7 +799,7 @@ public:
  }

  /// Return true if lowering to a jump table is allowed.
-  bool areJTsAllowed(const Function *Fn) const {
+  virtual bool areJTsAllowed(const Function *Fn) const {
    if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")
      return false;

--- a/llvm/include/llvm/Target/TargetSubtargetInfo.h
+++ b/llvm/include/llvm/Target/TargetSubtargetInfo.h
@@ -172,6 +172,9 @@ public:
  /// \brief True if the subtarget should run the atomic expansion pass.
  virtual bool enableAtomicExpand() const;

+  /// True if the subtarget should run the indirectbr expansion pass.
+  virtual bool enableIndirectBrExpand() const;
+
  /// \brief Override generic scheduling policy within a region.
  ///
  /// This is a convenient way for targets that don't provide any custom
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -34,6 +34,7 @@ add_llvm_library(LLVMCodeGen
  GlobalMerge.cpp
  IfConversion.cpp
  ImplicitNullChecks.cpp
+  IndirectBrExpandPass.cpp
  InlineSpiller.cpp
  InterferenceCache.cpp
  InterleavedAccessPass.cpp
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -39,6 +39,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
  initializeGCModuleInfoPass(Registry);
  initializeIfConverterPass(Registry);
  initializeImplicitNullChecksPass(Registry);
+  initializeIndirectBrExpandPassPass(Registry);
  initializeInterleavedAccessPass(Registry);
  initializeLiveDebugValuesPass(Registry);
  initializeLiveDebugVariablesPass(Registry);
--- a/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
+++ b/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
@@ -0,0 +1,221 @@
+//===- IndirectBrExpandPass.cpp - Expand indirectbr to switch -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Implements an expansion pass to turn `indirectbr` instructions in the IR
+/// into `switch` instructions. This works by enumerating the basic blocks in
+/// a dense range of integers, replacing each `blockaddr` constant with the
+/// corresponding integer constant, and then building a switch that maps from
+/// the integers to the actual blocks. All of the indirectbr instructions in the
+/// function are redirected to this common switch.
+///
+/// While this is generically useful if a target is unable to codegen
+/// `indirectbr` natively, it is primarily useful when there is some desire to
+/// get the builtin non-jump-table lowering of a switch even when the input
+/// source contained an explicit indirect branch construct.
+///
+/// Note that it doesn't make any sense to enable this pass unless a target also
+/// disables jump-table lowering of switches. Doing that is likely to pessimize
+/// the code.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "indirectbr-expand"
+
+namespace {
+
+class IndirectBrExpandPass : public FunctionPass {
+  const TargetLowering *TLI = nullptr;
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+
+  IndirectBrExpandPass() : FunctionPass(ID) {
+    initializeIndirectBrExpandPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override;
+};
+
+} // end anonymous namespace
+
+char IndirectBrExpandPass::ID = 0;
+
+INITIALIZE_PASS(IndirectBrExpandPass, DEBUG_TYPE,
+                "Expand indirectbr instructions", false, false)
+
+FunctionPass *llvm::createIndirectBrExpandPass() {
+  return new IndirectBrExpandPass();
+}
+
+bool IndirectBrExpandPass::runOnFunction(Function &F) {
+  auto &DL = F.getParent()->getDataLayout();
+  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+  if (!TPC)
+    return false;
+
+  auto &TM = TPC->getTM<TargetMachine>();
+  auto &STI = *TM.getSubtargetImpl(F);
+  if (!STI.enableIndirectBrExpand())
+    return false;
+  TLI = STI.getTargetLowering();
+
+  SmallVector<IndirectBrInst *, 1> IndirectBrs;
+
+  // Set of all potential successors for indirectbr instructions.
+  SmallPtrSet<BasicBlock *, 4> IndirectBrSuccs;
+
+  // Build a list of indirectbrs that we want to rewrite.
+  for (BasicBlock &BB : F)
+    if (auto *IBr = dyn_cast<IndirectBrInst>(BB.getTerminator())) {
+      // Handle the degenerate case of no successors by replacing the indirectbr
+      // with unreachable as there is no successor available.
+      if (IBr->getNumSuccessors() == 0) {
+        (void)new UnreachableInst(F.getContext(), IBr);
+        IBr->eraseFromParent();
+        continue;
+      }
+
+      IndirectBrs.push_back(IBr);
+      for (BasicBlock *SuccBB : IBr->successors())
+        IndirectBrSuccs.insert(SuccBB);
+    }
+
+  if (IndirectBrs.empty())
+    return false;
+
+  // If we need to replace any indirectbrs we need to establish integer
+  // constants that will correspond to each of the basic blocks in the function
+  // whose address escapes. We do that here and rewrite all the blockaddress
+  // constants to just be those integer constants cast to a pointer type.
+  SmallVector<BasicBlock *, 4> BBs;
+
+  for (BasicBlock &BB : F) {
+    // Skip blocks that aren't successors to an indirectbr we're going to
+    // rewrite.
+    if (!IndirectBrSuccs.count(&BB))
+      continue;
+
+    auto IsBlockAddressUse = [&](const Use &U) {
+      return isa<BlockAddress>(U.getUser());
+    };
+    auto BlockAddressUseIt = llvm::find_if(BB.uses(), IsBlockAddressUse);
+    if (BlockAddressUseIt == BB.use_end())
+      continue;
+
+    assert(std::find_if(std::next(BlockAddressUseIt), BB.use_end(),
+                        IsBlockAddressUse) == BB.use_end() &&
+           "There should only ever be a single blockaddress use because it is "
+           "a constant and should be uniqued.");
+
+    auto *BA = cast<BlockAddress>(BlockAddressUseIt->getUser());
+
+    // Skip if the constant was formed but ended up not being used (due to DCE
+    // or whatever).
+    if (!BA->isConstantUsed())
+      continue;
+
+    // Compute the index we want to use for this basic block. We can't use zero
+    // because null can be compared with block addresses.
+    int BBIndex = BBs.size() + 1;
+    BBs.push_back(&BB);
+
+    auto *ITy = cast<IntegerType>(DL.getIntPtrType(BA->getType()));
+    ConstantInt *BBIndexC = ConstantInt::get(ITy, BBIndex);
+
+    // Now rewrite the blockaddress to an integer constant based on the index.
+    // FIXME: We could potentially preserve the uses as arguments to inline asm.
+    // This would allow some uses such as diagnostic information in crashes to
+    // have higher quality even when this transform is enabled, but would break
+    // users that round-trip blockaddresses through inline assembly and then
+    // back into an indirectbr.
+    BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(BBIndexC, BA->getType()));
+  }
+
+  if (BBs.empty()) {
+    // There are no blocks whose address is taken, so any indirectbr instruction
+    // cannot get a valid input and we can replace all of them with unreachable.
+    for (auto *IBr : IndirectBrs) {
+      (void)new UnreachableInst(F.getContext(), IBr);
+      IBr->eraseFromParent();
+    }
+    return true;
+  }
+
+  BasicBlock *SwitchBB;
+  Value *SwitchValue;
+
+  // Compute a common integer type across all the indirectbr instructions.
+  IntegerType *CommonITy = nullptr;
+  for (auto *IBr : IndirectBrs) {
+    auto *ITy =
+        cast<IntegerType>(DL.getIntPtrType(IBr->getAddress()->getType()));
+    if (!CommonITy || ITy->getBitWidth() > CommonITy->getBitWidth())
+      CommonITy = ITy;
+  }
+
+  auto GetSwitchValue = [DL, CommonITy](IndirectBrInst *IBr) {
+    return CastInst::CreatePointerCast(
+        IBr->getAddress(), CommonITy,
+        Twine(IBr->getAddress()->getName()) + ".switch_cast", IBr);
+  };
+
+  if (IndirectBrs.size() == 1) {
+    // If we only have one indirectbr, we can just directly replace it within
+    // its block.
+    SwitchBB = IndirectBrs[0]->getParent();
+    SwitchValue = GetSwitchValue(IndirectBrs[0]);
+    IndirectBrs[0]->eraseFromParent();
+  } else {
+    // Otherwise we need to create a new block to hold the switch across BBs,
+    // jump to that block instead of each indirectbr, and phi together the
+    // values for the switch.
+    SwitchBB = BasicBlock::Create(F.getContext(), "switch_bb", &F);
+    auto *SwitchPN = PHINode::Create(CommonITy, IndirectBrs.size(),
+                                     "switch_value_phi", SwitchBB);
+    SwitchValue = SwitchPN;
+
+    // Now replace the indirectbr instructions with direct branches to the
+    // switch block and fill out the PHI operands.
+    for (auto *IBr : IndirectBrs) {
+      SwitchPN->addIncoming(GetSwitchValue(IBr), IBr->getParent());
+      BranchInst::Create(SwitchBB, IBr);
+      IBr->eraseFromParent();
+    }
+  }
+
+  // Now build the switch in the block. The block will have no terminator
+  // already.
+  auto *SI = SwitchInst::Create(SwitchValue, BBs[0], BBs.size(), SwitchBB);
+
+  // Add a case for each block.
+  for (int i : llvm::seq<int>(1, BBs.size()))
+    SI->addCase(ConstantInt::get(CommonITy, i + 1), BBs[i]);
+
+  return true;
+}
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -790,6 +790,9 @@ void TargetPassConfig::addMachinePasses() {
  if (EnableMachineOutliner)
    PM->add(createMachineOutlinerPass());

+  // Add passes that directly emit MI after all other MI passes.
+  addPreEmitPass2();
+
  AddingMachinePasses = false;
 }

--- a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -37,6 +37,10 @@ bool TargetSubtargetInfo::enableAtomicExpand() const {
  return true;
 }

+bool TargetSubtargetInfo::enableIndirectBrExpand() const {
+  return false;
+}
+
 bool TargetSubtargetInfo::enableMachineScheduler() const {
  return false;
 }
--- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -5133,6 +5133,7 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  // It also applies for registers Rt and Rs of microMIPSr6 jalrc.hb instruction
  // and registers Rd and Base for microMIPS lwp instruction
  case Mips::JALR_HB:
+  case Mips::JALR_HB64:
  case Mips::JALRC_HB_MMR6:
  case Mips::JALRC_MMR6:
    if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg())
--- a/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -1878,4 +1878,16 @@ let AddedComplexity = 41 in {
  def : StoreRegImmPat<SDC1_D64_MMR6, f64>, FGR_64, ISA_MICROMIPS32R6;
 }

-def TAILCALL_MMR6 : TailCall<BC_MMR6, brtarget26_mm>, ISA_MICROMIPS32R6;
+def TAILCALL_MMR6 : TailCall<BC_MMR6, brtarget26_mm>, ISA_MICROMIPS64R6;
+
+def TAILCALLREG_MMR6 : TailCallReg<JRC16_MM, GPR32Opnd>, ISA_MICROMIPS32R6;
+
+def PseudoIndirectBranch_MMR6 : PseudoIndirectBranchBase<JRC16_MMR6,
+                                                         GPR32Opnd>,
+                                ISA_MICROMIPS32R6;
+
+def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)),
+              (TAILCALL_MMR6 tglobaladdr:$dst)>, ISA_MICROMIPS32R6;
+
+def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)),
+              (TAILCALL_MMR6 texternalsym:$dst)>, ISA_MICROMIPS32R6;
--- a/llvm/lib/Target/Mips/MicroMips64r6InstrInfo.td
+++ b/llvm/lib/Target/Mips/MicroMips64r6InstrInfo.td
@@ -352,6 +352,8 @@ class LWUPC_MM64R6_DESC {
  bit IsPCRelativeLoad = 1;
 }

+class JRC16_64_MMR6_DESC : JRC16_MMR6_DESC_BASE<"jrc16", GPR64Opnd>;
+
 //===----------------------------------------------------------------------===//
 //
 // Instruction Definitions
@@ -465,6 +467,9 @@ let DecoderNamespace = "MicroMipsR6" in {
  def LWUPC_MM64R6 : R6MMR6Rel, LWUPC_MM64R6_ENC, LWUPC_MM64R6_DESC,
                     ISA_MICROMIPS64R6;
 }
+let DecoderNamespace = "MicroMips64r6" in
+  def JRC16_64_MMR6 : R6MMR6Rel, JRC16_64_MMR6_DESC, JRC16_MMR6_ENC,
+                      ISA_MICROMIPS64R6;

 let AdditionalPredicates = [InMicroMips] in
 defm : MaterializeImms<i64, ZERO_64, DADDIU_MM64R6, LUi64, ORi64>;
@@ -505,6 +510,9 @@ def : MipsPat<(subc GPR64:$lhs, GPR64:$rhs),

 def : MipsPat<(atomic_load_64 addr:$a), (LD_MM64R6 addr:$a)>, ISA_MICROMIPS64R6;

+def TAILCALLREG64_MMR6 : TailCallReg<JRC16_64_MMR6, GPR64Opnd>,
+                         ISA_MICROMIPS64R6;
+
 //===----------------------------------------------------------------------===//
 //
 // Instruction aliases
--- a/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -993,6 +993,12 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {

 def TAILCALL_MM : TailCall<J_MM, jmptarget_mm>, ISA_MIPS1_NOT_32R6_64R6;

+def TAILCALLREG_MM  : TailCallReg<JRC16_MM, GPR32Opnd>,
+                      ISA_MICROMIPS32_NOT_MIPS32R6;
+
+def PseudoIndirectBranch_MM : PseudoIndirectBranchBase<JR_MM, GPR32Opnd>,
+                              ISA_MICROMIPS32_NOT_MIPS32R6;
+
 let DecoderNamespace = "MicroMips" in {
  def RDHWR_MM : MMRel, R6MMR6Rel, ReadHardware<GPR32Opnd, HWRegsOpnd>,
                 RDHWR_FM_MM, ISA_MICROMIPS32_NOT_MIPS32R6;
--- a/llvm/lib/Target/Mips/Mips.td
+++ b/llvm/lib/Target/Mips/Mips.td
@@ -193,6 +193,10 @@ def FeatureMT : SubtargetFeature<"mt", "HasMT", "true", "Mips MT ASE">;
 def FeatureLongCalls : SubtargetFeature<"long-calls", "UseLongCalls", "true",
                                        "Disable use of the jal instruction">;

+def FeatureUseIndirectJumpsHazard : SubtargetFeature<"use-indirect-jump-hazard",
+                                                    "UseIndirectJumpsHazard",
+                                                    "true", "Use indirect jump"
+                        " guards to prevent certain speculation based attacks">;
 //===----------------------------------------------------------------------===//
 // Mips processors supported.
 //===----------------------------------------------------------------------===//
--- a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -1008,3 +1008,42 @@ def : MipsPat<(select i32:$cond, immz, i32:$f),
              (SELEQZ i32:$f, i32:$cond)>,
              ISA_MIPS32R6;
 }
+
+// Pseudo instructions
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1,
+    hasExtraSrcRegAllocReq = 1, isCTI = 1, Defs = [AT] in {
+  class TailCallRegR6<Instruction JumpInst, Register RT, RegisterOperand RO> :
+    PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>,
+    PseudoInstExpansion<(JumpInst RT:$rt, RO:$rs)>;
+}
+
+class PseudoIndirectBranchBaseR6<Instruction JumpInst, Register RT,
+                                 RegisterOperand RO> :
+    MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)],
+               II_IndirectBranchPseudo>,
+    PseudoInstExpansion<(JumpInst RT:$rt, RO:$rs)> {
+  let isTerminator=1;
+  let isBarrier=1;
+  let hasDelaySlot = 1;
+  let isBranch = 1;
+  let isIndirectBranch = 1;
+  bit isCTI = 1;
+}
+
+
+let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
+                            NoIndirectJumpGuards] in {
+  def TAILCALLR6REG : TailCallRegR6<JALR, ZERO, GPR32Opnd>, ISA_MIPS32R6;
+  def PseudoIndirectBranchR6 : PseudoIndirectBranchBaseR6<JALR, ZERO,
+                                                          GPR32Opnd>,
+                               ISA_MIPS32R6;
+}
+
+let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
+                            UseIndirectJumpsHazard] in {
+  def TAILCALLHBR6REG : TailCallReg<JR_HB_R6, GPR32Opnd>, ISA_MIPS32R6;
+  def PseudoIndrectHazardBranchR6 : PseudoIndirectBranchBase<JR_HB_R6,
+                                                             GPR32Opnd>,
+                                    ISA_MIPS32R6;
+}
+
--- a/llvm/lib/Target/Mips/Mips64InstrInfo.td
+++ b/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -236,13 +236,32 @@ let isCodeGenOnly = 1 in {
  def BGTZ64 : CBranchZero<"bgtz", brtarget, setgt, GPR64Opnd>, BGEZ_FM<7, 0>;
  def BLEZ64 : CBranchZero<"blez", brtarget, setle, GPR64Opnd>, BGEZ_FM<6, 0>;
  def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>;
-  def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>;
+  let AdditionalPredicates = [NoIndirectJumpGuards] in
+    def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>;
+}
+let AdditionalPredicates = [NotInMicroMips],
+    DecoderNamespace = "Mips64" in {
+  def JR_HB64 : JR_HB_DESC<GPR64Opnd>, JR_HB_ENC, ISA_MIPS32_NOT_32R6_64R6;
+  def JALR_HB64 : JALR_HB_DESC<GPR64Opnd>, JALR_HB_ENC, ISA_MIPS32R2;
+}
+def PseudoReturn64 : PseudoReturnBase<GPR64Opnd>;
+
+let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
+                            NoIndirectJumpGuards] in {
+  def TAILCALLREG64 : TailCallReg<JR64, GPR64Opnd>, ISA_MIPS3_NOT_32R6_64R6,
+                      PTR_64;
+  def PseudoIndirectBranch64 : PseudoIndirectBranchBase<JR64, GPR64Opnd>,
+                               ISA_MIPS3_NOT_32R6_64R6;
 }

-def TAILCALLREG64 : TailCallReg<GPR64Opnd>;
-
-def PseudoReturn64 : PseudoReturnBase<GPR64Opnd>;
-def PseudoIndirectBranch64 : PseudoIndirectBranchBase<GPR64Opnd>;
+let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
+                            UseIndirectJumpsHazard] in {
+  def TAILCALLREGHB64 : TailCallReg<JR_HB64, GPR64Opnd>,
+                        ISA_MIPS32R2_NOT_32R6_64R6, PTR_64;
+  def PseudoIndirectHazardBranch64 : PseudoIndirectBranchBase<JR_HB64,
+                                                              GPR64Opnd>,
+                                     ISA_MIPS32R2_NOT_32R6_64R6;
+}

 /// Multiply and Divide Instructions.
 let AdditionalPredicates = [NotInMicroMips] in {
@@ -515,6 +534,10 @@ def DMTC2 : MTC3OP<"dmtc2", COP2Opnd, GPR64Opnd, II_DMTC2>, MFC3OP_FM<0x12, 5>,
            ISA_MIPS3;
 }

+
+let AdditionalPredicates = [UseIndirectJumpsHazard] in
+  def JALRHB64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR_HB64, RA_64>;
+
 //===----------------------------------------------------------------------===//
 //  Arbitrary patterns that map to one or more instructions
 //===----------------------------------------------------------------------===//
@@ -803,6 +826,8 @@ let AdditionalPredicates = [NotInMicroMips] in {
                      (DSLLV GPR64Opnd:$rd, GPR64Opnd:$rd, GPR32Opnd:$rt), 0>,
                      ISA_MIPS3;

+  def : MipsInstAlias<"jalr.hb $rs", (JALR_HB64 RA_64, GPR64Opnd:$rs), 1>,
+        ISA_MIPS64;
 // Two operand (implicit 0 selector) versions:
  def : MipsInstAlias<"dmtc0 $rt, $rd",
                      (DMTC0 COP0Opnd:$rd, GPR64Opnd:$rt, 0), 0>;
--- a/llvm/lib/Target/Mips/Mips64r6InstrInfo.td
+++ b/llvm/lib/Target/Mips/Mips64r6InstrInfo.td
@@ -104,6 +104,16 @@ class JIC64_DESC : JMP_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16, GPR64Opnd,

 class LL64_R6_DESC : LL_R6_DESC_BASE<"ll", GPR32Opnd, mem_simm9, II_LL>;
 class SC64_R6_DESC : SC_R6_DESC_BASE<"sc", GPR32Opnd, II_SC>;
+
+class JR_HB64_R6_DESC : JR_HB_DESC_BASE<"jr.hb", GPR64Opnd> {
+  bit isBranch = 1;
+  bit isIndirectBranch = 1;
+  bit hasDelaySlot = 1;
+  bit isTerminator=1;
+  bit isBarrier=1;
+  bit isCTI = 1;
+  InstrItinClass Itinerary = II_JR_HB;
+}
 //===----------------------------------------------------------------------===//
 //
 // Instruction Definitions
@@ -136,6 +146,7 @@ def SCD_R6 : SCD_R6_ENC, SCD_R6_DESC, ISA_MIPS32R6;
 let DecoderNamespace = "Mips32r6_64r6_GP64" in {
  def SELEQZ64 : SELEQZ_ENC, SELEQZ64_DESC, ISA_MIPS32R6, GPR_64;
  def SELNEZ64 : SELNEZ_ENC, SELNEZ64_DESC, ISA_MIPS32R6, GPR_64;
+  def JR_HB64_R6 : JR_HB_R6_ENC, JR_HB64_R6_DESC, ISA_MIPS32R6;
 }
 let AdditionalPredicates = [NotInMicroMips],
    DecoderNamespace = "Mips32r6_64r6_PTR64" in {
@@ -277,3 +288,22 @@ def : MipsPat<(select (i32 (setne i32:$cond, immz)), immz, i64:$f),
 def : MipsPat<(select (i32 (seteq i32:$cond, immz)), immz, i64:$f),
              (SELNEZ64 i64:$f, (SLL64_32 i32:$cond))>,
              ISA_MIPS64R6;
+
+// Pseudo instructions
+
+let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
+                            NoIndirectJumpGuards] in {
+  def TAILCALL64R6REG : TailCallRegR6<JALR64, ZERO_64, GPR64Opnd>, ISA_MIPS64R6;
+  def PseudoIndirectBranch64R6 : PseudoIndirectBranchBaseR6<JALR64, ZERO_64,
+                                                            GPR64Opnd>,
+                                 ISA_MIPS64R6;
+}
+
+let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
+                            UseIndirectJumpsHazard] in {
+  def TAILCALLHB64R6REG : TailCallReg<JR_HB64_R6, GPR64Opnd>,
+                          ISA_MIPS64R6;
+  def PseudoIndrectHazardBranch64R6 : PseudoIndirectBranchBase<JR_HB64_R6,
+                                                                 GPR64Opnd>,
+                                      ISA_MIPS64R6;
+}
--- a/llvm/lib/Target/Mips/MipsDSPInstrFormats.td
+++ b/llvm/lib/Target/Mips/MipsDSPInstrFormats.td
@@ -53,7 +53,7 @@ class DSPInst<string opstr = "">

 class PseudoDSP<dag outs, dag ins, list<dag> pattern,
                InstrItinClass itin = IIPseudo>
-    : MipsPseudo<outs, ins, pattern, itin>, PredicateControl {
+    : MipsPseudo<outs, ins, pattern, itin> {
  let InsnPredicates = [HasDSP];
 }

--- a/llvm/lib/Target/Mips/MipsInstrFormats.td
+++ b/llvm/lib/Target/Mips/MipsInstrFormats.td
@@ -128,7 +128,7 @@ class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern,
 // Mips Pseudo Instructions Format
 class MipsPseudo<dag outs, dag ins, list<dag> pattern,
                 InstrItinClass itin = IIPseudo> :
-  MipsInst<outs, ins, "", pattern, itin, Pseudo> {
+  MipsInst<outs, ins, "", pattern, itin, Pseudo>, PredicateControl {
  let isCodeGenOnly = 1;
  let isPseudo = 1;
 }
@@ -136,7 +136,7 @@ class MipsPseudo<dag outs, dag ins, list<dag> pattern,
 // Mips32/64 Pseudo Instruction Format
 class PseudoSE<dag outs, dag ins, list<dag> pattern,
               InstrItinClass itin = IIPseudo> :
-  MipsPseudo<outs, ins, pattern, itin>, PredicateControl {
+  MipsPseudo<outs, ins, pattern, itin> {
  let EncodingPredicates = [HasStdEnc];
 }

--- a/llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -286,7 +286,7 @@ unsigned MipsInstrInfo::getEquivalentCompactForm(
    case Mips::JR:
    case Mips::PseudoReturn:
    case Mips::PseudoIndirectBranch:
-    case Mips::TAILCALLREG:
+    case Mips::PseudoIndirectBranch_MM:
      canUseShortMicroMipsCTI = true;
      break;
    }
@@ -365,18 +365,19 @@ unsigned MipsInstrInfo::getEquivalentCompactForm(
    // For MIPSR6, the instruction 'jic' can be used for these cases. Some
    // tools will accept 'jrc reg' as an alias for 'jic 0, $reg'.
    case Mips::JR:
+    case Mips::PseudoIndirectBranchR6:
+    case Mips::PseudoIndirectBranch_MM:
    case Mips::PseudoReturn:
-    case Mips::PseudoIndirectBranch:
-    case Mips::TAILCALLREG:
+    case Mips::TAILCALLR6REG:
      if (canUseShortMicroMipsCTI)
        return Mips::JRC16_MM;
      return Mips::JIC;
    case Mips::JALRPseudo:
      return Mips::JIALC;
    case Mips::JR64:
+    case Mips::PseudoIndirectBranch64R6:
    case Mips::PseudoReturn64:
-    case Mips::PseudoIndirectBranch64:
-    case Mips::TAILCALLREG64:
+    case Mips::TAILCALL64R6REG:
      return Mips::JIC64;
    case Mips::JALR64Pseudo:
      return Mips::JIALC64;
@@ -526,3 +527,26 @@ bool MipsInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
  }
  return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
 }
+
+//  Perform target specific instruction verification.
+bool MipsInstrInfo::verifyInstruction(const MachineInstr &MI,
+                                      StringRef &ErrInfo) const {
+  switch (MI.getOpcode()) {
+    case Mips::TAILCALLREG:
+    case Mips::PseudoIndirectBranch:
+    case Mips::JR:
+    case Mips::JR64:
+    case Mips::JALR:
+    case Mips::JALR64:
+    case Mips::JALRPseudo:
+      if (!Subtarget.useIndirectJumpsHazard())
+        return true;
+
+      ErrInfo = "invalid instruction when using jump guards!";
+      return false;
+    default:
+      return true;
+  }
+
+  return true;
+}
--- a/llvm/lib/Target/Mips/MipsInstrInfo.h
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.h
@@ -138,6 +138,10 @@ public:
  bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
                             unsigned &SrcOpIdx2) const override;

+  /// Perform target specific instruction verification.
+  bool verifyInstruction(const MachineInstr &MI,
+                         StringRef &ErrInfo) const override;
+
 protected:
  bool isZeroImm(const MachineOperand &op) const;

--- a/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -212,6 +212,8 @@ def HasMicroMips64r6 : Predicate<"Subtarget->inMicroMips64r6Mode()">,
                       AssemblerPredicate<"FeatureMicroMips,FeatureMips64r6">;
 def InMips16Mode :    Predicate<"Subtarget->inMips16Mode()">,
                      AssemblerPredicate<"FeatureMips16">;
+def NotInMips16Mode : Predicate<"!Subtarget->inMips16Mode()">,
+                      AssemblerPredicate<"!FeatureMips16">;
 def HasCnMips    :    Predicate<"Subtarget->hasCnMips()">,
                      AssemblerPredicate<"FeatureCnMips">;
 def NotCnMips    :    Predicate<"!Subtarget->hasCnMips()">,
@@ -242,7 +244,10 @@ def HasMadd4 : Predicate<"!Subtarget->disableMadd4()">,
               AssemblerPredicate<"!FeatureMadd4">;
 def HasMT  : Predicate<"Subtarget->hasMT()">,
             AssemblerPredicate<"FeatureMT">;
-
+def UseIndirectJumpsHazard : Predicate<"Subtarget->useIndirectJumpsHazard()">,
+                            AssemblerPredicate<"FeatureUseIndirectJumpsHazard">;
+def NoIndirectJumpGuards : Predicate<"!Subtarget->useIndirectJumpsHazard()">,
+                           AssemblerPredicate<"!FeatureUseIndirectJumpsHazard">;
 //===----------------------------------------------------------------------===//
 // Mips GPR size adjectives.
 // They are mutually exclusive.
@@ -1480,8 +1485,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1,
    PseudoSE<(outs), (ins calltarget:$target), [], II_J>,
    PseudoInstExpansion<(JumpInst Opnd:$target)>;

-  class TailCallReg<RegisterOperand RO> :
-    MipsPseudo<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>;
+  class TailCallReg<Instruction JumpInst, RegisterOperand RO> :
+    PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>,
+    PseudoInstExpansion<(JumpInst RO:$rs)>;
 }

 class BAL_BR_Pseudo<Instruction RealInst> :
@@ -2005,7 +2011,7 @@ def BLTZL   : MMRel, CBranchZero<"bltzl", brtarget, setlt, GPR32Opnd, 0>,
 def B       : UncondBranch<BEQ>;

 def JAL  : MMRel, JumpLink<"jal", calltarget>, FJ<3>;
-let AdditionalPredicates = [NotInMicroMips] in {
+let AdditionalPredicates = [NotInMicroMips, NoIndirectJumpGuards] in {
  def JALR : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM;
  def JALRPseudo : JumpLinkRegPseudo<GPR32Opnd, JALR, RA>;
 }
@@ -2025,14 +2031,16 @@ def BAL_BR : BAL_BR_Pseudo<BGEZAL>;
 let Predicates = [NotInMicroMips] in { 
  def TAILCALL : TailCall<J, jmptarget>;
 }
-
-def TAILCALLREG : TailCallReg<GPR32Opnd>;
+let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
+                            NoIndirectJumpGuards] in
+  def TAILCALLREG : TailCallReg<JR, GPR32Opnd>, ISA_MIPS1_NOT_32R6_64R6;

 // Indirect branches are matched as PseudoIndirectBranch/PseudoIndirectBranch64
 // then are expanded to JR, JR64, JALR, or JALR64 depending on the ISA.
-class PseudoIndirectBranchBase<RegisterOperand RO> :
+class PseudoIndirectBranchBase<Instruction JumpInst, RegisterOperand RO> :
    MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)],
-               II_IndirectBranchPseudo> {
+               II_IndirectBranchPseudo>,
+    PseudoInstExpansion<(JumpInst RO:$rs)> {
  let isTerminator=1;
  let isBarrier=1;
  let hasDelaySlot = 1;
@@ -2041,7 +2049,10 @@ class PseudoIndirectBranchBase<RegisterOperand RO> :
  bit isCTI = 1;
 }

-def PseudoIndirectBranch : PseudoIndirectBranchBase<GPR32Opnd>;
+let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
+                            NoIndirectJumpGuards] in
+  def PseudoIndirectBranch : PseudoIndirectBranchBase<JR, GPR32Opnd>,
+                             ISA_MIPS1_NOT_32R6_64R6;

 // Return instructions are matched as a RetRA instruction, then are expanded
 // into PseudoReturn/PseudoReturn64 after register allocation. Finally,
@@ -2213,8 +2224,8 @@ class JALR_HB_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
  list<dag> Pattern = [];
 }

-class JR_HB_DESC : InstSE<(outs), (ins), "", [], II_JR_HB, FrmJ>,
-                   JR_HB_DESC_BASE<"jr.hb", GPR32Opnd> {
+class JR_HB_DESC<RegisterOperand RO> :
+  InstSE<(outs), (ins), "", [], II_JR_HB, FrmJ>, JR_HB_DESC_BASE<"jr.hb", RO> {
  let isBranch=1;
  let isIndirectBranch=1;
  let hasDelaySlot=1;
@@ -2223,8 +2234,9 @@ class JR_HB_DESC : InstSE<(outs), (ins), "", [], II_JR_HB, FrmJ>,
  bit isCTI = 1;
 }

-class JALR_HB_DESC : InstSE<(outs), (ins), "", [], II_JALR_HB, FrmJ>,
-                     JALR_HB_DESC_BASE<"jalr.hb", GPR32Opnd> {
+class JALR_HB_DESC<RegisterOperand RO> :
+  InstSE<(outs), (ins), "", [], II_JALR_HB, FrmJ>, JALR_HB_DESC_BASE<"jalr.hb",
+                                                                     RO> {
  let isIndirectBranch=1;
  let hasDelaySlot=1;
  bit isCTI = 1;
@@ -2233,8 +2245,19 @@ class JALR_HB_DESC : InstSE<(outs), (ins), "", [], II_JALR_HB, FrmJ>,
 class JR_HB_ENC : JR_HB_FM<8>;
 class JALR_HB_ENC : JALR_HB_FM<9>;

-def JR_HB : JR_HB_DESC, JR_HB_ENC, ISA_MIPS32_NOT_32R6_64R6;
-def JALR_HB : JALR_HB_DESC, JALR_HB_ENC, ISA_MIPS32;
+def JR_HB : JR_HB_DESC<GPR32Opnd>, JR_HB_ENC, ISA_MIPS32R2_NOT_32R6_64R6;
+def JALR_HB : JALR_HB_DESC<GPR32Opnd>, JALR_HB_ENC, ISA_MIPS32;
+
+let AdditionalPredicates = [NotInMicroMips, UseIndirectJumpsHazard] in
+  def JALRHBPseudo : JumpLinkRegPseudo<GPR32Opnd, JALR_HB, RA>;
+
+
+let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
+                            UseIndirectJumpsHazard] in {
+  def TAILCALLREGHB : TailCallReg<JR_HB, GPR32Opnd>, ISA_MIPS32_NOT_32R6_64R6;
+  def PseudoIndirectHazardBranch : PseudoIndirectBranchBase<JR_HB, GPR32Opnd>,
+                                   ISA_MIPS32R2_NOT_32R6_64R6;
+}

 class TLB<string asmstr, InstrItinClass itin = NoItinerary> :
  InstSE<(outs), (ins), asmstr, [], itin, FrmOther, asmstr>;
@@ -2368,7 +2391,8 @@ def : MipsInstAlias<"j $rs", (JR GPR32Opnd:$rs), 0>;
 let Predicates = [NotInMicroMips] in {
 def : MipsInstAlias<"jalr $rs", (JALR RA, GPR32Opnd:$rs), 0>;
 }
-def : MipsInstAlias<"jalr.hb $rs", (JALR_HB RA, GPR32Opnd:$rs), 1>, ISA_MIPS32;
+def : MipsInstAlias<"jalr.hb $rs", (JALR_HB RA, GPR32Opnd:$rs), 1>,
+      ISA_MIPS32;
 def : MipsInstAlias<"neg $rt, $rs",
                    (SUB GPR32Opnd:$rt, ZERO, GPR32Opnd:$rs), 1>;
 def : MipsInstAlias<"neg $rt",
--- a/llvm/lib/Target/Mips/MipsLongBranch.cpp
+++ b/llvm/lib/Target/Mips/MipsLongBranch.cpp
@@ -341,11 +341,16 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
        BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP)
          .addReg(Mips::SP).addImm(8);

-      if (Subtarget.hasMips32r6())
+      if (Subtarget.hasMips32r6() && !Subtarget.useIndirectJumpsHazard())
        BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR))
          .addReg(Mips::ZERO).addReg(Mips::AT);
-      else
-        BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR)).addReg(Mips::AT);
+      else {
+        unsigned JROp =
+            Subtarget.useIndirectJumpsHazard()
+                ? (Subtarget.hasMips32r6() ? Mips::JR_HB_R6 : Mips::JR_HB)
+                : Mips::JR;
+        BuildMI(*BalTgtMBB, Pos, DL, TII->get(JROp)).addReg(Mips::AT);
+      }

      if (Subtarget.isTargetNaCl()) {
        BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::NOP));
@@ -414,14 +419,21 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
      BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LD), Mips::RA_64)
        .addReg(Mips::SP_64).addImm(0);

-      if (Subtarget.hasMips64r6())
+      if (Subtarget.hasMips64r6() && !Subtarget.useIndirectJumpsHazard())
        BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR64))
-          .addReg(Mips::ZERO_64).addReg(Mips::AT_64);
-      else
-        BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64);
+            .addReg(Mips::ZERO_64)
+            .addReg(Mips::AT_64);
+      else {
+        unsigned JROp =
+            Subtarget.useIndirectJumpsHazard()
+                ? (Subtarget.hasMips32r6() ? Mips::JR_HB64_R6 : Mips::JR_HB64)
+                : Mips::JR64;
+        BuildMI(*BalTgtMBB, Pos, DL, TII->get(JROp)).addReg(Mips::AT_64);
+      }

      BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64)
-        .addReg(Mips::SP_64).addImm(16);
+          .addReg(Mips::SP_64)
+          .addImm(16);
      BalTgtMBB->rbegin()->bundleWithPred();
    }

--- a/llvm/lib/Target/Mips/MipsSubtarget.cpp
+++ b/llvm/lib/Target/Mips/MipsSubtarget.cpp
@@ -70,8 +70,8 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
      InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false),
      HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16),
      Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false),
-      HasEVA(false), DisableMadd4(false), HasMT(false), TM(TM),
-      TargetTriple(TT), TSInfo(),
+      HasEVA(false), DisableMadd4(false), HasMT(false),
+      UseIndirectJumpsHazard(false), TM(TM), TargetTriple(TT), TSInfo(),
      InstrInfo(
          MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))),
      FrameLowering(MipsFrameLowering::create(*this)),
@@ -103,6 +103,15 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
  if (IsFPXX && (isABI_N32() || isABI_N64()))
    report_fatal_error("FPXX is not permitted for the N32/N64 ABI's.", false);

+  if (UseIndirectJumpsHazard) {
+    if (InMicroMipsMode)
+      report_fatal_error(
+          "cannot combine indirect jumps with hazard barriers and microMIPS");
+    if (!hasMips32r2())
+      report_fatal_error(
+          "indirect jumps with hazard barriers requires MIPS32R2 or later");
+  }
+
  if (hasMips32r6()) {
    StringRef ISA = hasMips64r6() ? "MIPS64r6" : "MIPS32r6";

--- a/llvm/lib/Target/Mips/MipsSubtarget.h
+++ b/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -152,6 +152,10 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
  // HasMT -- support MT ASE.
  bool HasMT;

+  // Use hazard variants of the jump register instructions for indirect
+  // function calls and jump tables.
+  bool UseIndirectJumpsHazard;
+
  // Disable use of the `jal` instruction.
  bool UseLongCalls = false;

@@ -266,6 +270,9 @@ public:
  bool disableMadd4() const { return DisableMadd4; }
  bool hasEVA() const { return HasEVA; }
  bool hasMT() const { return HasMT; }
+  bool useIndirectJumpsHazard() const {
+    return UseIndirectJumpsHazard && hasMips32r2();
+  }
  bool useSmallSection() const { return UseSmallSection; }

  bool hasStandardEncoding() const { return !inMips16Mode(); }
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -57,6 +57,7 @@ set(sources
  X86OptimizeLEAs.cpp
  X86PadShortFunction.cpp
  X86RegisterInfo.cpp
+  X86RetpolineThunks.cpp
  X86SelectionDAGInfo.cpp
  X86ShuffleDecodeConstantPool.cpp
  X86Subtarget.cpp
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -22,6 +22,7 @@ namespace llvm {
 class FunctionPass;
 class ImmutablePass;
 class InstructionSelector;
+class ModulePass;
 class PassRegistry;
 class X86RegisterBankInfo;
 class X86Subtarget;
@@ -98,6 +99,9 @@ void initializeFixupBWInstPassPass(PassRegistry &);
 /// encoding when possible in order to reduce code size.
 FunctionPass *createX86EvexToVexInsts();

+/// This pass creates the thunks for the retpoline feature.
+FunctionPass *createX86RetpolineThunksPass();
+
 InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
                                                  X86Subtarget &,
                                                  X86RegisterBankInfo &);
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -290,6 +290,27 @@ def FeatureERMSB
          "ermsb", "HasERMSB", "true",
          "REP MOVS/STOS are fast">;

+// Enable mitigation of some aspects of speculative execution related
+// vulnerabilities by removing speculatable indirect branches. This disables
+// jump-table formation, rewrites explicit `indirectbr` instructions into
+// `switch` instructions, and uses a special construct called a "retpoline" to
+// prevent speculation of the remaining indirect branches (indirect calls and
+// tail calls).
+def FeatureRetpoline
+    : SubtargetFeature<"retpoline", "UseRetpoline", "true",
+                       "Remove speculation of indirect branches from the "
+                       "generated code, either by avoiding them entirely or "
+                       "lowering them with a speculation blocking construct.">;
+
+// Rely on external thunks for the emitted retpoline calls. This allows users
+// to provide their own custom thunk definitions in highly specialized
+// environments such as a kernel that does boot-time hot patching.
+def FeatureRetpolineExternalThunk
+    : SubtargetFeature<
+          "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
+          "Enable retpoline, but with an externally provided thunk.",
+          [FeatureRetpoline]>;
+
 //===----------------------------------------------------------------------===//
 // X86 processors supported.
 //===----------------------------------------------------------------------===//
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -344,6 +344,8 @@ static void printIntelMemReference(X86AsmPrinter &P, const MachineInstr *MI,
 static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
                              char Mode, raw_ostream &O) {
  unsigned Reg = MO.getReg();
+  bool EmitPercent = true;
+
  switch (Mode) {
  default: return true;  // Unknown mode.
  case 'b': // Print QImode register
@@ -358,6 +360,9 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
  case 'k': // Print SImode register
    Reg = getX86SubSuperRegister(Reg, 32);
    break;
+  case 'V':
+    EmitPercent = false;
+    LLVM_FALLTHROUGH;
  case 'q':
    // Print 64-bit register names if 64-bit integer registers are available.
    // Otherwise, print 32-bit register names.
@@ -365,7 +370,10 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
    break;
  }

-  O << '%' << X86ATTInstPrinter::getRegisterName(Reg);
+  if (EmitPercent)
+    O << '%';
+
+  O << X86ATTInstPrinter::getRegisterName(Reg);
  return false;
 }

@@ -438,6 +446,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
    case 'w': // Print HImode register
    case 'k': // Print SImode register
    case 'q': // Print DImode register
+    case 'V': // Print native register without '%'
      if (MO.isReg())
        return printAsmMRegister(*this, MO, ExtraCode[0], O);
      printOperand(*this, MI, OpNo, O);
--- a/llvm/lib/Target/X86/X86AsmPrinter.h
+++ b/llvm/lib/Target/X86/X86AsmPrinter.h
@@ -30,6 +30,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
  StackMaps SM;
  FaultMaps FM;
  std::unique_ptr<MCCodeEmitter> CodeEmitter;
+  bool NeedsRetpoline = false;

  // This utility class tracks the length of a stackmap instruction's 'shadow'.
  // It is used by the X86AsmPrinter to ensure that the stackmap shadow
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3161,6 +3161,10 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
      (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
    return false;

+  // Functions using retpoline should use SDISel for calls.
+  if (Subtarget->useRetpoline())
+    return false;
+
  // Handle only C, fastcc, and webkit_js calling conventions for now.
  switch (CC) {
  default: return false;
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -742,6 +742,11 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
                                          bool InProlog) const {
  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;

+  // FIXME: Add retpoline support and remove this.
+  if (Is64Bit && IsLargeCodeModel && STI.useRetpoline())
+    report_fatal_error("Emitting stack probe calls on 64-bit with the large "
+                       "code model and retpoline not yet implemented.");
+
  unsigned CallOp;
  if (Is64Bit)
    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
@@ -2337,6 +2342,10 @@ void X86FrameLowering::adjustForSegmentedStacks(
    // This solution is not perfect, as it assumes that the .rodata section
    // is laid out within 2^31 bytes of each function body, but this seems
    // to be sufficient for JIT.
+    // FIXME: Add retpoline support and remove the error here..
+    if (STI.useRetpoline())
+      report_fatal_error("Emitting morestack calls on 64-bit with the large "
+                         "code model and retpoline not yet implemented.");
    BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
        .addReg(X86::RIP)
        .addImm(0)
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -550,11 +550,11 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (OptLevel != CodeGenOpt::None &&
-        // Only does this when target favors doesn't favor register indirect
-        // call.
+        // Only do this when the target can fold the load into the call or
+        // jmp.
+        !Subtarget->useRetpoline() &&
        ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) ||
         (N->getOpcode() == X86ISD::TC_RETURN &&
-          // Only does this if load can be folded into TC_RETURN.
          (Subtarget->is64Bit() ||
           !getTargetMachine().isPositionIndependent())))) {
      /// Also try moving call address load from outside callseq_start to just
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24994,6 +24994,15 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
  return isShuffleMaskLegal(Mask, VT);
 }

+bool X86TargetLowering::areJTsAllowed(const Function *Fn) const {
+  // If the subtarget is using retpolines, we need to not generate jump tables.
+  if (Subtarget.useRetpoline())
+    return false;
+
+  // Otherwise, fallback on the generic logic.
+  return TargetLowering::areJTsAllowed(Fn);
+}
+
 //===----------------------------------------------------------------------===//
 //                           X86 Scheduler Hooks
 //===----------------------------------------------------------------------===//
@@ -26225,6 +26234,130 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
  return BB;
 }

+static unsigned getOpcodeForRetpoline(unsigned RPOpc) {
+  switch (RPOpc) {
+  case X86::RETPOLINE_CALL32:
+    return X86::CALLpcrel32;
+  case X86::RETPOLINE_CALL64:
+    return X86::CALL64pcrel32;
+  case X86::RETPOLINE_TCRETURN32:
+    return X86::TCRETURNdi;
+  case X86::RETPOLINE_TCRETURN64:
+    return X86::TCRETURNdi64;
+  }
+  llvm_unreachable("not retpoline opcode");
+}
+
+static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
+                                      unsigned Reg) {
+  if (Subtarget.useRetpolineExternalThunk()) {
+    // When using an external thunk for retpolines, we pick names that match the
+    // names GCC happens to use as well. This helps simplify the implementation
+    // of the thunks for kernels where they have no easy ability to create
+    // aliases and are doing non-trivial configuration of the thunk's body. For
+    // example, the Linux kernel will do boot-time hot patching of the thunk
+    // bodies and cannot easily export aliases of these to loaded modules.
+    //
+    // Note that at any point in the future, we may need to change the semantics
+    // of how we implement retpolines and at that time will likely change the
+    // name of the called thunk. Essentially, there is no hard guarantee that
+    // LLVM will generate calls to specific thunks, we merely make a best-effort
+    // attempt to help out kernels and other systems where duplicating the
+    // thunks is costly.
+    switch (Reg) {
+    case X86::EAX:
+      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+      return "__x86_indirect_thunk_eax";
+    case X86::ECX:
+      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+      return "__x86_indirect_thunk_ecx";
+    case X86::EDX:
+      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+      return "__x86_indirect_thunk_edx";
+    case X86::EDI:
+      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+      return "__x86_indirect_thunk_edi";
+    case X86::R11:
+      assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
+      return "__x86_indirect_thunk_r11";
+    }
+    llvm_unreachable("unexpected reg for retpoline");
+  }
+
+  // When targeting an internal COMDAT thunk use an LLVM-specific name.
+  switch (Reg) {
+  case X86::EAX:
+    assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+    return "__llvm_retpoline_eax";
+  case X86::ECX:
+    assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+    return "__llvm_retpoline_ecx";
+  case X86::EDX:
+    assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+    return "__llvm_retpoline_edx";
+  case X86::EDI:
+    assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+    return "__llvm_retpoline_edi";
+  case X86::R11:
+    assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
+    return "__llvm_retpoline_r11";
+  }
+  llvm_unreachable("unexpected reg for retpoline");
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
+                                        MachineBasicBlock *BB) const {
+  // Copy the virtual register into the R11 physical register and
+  // call the retpoline thunk.
+  DebugLoc DL = MI.getDebugLoc();
+  const X86InstrInfo *TII = Subtarget.getInstrInfo();
+  unsigned CalleeVReg = MI.getOperand(0).getReg();
+  unsigned Opc = getOpcodeForRetpoline(MI.getOpcode());
+
+  // Find an available scratch register to hold the callee. On 64-bit, we can
+  // just use R11, but we scan for uses anyway to ensure we don't generate
+  // incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't
+  // already a register use operand to the call to hold the callee. If none
+  // are available, use EDI instead. EDI is chosen because EBX is the PIC base
+  // register and ESI is the base pointer to realigned stack frames with VLAs.
+  SmallVector<unsigned, 3> AvailableRegs;
+  if (Subtarget.is64Bit())
+    AvailableRegs.push_back(X86::R11);
+  else
+    AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI});
+
+  // Zero out any registers that are already used.
+  for (const auto &MO : MI.operands()) {
+    if (MO.isReg() && MO.isUse())
+      for (unsigned &Reg : AvailableRegs)
+        if (Reg == MO.getReg())
+          Reg = 0;
+  }
+
+  // Choose the first remaining non-zero available register.
+  unsigned AvailableReg = 0;
+  for (unsigned MaybeReg : AvailableRegs) {
+    if (MaybeReg) {
+      AvailableReg = MaybeReg;
+      break;
+    }
+  }
+  if (!AvailableReg)
+    report_fatal_error("calling convention incompatible with retpoline, no "
+                       "available registers");
+
+  const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);
+
+  BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
+      .addReg(CalleeVReg);
+  MI.getOperand(0).ChangeToES(Symbol);
+  MI.setDesc(TII->get(Opc));
+  MachineInstrBuilder(*BB->getParent(), &MI)
+      .addReg(AvailableReg, RegState::Implicit | RegState::Kill);
+  return BB;
+}
+
 MachineBasicBlock *
 X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
@@ -26689,6 +26822,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
  case X86::TLS_base_addr32:
  case X86::TLS_base_addr64:
    return EmitLoweredTLSAddr(MI, BB);
+  case X86::RETPOLINE_CALL32:
+  case X86::RETPOLINE_CALL64:
+  case X86::RETPOLINE_TCRETURN32:
+  case X86::RETPOLINE_TCRETURN64:
+    return EmitLoweredRetpoline(MI, BB);
  case X86::CATCHRET:
    return EmitLoweredCatchRet(MI, BB);
  case X86::CATCHPAD:
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -986,6 +986,9 @@ namespace llvm {
    bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
                                EVT VT) const override;

+    /// Returns true if lowering to a jump table is allowed.
+    bool areJTsAllowed(const Function *Fn) const override;
+
    /// If true, then instruction selection should
    /// seek to shrink the FP constant of the specified type to a smaller type
    /// in order to save space and / or reduce runtime.
@@ -1289,6 +1292,9 @@ namespace llvm {
    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

+    MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
+                                            MachineBasicBlock *BB) const;
+
    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1106,14 +1106,14 @@ def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),

 def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
          (TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,
-          Requires<[Not64BitMode]>;
+          Requires<[Not64BitMode, NotUseRetpoline]>;

 // FIXME: This is disabled for 32-bit PIC mode because the global base
 // register which is part of the address mode may be assigned a
 // callee-saved register.
 def : Pat<(X86tcret (load addr:$dst), imm:$off),
          (TCRETURNmi addr:$dst, imm:$off)>,
-          Requires<[Not64BitMode, IsNotPIC]>;
+          Requires<[Not64BitMode, IsNotPIC, NotUseRetpoline]>;

 def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
          (TCRETURNdi tglobaladdr:$dst, imm:$off)>,
@@ -1125,13 +1125,21 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),

 def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
          (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
-          Requires<[In64BitMode]>;
+          Requires<[In64BitMode, NotUseRetpoline]>;

 // Don't fold loads into X86tcret requiring more than 6 regs.
 // There wouldn't be enough scratch registers for base+index.
 def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
          (TCRETURNmi64 addr:$dst, imm:$off)>,
-          Requires<[In64BitMode]>;
+          Requires<[In64BitMode, NotUseRetpoline]>;
+
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
+          (RETPOLINE_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
+          Requires<[In64BitMode, UseRetpoline]>;
+
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
+          (RETPOLINE_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>,
+          Requires<[Not64BitMode, UseRetpoline]>;

 def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
          (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
--- a/llvm/lib/Target/X86/X86InstrControl.td
+++ b/llvm/lib/Target/X86/X86InstrControl.td
@@ -211,11 +211,12 @@ let isCall = 1 in
                      Sched<[WriteJumpLd]>;
    def CALL32r     : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
                        "call{l}\t{*}$dst", [(X86call GR32:$dst)], IIC_CALL_RI>,
-                      OpSize32, Requires<[Not64BitMode]>, Sched<[WriteJump]>;
+                      OpSize32, Requires<[Not64BitMode,NotUseRetpoline]>,
+                      Sched<[WriteJump]>;
    def CALL32m     : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
                        "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))],
                        IIC_CALL_MEM>, OpSize32,
-                      Requires<[Not64BitMode,FavorMemIndirectCall]>,
+                      Requires<[Not64BitMode,FavorMemIndirectCall,NotUseRetpoline]>,
                      Sched<[WriteJumpLd]>;

    let Predicates = [Not64BitMode] in {
@@ -298,11 +299,12 @@ let isCall = 1, Uses = [RSP], SchedRW = [WriteJump] in {
  def CALL64r       : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
                        "call{q}\t{*}$dst", [(X86call GR64:$dst)],
                        IIC_CALL_RI>,
-                      Requires<[In64BitMode]>;
+                      Requires<[In64BitMode,NotUseRetpoline]>;
  def CALL64m       : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
                        "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))],
                        IIC_CALL_MEM>,
-                      Requires<[In64BitMode,FavorMemIndirectCall]>;
+                      Requires<[In64BitMode,FavorMemIndirectCall,
+                                NotUseRetpoline]>;

  def FARCALL64   : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
                       "lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>;
@@ -341,6 +343,27 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
  }
 }

+let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
+    Uses = [RSP],
+    usesCustomInserter = 1,
+    SchedRW = [WriteJump] in {
+  def RETPOLINE_CALL32 :
+    PseudoI<(outs), (ins GR32:$dst), [(X86call GR32:$dst)]>,
+            Requires<[Not64BitMode,UseRetpoline]>;
+
+  def RETPOLINE_CALL64 :
+    PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
+            Requires<[In64BitMode,UseRetpoline]>;
+
+  // Retpoline variant of indirect tail calls.
+  let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+    def RETPOLINE_TCRETURN64 :
+      PseudoI<(outs), (ins GR64:$dst, i32imm:$offset), []>;
+    def RETPOLINE_TCRETURN32 :
+      PseudoI<(outs), (ins GR32:$dst, i32imm:$offset), []>;
+  }
+}
+
 // Conditional tail calls are similar to the above, but they are branches
 // rather than barriers, and they use EFLAGS.
 let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -917,6 +917,8 @@ def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
 def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
 def HasERMSB : Predicate<"Subtarget->hasERMSB()">;
 def HasMFence    : Predicate<"Subtarget->hasMFence()">;
+def UseRetpoline : Predicate<"Subtarget->useRetpoline()">;
+def NotUseRetpoline : Predicate<"!Subtarget->useRetpoline()">;

 //===----------------------------------------------------------------------===//
 // X86 Instruction Format Definitions.
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -874,6 +874,10 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
      // address is to far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Register:
+      // FIXME: Add retpoline support and remove this.
+      if (Subtarget->useRetpoline())
+        report_fatal_error("Lowering register statepoints with retpoline not "
+                           "yet implemented.");
      CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
      CallOpcode = X86::CALL64r;
      break;
@@ -1028,6 +1032,10 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,

    EmitAndCountInstruction(
        MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
+    // FIXME: Add retpoline support and remove this.
+    if (Subtarget->useRetpoline())
+      report_fatal_error(
+          "Lowering patchpoint with retpoline not yet implemented.");
    EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
  }

--- a/llvm/lib/Target/X86/X86RetpolineThunks.cpp
+++ b/llvm/lib/Target/X86/X86RetpolineThunks.cpp
@@ -0,0 +1,265 @@
+//======- X86RetpolineThunks.cpp - Construct retpoline thunks for x86  --=====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Pass that injects an MI thunk implementing a "retpoline". This is
+/// a RET-implemented trampoline that is used to lower indirect calls in a way
+/// that prevents speculation on some x86 processors and can be used to mitigate
+/// security vulnerabilities due to targeted speculative execution and side
+/// channels such as CVE-2017-5715.
+///
+/// TODO(chandlerc): All of this code could use better comments and
+/// documentation.
+///
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-retpoline-thunks"
+
+static const char ThunkNamePrefix[] = "__llvm_retpoline_";
+static const char R11ThunkName[]    = "__llvm_retpoline_r11";
+static const char EAXThunkName[]    = "__llvm_retpoline_eax";
+static const char ECXThunkName[]    = "__llvm_retpoline_ecx";
+static const char EDXThunkName[]    = "__llvm_retpoline_edx";
+static const char EDIThunkName[]    = "__llvm_retpoline_edi";
+
+namespace {
+class X86RetpolineThunks : public MachineFunctionPass {
+public:
+  static char ID;
+
+  X86RetpolineThunks() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override { return "X86 Retpoline Thunks"; }
+
+  bool doInitialization(Module &M) override;
+  bool runOnMachineFunction(MachineFunction &F) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    MachineFunctionPass::getAnalysisUsage(AU);
+    AU.addRequired<MachineModuleInfo>();
+    AU.addPreserved<MachineModuleInfo>();
+  }
+
+private:
+  MachineModuleInfo *MMI;
+  const TargetMachine *TM;
+  bool Is64Bit;
+  const X86Subtarget *STI;
+  const X86InstrInfo *TII;
+
+  bool InsertedThunks;
+
+  void createThunkFunction(Module &M, StringRef Name);
+  void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
+  void populateThunk(MachineFunction &MF, Optional<unsigned> Reg = None);
+};
+
+} // end anonymous namespace
+
+FunctionPass *llvm::createX86RetpolineThunksPass() {
+  return new X86RetpolineThunks();
+}
+
+char X86RetpolineThunks::ID = 0;
+
+bool X86RetpolineThunks::doInitialization(Module &M) {
+  InsertedThunks = false;
+  return false;
+}
+
+bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << getPassName() << '\n');
+
+  TM = &MF.getTarget();;
+  STI = &MF.getSubtarget<X86Subtarget>();
+  TII = STI->getInstrInfo();
+  Is64Bit = TM->getTargetTriple().getArch() == Triple::x86_64;
+
+  MMI = &getAnalysis<MachineModuleInfo>();
+  Module &M = const_cast<Module &>(*MMI->getModule());
+
+  // If this function is not a thunk, check to see if we need to insert
+  // a thunk.
+  if (!MF.getName().startswith(ThunkNamePrefix)) {
+    // If we've already inserted a thunk, nothing else to do.
+    if (InsertedThunks)
+      return false;
+
+    // Only add a thunk if one of the functions has the retpoline feature
+    // enabled in its subtarget, and doesn't enable external thunks.
+    // FIXME: Conditionalize on indirect calls so we don't emit a thunk when
+    // nothing will end up calling it.
+    // FIXME: It's a little silly to look at every function just to enumerate
+    // the subtargets, but eventually we'll want to look at them for indirect
+    // calls, so maybe this is OK.
+    if (!STI->useRetpoline() || STI->useRetpolineExternalThunk())
+      return false;
+
+    // Otherwise, we need to insert the thunk.
+    // WARNING: This is not really a well behaving thing to do in a function
+    // pass. We extract the module and insert a new function (and machine
+    // function) directly into the module.
+    if (Is64Bit)
+      createThunkFunction(M, R11ThunkName);
+    else
+      for (StringRef Name :
+           {EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName})
+        createThunkFunction(M, Name);
+    InsertedThunks = true;
+    return true;
+  }
+
+  // If this *is* a thunk function, we need to populate it with the correct MI.
+  if (Is64Bit) {
+    assert(MF.getName() == "__llvm_retpoline_r11" &&
+           "Should only have an r11 thunk on 64-bit targets");
+
+    // __llvm_retpoline_r11:
+    //   callq .Lr11_call_target
+    // .Lr11_capture_spec:
+    //   pause
+    //   lfence
+    //   jmp .Lr11_capture_spec
+    // .align 16
+    // .Lr11_call_target:
+    //   movq %r11, (%rsp)
+    //   retq
+    populateThunk(MF, X86::R11);
+  } else {
+    // For 32-bit targets we need to emit a collection of thunks for various
+    // possible scratch registers as well as a fallback that uses EDI, which is
+    // normally callee saved.
+    //   __llvm_retpoline_eax:
+    //         calll .Leax_call_target
+    //   .Leax_capture_spec:
+    //         pause
+    //         jmp .Leax_capture_spec
+    //   .align 16
+    //   .Leax_call_target:
+    //         movl %eax, (%esp)  # Clobber return addr
+    //         retl
+    //
+    //   __llvm_retpoline_ecx:
+    //   ... # Same setup
+    //         movl %ecx, (%esp)
+    //         retl
+    //
+    //   __llvm_retpoline_edx:
+    //   ... # Same setup
+    //         movl %edx, (%esp)
+    //         retl
+    //
+    //   __llvm_retpoline_edi:
+    //   ... # Same setup
+    //         movl %edi, (%esp)
+    //         retl
+    if (MF.getName() == EAXThunkName)
+      populateThunk(MF, X86::EAX);
+    else if (MF.getName() == ECXThunkName)
+      populateThunk(MF, X86::ECX);
+    else if (MF.getName() == EDXThunkName)
+      populateThunk(MF, X86::EDX);
+    else if (MF.getName() == EDIThunkName)
+      populateThunk(MF, X86::EDI);
+    else
+      llvm_unreachable("Invalid thunk name on x86-32!");
+  }
+
+  return true;
+}
+
+void X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) {
+  assert(Name.startswith(ThunkNamePrefix) &&
+         "Created a thunk with an unexpected prefix!");
+
+  LLVMContext &Ctx = M.getContext();
+  auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
+  Function *F =
+      Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M);
+  F->setVisibility(GlobalValue::HiddenVisibility);
+  F->setComdat(M.getOrInsertComdat(Name));
+
+  // Add Attributes so that we don't create a frame, unwind information, or
+  // inline.
+  AttrBuilder B;
+  B.addAttribute(llvm::Attribute::NoUnwind);
+  B.addAttribute(llvm::Attribute::Naked);
+  F->addAttributes(llvm::AttributeList::FunctionIndex, B);
+
+  // Populate our function a bit so that we can verify.
+  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
+  IRBuilder<> Builder(Entry);
+
+  Builder.CreateRetVoid();
+}
+
+void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
+                                                    unsigned Reg) {
+  const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
+  const unsigned SPReg = Is64Bit ? X86::RSP : X86::ESP;
+  addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(MovOpc)), SPReg, false, 0)
+      .addReg(Reg);
+}
+
+void X86RetpolineThunks::populateThunk(MachineFunction &MF,
+                                       Optional<unsigned> Reg) {
+  // Set MF properties. We never use vregs...
+  MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+
+  MachineBasicBlock *Entry = &MF.front();
+  Entry->clear();
+
+  MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
+  MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
+  MF.push_back(CaptureSpec);
+  MF.push_back(CallTarget);
+
+  const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
+  const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;
+
+  BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addMBB(CallTarget);
+  Entry->addSuccessor(CallTarget);
+  Entry->addSuccessor(CaptureSpec);
+  CallTarget->setHasAddressTaken();
+
+  // In the capture loop for speculation, we want to stop the processor from
+  // speculating as fast as possible. On Intel processors, the PAUSE instruction
+  // will block speculation without consuming any execution resources. On AMD
+  // processors, the PAUSE instruction is (essentially) a nop, so we also use an
+  // LFENCE instruction which they have advised will stop speculation as well
+  // with minimal resource utilization. We still end the capture with a jump to
+  // form an infinite loop to fully guarantee that no matter what implementation
+  // of the x86 ISA, speculating this code path never escapes.
+  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE));
+  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE));
+  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec);
+  CaptureSpec->setHasAddressTaken();
+  CaptureSpec->addSuccessor(CaptureSpec);
+
+  CallTarget->setAlignment(4);
+  insertRegReturnAddrClobber(*CallTarget, *Reg);
+  BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
+}
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -315,6 +315,8 @@ void X86Subtarget::initializeEnvironment() {
  HasCLFLUSHOPT = false;
  HasCLWB = false;
  IsBTMemSlow = false;
+  UseRetpoline = false;
+  UseRetpolineExternalThunk = false;
  IsPMULLDSlow = false;
  IsSHLDSlow = false;
  IsUAMem16Slow = false;
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -297,6 +297,14 @@ protected:
  /// Processor supports Cache Line Write Back instruction
  bool HasCLWB;

+  /// Use a retpoline thunk rather than indirect calls to block speculative
+  /// execution.
+  bool UseRetpoline;
+
+  /// When using a retpoline thunk, call an externally provided thunk rather
+  /// than emitting one inside the compiler.
+  bool UseRetpolineExternalThunk;
+
  /// Use software floating point for code generation.
  bool UseSoftFloat;

@@ -506,6 +514,8 @@ public:
  bool hasPKU() const { return HasPKU; }
  bool hasMPX() const { return HasMPX; }
  bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }
+  bool useRetpoline() const { return UseRetpoline; }
+  bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }

  bool isXRaySupported() const override { return is64Bit(); }

@@ -639,6 +649,10 @@ public:
  /// compiler runtime or math libraries.
  bool hasSinCos() const;

+  /// If we are using retpolines, we need to expand indirectbr to avoid it
+  /// lowering to an actual indirect jump.
+  bool enableIndirectBrExpand() const override { return useRetpoline(); }
+
  /// Enable the MachineScheduler pass for all X86 subtargets.
  bool enableMachineScheduler() const override { return true; }

--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -305,6 +305,7 @@ public:
  void addPreRegAlloc() override;
  void addPostRegAlloc() override;
  void addPreEmitPass() override;
+  void addPreEmitPass2() override;
  void addPreSched2() override;
 };

@@ -334,6 +335,11 @@ void X86PassConfig::addIRPasses() {

  if (TM->getOptLevel() != CodeGenOpt::None)
    addPass(createInterleavedAccessPass());
+
+  // Add passes that handle indirect branch removal and insertion of a retpoline
+  // thunk. These will be a no-op unless a function subtarget has the retpoline
+  // feature enabled.
+  addPass(createIndirectBrExpandPass());
 }

 bool X86PassConfig::addInstSelector() {
@@ -418,3 +424,7 @@ void X86PassConfig::addPreEmitPass() {
    addPass(createX86EvexToVexInsts());
  }
 }
+
+void X86PassConfig::addPreEmitPass2() {
+  addPass(createX86RetpolineThunksPass());
+}
--- a/llvm/test/CodeGen/Mips/indirect-jump-hazard/calls.ll
+++ b/llvm/test/CodeGen/Mips/indirect-jump-hazard/calls.ll
@@ -0,0 +1,188 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc  < %s -mtriple=mips-mti-linux-gnu -relocation-model=static \
+; RUN:   -mips-tail-calls=1 -mcpu=mips32r2 -mattr=+use-indirect-jump-hazard \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=MIPS32R2
+; RUN: llc  < %s -mtriple=mips-img-linux-gnu -relocation-model=static \
+; RUN:   -mips-tail-calls=1 -mcpu=mips32r6 -mattr=+use-indirect-jump-hazard \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=MIPS32R6
+; RUN: llc  < %s -mtriple=mips64-mti-linux-gnu -relocation-model=static \
+; RUN:   -mips-tail-calls=1 -mcpu=mips64r2 -mattr=+use-indirect-jump-hazard \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=MIPS64R2
+; RUN: llc  < %s -mtriple=mips64-img-linux-gnu -relocation-model=static \
+; RUN:   -mips-tail-calls=1 -mcpu=mips64r6 -mattr=+use-indirect-jump-hazard \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=MIPS64R6
+
+; RUN: llc  < %s -mtriple=mips-mti-linux-gnu -relocation-model=pic \
+; RUN:   -mips-tail-calls=1 -mcpu=mips32r2 -mattr=+use-indirect-jump-hazard \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS32R2
+; RUN: llc  < %s -mtriple=mips-img-linux-gnu -relocation-model=pic \
+; RUN:   -mips-tail-calls=1 -mcpu=mips32r6 -mattr=+use-indirect-jump-hazard \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS32R6
+; RUN: llc  < %s -mtriple=mips64-mti-linux-gnu -relocation-model=pic \
+; RUN:   -mips-tail-calls=1 -mcpu=mips64r2 -mattr=+use-indirect-jump-hazard \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS64R2
+; RUN: llc  < %s -mtriple=mips64-img-linux-gnu -relocation-model=pic \
+; RUN:   -mips-tail-calls=1 -mcpu=mips64r6 -mattr=+use-indirect-jump-hazard \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS64R6
+
+define void @fooNonTail(void (i32)* nocapture %f1) nounwind {
+; MIPS32R2-LABEL: fooNonTail:
+; MIPS32R2:       # BB#0: # %entry
+; MIPS32R2-NEXT:    addiu $sp, $sp, -24
+; MIPS32R2-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32R2-NEXT:    move $1, $4
+; MIPS32R2-NEXT:    move $25, $1
+; MIPS32R2-NEXT:    jalr.hb $25
+; MIPS32R2-NEXT:    addiu $4, $zero, 13
+; MIPS32R2-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32R2-NEXT:    jr $ra
+; MIPS32R2-NEXT:    addiu $sp, $sp, 24
+;
+; MIPS32R6-LABEL: fooNonTail:
+; MIPS32R6:       # BB#0: # %entry
+; MIPS32R6-NEXT:    addiu $sp, $sp, -24
+; MIPS32R6-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32R6-NEXT:    move $1, $4
+; MIPS32R6-NEXT:    move $25, $1
+; MIPS32R6-NEXT:    jalr.hb $25
+; MIPS32R6-NEXT:    addiu $4, $zero, 13
+; MIPS32R6-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    addiu $sp, $sp, 24
+;
+; MIPS64R2-LABEL: fooNonTail:
+; MIPS64R2:       # BB#0: # %entry
+; MIPS64R2-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R2-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R2-NEXT:    move $1, $4
+; MIPS64R2-NEXT:    move $25, $1
+; MIPS64R2-NEXT:    jalr.hb $25
+; MIPS64R2-NEXT:    daddiu $4, $zero, 13
+; MIPS64R2-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R2-NEXT:    jr $ra
+; MIPS64R2-NEXT:    daddiu $sp, $sp, 16
+;
+; MIPS64R6-LABEL: fooNonTail:
+; MIPS64R6:       # BB#0: # %entry
+; MIPS64R6-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R6-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS64R6-NEXT:    move $1, $4
+; MIPS64R6-NEXT:    move $25, $1
+; MIPS64R6-NEXT:    jalr.hb $25
+; MIPS64R6-NEXT:    daddiu $4, $zero, 13
+; MIPS64R6-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS64R6-NEXT:    jr $ra
+; MIPS64R6-NEXT:    daddiu $sp, $sp, 16
+;
+; PIC-MIPS32R2-LABEL: fooNonTail:
+; PIC-MIPS32R2:       # BB#0: # %entry
+; PIC-MIPS32R2-NEXT:    addiu $sp, $sp, -24
+; PIC-MIPS32R2-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; PIC-MIPS32R2-NEXT:    move $1, $4
+; PIC-MIPS32R2-NEXT:    move $25, $1
+; PIC-MIPS32R2-NEXT:    jalr.hb $25
+; PIC-MIPS32R2-NEXT:    addiu $4, $zero, 13
+; PIC-MIPS32R2-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; PIC-MIPS32R2-NEXT:    jr $ra
+; PIC-MIPS32R2-NEXT:    addiu $sp, $sp, 24
+;
+; PIC-MIPS32R6-LABEL: fooNonTail:
+; PIC-MIPS32R6:       # BB#0: # %entry
+; PIC-MIPS32R6-NEXT:    addiu $sp, $sp, -24
+; PIC-MIPS32R6-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; PIC-MIPS32R6-NEXT:    move $1, $4
+; PIC-MIPS32R6-NEXT:    move $25, $1
+; PIC-MIPS32R6-NEXT:    jalr.hb $25
+; PIC-MIPS32R6-NEXT:    addiu $4, $zero, 13
+; PIC-MIPS32R6-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; PIC-MIPS32R6-NEXT:    jr $ra
+; PIC-MIPS32R6-NEXT:    addiu $sp, $sp, 24
+;
+; PIC-MIPS64R2-LABEL: fooNonTail:
+; PIC-MIPS64R2:       # BB#0: # %entry
+; PIC-MIPS64R2-NEXT:    daddiu $sp, $sp, -16
+; PIC-MIPS64R2-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; PIC-MIPS64R2-NEXT:    move $1, $4
+; PIC-MIPS64R2-NEXT:    move $25, $1
+; PIC-MIPS64R2-NEXT:    jalr.hb $25
+; PIC-MIPS64R2-NEXT:    daddiu $4, $zero, 13
+; PIC-MIPS64R2-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; PIC-MIPS64R2-NEXT:    jr $ra
+; PIC-MIPS64R2-NEXT:    daddiu $sp, $sp, 16
+;
+; PIC-MIPS64R6-LABEL: fooNonTail:
+; PIC-MIPS64R6:       # BB#0: # %entry
+; PIC-MIPS64R6-NEXT:    daddiu $sp, $sp, -16
+; PIC-MIPS64R6-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; PIC-MIPS64R6-NEXT:    move $1, $4
+; PIC-MIPS64R6-NEXT:    move $25, $1
+; PIC-MIPS64R6-NEXT:    jalr.hb $25
+; PIC-MIPS64R6-NEXT:    daddiu $4, $zero, 13
+; PIC-MIPS64R6-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; PIC-MIPS64R6-NEXT:    jr $ra
+; PIC-MIPS64R6-NEXT:    daddiu $sp, $sp, 16
+entry:
+  call void %f1(i32 13) nounwind
+  ret void
+}
+
+define i32 @fooTail(i32 (i32)* nocapture %f1) nounwind {
+; MIPS32R2-LABEL: fooTail:
+; MIPS32R2:       # BB#0: # %entry
+; MIPS32R2-NEXT:    move $1, $4
+; MIPS32R2-NEXT:    move $25, $1
+; MIPS32R2-NEXT:    jr.hb $25
+; MIPS32R2-NEXT:    addiu $4, $zero, 14
+;
+; MIPS32R6-LABEL: fooTail:
+; MIPS32R6:       # BB#0: # %entry
+; MIPS32R6-NEXT:    move $1, $4
+; MIPS32R6-NEXT:    move $25, $1
+; MIPS32R6-NEXT:    jr.hb $25
+; MIPS32R6-NEXT:    addiu $4, $zero, 14
+;
+; MIPS64R2-LABEL: fooTail:
+; MIPS64R2:       # BB#0: # %entry
+; MIPS64R2-NEXT:    move $1, $4
+; MIPS64R2-NEXT:    move $25, $1
+; MIPS64R2-NEXT:    jr.hb $25
+; MIPS64R2-NEXT:    daddiu $4, $zero, 14
+;
+; MIPS64R6-LABEL: fooTail:
+; MIPS64R6:       # BB#0: # %entry
+; MIPS64R6-NEXT:    move $1, $4
+; MIPS64R6-NEXT:    move $25, $1
+; MIPS64R6-NEXT:    jr.hb $25
+; MIPS64R6-NEXT:    daddiu $4, $zero, 14
+;
+; PIC-MIPS32R2-LABEL: fooTail:
+; PIC-MIPS32R2:       # BB#0: # %entry
+; PIC-MIPS32R2-NEXT:    move $1, $4
+; PIC-MIPS32R2-NEXT:    move $25, $1
+; PIC-MIPS32R2-NEXT:    jr.hb $25
+; PIC-MIPS32R2-NEXT:    addiu $4, $zero, 14
+;
+; PIC-MIPS32R6-LABEL: fooTail:
+; PIC-MIPS32R6:       # BB#0: # %entry
+; PIC-MIPS32R6-NEXT:    move $1, $4
+; PIC-MIPS32R6-NEXT:    move $25, $1
+; PIC-MIPS32R6-NEXT:    jr.hb $25
+; PIC-MIPS32R6-NEXT:    addiu $4, $zero, 14
+;
+; PIC-MIPS64R2-LABEL: fooTail:
+; PIC-MIPS64R2:       # BB#0: # %entry
+; PIC-MIPS64R2-NEXT:    move $1, $4
+; PIC-MIPS64R2-NEXT:    move $25, $1
+; PIC-MIPS64R2-NEXT:    jr.hb $25
+; PIC-MIPS64R2-NEXT:    daddiu $4, $zero, 14
+;
+; PIC-MIPS64R6-LABEL: fooTail:
+; PIC-MIPS64R6:       # BB#0: # %entry
+; PIC-MIPS64R6-NEXT:    move $1, $4
+; PIC-MIPS64R6-NEXT:    move $25, $1
+; PIC-MIPS64R6-NEXT:    jr.hb $25
+; PIC-MIPS64R6-NEXT:    daddiu $4, $zero, 14
+entry:
+   %0 = tail call i32 %f1(i32 14) nounwind
+   ret i32 %0
+}
--- a/llvm/test/CodeGen/Mips/indirect-jump-hazard/guards-verify-call.mir
+++ b/llvm/test/CodeGen/Mips/indirect-jump-hazard/guards-verify-call.mir
@@ -0,0 +1,58 @@
+# RUN: not llc -mtriple=mips-mti-linux-gnu -mcpu=mips32r2 %s \
+# RUN:         -start-after=expand-isel-pseudos -stop-after=expand-isel-pseudos \
+# RUN:         -verify-machineinstrs -mattr=+use-indirect-jump-hazard -o - 2>&1 \
+# RUN:   | FileCheck %s
+
+# Test that calls are checked when using indirect jumps guards (hazard variant).
+
+# CHECK: Bad machine code: invalid instruction when using jump guards!
+--- |
+  define i32 @fooTail(i32 (i32)* nocapture %f1) {
+  entry:
+    %0 = tail call i32 %f1(i32 14)
+    ret i32 %0
+  }
+...
+---
+name:            fooTail
+alignment:       2
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr32, preferred-register: '' }
+  - { id: 1, class: gpr32, preferred-register: '' }
+liveins:
+  - { reg: '%a0', virtual-reg: '%0' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:
+stack:
+constants:
+body:             |
+  bb.0.entry:
+    liveins: %a0
+
+    %0:gpr32 = COPY %a0
+    %1:gpr32 = ADDiu %zero, 14
+    %a0 = COPY %1
+    TAILCALLREG %0, csr_o32, implicit-def dead %at, implicit %a0
+
+...
--- a/llvm/test/CodeGen/Mips/indirect-jump-hazard/guards-verify-tailcall.mir
+++ b/llvm/test/CodeGen/Mips/indirect-jump-hazard/guards-verify-tailcall.mir
@@ -0,0 +1,59 @@
+# RUN: not llc -mtriple=mips-mti-linux-gnu -mcpu=mips32r2 %s \
+# RUN:         -start-after=expand-isel-pseudos -stop-after=expand-isel-pseudos \
+# RUN:         -verify-machineinstrs -mattr=+use-indirect-jump-hazard -o - 2>&1 \
+# RUN:   | FileCheck %s
+
+# That that tail calls are checked when using indirect jump guards (hazard variant).
+
+# CHECK: Bad machine code: invalid instruction when using jump guards!
+--- |
+  define i32 @fooTail(i32 (i32)* nocapture %f1) {
+  entry:
+    %0 = tail call i32 %f1(i32 14)
+    ret i32 %0
+  }
+
+...
+---
+name:            fooTail
+alignment:       2
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr32, preferred-register: '' }
+  - { id: 1, class: gpr32, preferred-register: '' }
+liveins:
+  - { reg: '%a0', virtual-reg: '%0' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:
+stack:
+constants:
+body:             |
+  bb.0.entry:
+    liveins: %a0
+
+    %0:gpr32 = COPY %a0
+    %1:gpr32 = ADDiu %zero, 14
+    %a0 = COPY %1
+    TAILCALLREG %0, csr_o32, implicit-def dead %at, implicit %a0
+
+...
--- a/llvm/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll
+++ b/llvm/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll
@@ -0,0 +1,657 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc  < %s -mtriple=mips-mti-linux-gnu -relocation-model=static \
+; RUN:   -mips-tail-calls=1 -mcpu=mips32r2 -mattr=+use-indirect-jump-hazard \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=MIPS32R2
+; RUN: llc  < %s -mtriple=mips-img-linux-gnu -relocation-model=static \
+; RUN:   -mips-tail-calls=1 -mcpu=mips32r6 -mattr=+use-indirect-jump-hazard \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=MIPS32R6
+; RUN: llc  < %s -mtriple=mips64-mti-linux-gnu -relocation-model=static \
+; RUN:   -mips-tail-calls=1 -mcpu=mips64r2 -mattr=+use-indirect-jump-hazard \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=MIPS64R2
+; RUN: llc  < %s -mtriple=mips64-img-linux-gnu -relocation-model=static \
+; RUN:   -mips-tail-calls=1 -mcpu=mips64r6 -mattr=+use-indirect-jump-hazard \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=MIPS64R6
+
+; RUN: llc  < %s -mtriple=mips-mti-linux-gnu -relocation-model=pic \
+; RUN:   -mips-tail-calls=1 -mcpu=mips32r2 -mattr=+use-indirect-jump-hazard \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS32R2
+; RUN: llc  < %s -mtriple=mips-img-linux-gnu -relocation-model=pic \
+; RUN:   -mips-tail-calls=1 -mcpu=mips32r6 -mattr=+use-indirect-jump-hazard \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS32R6
+; RUN: llc  < %s -mtriple=mips64-mti-linux-gnu -relocation-model=pic \
+; RUN:   -mips-tail-calls=1 -mcpu=mips64r2 -mattr=+use-indirect-jump-hazard \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS64R2
+; RUN: llc  < %s -mtriple=mips64-img-linux-gnu -relocation-model=pic \
+; RUN:   -mips-tail-calls=1 -mcpu=mips64r6 -mattr=+use-indirect-jump-hazard \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=PIC-MIPS64R6
+
+@.str = private unnamed_addr constant [2 x i8] c"A\00", align 1
+@.str.1 = private unnamed_addr constant [2 x i8] c"B\00", align 1
+@.str.2 = private unnamed_addr constant [2 x i8] c"C\00", align 1
+@.str.3 = private unnamed_addr constant [2 x i8] c"D\00", align 1
+@.str.4 = private unnamed_addr constant [2 x i8] c"E\00", align 1
+@.str.5 = private unnamed_addr constant [2 x i8] c"F\00", align 1
+@.str.6 = private unnamed_addr constant [2 x i8] c"G\00", align 1
+@.str.7 = private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+
+define i8* @_Z3fooi(i32 signext %Letter) {
+; MIPS32R2-LABEL: _Z3fooi:
+; MIPS32R2:       # BB#0: # %entry
+; MIPS32R2-NEXT:    addiu $sp, $sp, -16
+; MIPS32R2-NEXT:  $cfi0:
+; MIPS32R2-NEXT:    .cfi_def_cfa_offset 16
+; MIPS32R2-NEXT:    sltiu $1, $4, 7
+; MIPS32R2-NEXT:    beqz $1, $BB0_3
+; MIPS32R2-NEXT:    sw $4, 4($sp)
+; MIPS32R2-NEXT:  $BB0_1: # %entry
+; MIPS32R2-NEXT:    sll $1, $4, 2
+; MIPS32R2-NEXT:    lui $2, %hi($JTI0_0)
+; MIPS32R2-NEXT:    addu $1, $1, $2
+; MIPS32R2-NEXT:    lw $1, %lo($JTI0_0)($1)
+; MIPS32R2-NEXT:    jr.hb $1
+; MIPS32R2-NEXT:    nop
+; MIPS32R2-NEXT:  $BB0_2: # %sw.bb
+; MIPS32R2-NEXT:    lui $1, %hi($.str)
+; MIPS32R2-NEXT:    addiu $1, $1, %lo($.str)
+; MIPS32R2-NEXT:    j $BB0_10
+; MIPS32R2-NEXT:    sw $1, 8($sp)
+; MIPS32R2-NEXT:  $BB0_3: # %sw.epilog
+; MIPS32R2-NEXT:    lui $1, %hi($.str.7)
+; MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.7)
+; MIPS32R2-NEXT:    j $BB0_10
+; MIPS32R2-NEXT:    sw $1, 8($sp)
+; MIPS32R2-NEXT:  $BB0_4: # %sw.bb1
+; MIPS32R2-NEXT:    lui $1, %hi($.str.1)
+; MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.1)
+; MIPS32R2-NEXT:    j $BB0_10
+; MIPS32R2-NEXT:    sw $1, 8($sp)
+; MIPS32R2-NEXT:  $BB0_5: # %sw.bb2
+; MIPS32R2-NEXT:    lui $1, %hi($.str.2)
+; MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.2)
+; MIPS32R2-NEXT:    j $BB0_10
+; MIPS32R2-NEXT:    sw $1, 8($sp)
+; MIPS32R2-NEXT:  $BB0_6: # %sw.bb3
+; MIPS32R2-NEXT:    lui $1, %hi($.str.3)
+; MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.3)
+; MIPS32R2-NEXT:    j $BB0_10
+; MIPS32R2-NEXT:    sw $1, 8($sp)
+; MIPS32R2-NEXT:  $BB0_7: # %sw.bb4
+; MIPS32R2-NEXT:    lui $1, %hi($.str.4)
+; MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.4)
+; MIPS32R2-NEXT:    j $BB0_10
+; MIPS32R2-NEXT:    sw $1, 8($sp)
+; MIPS32R2-NEXT:  $BB0_8: # %sw.bb5
+; MIPS32R2-NEXT:    lui $1, %hi($.str.5)
+; MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.5)
+; MIPS32R2-NEXT:    j $BB0_10
+; MIPS32R2-NEXT:    sw $1, 8($sp)
+; MIPS32R2-NEXT:  $BB0_9: # %sw.bb6
+; MIPS32R2-NEXT:    lui $1, %hi($.str.6)
+; MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.6)
+; MIPS32R2-NEXT:    sw $1, 8($sp)
+; MIPS32R2-NEXT:  $BB0_10: # %return
+; MIPS32R2-NEXT:    lw $2, 8($sp)
+; MIPS32R2-NEXT:    jr $ra
+; MIPS32R2-NEXT:    addiu $sp, $sp, 16
+;
+; MIPS32R6-LABEL: _Z3fooi:
+; MIPS32R6:       # BB#0: # %entry
+; MIPS32R6-NEXT:    addiu $sp, $sp, -16
+; MIPS32R6-NEXT:  $cfi0:
+; MIPS32R6-NEXT:    .cfi_def_cfa_offset 16
+; MIPS32R6-NEXT:    sltiu $1, $4, 7
+; MIPS32R6-NEXT:    beqz $1, $BB0_3
+; MIPS32R6-NEXT:    sw $4, 4($sp)
+; MIPS32R6-NEXT:  $BB0_1: # %entry
+; MIPS32R6-NEXT:    sll $1, $4, 2
+; MIPS32R6-NEXT:    lui $2, %hi($JTI0_0)
+; MIPS32R6-NEXT:    addu $1, $1, $2
+; MIPS32R6-NEXT:    lw $1, %lo($JTI0_0)($1)
+; MIPS32R6-NEXT:    jr.hb $1
+; MIPS32R6-NEXT:    nop
+; MIPS32R6-NEXT:  $BB0_2: # %sw.bb
+; MIPS32R6-NEXT:    lui $1, %hi($.str)
+; MIPS32R6-NEXT:    addiu $1, $1, %lo($.str)
+; MIPS32R6-NEXT:    j $BB0_10
+; MIPS32R6-NEXT:    sw $1, 8($sp)
+; MIPS32R6-NEXT:  $BB0_3: # %sw.epilog
+; MIPS32R6-NEXT:    lui $1, %hi($.str.7)
+; MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.7)
+; MIPS32R6-NEXT:    j $BB0_10
+; MIPS32R6-NEXT:    sw $1, 8($sp)
+; MIPS32R6-NEXT:  $BB0_4: # %sw.bb1
+; MIPS32R6-NEXT:    lui $1, %hi($.str.1)
+; MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.1)
+; MIPS32R6-NEXT:    j $BB0_10
+; MIPS32R6-NEXT:    sw $1, 8($sp)
+; MIPS32R6-NEXT:  $BB0_5: # %sw.bb2
+; MIPS32R6-NEXT:    lui $1, %hi($.str.2)
+; MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.2)
+; MIPS32R6-NEXT:    j $BB0_10
+; MIPS32R6-NEXT:    sw $1, 8($sp)
+; MIPS32R6-NEXT:  $BB0_6: # %sw.bb3
+; MIPS32R6-NEXT:    lui $1, %hi($.str.3)
+; MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.3)
+; MIPS32R6-NEXT:    j $BB0_10
+; MIPS32R6-NEXT:    sw $1, 8($sp)
+; MIPS32R6-NEXT:  $BB0_7: # %sw.bb4
+; MIPS32R6-NEXT:    lui $1, %hi($.str.4)
+; MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.4)
+; MIPS32R6-NEXT:    j $BB0_10
+; MIPS32R6-NEXT:    sw $1, 8($sp)
+; MIPS32R6-NEXT:  $BB0_8: # %sw.bb5
+; MIPS32R6-NEXT:    lui $1, %hi($.str.5)
+; MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.5)
+; MIPS32R6-NEXT:    j $BB0_10
+; MIPS32R6-NEXT:    sw $1, 8($sp)
+; MIPS32R6-NEXT:  $BB0_9: # %sw.bb6
+; MIPS32R6-NEXT:    lui $1, %hi($.str.6)
+; MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.6)
+; MIPS32R6-NEXT:    sw $1, 8($sp)
+; MIPS32R6-NEXT:  $BB0_10: # %return
+; MIPS32R6-NEXT:    lw $2, 8($sp)
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    addiu $sp, $sp, 16
+;
+; MIPS64R2-LABEL: _Z3fooi:
+; MIPS64R2:       # BB#0: # %entry
+; MIPS64R2-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R2-NEXT:  .Lcfi0:
+; MIPS64R2-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R2-NEXT:    sw $4, 4($sp)
+; MIPS64R2-NEXT:    lwu $2, 4($sp)
+; MIPS64R2-NEXT:    sltiu $1, $2, 7
+; MIPS64R2-NEXT:    beqz $1, .LBB0_3
+; MIPS64R2-NEXT:    nop
+; MIPS64R2-NEXT:  .LBB0_1: # %entry
+; MIPS64R2-NEXT:    daddiu $1, $zero, 8
+; MIPS64R2-NEXT:    dmult $2, $1
+; MIPS64R2-NEXT:    mflo $1
+; MIPS64R2-NEXT:    lui $2, %highest(.LJTI0_0)
+; MIPS64R2-NEXT:    daddiu $2, $2, %higher(.LJTI0_0)
+; MIPS64R2-NEXT:    dsll $2, $2, 16
+; MIPS64R2-NEXT:    daddiu $2, $2, %hi(.LJTI0_0)
+; MIPS64R2-NEXT:    dsll $2, $2, 16
+; MIPS64R2-NEXT:    daddu $1, $1, $2
+; MIPS64R2-NEXT:    ld $1, %lo(.LJTI0_0)($1)
+; MIPS64R2-NEXT:    jr.hb $1
+; MIPS64R2-NEXT:    nop
+; MIPS64R2-NEXT:  .LBB0_2: # %sw.bb
+; MIPS64R2-NEXT:    lui $1, %highest(.L.str)
+; MIPS64R2-NEXT:    daddiu $1, $1, %higher(.L.str)
+; MIPS64R2-NEXT:    dsll $1, $1, 16
+; MIPS64R2-NEXT:    daddiu $1, $1, %hi(.L.str)
+; MIPS64R2-NEXT:    dsll $1, $1, 16
+; MIPS64R2-NEXT:    daddiu $1, $1, %lo(.L.str)
+; MIPS64R2-NEXT:    j .LBB0_10
+; MIPS64R2-NEXT:    sd $1, 8($sp)
+; MIPS64R2-NEXT:  .LBB0_3: # %sw.epilog
+; MIPS64R2-NEXT:    lui $1, %highest(.L.str.7)
+; MIPS64R2-NEXT:    daddiu $1, $1, %higher(.L.str.7)
+; MIPS64R2-NEXT:    dsll $1, $1, 16
+; MIPS64R2-NEXT:    daddiu $1, $1, %hi(.L.str.7)
+; MIPS64R2-NEXT:    dsll $1, $1, 16
+; MIPS64R2-NEXT:    daddiu $1, $1, %lo(.L.str.7)
+; MIPS64R2-NEXT:    j .LBB0_10
+; MIPS64R2-NEXT:    sd $1, 8($sp)
+; MIPS64R2-NEXT:  .LBB0_4: # %sw.bb1
+; MIPS64R2-NEXT:    lui $1, %highest(.L.str.1)
+; MIPS64R2-NEXT:    daddiu $1, $1, %higher(.L.str.1)
+; MIPS64R2-NEXT:    dsll $1, $1, 16
+; MIPS64R2-NEXT:    daddiu $1, $1, %hi(.L.str.1)
+; MIPS64R2-NEXT:    dsll $1, $1, 16
+; MIPS64R2-NEXT:    daddiu $1, $1, %lo(.L.str.1)
+; MIPS64R2-NEXT:    j .LBB0_10
+; MIPS64R2-NEXT:    sd $1, 8($sp)
+; MIPS64R2-NEXT:  .LBB0_5: # %sw.bb2
+; MIPS64R2-NEXT:    lui $1, %highest(.L.str.2)
+; MIPS64R2-NEXT:    daddiu $1, $1, %higher(.L.str.2)
+; MIPS64R2-NEXT:    dsll $1, $1, 16
+; MIPS64R2-NEXT:    daddiu $1, $1, %hi(.L.str.2)
+; MIPS64R2-NEXT:    dsll $1, $1, 16
+; MIPS64R2-NEXT:    daddiu $1, $1, %lo(.L.str.2)
+; MIPS64R2-NEXT:    j .LBB0_10
+; MIPS64R2-NEXT:    sd $1, 8($sp)
+; MIPS64R2-NEXT:  .LBB0_6: # %sw.bb3
+; MIPS64R2-NEXT:    lui $1, %highest(.L.str.3)
+; MIPS64R2-NEXT:    daddiu $1, $1, %higher(.L.str.3)
+; MIPS64R2-NEXT:    dsll $1, $1, 16
+; MIPS64R2-NEXT:    daddiu $1, $1, %hi(.L.str.3)
+; MIPS64R2-NEXT:    dsll $1, $1, 16
+; MIPS64R2-NEXT:    daddiu $1, $1, %lo(.L.str.3)
+; MIPS64R2-NEXT:    j .LBB0_10
+; MIPS64R2-NEXT:    sd $1, 8($sp)
+; MIPS64R2-NEXT:  .LBB0_7: # %sw.bb4
+; MIPS64R2-NEXT:    lui $1, %highest(.L.str.4)
+; MIPS64R2-NEXT:    daddiu $1, $1, %higher(.L.str.4)
+; MIPS64R2-NEXT:    dsll $1, $1, 16
+; MIPS64R2-NEXT:    daddiu $1, $1, %hi(.L.str.4)
+; MIPS64R2-NEXT:    dsll $1, $1, 16
+; MIPS64R2-NEXT:    daddiu $1, $1, %lo(.L.str.4)
+; MIPS64R2-NEXT:    j .LBB0_10
+; MIPS64R2-NEXT:    sd $1, 8($sp)
+; MIPS64R2-NEXT:  .LBB0_8: # %sw.bb5
+; MIPS64R2-NEXT:    lui $1, %highest(.L.str.5)
+; MIPS64R2-NEXT:    daddiu $1, $1, %higher(.L.str.5)
+; MIPS64R2-NEXT:    dsll $1, $1, 16
+; MIPS64R2-NEXT:    daddiu $1, $1, %hi(.L.str.5)
+; MIPS64R2-NEXT:    dsll $1, $1, 16
+; MIPS64R2-NEXT:    daddiu $1, $1, %lo(.L.str.5)
+; MIPS64R2-NEXT:    j .LBB0_10
+; MIPS64R2-NEXT:    sd $1, 8($sp)
+; MIPS64R2-NEXT:  .LBB0_9: # %sw.bb6
+; MIPS64R2-NEXT:    lui $1, %highest(.L.str.6)
+; MIPS64R2-NEXT:    daddiu $1, $1, %higher(.L.str.6)
+; MIPS64R2-NEXT:    dsll $1, $1, 16
+; MIPS64R2-NEXT:    daddiu $1, $1, %hi(.L.str.6)
+; MIPS64R2-NEXT:    dsll $1, $1, 16
+; MIPS64R2-NEXT:    daddiu $1, $1, %lo(.L.str.6)
+; MIPS64R2-NEXT:    sd $1, 8($sp)
+; MIPS64R2-NEXT:  .LBB0_10: # %return
+; MIPS64R2-NEXT:    ld $2, 8($sp)
+; MIPS64R2-NEXT:    jr $ra
+; MIPS64R2-NEXT:    daddiu $sp, $sp, 16
+;
+; MIPS64R6-LABEL: _Z3fooi:
+; MIPS64R6:       # BB#0: # %entry
+; MIPS64R6-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R6-NEXT:  .Lcfi0:
+; MIPS64R6-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R6-NEXT:    sw $4, 4($sp)
+; MIPS64R6-NEXT:    lwu $2, 4($sp)
+; MIPS64R6-NEXT:    sltiu $1, $2, 7
+; MIPS64R6-NEXT:    beqzc $1, .LBB0_3
+; MIPS64R6-NEXT:  .LBB0_1: # %entry
+; MIPS64R6-NEXT:    dsll $1, $2, 3
+; MIPS64R6-NEXT:    lui $2, %highest(.LJTI0_0)
+; MIPS64R6-NEXT:    daddiu $2, $2, %higher(.LJTI0_0)
+; MIPS64R6-NEXT:    dsll $2, $2, 16
+; MIPS64R6-NEXT:    daddiu $2, $2, %hi(.LJTI0_0)
+; MIPS64R6-NEXT:    dsll $2, $2, 16
+; MIPS64R6-NEXT:    daddu $1, $1, $2
+; MIPS64R6-NEXT:    ld $1, %lo(.LJTI0_0)($1)
+; MIPS64R6-NEXT:    jr.hb $1
+; MIPS64R6-NEXT:    nop
+; MIPS64R6-NEXT:  .LBB0_2: # %sw.bb
+; MIPS64R6-NEXT:    lui $1, %highest(.L.str)
+; MIPS64R6-NEXT:    daddiu $1, $1, %higher(.L.str)
+; MIPS64R6-NEXT:    dsll $1, $1, 16
+; MIPS64R6-NEXT:    daddiu $1, $1, %hi(.L.str)
+; MIPS64R6-NEXT:    dsll $1, $1, 16
+; MIPS64R6-NEXT:    daddiu $1, $1, %lo(.L.str)
+; MIPS64R6-NEXT:    j .LBB0_10
+; MIPS64R6-NEXT:    sd $1, 8($sp)
+; MIPS64R6-NEXT:  .LBB0_3: # %sw.epilog
+; MIPS64R6-NEXT:    lui $1, %highest(.L.str.7)
+; MIPS64R6-NEXT:    daddiu $1, $1, %higher(.L.str.7)
+; MIPS64R6-NEXT:    dsll $1, $1, 16
+; MIPS64R6-NEXT:    daddiu $1, $1, %hi(.L.str.7)
+; MIPS64R6-NEXT:    dsll $1, $1, 16
+; MIPS64R6-NEXT:    daddiu $1, $1, %lo(.L.str.7)
+; MIPS64R6-NEXT:    j .LBB0_10
+; MIPS64R6-NEXT:    sd $1, 8($sp)
+; MIPS64R6-NEXT:  .LBB0_4: # %sw.bb1
+; MIPS64R6-NEXT:    lui $1, %highest(.L.str.1)
+; MIPS64R6-NEXT:    daddiu $1, $1, %higher(.L.str.1)
+; MIPS64R6-NEXT:    dsll $1, $1, 16
+; MIPS64R6-NEXT:    daddiu $1, $1, %hi(.L.str.1)
+; MIPS64R6-NEXT:    dsll $1, $1, 16
+; MIPS64R6-NEXT:    daddiu $1, $1, %lo(.L.str.1)
+; MIPS64R6-NEXT:    j .LBB0_10
+; MIPS64R6-NEXT:    sd $1, 8($sp)
+; MIPS64R6-NEXT:  .LBB0_5: # %sw.bb2
+; MIPS64R6-NEXT:    lui $1, %highest(.L.str.2)
+; MIPS64R6-NEXT:    daddiu $1, $1, %higher(.L.str.2)
+; MIPS64R6-NEXT:    dsll $1, $1, 16
+; MIPS64R6-NEXT:    daddiu $1, $1, %hi(.L.str.2)
+; MIPS64R6-NEXT:    dsll $1, $1, 16
+; MIPS64R6-NEXT:    daddiu $1, $1, %lo(.L.str.2)
+; MIPS64R6-NEXT:    j .LBB0_10
+; MIPS64R6-NEXT:    sd $1, 8($sp)
+; MIPS64R6-NEXT:  .LBB0_6: # %sw.bb3
+; MIPS64R6-NEXT:    lui $1, %highest(.L.str.3)
+; MIPS64R6-NEXT:    daddiu $1, $1, %higher(.L.str.3)
+; MIPS64R6-NEXT:    dsll $1, $1, 16
+; MIPS64R6-NEXT:    daddiu $1, $1, %hi(.L.str.3)
+; MIPS64R6-NEXT:    dsll $1, $1, 16
+; MIPS64R6-NEXT:    daddiu $1, $1, %lo(.L.str.3)
+; MIPS64R6-NEXT:    j .LBB0_10
+; MIPS64R6-NEXT:    sd $1, 8($sp)
+; MIPS64R6-NEXT:  .LBB0_7: # %sw.bb4
+; MIPS64R6-NEXT:    lui $1, %highest(.L.str.4)
+; MIPS64R6-NEXT:    daddiu $1, $1, %higher(.L.str.4)
+; MIPS64R6-NEXT:    dsll $1, $1, 16
+; MIPS64R6-NEXT:    daddiu $1, $1, %hi(.L.str.4)
+; MIPS64R6-NEXT:    dsll $1, $1, 16
+; MIPS64R6-NEXT:    daddiu $1, $1, %lo(.L.str.4)
+; MIPS64R6-NEXT:    j .LBB0_10
+; MIPS64R6-NEXT:    sd $1, 8($sp)
+; MIPS64R6-NEXT:  .LBB0_8: # %sw.bb5
+; MIPS64R6-NEXT:    lui $1, %highest(.L.str.5)
+; MIPS64R6-NEXT:    daddiu $1, $1, %higher(.L.str.5)
+; MIPS64R6-NEXT:    dsll $1, $1, 16
+; MIPS64R6-NEXT:    daddiu $1, $1, %hi(.L.str.5)
+; MIPS64R6-NEXT:    dsll $1, $1, 16
+; MIPS64R6-NEXT:    daddiu $1, $1, %lo(.L.str.5)
+; MIPS64R6-NEXT:    j .LBB0_10
+; MIPS64R6-NEXT:    sd $1, 8($sp)
+; MIPS64R6-NEXT:  .LBB0_9: # %sw.bb6
+; MIPS64R6-NEXT:    lui $1, %highest(.L.str.6)
+; MIPS64R6-NEXT:    daddiu $1, $1, %higher(.L.str.6)
+; MIPS64R6-NEXT:    dsll $1, $1, 16
+; MIPS64R6-NEXT:    daddiu $1, $1, %hi(.L.str.6)
+; MIPS64R6-NEXT:    dsll $1, $1, 16
+; MIPS64R6-NEXT:    daddiu $1, $1, %lo(.L.str.6)
+; MIPS64R6-NEXT:    sd $1, 8($sp)
+; MIPS64R6-NEXT:  .LBB0_10: # %return
+; MIPS64R6-NEXT:    ld $2, 8($sp)
+; MIPS64R6-NEXT:    jr $ra
+; MIPS64R6-NEXT:    daddiu $sp, $sp, 16
+;
+; PIC-MIPS32R2-LABEL: _Z3fooi:
+; PIC-MIPS32R2:       # BB#0: # %entry
+; PIC-MIPS32R2-NEXT:    lui $2, %hi(_gp_disp)
+; PIC-MIPS32R2-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; PIC-MIPS32R2-NEXT:    addiu $sp, $sp, -16
+; PIC-MIPS32R2-NEXT:  $cfi0:
+; PIC-MIPS32R2-NEXT:    .cfi_def_cfa_offset 16
+; PIC-MIPS32R2-NEXT:    addu $2, $2, $25
+; PIC-MIPS32R2-NEXT:    sltiu $1, $4, 7
+; PIC-MIPS32R2-NEXT:    beqz $1, $BB0_3
+; PIC-MIPS32R2-NEXT:    sw $4, 4($sp)
+; PIC-MIPS32R2-NEXT:  $BB0_1: # %entry
+; PIC-MIPS32R2-NEXT:    sll $1, $4, 2
+; PIC-MIPS32R2-NEXT:    lw $3, %got($JTI0_0)($2)
+; PIC-MIPS32R2-NEXT:    addu $1, $1, $3
+; PIC-MIPS32R2-NEXT:    lw $1, %lo($JTI0_0)($1)
+; PIC-MIPS32R2-NEXT:    addu $1, $1, $2
+; PIC-MIPS32R2-NEXT:    jr.hb $1
+; PIC-MIPS32R2-NEXT:    nop
+; PIC-MIPS32R2-NEXT:  $BB0_2: # %sw.bb
+; PIC-MIPS32R2-NEXT:    lw $1, %got($.str)($2)
+; PIC-MIPS32R2-NEXT:    addiu $1, $1, %lo($.str)
+; PIC-MIPS32R2-NEXT:    b $BB0_10
+; PIC-MIPS32R2-NEXT:    sw $1, 8($sp)
+; PIC-MIPS32R2-NEXT:  $BB0_3: # %sw.epilog
+; PIC-MIPS32R2-NEXT:    lw $1, %got($.str.7)($2)
+; PIC-MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.7)
+; PIC-MIPS32R2-NEXT:    b $BB0_10
+; PIC-MIPS32R2-NEXT:    sw $1, 8($sp)
+; PIC-MIPS32R2-NEXT:  $BB0_4: # %sw.bb1
+; PIC-MIPS32R2-NEXT:    lw $1, %got($.str.1)($2)
+; PIC-MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.1)
+; PIC-MIPS32R2-NEXT:    b $BB0_10
+; PIC-MIPS32R2-NEXT:    sw $1, 8($sp)
+; PIC-MIPS32R2-NEXT:  $BB0_5: # %sw.bb2
+; PIC-MIPS32R2-NEXT:    lw $1, %got($.str.2)($2)
+; PIC-MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.2)
+; PIC-MIPS32R2-NEXT:    b $BB0_10
+; PIC-MIPS32R2-NEXT:    sw $1, 8($sp)
+; PIC-MIPS32R2-NEXT:  $BB0_6: # %sw.bb3
+; PIC-MIPS32R2-NEXT:    lw $1, %got($.str.3)($2)
+; PIC-MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.3)
+; PIC-MIPS32R2-NEXT:    b $BB0_10
+; PIC-MIPS32R2-NEXT:    sw $1, 8($sp)
+; PIC-MIPS32R2-NEXT:  $BB0_7: # %sw.bb4
+; PIC-MIPS32R2-NEXT:    lw $1, %got($.str.4)($2)
+; PIC-MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.4)
+; PIC-MIPS32R2-NEXT:    b $BB0_10
+; PIC-MIPS32R2-NEXT:    sw $1, 8($sp)
+; PIC-MIPS32R2-NEXT:  $BB0_8: # %sw.bb5
+; PIC-MIPS32R2-NEXT:    lw $1, %got($.str.5)($2)
+; PIC-MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.5)
+; PIC-MIPS32R2-NEXT:    b $BB0_10
+; PIC-MIPS32R2-NEXT:    sw $1, 8($sp)
+; PIC-MIPS32R2-NEXT:  $BB0_9: # %sw.bb6
+; PIC-MIPS32R2-NEXT:    lw $1, %got($.str.6)($2)
+; PIC-MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.6)
+; PIC-MIPS32R2-NEXT:    sw $1, 8($sp)
+; PIC-MIPS32R2-NEXT:  $BB0_10: # %return
+; PIC-MIPS32R2-NEXT:    lw $2, 8($sp)
+; PIC-MIPS32R2-NEXT:    jr $ra
+; PIC-MIPS32R2-NEXT:    addiu $sp, $sp, 16
+;
+; PIC-MIPS32R6-LABEL: _Z3fooi:
+; PIC-MIPS32R6:       # BB#0: # %entry
+; PIC-MIPS32R6-NEXT:    lui $2, %hi(_gp_disp)
+; PIC-MIPS32R6-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; PIC-MIPS32R6-NEXT:    addiu $sp, $sp, -16
+; PIC-MIPS32R6-NEXT:  $cfi0:
+; PIC-MIPS32R6-NEXT:    .cfi_def_cfa_offset 16
+; PIC-MIPS32R6-NEXT:    addu $2, $2, $25
+; PIC-MIPS32R6-NEXT:    sltiu $1, $4, 7
+; PIC-MIPS32R6-NEXT:    beqz $1, $BB0_3
+; PIC-MIPS32R6-NEXT:    sw $4, 4($sp)
+; PIC-MIPS32R6-NEXT:  $BB0_1: # %entry
+; PIC-MIPS32R6-NEXT:    sll $1, $4, 2
+; PIC-MIPS32R6-NEXT:    lw $3, %got($JTI0_0)($2)
+; PIC-MIPS32R6-NEXT:    addu $1, $1, $3
+; PIC-MIPS32R6-NEXT:    lw $1, %lo($JTI0_0)($1)
+; PIC-MIPS32R6-NEXT:    addu $1, $1, $2
+; PIC-MIPS32R6-NEXT:    jr.hb $1
+; PIC-MIPS32R6-NEXT:    nop
+; PIC-MIPS32R6-NEXT:  $BB0_2: # %sw.bb
+; PIC-MIPS32R6-NEXT:    lw $1, %got($.str)($2)
+; PIC-MIPS32R6-NEXT:    addiu $1, $1, %lo($.str)
+; PIC-MIPS32R6-NEXT:    b $BB0_10
+; PIC-MIPS32R6-NEXT:    sw $1, 8($sp)
+; PIC-MIPS32R6-NEXT:  $BB0_3: # %sw.epilog
+; PIC-MIPS32R6-NEXT:    lw $1, %got($.str.7)($2)
+; PIC-MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.7)
+; PIC-MIPS32R6-NEXT:    b $BB0_10
+; PIC-MIPS32R6-NEXT:    sw $1, 8($sp)
+; PIC-MIPS32R6-NEXT:  $BB0_4: # %sw.bb1
+; PIC-MIPS32R6-NEXT:    lw $1, %got($.str.1)($2)
+; PIC-MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.1)
+; PIC-MIPS32R6-NEXT:    b $BB0_10
+; PIC-MIPS32R6-NEXT:    sw $1, 8($sp)
+; PIC-MIPS32R6-NEXT:  $BB0_5: # %sw.bb2
+; PIC-MIPS32R6-NEXT:    lw $1, %got($.str.2)($2)
+; PIC-MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.2)
+; PIC-MIPS32R6-NEXT:    b $BB0_10
+; PIC-MIPS32R6-NEXT:    sw $1, 8($sp)
+; PIC-MIPS32R6-NEXT:  $BB0_6: # %sw.bb3
+; PIC-MIPS32R6-NEXT:    lw $1, %got($.str.3)($2)
+; PIC-MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.3)
+; PIC-MIPS32R6-NEXT:    b $BB0_10
+; PIC-MIPS32R6-NEXT:    sw $1, 8($sp)
+; PIC-MIPS32R6-NEXT:  $BB0_7: # %sw.bb4
+; PIC-MIPS32R6-NEXT:    lw $1, %got($.str.4)($2)
+; PIC-MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.4)
+; PIC-MIPS32R6-NEXT:    b $BB0_10
+; PIC-MIPS32R6-NEXT:    sw $1, 8($sp)
+; PIC-MIPS32R6-NEXT:  $BB0_8: # %sw.bb5
+; PIC-MIPS32R6-NEXT:    lw $1, %got($.str.5)($2)
+; PIC-MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.5)
+; PIC-MIPS32R6-NEXT:    b $BB0_10
+; PIC-MIPS32R6-NEXT:    sw $1, 8($sp)
+; PIC-MIPS32R6-NEXT:  $BB0_9: # %sw.bb6
+; PIC-MIPS32R6-NEXT:    lw $1, %got($.str.6)($2)
+; PIC-MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.6)
+; PIC-MIPS32R6-NEXT:    sw $1, 8($sp)
+; PIC-MIPS32R6-NEXT:  $BB0_10: # %return
+; PIC-MIPS32R6-NEXT:    lw $2, 8($sp)
+; PIC-MIPS32R6-NEXT:    jr $ra
+; PIC-MIPS32R6-NEXT:    addiu $sp, $sp, 16
+;
+; PIC-MIPS64R2-LABEL: _Z3fooi:
+; PIC-MIPS64R2:       # BB#0: # %entry
+; PIC-MIPS64R2-NEXT:    daddiu $sp, $sp, -16
+; PIC-MIPS64R2-NEXT:  .Lcfi0:
+; PIC-MIPS64R2-NEXT:    .cfi_def_cfa_offset 16
+; PIC-MIPS64R2-NEXT:    lui $1, %hi(%neg(%gp_rel(_Z3fooi)))
+; PIC-MIPS64R2-NEXT:    daddu $1, $1, $25
+; PIC-MIPS64R2-NEXT:    daddiu $2, $1, %lo(%neg(%gp_rel(_Z3fooi)))
+; PIC-MIPS64R2-NEXT:    sw $4, 4($sp)
+; PIC-MIPS64R2-NEXT:    lwu $3, 4($sp)
+; PIC-MIPS64R2-NEXT:    sltiu $1, $3, 7
+; PIC-MIPS64R2-NEXT:    beqz $1, .LBB0_3
+; PIC-MIPS64R2-NEXT:    nop
+; PIC-MIPS64R2-NEXT:  .LBB0_1: # %entry
+; PIC-MIPS64R2-NEXT:    daddiu $1, $zero, 8
+; PIC-MIPS64R2-NEXT:    dmult $3, $1
+; PIC-MIPS64R2-NEXT:    mflo $1
+; PIC-MIPS64R2-NEXT:    ld $3, %got_page(.LJTI0_0)($2)
+; PIC-MIPS64R2-NEXT:    daddu $1, $1, $3
+; PIC-MIPS64R2-NEXT:    ld $1, %got_ofst(.LJTI0_0)($1)
+; PIC-MIPS64R2-NEXT:    daddu $1, $1, $2
+; PIC-MIPS64R2-NEXT:    jr.hb $1
+; PIC-MIPS64R2-NEXT:    nop
+; PIC-MIPS64R2-NEXT:  .LBB0_2: # %sw.bb
+; PIC-MIPS64R2-NEXT:    ld $1, %got_page(.L.str)($2)
+; PIC-MIPS64R2-NEXT:    daddiu $1, $1, %got_ofst(.L.str)
+; PIC-MIPS64R2-NEXT:    b .LBB0_10
+; PIC-MIPS64R2-NEXT:    sd $1, 8($sp)
+; PIC-MIPS64R2-NEXT:  .LBB0_3: # %sw.epilog
+; PIC-MIPS64R2-NEXT:    ld $1, %got_page(.L.str.7)($2)
+; PIC-MIPS64R2-NEXT:    daddiu $1, $1, %got_ofst(.L.str.7)
+; PIC-MIPS64R2-NEXT:    b .LBB0_10
+; PIC-MIPS64R2-NEXT:    sd $1, 8($sp)
+; PIC-MIPS64R2-NEXT:  .LBB0_4: # %sw.bb1
+; PIC-MIPS64R2-NEXT:    ld $1, %got_page(.L.str.1)($2)
+; PIC-MIPS64R2-NEXT:    daddiu $1, $1, %got_ofst(.L.str.1)
+; PIC-MIPS64R2-NEXT:    b .LBB0_10
+; PIC-MIPS64R2-NEXT:    sd $1, 8($sp)
+; PIC-MIPS64R2-NEXT:  .LBB0_5: # %sw.bb2
+; PIC-MIPS64R2-NEXT:    ld $1, %got_page(.L.str.2)($2)
+; PIC-MIPS64R2-NEXT:    daddiu $1, $1, %got_ofst(.L.str.2)
+; PIC-MIPS64R2-NEXT:    b .LBB0_10
+; PIC-MIPS64R2-NEXT:    sd $1, 8($sp)
+; PIC-MIPS64R2-NEXT:  .LBB0_6: # %sw.bb3
+; PIC-MIPS64R2-NEXT:    ld $1, %got_page(.L.str.3)($2)
+; PIC-MIPS64R2-NEXT:    daddiu $1, $1, %got_ofst(.L.str.3)
+; PIC-MIPS64R2-NEXT:    b .LBB0_10
+; PIC-MIPS64R2-NEXT:    sd $1, 8($sp)
+; PIC-MIPS64R2-NEXT:  .LBB0_7: # %sw.bb4
+; PIC-MIPS64R2-NEXT:    ld $1, %got_page(.L.str.4)($2)
+; PIC-MIPS64R2-NEXT:    daddiu $1, $1, %got_ofst(.L.str.4)
+; PIC-MIPS64R2-NEXT:    b .LBB0_10
+; PIC-MIPS64R2-NEXT:    sd $1, 8($sp)
+; PIC-MIPS64R2-NEXT:  .LBB0_8: # %sw.bb5
+; PIC-MIPS64R2-NEXT:    ld $1, %got_page(.L.str.5)($2)
+; PIC-MIPS64R2-NEXT:    daddiu $1, $1, %got_ofst(.L.str.5)
+; PIC-MIPS64R2-NEXT:    b .LBB0_10
+; PIC-MIPS64R2-NEXT:    sd $1, 8($sp)
+; PIC-MIPS64R2-NEXT:  .LBB0_9: # %sw.bb6
+; PIC-MIPS64R2-NEXT:    ld $1, %got_page(.L.str.6)($2)
+; PIC-MIPS64R2-NEXT:    daddiu $1, $1, %got_ofst(.L.str.6)
+; PIC-MIPS64R2-NEXT:    sd $1, 8($sp)
+; PIC-MIPS64R2-NEXT:  .LBB0_10: # %return
+; PIC-MIPS64R2-NEXT:    ld $2, 8($sp)
+; PIC-MIPS64R2-NEXT:    jr $ra
+; PIC-MIPS64R2-NEXT:    daddiu $sp, $sp, 16
+;
+; PIC-MIPS64R6-LABEL: _Z3fooi:
+; PIC-MIPS64R6:       # BB#0: # %entry
+; PIC-MIPS64R6-NEXT:    daddiu $sp, $sp, -16
+; PIC-MIPS64R6-NEXT:  .Lcfi0:
+; PIC-MIPS64R6-NEXT:    .cfi_def_cfa_offset 16
+; PIC-MIPS64R6-NEXT:    lui $1, %hi(%neg(%gp_rel(_Z3fooi)))
+; PIC-MIPS64R6-NEXT:    daddu $1, $1, $25
+; PIC-MIPS64R6-NEXT:    daddiu $2, $1, %lo(%neg(%gp_rel(_Z3fooi)))
+; PIC-MIPS64R6-NEXT:    sw $4, 4($sp)
+; PIC-MIPS64R6-NEXT:    lwu $3, 4($sp)
+; PIC-MIPS64R6-NEXT:    sltiu $1, $3, 7
+; PIC-MIPS64R6-NEXT:    beqzc $1, .LBB0_3
+; PIC-MIPS64R6-NEXT:  .LBB0_1: # %entry
+; PIC-MIPS64R6-NEXT:    dsll $1, $3, 3
+; PIC-MIPS64R6-NEXT:    ld $3, %got_page(.LJTI0_0)($2)
+; PIC-MIPS64R6-NEXT:    daddu $1, $1, $3
+; PIC-MIPS64R6-NEXT:    ld $1, %got_ofst(.LJTI0_0)($1)
+; PIC-MIPS64R6-NEXT:    daddu $1, $1, $2
+; PIC-MIPS64R6-NEXT:    jr.hb $1
+; PIC-MIPS64R6-NEXT:    nop
+; PIC-MIPS64R6-NEXT:  .LBB0_2: # %sw.bb
+; PIC-MIPS64R6-NEXT:    ld $1, %got_page(.L.str)($2)
+; PIC-MIPS64R6-NEXT:    daddiu $1, $1, %got_ofst(.L.str)
+; PIC-MIPS64R6-NEXT:    b .LBB0_10
+; PIC-MIPS64R6-NEXT:    sd $1, 8($sp)
+; PIC-MIPS64R6-NEXT:  .LBB0_3: # %sw.epilog
+; PIC-MIPS64R6-NEXT:    ld $1, %got_page(.L.str.7)($2)
+; PIC-MIPS64R6-NEXT:    daddiu $1, $1, %got_ofst(.L.str.7)
+; PIC-MIPS64R6-NEXT:    b .LBB0_10
+; PIC-MIPS64R6-NEXT:    sd $1, 8($sp)
+; PIC-MIPS64R6-NEXT:  .LBB0_4: # %sw.bb1
+; PIC-MIPS64R6-NEXT:    ld $1, %got_page(.L.str.1)($2)
+; PIC-MIPS64R6-NEXT:    daddiu $1, $1, %got_ofst(.L.str.1)
+; PIC-MIPS64R6-NEXT:    b .LBB0_10
+; PIC-MIPS64R6-NEXT:    sd $1, 8($sp)
+; PIC-MIPS64R6-NEXT:  .LBB0_5: # %sw.bb2
+; PIC-MIPS64R6-NEXT:    ld $1, %got_page(.L.str.2)($2)
+; PIC-MIPS64R6-NEXT:    daddiu $1, $1, %got_ofst(.L.str.2)
+; PIC-MIPS64R6-NEXT:    b .LBB0_10
+; PIC-MIPS64R6-NEXT:    sd $1, 8($sp)
+; PIC-MIPS64R6-NEXT:  .LBB0_6: # %sw.bb3
+; PIC-MIPS64R6-NEXT:    ld $1, %got_page(.L.str.3)($2)
+; PIC-MIPS64R6-NEXT:    daddiu $1, $1, %got_ofst(.L.str.3)
+; PIC-MIPS64R6-NEXT:    b .LBB0_10
+; PIC-MIPS64R6-NEXT:    sd $1, 8($sp)
+; PIC-MIPS64R6-NEXT:  .LBB0_7: # %sw.bb4
+; PIC-MIPS64R6-NEXT:    ld $1, %got_page(.L.str.4)($2)
+; PIC-MIPS64R6-NEXT:    daddiu $1, $1, %got_ofst(.L.str.4)
+; PIC-MIPS64R6-NEXT:    b .LBB0_10
+; PIC-MIPS64R6-NEXT:    sd $1, 8($sp)
+; PIC-MIPS64R6-NEXT:  .LBB0_8: # %sw.bb5
+; PIC-MIPS64R6-NEXT:    ld $1, %got_page(.L.str.5)($2)
+; PIC-MIPS64R6-NEXT:    daddiu $1, $1, %got_ofst(.L.str.5)
+; PIC-MIPS64R6-NEXT:    b .LBB0_10
+; PIC-MIPS64R6-NEXT:    sd $1, 8($sp)
+; PIC-MIPS64R6-NEXT:  .LBB0_9: # %sw.bb6
+; PIC-MIPS64R6-NEXT:    ld $1, %got_page(.L.str.6)($2)
+; PIC-MIPS64R6-NEXT:    daddiu $1, $1, %got_ofst(.L.str.6)
+; PIC-MIPS64R6-NEXT:    sd $1, 8($sp)
+; PIC-MIPS64R6-NEXT:  .LBB0_10: # %return
+; PIC-MIPS64R6-NEXT:    ld $2, 8($sp)
+; PIC-MIPS64R6-NEXT:    jr $ra
+; PIC-MIPS64R6-NEXT:    daddiu $sp, $sp, 16
+entry:
+  %retval = alloca i8*, align 8
+  %Letter.addr = alloca i32, align 4
+  store i32 %Letter, i32* %Letter.addr, align 4
+  %0 = load i32, i32* %Letter.addr, align 4
+  switch i32 %0, label %sw.epilog [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
+    i32 4, label %sw.bb4
+    i32 5, label %sw.bb5
+    i32 6, label %sw.bb6
+  ]
+
+sw.bb:
+  store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0, i32 0), i8** %retval, align 8
+  br label %return
+
+sw.bb1:
+  store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0), i8** %retval, align 8
+  br label %return
+
+sw.bb2:
+  store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.2, i32 0, i32 0), i8** %retval, align 8
+  br label %return
+
+sw.bb3:
+  store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.3, i32 0, i32 0), i8** %retval, align 8
+  br label %return
+
+sw.bb4:
+  store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.4, i32 0, i32 0), i8** %retval, align 8
+  br label %return
+
+sw.bb5:
+  store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.5, i32 0, i32 0), i8** %retval, align 8
+  br label %return
+
+sw.bb6:
+  store i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.6, i32 0, i32 0), i8** %retval, align 8
+  br label %return
+
+sw.epilog:
+  store i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str.7, i32 0, i32 0), i8** %retval, align 8
+  br label %return
+
+return:
+  %1 = load i8*, i8** %retval, align 8
+  ret i8* %1
+}
--- a/llvm/test/CodeGen/Mips/indirect-jump-hazard/long-branch.ll
+++ b/llvm/test/CodeGen/Mips/indirect-jump-hazard/long-branch.ll
@@ -0,0 +1,138 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+;       Except for the NACL version which isn't parsed by update_llc_test_checks.py
+
+; RUN: llc -mtriple=mipsel-unknown-linux-gnu -force-mips-long-branch -O3 \
+; RUN:   -mcpu=mips32r2 -mattr=+use-indirect-jump-hazard -relocation-model=pic \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s -check-prefix=O32-PIC
+
+; RUN: llc -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32r6 \
+; RUN:   -force-mips-long-branch -O3 -mattr=+use-indirect-jump-hazard  \
+; RUN:   -relocation-model=pic -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=O32-R6-PIC
+
+; RUN: llc -mtriple=mips64el-unknown-linux-gnu -mcpu=mips64r2 -target-abi=n64 \
+; RUN:   -force-mips-long-branch -O3 -relocation-model=pic \
+; RUN:   -mattr=+use-indirect-jump-hazard -verify-machineinstrs \
+; RUN:   < %s | FileCheck %s -check-prefix=MIPS64
+
+; RUN: llc -mtriple=mips64el-unknown-linux-gnu -mcpu=mips64r6 -target-abi=n64 \
+; RUN:   -force-mips-long-branch -O3 -mattr=+use-indirect-jump-hazard \
+; RUN:   -relocation-model=pic -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=N64-R6
+
+; Test that the long branches also get changed to their hazard variants.
+
+@x = external global i32
+
+define void @test1(i32 signext %s) {
+; O32-PIC-LABEL: test1:
+; O32-PIC:       # BB#0: # %entry
+; O32-PIC-NEXT:    lui $2, %hi(_gp_disp)
+; O32-PIC-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-PIC-NEXT:    bnez $4, $BB0_3
+; O32-PIC-NEXT:    addu $2, $2, $25
+; O32-PIC-NEXT:  # BB#1: # %entry
+; O32-PIC-NEXT:    addiu $sp, $sp, -8
+; O32-PIC-NEXT:    sw $ra, 0($sp)
+; O32-PIC-NEXT:    lui $1, %hi(($BB0_4)-($BB0_2))
+; O32-PIC-NEXT:    bal $BB0_2
+; O32-PIC-NEXT:    addiu $1, $1, %lo(($BB0_4)-($BB0_2))
+; O32-PIC-NEXT:  $BB0_2: # %entry
+; O32-PIC-NEXT:    addu $1, $ra, $1
+; O32-PIC-NEXT:    lw $ra, 0($sp)
+; O32-PIC-NEXT:    jr.hb $1
+; O32-PIC-NEXT:    addiu $sp, $sp, 8
+; O32-PIC-NEXT:  $BB0_3: # %then
+; O32-PIC-NEXT:    lw $1, %got(x)($2)
+; O32-PIC-NEXT:    addiu $2, $zero, 1
+; O32-PIC-NEXT:    sw $2, 0($1)
+; O32-PIC-NEXT:  $BB0_4: # %end
+; O32-PIC-NEXT:    jr $ra
+; O32-PIC-NEXT:    nop
+;
+; O32-R6-PIC-LABEL: test1:
+; O32-R6-PIC:       # BB#0: # %entry
+; O32-R6-PIC-NEXT:    lui $2, %hi(_gp_disp)
+; O32-R6-PIC-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; O32-R6-PIC-NEXT:    bnez $4, $BB0_3
+; O32-R6-PIC-NEXT:    addu $2, $2, $25
+; O32-R6-PIC-NEXT:  # BB#1: # %entry
+; O32-R6-PIC-NEXT:    addiu $sp, $sp, -8
+; O32-R6-PIC-NEXT:    sw $ra, 0($sp)
+; O32-R6-PIC-NEXT:    lui $1, %hi(($BB0_4)-($BB0_2))
+; O32-R6-PIC-NEXT:    bal $BB0_2
+; O32-R6-PIC-NEXT:    addiu $1, $1, %lo(($BB0_4)-($BB0_2))
+; O32-R6-PIC-NEXT:  $BB0_2: # %entry
+; O32-R6-PIC-NEXT:    addu $1, $ra, $1
+; O32-R6-PIC-NEXT:    lw $ra, 0($sp)
+; O32-R6-PIC-NEXT:    jr.hb $1
+; O32-R6-PIC-NEXT:    addiu $sp, $sp, 8
+; O32-R6-PIC-NEXT:  $BB0_3: # %then
+; O32-R6-PIC-NEXT:    lw $1, %got(x)($2)
+; O32-R6-PIC-NEXT:    addiu $2, $zero, 1
+; O32-R6-PIC-NEXT:    sw $2, 0($1)
+; O32-R6-PIC-NEXT:  $BB0_4: # %end
+; O32-R6-PIC-NEXT:    jrc $ra
+;
+; MIPS64-LABEL: test1:
+; MIPS64:       # BB#0: # %entry
+; MIPS64-NEXT:    lui $1, %hi(%neg(%gp_rel(test1)))
+; MIPS64-NEXT:    bnez $4, .LBB0_3
+; MIPS64-NEXT:    daddu $2, $1, $25
+; MIPS64-NEXT:  # BB#1: # %entry
+; MIPS64-NEXT:    daddiu $sp, $sp, -16
+; MIPS64-NEXT:    sd $ra, 0($sp)
+; MIPS64-NEXT:    daddiu $1, $zero, %hi(.LBB0_4-.LBB0_2)
+; MIPS64-NEXT:    dsll $1, $1, 16
+; MIPS64-NEXT:    bal .LBB0_2
+; MIPS64-NEXT:    daddiu $1, $1, %lo(.LBB0_4-.LBB0_2)
+; MIPS64-NEXT:  .LBB0_2: # %entry
+; MIPS64-NEXT:    daddu $1, $ra, $1
+; MIPS64-NEXT:    ld $ra, 0($sp)
+; MIPS64-NEXT:    jr.hb $1
+; MIPS64-NEXT:    daddiu $sp, $sp, 16
+; MIPS64-NEXT:  .LBB0_3: # %then
+; MIPS64-NEXT:    daddiu $1, $2, %lo(%neg(%gp_rel(test1)))
+; MIPS64-NEXT:    addiu $2, $zero, 1
+; MIPS64-NEXT:    ld $1, %got_disp(x)($1)
+; MIPS64-NEXT:    sw $2, 0($1)
+; MIPS64-NEXT:  .LBB0_4: # %end
+; MIPS64-NEXT:    jr $ra
+; MIPS64-NEXT:    nop
+;
+; N64-R6-LABEL: test1:
+; N64-R6:       # BB#0: # %entry
+; N64-R6-NEXT:    lui $1, %hi(%neg(%gp_rel(test1)))
+; N64-R6-NEXT:    bnez $4, .LBB0_3
+; N64-R6-NEXT:    daddu $2, $1, $25
+; N64-R6-NEXT:  # BB#1: # %entry
+; N64-R6-NEXT:    daddiu $sp, $sp, -16
+; N64-R6-NEXT:    sd $ra, 0($sp)
+; N64-R6-NEXT:    daddiu $1, $zero, %hi(.LBB0_4-.LBB0_2)
+; N64-R6-NEXT:    dsll $1, $1, 16
+; N64-R6-NEXT:    bal .LBB0_2
+; N64-R6-NEXT:    daddiu $1, $1, %lo(.LBB0_4-.LBB0_2)
+; N64-R6-NEXT:  .LBB0_2: # %entry
+; N64-R6-NEXT:    daddu $1, $ra, $1
+; N64-R6-NEXT:    ld $ra, 0($sp)
+; N64-R6-NEXT:    jr.hb $1
+; N64-R6-NEXT:    daddiu $sp, $sp, 16
+; N64-R6-NEXT:  .LBB0_3: # %then
+; N64-R6-NEXT:    daddiu $1, $2, %lo(%neg(%gp_rel(test1)))
+; N64-R6-NEXT:    addiu $2, $zero, 1
+; N64-R6-NEXT:    ld $1, %got_disp(x)($1)
+; N64-R6-NEXT:    sw $2, 0($1)
+; N64-R6-NEXT:  .LBB0_4: # %end
+; N64-R6-NEXT:    jrc $ra
+entry:
+  %cmp = icmp eq i32 %s, 0
+  br i1 %cmp, label %end, label %then
+
+then:
+  store i32 1, i32* @x, align 4
+  br label %end
+
+end:
+  ret void
+
+}
--- a/llvm/test/CodeGen/Mips/indirect-jump-hazard/long-calls.ll
+++ b/llvm/test/CodeGen/Mips/indirect-jump-hazard/long-calls.ll
@@ -0,0 +1,119 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=mips-unknwon-linux-gnu -mcpu=mips32r2 \
+; RUN:   -mattr=+use-indirect-jump-hazard,+long-calls,+noabicalls %s -o - \
+; RUN:   -verify-machineinstrs | FileCheck -check-prefix=O32 %s
+
+; RUN: llc -mtriple=mips64-unknown-linux-gnu -mcpu=mips64r2 -target-abi n32 \
+; RUN:   -mattr=+use-indirect-jump-hazard,+long-calls,+noabicalls %s -o - \
+; RUN:   -verify-machineinstrs | FileCheck -check-prefix=N32 %s
+
+; RUN: llc -mtriple=mips64-unknown-linux-gnu -mcpu=mips64r2 -target-abi n64 \
+; RUN:   -mattr=+use-indirect-jump-hazard,+long-calls,+noabicalls %s -o - \
+; RUN:   -verify-machineinstrs | FileCheck -check-prefix=N64 %s
+
+declare void @callee()
+declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1)
+
+@val = internal unnamed_addr global [20 x i32] zeroinitializer, align 4
+
+; Test that the long call sequence uses the hazard barrier instruction variant.
+define void @caller() {
+; O32-LABEL: caller:
+; O32:       # BB#0:
+; O32-NEXT:    addiu $sp, $sp, -24
+; O32-NEXT:  $cfi0:
+; O32-NEXT:    .cfi_def_cfa_offset 24
+; O32-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; O32-NEXT:  $cfi1:
+; O32-NEXT:    .cfi_offset 31, -4
+; O32-NEXT:    lui $1, %hi(callee)
+; O32-NEXT:    addiu $25, $1, %lo(callee)
+; O32-NEXT:    jalr.hb $25
+; O32-NEXT:    nop
+; O32-NEXT:    lui $1, %hi(val)
+; O32-NEXT:    addiu $1, $1, %lo(val)
+; O32-NEXT:    lui $2, 20560
+; O32-NEXT:    ori $2, $2, 20560
+; O32-NEXT:    sw $2, 96($1)
+; O32-NEXT:    sw $2, 92($1)
+; O32-NEXT:    sw $2, 88($1)
+; O32-NEXT:    sw $2, 84($1)
+; O32-NEXT:    sw $2, 80($1)
+; O32-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; O32-NEXT:    jr $ra
+; O32-NEXT:    addiu $sp, $sp, 24
+;
+; N32-LABEL: caller:
+; N32:       # BB#0:
+; N32-NEXT:    addiu $sp, $sp, -16
+; N32-NEXT:  .Lcfi0:
+; N32-NEXT:    .cfi_def_cfa_offset 16
+; N32-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; N32-NEXT:  .Lcfi1:
+; N32-NEXT:    .cfi_offset 31, -8
+; N32-NEXT:    lui $1, %hi(callee)
+; N32-NEXT:    addiu $25, $1, %lo(callee)
+; N32-NEXT:    jalr.hb $25
+; N32-NEXT:    nop
+; N32-NEXT:    lui $1, %hi(val)
+; N32-NEXT:    addiu $1, $1, %lo(val)
+; N32-NEXT:    lui $2, 1285
+; N32-NEXT:    daddiu $2, $2, 1285
+; N32-NEXT:    dsll $2, $2, 16
+; N32-NEXT:    daddiu $2, $2, 1285
+; N32-NEXT:    dsll $2, $2, 20
+; N32-NEXT:    daddiu $2, $2, 20560
+; N32-NEXT:    sdl $2, 88($1)
+; N32-NEXT:    sdl $2, 80($1)
+; N32-NEXT:    lui $3, 20560
+; N32-NEXT:    ori $3, $3, 20560
+; N32-NEXT:    sw $3, 96($1)
+; N32-NEXT:    sdr $2, 95($1)
+; N32-NEXT:    sdr $2, 87($1)
+; N32-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; N32-NEXT:    jr $ra
+; N32-NEXT:    addiu $sp, $sp, 16
+;
+; N64-LABEL: caller:
+; N64:       # BB#0:
+; N64-NEXT:    daddiu $sp, $sp, -16
+; N64-NEXT:  .Lcfi0:
+; N64-NEXT:    .cfi_def_cfa_offset 16
+; N64-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
+; N64-NEXT:  .Lcfi1:
+; N64-NEXT:    .cfi_offset 31, -8
+; N64-NEXT:    lui $1, %highest(callee)
+; N64-NEXT:    daddiu $1, $1, %higher(callee)
+; N64-NEXT:    dsll $1, $1, 16
+; N64-NEXT:    daddiu $1, $1, %hi(callee)
+; N64-NEXT:    dsll $1, $1, 16
+; N64-NEXT:    daddiu $25, $1, %lo(callee)
+; N64-NEXT:    jalr.hb $25
+; N64-NEXT:    nop
+; N64-NEXT:    lui $1, %highest(val)
+; N64-NEXT:    daddiu $1, $1, %higher(val)
+; N64-NEXT:    dsll $1, $1, 16
+; N64-NEXT:    daddiu $1, $1, %hi(val)
+; N64-NEXT:    dsll $1, $1, 16
+; N64-NEXT:    daddiu $1, $1, %lo(val)
+; N64-NEXT:    lui $2, 1285
+; N64-NEXT:    daddiu $2, $2, 1285
+; N64-NEXT:    dsll $2, $2, 16
+; N64-NEXT:    daddiu $2, $2, 1285
+; N64-NEXT:    dsll $2, $2, 20
+; N64-NEXT:    daddiu $2, $2, 20560
+; N64-NEXT:    lui $3, 20560
+; N64-NEXT:    sdl $2, 88($1)
+; N64-NEXT:    sdl $2, 80($1)
+; N64-NEXT:    ori $3, $3, 20560
+; N64-NEXT:    sw $3, 96($1)
+; N64-NEXT:    sdr $2, 95($1)
+; N64-NEXT:    sdr $2, 87($1)
+; N64-NEXT:    ld $ra, 8($sp) # 8-byte Folded Reload
+; N64-NEXT:    jr $ra
+; N64-NEXT:    daddiu $sp, $sp, 16
+  call void @callee()
+  call void @llvm.memset.p0i8.i32(i8* bitcast (i32* getelementptr inbounds ([20 x i32], [20 x i32]* @val, i64 1, i32 0) to i8*), i8 80, i32 20, i32 4, i1 false)
+  ret  void
+}
+
--- a/llvm/test/CodeGen/Mips/indirect-jump-hazard/unsupported-micromips.ll
+++ b/llvm/test/CodeGen/Mips/indirect-jump-hazard/unsupported-micromips.ll
@@ -0,0 +1,5 @@
+; RUN: not llc -mtriple=mips-unknown-linux -mcpu=mips32r2 -mattr=+micromips,+use-indirect-jump-hazard %s 2>&1 | FileCheck %s
+
+; Test that microMIPS and indirect jump with hazard barriers is not supported.
+
+; CHECK: LLVM ERROR: cannot combine indirect jumps with hazard barriers and microMIPS
--- a/llvm/test/CodeGen/Mips/indirect-jump-hazard/unsupported-mips32.ll
+++ b/llvm/test/CodeGen/Mips/indirect-jump-hazard/unsupported-mips32.ll
@@ -0,0 +1,5 @@
+; RUN: not llc -mtriple=mips-unknown-linux -mcpu=mips32 -mattr=+use-indirect-jump-hazard %s 2>&1 | FileCheck %s
+
+; Test that mips32 and indirect jump with hazard barriers is not supported.
+
+; CHECK: LLVM ERROR: indirect jumps with hazard barriers requires MIPS32R2 or later
--- a/llvm/test/CodeGen/Mips/tailcall/tailcall.ll
+++ b/llvm/test/CodeGen/Mips/tailcall/tailcall.ll
@@ -163,7 +163,7 @@ entry:
 ; STATIC32: j
 ; PIC64: jr $25
 ; PIC64R6: jrc $25
-; PIC64R6MM: jr $25
+; PIC64R6MM: jrc16 $25
 ; STATIC64: j
 ; PIC16: jalrc

--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -25,6 +25,7 @@
 ; CHECK-NEXT:       Inserts calls to mcount-like functions
 ; CHECK-NEXT:       Scalarize Masked Memory Intrinsics
 ; CHECK-NEXT:       Expand reduction intrinsics
+; CHECK-NEXT:       Expand indirectbr instructions
 ; CHECK-NEXT:     Rewrite Symbols
 ; CHECK-NEXT:     FunctionPass Manager
 ; CHECK-NEXT:       Dominator Tree Construction
@@ -55,6 +56,7 @@
 ; CHECK-NEXT:       Machine Natural Loop Construction
 ; CHECK-NEXT:       Insert XRay ops
 ; CHECK-NEXT:       Implement the 'patchable-function' attribute
+; CHECK-NEXT:       X86 Retpoline Thunks
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine Optimization Remark Emitter
 ; CHECK-NEXT:       MachineDominator Tree Construction
--- a/llvm/test/CodeGen/X86/inline-asm-modifier-V.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-modifier-V.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=i686-- -no-integrated-as | FileCheck -check-prefix=X86 %s
+; RUN: llc < %s -mtriple=x86_64-- -no-integrated-as | FileCheck -check-prefix=X64 %s
+
+; If the target does not have 64-bit integer registers, emit 32-bit register
+; names.
+
+; X86: call __x86_indirect_thunk_e{{[abcd]}}x
+; X64: call __x86_indirect_thunk_r
+
+define void @q_modifier(i32* %p) {
+entry:
+  tail call void asm sideeffect "call __x86_indirect_thunk_${0:V}", "r,~{dirflag},~{fpsr},~{flags}"(i32* %p)
+  ret void
+}
--- a/llvm/test/CodeGen/X86/retpoline-external.ll
+++ b/llvm/test/CodeGen/X86/retpoline-external.ll
@@ -0,0 +1,166 @@
+; RUN: llc -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
+; RUN: llc -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST
+
+; RUN: llc -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
+; RUN: llc -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST
+
+declare void @bar(i32)
+
+; Test a simple indirect call and tail call.
+define void @icall_reg(void (i32)* %fp, i32 %x) #0 {
+entry:
+  tail call void @bar(i32 %x)
+  tail call void %fp(i32 %x)
+  tail call void @bar(i32 %x)
+  tail call void %fp(i32 %x)
+  ret void
+}
+
+; X64-LABEL: icall_reg:
+; X64-DAG:   movq %rdi, %[[fp:[^ ]*]]
+; X64-DAG:   movl %esi, %[[x:[^ ]*]]
+; X64:       movl %[[x]], %edi
+; X64:       callq bar
+; X64-DAG:   movl %[[x]], %edi
+; X64-DAG:   movq %[[fp]], %r11
+; X64:       callq __x86_indirect_thunk_r11
+; X64:       movl %[[x]], %edi
+; X64:       callq bar
+; X64-DAG:   movl %[[x]], %edi
+; X64-DAG:   movq %[[fp]], %r11
+; X64:       jmp __x86_indirect_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_reg:
+; X64FAST:       callq bar
+; X64FAST:       callq __x86_indirect_thunk_r11
+; X64FAST:       callq bar
+; X64FAST:       jmp __x86_indirect_thunk_r11 # TAILCALL
+
+; X86-LABEL: icall_reg:
+; X86-DAG:   movl 12(%esp), %[[fp:[^ ]*]]
+; X86-DAG:   movl 16(%esp), %[[x:[^ ]*]]
+; X86:       pushl %[[x]]
+; X86:       calll bar
+; X86:       movl %[[fp]], %eax
+; X86:       pushl %[[x]]
+; X86:       calll __x86_indirect_thunk_eax
+; X86:       pushl %[[x]]
+; X86:       calll bar
+; X86:       movl %[[fp]], %eax
+; X86:       pushl %[[x]]
+; X86:       calll __x86_indirect_thunk_eax
+; X86-NOT:   # TAILCALL
+
+; X86FAST-LABEL: icall_reg:
+; X86FAST:       calll bar
+; X86FAST:       calll __x86_indirect_thunk_eax
+; X86FAST:       calll bar
+; X86FAST:       calll __x86_indirect_thunk_eax
+
+
+@global_fp = external global void (i32)*
+
+; Test an indirect call through a global variable.
+define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
+  %fp1 = load void (i32)*, void (i32)** @global_fp
+  call void %fp1(i32 %x)
+  %fp2 = load void (i32)*, void (i32)** @global_fp
+  tail call void %fp2(i32 %x)
+  ret void
+}
+
+; X64-LABEL: icall_global_fp:
+; X64-DAG:   movl %edi, %[[x:[^ ]*]]
+; X64-DAG:   movq global_fp(%rip), %r11
+; X64:       callq __x86_indirect_thunk_r11
+; X64-DAG:   movl %[[x]], %edi
+; X64-DAG:   movq global_fp(%rip), %r11
+; X64:       jmp __x86_indirect_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_global_fp:
+; X64FAST:       movq global_fp(%rip), %r11
+; X64FAST:       callq __x86_indirect_thunk_r11
+; X64FAST:       movq global_fp(%rip), %r11
+; X64FAST:       jmp __x86_indirect_thunk_r11 # TAILCALL
+
+; X86-LABEL: icall_global_fp:
+; X86:       movl global_fp, %eax
+; X86:       pushl 4(%esp)
+; X86:       calll __x86_indirect_thunk_eax
+; X86:       addl $4, %esp
+; X86:       movl global_fp, %eax
+; X86:       jmp __x86_indirect_thunk_eax # TAILCALL
+
+; X86FAST-LABEL: icall_global_fp:
+; X86FAST:       calll __x86_indirect_thunk_eax
+; X86FAST:       jmp __x86_indirect_thunk_eax # TAILCALL
+
+
+%struct.Foo = type { void (%struct.Foo*)** }
+
+; Test an indirect call through a vtable.
+define void @vcall(%struct.Foo* %obj) #0 {
+  %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0
+  %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field
+  %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1
+  %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot
+  tail call void %fp(%struct.Foo* %obj)
+  tail call void %fp(%struct.Foo* %obj)
+  ret void
+}
+
+; X64-LABEL: vcall:
+; X64:       movq %rdi, %[[obj:[^ ]*]]
+; X64:       movq (%[[obj]]), %[[vptr:[^ ]*]]
+; X64:       movq 8(%[[vptr]]), %[[fp:[^ ]*]]
+; X64:       movq %[[fp]], %r11
+; X64:       callq __x86_indirect_thunk_r11
+; X64-DAG:   movq %[[obj]], %rdi
+; X64-DAG:   movq %[[fp]], %r11
+; X64:       jmp __x86_indirect_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: vcall:
+; X64FAST:       callq __x86_indirect_thunk_r11
+; X64FAST:       jmp __x86_indirect_thunk_r11 # TAILCALL
+
+; X86-LABEL: vcall:
+; X86:       movl 8(%esp), %[[obj:[^ ]*]]
+; X86:       movl (%[[obj]]), %[[vptr:[^ ]*]]
+; X86:       movl 4(%[[vptr]]), %[[fp:[^ ]*]]
+; X86:       movl %[[fp]], %eax
+; X86:       pushl %[[obj]]
+; X86:       calll __x86_indirect_thunk_eax
+; X86:       addl $4, %esp
+; X86:       movl %[[fp]], %eax
+; X86:       jmp __x86_indirect_thunk_eax # TAILCALL
+
+; X86FAST-LABEL: vcall:
+; X86FAST:       calll __x86_indirect_thunk_eax
+; X86FAST:       jmp __x86_indirect_thunk_eax # TAILCALL
+
+
+declare void @direct_callee()
+
+define void @direct_tail() #0 {
+  tail call void @direct_callee()
+  ret void
+}
+
+; X64-LABEL: direct_tail:
+; X64:       jmp direct_callee # TAILCALL
+; X64FAST-LABEL: direct_tail:
+; X64FAST:   jmp direct_callee # TAILCALL
+; X86-LABEL: direct_tail:
+; X86:       jmp direct_callee # TAILCALL
+; X86FAST-LABEL: direct_tail:
+; X86FAST:   jmp direct_callee # TAILCALL
+
+
+; Lastly check that no thunks were emitted.
+; X64-NOT: __{{.*}}_retpoline_{{.*}}:
+; X64FAST-NOT: __{{.*}}_retpoline_{{.*}}:
+; X86-NOT: __{{.*}}_retpoline_{{.*}}:
+; X86FAST-NOT: __{{.*}}_retpoline_{{.*}}:
+
+
+attributes #0 = { "target-features"="+retpoline-external-thunk" }
--- a/llvm/test/CodeGen/X86/retpoline-regparm.ll
+++ b/llvm/test/CodeGen/X86/retpoline-regparm.ll
@@ -0,0 +1,42 @@
+; RUN: llc -mtriple=i686-linux < %s | FileCheck --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" %s
+
+; Test 32-bit retpoline when -mregparm=3 is used. This case is interesting
+; because there are no available scratch registers.  The Linux kernel builds
+; with -mregparm=3, so we need to support it.  TCO should fail because we need
+; to restore EDI.
+
+define void @call_edi(void (i32, i32, i32)* %fp) #0 {
+entry:
+  tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0)
+  ret void
+}
+
+; CHECK-LABEL: call_edi:
+;     EDI is used, so it must be saved.
+; CHECK: pushl %edi
+; CHECK-DAG: xorl %eax, %eax
+; CHECK-DAG: xorl %edx, %edx
+; CHECK-DAG: xorl %ecx, %ecx
+; CHECK-DAG: movl {{.*}}, %edi
+; CHECK: calll __llvm_retpoline_edi
+; CHECK: popl %edi
+; CHECK: retl
+
+define void @edi_external(void (i32, i32, i32)* %fp) #1 {
+entry:
+  tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0)
+  ret void
+}
+
+; CHECK-LABEL: edi_external:
+; CHECK: pushl %edi
+; CHECK-DAG: xorl %eax, %eax
+; CHECK-DAG: xorl %edx, %edx
+; CHECK-DAG: xorl %ecx, %ecx
+; CHECK-DAG: movl {{.*}}, %edi
+; CHECK: calll __x86_indirect_thunk_edi
+; CHECK: popl %edi
+; CHECK: retl
+
+attributes #0 = { "target-features"="+retpoline" }
+attributes #1 = { "target-features"="+retpoline-external-thunk" }
--- a/llvm/test/CodeGen/X86/retpoline.ll
+++ b/llvm/test/CodeGen/X86/retpoline.ll
@@ -0,0 +1,359 @@
+; RUN: llc -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
+; RUN: llc -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST
+
+; RUN: llc -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
+; RUN: llc -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST
+
+declare void @bar(i32)
+
+; Test a simple indirect call and tail call.
+define void @icall_reg(void (i32)* %fp, i32 %x) #0 {
+entry:
+  tail call void @bar(i32 %x)
+  tail call void %fp(i32 %x)
+  tail call void @bar(i32 %x)
+  tail call void %fp(i32 %x)
+  ret void
+}
+
+; X64-LABEL: icall_reg:
+; X64-DAG:   movq %rdi, %[[fp:[^ ]*]]
+; X64-DAG:   movl %esi, %[[x:[^ ]*]]
+; X64:       movl %[[x]], %edi
+; X64:       callq bar
+; X64-DAG:   movl %[[x]], %edi
+; X64-DAG:   movq %[[fp]], %r11
+; X64:       callq __llvm_retpoline_r11
+; X64:       movl %[[x]], %edi
+; X64:       callq bar
+; X64-DAG:   movl %[[x]], %edi
+; X64-DAG:   movq %[[fp]], %r11
+; X64:       jmp __llvm_retpoline_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_reg:
+; X64FAST:       callq bar
+; X64FAST:       callq __llvm_retpoline_r11
+; X64FAST:       callq bar
+; X64FAST:       jmp __llvm_retpoline_r11 # TAILCALL
+
+; X86-LABEL: icall_reg:
+; X86-DAG:   movl 12(%esp), %[[fp:[^ ]*]]
+; X86-DAG:   movl 16(%esp), %[[x:[^ ]*]]
+; X86:       pushl %[[x]]
+; X86:       calll bar
+; X86:       movl %[[fp]], %eax
+; X86:       pushl %[[x]]
+; X86:       calll __llvm_retpoline_eax
+; X86:       pushl %[[x]]
+; X86:       calll bar
+; X86:       movl %[[fp]], %eax
+; X86:       pushl %[[x]]
+; X86:       calll __llvm_retpoline_eax
+; X86-NOT:   # TAILCALL
+
+; X86FAST-LABEL: icall_reg:
+; X86FAST:       calll bar
+; X86FAST:       calll __llvm_retpoline_eax
+; X86FAST:       calll bar
+; X86FAST:       calll __llvm_retpoline_eax
+
+
+@global_fp = external global void (i32)*
+
+; Test an indirect call through a global variable.
+define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
+  %fp1 = load void (i32)*, void (i32)** @global_fp
+  call void %fp1(i32 %x)
+  %fp2 = load void (i32)*, void (i32)** @global_fp
+  tail call void %fp2(i32 %x)
+  ret void
+}
+
+; X64-LABEL: icall_global_fp:
+; X64-DAG:   movl %edi, %[[x:[^ ]*]]
+; X64-DAG:   movq global_fp(%rip), %r11
+; X64:       callq __llvm_retpoline_r11
+; X64-DAG:   movl %[[x]], %edi
+; X64-DAG:   movq global_fp(%rip), %r11
+; X64:       jmp __llvm_retpoline_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_global_fp:
+; X64FAST:       movq global_fp(%rip), %r11
+; X64FAST:       callq __llvm_retpoline_r11
+; X64FAST:       movq global_fp(%rip), %r11
+; X64FAST:       jmp __llvm_retpoline_r11 # TAILCALL
+
+; X86-LABEL: icall_global_fp:
+; X86:       movl global_fp, %eax
+; X86:       pushl 4(%esp)
+; X86:       calll __llvm_retpoline_eax
+; X86:       addl $4, %esp
+; X86:       movl global_fp, %eax
+; X86:       jmp __llvm_retpoline_eax # TAILCALL
+
+; X86FAST-LABEL: icall_global_fp:
+; X86FAST:       calll __llvm_retpoline_eax
+; X86FAST:       jmp __llvm_retpoline_eax # TAILCALL
+
+
+%struct.Foo = type { void (%struct.Foo*)** }
+
+; Test an indirect call through a vtable.
+define void @vcall(%struct.Foo* %obj) #0 {
+  %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0
+  %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field
+  %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1
+  %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot
+  tail call void %fp(%struct.Foo* %obj)
+  tail call void %fp(%struct.Foo* %obj)
+  ret void
+}
+
+; X64-LABEL: vcall:
+; X64:       movq %rdi, %[[obj:[^ ]*]]
+; X64:       movq (%[[obj]]), %[[vptr:[^ ]*]]
+; X64:       movq 8(%[[vptr]]), %[[fp:[^ ]*]]
+; X64:       movq %[[fp]], %r11
+; X64:       callq __llvm_retpoline_r11
+; X64-DAG:   movq %[[obj]], %rdi
+; X64-DAG:   movq %[[fp]], %r11
+; X64:       jmp __llvm_retpoline_r11 # TAILCALL
+
+; X64FAST-LABEL: vcall:
+; X64FAST:       callq __llvm_retpoline_r11
+; X64FAST:       jmp __llvm_retpoline_r11 # TAILCALL
+
+; X86-LABEL: vcall:
+; X86:       movl 8(%esp), %[[obj:[^ ]*]]
+; X86:       movl (%[[obj]]), %[[vptr:[^ ]*]]
+; X86:       movl 4(%[[vptr]]), %[[fp:[^ ]*]]
+; X86:       movl %[[fp]], %eax
+; X86:       pushl %[[obj]]
+; X86:       calll __llvm_retpoline_eax
+; X86:       addl $4, %esp
+; X86:       movl %[[fp]], %eax
+; X86:       jmp __llvm_retpoline_eax # TAILCALL
+
+; X86FAST-LABEL: vcall:
+; X86FAST:       calll __llvm_retpoline_eax
+; X86FAST:       jmp __llvm_retpoline_eax # TAILCALL
+
+
+declare void @direct_callee()
+
+define void @direct_tail() #0 {
+  tail call void @direct_callee()
+  ret void
+}
+
+; X64-LABEL: direct_tail:
+; X64:       jmp direct_callee # TAILCALL
+; X64FAST-LABEL: direct_tail:
+; X64FAST:   jmp direct_callee # TAILCALL
+; X86-LABEL: direct_tail:
+; X86:       jmp direct_callee # TAILCALL
+; X86FAST-LABEL: direct_tail:
+; X86FAST:   jmp direct_callee # TAILCALL
+
+
+declare void @nonlazybind_callee() #1
+
+define void @nonlazybind_caller() #0 {
+  call void @nonlazybind_callee()
+  tail call void @nonlazybind_callee()
+  ret void
+}
+
+; nonlazybind wasn't implemented in LLVM 5.0, so this looks the same as direct.
+; X64-LABEL: nonlazybind_caller:
+; X64:       callq nonlazybind_callee
+; X64:       jmp nonlazybind_callee # TAILCALL
+; X64FAST-LABEL: nonlazybind_caller:
+; X64FAST:   callq nonlazybind_callee
+; X64FAST:   jmp nonlazybind_callee # TAILCALL
+; X86-LABEL: nonlazybind_caller:
+; X86:       calll nonlazybind_callee
+; X86:       jmp nonlazybind_callee # TAILCALL
+; X86FAST-LABEL: nonlazybind_caller:
+; X86FAST:   calll nonlazybind_callee
+; X86FAST:   jmp nonlazybind_callee # TAILCALL
+
+
+@indirectbr_rewrite.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_rewrite, %bb0),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb1),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb2),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb3),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb4),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb5),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb6),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb7),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb8),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb9)]
+
+; Check that when retpolines are enabled a function with indirectbr gets
+; rewritten to use switch, and that in turn doesn't get lowered as a jump
+; table.
+define void @indirectbr_rewrite(i64* readonly %p, i64* %sink) #0 {
+; X64-LABEL: indirectbr_rewrite:
+; X64-NOT:     jmpq
+; X86-LABEL: indirectbr_rewrite:
+; X86-NOT:     jmpl
+entry:
+  %i0 = load i64, i64* %p
+  %target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i0
+  %target0 = load i8*, i8** %target.i0
+  indirectbr i8* %target0, [label %bb1, label %bb3]
+
+bb0:
+  store volatile i64 0, i64* %sink
+  br label %latch
+
+bb1:
+  store volatile i64 1, i64* %sink
+  br label %latch
+
+bb2:
+  store volatile i64 2, i64* %sink
+  br label %latch
+
+bb3:
+  store volatile i64 3, i64* %sink
+  br label %latch
+
+bb4:
+  store volatile i64 4, i64* %sink
+  br label %latch
+
+bb5:
+  store volatile i64 5, i64* %sink
+  br label %latch
+
+bb6:
+  store volatile i64 6, i64* %sink
+  br label %latch
+
+bb7:
+  store volatile i64 7, i64* %sink
+  br label %latch
+
+bb8:
+  store volatile i64 8, i64* %sink
+  br label %latch
+
+bb9:
+  store volatile i64 9, i64* %sink
+  br label %latch
+
+latch:
+  %i.next = load i64, i64* %p
+  %target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i.next
+  %target.next = load i8*, i8** %target.i.next
+  ; Potentially hit a full 10 successors here so that even if we rewrite as
+  ; a switch it will try to be lowered with a jump table.
+  indirectbr i8* %target.next, [label %bb0,
+                                label %bb1,
+                                label %bb2,
+                                label %bb3,
+                                label %bb4,
+                                label %bb5,
+                                label %bb6,
+                                label %bb7,
+                                label %bb8,
+                                label %bb9]
+}
+
+; Lastly check that the necessary thunks were emitted.
+;
+; X64-LABEL:         .section        .text.__llvm_retpoline_r11,{{.*}},__llvm_retpoline_r11,comdat
+; X64-NEXT:          .hidden __llvm_retpoline_r11
+; X64-NEXT:          .weak   __llvm_retpoline_r11
+; X64:       __llvm_retpoline_r11:
+; X64-NEXT:  # {{.*}}                                # %entry
+; X64-NEXT:          callq   [[CALL_TARGET:.*]]
+; X64-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
+; X64-NEXT:                                          # %entry
+; X64-NEXT:                                          # =>This Inner Loop Header: Depth=1
+; X64-NEXT:          pause
+; X64-NEXT:          lfence
+; X64-NEXT:          jmp     [[CAPTURE_SPEC]]
+; X64-NEXT:          .p2align        4, 0x90
+; X64-NEXT:  [[CALL_TARGET]]:                        # Block address taken
+; X64-NEXT:                                          # %entry
+; X64-NEXT:          movq    %r11, (%rsp)
+; X64-NEXT:          retq
+;
+; X86-LABEL:         .section        .text.__llvm_retpoline_eax,{{.*}},__llvm_retpoline_eax,comdat
+; X86-NEXT:          .hidden __llvm_retpoline_eax
+; X86-NEXT:          .weak   __llvm_retpoline_eax
+; X86:       __llvm_retpoline_eax:
+; X86-NEXT:  # {{.*}}                                # %entry
+; X86-NEXT:          calll   [[CALL_TARGET:.*]]
+; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
+; X86-NEXT:                                          # %entry
+; X86-NEXT:                                          # =>This Inner Loop Header: Depth=1
+; X86-NEXT:          pause
+; X86-NEXT:          lfence
+; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
+; X86-NEXT:          .p2align        4, 0x90
+; X86-NEXT:  [[CALL_TARGET]]:                        # Block address taken
+; X86-NEXT:                                          # %entry
+; X86-NEXT:          movl    %eax, (%esp)
+; X86-NEXT:          retl
+;
+; X86-LABEL:         .section        .text.__llvm_retpoline_ecx,{{.*}},__llvm_retpoline_ecx,comdat
+; X86-NEXT:          .hidden __llvm_retpoline_ecx
+; X86-NEXT:          .weak   __llvm_retpoline_ecx
+; X86:       __llvm_retpoline_ecx:
+; X86-NEXT:  # {{.*}}                                # %entry
+; X86-NEXT:          calll   [[CALL_TARGET:.*]]
+; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
+; X86-NEXT:                                          # %entry
+; X86-NEXT:                                          # =>This Inner Loop Header: Depth=1
+; X86-NEXT:          pause
+; X86-NEXT:          lfence
+; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
+; X86-NEXT:          .p2align        4, 0x90
+; X86-NEXT:  [[CALL_TARGET]]:                        # Block address taken
+; X86-NEXT:                                          # %entry
+; X86-NEXT:          movl    %ecx, (%esp)
+; X86-NEXT:          retl
+;
+; X86-LABEL:         .section        .text.__llvm_retpoline_edx,{{.*}},__llvm_retpoline_edx,comdat
+; X86-NEXT:          .hidden __llvm_retpoline_edx
+; X86-NEXT:          .weak   __llvm_retpoline_edx
+; X86:       __llvm_retpoline_edx:
+; X86-NEXT:  # {{.*}}                                # %entry
+; X86-NEXT:          calll   [[CALL_TARGET:.*]]
+; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
+; X86-NEXT:                                          # %entry
+; X86-NEXT:                                          # =>This Inner Loop Header: Depth=1
+; X86-NEXT:          pause
+; X86-NEXT:          lfence
+; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
+; X86-NEXT:          .p2align        4, 0x90
+; X86-NEXT:  [[CALL_TARGET]]:                        # Block address taken
+; X86-NEXT:                                          # %entry
+; X86-NEXT:          movl    %edx, (%esp)
+; X86-NEXT:          retl
+;
+; X86-LABEL:         .section        .text.__llvm_retpoline_edi,{{.*}},__llvm_retpoline_edi,comdat
+; X86-NEXT:          .hidden __llvm_retpoline_edi
+; X86-NEXT:          .weak   __llvm_retpoline_edi
+; X86:       __llvm_retpoline_edi:
+; X86-NEXT:  # {{.*}}                                # %entry
+; X86-NEXT:          calll   [[CALL_TARGET:.*]]
+; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
+; X86-NEXT:                                          # %entry
+; X86-NEXT:                                          # =>This Inner Loop Header: Depth=1
+; X86-NEXT:          pause
+; X86-NEXT:          lfence
+; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
+; X86-NEXT:          .p2align        4, 0x90
+; X86-NEXT:  [[CALL_TARGET]]:                        # Block address taken
+; X86-NEXT:                                          # %entry
+; X86-NEXT:          movl    %edi, (%esp)
+; X86-NEXT:          retl
+
+
+attributes #0 = { "target-features"="+retpoline" }
+attributes #1 = { nonlazybind }
--- a/llvm/test/Transforms/IndirectBrExpand/basic.ll
+++ b/llvm/test/Transforms/IndirectBrExpand/basic.ll
@@ -0,0 +1,63 @@
+; RUN: opt < %s -indirectbr-expand -S | FileCheck %s
+;
+; REQUIRES: x86-registered-target
+
+target triple = "x86_64-unknown-linux-gnu"
+
+@test1.targets = constant [4 x i8*] [i8* blockaddress(@test1, %bb0),
+                                     i8* blockaddress(@test1, %bb1),
+                                     i8* blockaddress(@test1, %bb2),
+                                     i8* blockaddress(@test1, %bb3)]
+; CHECK-LABEL: @test1.targets = constant [4 x i8*]
+; CHECK:       [i8* inttoptr (i64 1 to i8*),
+; CHECK:        i8* inttoptr (i64 2 to i8*),
+; CHECK:        i8* inttoptr (i64 3 to i8*),
+; CHECK:        i8* blockaddress(@test1, %bb3)]
+
+define void @test1(i64* readonly %p, i64* %sink) #0 {
+; CHECK-LABEL: define void @test1(
+entry:
+  %i0 = load i64, i64* %p
+  %target.i0 = getelementptr [4 x i8*], [4 x i8*]* @test1.targets, i64 0, i64 %i0
+  %target0 = load i8*, i8** %target.i0
+  ; Only a subset of blocks are viable successors here.
+  indirectbr i8* %target0, [label %bb0, label %bb1]
+; CHECK-NOT:     indirectbr
+; CHECK:         %[[ENTRY_V:.*]] = ptrtoint i8* %{{.*}} to i64
+; CHECK-NEXT:    br label %[[SWITCH_BB:.*]]
+
+bb0:
+  store volatile i64 0, i64* %sink
+  br label %latch
+
+bb1:
+  store volatile i64 1, i64* %sink
+  br label %latch
+
+bb2:
+  store volatile i64 2, i64* %sink
+  br label %latch
+
+bb3:
+  store volatile i64 3, i64* %sink
+  br label %latch
+
+latch:
+  %i.next = load i64, i64* %p
+  %target.i.next = getelementptr [4 x i8*], [4 x i8*]* @test1.targets, i64 0, i64 %i.next
+  %target.next = load i8*, i8** %target.i.next
+  ; A different subset of blocks are viable successors here.
+  indirectbr i8* %target.next, [label %bb1, label %bb2]
+; CHECK-NOT:     indirectbr
+; CHECK:         %[[LATCH_V:.*]] = ptrtoint i8* %{{.*}} to i64
+; CHECK-NEXT:    br label %[[SWITCH_BB]]
+;
+; CHECK:       [[SWITCH_BB]]:
+; CHECK-NEXT:    %[[V:.*]] = phi i64 [ %[[ENTRY_V]], %entry ], [ %[[LATCH_V]], %latch ]
+; CHECK-NEXT:    switch i64 %[[V]], label %bb0 [
+; CHECK-NEXT:      i64 2, label %bb1
+; CHECK-NEXT:      i64 3, label %bb2
+; CHECK-NEXT:    ]
+}
+
+attributes #0 = { "target-features"="+retpoline" }
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -401,6 +401,7 @@ int main(int argc, char **argv) {
  initializeSjLjEHPreparePass(Registry);
  initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
  initializeGlobalMergePass(Registry);
+  initializeIndirectBrExpandPassPass(Registry);
  initializeInterleavedAccessPass(Registry);
  initializeCountingFunctionInserterPass(Registry);
  initializeUnreachableBlockElimLegacyPassPass(Registry);