//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// SI implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//

#include "SIRegisterInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

static bool hasPressureSet(const int *PSets, unsigned PSetID) {
  for (unsigned i = 0; PSets[i] != -1; ++i) {
    if (PSets[i] == (int)PSetID)
      return true;
  }
  return false;
}

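// Mark PSetID in PressureSets if any register unit of Reg belongs to that
// pressure set. Used by the constructor below to classify each pressure set
// as SGPR- or VGPR-related.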
void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
                                         BitVector &PressureSets) const {
  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
    const int *PSets = getRegUnitPressureSets(*U);
    if (hasPressureSet(PSets, PSetID)) {
      PressureSets.set(PSetID);
      break;
    }
  }
}

static cl::opt<bool> EnableSpillSGPRToSMEM(
  "amdgpu-spill-sgpr-to-smem",
  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
  cl::init(false));

static cl::opt<bool> EnableSpillSGPRToVGPR(
  "amdgpu-spill-sgpr-to-vgpr",
  cl::desc("Enable spilling SGPRs to VGPRs"),
  cl::ReallyHidden,
  cl::init(true));

SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
  AMDGPURegisterInfo(),
  SGPRPressureSets(getNumRegPressureSets()),
  VGPRPressureSets(getNumRegPressureSets()),
  SpillSGPRToVGPR(false),
  SpillSGPRToSMEM(false) {
  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
    SpillSGPRToSMEM = true;
  else if (EnableSpillSGPRToVGPR)
    SpillSGPRToVGPR = true;

  unsigned NumRegPressureSets = getNumRegPressureSets();

  SGPRSetID = NumRegPressureSets;
  VGPRSetID = NumRegPressureSets;

  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
    classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
    classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
  }

  // Determine the number of reg units for each pressure set.
  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
    const int *PSets = getRegUnitPressureSets(i);
    for (unsigned j = 0; PSets[j] != -1; ++j) {
      ++PressureSetRegUnits[PSets[j]];
    }
  }

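  // A register may belong to several pressure sets; treat the set covering
  // the most register units as the canonical SGPR/VGPR pressure set.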
  unsigned VGPRMax = 0, SGPRMax = 0;
  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
    if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
      VGPRSetID = i;
      VGPRMax = PressureSetRegUnits[i];
      continue;
    }
    if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
      SGPRSetID = i;
      SGPRMax = PressureSetRegUnits[i];
    }
  }

  assert(SGPRSetID < NumRegPressureSets &&
         VGPRSetID < NumRegPressureSets);
}

unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
  const MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
}

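// Pick the SGPR index reserved for the wave byte offset, using the hole left
// by the 4-aligned private segment buffer reservation above. E.g.
// (illustrative) with 102 SGPRs the buffer occupies s[96:99], leaving s101
// free; with 104 SGPRs the buffer occupies s[100:103], so s99 is used instead.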
static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
  unsigned Reg;

  // Try to place it in a hole after PrivateSegmentBufferReg.
  if (RegCount & 3) {
    // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
    // alignment constraints, so we have a hole where we can put the wave
    // offset.
    Reg = RegCount - 1;
  } else {
    // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
    // wave offset before it.
    Reg = RegCount - 5;
  }

  return Reg;
}

unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
  const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
  return AMDGPU::SGPR_32RegClass.getRegister(Reg);
}

unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
  const MachineFunction &MF) const {
  return AMDGPU::SGPR32;
}

BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());

  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
  // this seems likely to result in bugs, so I'm marking them as reserved.
  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);

  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
  reserveRegisterTuples(Reserved, AMDGPU::M0);

  // Reserve the memory aperture registers.
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);

  // Reserve xnack_mask registers - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);

  // Reserve Trap Handler registers - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::TBA);
  reserveRegisterTuples(Reserved, AMDGPU::TMA);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
    unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
  }

  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
    unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
  }

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
    // Reserve 1 SGPR for scratch wave offset in case we need to spill.
    reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
  }

  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
  if (ScratchRSrcReg != AMDGPU::NoRegister) {
    // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
    // need to spill.
    // TODO: May need to reserve a VGPR if doing LDS spilling.
    reserveRegisterTuples(Reserved, ScratchRSrcReg);
    assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
  }

  // We have to assume the SP is needed in case there are calls in the function,
  // which is detected after the function is lowered. If we aren't really going
  // to need SP, don't bother reserving it.
  unsigned StackPtrReg = MFI->getStackPtrOffsetReg();

  if (StackPtrReg != AMDGPU::NoRegister) {
    reserveRegisterTuples(Reserved, StackPtrReg);
    assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
  }

  unsigned FrameReg = MFI->getFrameOffsetReg();
  if (FrameReg != AMDGPU::NoRegister) {
    reserveRegisterTuples(Reserved, FrameReg);
    assert(!isSubRegister(ScratchRSrcReg, FrameReg));
  }

  return Reserved;
}

bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
  const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
  if (Info->isEntryFunction()) {
    const MachineFrameInfo &MFI = Fn.getFrameInfo();
    return MFI.hasStackObjects() || MFI.hasCalls();
  }

  // May need scavenger for dealing with callee saved registers.
  return true;
}

bool SIRegisterInfo::requiresFrameIndexScavenging(
  const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  if (MFI.hasStackObjects())
    return true;

  // May need to deal with callee saved registers.
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  return !Info->isEntryFunction();
}

bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
  const MachineFunction &MF) const {
  // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
  // create a virtual register for it during frame index elimination, so the
  // scavenger is directly needed.
  return MF.getFrameInfo().hasStackObjects() &&
         MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
         MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
}

bool SIRegisterInfo::requiresVirtualBaseRegisters(
  const MachineFunction &) const {
  // There are no special dedicated stack or frame pointers.
  return true;
}

bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
  // This helps catch bugs as verifier errors.
  return true;
}

int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
  assert(SIInstrInfo::isMUBUF(*MI));

  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                          AMDGPU::OpName::offset);
  return MI->getOperand(OffIdx).getImm();
}

int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
                                                 int Idx) const {
  if (!SIInstrInfo::isMUBUF(*MI))
    return 0;

  assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::vaddr) &&
         "Should never see frame index on non-address operand");

  return getMUBUFInstrOffset(MI);
}

bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
  if (!MI->mayLoadOrStore())
    return false;

  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);

  return !isUInt<12>(FullOffset);
}

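// MUBUF immediate offsets are 12-bit unsigned values, so frame offsets beyond
// that range need a base register. The helpers below materialize one by
// adding the frame address and the constant offset into a VGPR.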
void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
                                                  unsigned BaseReg,
                                                  int FrameIdx,
                                                  int64_t Offset) const {
  MachineBasicBlock::iterator Ins = MBB->begin();
  DebugLoc DL; // Defaults to "unknown"

  if (Ins != MBB->end())
    DL = Ins->getDebugLoc();

  MachineFunction *MF = MBB->getParent();
  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = Subtarget.getInstrInfo();

  if (Offset == 0) {
    BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
      .addFrameIndex(FrameIdx);
    return;
  }

  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
    .addImm(Offset);
  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
    .addFrameIndex(FrameIdx);

  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
    .addReg(OffsetReg, RegState::Kill)
    .addReg(FIReg);
}

void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
                                       int64_t Offset) const {

  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = Subtarget.getInstrInfo();

#ifndef NDEBUG
  // FIXME: Is it possible to be storing a frame index to itself?
  bool SeenFI = false;
  for (const MachineOperand &MO: MI.operands()) {
    if (MO.isFI()) {
      if (SeenFI)
        llvm_unreachable("should not see multiple frame indices");

      SeenFI = true;
    }
  }
#endif

  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
  assert(TII->isMUBUF(MI));
  assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
         MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
         "should only be seeing frame offset relative FrameIndex");

  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
  int64_t NewOffset = OffsetOp->getImm() + Offset;
  assert(isUInt<12>(NewOffset) && "offset should be legal");

  FIOp->ChangeToRegister(BaseReg, false);
  OffsetOp->setImm(NewOffset);
}

bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
                                        unsigned BaseReg,
                                        int64_t Offset) const {
  if (!SIInstrInfo::isMUBUF(*MI))
    return false;

  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);

  return isUInt<12>(NewOffset);
}

const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
  const MachineFunction &MF, unsigned Kind) const {
  // This is inaccurate. It depends on the instruction and address space. The
  // only place where we should hit this is for dealing with frame indexes /
  // private accesses, so this is correct in that case.
  return &AMDGPU::VGPR_32RegClass;
}

static unsigned getNumSubRegsForSpillOp(unsigned Op) {
  switch (Op) {
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
    return 16;
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
    return 8;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
    return 4;
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
    return 3;
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
    return 2;
  case AMDGPU::SI_SPILL_S32_SAVE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_V32_RESTORE:
    return 1;
  default: llvm_unreachable("Invalid spill opcode");
  }
}

static int getOffsetMUBUFStore(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
  default:
    return -1;
  }
}

static int getOffsetMUBUFLoad(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
  default:
    return -1;
  }
}

// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
// need to handle the case where an SGPR may need to be spilled while spilling.
static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
                                      MachineFrameInfo &MFI,
                                      MachineBasicBlock::iterator MI,
                                      int Index,
                                      int64_t Offset) {
  MachineBasicBlock *MBB = MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();
  bool IsStore = MI->mayStore();

  unsigned Opc = MI->getOpcode();
  int LoadStoreOp = IsStore ?
    getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
  if (LoadStoreOp == -1)
    return false;

  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
  MachineInstrBuilder NewMI =
      BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
          .add(*Reg)
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
          .addImm(Offset)
          .addImm(0) // glc
          .addImm(0) // slc
          .addImm(0) // tfe
          .cloneMemRefs(*MI);

  const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
                                                       AMDGPU::OpName::vdata_in);
  if (VDataIn)
    NewMI.add(*VDataIn);
  return true;
}

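// Expand a VGPR spill/reload pseudo into a sequence of 4-byte MUBUF
// loads/stores, one per 32-bit subregister of ValueReg.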
void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
                                         unsigned LoadStoreOp,
                                         int Index,
                                         unsigned ValueReg,
                                         bool IsKill,
                                         unsigned ScratchRsrcReg,
                                         unsigned ScratchOffsetReg,
                                         int64_t InstOffset,
                                         MachineMemOperand *MMO,
                                         RegScavenger *RS) const {
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MI->getParent()->getParent();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const MachineFrameInfo &MFI = MF->getFrameInfo();

  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
  const DebugLoc &DL = MI->getDebugLoc();
  bool IsStore = Desc.mayStore();

  bool Scavenged = false;
  unsigned SOffset = ScratchOffsetReg;

  const unsigned EltSize = 4;
  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
  unsigned Size = NumSubRegs * EltSize;
  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
  int64_t ScratchOffsetRegDelta = 0;

  unsigned Align = MFI.getObjectAlignment(Index);
  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();

  assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");

  if (!isUInt<12>(Offset + Size - EltSize)) {
    SOffset = AMDGPU::NoRegister;

    // We currently only support spilling VGPRs to EltSize boundaries, meaning
    // we can simplify the adjustment of Offset here to just scale with
    // WavefrontSize.
    Offset *= ST.getWavefrontSize();

    // We don't have access to the register scavenger if this function is
    // called during PEI::scavengeFrameVirtualRegs().
    if (RS)
      SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);

    if (SOffset == AMDGPU::NoRegister) {
      // There are no free SGPRs, and we are in the process of spilling VGPRs
      // too. Since we need a VGPR in order to spill SGPRs (this is true on
      // SI/CI, and on VI it is true until we implement spilling using scalar
      // stores), we have no way to free up an SGPR. Our solution here is to
      // add the offset directly to the ScratchOffset register, and then
      // subtract the offset after the spill to return ScratchOffset to its
      // original value.
      SOffset = ScratchOffsetReg;
      ScratchOffsetRegDelta = Offset;
    } else {
      Scavenged = true;
    }

    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
      .addReg(ScratchOffsetReg)
      .addImm(Offset);

    Offset = 0;
  }

  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
    unsigned SubReg = NumSubRegs == 1 ?
      ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));

    unsigned SOffsetRegState = 0;
    unsigned SrcDstRegState = getDefRegState(!IsStore);
    if (i + 1 == e) {
      SOffsetRegState |= getKillRegState(Scavenged);
      // The last implicit use carries the "Kill" flag.
      SrcDstRegState |= getKillRegState(IsKill);
    }

    MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
    MachineMemOperand *NewMMO
      = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
                                 EltSize, MinAlign(Align, EltSize * i));

    auto MIB = BuildMI(*MBB, MI, DL, Desc)
      .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
      .addReg(ScratchRsrcReg)
      .addReg(SOffset, SOffsetRegState)
      .addImm(Offset)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      .addMemOperand(NewMMO);

    if (NumSubRegs > 1)
      MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
  }

  if (ScratchOffsetRegDelta != 0) {
    // Subtract the offset we added to the ScratchOffset register.
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
      .addReg(ScratchOffsetReg)
      .addImm(ScratchOffsetRegDelta);
  }
}

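// Pick the widest SMEM scalar buffer opcode whose element size evenly divides
// the super-register, returning { element size in bytes, opcode }.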
static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
                                                     bool Store) {
  if (SuperRegSize % 16 == 0) {
    return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
                         AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
  }

  if (SuperRegSize % 8 == 0) {
    return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
                        AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
  }

  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
                      AMDGPU::S_BUFFER_LOAD_DWORD_SGPR };
}

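// Spill an SGPR (super-)register: to VGPR lanes via V_WRITELANE, to scratch
// memory via SMEM scalar stores, or, as a fallback, through a temporary VGPR
// and a VGPR spill pseudo.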
bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                               int Index,
                               RegScavenger *RS,
                               bool OnlyToVGPR) const {
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  DenseSet<unsigned> SGPRSpillVGPRDefinedSet;

  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
    = MFI->getSGPRToVGPRSpills(Index);
  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)
    return false;

  MachineRegisterInfo &MRI = MF->getRegInfo();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  unsigned SuperReg = MI->getOperand(0).getReg();
  bool IsKill = MI->getOperand(0).isKill();
  const DebugLoc &DL = MI->getDebugLoc();

  MachineFrameInfo &FrameInfo = MF->getFrameInfo();

  bool SpillToSMEM = spillSGPRToSMEM();
  if (SpillToSMEM && OnlyToVGPR)
    return false;

  assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
                         SuperReg != MFI->getFrameOffsetReg() &&
                         SuperReg != MFI->getScratchWaveOffsetReg()));

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
        .addReg(AMDGPU::M0);
    }
  }

  unsigned ScalarStoreOp;
  unsigned EltSize = 4;
  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
  if (SpillToSMEM && isSGPRClass(RC)) {
    // XXX - if private_element_size is larger than 4 it might be useful to be
    // able to spill wider vmem spills.
    std::tie(EltSize, ScalarStoreOp) =
          getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
  }

  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, SplitParts[i]);

    if (SpillToSMEM) {
      int64_t FrOffset = FrameInfo.getObjectOffset(Index);

      // The allocated memory size is really the wavefront size * the frame
      // index size. The widest register class is 64 bytes, so a 4-byte scratch
      // allocation is enough to spill this in a single stack object.
      //
      // FIXME: Frame size/offsets are computed earlier than this, so the extra
      // space is still unnecessarily allocated.

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));

      // SMEM instructions only support a single offset, so increment the wave
      // offset.

      int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(MFI->getFrameOffsetReg())
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(MFI->getFrameOffsetReg());
      }

      BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
        .addReg(SubReg, getKillRegState(IsKill)) // sdata
        .addReg(MFI->getScratchRSrcReg())        // sbase
        .addReg(OffsetReg, RegState::Kill)       // soff
        .addImm(0)                               // glc
        .addMemOperand(MMO);

      continue;
    }

    if (SpillToVGPR) {
      SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];

      // During SGPR spilling to VGPR, determine if the VGPR is defined. The
      // only circumstance in which we say it is undefined is when it is the
      // first spill to this VGPR in the first basic block.
      bool VGPRDefined = true;
      if (MBB == &MF->front())
        VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;

      // Mark the "old value of vgpr" input undef only if this is the first
      // sgpr spill to this specific vgpr in the first basic block.
      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
              Spill.VGPR)
        .addReg(SubReg, getKillRegState(IsKill))
        .addImm(Spill.Lane)
        .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);

      // FIXME: Since this spills to another register instead of an actual
      // frame index, we should delete the frame index when all references to
      // it are fixed.
    } else {
      // XXX - Can a spill to VGPR fail for some subregisters but not others?
      if (OnlyToVGPR)
        return false;

      // Spill SGPR to a frame index.
      // TODO: Should VI try to spill to VGPR and then spill to SMEM?
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      MachineInstrBuilder Mov
        = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
        .addReg(SubReg, SubKillState);

      // There could be undef components of a spilled super register.
      // TODO: Can we detect this and skip the spill?
      if (NumSubRegs > 1) {
        // The last implicit use of the SuperReg carries the "Kill" flag.
        unsigned SuperKillState = 0;
        if (i + 1 == e)
          SuperKillState |= getKillRegState(IsKill);
        Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
      }

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
        .addReg(TmpReg, RegState::Kill)    // src
        .addFrameIndex(Index)              // vaddr
        .addReg(MFI->getScratchRSrcReg())  // srsrc
        .addReg(MFI->getFrameOffsetReg())  // soffset
        .addImm(i * 4)                     // offset
        .addMemOperand(MMO);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
  MFI->addToSpilledSGPRs(NumSubRegs);
  return true;
}

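// Reload an SGPR (super-)register, mirroring spillSGPR: from VGPR lanes via
// V_READLANE, from scratch via SMEM scalar loads, or via a VGPR reload
// followed by V_READFIRSTLANE.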
bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                 int Index,
                                 RegScavenger *RS,
                                 bool OnlyToVGPR) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();

  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
    = MFI->getSGPRToVGPRSpills(Index);
  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)
    return false;

  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned SuperReg = MI->getOperand(0).getReg();
  bool SpillToSMEM = spillSGPRToSMEM();
  if (SpillToSMEM && OnlyToVGPR)
    return false;

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
        .addReg(AMDGPU::M0);
    }
  }

  unsigned EltSize = 4;
  unsigned ScalarLoadOp;

  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
  if (SpillToSMEM && isSGPRClass(RC)) {
    // XXX - if private_element_size is larger than 4 it might be useful to be
    // able to spill wider vmem spills.
    std::tie(EltSize, ScalarLoadOp) =
          getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
  }

  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

  int64_t FrOffset = FrameInfo.getObjectOffset(Index);

  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, SplitParts[i]);

    if (SpillToSMEM) {
      // FIXME: Size may be > 4 but extra bytes wasted.
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                   EltSize, MinAlign(Align, EltSize * i));

      // Add i * 4 offset
      int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(MFI->getFrameOffsetReg())
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(MFI->getFrameOffsetReg());
      }

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
        .addReg(MFI->getScratchRSrcReg())  // sbase
        .addReg(OffsetReg, RegState::Kill) // soff
        .addImm(0)                         // glc
        .addMemOperand(MMO);

      if (NumSubRegs > 1)
        MIB.addReg(SuperReg, RegState::ImplicitDefine);

      continue;
    }

    if (SpillToVGPR) {
      SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
      auto MIB =
        BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
                SubReg)
        .addReg(Spill.VGPR)
        .addImm(Spill.Lane);

      if (NumSubRegs > 1)
        MIB.addReg(SuperReg, RegState::ImplicitDefine);
    } else {
      if (OnlyToVGPR)
        return false;

      // Restore SGPR from a stack slot.
      // FIXME: We should use S_LOAD_DWORD here for VI.
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      unsigned Align = FrameInfo.getObjectAlignment(Index);

      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);

      MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
        MachineMemOperand::MOLoad, EltSize,
        MinAlign(Align, EltSize * i));

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
        .addFrameIndex(Index)              // vaddr
        .addReg(MFI->getScratchRSrcReg())  // srsrc
        .addReg(MFI->getFrameOffsetReg())  // soffset
        .addImm(i * 4)                     // offset
        .addMemOperand(MMO);

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
        .addReg(TmpReg, RegState::Kill);

      if (NumSubRegs > 1)
        MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
  return true;
}

/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
/// a VGPR and the stack slot can be safely eliminated when all other users are
/// handled.
bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
  MachineBasicBlock::iterator MI,
  int FI,
  RegScavenger *RS) const {
  switch (MI->getOpcode()) {
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE:
    return spillSGPR(MI, FI, RS, true);
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
    return restoreSGPR(MI, FI, RS, true);
  default:
    llvm_unreachable("not an SGPR spill instruction");
  }
}

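// Rewrite the frame index in MI: spill/restore pseudos are expanded into real
// memory instructions; other users get a frame-offset-relative MUBUF
// immediate, a materialized absolute address, or a plain immediate.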
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                         int SPAdj, unsigned FIOperandNum,
                                         RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
  // SGPR register spill
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE: {
    spillSGPR(MI, Index, RS);
    break;
  }

  // SGPR register restore
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE: {
    restoreSGPR(MI, Index, RS);
    break;
  }

  // VGPR register spill
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V32_SAVE: {
    const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                       AMDGPU::OpName::vdata);
    buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
          Index,
          VData->getReg(), VData->isKill(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
          *MI->memoperands_begin(),
          RS);
    MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
    MI->eraseFromParent();
    break;
  }
  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_V512_RESTORE: {
    const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                       AMDGPU::OpName::vdata);

    buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
          Index,
          VData->getReg(), VData->isKill(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
          *MI->memoperands_begin(),
          RS);
    MI->eraseFromParent();
    break;
  }

  default: {
    const DebugLoc &DL = MI->getDebugLoc();
    bool IsMUBUF = TII->isMUBUF(*MI);

    if (!IsMUBUF &&
        MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
      // Convert to an absolute stack address by finding the offset from the
      // scratch wave base and scaling by the wave size.
      //
      // In an entry function/kernel the stack address is already the
      // absolute address relative to the scratch wave offset.

      unsigned DiffReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

      bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
      unsigned ResultReg = IsCopy ?
        MI->getOperand(0).getReg() :
        MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
        .addReg(MFI->getFrameOffsetReg())
        .addReg(MFI->getScratchWaveOffsetReg());

      int64_t Offset = FrameInfo.getObjectOffset(Index);
      if (Offset == 0) {
        // XXX - This never happens because of emergency scavenging slot at 0?
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
          .addImm(Log2_32(ST.getWavefrontSize()))
          .addReg(DiffReg);
      } else {
        unsigned ScaledReg
          = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
          .addImm(Log2_32(ST.getWavefrontSize()))
          .addReg(DiffReg, RegState::Kill);

        // TODO: Fold if use instruction is another add of a constant.
        if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
          TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
            .addImm(Offset)
            .addReg(ScaledReg, RegState::Kill);
        } else {
          unsigned ConstOffsetReg
            = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
            .addImm(Offset);
          TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
            .addReg(ConstOffsetReg, RegState::Kill)
            .addReg(ScaledReg, RegState::Kill);
        }
      }

      // Don't introduce an extra copy if we're just materializing in a mov.
      if (IsCopy)
        MI->eraseFromParent();
      else
        FIOp.ChangeToRegister(ResultReg, false, false, true);
      return;
    }

    if (IsMUBUF) {
      // Disable offen so we don't need a 0 vgpr base.
      assert(static_cast<int>(FIOperandNum) ==
             AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                        AMDGPU::OpName::vaddr));

      assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
             == MFI->getFrameOffsetReg());

      int64_t Offset = FrameInfo.getObjectOffset(Index);
      int64_t OldImm
        = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
      int64_t NewOffset = OldImm + Offset;

      if (isUInt<12>(NewOffset) &&
          buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
        MI->eraseFromParent();
        return;
      }
    }

    // If the offset is simply too big, don't convert to a scratch wave offset
    // relative index.

    int64_t Offset = FrameInfo.getObjectOffset(Index);
    FIOp.ChangeToImmediate(Offset);
    if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
        .addImm(Offset);
      FIOp.ChangeToRegister(TmpReg, false, false, true);
    }
  }
  }
}

StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
#define AMDGPU_REG_ASM_NAMES
#include "AMDGPURegAsmNames.inc.cpp"

#define REG_RANGE(BeginReg, EndReg, RegTable)            \
  if (Reg >= BeginReg && Reg <= EndReg) {                \
    unsigned Index = Reg - BeginReg;                     \
    assert(Index < array_lengthof(RegTable));            \
    return RegTable[Index];                              \
  }

  REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
  REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
  REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
  REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
            VGPR96RegNames);

  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
            AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
            VGPR128RegNames);
  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
            AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
            SGPR128RegNames);

  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
            AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
            VGPR256RegNames);

  REG_RANGE(
    AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
    AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
    VGPR512RegNames);

  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
            AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
            SGPR256RegNames);

  REG_RANGE(
    AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
    AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
    SGPR512RegNames
  );

#undef REG_RANGE

  // FIXME: Rename flat_scr so we don't need to special case this.
  switch (Reg) {
  case AMDGPU::FLAT_SCR:
    return "flat_scratch";
  case AMDGPU::FLAT_SCR_LO:
    return "flat_scratch_lo";
  case AMDGPU::FLAT_SCR_HI:
    return "flat_scratch_hi";
  default:
    // For the special named registers the default is fine.
    return TargetRegisterInfo::getRegAsmName(Reg);
  }
}

// FIXME: This is very slow. It might be worth creating a map from physreg to
// register class.
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
  assert(!TargetRegisterInfo::isVirtualRegister(Reg));

  static const TargetRegisterClass *const BaseClasses[] = {
    &AMDGPU::VGPR_32RegClass,
    &AMDGPU::SReg_32RegClass,
    &AMDGPU::VReg_64RegClass,
    &AMDGPU::SReg_64RegClass,
    &AMDGPU::VReg_96RegClass,
    &AMDGPU::VReg_128RegClass,
    &AMDGPU::SReg_128RegClass,
    &AMDGPU::VReg_256RegClass,
    &AMDGPU::SReg_256RegClass,
    &AMDGPU::VReg_512RegClass,
    &AMDGPU::SReg_512RegClass,
    &AMDGPU::SCC_CLASSRegClass,
    &AMDGPU::Pseudo_SReg_32RegClass,
    &AMDGPU::Pseudo_SReg_128RegClass,
  };

  for (const TargetRegisterClass *BaseClass : BaseClasses) {
    if (BaseClass->contains(Reg)) {
      return BaseClass;
    }
  }
  return nullptr;
}

// TODO: It might be helpful to have some target specific flags in
// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
  unsigned Size = getRegSizeInBits(*RC);
  if (Size < 32)
    return false;
  switch (Size) {
  case 32:
    return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
  case 64:
    return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
  case 96:
    return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
  case 128:
    return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
  case 256:
    return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
  case 512:
    return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
  default:
    llvm_unreachable("Invalid register class size");
  }
}

const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
                                         const TargetRegisterClass *SRC) const {
  switch (getRegSizeInBits(*SRC)) {
  case 32:
    return &AMDGPU::VGPR_32RegClass;
  case 64:
    return &AMDGPU::VReg_64RegClass;
  case 96:
    return &AMDGPU::VReg_96RegClass;
  case 128:
    return &AMDGPU::VReg_128RegClass;
  case 256:
    return &AMDGPU::VReg_256RegClass;
  case 512:
    return &AMDGPU::VReg_512RegClass;
  default:
    llvm_unreachable("Invalid register class size");
  }
}

const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
                                         const TargetRegisterClass *VRC) const {
  switch (getRegSizeInBits(*VRC)) {
  case 32:
    return &AMDGPU::SGPR_32RegClass;
  case 64:
    return &AMDGPU::SReg_64RegClass;
  case 128:
    return &AMDGPU::SReg_128RegClass;
  case 256:
    return &AMDGPU::SReg_256RegClass;
  case 512:
    return &AMDGPU::SReg_512RegClass;
  default:
    llvm_unreachable("Invalid register class size");
  }
}

const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
                         const TargetRegisterClass *RC, unsigned SubIdx) const {
  if (SubIdx == AMDGPU::NoSubRegister)
    return RC;

  // We can assume that each lane corresponds to one 32-bit register.
  unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
  if (isSGPRClass(RC)) {
    switch (Count) {
    case 1:
      return &AMDGPU::SGPR_32RegClass;
    case 2:
      return &AMDGPU::SReg_64RegClass;
    case 4:
      return &AMDGPU::SReg_128RegClass;
    case 8:
      return &AMDGPU::SReg_256RegClass;
    case 16: /* fall-through */
    default:
      llvm_unreachable("Invalid sub-register class size");
    }
  } else {
    switch (Count) {
    case 1:
      return &AMDGPU::VGPR_32RegClass;
    case 2:
      return &AMDGPU::VReg_64RegClass;
    case 3:
      return &AMDGPU::VReg_96RegClass;
    case 4:
      return &AMDGPU::VReg_128RegClass;
    case 8:
      return &AMDGPU::VReg_256RegClass;
    case 16: /* fall-through */
    default:
      llvm_unreachable("Invalid sub-register class size");
    }
  }
}

bool SIRegisterInfo::shouldRewriteCopySrc(
  const TargetRegisterClass *DefRC,
  unsigned DefSubReg,
  const TargetRegisterClass *SrcRC,
  unsigned SrcSubReg) const {
  // We want to prefer the smallest register class possible, so we don't want
  // to stop and rewrite on anything that looks like a subregister
  // extract. Operations mostly don't care about the super register class, so
  // we only want to stop on the most basic of copies between the same register
  // class.
  //
  // e.g. if we have something like
  // %0 = ...
  // %1 = ...
  // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
  // %3 = COPY %2, sub0
  //
  // We want to look through the COPY to find:
  // => %3 = COPY %0

  // Plain copy.
  return getCommonSubClass(DefRC, SrcRC) != nullptr;
}

/// Returns a register that is not used at any point in the function.
/// If all registers are used, then this function will return
/// AMDGPU::NoRegister.
unsigned
SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
                                   const TargetRegisterClass *RC,
                                   const MachineFunction &MF) const {

  for (unsigned Reg : *RC)
    if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
      return Reg;
  return AMDGPU::NoRegister;
}

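// Return the sub-register indices that split RC into EltSize-byte pieces
// (e.g. a 128-bit class splits into four dword subregs when EltSize == 4).
// An empty list means the class is already a single element.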
ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
                                                   unsigned EltSize) const {
  if (EltSize == 4) {
    static const int16_t Sub0_15[] = {
      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
      AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
      AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
      AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
    };

    static const int16_t Sub0_7[] = {
      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
      AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    };

    static const int16_t Sub0_3[] = {
      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    };

    static const int16_t Sub0_2[] = {
      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
    };

    static const int16_t Sub0_1[] = {
      AMDGPU::sub0, AMDGPU::sub1,
    };

    switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    case 32:
      return {};
    case 64:
      return makeArrayRef(Sub0_1);
    case 96:
      return makeArrayRef(Sub0_2);
    case 128:
      return makeArrayRef(Sub0_3);
    case 256:
      return makeArrayRef(Sub0_7);
    case 512:
      return makeArrayRef(Sub0_15);
    default:
      llvm_unreachable("unhandled register size");
    }
  }

  if (EltSize == 8) {
    static const int16_t Sub0_15_64[] = {
      AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
      AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
      AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
      AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
    };

    static const int16_t Sub0_7_64[] = {
      AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
      AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
    };

    static const int16_t Sub0_3_64[] = {
      AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
    };

    switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    case 64:
      return {};
    case 128:
      return makeArrayRef(Sub0_3_64);
    case 256:
      return makeArrayRef(Sub0_7_64);
    case 512:
      return makeArrayRef(Sub0_15_64);
    default:
      llvm_unreachable("unhandled register size");
    }
  }

  assert(EltSize == 16 && "unhandled register spill split size");

  static const int16_t Sub0_15_128[] = {
    AMDGPU::sub0_sub1_sub2_sub3,
    AMDGPU::sub4_sub5_sub6_sub7,
    AMDGPU::sub8_sub9_sub10_sub11,
    AMDGPU::sub12_sub13_sub14_sub15
  };

  static const int16_t Sub0_7_128[] = {
    AMDGPU::sub0_sub1_sub2_sub3,
    AMDGPU::sub4_sub5_sub6_sub7
  };

  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
  case 128:
    return {};
  case 256:
    return makeArrayRef(Sub0_7_128);
  case 512:
    return makeArrayRef(Sub0_15_128);
  default:
    llvm_unreachable("unhandled register size");
  }
}

const TargetRegisterClass*
SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
                                  unsigned Reg) const {
  if (TargetRegisterInfo::isVirtualRegister(Reg))
    return MRI.getRegClass(Reg);

  return getPhysRegClass(Reg);
}

bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
                            unsigned Reg) const {
  const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
  assert(RC && "Register class for the reg not found");
  return hasVGPRs(RC);
}

bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
                                    const TargetRegisterClass *SrcRC,
                                    unsigned SubReg,
                                    const TargetRegisterClass *DstRC,
                                    unsigned DstSubReg,
                                    const TargetRegisterClass *NewRC,
                                    LiveIntervals &LIS) const {
  unsigned SrcSize = getRegSizeInBits(*SrcRC);
  unsigned DstSize = getRegSizeInBits(*DstRC);
  unsigned NewSize = getRegSizeInBits(*NewRC);

  // Do not increase size of registers beyond dword, we would need to allocate
  // adjacent registers and constraint regalloc more than needed.

  // Always allow dword coalescing.
  if (SrcSize <= 32 || DstSize <= 32)
    return true;

  return NewSize <= DstSize || NewSize <= SrcSize;
}

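// Pressure limits are tied to occupancy: the more waves that should stay
// resident given the LDS usage, the fewer VGPRs/SGPRs each wave may use.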
unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                             MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
                                                       MF.getFunction());
  switch (RC->getID()) {
  default:
    return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
  case AMDGPU::VGPR_32RegClassID:
    return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
  case AMDGPU::SGPR_32RegClassID:
    return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
  }
}

unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
                                                unsigned Idx) const {
  if (Idx == getVGPRPressureSet())
    return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
                               const_cast<MachineFunction &>(MF));

  if (Idx == getSGPRPressureSet())
    return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
                               const_cast<MachineFunction &>(MF));

  return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
}

const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
  static const int Empty[] = { -1 };

  if (hasRegUnit(AMDGPU::M0, RegUnit))
    return Empty;
  return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
}

unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
  // Not a callee saved register.
  return AMDGPU::SGPR30_SGPR31;
}

const TargetRegisterClass *
SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
                                         const MachineRegisterInfo &MRI) const {
  unsigned Size = getRegSizeInBits(MO.getReg(), MRI);
  const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
  if (!RB)
    return nullptr;

  switch (Size) {
  case 32:
    return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
                                                  &AMDGPU::SReg_32_XM0RegClass;
  case 64:
    return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
                                                  &AMDGPU::SReg_64_XEXECRegClass;
  case 96:
    return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
                                                  nullptr;
  case 128:
    return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
                                                  &AMDGPU::SReg_128RegClass;
  default:
    llvm_unreachable("not implemented");
  }
}