diff options
author | Ilia Mirkin <imirkin@alum.mit.edu> | 2016-05-28 14:28:07 -0400 |
---|---|---|
committer | Ilia Mirkin <imirkin@alum.mit.edu> | 2016-05-30 18:15:10 -0400 |
commit | 1f895caba0accc0af3e637d6193ac0b673ce98bc (patch) | |
tree | d5e5a40333339d1f84080ee5c6f14bb3a49eb16d | |
parent | 27a51ff9b420909334898785cf194b5998776e88 (diff) |
nvc0/ir: limit max number of regs based on availability in SM
This effectively limits the number of registers per thread to 32 on Fermi
and 64 on Kepler when 1024 threads are used, but allows the full amount to
be used with smaller thread counts.
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 2 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp | 4 |
2 files changed, 4 insertions, 2 deletions
```diff
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index da2fa4bdf1..689fecfea4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -455,7 +455,7 @@ NVC0LegalizePostRA::visit(Function *fn)
    pOne = new_LValue(fn, FILE_PREDICATE);
    carry = new_LValue(fn, FILE_FLAGS);
 
-   rZero->reg.data.id = prog->getTarget()->getFileSize(FILE_GPR);
+   rZero->reg.data.id = (prog->getTarget()->getChipset() >= NVISA_GK20A_CHIPSET) ? 255 : 63;
    carry->reg.data.id = 0;
    pOne->reg.data.id = 7;
 
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index fd0f8942ca..932ec39745 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -238,9 +238,11 @@ void TargetNVC0::initOpInfo()
 unsigned int
 TargetNVC0::getFileSize(DataFile file) const
 {
+   const unsigned int gprs = (chipset >= NVISA_GK20A_CHIPSET) ? 255 : 63;
+   const unsigned int smregs = (chipset >= NVISA_GK104_CHIPSET) ? 65536 : 32768;
    switch (file) {
    case FILE_NULL: return 0;
-   case FILE_GPR: return (chipset >= NVISA_GK20A_CHIPSET) ? 255 : 63;
+   case FILE_GPR: return MIN2(gprs, smregs / threads);
    case FILE_PREDICATE: return 7;
    case FILE_FLAGS: return 1;
    case FILE_ADDRESS: return 0;
```