summary refs log tree commit diff
path: root/src/shader_recompiler/ir_opt
diff options
context:
space:
mode:
Diffstat (limited to 'src/shader_recompiler/ir_opt')
-rw-r--r-- src/shader_recompiler/ir_opt/constant_propagation_pass.cpp 76
-rw-r--r-- src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp 13
-rw-r--r-- src/shader_recompiler/ir_opt/passes.h 4
-rw-r--r-- src/shader_recompiler/ir_opt/texture_pass.cpp 44
4 files changed, 114 insertions, 23 deletions
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index 826f9a54a..4d81e9336 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -7,6 +7,7 @@
#include <type_traits>
#include "common/bit_cast.h"
+#include "shader_recompiler/environment.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/value.h"
@@ -515,6 +516,9 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
case IR::Attribute::PrimitiveId:
case IR::Attribute::InstanceId:
case IR::Attribute::VertexId:
+ case IR::Attribute::BaseVertex:
+ case IR::Attribute::BaseInstance:
+ case IR::Attribute::DrawID:
break;
default:
return;
@@ -644,7 +648,63 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
}
}
-void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
+void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) { // Replaces a constant-buffer read with an attribute read when env reports a replacement for its (bank, offset).
+ const IR::Value bank{inst.Arg(0)}; // Argument 0 is used as the constant-buffer bank.
+ const IR::Value offset{inst.Arg(1)}; // Argument 1 is used as the offset within that bank.
+ if (!bank.IsImmediate() || !offset.IsImmediate()) { // Both operands must be immediates, otherwise there is nothing to look up.
+ return;
+ }
+ const auto bank_value = bank.U32();
+ const auto offset_value = offset.U32();
+ auto replacement = env.GetReplaceConstBuffer(bank_value, offset_value); // Tested for emptiness and dereferenced below (optional-like).
+ if (!replacement) { // No replacement registered for this location: leave the instruction untouched.
+ return;
+ }
+ const auto new_attribute = [replacement]() { // Immediately-invoked lambda mapping the ReplaceConstant value to an IR::Attribute.
+ switch (*replacement) {
+ case ReplaceConstant::BaseInstance:
+ return IR::Attribute::BaseInstance;
+ case ReplaceConstant::BaseVertex:
+ return IR::Attribute::BaseVertex;
+ case ReplaceConstant::DrawID:
+ return IR::Attribute::DrawID;
+ default:
+ throw NotImplementedException("Not implemented replacement variable {}", *replacement); // Any other ReplaceConstant value is rejected.
+ }
+ }();
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; // Emitter built from block and an iterator to inst; presumably emits at inst's position - TODO confirm.
+ if (inst.GetOpcode() == IR::Opcode::GetCbufU32) { // U32 reads are redirected to the U32 attribute getter.
+ inst.ReplaceUsesWith(ir.GetAttributeU32(new_attribute));
+ } else { // Every other opcode takes this path; the dispatch shown in this diff only passes GetCbufF32 and GetCbufU32.
+ inst.ReplaceUsesWith(ir.GetAttribute(new_attribute));
+ }
+}
+
+void FoldDriverConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst, u32 which_bank, // Replaces a constant-buffer read from bank which_bank with the value env.ReadCbufValue returns for it.
+ u32 offset_start = 0, u32 offset_end = std::numeric_limits<u16>::max()) { // Only offsets in [offset_start, offset_end) are folded. NOTE(review): needs <limits>; that include is not visible in this hunk - confirm.
+ const IR::Value bank{inst.Arg(0)}; // Argument 0 is used as the constant-buffer bank.
+ const IR::Value offset{inst.Arg(1)}; // Argument 1 is used as the offset within that bank.
+ if (!bank.IsImmediate() || !offset.IsImmediate()) { // Both operands must be immediates to be folded.
+ return;
+ }
+ const auto bank_value = bank.U32();
+ if (bank_value != which_bank) { // Reads from any other bank are left alone.
+ return;
+ }
+ const auto offset_value = offset.U32();
+ if (offset_value < offset_start || offset_value >= offset_end) { // Half-open range check: offset_end itself is excluded.
+ return;
+ }
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; // NOTE(review): ir is not referenced again in this function; looks unused - confirm before removing.
+ if (inst.GetOpcode() == IR::Opcode::GetCbufU32) { // U32 reads receive the raw value as an immediate.
+ inst.ReplaceUsesWith(IR::Value{env.ReadCbufValue(bank_value, offset_value)});
+ } else { // Other opcodes receive the same bits reinterpreted as f32 via Common::BitCast.
+ inst.ReplaceUsesWith(
+ IR::Value{Common::BitCast<f32>(env.ReadCbufValue(bank_value, offset_value))});
+ }
+}
+
+void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::GetRegister:
return FoldGetRegister(inst);
@@ -789,18 +849,28 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
IR::Opcode::CompositeInsertF16x4);
case IR::Opcode::FSwizzleAdd:
return FoldFSwizzleAdd(block, inst);
+ case IR::Opcode::GetCbufF32:
+ case IR::Opcode::GetCbufU32:
+ if (env.HasHLEMacroState()) {
+ FoldConstBuffer(env, block, inst);
+ }
+ if (env.IsPropietaryDriver()) {
+ FoldDriverConstBuffer(env, block, inst, 1);
+ }
+ break;
default:
break;
}
}
+
} // Anonymous namespace
-void ConstantPropagationPass(IR::Program& program) {
+void ConstantPropagationPass(Environment& env, IR::Program& program) {
const auto end{program.post_order_blocks.rend()};
for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
IR::Block* const block{*it};
for (IR::Inst& inst : block->Instructions()) {
- ConstantPropagation(*block, inst);
+ ConstantPropagation(env, *block, inst);
}
}
}
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 336338e62..9101722ba 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -11,6 +11,7 @@
#include "shader_recompiler/frontend/ir/breadth_first_search.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/host_translate_info.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {
@@ -402,7 +403,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
}
/// Returns the offset in indices (not bytes) for an equivalent storage instruction
-IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
+IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) {
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
IR::U32 offset;
if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
@@ -415,7 +416,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
}
// Subtract the least significant 32 bits from the guest offset. The result is the storage
// buffer offset in bytes.
- const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+ IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+
+ // Align the offset base to match the host alignment requirements
+ low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
return ir.ISub(offset, low_cbuf);
}
@@ -510,7 +514,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
}
} // Anonymous namespace
-void GlobalMemoryToStorageBufferPass(IR::Program& program) {
+void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) {
StorageInfo info;
for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) {
@@ -534,7 +538,8 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
IR::Block* const block{storage_inst.block};
IR::Inst* const inst{storage_inst.inst};
- const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
+ const IR::U32 offset{
+ StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
Replace(*block, *inst, index, offset);
}
}
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 11bfe801a..4ffad1172 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -13,9 +13,9 @@ struct HostTranslateInfo;
namespace Shader::Optimization {
void CollectShaderInfoPass(Environment& env, IR::Program& program);
-void ConstantPropagationPass(IR::Program& program);
+void ConstantPropagationPass(Environment& env, IR::Program& program);
void DeadCodeEliminationPass(IR::Program& program);
-void GlobalMemoryToStorageBufferPass(IR::Program& program);
+void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info);
void IdentityRemovalPass(IR::Program& program);
void LowerFp16ToFp32(IR::Program& program);
void LowerInt64ToInt32(IR::Program& program);
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
index f5c86fcb1..d374c976a 100644
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -355,21 +355,21 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
};
}
-TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) {
+u32 GetTextureHandle(Environment& env, const ConstBufferAddr& cbuf) {
const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index};
const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset};
const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset) << cbuf.shift_left};
const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)
<< cbuf.secondary_shift_left};
- return env.ReadTextureType(lhs_raw | rhs_raw);
+ return lhs_raw | rhs_raw;
+}
+
+TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) {
+ return env.ReadTextureType(GetTextureHandle(env, cbuf));
}
TexturePixelFormat ReadTexturePixelFormat(Environment& env, const ConstBufferAddr& cbuf) {
- const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index};
- const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset};
- const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)};
- const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)};
- return env.ReadTexturePixelFormat(lhs_raw | rhs_raw);
+ return env.ReadTexturePixelFormat(GetTextureHandle(env, cbuf));
}
class Descriptors {
@@ -386,8 +386,10 @@ public:
return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) {
return desc.cbuf_index == existing.cbuf_index &&
desc.cbuf_offset == existing.cbuf_offset &&
+ desc.shift_left == existing.shift_left &&
desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
+ desc.secondary_shift_left == existing.secondary_shift_left &&
desc.count == existing.count && desc.size_shift == existing.size_shift &&
desc.has_secondary == existing.has_secondary;
});
@@ -405,15 +407,20 @@ public:
}
u32 Add(const TextureDescriptor& desc) {
- return Add(texture_descriptors, desc, [&desc](const auto& existing) {
+ const u32 index{Add(texture_descriptors, desc, [&desc](const auto& existing) {
return desc.type == existing.type && desc.is_depth == existing.is_depth &&
desc.has_secondary == existing.has_secondary &&
desc.cbuf_index == existing.cbuf_index &&
desc.cbuf_offset == existing.cbuf_offset &&
+ desc.shift_left == existing.shift_left &&
desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
+ desc.secondary_shift_left == existing.secondary_shift_left &&
desc.count == existing.count && desc.size_shift == existing.size_shift;
- });
+ })};
+ // TODO: Read this from TIC
+ texture_descriptors[index].is_multisample |= desc.is_multisample;
+ return index;
}
u32 Add(const ImageDescriptor& desc) {
@@ -452,7 +459,8 @@ void PatchImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
const IR::Value coord(inst.Arg(1));
const IR::Value handle(ir.Imm32(0));
const IR::U32 lod{ir.Imm32(0)};
- const IR::Value texture_size = ir.ImageQueryDimension(handle, lod, info);
+ const IR::U1 skip_mips{ir.Imm1(true)};
+ const IR::Value texture_size = ir.ImageQueryDimension(handle, lod, skip_mips, info);
inst.SetArg(
1, ir.CompositeConstruct(
ir.FPMul(IR::F32(ir.CompositeExtract(coord, 0)),
@@ -486,10 +494,10 @@ void PatchTexelFetch(IR::Block& block, IR::Inst& inst, TexturePixelFormat pixel_
const IR::F32 w(ir.CompositeExtract(new_inst, 3));
const IR::F16F32F64 max_value(ir.Imm32(get_max_value()));
const IR::Value converted =
- ir.CompositeConstruct(ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(x)), max_value),
- ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(y)), max_value),
- ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(z)), max_value),
- ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(w)), max_value));
+ ir.CompositeConstruct(ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(x)), max_value),
+ ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(y)), max_value),
+ ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(z)), max_value),
+ ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(w)), max_value));
inst.ReplaceUsesWith(converted);
}
} // Anonymous namespace
@@ -524,6 +532,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
const auto& cbuf{texture_inst.cbuf};
auto flags{inst->Flags<IR::TextureInstInfo>()};
+ bool is_multisample{false};
switch (inst->GetOpcode()) {
case IR::Opcode::ImageQueryDimensions:
flags.type.Assign(ReadTextureType(env, cbuf));
@@ -538,6 +547,12 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
}
break;
case IR::Opcode::ImageFetch:
+ if (flags.type == TextureType::Color2D || flags.type == TextureType::Color2DRect ||
+ flags.type == TextureType::ColorArray2D) {
+ is_multisample = !inst->Arg(4).IsEmpty();
+ } else {
+ inst->SetArg(4, IR::U32{});
+ }
if (flags.type != TextureType::Color1D) {
break;
}
@@ -613,6 +628,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
index = descriptors.Add(TextureDescriptor{
.type = flags.type,
.is_depth = flags.is_depth != 0,
+ .is_multisample = is_multisample,
.has_secondary = cbuf.has_secondary,
.cbuf_index = cbuf.index,
.cbuf_offset = cbuf.offset,