From 2d48a7b4d0666ad16d03a22d85712617a0849046 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 9 Jan 2021 03:30:07 -0300 Subject: shader: Initial recompiler work --- src/shader_recompiler/frontend/ir/attribute.cpp | 447 ++++++++ src/shader_recompiler/frontend/ir/attribute.h | 242 +++++ src/shader_recompiler/frontend/ir/basic_block.cpp | 142 +++ src/shader_recompiler/frontend/ir/basic_block.h | 134 +++ src/shader_recompiler/frontend/ir/condition.cpp | 31 + src/shader_recompiler/frontend/ir/condition.h | 60 ++ src/shader_recompiler/frontend/ir/flow_test.cpp | 83 ++ src/shader_recompiler/frontend/ir/flow_test.h | 61 ++ src/shader_recompiler/frontend/ir/ir_emitter.cpp | 533 ++++++++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 123 +++ .../frontend/ir/microinstruction.cpp | 189 ++++ .../frontend/ir/microinstruction.h | 82 ++ src/shader_recompiler/frontend/ir/opcode.cpp | 67 ++ src/shader_recompiler/frontend/ir/opcode.h | 44 + src/shader_recompiler/frontend/ir/opcode.inc | 142 +++ src/shader_recompiler/frontend/ir/pred.h | 28 + src/shader_recompiler/frontend/ir/reg.h | 314 ++++++ src/shader_recompiler/frontend/ir/type.cpp | 36 + src/shader_recompiler/frontend/ir/type.h | 47 + src/shader_recompiler/frontend/ir/value.cpp | 124 +++ src/shader_recompiler/frontend/ir/value.h | 98 ++ .../frontend/maxwell/control_flow.cpp | 531 ++++++++++ .../frontend/maxwell/control_flow.h | 137 +++ src/shader_recompiler/frontend/maxwell/decode.cpp | 149 +++ src/shader_recompiler/frontend/maxwell/decode.h | 14 + .../frontend/maxwell/instruction.h | 62 ++ src/shader_recompiler/frontend/maxwell/location.h | 106 ++ src/shader_recompiler/frontend/maxwell/maxwell.inc | 285 +++++ src/shader_recompiler/frontend/maxwell/opcode.cpp | 26 + src/shader_recompiler/frontend/maxwell/opcode.h | 30 + src/shader_recompiler/frontend/maxwell/program.cpp | 69 ++ src/shader_recompiler/frontend/maxwell/program.h | 39 + .../frontend/maxwell/termination_code.cpp | 79 ++ .../frontend/maxwell/termination_code.h | 16 + .../frontend/maxwell/translate/impl/exit.cpp | 15 + .../impl/floating_point_conversion_integer.cpp | 133 +++ .../impl/floating_point_multi_function.cpp | 71 ++ .../frontend/maxwell/translate/impl/impl.cpp | 79 ++ .../frontend/maxwell/translate/impl/impl.h | 316 ++++++ .../translate/impl/load_store_attribute.cpp | 92 ++ .../maxwell/translate/impl/load_store_memory.cpp | 90 ++ .../maxwell/translate/impl/not_implemented.cpp | 1105 ++++++++++++++++++++ .../maxwell/translate/impl/register_move.cpp | 45 + .../frontend/maxwell/translate/translate.cpp | 50 + .../frontend/maxwell/translate/translate.h | 16 + 45 files changed, 6582 insertions(+) create mode 100644 src/shader_recompiler/frontend/ir/attribute.cpp create mode 100644 src/shader_recompiler/frontend/ir/attribute.h create mode 100644 src/shader_recompiler/frontend/ir/basic_block.cpp create mode 100644 src/shader_recompiler/frontend/ir/basic_block.h create mode 100644 src/shader_recompiler/frontend/ir/condition.cpp create mode 100644 src/shader_recompiler/frontend/ir/condition.h create mode 100644 src/shader_recompiler/frontend/ir/flow_test.cpp create mode 100644 src/shader_recompiler/frontend/ir/flow_test.h create mode 100644 src/shader_recompiler/frontend/ir/ir_emitter.cpp create mode 100644 src/shader_recompiler/frontend/ir/ir_emitter.h create mode 100644 src/shader_recompiler/frontend/ir/microinstruction.cpp create mode 100644 src/shader_recompiler/frontend/ir/microinstruction.h create mode 100644 src/shader_recompiler/frontend/ir/opcode.cpp create mode 100644 src/shader_recompiler/frontend/ir/opcode.h create mode 100644 src/shader_recompiler/frontend/ir/opcode.inc create mode 100644 src/shader_recompiler/frontend/ir/pred.h create mode 100644 src/shader_recompiler/frontend/ir/reg.h create mode 100644 src/shader_recompiler/frontend/ir/type.cpp create mode 100644 src/shader_recompiler/frontend/ir/type.h create mode 100644 src/shader_recompiler/frontend/ir/value.cpp create mode 100644 src/shader_recompiler/frontend/ir/value.h create mode 100644 src/shader_recompiler/frontend/maxwell/control_flow.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/control_flow.h create mode 100644 src/shader_recompiler/frontend/maxwell/decode.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/decode.h create mode 100644 src/shader_recompiler/frontend/maxwell/instruction.h create mode 100644 src/shader_recompiler/frontend/maxwell/location.h create mode 100644 src/shader_recompiler/frontend/maxwell/maxwell.inc create mode 100644 src/shader_recompiler/frontend/maxwell/opcode.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/opcode.h create mode 100644 src/shader_recompiler/frontend/maxwell/program.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/program.h create mode 100644 src/shader_recompiler/frontend/maxwell/termination_code.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/termination_code.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/impl.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/translate.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/translate.h (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp new file mode 100644 index 000000000..2fb7d576f --- /dev/null +++ b/src/shader_recompiler/frontend/ir/attribute.cpp @@ -0,0 +1,447 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/attribute.h" + +namespace Shader::IR { + +bool IsGeneric(Attribute attribute) noexcept { + return attribute >= Attribute::Generic0X && attribute <= Attribute::Generic31X; +} + +int GenericAttributeIndex(Attribute attribute) { + if (!IsGeneric(attribute)) { + throw InvalidArgument("Attribute is not generic {}", attribute); + } + return (static_cast(attribute) - static_cast(Attribute::Generic0X)) / 4; +} + +std::string NameOf(Attribute attribute) { + switch (attribute) { + case Attribute::PrimitiveId: + return "PrimitiveId"; + case Attribute::Layer: + return "Layer"; + case Attribute::ViewportIndex: + return "ViewportIndex"; + case Attribute::PointSize: + return "PointSize"; + case Attribute::PositionX: + return "Position.X"; + case Attribute::PositionY: + return "Position.Y"; + case Attribute::PositionZ: + return "Position.Z"; + case Attribute::PositionW: + return "Position.W"; + case Attribute::Generic0X: + return "Generic[0].X"; + case Attribute::Generic0Y: + return "Generic[0].Y"; + case Attribute::Generic0Z: + return "Generic[0].Z"; + case Attribute::Generic0W: + return "Generic[0].W"; + case Attribute::Generic1X: + return "Generic[1].X"; + case Attribute::Generic1Y: + return "Generic[1].Y"; + case Attribute::Generic1Z: + return "Generic[1].Z"; + case Attribute::Generic1W: + return "Generic[1].W"; + case Attribute::Generic2X: + return "Generic[2].X"; + case Attribute::Generic2Y: + return "Generic[2].Y"; + case Attribute::Generic2Z: + return "Generic[2].Z"; + case Attribute::Generic2W: + return "Generic[2].W"; + case Attribute::Generic3X: + return "Generic[3].X"; + case Attribute::Generic3Y: + return "Generic[3].Y"; + case Attribute::Generic3Z: + return "Generic[3].Z"; + case Attribute::Generic3W: + return "Generic[3].W"; + case Attribute::Generic4X: + return "Generic[4].X"; + case Attribute::Generic4Y: + return "Generic[4].Y"; + case Attribute::Generic4Z: + return "Generic[4].Z"; + case Attribute::Generic4W: + return "Generic[4].W"; + case Attribute::Generic5X: + return "Generic[5].X"; + case Attribute::Generic5Y: + return "Generic[5].Y"; + case Attribute::Generic5Z: + return "Generic[5].Z"; + case Attribute::Generic5W: + return "Generic[5].W"; + case Attribute::Generic6X: + return "Generic[6].X"; + case Attribute::Generic6Y: + return "Generic[6].Y"; + case Attribute::Generic6Z: + return "Generic[6].Z"; + case Attribute::Generic6W: + return "Generic[6].W"; + case Attribute::Generic7X: + return "Generic[7].X"; + case Attribute::Generic7Y: + return "Generic[7].Y"; + case Attribute::Generic7Z: + return "Generic[7].Z"; + case Attribute::Generic7W: + return "Generic[7].W"; + case Attribute::Generic8X: + return "Generic[8].X"; + case Attribute::Generic8Y: + return "Generic[8].Y"; + case Attribute::Generic8Z: + return "Generic[8].Z"; + case Attribute::Generic8W: + return "Generic[8].W"; + case Attribute::Generic9X: + return "Generic[9].X"; + case Attribute::Generic9Y: + return "Generic[9].Y"; + case Attribute::Generic9Z: + return "Generic[9].Z"; + case Attribute::Generic9W: + return "Generic[9].W"; + case Attribute::Generic10X: + return "Generic[10].X"; + case Attribute::Generic10Y: + return "Generic[10].Y"; + case Attribute::Generic10Z: + return "Generic[10].Z"; + case Attribute::Generic10W: + return "Generic[10].W"; + case Attribute::Generic11X: + return "Generic[11].X"; + case Attribute::Generic11Y: + return "Generic[11].Y"; + case Attribute::Generic11Z: + return "Generic[11].Z"; + case Attribute::Generic11W: + return "Generic[11].W"; + case Attribute::Generic12X: + return "Generic[12].X"; + case Attribute::Generic12Y: + return "Generic[12].Y"; + case Attribute::Generic12Z: + return "Generic[12].Z"; + case Attribute::Generic12W: + return "Generic[12].W"; + case Attribute::Generic13X: + return "Generic[13].X"; + case Attribute::Generic13Y: + return "Generic[13].Y"; + case Attribute::Generic13Z: + return "Generic[13].Z"; + case Attribute::Generic13W: + return "Generic[13].W"; + case Attribute::Generic14X: + return "Generic[14].X"; + case Attribute::Generic14Y: + return "Generic[14].Y"; + case Attribute::Generic14Z: + return "Generic[14].Z"; + case Attribute::Generic14W: + return "Generic[14].W"; + case Attribute::Generic15X: + return "Generic[15].X"; + case Attribute::Generic15Y: + return "Generic[15].Y"; + case Attribute::Generic15Z: + return "Generic[15].Z"; + case Attribute::Generic15W: + return "Generic[15].W"; + case Attribute::Generic16X: + return "Generic[16].X"; + case Attribute::Generic16Y: + return "Generic[16].Y"; + case Attribute::Generic16Z: + return "Generic[16].Z"; + case Attribute::Generic16W: + return "Generic[16].W"; + case Attribute::Generic17X: + return "Generic[17].X"; + case Attribute::Generic17Y: + return "Generic[17].Y"; + case Attribute::Generic17Z: + return "Generic[17].Z"; + case Attribute::Generic17W: + return "Generic[17].W"; + case Attribute::Generic18X: + return "Generic[18].X"; + case Attribute::Generic18Y: + return "Generic[18].Y"; + case Attribute::Generic18Z: + return "Generic[18].Z"; + case Attribute::Generic18W: + return "Generic[18].W"; + case Attribute::Generic19X: + return "Generic[19].X"; + case Attribute::Generic19Y: + return "Generic[19].Y"; + case Attribute::Generic19Z: + return "Generic[19].Z"; + case Attribute::Generic19W: + return "Generic[19].W"; + case Attribute::Generic20X: + return "Generic[20].X"; + case Attribute::Generic20Y: + return "Generic[20].Y"; + case Attribute::Generic20Z: + return "Generic[20].Z"; + case Attribute::Generic20W: + return "Generic[20].W"; + case Attribute::Generic21X: + return "Generic[21].X"; + case Attribute::Generic21Y: + return "Generic[21].Y"; + case Attribute::Generic21Z: + return "Generic[21].Z"; + case Attribute::Generic21W: + return "Generic[21].W"; + case Attribute::Generic22X: + return "Generic[22].X"; + case Attribute::Generic22Y: + return "Generic[22].Y"; + case Attribute::Generic22Z: + return "Generic[22].Z"; + case Attribute::Generic22W: + return "Generic[22].W"; + case Attribute::Generic23X: + return "Generic[23].X"; + case Attribute::Generic23Y: + return "Generic[23].Y"; + case Attribute::Generic23Z: + return "Generic[23].Z"; + case Attribute::Generic23W: + return "Generic[23].W"; + case Attribute::Generic24X: + return "Generic[24].X"; + case Attribute::Generic24Y: + return "Generic[24].Y"; + case Attribute::Generic24Z: + return "Generic[24].Z"; + case Attribute::Generic24W: + return "Generic[24].W"; + case Attribute::Generic25X: + return "Generic[25].X"; + case Attribute::Generic25Y: + return "Generic[25].Y"; + case Attribute::Generic25Z: + return "Generic[25].Z"; + case Attribute::Generic25W: + return "Generic[25].W"; + case Attribute::Generic26X: + return "Generic[26].X"; + case Attribute::Generic26Y: + return "Generic[26].Y"; + case Attribute::Generic26Z: + return "Generic[26].Z"; + case Attribute::Generic26W: + return "Generic[26].W"; + case Attribute::Generic27X: + return "Generic[27].X"; + case Attribute::Generic27Y: + return "Generic[27].Y"; + case Attribute::Generic27Z: + return "Generic[27].Z"; + case Attribute::Generic27W: + return "Generic[27].W"; + case Attribute::Generic28X: + return "Generic[28].X"; + case Attribute::Generic28Y: + return "Generic[28].Y"; + case Attribute::Generic28Z: + return "Generic[28].Z"; + case Attribute::Generic28W: + return "Generic[28].W"; + case Attribute::Generic29X: + return "Generic[29].X"; + case Attribute::Generic29Y: + return "Generic[29].Y"; + case Attribute::Generic29Z: + return "Generic[29].Z"; + case Attribute::Generic29W: + return "Generic[29].W"; + case Attribute::Generic30X: + return "Generic[30].X"; + case Attribute::Generic30Y: + return "Generic[30].Y"; + case Attribute::Generic30Z: + return "Generic[30].Z"; + case Attribute::Generic30W: + return "Generic[30].W"; + case Attribute::Generic31X: + return "Generic[31].X"; + case Attribute::Generic31Y: + return "Generic[31].Y"; + case Attribute::Generic31Z: + return "Generic[31].Z"; + case Attribute::Generic31W: + return "Generic[31].W"; + case Attribute::ColorFrontDiffuseR: + return "ColorFrontDiffuse.R"; + case Attribute::ColorFrontDiffuseG: + return "ColorFrontDiffuse.G"; + case Attribute::ColorFrontDiffuseB: + return "ColorFrontDiffuse.B"; + case Attribute::ColorFrontDiffuseA: + return "ColorFrontDiffuse.A"; + case Attribute::ColorFrontSpecularR: + return "ColorFrontSpecular.R"; + case Attribute::ColorFrontSpecularG: + return "ColorFrontSpecular.G"; + case Attribute::ColorFrontSpecularB: + return "ColorFrontSpecular.B"; + case Attribute::ColorFrontSpecularA: + return "ColorFrontSpecular.A"; + case Attribute::ColorBackDiffuseR: + return "ColorBackDiffuse.R"; + case Attribute::ColorBackDiffuseG: + return "ColorBackDiffuse.G"; + case Attribute::ColorBackDiffuseB: + return "ColorBackDiffuse.B"; + case Attribute::ColorBackDiffuseA: + return "ColorBackDiffuse.A"; + case Attribute::ColorBackSpecularR: + return "ColorBackSpecular.R"; + case Attribute::ColorBackSpecularG: + return "ColorBackSpecular.G"; + case Attribute::ColorBackSpecularB: + return "ColorBackSpecular.B"; + case Attribute::ColorBackSpecularA: + return "ColorBackSpecular.A"; + case Attribute::ClipDistance0: + return "ClipDistance[0]"; + case Attribute::ClipDistance1: + return "ClipDistance[1]"; + case Attribute::ClipDistance2: + return "ClipDistance[2]"; + case Attribute::ClipDistance3: + return "ClipDistance[3]"; + case Attribute::ClipDistance4: + return "ClipDistance[4]"; + case Attribute::ClipDistance5: + return "ClipDistance[5]"; + case Attribute::ClipDistance6: + return "ClipDistance[6]"; + case Attribute::ClipDistance7: + return "ClipDistance[7]"; + case Attribute::PointSpriteS: + return "PointSprite.S"; + case Attribute::PointSpriteT: + return "PointSprite.T"; + case Attribute::FogCoordinate: + return "FogCoordinate"; + case Attribute::TessellationEvaluationPointU: + return "TessellationEvaluationPoint.U"; + case Attribute::TessellationEvaluationPointV: + return "TessellationEvaluationPoint.V"; + case Attribute::InstanceId: + return "InstanceId"; + case Attribute::VertexId: + return "VertexId"; + case Attribute::FixedFncTexture0S: + return "FixedFncTexture[0].S"; + case Attribute::FixedFncTexture0T: + return "FixedFncTexture[0].T"; + case Attribute::FixedFncTexture0R: + return "FixedFncTexture[0].R"; + case Attribute::FixedFncTexture0Q: + return "FixedFncTexture[0].Q"; + case Attribute::FixedFncTexture1S: + return "FixedFncTexture[1].S"; + case Attribute::FixedFncTexture1T: + return "FixedFncTexture[1].T"; + case Attribute::FixedFncTexture1R: + return "FixedFncTexture[1].R"; + case Attribute::FixedFncTexture1Q: + return "FixedFncTexture[1].Q"; + case Attribute::FixedFncTexture2S: + return "FixedFncTexture[2].S"; + case Attribute::FixedFncTexture2T: + return "FixedFncTexture[2].T"; + case Attribute::FixedFncTexture2R: + return "FixedFncTexture[2].R"; + case Attribute::FixedFncTexture2Q: + return "FixedFncTexture[2].Q"; + case Attribute::FixedFncTexture3S: + return "FixedFncTexture[3].S"; + case Attribute::FixedFncTexture3T: + return "FixedFncTexture[3].T"; + case Attribute::FixedFncTexture3R: + return "FixedFncTexture[3].R"; + case Attribute::FixedFncTexture3Q: + return "FixedFncTexture[3].Q"; + case Attribute::FixedFncTexture4S: + return "FixedFncTexture[4].S"; + case Attribute::FixedFncTexture4T: + return "FixedFncTexture[4].T"; + case Attribute::FixedFncTexture4R: + return "FixedFncTexture[4].R"; + case Attribute::FixedFncTexture4Q: + return "FixedFncTexture[4].Q"; + case Attribute::FixedFncTexture5S: + return "FixedFncTexture[5].S"; + case Attribute::FixedFncTexture5T: + return "FixedFncTexture[5].T"; + case Attribute::FixedFncTexture5R: + return "FixedFncTexture[5].R"; + case Attribute::FixedFncTexture5Q: + return "FixedFncTexture[5].Q"; + case Attribute::FixedFncTexture6S: + return "FixedFncTexture[6].S"; + case Attribute::FixedFncTexture6T: + return "FixedFncTexture[6].T"; + case Attribute::FixedFncTexture6R: + return "FixedFncTexture[6].R"; + case Attribute::FixedFncTexture6Q: + return "FixedFncTexture[6].Q"; + case Attribute::FixedFncTexture7S: + return "FixedFncTexture[7].S"; + case Attribute::FixedFncTexture7T: + return "FixedFncTexture[7].T"; + case Attribute::FixedFncTexture7R: + return "FixedFncTexture[7].R"; + case Attribute::FixedFncTexture7Q: + return "FixedFncTexture[7].Q"; + case Attribute::FixedFncTexture8S: + return "FixedFncTexture[8].S"; + case Attribute::FixedFncTexture8T: + return "FixedFncTexture[8].T"; + case Attribute::FixedFncTexture8R: + return "FixedFncTexture[8].R"; + case Attribute::FixedFncTexture8Q: + return "FixedFncTexture[8].Q"; + case Attribute::FixedFncTexture9S: + return "FixedFncTexture[9].S"; + case Attribute::FixedFncTexture9T: + return "FixedFncTexture[9].T"; + case Attribute::FixedFncTexture9R: + return "FixedFncTexture[9].R"; + case Attribute::FixedFncTexture9Q: + return "FixedFncTexture[9].Q"; + case Attribute::ViewportMask: + return "ViewportMask"; + case Attribute::FrontFace: + return "FrontFace"; + } + return fmt::format("", static_cast(attribute)); +} + +} // namespace Shader::IR \ No newline at end of file diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h new file mode 100644 index 000000000..bb2cad6af --- /dev/null +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -0,0 +1,242 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/common_types.h" + +namespace Shader::IR { + +enum class Attribute : u64 { + PrimitiveId = 24, + Layer = 25, + ViewportIndex = 26, + PointSize = 27, + PositionX = 28, + PositionY = 29, + PositionZ = 30, + PositionW = 31, + Generic0X = 32, + Generic0Y = 33, + Generic0Z = 34, + Generic0W = 35, + Generic1X = 36, + Generic1Y = 37, + Generic1Z = 38, + Generic1W = 39, + Generic2X = 40, + Generic2Y = 41, + Generic2Z = 42, + Generic2W = 43, + Generic3X = 44, + Generic3Y = 45, + Generic3Z = 46, + Generic3W = 47, + Generic4X = 48, + Generic4Y = 49, + Generic4Z = 50, + Generic4W = 51, + Generic5X = 52, + Generic5Y = 53, + Generic5Z = 54, + Generic5W = 55, + Generic6X = 56, + Generic6Y = 57, + Generic6Z = 58, + Generic6W = 59, + Generic7X = 60, + Generic7Y = 61, + Generic7Z = 62, + Generic7W = 63, + Generic8X = 64, + Generic8Y = 65, + Generic8Z = 66, + Generic8W = 67, + Generic9X = 68, + Generic9Y = 69, + Generic9Z = 70, + Generic9W = 71, + Generic10X = 72, + Generic10Y = 73, + Generic10Z = 74, + Generic10W = 75, + Generic11X = 76, + Generic11Y = 77, + Generic11Z = 78, + Generic11W = 79, + Generic12X = 80, + Generic12Y = 81, + Generic12Z = 82, + Generic12W = 83, + Generic13X = 84, + Generic13Y = 85, + Generic13Z = 86, + Generic13W = 87, + Generic14X = 88, + Generic14Y = 89, + Generic14Z = 90, + Generic14W = 91, + Generic15X = 92, + Generic15Y = 93, + Generic15Z = 94, + Generic15W = 95, + Generic16X = 96, + Generic16Y = 97, + Generic16Z = 98, + Generic16W = 99, + Generic17X = 100, + Generic17Y = 101, + Generic17Z = 102, + Generic17W = 103, + Generic18X = 104, + Generic18Y = 105, + Generic18Z = 106, + Generic18W = 107, + Generic19X = 108, + Generic19Y = 109, + Generic19Z = 110, + Generic19W = 111, + Generic20X = 112, + Generic20Y = 113, + Generic20Z = 114, + Generic20W = 115, + Generic21X = 116, + Generic21Y = 117, + Generic21Z = 118, + Generic21W = 119, + Generic22X = 120, + Generic22Y = 121, + Generic22Z = 122, + Generic22W = 123, + Generic23X = 124, + Generic23Y = 125, + Generic23Z = 126, + Generic23W = 127, + Generic24X = 128, + Generic24Y = 129, + Generic24Z = 130, + Generic24W = 131, + Generic25X = 132, + Generic25Y = 133, + Generic25Z = 134, + Generic25W = 135, + Generic26X = 136, + Generic26Y = 137, + Generic26Z = 138, + Generic26W = 139, + Generic27X = 140, + Generic27Y = 141, + Generic27Z = 142, + Generic27W = 143, + Generic28X = 144, + Generic28Y = 145, + Generic28Z = 146, + Generic28W = 147, + Generic29X = 148, + Generic29Y = 149, + Generic29Z = 150, + Generic29W = 151, + Generic30X = 152, + Generic30Y = 153, + Generic30Z = 154, + Generic30W = 155, + Generic31X = 156, + Generic31Y = 157, + Generic31Z = 158, + Generic31W = 159, + ColorFrontDiffuseR = 160, + ColorFrontDiffuseG = 161, + ColorFrontDiffuseB = 162, + ColorFrontDiffuseA = 163, + ColorFrontSpecularR = 164, + ColorFrontSpecularG = 165, + ColorFrontSpecularB = 166, + ColorFrontSpecularA = 167, + ColorBackDiffuseR = 168, + ColorBackDiffuseG = 169, + ColorBackDiffuseB = 170, + ColorBackDiffuseA = 171, + ColorBackSpecularR = 172, + ColorBackSpecularG = 173, + ColorBackSpecularB = 174, + ColorBackSpecularA = 175, + ClipDistance0 = 176, + ClipDistance1 = 177, + ClipDistance2 = 178, + ClipDistance3 = 179, + ClipDistance4 = 180, + ClipDistance5 = 181, + ClipDistance6 = 182, + ClipDistance7 = 183, + PointSpriteS = 184, + PointSpriteT = 185, + FogCoordinate = 186, + TessellationEvaluationPointU = 188, + TessellationEvaluationPointV = 189, + InstanceId = 190, + VertexId = 191, + FixedFncTexture0S = 192, + FixedFncTexture0T = 193, + FixedFncTexture0R = 194, + FixedFncTexture0Q = 195, + FixedFncTexture1S = 196, + FixedFncTexture1T = 197, + FixedFncTexture1R = 198, + FixedFncTexture1Q = 199, + FixedFncTexture2S = 200, + FixedFncTexture2T = 201, + FixedFncTexture2R = 202, + FixedFncTexture2Q = 203, + FixedFncTexture3S = 204, + FixedFncTexture3T = 205, + FixedFncTexture3R = 206, + FixedFncTexture3Q = 207, + FixedFncTexture4S = 208, + FixedFncTexture4T = 209, + FixedFncTexture4R = 210, + FixedFncTexture4Q = 211, + FixedFncTexture5S = 212, + FixedFncTexture5T = 213, + FixedFncTexture5R = 214, + FixedFncTexture5Q = 215, + FixedFncTexture6S = 216, + FixedFncTexture6T = 217, + FixedFncTexture6R = 218, + FixedFncTexture6Q = 219, + FixedFncTexture7S = 220, + FixedFncTexture7T = 221, + FixedFncTexture7R = 222, + FixedFncTexture7Q = 223, + FixedFncTexture8S = 224, + FixedFncTexture8T = 225, + FixedFncTexture8R = 226, + FixedFncTexture8Q = 227, + FixedFncTexture9S = 228, + FixedFncTexture9T = 229, + FixedFncTexture9R = 230, + FixedFncTexture9Q = 231, + ViewportMask = 232, + FrontFace = 255, +}; + +[[nodiscard]] bool IsGeneric(Attribute attribute) noexcept; + +[[nodiscard]] int GenericAttributeIndex(Attribute attribute); + +[[nodiscard]] std::string NameOf(Attribute attribute); + +} // namespace Shader::IR + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template + auto format(const Shader::IR::Attribute& attribute, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(attribute)); + } +}; diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp new file mode 100644 index 000000000..0406726ad --- /dev/null +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -0,0 +1,142 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include + +#include "common/bit_cast.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +Block::Block(u32 begin, u32 end) : location_begin{begin}, location_end{end} {} + +Block::~Block() = default; + +void Block::AppendNewInst(Opcode op, std::initializer_list args) { + PrependNewInst(end(), op, args); +} + +Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, + std::initializer_list args) { + Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op)}; + const auto result_it{instructions.insert(insertion_point, *inst)}; + + if (inst->NumArgs() != args.size()) { + throw InvalidArgument("Invalid number of arguments {} in {}", args.size(), op); + } + std::ranges::for_each(args, [inst, index = size_t{0}](const Value& arg) mutable { + inst->SetArg(index, arg); + ++index; + }); + return result_it; +} + +u32 Block::LocationBegin() const noexcept { + return location_begin; +} + +u32 Block::LocationEnd() const noexcept { + return location_end; +} + +Block::InstructionList& Block::Instructions() noexcept { + return instructions; +} + +const Block::InstructionList& Block::Instructions() const noexcept { + return instructions; +} + +static std::string ArgToIndex(const std::map& block_to_index, + const std::map& inst_to_index, + const Value& arg) { + if (arg.IsEmpty()) { + return ""; + } + if (arg.IsLabel()) { + if (const auto it{block_to_index.find(arg.Label())}; it != block_to_index.end()) { + return fmt::format("{{Block ${}}}", it->second); + } + return fmt::format("$", reinterpret_cast(arg.Label())); + } + if (!arg.IsImmediate()) { + if (const auto it{inst_to_index.find(arg.Inst())}; it != inst_to_index.end()) { + return fmt::format("%{}", it->second); + } + return fmt::format("%", reinterpret_cast(arg.Inst())); + } + switch (arg.Type()) { + case Type::U1: + return fmt::format("#{}", arg.U1() ? '1' : '0'); + case Type::U8: + return fmt::format("#{}", arg.U8()); + case Type::U16: + return fmt::format("#{}", arg.U16()); + case Type::U32: + return fmt::format("#{}", arg.U32()); + case Type::U64: + return fmt::format("#{}", arg.U64()); + case Type::Reg: + return fmt::format("{}", arg.Reg()); + case Type::Pred: + return fmt::format("{}", arg.Pred()); + case Type::Attribute: + return fmt::format("{}", arg.Attribute()); + default: + return ""; + } +} + +std::string DumpBlock(const Block& block) { + size_t inst_index{0}; + std::map inst_to_index; + return DumpBlock(block, {}, inst_to_index, inst_index); +} + +std::string DumpBlock(const Block& block, const std::map& block_to_index, + std::map& inst_to_index, size_t& inst_index) { + std::string ret{"Block"}; + if (const auto it{block_to_index.find(&block)}; it != block_to_index.end()) { + ret += fmt::format(" ${}", it->second); + } + ret += fmt::format(": begin={:04x} end={:04x}\n", block.LocationBegin(), block.LocationEnd()); + + for (const Inst& inst : block) { + const Opcode op{inst.Opcode()}; + ret += fmt::format("[{:016x}] ", reinterpret_cast(&inst)); + if (TypeOf(op) != Type::Void) { + ret += fmt::format("%{:<5} = {}", inst_index, op); + } else { + ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces + } + const size_t arg_count{NumArgsOf(op)}; + for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { + const Value arg{inst.Arg(arg_index)}; + ret += arg_index != 0 ? ", " : " "; + ret += ArgToIndex(block_to_index, inst_to_index, arg); + + const Type actual_type{arg.Type()}; + const Type expected_type{ArgTypeOf(op, arg_index)}; + if (!AreTypesCompatible(actual_type, expected_type)) { + ret += fmt::format("", actual_type, expected_type); + } + } + if (TypeOf(op) != Type::Void) { + ret += fmt::format(" (uses: {})\n", inst.UseCount()); + } else { + ret += '\n'; + } + + inst_to_index.emplace(&inst, inst_index); + ++inst_index; + } + return ret; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h new file mode 100644 index 000000000..3ed2eb957 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -0,0 +1,134 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include +#include + +#include "shader_recompiler/frontend/ir/microinstruction.h" + +namespace Shader::IR { + +class Block { +public: + using InstructionList = boost::intrusive::list; + using size_type = InstructionList::size_type; + using iterator = InstructionList::iterator; + using const_iterator = InstructionList::const_iterator; + using reverse_iterator = InstructionList::reverse_iterator; + using const_reverse_iterator = InstructionList::const_reverse_iterator; + + explicit Block(u32 begin, u32 end); + ~Block(); + + Block(const Block&) = delete; + Block& operator=(const Block&) = delete; + + Block(Block&&) = default; + Block& operator=(Block&&) = default; + + /// Appends a new instruction to the end of this basic block. + void AppendNewInst(Opcode op, std::initializer_list args); + + /// Prepends a new instruction to this basic block before the insertion point. + iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list args); + + /// Gets the starting location of this basic block. + [[nodiscard]] u32 LocationBegin() const noexcept; + /// Gets the end location for this basic block. + [[nodiscard]] u32 LocationEnd() const noexcept; + + /// Gets a mutable reference to the instruction list for this basic block. + InstructionList& Instructions() noexcept; + /// Gets an immutable reference to the instruction list for this basic block. + const InstructionList& Instructions() const noexcept; + + [[nodiscard]] bool empty() const { + return instructions.empty(); + } + [[nodiscard]] size_type size() const { + return instructions.size(); + } + + [[nodiscard]] Inst& front() { + return instructions.front(); + } + [[nodiscard]] const Inst& front() const { + return instructions.front(); + } + + [[nodiscard]] Inst& back() { + return instructions.back(); + } + [[nodiscard]] const Inst& back() const { + return instructions.back(); + } + + [[nodiscard]] iterator begin() { + return instructions.begin(); + } + [[nodiscard]] const_iterator begin() const { + return instructions.begin(); + } + [[nodiscard]] iterator end() { + return instructions.end(); + } + [[nodiscard]] const_iterator end() const { + return instructions.end(); + } + + [[nodiscard]] reverse_iterator rbegin() { + return instructions.rbegin(); + } + [[nodiscard]] const_reverse_iterator rbegin() const { + return instructions.rbegin(); + } + [[nodiscard]] reverse_iterator rend() { + return instructions.rend(); + } + [[nodiscard]] const_reverse_iterator rend() const { + return instructions.rend(); + } + + [[nodiscard]] const_iterator cbegin() const { + return instructions.cbegin(); + } + [[nodiscard]] const_iterator cend() const { + return instructions.cend(); + } + + [[nodiscard]] const_reverse_iterator crbegin() const { + return instructions.crbegin(); + } + [[nodiscard]] const_reverse_iterator crend() const { + return instructions.crend(); + } + +private: + /// Starting location of this block + u32 location_begin; + /// End location of this block + u32 location_end; + + /// List of instructions in this block. + InstructionList instructions; + + /// Memory pool for instruction list + boost::fast_pool_allocator + instruction_alloc_pool; +}; + +[[nodiscard]] std::string DumpBlock(const Block& block); + +[[nodiscard]] std::string DumpBlock(const Block& block, + const std::map& block_to_index, + std::map& inst_to_index, + size_t& inst_index); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp new file mode 100644 index 000000000..edff35dc7 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/condition.cpp @@ -0,0 +1,31 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include + +#include "shader_recompiler/frontend/ir/condition.h" + +namespace Shader::IR { + +std::string NameOf(Condition condition) { + std::string ret; + if (condition.FlowTest() != FlowTest::T) { + ret = fmt::to_string(condition.FlowTest()); + } + const auto [pred, negated]{condition.Pred()}; + if (pred != Pred::PT || negated) { + if (!ret.empty()) { + ret += '&'; + } + if (negated) { + ret += '!'; + } + ret += fmt::to_string(pred); + } + return ret; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h new file mode 100644 index 000000000..52737025c --- /dev/null +++ b/src/shader_recompiler/frontend/ir/condition.h @@ -0,0 +1,60 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include + +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/flow_test.h" +#include "shader_recompiler/frontend/ir/pred.h" + +namespace Shader::IR { + +class Condition { +public: + Condition() noexcept = default; + + explicit Condition(FlowTest flow_test_, Pred pred_, bool pred_negated_ = false) noexcept + : flow_test{static_cast(flow_test_)}, pred{static_cast(pred_)}, + pred_negated{pred_negated_ ? u8{1} : u8{0}} {} + + explicit Condition(Pred pred_, bool pred_negated_ = false) noexcept + : Condition(FlowTest::T, pred_, pred_negated_) {} + + Condition(bool value) : Condition(Pred::PT, !value) {} + + auto operator<=>(const Condition&) const noexcept = default; + + [[nodiscard]] IR::FlowTest FlowTest() const noexcept { + return static_cast(flow_test); + } + + [[nodiscard]] std::pair Pred() const noexcept { + return {static_cast(pred), pred_negated != 0}; + } + +private: + u16 flow_test; + u8 pred; + u8 pred_negated; +}; + +std::string NameOf(Condition condition); + +} // namespace Shader::IR + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template + auto format(const Shader::IR::Condition& cond, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(cond)); + } +}; diff --git a/src/shader_recompiler/frontend/ir/flow_test.cpp b/src/shader_recompiler/frontend/ir/flow_test.cpp new file mode 100644 index 000000000..6ebb4ad89 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/flow_test.cpp @@ -0,0 +1,83 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include + +#include "shader_recompiler/frontend/ir/flow_test.h" + +namespace Shader::IR { + +std::string NameOf(FlowTest flow_test) { + switch (flow_test) { + case FlowTest::F: + return "F"; + case FlowTest::LT: + return "LT"; + case FlowTest::EQ: + return "EQ"; + case FlowTest::LE: + return "LE"; + case FlowTest::GT: + return "GT"; + case FlowTest::NE: + return "NE"; + case FlowTest::GE: + return "GE"; + case FlowTest::NUM: + return "NUM"; + case FlowTest::NaN: + return "NAN"; + case FlowTest::LTU: + return "LTU"; + case FlowTest::EQU: + return "EQU"; + case FlowTest::LEU: + return "LEU"; + case FlowTest::GTU: + return "GTU"; + case FlowTest::NEU: + return "NEU"; + case FlowTest::GEU: + return "GEU"; + case FlowTest::T: + return "T"; + case FlowTest::OFF: + return "OFF"; + case FlowTest::LO: + return "LO"; + case FlowTest::SFF: + return "SFF"; + case FlowTest::LS: + return "LS"; + case FlowTest::HI: + return "HI"; + case FlowTest::SFT: + return "SFT"; + case FlowTest::HS: + return "HS"; + case FlowTest::OFT: + return "OFT"; + case FlowTest::CSM_TA: + return "CSM_TA"; + case FlowTest::CSM_TR: + return "CSM_TR"; + case FlowTest::CSM_MX: + return "CSM_MX"; + case FlowTest::FCSM_TA: + return "FCSM_TA"; + case FlowTest::FCSM_TR: + return "FCSM_TR"; + case FlowTest::FCSM_MX: + return "FCSM_MX"; + case FlowTest::RLE: + return "RLE"; + case FlowTest::RGT: + return "RGT"; + } + return fmt::format("", static_cast(flow_test)); +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/flow_test.h b/src/shader_recompiler/frontend/ir/flow_test.h new file mode 100644 index 000000000..ac883da13 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/flow_test.h @@ -0,0 +1,61 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include + +namespace Shader::IR { + +enum class FlowTest { + F, + LT, + EQ, + LE, + GT, + NE, + GE, + NUM, + NaN, + LTU, + EQU, + LEU, + GTU, + NEU, + GEU, + T, + OFF, + LO, + SFF, + LS, + HI, + SFT, + HS, + OFT, + CSM_TA, + CSM_TR, + CSM_MX, + FCSM_TA, + FCSM_TR, + FCSM_MX, + RLE, + RGT, +}; + +[[nodiscard]] std::string NameOf(FlowTest flow_test); + +} // namespace Shader::IR + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template + auto format(const Shader::IR::FlowTest& flow_test, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(flow_test)); + } +}; diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp new file mode 100644 index 000000000..6450e4b2c --- /dev/null +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -0,0 +1,533 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_cast.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +[[noreturn]] static void ThrowInvalidType(Type type) { + throw InvalidArgument("Invalid type {}", type); +} + +U1 IREmitter::Imm1(bool value) const { + return U1{Value{value}}; +} + +U8 IREmitter::Imm8(u8 value) const { + return U8{Value{value}}; +} + +U16 IREmitter::Imm16(u16 value) const { + return U16{Value{value}}; +} + +U32 IREmitter::Imm32(u32 value) const { + return U32{Value{value}}; +} + +U32 IREmitter::Imm32(s32 value) const { + return U32{Value{static_cast(value)}}; +} + +U32 IREmitter::Imm32(f32 value) const { + return U32{Value{Common::BitCast(value)}}; +} + +U64 IREmitter::Imm64(u64 value) const { + return U64{Value{value}}; +} + +U64 IREmitter::Imm64(f64 value) const { + return U64{Value{Common::BitCast(value)}}; +} + +void IREmitter::Branch(IR::Block* label) { + Inst(Opcode::Branch, label); +} + +void IREmitter::BranchConditional(const U1& cond, IR::Block* true_label, IR::Block* false_label) { + Inst(Opcode::BranchConditional, cond, true_label, false_label); +} + +void IREmitter::Exit() { + Inst(Opcode::Exit); +} + +void IREmitter::Return() { + Inst(Opcode::Return); +} + +void IREmitter::Unreachable() { + Inst(Opcode::Unreachable); +} + +U32 IREmitter::GetReg(IR::Reg reg) { + return Inst(Opcode::GetRegister, reg); +} + +void IREmitter::SetReg(IR::Reg reg, const U32& value) { + Inst(Opcode::SetRegister, reg, value); +} + +U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) { + const U1 value{Inst(Opcode::GetPred, pred)}; + if (is_negated) { + return Inst(Opcode::LogicalNot, value); + } else { + return value; + } +} + +void IREmitter::SetPred(IR::Pred pred, const U1& value) { + Inst(Opcode::SetPred, pred, value); +} + +U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) { + return Inst(Opcode::GetCbuf, binding, byte_offset); +} + +U1 IREmitter::GetZFlag() { + return Inst(Opcode::GetZFlag); +} + +U1 IREmitter::GetSFlag() { + return Inst(Opcode::GetSFlag); +} + +U1 IREmitter::GetCFlag() { + return Inst(Opcode::GetCFlag); +} + +U1 IREmitter::GetOFlag() { + return Inst(Opcode::GetOFlag); +} + +void IREmitter::SetZFlag(const U1& value) { + Inst(Opcode::SetZFlag, value); +} + +void IREmitter::SetSFlag(const U1& value) { + Inst(Opcode::SetSFlag, value); +} + +void IREmitter::SetCFlag(const U1& value) { + Inst(Opcode::SetCFlag, value); +} + +void IREmitter::SetOFlag(const U1& value) { + Inst(Opcode::SetOFlag, value); +} + +U32 IREmitter::GetAttribute(IR::Attribute attribute) { + return Inst(Opcode::GetAttribute, attribute); +} + +void IREmitter::SetAttribute(IR::Attribute attribute, const U32& value) { + Inst(Opcode::SetAttribute, attribute, value); +} + +void IREmitter::WriteGlobalU8(const U64& address, const U32& value) { + Inst(Opcode::WriteGlobalU8, address, value); +} + +void IREmitter::WriteGlobalS8(const U64& address, const U32& value) { + Inst(Opcode::WriteGlobalS8, address, value); +} + +void IREmitter::WriteGlobalU16(const U64& address, const U32& value) { + Inst(Opcode::WriteGlobalU16, address, value); +} + +void IREmitter::WriteGlobalS16(const U64& address, const U32& value) { + Inst(Opcode::WriteGlobalS16, address, value); +} + +void IREmitter::WriteGlobal32(const U64& address, const U32& value) { + Inst(Opcode::WriteGlobal32, address, value); +} + +void IREmitter::WriteGlobal64(const U64& address, const IR::Value& vector) { + Inst(Opcode::WriteGlobal64, address, vector); +} + +void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) { + Inst(Opcode::WriteGlobal128, address, vector); +} + +U1 IREmitter::GetZeroFromOp(const Value& op) { + return Inst(Opcode::GetZeroFromOp, op); +} + +U1 IREmitter::GetSignFromOp(const Value& op) { + return Inst(Opcode::GetSignFromOp, op); +} + +U1 IREmitter::GetCarryFromOp(const Value& op) { + return Inst(Opcode::GetCarryFromOp, op); +} + +U1 IREmitter::GetOverflowFromOp(const Value& op) { + return Inst(Opcode::GetOverflowFromOp, op); +} + +U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b) { + if (a.Type() != a.Type()) { + throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::U16: + return Inst(Opcode::FPAdd16, a, b); + case Type::U32: + return Inst(Opcode::FPAdd32, a, b); + case Type::U64: + return Inst(Opcode::FPAdd64, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + +Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2) { + if (e1.Type() != e2.Type()) { + throw InvalidArgument("Incompatible types {} {}", e1.Type(), e2.Type()); + } + return Inst(Opcode::CompositeConstruct2, e1, e2); +} + +Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3) { + if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) { + throw InvalidArgument("Incompatible types {} {} {}", e1.Type(), e2.Type(), e3.Type()); + } + return Inst(Opcode::CompositeConstruct3, e1, e2, e3); +} + +Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3, + const UAny& e4) { + if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) { + throw InvalidArgument("Incompatible types {} {} {}", e1.Type(), e2.Type(), e3.Type(), + e4.Type()); + } + return Inst(Opcode::CompositeConstruct4, e1, e2, e3, e4); +} + +UAny IREmitter::CompositeExtract(const Value& vector, size_t element) { + if (element >= 4) { + throw InvalidArgument("Out of bounds element {}", element); + } + return Inst(Opcode::CompositeExtract, vector, Imm32(static_cast(element))); +} + +U64 IREmitter::PackUint2x32(const Value& vector) { + return Inst(Opcode::PackUint2x32, vector); +} + +Value IREmitter::UnpackUint2x32(const U64& value) { + return Inst(Opcode::UnpackUint2x32, value); +} + +U32 IREmitter::PackFloat2x16(const Value& vector) { + return Inst(Opcode::PackFloat2x16, vector); +} + +Value IREmitter::UnpackFloat2x16(const U32& value) { + return Inst(Opcode::UnpackFloat2x16, value); +} + +U64 IREmitter::PackDouble2x32(const Value& vector) { + return Inst(Opcode::PackDouble2x32, vector); +} + +Value IREmitter::UnpackDouble2x32(const U64& value) { + return Inst(Opcode::UnpackDouble2x32, value); +} + +U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b) { + if (a.Type() != b.Type()) { + throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::U16: + return Inst(Opcode::FPMul16, a, b); + case Type::U32: + return Inst(Opcode::FPMul32, a, b); + case Type::U64: + return Inst(Opcode::FPMul64, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + +U16U32U64 IREmitter::FPAbs(const U16U32U64& value) { + switch (value.Type()) { + case Type::U16: + return Inst(Opcode::FPAbs16, value); + case Type::U32: + return Inst(Opcode::FPAbs32, value); + case Type::U64: + return Inst(Opcode::FPAbs64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U16U32U64 IREmitter::FPNeg(const U16U32U64& value) { + switch (value.Type()) { + case Type::U16: + return Inst(Opcode::FPNeg16, value); + case Type::U32: + return Inst(Opcode::FPNeg32, value); + case Type::U64: + return Inst(Opcode::FPNeg64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U16U32U64 IREmitter::FPAbsNeg(const U16U32U64& value, bool abs, bool neg) { + U16U32U64 result{value}; + if (abs) { + result = FPAbs(value); + } + if (neg) { + result = FPNeg(value); + } + return result; +} + +U32 IREmitter::FPCosNotReduced(const U32& value) { + return Inst(Opcode::FPCosNotReduced, value); +} + +U32 IREmitter::FPExp2NotReduced(const U32& value) { + return Inst(Opcode::FPExp2NotReduced, value); +} + +U32 IREmitter::FPLog2(const U32& value) { + return Inst(Opcode::FPLog2, value); +} + +U32U64 IREmitter::FPRecip(const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::FPRecip32, value); + case Type::U64: + return Inst(Opcode::FPRecip64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::FPRecipSqrt(const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::FPRecipSqrt32, value); + case Type::U64: + return Inst(Opcode::FPRecipSqrt64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32 IREmitter::FPSinNotReduced(const U32& value) { + return Inst(Opcode::FPSinNotReduced, value); +} + +U32 IREmitter::FPSqrt(const U32& value) { + return Inst(Opcode::FPSqrt, value); +} + +U16U32U64 IREmitter::FPSaturate(const U16U32U64& value) { + switch (value.Type()) { + case Type::U16: + return Inst(Opcode::FPSaturate16, value); + case Type::U32: + return Inst(Opcode::FPSaturate32, value); + case Type::U64: + return Inst(Opcode::FPSaturate64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U16U32U64 IREmitter::FPRoundEven(const U16U32U64& value) { + switch (value.Type()) { + case Type::U16: + return Inst(Opcode::FPRoundEven16, value); + case Type::U32: + return Inst(Opcode::FPRoundEven32, value); + case Type::U64: + return Inst(Opcode::FPRoundEven64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U16U32U64 IREmitter::FPFloor(const U16U32U64& value) { + switch (value.Type()) { + case Type::U16: + return Inst(Opcode::FPFloor16, value); + case Type::U32: + return Inst(Opcode::FPFloor32, value); + case Type::U64: + return Inst(Opcode::FPFloor64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U16U32U64 IREmitter::FPCeil(const U16U32U64& value) { + switch (value.Type()) { + case Type::U16: + return Inst(Opcode::FPCeil16, value); + case Type::U32: + return Inst(Opcode::FPCeil32, value); + case Type::U64: + return Inst(Opcode::FPCeil64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U16U32U64 IREmitter::FPTrunc(const U16U32U64& value) { + switch (value.Type()) { + case Type::U16: + return Inst(Opcode::FPTrunc16, value); + case Type::U32: + return Inst(Opcode::FPTrunc32, value); + case Type::U64: + return Inst(Opcode::FPTrunc64, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U1 IREmitter::LogicalOr(const U1& a, const U1& b) { + return Inst(Opcode::LogicalOr, a, b); +} + +U1 IREmitter::LogicalAnd(const U1& a, const U1& b) { + return Inst(Opcode::LogicalAnd, a, b); +} + +U1 IREmitter::LogicalNot(const U1& value) { + return Inst(Opcode::LogicalNot, value); +} + +U32U64 IREmitter::ConvertFToS(size_t bitsize, const U16U32U64& value) { + switch (bitsize) { + case 16: + switch (value.Type()) { + case Type::U16: + return Inst(Opcode::ConvertS16F16, value); + case Type::U32: + return Inst(Opcode::ConvertS16F32, value); + case Type::U64: + return Inst(Opcode::ConvertS16F64, value); + default: + ThrowInvalidType(value.Type()); + } + case 32: + switch (value.Type()) { + case Type::U16: + return Inst(Opcode::ConvertS32F16, value); + case Type::U32: + return Inst(Opcode::ConvertS32F32, value); + case Type::U64: + return Inst(Opcode::ConvertS32F64, value); + default: + ThrowInvalidType(value.Type()); + } + case 64: + switch (value.Type()) { + case Type::U16: + return Inst(Opcode::ConvertS64F16, value); + case Type::U32: + return Inst(Opcode::ConvertS64F32, value); + case Type::U64: + return Inst(Opcode::ConvertS64F64, value); + default: + ThrowInvalidType(value.Type()); + } + default: + throw InvalidArgument("Invalid destination bitsize {}", bitsize); + } +} + +U32U64 IREmitter::ConvertFToU(size_t bitsize, const U16U32U64& value) { + switch (bitsize) { + case 16: + switch (value.Type()) { + case Type::U16: + return Inst(Opcode::ConvertU16F16, value); + case Type::U32: + return Inst(Opcode::ConvertU16F32, value); + case Type::U64: + return Inst(Opcode::ConvertU16F64, value); + default: + ThrowInvalidType(value.Type()); + } + case 32: + switch (value.Type()) { + case Type::U16: + return Inst(Opcode::ConvertU32F16, value); + case Type::U32: + return Inst(Opcode::ConvertU32F32, value); + case Type::U64: + return Inst(Opcode::ConvertU32F64, value); + default: + ThrowInvalidType(value.Type()); + } + case 64: + switch (value.Type()) { + case Type::U16: + return Inst(Opcode::ConvertU64F16, value); + case Type::U32: + return Inst(Opcode::ConvertU64F32, value); + case Type::U64: + return Inst(Opcode::ConvertU64F64, value); + default: + ThrowInvalidType(value.Type()); + } + default: + throw InvalidArgument("Invalid destination bitsize {}", bitsize); + } +} + +U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& value) { + if (is_signed) { + return ConvertFToS(bitsize, value); + } else { + return ConvertFToU(bitsize, value); + } +} + +U32U64 IREmitter::ConvertU(size_t bitsize, const U32U64& value) { + switch (bitsize) { + case 32: + switch (value.Type()) { + case Type::U32: + // Nothing to do + return value; + case Type::U64: + return Inst(Opcode::ConvertU32U64, value); + default: + break; + } + break; + case 64: + switch (value.Type()) { + case Type::U32: + // Nothing to do + return value; + case Type::U64: + return Inst(Opcode::ConvertU64U32, value); + default: + break; + } + } + throw NotImplementedException("Conversion from {} to {} bits", value.Type(), bitsize); +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h new file mode 100644 index 000000000..1af79f41c --- /dev/null +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -0,0 +1,123 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "shader_recompiler/frontend/ir/attribute.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +class IREmitter { +public: + explicit IREmitter(Block& block_) : block{block_}, insertion_point{block.end()} {} + + Block& block; + + [[nodiscard]] U1 Imm1(bool value) const; + [[nodiscard]] U8 Imm8(u8 value) const; + [[nodiscard]] U16 Imm16(u16 value) const; + [[nodiscard]] U32 Imm32(u32 value) const; + [[nodiscard]] U32 Imm32(s32 value) const; + [[nodiscard]] U32 Imm32(f32 value) const; + [[nodiscard]] U64 Imm64(u64 value) const; + [[nodiscard]] U64 Imm64(f64 value) const; + + void Branch(IR::Block* label); + void BranchConditional(const U1& cond, IR::Block* true_label, IR::Block* false_label); + void Exit(); + void Return(); + void Unreachable(); + + [[nodiscard]] U32 GetReg(IR::Reg reg); + void SetReg(IR::Reg reg, const U32& value); + + [[nodiscard]] U1 GetPred(IR::Pred pred, bool is_negated = false); + void SetPred(IR::Pred pred, const U1& value); + + [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); + + [[nodiscard]] U1 GetZFlag(); + [[nodiscard]] U1 GetSFlag(); + [[nodiscard]] U1 GetCFlag(); + [[nodiscard]] U1 GetOFlag(); + + void SetZFlag(const U1& value); + void SetSFlag(const U1& value); + void SetCFlag(const U1& value); + void SetOFlag(const U1& value); + + [[nodiscard]] U32 GetAttribute(IR::Attribute attribute); + void SetAttribute(IR::Attribute attribute, const U32& value); + + void WriteGlobalU8(const U64& address, const U32& value); + void WriteGlobalS8(const U64& address, const U32& value); + void WriteGlobalU16(const U64& address, const U32& value); + void WriteGlobalS16(const U64& address, const U32& value); + void WriteGlobal32(const U64& address, const U32& value); + void WriteGlobal64(const U64& address, const IR::Value& vector); + void WriteGlobal128(const U64& address, const IR::Value& vector); + + [[nodiscard]] U1 GetZeroFromOp(const Value& op); + [[nodiscard]] U1 GetSignFromOp(const Value& op); + [[nodiscard]] U1 GetCarryFromOp(const Value& op); + [[nodiscard]] U1 GetOverflowFromOp(const Value& op); + + [[nodiscard]] Value CompositeConstruct(const UAny& e1, const UAny& e2); + [[nodiscard]] Value CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3); + [[nodiscard]] Value CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3, + const UAny& e4); + [[nodiscard]] UAny CompositeExtract(const Value& vector, size_t element); + + [[nodiscard]] U64 PackUint2x32(const Value& vector); + [[nodiscard]] Value UnpackUint2x32(const U64& value); + + [[nodiscard]] U32 PackFloat2x16(const Value& vector); + [[nodiscard]] Value UnpackFloat2x16(const U32& value); + + [[nodiscard]] U64 PackDouble2x32(const Value& vector); + [[nodiscard]] Value UnpackDouble2x32(const U64& value); + + [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b); + [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b); + + [[nodiscard]] U16U32U64 FPAbs(const U16U32U64& value); + [[nodiscard]] U16U32U64 FPNeg(const U16U32U64& value); + [[nodiscard]] U16U32U64 FPAbsNeg(const U16U32U64& value, bool abs, bool neg); + + [[nodiscard]] U32 FPCosNotReduced(const U32& value); + [[nodiscard]] U32 FPExp2NotReduced(const U32& value); + [[nodiscard]] U32 FPLog2(const U32& value); + [[nodiscard]] U32U64 FPRecip(const U32U64& value); + [[nodiscard]] U32U64 FPRecipSqrt(const U32U64& value); + [[nodiscard]] U32 FPSinNotReduced(const U32& value); + [[nodiscard]] U32 FPSqrt(const U32& value); + [[nodiscard]] U16U32U64 FPSaturate(const U16U32U64& value); + [[nodiscard]] U16U32U64 FPRoundEven(const U16U32U64& value); + [[nodiscard]] U16U32U64 FPFloor(const U16U32U64& value); + [[nodiscard]] U16U32U64 FPCeil(const U16U32U64& value); + [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value); + + [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); + [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); + [[nodiscard]] U1 LogicalNot(const U1& value); + + [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const U16U32U64& value); + [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const U16U32U64& value); + [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& value); + + [[nodiscard]] U32U64 ConvertU(size_t bitsize, const U32U64& value); + +private: + IR::Block::iterator insertion_point; + + template + T Inst(Opcode op, Args... args) { + auto it{block.PrependNewInst(insertion_point, op, {Value{args}...})}; + return T{Value{&*it}}; + } +}; + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp new file mode 100644 index 000000000..553fec3b7 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -0,0 +1,189 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/type.h" + +namespace Shader::IR { + +static void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { + if (inst && inst->Opcode() != opcode) { + throw LogicError("Invalid pseudo-instruction"); + } +} + +static void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { + if (dest_inst) { + throw LogicError("Only one of each type of pseudo-op allowed"); + } + dest_inst = pseudo_inst; +} + +static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { + if (inst->Opcode() != expected_opcode) { + throw LogicError("Undoing use of invalid pseudo-op"); + } + inst = nullptr; +} + +bool Inst::MayHaveSideEffects() const noexcept { + switch (op) { + case Opcode::SetAttribute: + case Opcode::SetAttributeIndexed: + case Opcode::WriteGlobalU8: + case Opcode::WriteGlobalS8: + case Opcode::WriteGlobalU16: + case Opcode::WriteGlobalS16: + case Opcode::WriteGlobal32: + case Opcode::WriteGlobal64: + case Opcode::WriteGlobal128: + return true; + default: + return false; + } +} + +bool Inst::IsPseudoInstruction() const noexcept { + switch (op) { + case Opcode::GetZeroFromOp: + case Opcode::GetSignFromOp: + case Opcode::GetCarryFromOp: + case Opcode::GetOverflowFromOp: + case Opcode::GetZSCOFromOp: + return true; + default: + return false; + } +} + +bool Inst::HasAssociatedPseudoOperation() const noexcept { + return zero_inst || sign_inst || carry_inst || overflow_inst || zsco_inst; +} + +Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { + // This is faster than doing a search through the block. + switch (opcode) { + case Opcode::GetZeroFromOp: + CheckPseudoInstruction(zero_inst, Opcode::GetZeroFromOp); + return zero_inst; + case Opcode::GetSignFromOp: + CheckPseudoInstruction(sign_inst, Opcode::GetSignFromOp); + return sign_inst; + case Opcode::GetCarryFromOp: + CheckPseudoInstruction(carry_inst, Opcode::GetCarryFromOp); + return carry_inst; + case Opcode::GetOverflowFromOp: + CheckPseudoInstruction(overflow_inst, Opcode::GetOverflowFromOp); + return overflow_inst; + case Opcode::GetZSCOFromOp: + CheckPseudoInstruction(zsco_inst, Opcode::GetZSCOFromOp); + return zsco_inst; + default: + throw InvalidArgument("{} is not a pseudo-instruction", opcode); + } +} + +size_t Inst::NumArgs() const { + return NumArgsOf(op); +} + +IR::Type Inst::Type() const { + return TypeOf(op); +} + +Value Inst::Arg(size_t index) const { + if (index >= NumArgsOf(op)) { + throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); + } + return args[index]; +} + +void Inst::SetArg(size_t index, Value value) { + if (index >= NumArgsOf(op)) { + throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); + } + if (!args[index].IsImmediate()) { + UndoUse(args[index]); + } + if (!value.IsImmediate()) { + Use(value); + } + args[index] = value; +} + +void Inst::Invalidate() { + ClearArgs(); + op = Opcode::Void; +} + +void Inst::ClearArgs() { + for (auto& value : args) { + if (!value.IsImmediate()) { + UndoUse(value); + } + value = {}; + } +} + +void Inst::ReplaceUsesWith(Value replacement) { + Invalidate(); + + op = Opcode::Identity; + + if (!replacement.IsImmediate()) { + Use(replacement); + } + args[0] = replacement; +} + +void Inst::Use(const Value& value) { + ++value.Inst()->use_count; + + switch (op) { + case Opcode::GetZeroFromOp: + SetPseudoInstruction(value.Inst()->zero_inst, this); + break; + case Opcode::GetSignFromOp: + SetPseudoInstruction(value.Inst()->sign_inst, this); + break; + case Opcode::GetCarryFromOp: + SetPseudoInstruction(value.Inst()->carry_inst, this); + break; + case Opcode::GetOverflowFromOp: + SetPseudoInstruction(value.Inst()->overflow_inst, this); + break; + case Opcode::GetZSCOFromOp: + SetPseudoInstruction(value.Inst()->zsco_inst, this); + break; + default: + break; + } +} + +void Inst::UndoUse(const Value& value) { + --value.Inst()->use_count; + + switch (op) { + case Opcode::GetZeroFromOp: + RemovePseudoInstruction(value.Inst()->zero_inst, Opcode::GetZeroFromOp); + break; + case Opcode::GetSignFromOp: + RemovePseudoInstruction(value.Inst()->sign_inst, Opcode::GetSignFromOp); + break; + case Opcode::GetCarryFromOp: + RemovePseudoInstruction(value.Inst()->carry_inst, Opcode::GetCarryFromOp); + break; + case Opcode::GetOverflowFromOp: + RemovePseudoInstruction(value.Inst()->overflow_inst, Opcode::GetOverflowFromOp); + break; + case Opcode::GetZSCOFromOp: + RemovePseudoInstruction(value.Inst()->zsco_inst, Opcode::GetZSCOFromOp); + break; + default: + break; + } +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h new file mode 100644 index 000000000..43460b950 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -0,0 +1,82 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include + +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/opcode.h" +#include "shader_recompiler/frontend/ir/type.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +constexpr size_t MAX_ARG_COUNT = 4; + +class Inst : public boost::intrusive::list_base_hook<> { +public: + explicit Inst(Opcode op_) noexcept : op(op_) {} + + /// Get the number of uses this instruction has. + [[nodiscard]] int UseCount() const noexcept { + return use_count; + } + + /// Determines whether this instruction has uses or not. + [[nodiscard]] bool HasUses() const noexcept { + return use_count > 0; + } + + /// Get the opcode this microinstruction represents. + [[nodiscard]] IR::Opcode Opcode() const noexcept { + return op; + } + + /// Determines whether or not this instruction may have side effects. + [[nodiscard]] bool MayHaveSideEffects() const noexcept; + + /// Determines whether or not this instruction is a pseudo-instruction. + /// Pseudo-instructions depend on their parent instructions for their semantics. + [[nodiscard]] bool IsPseudoInstruction() const noexcept; + + /// Determines if there is a pseudo-operation associated with this instruction. + [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept; + /// Gets a pseudo-operation associated with this instruction + [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); + + /// Get the number of arguments this instruction has. + [[nodiscard]] size_t NumArgs() const; + + /// Get the type this instruction returns. + [[nodiscard]] IR::Type Type() const; + + /// Get the value of a given argument index. + [[nodiscard]] Value Arg(size_t index) const; + /// Set the value of a given argument index. + void SetArg(size_t index, Value value); + + void Invalidate(); + void ClearArgs(); + + void ReplaceUsesWith(Value replacement); + +private: + void Use(const Value& value); + void UndoUse(const Value& value); + + IR::Opcode op{}; + int use_count{}; + std::array args{}; + Inst* zero_inst{}; + Inst* sign_inst{}; + Inst* carry_inst{}; + Inst* overflow_inst{}; + Inst* zsco_inst{}; + u64 flags{}; +}; + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/opcode.cpp b/src/shader_recompiler/frontend/ir/opcode.cpp new file mode 100644 index 000000000..65d074029 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/opcode.cpp @@ -0,0 +1,67 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/opcode.h" + +namespace Shader::IR { +namespace { +struct OpcodeMeta { + std::string_view name; + Type type; + std::array arg_types; +}; + +using enum Type; + +constexpr std::array META_TABLE{ +#define OPCODE(name_token, type_token, ...) \ + OpcodeMeta{ \ + .name{#name_token}, \ + .type{type_token}, \ + .arg_types{__VA_ARGS__}, \ + }, +#include "opcode.inc" +#undef OPCODE +}; + +void ValidateOpcode(Opcode op) { + const size_t raw{static_cast(op)}; + if (raw >= META_TABLE.size()) { + throw InvalidArgument("Invalid opcode with raw value {}", raw); + } +} +} // Anonymous namespace + +Type TypeOf(Opcode op) { + ValidateOpcode(op); + return META_TABLE[static_cast(op)].type; +} + +size_t NumArgsOf(Opcode op) { + ValidateOpcode(op); + const auto& arg_types{META_TABLE[static_cast(op)].arg_types}; + const auto distance{std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void))}; + return static_cast(distance); +} + +Type ArgTypeOf(Opcode op, size_t arg_index) { + ValidateOpcode(op); + const auto& arg_types{META_TABLE[static_cast(op)].arg_types}; + if (arg_index >= arg_types.size() || arg_types[arg_index] == Type::Void) { + throw InvalidArgument("Out of bounds argument"); + } + return arg_types[arg_index]; +} + +std::string_view NameOf(Opcode op) { + ValidateOpcode(op); + return META_TABLE[static_cast(op)].name; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/opcode.h b/src/shader_recompiler/frontend/ir/opcode.h new file mode 100644 index 000000000..1f4440379 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/opcode.h @@ -0,0 +1,44 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include + +#include "shader_recompiler/frontend/ir/type.h" + +namespace Shader::IR { + +enum class Opcode { +#define OPCODE(name, ...) name, +#include "opcode.inc" +#undef OPCODE +}; + +/// Get return type of an opcode +[[nodiscard]] Type TypeOf(Opcode op); + +/// Get the number of arguments an opcode accepts +[[nodiscard]] size_t NumArgsOf(Opcode op); + +/// Get the required type of an argument of an opcode +[[nodiscard]] Type ArgTypeOf(Opcode op, size_t arg_index); + +/// Get the name of an opcode +[[nodiscard]] std::string_view NameOf(Opcode op); + +} // namespace Shader::IR + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template + auto format(const Shader::IR::Opcode& op, FormatContext& ctx) { + return format_to(ctx.out(), "{}", Shader::IR::NameOf(op)); + } +}; diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc new file mode 100644 index 000000000..371064bf3 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/opcode.inc @@ -0,0 +1,142 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... +OPCODE(Void, Void, ) +OPCODE(Identity, Opaque, Opaque, ) + +// Control flow +OPCODE(Branch, Void, Label, ) +OPCODE(BranchConditional, Void, U1, Label, Label, ) +OPCODE(Exit, Void, ) +OPCODE(Return, Void, ) +OPCODE(Unreachable, Void, ) + +// Context getters/setters +OPCODE(GetRegister, U32, Reg, ) +OPCODE(SetRegister, Void, Reg, U32, ) +OPCODE(GetPred, U1, Pred, ) +OPCODE(SetPred, Void, Pred, U1, ) +OPCODE(GetCbuf, U32, U32, U32, ) +OPCODE(GetAttribute, U32, Attribute, ) +OPCODE(SetAttribute, U32, Attribute, ) +OPCODE(GetAttributeIndexed, U32, U32, ) +OPCODE(SetAttributeIndexed, U32, U32, ) +OPCODE(GetZSCORaw, U32, ) +OPCODE(SetZSCORaw, Void, U32, ) +OPCODE(SetZSCO, Void, ZSCO, ) +OPCODE(GetZFlag, U1, Void, ) +OPCODE(GetSFlag, U1, Void, ) +OPCODE(GetCFlag, U1, Void, ) +OPCODE(GetOFlag, U1, Void, ) +OPCODE(SetZFlag, Void, U1, ) +OPCODE(SetSFlag, Void, U1, ) +OPCODE(SetCFlag, Void, U1, ) +OPCODE(SetOFlag, Void, U1, ) + +// Memory operations +OPCODE(WriteGlobalU8, Void, U64, U32, ) +OPCODE(WriteGlobalS8, Void, U64, U32, ) +OPCODE(WriteGlobalU16, Void, U64, U32, ) +OPCODE(WriteGlobalS16, Void, U64, U32, ) +OPCODE(WriteGlobal32, Void, U64, U32, ) +OPCODE(WriteGlobal64, Void, U64, Opaque, ) +OPCODE(WriteGlobal128, Void, U64, Opaque, ) + +// Vector utility +OPCODE(CompositeConstruct2, Opaque, Opaque, Opaque, ) +OPCODE(CompositeConstruct3, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(CompositeConstruct4, Opaque, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(CompositeExtract, Opaque, Opaque, U32, ) + +// Bitwise conversions +OPCODE(PackUint2x32, U64, Opaque, ) +OPCODE(UnpackUint2x32, Opaque, U64, ) +OPCODE(PackFloat2x16, U32, Opaque, ) +OPCODE(UnpackFloat2x16, Opaque, U32, ) +OPCODE(PackDouble2x32, U64, Opaque, ) +OPCODE(UnpackDouble2x32, Opaque, U64, ) + +// Pseudo-operation, handled specially at final emit +OPCODE(GetZeroFromOp, U1, Opaque, ) +OPCODE(GetSignFromOp, U1, Opaque, ) +OPCODE(GetCarryFromOp, U1, Opaque, ) +OPCODE(GetOverflowFromOp, U1, Opaque, ) +OPCODE(GetZSCOFromOp, ZSCO, Opaque, ) + +// Floating-point operations +OPCODE(FPAbs16, U16, U16 ) +OPCODE(FPAbs32, U32, U32 ) +OPCODE(FPAbs64, U64, U64 ) +OPCODE(FPAdd16, U16, U16, U16 ) +OPCODE(FPAdd32, U32, U32, U32 ) +OPCODE(FPAdd64, U64, U64, U64 ) +OPCODE(FPFma16, U16, U16, U16 ) +OPCODE(FPFma32, U32, U32, U32 ) +OPCODE(FPFma64, U64, U64, U64 ) +OPCODE(FPMax32, U32, U32, U32 ) +OPCODE(FPMax64, U64, U64, U64 ) +OPCODE(FPMin32, U32, U32, U32 ) +OPCODE(FPMin64, U64, U64, U64 ) +OPCODE(FPMul16, U16, U16, U16 ) +OPCODE(FPMul32, U32, U32, U32 ) +OPCODE(FPMul64, U64, U64, U64 ) +OPCODE(FPNeg16, U16, U16 ) +OPCODE(FPNeg32, U32, U32 ) +OPCODE(FPNeg64, U64, U64 ) +OPCODE(FPRecip32, U32, U32 ) +OPCODE(FPRecip64, U64, U64 ) +OPCODE(FPRecipSqrt32, U32, U32 ) +OPCODE(FPRecipSqrt64, U64, U64 ) +OPCODE(FPSqrt, U32, U32 ) +OPCODE(FPSin, U32, U32 ) +OPCODE(FPSinNotReduced, U32, U32 ) +OPCODE(FPExp2, U32, U32 ) +OPCODE(FPExp2NotReduced, U32, U32 ) +OPCODE(FPCos, U32, U32 ) +OPCODE(FPCosNotReduced, U32, U32 ) +OPCODE(FPLog2, U32, U32 ) +OPCODE(FPSaturate16, U16, U16 ) +OPCODE(FPSaturate32, U32, U32 ) +OPCODE(FPSaturate64, U64, U64 ) +OPCODE(FPRoundEven16, U16, U16 ) +OPCODE(FPRoundEven32, U32, U32 ) +OPCODE(FPRoundEven64, U64, U64 ) +OPCODE(FPFloor16, U16, U16 ) +OPCODE(FPFloor32, U32, U32 ) +OPCODE(FPFloor64, U64, U64 ) +OPCODE(FPCeil16, U16, U16 ) +OPCODE(FPCeil32, U32, U32 ) +OPCODE(FPCeil64, U64, U64 ) +OPCODE(FPTrunc16, U16, U16 ) +OPCODE(FPTrunc32, U32, U32 ) +OPCODE(FPTrunc64, U64, U64 ) + +// Logical operations +OPCODE(LogicalOr, U1, U1, U1, ) +OPCODE(LogicalAnd, U1, U1, U1, ) +OPCODE(LogicalNot, U1, U1, ) + +// Conversion operations +OPCODE(ConvertS16F16, U32, U16, ) +OPCODE(ConvertS16F32, U32, U32, ) +OPCODE(ConvertS16F64, U32, U64, ) +OPCODE(ConvertS32F16, U32, U16, ) +OPCODE(ConvertS32F32, U32, U32, ) +OPCODE(ConvertS32F64, U32, U64, ) +OPCODE(ConvertS64F16, U64, U16, ) +OPCODE(ConvertS64F32, U64, U32, ) +OPCODE(ConvertS64F64, U64, U64, ) +OPCODE(ConvertU16F16, U32, U16, ) +OPCODE(ConvertU16F32, U32, U32, ) +OPCODE(ConvertU16F64, U32, U64, ) +OPCODE(ConvertU32F16, U32, U16, ) +OPCODE(ConvertU32F32, U32, U32, ) +OPCODE(ConvertU32F64, U32, U64, ) +OPCODE(ConvertU64F16, U64, U16, ) +OPCODE(ConvertU64F32, U64, U32, ) +OPCODE(ConvertU64F64, U64, U64, ) + +OPCODE(ConvertU64U32, U64, U32, ) +OPCODE(ConvertU32U64, U32, U64, ) diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h new file mode 100644 index 000000000..37cc53006 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/pred.h @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +namespace Shader::IR { + +enum class Pred { P0, P1, P2, P3, P4, P5, P6, PT }; + +} // namespace Shader::IR + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template + auto format(const Shader::IR::Pred& pred, FormatContext& ctx) { + if (pred == Shader::IR::Pred::PT) { + return fmt::format_to(ctx.out(), "PT"); + } else { + return fmt::format_to(ctx.out(), "P{}", static_cast(pred)); + } + } +}; diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h new file mode 100644 index 000000000..316fc4be8 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/reg.h @@ -0,0 +1,314 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" + +namespace Shader::IR { + +enum class Reg : u64 { + R0, + R1, + R2, + R3, + R4, + R5, + R6, + R7, + R8, + R9, + R10, + R11, + R12, + R13, + R14, + R15, + R16, + R17, + R18, + R19, + R20, + R21, + R22, + R23, + R24, + R25, + R26, + R27, + R28, + R29, + R30, + R31, + R32, + R33, + R34, + R35, + R36, + R37, + R38, + R39, + R40, + R41, + R42, + R43, + R44, + R45, + R46, + R47, + R48, + R49, + R50, + R51, + R52, + R53, + R54, + R55, + R56, + R57, + R58, + R59, + R60, + R61, + R62, + R63, + R64, + R65, + R66, + R67, + R68, + R69, + R70, + R71, + R72, + R73, + R74, + R75, + R76, + R77, + R78, + R79, + R80, + R81, + R82, + R83, + R84, + R85, + R86, + R87, + R88, + R89, + R90, + R91, + R92, + R93, + R94, + R95, + R96, + R97, + R98, + R99, + R100, + R101, + R102, + R103, + R104, + R105, + R106, + R107, + R108, + R109, + R110, + R111, + R112, + R113, + R114, + R115, + R116, + R117, + R118, + R119, + R120, + R121, + R122, + R123, + R124, + R125, + R126, + R127, + R128, + R129, + R130, + R131, + R132, + R133, + R134, + R135, + R136, + R137, + R138, + R139, + R140, + R141, + R142, + R143, + R144, + R145, + R146, + R147, + R148, + R149, + R150, + R151, + R152, + R153, + R154, + R155, + R156, + R157, + R158, + R159, + R160, + R161, + R162, + R163, + R164, + R165, + R166, + R167, + R168, + R169, + R170, + R171, + R172, + R173, + R174, + R175, + R176, + R177, + R178, + R179, + R180, + R181, + R182, + R183, + R184, + R185, + R186, + R187, + R188, + R189, + R190, + R191, + R192, + R193, + R194, + R195, + R196, + R197, + R198, + R199, + R200, + R201, + R202, + R203, + R204, + R205, + R206, + R207, + R208, + R209, + R210, + R211, + R212, + R213, + R214, + R215, + R216, + R217, + R218, + R219, + R220, + R221, + R222, + R223, + R224, + R225, + R226, + R227, + R228, + R229, + R230, + R231, + R232, + R233, + R234, + R235, + R236, + R237, + R238, + R239, + R240, + R241, + R242, + R243, + R244, + R245, + R246, + R247, + R248, + R249, + R250, + R251, + R252, + R253, + R254, + RZ, +}; +static_assert(static_cast(Reg::RZ) == 255); + +[[nodiscard]] constexpr Reg operator+(Reg reg, int num) { + if (reg == Reg::RZ) { + // Adding or subtracting registers from RZ yields RZ + return Reg::RZ; + } + const int result{static_cast(reg) + num}; + if (result >= static_cast(Reg::RZ)) { + throw LogicError("Overflow on register arithmetic"); + } + if (result < 0) { + throw LogicError("Underflow on register arithmetic"); + } + return static_cast(result); +} + +[[nodiscard]] constexpr Reg operator-(Reg reg, int num) { + return reg + (-num); +} + +[[nodiscard]] constexpr bool IsAligned(Reg reg, size_t align) { + return (static_cast(reg) / align) * align == static_cast(reg); +} + +} // namespace Shader::IR + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template + auto format(const Shader::IR::Reg& reg, FormatContext& ctx) { + if (reg == Shader::IR::Reg::RZ) { + return fmt::format_to(ctx.out(), "RZ"); + } else if (static_cast(reg) >= 0 && static_cast(reg) < 255) { + return fmt::format_to(ctx.out(), "R{}", static_cast(reg)); + } else { + throw Shader::LogicError("Invalid register with raw value {}", static_cast(reg)); + } + } +}; diff --git a/src/shader_recompiler/frontend/ir/type.cpp b/src/shader_recompiler/frontend/ir/type.cpp new file mode 100644 index 000000000..da1e2a0f6 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/type.cpp @@ -0,0 +1,36 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include "shader_recompiler/frontend/ir/type.h" + +namespace Shader::IR { + +std::string NameOf(Type type) { + static constexpr std::array names{ + "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", "U64", "ZSCO", + }; + const size_t bits{static_cast(type)}; + if (bits == 0) { + return "Void"; + } + std::string result; + for (size_t i = 0; i < names.size(); i++) { + if ((bits & (size_t{1} << i)) != 0) { + if (!result.empty()) { + result += '|'; + } + result += names[i]; + } + } + return result; +} + +bool AreTypesCompatible(Type lhs, Type rhs) noexcept { + return lhs == rhs || lhs == Type::Opaque || rhs == Type::Opaque; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h new file mode 100644 index 000000000..f753628e8 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/type.h @@ -0,0 +1,47 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include + +#include "common/common_funcs.h" +#include "shader_recompiler/exception.h" + +namespace Shader::IR { + +enum class Type { + Void = 0, + Opaque = 1 << 0, + Label = 1 << 1, + Reg = 1 << 2, + Pred = 1 << 3, + Attribute = 1 << 4, + U1 = 1 << 5, + U8 = 1 << 6, + U16 = 1 << 7, + U32 = 1 << 8, + U64 = 1 << 9, + ZSCO = 1 << 10, +}; +DECLARE_ENUM_FLAG_OPERATORS(Type) + +[[nodiscard]] std::string NameOf(Type type); + +[[nodiscard]] bool AreTypesCompatible(Type lhs, Type rhs) noexcept; + +} // namespace Shader::IR + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template + auto format(const Shader::IR::Type& type, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{}", NameOf(type)); + } +}; diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp new file mode 100644 index 000000000..7b5b35d6c --- /dev/null +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -0,0 +1,124 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/opcode.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +Value::Value(IR::Inst* value) noexcept : type{Type::Opaque}, inst{value} {} + +Value::Value(IR::Block* value) noexcept : type{Type::Label}, label{value} {} + +Value::Value(IR::Reg value) noexcept : type{Type::Reg}, reg{value} {} + +Value::Value(IR::Pred value) noexcept : type{Type::Pred}, pred{value} {} + +Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {} + +Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {} + +Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {} + +Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {} + +Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {} + +Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} + +bool Value::IsIdentity() const noexcept { + return type == Type::Opaque && inst->Opcode() == Opcode::Identity; +} + +bool Value::IsEmpty() const noexcept { + return type == Type::Void; +} + +bool Value::IsImmediate() const noexcept { + if (IsIdentity()) { + return inst->Arg(0).IsImmediate(); + } + return type != Type::Opaque; +} + +bool Value::IsLabel() const noexcept { + return type == Type::Label; +} + +IR::Type Value::Type() const noexcept { + if (IsIdentity()) { + return inst->Arg(0).Type(); + } + if (type == Type::Opaque) { + return inst->Type(); + } + return type; +} + +IR::Inst* Value::Inst() const { + ValidateAccess(Type::Opaque); + return inst; +} + +IR::Block* Value::Label() const { + ValidateAccess(Type::Label); + return label; +} + +IR::Inst* Value::InstRecursive() const { + ValidateAccess(Type::Opaque); + if (IsIdentity()) { + return inst->Arg(0).InstRecursive(); + } + return inst; +} + +IR::Reg Value::Reg() const { + ValidateAccess(Type::Reg); + return reg; +} + +IR::Pred Value::Pred() const { + ValidateAccess(Type::Pred); + return pred; +} + +IR::Attribute Value::Attribute() const { + ValidateAccess(Type::Attribute); + return attribute; +} + +bool Value::U1() const { + ValidateAccess(Type::U1); + return imm_u1; +} + +u8 Value::U8() const { + ValidateAccess(Type::U8); + return imm_u8; +} + +u16 Value::U16() const { + ValidateAccess(Type::U16); + return imm_u16; +} + +u32 Value::U32() const { + ValidateAccess(Type::U32); + return imm_u32; +} + +u64 Value::U64() const { + ValidateAccess(Type::U64); + return imm_u64; +} + +void Value::ValidateAccess(IR::Type expected) const { + if (type != expected) { + throw LogicError("Reading {} out of {}", expected, type); + } +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h new file mode 100644 index 000000000..664dacf9d --- /dev/null +++ b/src/shader_recompiler/frontend/ir/value.h @@ -0,0 +1,98 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/attribute.h" +#include "shader_recompiler/frontend/ir/pred.h" +#include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/frontend/ir/type.h" + +namespace Shader::IR { + +class Block; +class Inst; + +class Value { +public: + Value() noexcept : type{IR::Type::Void}, inst{nullptr} {} + explicit Value(IR::Inst* value) noexcept; + explicit Value(IR::Block* value) noexcept; + explicit Value(IR::Reg value) noexcept; + explicit Value(IR::Pred value) noexcept; + explicit Value(IR::Attribute value) noexcept; + explicit Value(bool value) noexcept; + explicit Value(u8 value) noexcept; + explicit Value(u16 value) noexcept; + explicit Value(u32 value) noexcept; + explicit Value(u64 value) noexcept; + + [[nodiscard]] bool IsIdentity() const noexcept; + [[nodiscard]] bool IsEmpty() const noexcept; + [[nodiscard]] bool IsImmediate() const noexcept; + [[nodiscard]] bool IsLabel() const noexcept; + [[nodiscard]] IR::Type Type() const noexcept; + + [[nodiscard]] IR::Inst* Inst() const; + [[nodiscard]] IR::Block* Label() const; + [[nodiscard]] IR::Inst* InstRecursive() const; + [[nodiscard]] IR::Reg Reg() const; + [[nodiscard]] IR::Pred Pred() const; + [[nodiscard]] IR::Attribute Attribute() const; + [[nodiscard]] bool U1() const; + [[nodiscard]] u8 U8() const; + [[nodiscard]] u16 U16() const; + [[nodiscard]] u32 U32() const; + [[nodiscard]] u64 U64() const; + +private: + void ValidateAccess(IR::Type expected) const; + + IR::Type type; + union { + IR::Inst* inst; + IR::Block* label; + IR::Reg reg; + IR::Pred pred; + IR::Attribute attribute; + bool imm_u1; + u8 imm_u8; + u16 imm_u16; + u32 imm_u32; + u64 imm_u64; + }; +}; + +template +class TypedValue : public Value { +public: + TypedValue() = default; + + template + requires((other_type & type_) != IR::Type::Void) explicit(false) + TypedValue(const TypedValue& value) + : Value(value) {} + + explicit TypedValue(const Value& value) : Value(value) { + if ((value.Type() & type_) == IR::Type::Void) { + throw InvalidArgument("Incompatible types {} and {}", type_, value.Type()); + } + } + + explicit TypedValue(IR::Inst* inst) : TypedValue(Value(inst)) {} +}; + +using U1 = TypedValue; +using U8 = TypedValue; +using U16 = TypedValue; +using U32 = TypedValue; +using U64 = TypedValue; +using U32U64 = TypedValue; +using U16U32U64 = TypedValue; +using UAny = TypedValue; +using ZSCO = TypedValue; + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp new file mode 100644 index 000000000..fc4dba826 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -0,0 +1,531 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include +#include +#include + +#include + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/location.h" + +namespace Shader::Maxwell::Flow { + +static u32 BranchOffset(Location pc, Instruction inst) { + return pc.Offset() + inst.branch.Offset() + 8; +} + +static std::array Split(Block&& block, Location pc, BlockId new_id) { + if (pc <= block.begin || pc >= block.end) { + throw InvalidArgument("Invalid address to split={}", pc); + } + return { + Block{ + .begin{block.begin}, + .end{pc}, + .end_class{EndClass::Branch}, + .id{block.id}, + .stack{block.stack}, + .cond{true}, + .branch_true{new_id}, + .branch_false{UNREACHABLE_BLOCK_ID}, + }, + Block{ + .begin{pc}, + .end{block.end}, + .end_class{block.end_class}, + .id{new_id}, + .stack{std::move(block.stack)}, + .cond{block.cond}, + .branch_true{block.branch_true}, + .branch_false{block.branch_false}, + }, + }; +} + +static Token OpcodeToken(Opcode opcode) { + switch (opcode) { + case Opcode::PBK: + case Opcode::BRK: + return Token::PBK; + case Opcode::PCNT: + case Opcode::CONT: + return Token::PBK; + case Opcode::PEXIT: + case Opcode::EXIT: + return Token::PEXIT; + case Opcode::PLONGJMP: + case Opcode::LONGJMP: + return Token::PLONGJMP; + case Opcode::PRET: + case Opcode::RET: + case Opcode::CAL: + return Token::PRET; + case Opcode::SSY: + case Opcode::SYNC: + return Token::SSY; + default: + throw InvalidArgument("{}", opcode); + } +} + +static bool IsAbsoluteJump(Opcode opcode) { + switch (opcode) { + case Opcode::JCAL: + case Opcode::JMP: + case Opcode::JMX: + return true; + default: + return false; + } +} + +static bool HasFlowTest(Opcode opcode) { + switch (opcode) { + case Opcode::BRA: + case Opcode::BRX: + case Opcode::EXIT: + case Opcode::JMP: + case Opcode::JMX: + case Opcode::BRK: + case Opcode::CONT: + case Opcode::LONGJMP: + case Opcode::RET: + case Opcode::SYNC: + return true; + case Opcode::CAL: + case Opcode::JCAL: + return false; + default: + throw InvalidArgument("Invalid branch {}", opcode); + } +} + +static std::string Name(const Block& block) { + if (block.begin.IsVirtual()) { + return fmt::format("\"Virtual {}\"", block.id); + } else { + return fmt::format("\"{}\"", block.begin); + } +} + +void Stack::Push(Token token, Location target) { + entries.push_back({ + .token{token}, + .target{target}, + }); +} + +std::pair Stack::Pop(Token token) const { + const std::optional pc{Peek(token)}; + if (!pc) { + throw LogicError("Token could not be found"); + } + return {*pc, Remove(token)}; +} + +std::optional Stack::Peek(Token token) const { + const auto reverse_entries{entries | std::views::reverse}; + const auto it{std::ranges::find(reverse_entries, token, &StackEntry::token)}; + if (it == reverse_entries.end()) { + return std::nullopt; + } + return it->target; +} + +Stack Stack::Remove(Token token) const { + const auto reverse_entries{entries | std::views::reverse}; + const auto it{std::ranges::find(reverse_entries, token, &StackEntry::token)}; + const auto pos{std::distance(reverse_entries.begin(), it)}; + Stack result; + result.entries.insert(result.entries.end(), entries.begin(), entries.end() - pos - 1); + return result; +} + +bool Block::Contains(Location pc) const noexcept { + return pc >= begin && pc < end; +} + +Function::Function(Location start_address) + : entrypoint{start_address}, labels{Label{ + .address{start_address}, + .block_id{0}, + .stack{}, + }} {} + +CFG::CFG(Environment& env_, Location start_address) : env{env_} { + functions.emplace_back(start_address); + for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { + while (!functions[function_id].labels.empty()) { + Function& function{functions[function_id]}; + Label label{function.labels.back()}; + function.labels.pop_back(); + AnalyzeLabel(function_id, label); + } + } +} + +void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { + if (InspectVisitedBlocks(function_id, label)) { + // Label address has been visited + return; + } + // Try to find the next block + Function* function{&functions[function_id]}; + Location pc{label.address}; + const auto next{std::upper_bound(function->blocks.begin(), function->blocks.end(), pc, + [function](Location pc, u32 block_index) { + return pc < function->blocks_data[block_index].begin; + })}; + const auto next_index{std::distance(function->blocks.begin(), next)}; + const bool is_last{next == function->blocks.end()}; + Location next_pc; + BlockId next_id{UNREACHABLE_BLOCK_ID}; + if (!is_last) { + next_pc = function->blocks_data[*next].begin; + next_id = function->blocks_data[*next].id; + } + // Insert before the next block + Block block{ + .begin{pc}, + .end{pc}, + .end_class{EndClass::Branch}, + .id{label.block_id}, + .stack{std::move(label.stack)}, + .cond{true}, + .branch_true{UNREACHABLE_BLOCK_ID}, + .branch_false{UNREACHABLE_BLOCK_ID}, + }; + // Analyze instructions until it reaches an already visited block or there's a branch + bool is_branch{false}; + while (is_last || pc < next_pc) { + is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch; + if (is_branch) { + break; + } + ++pc; + } + if (!is_branch) { + // If the block finished without a branch, + // it means that the next instruction is already visited, jump to it + block.end = pc; + block.cond = true; + block.branch_true = next_id; + block.branch_false = UNREACHABLE_BLOCK_ID; + } + // Function's pointer might be invalid, resolve it again + function = &functions[function_id]; + const u32 new_block_index = static_cast(function->blocks_data.size()); + function->blocks.insert(function->blocks.begin() + next_index, new_block_index); + function->blocks_data.push_back(std::move(block)); +} + +bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) { + const Location pc{label.address}; + Function& function{functions[function_id]}; + const auto it{std::ranges::find_if(function.blocks, [&function, pc](u32 block_index) { + return function.blocks_data[block_index].Contains(pc); + })}; + if (it == function.blocks.end()) { + // Address has not been visited + return false; + } + Block& block{function.blocks_data[*it]}; + if (block.begin == pc) { + throw LogicError("Dangling branch"); + } + const u32 first_index{*it}; + const u32 second_index{static_cast(function.blocks_data.size())}; + const std::array new_indices{first_index, second_index}; + std::array split_blocks{Split(std::move(block), pc, label.block_id)}; + function.blocks_data[*it] = std::move(split_blocks[0]); + function.blocks_data.push_back(std::move(split_blocks[1])); + function.blocks.insert(function.blocks.erase(it), new_indices.begin(), new_indices.end()); + return true; +} + +CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Location pc) { + const Instruction inst{env.ReadInstruction(pc.Offset())}; + const Opcode opcode{Decode(inst.raw)}; + switch (opcode) { + case Opcode::BRA: + case Opcode::BRX: + case Opcode::JMP: + case Opcode::JMX: + case Opcode::RET: + if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { + return AnalysisState::Continue; + } + switch (opcode) { + case Opcode::BRA: + case Opcode::JMP: + AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode)); + break; + case Opcode::BRX: + case Opcode::JMX: + AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode)); + break; + case Opcode::RET: + block.end_class = EndClass::Return; + break; + default: + break; + } + block.end = pc; + return AnalysisState::Branch; + case Opcode::BRK: + case Opcode::CONT: + case Opcode::LONGJMP: + case Opcode::SYNC: { + if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { + return AnalysisState::Continue; + } + const auto [stack_pc, new_stack]{block.stack.Pop(OpcodeToken(opcode))}; + block.branch_true = AddLabel(block, new_stack, stack_pc, function_id); + block.end = pc; + return AnalysisState::Branch; + } + case Opcode::PBK: + case Opcode::PCNT: + case Opcode::PEXIT: + case Opcode::PLONGJMP: + case Opcode::SSY: + block.stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); + return AnalysisState::Continue; + case Opcode::EXIT: + return AnalyzeEXIT(block, function_id, pc, inst); + case Opcode::PRET: + throw NotImplementedException("PRET flow analysis"); + case Opcode::CAL: + case Opcode::JCAL: { + const bool is_absolute{IsAbsoluteJump(opcode)}; + const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; + // Technically CAL pushes into PRET, but that's implicit in the function call for us + // Insert the function into the list if it doesn't exist + if (std::ranges::find(functions, cal_pc, &Function::entrypoint) == functions.end()) { + functions.push_back(cal_pc); + } + // Handle CAL like a regular instruction + break; + } + default: + break; + } + const Predicate pred{inst.Pred()}; + if (pred == Predicate{true} || pred == Predicate{false}) { + return AnalysisState::Continue; + } + const IR::Condition cond{static_cast(pred.index), pred.negated}; + AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond); + return AnalysisState::Branch; +} + +void CFG::AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, + EndClass insn_end_class, IR::Condition cond) { + if (block.begin != pc) { + // If the block doesn't start in the conditional instruction + // mark it as a label to visit it later + block.end = pc; + block.cond = true; + block.branch_true = AddLabel(block, block.stack, pc, function_id); + block.branch_false = UNREACHABLE_BLOCK_ID; + return; + } + // Impersonate the visited block with a virtual block + // Jump from this virtual to the real conditional instruction and the next instruction + Function& function{functions[function_id]}; + const BlockId conditional_block_id{++function.current_block_id}; + function.blocks.push_back(static_cast(function.blocks_data.size())); + Block& virtual_block{function.blocks_data.emplace_back(Block{ + .begin{}, // Virtual block + .end{}, + .end_class{EndClass::Branch}, + .id{block.id}, // Impersonating + .stack{block.stack}, + .cond{cond}, + .branch_true{conditional_block_id}, + .branch_false{UNREACHABLE_BLOCK_ID}, + })}; + // Set the end properties of the conditional instruction and give it a new identity + Block& conditional_block{block}; + conditional_block.end = pc; + conditional_block.end_class = insn_end_class; + conditional_block.id = conditional_block_id; + // Add a label to the instruction after the conditional instruction + const BlockId endif_block_id{AddLabel(conditional_block, block.stack, pc + 1, function_id)}; + // Branch to the next instruction from the virtual block + virtual_block.branch_false = endif_block_id; + // And branch to it from the conditional instruction if it is a branch + if (insn_end_class == EndClass::Branch) { + conditional_block.cond = true; + conditional_block.branch_true = endif_block_id; + conditional_block.branch_false = UNREACHABLE_BLOCK_ID; + } +} + +bool CFG::AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst, + Opcode opcode) { + if (inst.branch.is_cbuf) { + throw NotImplementedException("Branch with constant buffer offset"); + } + const Predicate pred{inst.Pred()}; + if (pred == Predicate{false}) { + return false; + } + const bool has_flow_test{HasFlowTest(opcode)}; + const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T}; + if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { + block.cond = IR::Condition(flow_test, static_cast(pred.index), pred.negated); + block.branch_false = AddLabel(block, block.stack, pc + 1, function_id); + } else { + block.cond = true; + } + return true; +} + +void CFG::AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst, + bool is_absolute) { + const Location bra_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; + block.branch_true = AddLabel(block, block.stack, bra_pc, function_id); +} + +void CFG::AnalyzeBRX(Block&, Location, Instruction, bool is_absolute) { + throw NotImplementedException("{}", is_absolute ? "JMX" : "BRX"); +} + +void CFG::AnalyzeCAL(Location pc, Instruction inst, bool is_absolute) { + const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; + // Technically CAL pushes into PRET, but that's implicit in the function call for us + // Insert the function to the function list if it doesn't exist + const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; + if (it == functions.end()) { + functions.emplace_back(cal_pc); + } +} + +CFG::AnalysisState CFG::AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, + Instruction inst) { + const IR::FlowTest flow_test{inst.branch.flow_test}; + const Predicate pred{inst.Pred()}; + if (pred == Predicate{false} || flow_test == IR::FlowTest::F) { + // EXIT will never be taken + return AnalysisState::Continue; + } + if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { + if (block.stack.Peek(Token::PEXIT).has_value()) { + throw NotImplementedException("Conditional EXIT with PEXIT token"); + } + const IR::Condition cond{flow_test, static_cast(pred.index), pred.negated}; + AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); + return AnalysisState::Branch; + } + if (const std::optional exit_pc{block.stack.Peek(Token::PEXIT)}) { + const Stack popped_stack{block.stack.Remove(Token::PEXIT)}; + block.cond = true; + block.branch_true = AddLabel(block, popped_stack, *exit_pc, function_id); + block.branch_false = UNREACHABLE_BLOCK_ID; + return AnalysisState::Branch; + } + block.end = pc; + block.end_class = EndClass::Exit; + return AnalysisState::Branch; +} + +BlockId CFG::AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id) { + Function& function{functions[function_id]}; + if (block.begin == pc) { + return block.id; + } + const auto target{std::ranges::find(function.blocks_data, pc, &Block::begin)}; + if (target != function.blocks_data.end()) { + return target->id; + } + const BlockId block_id{++function.current_block_id}; + function.labels.push_back(Label{ + .address{pc}, + .block_id{block_id}, + .stack{std::move(stack)}, + }); + return block_id; +} + +std::string CFG::Dot() const { + int node_uid{0}; + + std::string dot{"digraph shader {\n"}; + for (const Function& function : functions) { + dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint); + dot += fmt::format("\t\tnode [style=filled];\n"); + for (const u32 block_index : function.blocks) { + const Block& block{function.blocks_data[block_index]}; + const std::string name{Name(block)}; + const auto add_branch = [&](BlockId branch_id, bool add_label) { + const auto it{std::ranges::find(function.blocks_data, branch_id, &Block::id)}; + dot += fmt::format("\t\t{}->", name); + if (it == function.blocks_data.end()) { + dot += fmt::format("\"Unknown label {}\"", branch_id); + } else { + dot += Name(*it); + }; + if (add_label && block.cond != true && block.cond != false) { + dot += fmt::format(" [label=\"{}\"]", block.cond); + } + dot += '\n'; + }; + dot += fmt::format("\t\t{};\n", name); + switch (block.end_class) { + case EndClass::Branch: + if (block.cond != false) { + add_branch(block.branch_true, true); + } + if (block.cond != true) { + add_branch(block.branch_false, false); + } + break; + case EndClass::Exit: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", + node_uid); + ++node_uid; + break; + case EndClass::Return: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{} [label=\"Return\"][shape=square][style=stripped];\n", + node_uid); + ++node_uid; + break; + case EndClass::Unreachable: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format( + "\t\tN{} [label=\"Unreachable\"][shape=square][style=stripped];\n", node_uid); + ++node_uid; + break; + } + } + if (function.entrypoint == 8) { + dot += fmt::format("\t\tlabel = \"main\";\n"); + } else { + dot += fmt::format("\t\tlabel = \"Function {}\";\n", function.entrypoint); + } + dot += "\t}\n"; + } + if (!functions.empty()) { + if (functions.front().blocks.empty()) { + dot += "Start;\n"; + } else { + dot += fmt::format("\tStart -> {};\n", Name(functions.front().blocks_data.front())); + } + dot += fmt::format("\tStart [shape=diamond];\n"); + } + dot += "}\n"; + return dot; +} + +} // namespace Shader::Maxwell::Flow diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h new file mode 100644 index 000000000..b2ab0cdc3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -0,0 +1,137 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include + +#include + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/condition.h" +#include "shader_recompiler/frontend/maxwell/instruction.h" +#include "shader_recompiler/frontend/maxwell/location.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" + +namespace Shader::Maxwell::Flow { + +using BlockId = u32; +using FunctionId = size_t; + +constexpr BlockId UNREACHABLE_BLOCK_ID{static_cast(-1)}; + +enum class EndClass { + Branch, + Exit, + Return, + Unreachable, +}; + +enum class Token { + SSY, + PBK, + PEXIT, + PRET, + PCNT, + PLONGJMP, +}; + +struct StackEntry { + auto operator<=>(const StackEntry&) const noexcept = default; + + Token token; + Location target; +}; + +class Stack { +public: + void Push(Token token, Location target); + [[nodiscard]] std::pair Pop(Token token) const; + [[nodiscard]] std::optional Peek(Token token) const; + [[nodiscard]] Stack Remove(Token token) const; + +private: + boost::container::small_vector entries; +}; + +struct Block { + [[nodiscard]] bool Contains(Location pc) const noexcept; + + Location begin; + Location end; + EndClass end_class; + BlockId id; + Stack stack; + IR::Condition cond; + BlockId branch_true; + BlockId branch_false; +}; + +struct Label { + Location address; + BlockId block_id; + Stack stack; +}; + +struct Function { + Function(Location start_address); + + Location entrypoint; + BlockId current_block_id{0}; + boost::container::small_vector labels; + boost::container::small_vector blocks; + boost::container::small_vector blocks_data; +}; + +class CFG { + enum class AnalysisState { + Branch, + Continue, + }; + +public: + explicit CFG(Environment& env, Location start_address); + + [[nodiscard]] std::string Dot() const; + + [[nodiscard]] std::span Functions() const noexcept { + return std::span(functions.data(), functions.size()); + } + +private: + void AnalyzeLabel(FunctionId function_id, Label& label); + + /// Inspect already visited blocks. + /// Return true when the block has already been visited + [[nodiscard]] bool InspectVisitedBlocks(FunctionId function_id, const Label& label); + + [[nodiscard]] AnalysisState AnalyzeInst(Block& block, FunctionId function_id, Location pc); + + void AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, EndClass insn_end_class, + IR::Condition cond); + + /// Return true when the branch instruction is confirmed to be a branch + [[nodiscard]] bool AnalyzeBranch(Block& block, FunctionId function_id, Location pc, + Instruction inst, Opcode opcode); + + void AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst, + bool is_absolute); + void AnalyzeBRX(Block& block, Location pc, Instruction inst, bool is_absolute); + void AnalyzeCAL(Location pc, Instruction inst, bool is_absolute); + AnalysisState AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, Instruction inst); + + /// Return the branch target block id + [[nodiscard]] BlockId AddLabel(const Block& block, Stack stack, Location pc, + FunctionId function_id); + + Environment& env; + boost::container::small_vector functions; + FunctionId current_function_id{0}; +}; + +} // namespace Shader::Maxwell::Flow diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp new file mode 100644 index 000000000..ab1cc6c8d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp @@ -0,0 +1,149 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include +#include + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" + +namespace Shader::Maxwell { +namespace { +struct MaskValue { + u64 mask; + u64 value; +}; + +constexpr MaskValue MaskValueFromEncoding(const char* encoding) { + u64 mask{}; + u64 value{}; + u64 bit{u64(1) << 63}; + while (*encoding) { + switch (*encoding) { + case '0': + mask |= bit; + break; + case '1': + mask |= bit; + value |= bit; + break; + case '-': + break; + case ' ': + break; + default: + throw LogicError("Invalid encoding character '{}'", *encoding); + } + ++encoding; + if (*encoding != ' ') { + bit >>= 1; + } + } + return MaskValue{.mask{mask}, .value{value}}; +} + +struct InstEncoding { + MaskValue mask_value; + Opcode opcode; +}; +constexpr std::array UNORDERED_ENCODINGS{ +#define INST(name, cute, encode) \ + InstEncoding{ \ + .mask_value{MaskValueFromEncoding(encode)}, \ + .opcode{Opcode::name}, \ + }, +#include "maxwell.inc" +#undef INST +}; + +constexpr auto SortedEncodings() { + std::array encodings{UNORDERED_ENCODINGS}; + std::ranges::sort(encodings, [](const InstEncoding& lhs, const InstEncoding& rhs) { + return std::popcount(lhs.mask_value.mask) > std::popcount(rhs.mask_value.mask); + }); + return encodings; +} +constexpr auto ENCODINGS{SortedEncodings()}; + +constexpr int WidestLeftBits() { + int bits{64}; + for (const InstEncoding& encoding : ENCODINGS) { + bits = std::min(bits, std::countr_zero(encoding.mask_value.mask)); + } + return 64 - bits; +} +constexpr int WIDEST_LEFT_BITS{WidestLeftBits()}; +constexpr int MASK_SHIFT{64 - WIDEST_LEFT_BITS}; + +constexpr size_t ToFastLookupIndex(u64 value) { + return static_cast(value >> MASK_SHIFT); +} + +constexpr size_t FastLookupSize() { + size_t max_width{}; + for (const InstEncoding& encoding : ENCODINGS) { + max_width = std::max(max_width, ToFastLookupIndex(encoding.mask_value.mask)); + } + return max_width + 1; +} +constexpr size_t FAST_LOOKUP_SIZE{FastLookupSize()}; + +struct InstInfo { + [[nodiscard]] u64 Mask() const noexcept { + return static_cast(high_mask) << MASK_SHIFT; + } + + [[nodiscard]] u64 Value() const noexcept { + return static_cast(high_value) << MASK_SHIFT; + } + + u16 high_mask; + u16 high_value; + Opcode opcode; +}; + +constexpr auto MakeFastLookupTableIndex(size_t index) { + std::array encodings{}; + size_t element{}; + for (const auto& encoding : ENCODINGS) { + const size_t mask{ToFastLookupIndex(encoding.mask_value.mask)}; + const size_t value{ToFastLookupIndex(encoding.mask_value.value)}; + if ((index & mask) == value) { + encodings.at(element) = InstInfo{ + .high_mask{static_cast(encoding.mask_value.mask >> MASK_SHIFT)}, + .high_value{static_cast(encoding.mask_value.value >> MASK_SHIFT)}, + .opcode{encoding.opcode}, + }; + ++element; + } + } + return encodings; +} + +/*constexpr*/ auto MakeFastLookupTable() { + auto encodings{std::make_unique, FAST_LOOKUP_SIZE>>()}; + for (size_t index = 0; index < FAST_LOOKUP_SIZE; ++index) { + (*encodings)[index] = MakeFastLookupTableIndex(index); + } + return encodings; +} +const auto FAST_LOOKUP_TABLE{MakeFastLookupTable()}; +} // Anonymous namespace + +Opcode Decode(u64 insn) { + const auto& table{(*FAST_LOOKUP_TABLE)[ToFastLookupIndex(insn)]}; + const auto it{std::ranges::find_if( + table, [insn](const InstInfo& info) { return (insn & info.Mask()) == info.Value(); })}; + if (it == table.end()) { + throw NotImplementedException("Instruction 0x{:016x} is unknown / unimplemented", insn); + } + return it->opcode; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/decode.h b/src/shader_recompiler/frontend/maxwell/decode.h new file mode 100644 index 000000000..2a3dd28e8 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/decode.h @@ -0,0 +1,14 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" + +namespace Shader::Maxwell { + +[[nodiscard]] Opcode Decode(u64 insn); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/instruction.h b/src/shader_recompiler/frontend/maxwell/instruction.h new file mode 100644 index 000000000..57fd531f2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/instruction.h @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/flow_test.h" + +namespace Shader::Maxwell { + +struct Predicate { + Predicate() = default; + Predicate(unsigned index_, bool negated_ = false) : index{index_}, negated{negated_} {} + Predicate(bool value) : index{7}, negated{!value} {} + Predicate(u64 raw) : index{static_cast(raw & 7)}, negated{(raw & 8) != 0} {} + + unsigned index; + bool negated; +}; + +inline bool operator==(const Predicate& lhs, const Predicate& rhs) noexcept { + return lhs.index == rhs.index && lhs.negated == rhs.negated; +} + +inline bool operator!=(const Predicate& lhs, const Predicate& rhs) noexcept { + return !(lhs == rhs); +} + +union Instruction { + Instruction(u64 raw_) : raw{raw_} {} + + u64 raw; + + union { + BitField<5, 1, u64> is_cbuf; + BitField<0, 5, IR::FlowTest> flow_test; + + [[nodiscard]] u32 Absolute() const noexcept { + return static_cast(absolute); + } + + [[nodiscard]] s32 Offset() const noexcept { + return static_cast(offset); + } + + private: + BitField<20, 24, s64> offset; + BitField<20, 32, u64> absolute; + } branch; + + [[nodiscard]] Predicate Pred() const noexcept { + return Predicate{pred}; + } + +private: + BitField<16, 4, u64> pred; +}; +static_assert(std::is_trivially_copyable_v); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/location.h b/src/shader_recompiler/frontend/maxwell/location.h new file mode 100644 index 000000000..66b51a19e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/location.h @@ -0,0 +1,106 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" + +namespace Shader::Maxwell { + +class Location { + static constexpr u32 VIRTUAL_OFFSET{std::numeric_limits::max()}; + +public: + constexpr Location() = default; + + constexpr Location(u32 initial_offset) : offset{initial_offset} { + if (initial_offset % 8 != 0) { + throw InvalidArgument("initial_offset={} is not a multiple of 8", initial_offset); + } + Align(); + } + + [[nodiscard]] constexpr u32 Offset() const noexcept { + return offset; + } + + [[nodiscard]] constexpr bool IsVirtual() const { + return offset == VIRTUAL_OFFSET; + } + + constexpr auto operator<=>(const Location&) const noexcept = default; + + constexpr Location operator++() noexcept { + const Location copy{*this}; + Step(); + return copy; + } + + constexpr Location operator++(int) noexcept { + Step(); + return *this; + } + + constexpr Location operator--() noexcept { + const Location copy{*this}; + Back(); + return copy; + } + + constexpr Location operator--(int) noexcept { + Back(); + return *this; + } + + constexpr Location operator+(int number) const { + Location new_pc{*this}; + while (number > 0) { + --number; + ++new_pc; + } + while (number < 0) { + ++number; + --new_pc; + } + return new_pc; + } + + constexpr Location operator-(int number) const { + return operator+(-number); + } + +private: + constexpr void Align() { + offset += offset % 32 == 0 ? 8 : 0; + } + + constexpr void Step() { + offset += 8 + (offset % 32 == 24 ? 8 : 0); + } + + constexpr void Back() { + offset -= 8 + (offset % 32 == 8 ? 8 : 0); + } + + u32 offset{VIRTUAL_OFFSET}; +}; + +} // namespace Shader::Maxwell + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template + auto format(const Shader::Maxwell::Location& location, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{:04x}", location.Offset()); + } +}; diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc new file mode 100644 index 000000000..1515285bf --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -0,0 +1,285 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +INST(AL2P, "AL2P", "1110 1111 1010 0---") +INST(ALD, "ALD", "1110 1111 1101 1---") +INST(AST, "AST", "1110 1111 1111 0---") +INST(ATOM_cas, "ATOM (cas)", "1110 1110 1111 ----") +INST(ATOM, "ATOM", "1110 1101 ---- ----") +INST(ATOMS_cas, "ATOMS (cas)", "1110 1110 ---- ----") +INST(ATOMS, "ATOMS", "1110 1100 ---- ----") +INST(B2R, "B2R", "1111 0000 1011 1---") +INST(BAR, "BAR", "1111 0000 1010 1---") +INST(BFE_reg, "BFE (reg)", "0101 1100 0000 0---") +INST(BFE_cbuf, "BFE (cbuf)", "0100 1100 0000 0---") +INST(BFE_imm, "BFE (imm)", "0011 100- 0000 0---") +INST(BFI_reg, "BFI (reg)", "0101 1011 1111 0---") +INST(BFI_rc, "BFI (rc)", "0101 0011 1111 0---") +INST(BFI_cr, "BFI (cr)", "0100 1011 1111 0---") +INST(BFI_imm, "BFI (imm)", "0011 011- 1111 0---") +INST(BPT, "BPT", "1110 0011 1010 ----") +INST(BRA, "BRA", "1110 0010 0100 ----") +INST(BRK, "BRK", "1110 0011 0100 ----") +INST(BRX, "BRX", "1110 0010 0101 ----") +INST(CAL, "CAL", "1110 0010 0110 ----") +INST(CCTL, "CCTL", "1110 1111 011- ----") +INST(CCTLL, "CCTLL", "1110 1111 100- ----") +INST(CONT, "CONT", "1110 0011 0101 ----") +INST(CS2R, "CS2R", "0101 0000 1100 1---") +INST(CSET, "CSET", "0101 0000 1001 1---") +INST(CSETP, "CSETP", "0101 0000 1010 0---") +INST(DADD_reg, "DADD (reg)", "0101 1100 0111 0---") +INST(DADD_cbuf, "DADD (cbuf)", "0100 1100 0111 0---") +INST(DADD_imm, "DADD (imm)", "0011 100- 0111 0---") +INST(DEPBAR, "DEPBAR", "1111 0000 1111 0---") +INST(DFMA_reg, "DFMA (reg)", "0101 1011 0111 ----") +INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----") +INST(DFMA_cr, "DFMA (cr)", "0010 1011 0111 ----") +INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----") +INST(DMNMX_reg, "DMNMX (reg)", "0100 1100 0101 0---") +INST(DMNMX_cbuf, "DMNMX (cbuf)", "0101 1100 0101 0---") +INST(DMNMX_imm, "DMNMX (imm)", "0011 100- 0101 0---") +INST(DMUL_reg, "DMUL (reg)", "0101 1100 1000 0---") +INST(DMUL_cbuf, "DMUL (cbuf)", "0100 1100 1000 0---") +INST(DMUL_imm, "DMUL (imm)", "0011 100- 1000 0---") +INST(DSET_reg, "DSET (reg)", "0101 1001 0--- ----") +INST(DSET_cbuf, "DSET (cbuf)", "0100 1001 0--- ----") +INST(DSET_imm, "DSET (imm)", "0011 001- 0--- ----") +INST(DSETP_reg, "DSETP (reg)", "0101 1011 1000 ----") +INST(DSETP_cbuf, "DSETP (cbuf)", "0100 1011 1000 ----") +INST(DSETP_imm, "DSETP (imm)", "0011 011- 1000 ----") +INST(EXIT, "EXIT", "1110 0011 0000 ----") +INST(F2F_reg, "F2F (reg)", "0101 1100 1010 1---") +INST(F2F_cbuf, "F2F (cbuf)", "0100 1100 1010 1---") +INST(F2F_imm, "F2F (imm)", "0011 100- 1010 1---") +INST(F2I_reg, "F2I (reg)", "0101 1100 1011 0---") +INST(F2I_cbuf, "F2I (cbuf)", "0100 1100 1011 0---") +INST(F2I_imm, "F2I (imm)", "0011 100- 1011 0---") +INST(FADD_reg, "FADD (reg)", "0101 1100 0101 1---") +INST(FADD_cbuf, "FADD (cbuf)", "0100 1100 0101 1---") +INST(FADD_imm, "FADD (imm)", "0011 100- 0101 1---") +INST(FADD32I, "FADD32I", "0000 10-- ---- ----") +INST(FCHK_reg, "FCHK (reg)", "0101 1100 1000 1---") +INST(FCHK_cbuf, "FCHK (cbuf)", "0100 1100 1000 1---") +INST(FCHK_imm, "FCHK (imm)", "0011 100- 1000 1---") +INST(FCMP_reg, "FCMP (reg)", "0101 1011 1010 ----") +INST(FCMP_rc, "FCMP (rc)", "0101 0011 1010 ----") +INST(FCMP_cr, "FCMP (cr)", "0100 1011 1010 ----") +INST(FCMP_imm, "FCMP (imm)", "0011 011- 1010 ----") +INST(FFMA_reg, "FFMA (reg)", "0101 1001 1--- ----") +INST(FFMA_rc, "FFMA (rc)", "0101 0001 1--- ----") +INST(FFMA_cr, "FFMA (cr)", "0100 1001 1--- ----") +INST(FFMA_imm, "FFMA (imm)", "0011 001- 1--- ----") +INST(FFMA32I, "FFMA32I", "0000 11-- ---- ----") +INST(FLO_reg, "FLO (reg)", "0101 1100 0011 0---") +INST(FLO_cbuf, "FLO (cbuf)", "0100 1100 0011 0---") +INST(FLO_imm, "FLO (imm)", "0011 100- 0011 0---") +INST(FMNMX_reg, "FMNMX (reg)", "0101 1100 0110 0---") +INST(FMNMX_cbuf, "FMNMX (cbuf)", "0100 1100 0110 0---") +INST(FMNMX_imm, "FMNMX (imm)", "0011 100- 0110 0---") +INST(FMUL_reg, "FMUL (reg)", "0101 1100 0110 1---") +INST(FMUL_cbuf, "FMUL (cbuf)", "0100 1100 0110 1---") +INST(FMUL_imm, "FMUL (imm)", "0011 100- 0110 1---") +INST(FMUL32I, "FMUL32I", "0001 1110 ---- ----") +INST(FSET_reg, "FSET (reg)", "0101 1000 ---- ----") +INST(FSET_cbuf, "FSET (cbuf)", "0100 1000 ---- ----") +INST(FSET_imm, "FSET (imm)", "0011 000- ---- ----") +INST(FSETP_reg, "FSETP (reg)", "0101 1011 1011 ----") +INST(FSETP_cbuf, "FSETP (cbuf)", "0100 1011 1011 ----") +INST(FSETP_imm, "FSETP (imm)", "0011 011- 1011 ----") +INST(FSWZADD, "FSWZADD", "0101 0000 1111 1---") +INST(GETCRSPTR, "GETCRSPTR", "1110 0010 1100 ----") +INST(GETLMEMBASE, "GETLMEMBASE", "1110 0010 1101 ----") +INST(HADD2_reg, "HADD2 (reg)", "0101 1101 0001 0---") +INST(HADD2_cbuf, "HADD2 (cbuf)", "0111 101- 1--- ----") +INST(HADD2_imm, "HADD2 (imm)", "0111 101- 0--- ----") +INST(HADD2_32I, "HADD2_32I", "0010 110- ---- ----") +INST(HFMA2_reg, "HFMA2 (reg)", "0101 1101 0000 0---") +INST(HFMA2_rc, "HFMA2 (rc)", "0110 0--- 1--- ----") +INST(HFMA2_cr, "HFMA2 (cr)", "0111 0--- 1--- ----") +INST(HFMA2_imm, "HFMA2 (imm)", "0111 0--- 0--- ----") +INST(HFMA2_32I, "HFMA2_32I", "0010 100- ---- ----") +INST(HMUL2_reg, "HMUL2 (reg)", "0101 1101 0000 1---") +INST(HMUL2_cbuf, "HMUL2 (cbuf)", "0111 100- 1--- ----") +INST(HMUL2_imm, "HMUL2 (imm)", "0111 100- 0--- ----") +INST(HMUL2_32I, "HMUL2_32I", "0010 101- ---- ----") +INST(HSET2_reg, "HSET2 (reg)", "0101 1101 0001 1---") +INST(HSET2_cbuf, "HSET2 (cbuf)", "0111 1100 1--- ----") +INST(HSET2_imm, "HSET2 (imm)", "0111 1100 0--- ----") +INST(HSETP2_reg, "HSETP2 (reg)", "0101 1101 0010 0---") +INST(HSETP2_cbuf, "HSETP2 (cbuf)", "0111 111- 1--- ----") +INST(HSETP2_imm, "HSETP2 (imm)", "0111 111- 0--- ----") +INST(I2F_reg, "I2F (reg)", "0101 1100 1011 1---") +INST(I2F_cbuf, "I2F (cbuf)", "0100 1100 1011 1---") +INST(I2F_imm, "I2F (imm)", "0011 100- 1011 1---") +INST(I2I_reg, "I2I (reg)", "0101 1100 1110 0---") +INST(I2I_cbuf, "I2I (cbuf)", "0100 1100 1110 0---") +INST(I2I_imm, "I2I (imm)", "0011 100- 1110 0---") +INST(IADD_reg, "IADD (reg)", "0101 1100 0001 0---") +INST(IADD_cbuf, "IADD (cbuf)", "0100 1100 0001 0---") +INST(IADD_imm, "IADD (imm)", "0011 100- 0001 0---") +INST(IADD3_reg, "IADD3 (reg)", "0101 1100 1100 ----") +INST(IADD3_cbuf, "IADD3 (cbuf)", "0100 1100 1100 ----") +INST(IADD3_imm, "IADD3 (imm)", "0011 100- 1100 ----") +INST(IADD32I, "IADD32I", "0001 110- ---- ----") +INST(ICMP_reg, "ICMP (reg)", "0101 1011 0100 ----") +INST(ICMP_rc, "ICMP (rc)", "0101 0011 0100 ----") +INST(ICMP_cr, "ICMP (cr)", "0100 1011 0100 ----") +INST(ICMP_imm, "ICMP (imm)", "0011 011- 0100 ----") +INST(IDE, "IDE", "1110 0011 1001 ----") +INST(IDP_reg, "IDP (reg)", "0101 0011 1111 1---") +INST(IDP_imm, "IDP (imm)", "0101 0011 1101 1---") +INST(IMAD_reg, "IMAD (reg)", "0101 1010 0--- ----") +INST(IMAD_rc, "IMAD (rc)", "0101 0010 0--- ----") +INST(IMAD_cr, "IMAD (cr)", "0100 1010 0--- ----") +INST(IMAD_imm, "IMAD (imm)", "0011 010- 0--- ----") +INST(IMAD32I, "IMAD32I", "1000 00-- ---- ----") +INST(IMADSP_reg, "IMADSP (reg)", "0101 1010 1--- ----") +INST(IMADSP_rc, "IMADSP (rc)", "0101 0010 1--- ----") +INST(IMADSP_cr, "IMADSP (cr)", "0100 1010 1--- ----") +INST(IMADSP_imm, "IMADSP (imm)", "0011 010- 1--- ----") +INST(IMNMX_reg, "IMNMX (reg)", "0101 1100 0010 0---") +INST(IMNMX_cbuf, "IMNMX (cbuf)", "0100 1100 0010 0---") +INST(IMNMX_imm, "IMNMX (imm)", "0011 100- 0010 0---") +INST(IMUL_reg, "IMUL (reg)", "0101 1100 0011 1---") +INST(IMUL_cbuf, "IMUL (cbuf)", "0100 1100 0011 1---") +INST(IMUL_imm, "IMUL (imm)", "0011 100- 0011 1---") +INST(IMUL32I, "IMUL32I", "0001 1111 ---- ----") +INST(IPA, "IPA", "1110 0000 ---- ----") +INST(ISBERD, "ISBERD", "1110 1111 1101 0---") +INST(ISCADD_reg, "ISCADD (reg)", "0101 1100 0001 1---") +INST(ISCADD_cbuf, "ISCADD (cbuf)", "0100 1100 0001 1---") +INST(ISCADD_imm, "ISCADD (imm)", "0011 100- 0001 1---") +INST(ISCADD32I, "ISCADD32I", "0001 01-- ---- ----") +INST(ISET_reg, "ISET (reg)", "0101 1011 0101 ----") +INST(ISET_cbuf, "ISET (cbuf)", "0100 1011 0101 ----") +INST(ISET_imm, "ISET (imm)", "0011 011- 0101 ----") +INST(ISETP_reg, "ISETP (reg)", "0101 1011 0110 ----") +INST(ISETP_cbuf, "ISETP (cbuf)", "0100 1011 0110 ----") +INST(ISETP_imm, "ISETP (imm)", "0011 011- 0110 ----") +INST(JCAL, "JCAL", "1110 0010 0010 ----") +INST(JMP, "JMP", "1110 0010 0001 ----") +INST(JMX, "JMX", "1110 0010 0000 ----") +INST(KIL, "KIL", "1110 0011 0011 ----") +INST(LD, "LD", "100- ---- ---- ----") +INST(LDC, "LDC", "1110 1111 1001 0---") +INST(LDG, "LDG", "1110 1110 1101 0---") +INST(LDL, "LDL", "1110 1111 0100 0---") +INST(LDS, "LDS", "1110 1111 0100 1---") +INST(LEA_hi_reg, "LEA (hi reg)", "0101 1011 1101 1---") +INST(LEA_hi_cbuf, "LEA (hi cbuf)", "0001 10-- ---- ----") +INST(LEA_lo_reg, "LEA (lo reg)", "0101 1011 1101 0---") +INST(LEA_lo_cbuf, "LEA (lo cbuf)", "0100 1011 1101 ----") +INST(LEA_lo_imm, "LEA (lo imm)", "0011 011- 1101 0---") +INST(LEPC, "LEPC", "0101 0000 1101 0---") +INST(LONGJMP, "LONGJMP", "1110 0011 0001 ----") +INST(LOP_reg, "LOP (reg)", "0101 1100 0100 0---") +INST(LOP_cbuf, "LOP (cbuf)", "0100 1100 0100 0---") +INST(LOP_imm, "LOP (imm)", "0011 100- 0100 0---") +INST(LOP3_reg, "LOP3 (reg)", "0101 1011 1110 0---") +INST(LOP3_cbuf, "LOP3 (cbuf)", "0011 11-- ---- ----") +INST(LOP3_imm, "LOP3 (imm)", "0000 001- ---- ----") +INST(LOP32I, "LOP32I", "0000 01-- ---- ----") +INST(MEMBAR, "MEMBAR", "1110 1111 1001 1---") +INST(MOV_reg, "MOV (reg)", "0101 1100 1001 1---") +INST(MOV_cbuf, "MOV (cbuf)", "0100 1100 1001 1---") +INST(MOV_imm, "MOV (imm)", "0011 100- 1001 1---") +INST(MOV32I, "MOV32I", "0000 0001 0000 ----") +INST(MUFU, "MUFU", "0101 0000 1000 0---") +INST(NOP, "NOP", "0101 0000 1011 0---") +INST(OUT_reg, "OUT (reg)", "1111 1011 1110 0---") +INST(OUT_cbuf, "OUT (cbuf)", "1110 1011 1110 0---") +INST(OUT_imm, "OUT (imm)", "1111 011- 1110 0---") +INST(P2R_reg, "P2R (reg)", "0101 1100 1110 1---") +INST(P2R_cbuf, "P2R (cbuf)", "0100 1100 1110 1---") +INST(P2R_imm, "P2R (imm)", "0011 1000 1110 1---") +INST(PBK, "PBK", "1110 0010 1010 ----") +INST(PCNT, "PCNT", "1110 0010 1011 ----") +INST(PEXIT, "PEXIT", "1110 0010 0011 ----") +INST(PIXLD, "PIXLD", "1110 1111 1110 1---") +INST(PLONGJMP, "PLONGJMP", "1110 0010 1000 ----") +INST(POPC_reg, "POPC (reg)", "0101 1100 0000 1---") +INST(POPC_cbuf, "POPC (cbuf)", "0100 1100 0000 1---") +INST(POPC_imm, "POPC (imm)", "0011 100- 0000 1---") +INST(PRET, "PRET", "1110 0010 0111 ----") +INST(PRMT_reg, "PRMT (reg)", "0101 1011 1100 ----") +INST(PRMT_rc, "PRMT (rc)", "0101 0011 1100 ----") +INST(PRMT_cr, "PRMT (cr)", "0100 1011 1100 ----") +INST(PRMT_imm, "PRMT (imm)", "0011 011- 1100 ----") +INST(PSET, "PSET", "0101 0000 1000 1---") +INST(PSETP, "PSETP", "0101 0000 1001 0---") +INST(R2B, "R2B", "1111 0000 1100 0---") +INST(R2P_reg, "R2P (reg)", "0101 1100 1111 0---") +INST(R2P_cbuf, "R2P (cbuf)", "0100 1100 1111 0---") +INST(R2P_imm, "R2P (imm)", "0011 100- 1111 0---") +INST(RAM, "RAM", "1110 0011 1000 ----") +INST(RED, "RED", "1110 1011 1111 1---") +INST(RET, "RET", "1110 0011 0010 ----") +INST(RRO_reg, "RRO (reg)", "0101 1100 1001 0---") +INST(RRO_cbuf, "RRO (cbuf)", "0100 1100 1001 0---") +INST(RRO_imm, "RRO (imm)", "0011 100- 1001 0---") +INST(RTT, "RTT", "1110 0011 0110 ----") +INST(S2R, "S2R", "1111 0000 1100 1---") +INST(SAM, "SAM", "1110 0011 0111 ----") +INST(SEL_reg, "SEL (reg)", "0101 1100 1010 0---") +INST(SEL_cbuf, "SEL (cbuf)", "0100 1100 1010 0---") +INST(SEL_imm, "SEL (imm)", "0011 100- 1010 0---") +INST(SETCRSPTR, "SETCRSPTR", "1110 0010 1110 ----") +INST(SETLMEMBASE, "SETLMEMBASE", "1110 0010 1111 ----") +INST(SHF_l_reg, "SHF (l reg)", "0101 1011 1111 1---") +INST(SHF_l_imm, "SHF (l imm)", "0011 011- 1111 1---") +INST(SHF_r_reg, "SHF (r reg)", "0101 1100 1111 1---") +INST(SHF_r_imm, "SHF (r imm)", "0011 100- 1111 1---") +INST(SHFL, "SHFL", "1110 1111 0001 0---") +INST(SHL_reg, "SHL (reg)", "0101 1100 0100 1---") +INST(SHL_cbuf, "SHL (cbuf)", "0100 1100 0100 1---") +INST(SHL_imm, "SHL (imm)", "0011 100- 0100 1---") +INST(SHR_reg, "SHR (reg)", "0101 1100 0010 1---") +INST(SHR_cbuf, "SHR (cbuf)", "0100 1100 0010 1---") +INST(SHR_imm, "SHR (imm)", "0011 100- 0010 1---") +INST(SSY, "SSY", "1110 0010 1001 ----") +INST(ST, "ST", "101- ---- ---- ----") +INST(STG, "STG", "1110 1110 1101 1---") +INST(STL, "STL", "1110 1111 0101 0---") +INST(STP, "STP", "1110 1110 1010 0---") +INST(STS, "STS", "1110 1111 0101 1---") +INST(SUATOM_cas, "SUATOM", "1110 1010 ---- ----") +INST(SULD, "SULD", "1110 1011 000- ----") +INST(SURED, "SURED", "1110 1011 010- ----") +INST(SUST, "SUST", "1110 1011 001- ----") +INST(SYNC, "SYNC", "1111 0000 1111 1---") +INST(TEX, "TEX", "1100 00-- --11 1---") +INST(TEX_b, "TEX (b)", "1101 1110 1011 1---") +INST(TEXS, "TEXS", "1101 -00- ---- ----") +INST(TLD, "TLD", "1101 1100 --11 1---") +INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") +INST(TLD4, "TLD4", "1100 10-- --11 1---") +INST(TLD4_b, "TLD4 (b)", "1101 1110 1111 1---") +INST(TLD4S, "TLD4S", "1101 1111 -0-- ----") +INST(TLDS, "TLDS", "1101 -01- ---- ----") +INST(TMML, "TMML", "1101 1111 0101 1---") +INST(TMML_b, "TMML (b)", "1101 1111 0110 0---") +INST(TXA, "TXA", "1101 1111 0100 0---") +INST(TXD, "TXD", "1101 1110 0011 10--") +INST(TXD_b, "TXD (b)", "1101 1110 0111 10--") +INST(TXQ, "TXQ", "1101 1111 0100 1---") +INST(TXQ_b, "TXQ (b)", "1101 1111 0101 0---") +INST(VABSDIFF, "VABSDIFF", "0101 0100 ---- ----") +INST(VABSDIFF4, "VABSDIFF4", "0101 0000 0--- ----") +INST(VADD, "VADD", "0010 00-- ---- ----") +INST(VMAD, "VMAD", "0101 1111 ---- ----") +INST(VMNMX, "VMNMX", "0011 101- ---- ----") +INST(VOTE, "VOTE", "0101 0000 1101 1---") +INST(VOTE_vtg, "VOTE (vtg)", "0101 0000 1110 0---") +INST(VSET, "VSET", "0100 000- ---- ----") +INST(VSETP, "VSETP", "0101 0000 1111 0---") +INST(VSHL, "VSHL", "0101 0111 ---- ----") +INST(VSHR, "VSHR", "0101 0110 ---- ----") +INST(XMAD_reg, "XMAD (reg)", "0101 1011 00-- ----") +INST(XMAD_rc, "XMAD (rc)", "0101 0001 0--- ----") +INST(XMAD_cr, "XMAD (cr)", "0100 111- ---- ----") +INST(XMAD_imm, "XMAD (imm)", "0011 011- 00-- ----") + +// Removed due to its weird formatting making fast tables larger +// INST(CCTLT, "CCTLT", "1110 1011 1111 0--0") diff --git a/src/shader_recompiler/frontend/maxwell/opcode.cpp b/src/shader_recompiler/frontend/maxwell/opcode.cpp new file mode 100644 index 000000000..8a7bdb611 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/opcode.cpp @@ -0,0 +1,26 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" + +namespace Shader::Maxwell { +namespace { +constexpr std::array NAME_TABLE{ +#define INST(name, cute, encode) #cute, +#include "maxwell.inc" +#undef INST +}; +} // Anonymous namespace + +const char* NameOf(Opcode opcode) { + if (static_cast(opcode) >= NAME_TABLE.size()) { + throw InvalidArgument("Invalid opcode with raw value {}", static_cast(opcode)); + } + return NAME_TABLE[static_cast(opcode)]; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/opcode.h b/src/shader_recompiler/frontend/maxwell/opcode.h new file mode 100644 index 000000000..cd574f29d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/opcode.h @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +namespace Shader::Maxwell { + +enum class Opcode { +#define INST(name, cute, encode) name, +#include "maxwell.inc" +#undef INST +}; + +const char* NameOf(Opcode opcode); + +} // namespace Shader::Maxwell + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template + auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) { + return format_to(ctx.out(), "{}", NameOf(opcode)); + } +}; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp new file mode 100644 index 000000000..67a98ba57 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -0,0 +1,69 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/frontend/maxwell/termination_code.h" +#include "shader_recompiler/frontend/maxwell/translate/translate.h" + +namespace Shader::Maxwell { + +Program::Function::~Function() { + std::ranges::for_each(blocks, &std::destroy_at); +} + +Program::Program(Environment& env, const Flow::CFG& cfg) { + std::vector block_map; + functions.reserve(cfg.Functions().size()); + + for (const Flow::Function& cfg_function : cfg.Functions()) { + Function& function{functions.emplace_back()}; + + const size_t num_blocks{cfg_function.blocks.size()}; + IR::Block* block_memory{block_alloc_pool.allocate(num_blocks)}; + function.blocks.reserve(num_blocks); + + block_map.resize(cfg_function.blocks_data.size()); + + // Visit the instructions of all blocks + for (const Flow::BlockId block_id : cfg_function.blocks) { + const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; + + IR::Block* const block{std::construct_at(block_memory, Translate(env, flow_block))}; + ++block_memory; + function.blocks.push_back(block); + block_map[flow_block.id] = block; + } + // Now that all blocks are defined, emit the termination instructions + for (const Flow::BlockId block_id : cfg_function.blocks) { + const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; + EmitTerminationCode(flow_block, block_map); + } + } +} + +std::string DumpProgram(const Program& program) { + size_t index{0}; + std::map inst_to_index; + std::map block_to_index; + + for (const Program::Function& function : program.functions) { + for (const IR::Block* const block : function.blocks) { + block_to_index.emplace(block, index); + ++index; + } + } + std::string ret; + for (const Program::Function& function : program.functions) { + ret += fmt::format("Function\n"); + for (const IR::Block* const block : function.blocks) { + ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; + } + } + return ret; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h new file mode 100644 index 000000000..7814b2c01 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/program.h @@ -0,0 +1,39 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" + +namespace Shader::Maxwell { + +class Program { + friend std::string DumpProgram(const Program& program); + +public: + explicit Program(Environment& env, const Flow::CFG& cfg); + +private: + struct Function { + ~Function(); + + std::vector blocks; + }; + + boost::pool_allocator + block_alloc_pool; + std::vector functions; +}; + +[[nodiscard]] std::string DumpProgram(const Program& program); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.cpp b/src/shader_recompiler/frontend/maxwell/termination_code.cpp new file mode 100644 index 000000000..a4ea5c5e3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/termination_code.cpp @@ -0,0 +1,79 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/termination_code.h" + +namespace Shader::Maxwell { + +static void EmitExit(IR::IREmitter& ir) { + ir.Exit(); +} + +static IR::U1 GetFlowTest(IR::FlowTest flow_test, IR::IREmitter& ir) { + switch (flow_test) { + case IR::FlowTest::T: + return ir.Imm1(true); + case IR::FlowTest::F: + return ir.Imm1(false); + case IR::FlowTest::NE: + // FIXME: Verify this + return ir.LogicalNot(ir.GetZFlag()); + case IR::FlowTest::NaN: + // FIXME: Verify this + return ir.LogicalAnd(ir.GetSFlag(), ir.GetZFlag()); + default: + throw NotImplementedException("Flow test {}", flow_test); + } +} + +static IR::U1 GetCond(IR::Condition cond, IR::IREmitter& ir) { + const IR::FlowTest flow_test{cond.FlowTest()}; + const auto [pred, pred_negated]{cond.Pred()}; + if (pred == IR::Pred::PT && !pred_negated) { + return GetFlowTest(flow_test, ir); + } + if (flow_test == IR::FlowTest::T) { + return ir.GetPred(pred, pred_negated); + } + return ir.LogicalAnd(ir.GetPred(pred, pred_negated), GetFlowTest(flow_test, ir)); +} + +static void EmitBranch(const Flow::Block& flow_block, std::span block_map, + IR::IREmitter& ir) { + if (flow_block.cond == true) { + return ir.Branch(block_map[flow_block.branch_true]); + } + if (flow_block.cond == false) { + return ir.Branch(block_map[flow_block.branch_false]); + } + return ir.BranchConditional(GetCond(flow_block.cond, ir), block_map[flow_block.branch_true], + block_map[flow_block.branch_false]); +} + +void EmitTerminationCode(const Flow::Block& flow_block, std::span block_map) { + IR::Block* const block{block_map[flow_block.id]}; + IR::IREmitter ir(*block); + switch (flow_block.end_class) { + case Flow::EndClass::Branch: + EmitBranch(flow_block, block_map, ir); + break; + case Flow::EndClass::Exit: + EmitExit(ir); + break; + case Flow::EndClass::Return: + ir.Return(); + break; + case Flow::EndClass::Unreachable: + ir.Unreachable(); + break; + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.h b/src/shader_recompiler/frontend/maxwell/termination_code.h new file mode 100644 index 000000000..b0d667942 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/termination_code.h @@ -0,0 +1,16 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" + +namespace Shader::Maxwell { + +void EmitTerminationCode(const Flow::Block& flow_block, std::span block_map); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp new file mode 100644 index 000000000..e98bbd0d1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp @@ -0,0 +1,15 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +void TranslatorVisitor::EXIT(u64) { + ir.Exit(); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp new file mode 100644 index 000000000..c4288d9a8 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -0,0 +1,133 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class DestFormat : u64 { + Invalid, + I16, + I32, + I64, +}; +enum class SrcFormat : u64 { + Invalid, + F16, + F32, + F64, +}; +enum class Rounding : u64 { + Round, + Floor, + Ceil, + Trunc, +}; + +union F2I { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 2, DestFormat> dest_format; + BitField<10, 2, SrcFormat> src_format; + BitField<12, 1, u64> is_signed; + BitField<39, 1, Rounding> rounding; + BitField<49, 1, u64> half; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> abs; + BitField<47, 1, u64> cc; + BitField<49, 1, u64> neg; +}; + +size_t BitSize(DestFormat dest_format) { + switch (dest_format) { + case DestFormat::I16: + return 16; + case DestFormat::I32: + return 32; + case DestFormat::I64: + return 64; + default: + throw NotImplementedException("Invalid destination format {}", dest_format); + } +} + +void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::U16U32U64& op_a) { + // F2I is used to convert from a floating point value to an integer + const F2I f2i{insn}; + + const IR::U16U32U64 float_value{v.ir.FPAbsNeg(op_a, f2i.abs != 0, f2i.neg != 0)}; + const IR::U16U32U64 rounded_value{[&] { + switch (f2i.rounding) { + case Rounding::Round: + return v.ir.FPRoundEven(float_value); + case Rounding::Floor: + return v.ir.FPFloor(float_value); + case Rounding::Ceil: + return v.ir.FPCeil(float_value); + case Rounding::Trunc: + return v.ir.FPTrunc(float_value); + default: + throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); + } + }()}; + + // TODO: Handle out of bounds conversions. + // For example converting F32 65537.0 to U16, the expected value is 0xffff, + + const bool is_signed{f2i.is_signed != 0}; + const size_t bitsize{BitSize(f2i.dest_format)}; + const IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, rounded_value)}; + + v.X(f2i.dest_reg, result); + + if (f2i.cc != 0) { + v.SetZFlag(v.ir.GetZeroFromOp(result)); + if (is_signed) { + v.SetSFlag(v.ir.GetSignFromOp(result)); + } else { + v.ResetSFlag(); + } + v.ResetCFlag(); + + // TODO: Investigate if out of bound conversions sets the overflow flag + v.ResetOFlag(); + } +} +} // Anonymous namespace + +void TranslatorVisitor::F2I_reg(u64 insn) { + union { + F2I base; + BitField<20, 8, IR::Reg> src_reg; + } const f2i{insn}; + + const IR::U16U32U64 op_a{[&]() -> IR::U16U32U64 { + switch (f2i.base.src_format) { + case SrcFormat::F16: + return ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half); + case SrcFormat::F32: + return X(f2i.src_reg); + case SrcFormat::F64: + return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1))); + default: + throw NotImplementedException("Invalid F2I source format {}", + f2i.base.src_format.Value()); + } + }()}; + + TranslateF2I(*this, insn, op_a); +} + +void TranslatorVisitor::F2I_cbuf(u64) { + throw NotImplementedException("{}", Opcode::F2I_cbuf); +} + +void TranslatorVisitor::F2I_imm(u64) { + throw NotImplementedException("{}", Opcode::F2I_imm); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp new file mode 100644 index 000000000..e2ab0dab2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp @@ -0,0 +1,71 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Operation { + Cos = 0, + Sin = 1, + Ex2 = 2, // Base 2 exponent + Lg2 = 3, // Base 2 logarithm + Rcp = 4, // Reciprocal + Rsq = 5, // Reciprocal square root + Rcp64H = 6, // 64-bit reciprocal + Rsq64H = 7, // 64-bit reciprocal square root + Sqrt = 8, +}; +} // Anonymous namespace + +void TranslatorVisitor::MUFU(u64 insn) { + // MUFU is used to implement a bunch of special functions. See Operation. + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 4, Operation> operation; + BitField<46, 1, u64> abs; + BitField<48, 1, u64> neg; + BitField<50, 1, u64> sat; + } const mufu{insn}; + + const IR::U32 op_a{ir.FPAbsNeg(X(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)}; + IR::U32 value{[&]() -> IR::U32 { + switch (mufu.operation) { + case Operation::Cos: + return ir.FPCosNotReduced(op_a); + case Operation::Sin: + return ir.FPSinNotReduced(op_a); + case Operation::Ex2: + return ir.FPExp2NotReduced(op_a); + case Operation::Lg2: + return ir.FPLog2(op_a); + case Operation::Rcp: + return ir.FPRecip(op_a); + case Operation::Rsq: + return ir.FPRecipSqrt(op_a); + case Operation::Rcp64H: + throw NotImplementedException("MUFU.RCP64H"); + case Operation::Rsq64H: + throw NotImplementedException("MUFU.RSQ64H"); + case Operation::Sqrt: + return ir.FPSqrt(op_a); + default: + throw NotImplementedException("Invalid MUFU operation {}", mufu.operation.Value()); + } + }()}; + + if (mufu.sat) { + value = ir.FPSaturate(value); + } + + X(mufu.dest_reg, value); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp new file mode 100644 index 000000000..7bc7ce9f2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -0,0 +1,79 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +IR::U32 TranslatorVisitor::X(IR::Reg reg) { + return ir.GetReg(reg); +} + +void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { + ir.SetReg(dest_reg, value); +} + +IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { + union { + u64 raw; + BitField<20, 14, s64> offset; + BitField<34, 5, u64> binding; + } const cbuf{insn}; + if (cbuf.binding >= 18) { + throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); + } + if (cbuf.offset >= 0x10'000 || cbuf.offset < 0) { + throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset); + } + const IR::U32 binding{ir.Imm32(static_cast(cbuf.binding))}; + const IR::U32 byte_offset{ir.Imm32(static_cast(cbuf.offset) * 4)}; + return ir.GetCbuf(binding, byte_offset); +} + +IR::U32 TranslatorVisitor::GetImm(u64 insn) { + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const imm{insn}; + const s32 positive_value{static_cast(imm.value)}; + const s32 value{imm.is_negative != 0 ? -positive_value : positive_value}; + return ir.Imm32(value); +} + +void TranslatorVisitor::SetZFlag(const IR::U1& value) { + ir.SetZFlag(value); +} + +void TranslatorVisitor::SetSFlag(const IR::U1& value) { + ir.SetSFlag(value); +} + +void TranslatorVisitor::SetCFlag(const IR::U1& value) { + ir.SetCFlag(value); +} + +void TranslatorVisitor::SetOFlag(const IR::U1& value) { + ir.SetOFlag(value); +} + +void TranslatorVisitor::ResetZero() { + SetZFlag(ir.Imm1(false)); +} + +void TranslatorVisitor::ResetSFlag() { + SetSFlag(ir.Imm1(false)); +} + +void TranslatorVisitor::ResetCFlag() { + SetCFlag(ir.Imm1(false)); +} + +void TranslatorVisitor::ResetOFlag() { + SetOFlag(ir.Imm1(false)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h new file mode 100644 index 000000000..bc607b002 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -0,0 +1,316 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/instruction.h" + +namespace Shader::Maxwell { + +class TranslatorVisitor { +public: + explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_} ,ir(block) {} + + Environment& env; + IR::IREmitter ir; + + void AL2P(u64 insn); + void ALD(u64 insn); + void AST(u64 insn); + void ATOM_cas(u64 insn); + void ATOM(u64 insn); + void ATOMS_cas(u64 insn); + void ATOMS(u64 insn); + void B2R(u64 insn); + void BAR(u64 insn); + void BFE_reg(u64 insn); + void BFE_cbuf(u64 insn); + void BFE_imm(u64 insn); + void BFI_reg(u64 insn); + void BFI_rc(u64 insn); + void BFI_cr(u64 insn); + void BFI_imm(u64 insn); + void BPT(u64 insn); + void BRA(u64 insn); + void BRK(u64 insn); + void BRX(u64 insn); + void CAL(u64 insn); + void CCTL(u64 insn); + void CCTLL(u64 insn); + void CONT(u64 insn); + void CS2R(u64 insn); + void CSET(u64 insn); + void CSETP(u64 insn); + void DADD_reg(u64 insn); + void DADD_cbuf(u64 insn); + void DADD_imm(u64 insn); + void DEPBAR(u64 insn); + void DFMA_reg(u64 insn); + void DFMA_rc(u64 insn); + void DFMA_cr(u64 insn); + void DFMA_imm(u64 insn); + void DMNMX_reg(u64 insn); + void DMNMX_cbuf(u64 insn); + void DMNMX_imm(u64 insn); + void DMUL_reg(u64 insn); + void DMUL_cbuf(u64 insn); + void DMUL_imm(u64 insn); + void DSET_reg(u64 insn); + void DSET_cbuf(u64 insn); + void DSET_imm(u64 insn); + void DSETP_reg(u64 insn); + void DSETP_cbuf(u64 insn); + void DSETP_imm(u64 insn); + void EXIT(u64 insn); + void F2F_reg(u64 insn); + void F2F_cbuf(u64 insn); + void F2F_imm(u64 insn); + void F2I_reg(u64 insn); + void F2I_cbuf(u64 insn); + void F2I_imm(u64 insn); + void FADD_reg(u64 insn); + void FADD_cbuf(u64 insn); + void FADD_imm(u64 insn); + void FADD32I(u64 insn); + void FCHK_reg(u64 insn); + void FCHK_cbuf(u64 insn); + void FCHK_imm(u64 insn); + void FCMP_reg(u64 insn); + void FCMP_rc(u64 insn); + void FCMP_cr(u64 insn); + void FCMP_imm(u64 insn); + void FFMA_reg(u64 insn); + void FFMA_rc(u64 insn); + void FFMA_cr(u64 insn); + void FFMA_imm(u64 insn); + void FFMA32I(u64 insn); + void FLO_reg(u64 insn); + void FLO_cbuf(u64 insn); + void FLO_imm(u64 insn); + void FMNMX_reg(u64 insn); + void FMNMX_cbuf(u64 insn); + void FMNMX_imm(u64 insn); + void FMUL_reg(u64 insn); + void FMUL_cbuf(u64 insn); + void FMUL_imm(u64 insn); + void FMUL32I(u64 insn); + void FSET_reg(u64 insn); + void FSET_cbuf(u64 insn); + void FSET_imm(u64 insn); + void FSETP_reg(u64 insn); + void FSETP_cbuf(u64 insn); + void FSETP_imm(u64 insn); + void FSWZADD(u64 insn); + void GETCRSPTR(u64 insn); + void GETLMEMBASE(u64 insn); + void HADD2_reg(u64 insn); + void HADD2_cbuf(u64 insn); + void HADD2_imm(u64 insn); + void HADD2_32I(u64 insn); + void HFMA2_reg(u64 insn); + void HFMA2_rc(u64 insn); + void HFMA2_cr(u64 insn); + void HFMA2_imm(u64 insn); + void HFMA2_32I(u64 insn); + void HMUL2_reg(u64 insn); + void HMUL2_cbuf(u64 insn); + void HMUL2_imm(u64 insn); + void HMUL2_32I(u64 insn); + void HSET2_reg(u64 insn); + void HSET2_cbuf(u64 insn); + void HSET2_imm(u64 insn); + void HSETP2_reg(u64 insn); + void HSETP2_cbuf(u64 insn); + void HSETP2_imm(u64 insn); + void I2F_reg(u64 insn); + void I2F_cbuf(u64 insn); + void I2F_imm(u64 insn); + void I2I_reg(u64 insn); + void I2I_cbuf(u64 insn); + void I2I_imm(u64 insn); + void IADD_reg(u64 insn); + void IADD_cbuf(u64 insn); + void IADD_imm(u64 insn); + void IADD3_reg(u64 insn); + void IADD3_cbuf(u64 insn); + void IADD3_imm(u64 insn); + void IADD32I(u64 insn); + void ICMP_reg(u64 insn); + void ICMP_rc(u64 insn); + void ICMP_cr(u64 insn); + void ICMP_imm(u64 insn); + void IDE(u64 insn); + void IDP_reg(u64 insn); + void IDP_imm(u64 insn); + void IMAD_reg(u64 insn); + void IMAD_rc(u64 insn); + void IMAD_cr(u64 insn); + void IMAD_imm(u64 insn); + void IMAD32I(u64 insn); + void IMADSP_reg(u64 insn); + void IMADSP_rc(u64 insn); + void IMADSP_cr(u64 insn); + void IMADSP_imm(u64 insn); + void IMNMX_reg(u64 insn); + void IMNMX_cbuf(u64 insn); + void IMNMX_imm(u64 insn); + void IMUL_reg(u64 insn); + void IMUL_cbuf(u64 insn); + void IMUL_imm(u64 insn); + void IMUL32I(u64 insn); + void IPA(u64 insn); + void ISBERD(u64 insn); + void ISCADD_reg(u64 insn); + void ISCADD_cbuf(u64 insn); + void ISCADD_imm(u64 insn); + void ISCADD32I(u64 insn); + void ISET_reg(u64 insn); + void ISET_cbuf(u64 insn); + void ISET_imm(u64 insn); + void ISETP_reg(u64 insn); + void ISETP_cbuf(u64 insn); + void ISETP_imm(u64 insn); + void JCAL(u64 insn); + void JMP(u64 insn); + void JMX(u64 insn); + void KIL(u64 insn); + void LD(u64 insn); + void LDC(u64 insn); + void LDG(u64 insn); + void LDL(u64 insn); + void LDS(u64 insn); + void LEA_hi_reg(u64 insn); + void LEA_hi_cbuf(u64 insn); + void LEA_lo_reg(u64 insn); + void LEA_lo_cbuf(u64 insn); + void LEA_lo_imm(u64 insn); + void LEPC(u64 insn); + void LONGJMP(u64 insn); + void LOP_reg(u64 insn); + void LOP_cbuf(u64 insn); + void LOP_imm(u64 insn); + void LOP3_reg(u64 insn); + void LOP3_cbuf(u64 insn); + void LOP3_imm(u64 insn); + void LOP32I(u64 insn); + void MEMBAR(u64 insn); + void MOV_reg(u64 insn); + void MOV_cbuf(u64 insn); + void MOV_imm(u64 insn); + void MOV32I(u64 insn); + void MUFU(u64 insn); + void NOP(u64 insn); + void OUT_reg(u64 insn); + void OUT_cbuf(u64 insn); + void OUT_imm(u64 insn); + void P2R_reg(u64 insn); + void P2R_cbuf(u64 insn); + void P2R_imm(u64 insn); + void PBK(u64 insn); + void PCNT(u64 insn); + void PEXIT(u64 insn); + void PIXLD(u64 insn); + void PLONGJMP(u64 insn); + void POPC_reg(u64 insn); + void POPC_cbuf(u64 insn); + void POPC_imm(u64 insn); + void PRET(u64 insn); + void PRMT_reg(u64 insn); + void PRMT_rc(u64 insn); + void PRMT_cr(u64 insn); + void PRMT_imm(u64 insn); + void PSET(u64 insn); + void PSETP(u64 insn); + void R2B(u64 insn); + void R2P_reg(u64 insn); + void R2P_cbuf(u64 insn); + void R2P_imm(u64 insn); + void RAM(u64 insn); + void RED(u64 insn); + void RET(u64 insn); + void RRO_reg(u64 insn); + void RRO_cbuf(u64 insn); + void RRO_imm(u64 insn); + void RTT(u64 insn); + void S2R(u64 insn); + void SAM(u64 insn); + void SEL_reg(u64 insn); + void SEL_cbuf(u64 insn); + void SEL_imm(u64 insn); + void SETCRSPTR(u64 insn); + void SETLMEMBASE(u64 insn); + void SHF_l_reg(u64 insn); + void SHF_l_imm(u64 insn); + void SHF_r_reg(u64 insn); + void SHF_r_imm(u64 insn); + void SHFL(u64 insn); + void SHL_reg(u64 insn); + void SHL_cbuf(u64 insn); + void SHL_imm(u64 insn); + void SHR_reg(u64 insn); + void SHR_cbuf(u64 insn); + void SHR_imm(u64 insn); + void SSY(u64 insn); + void ST(u64 insn); + void STG(u64 insn); + void STL(u64 insn); + void STP(u64 insn); + void STS(u64 insn); + void SUATOM_cas(u64 insn); + void SULD(u64 insn); + void SURED(u64 insn); + void SUST(u64 insn); + void SYNC(u64 insn); + void TEX(u64 insn); + void TEX_b(u64 insn); + void TEXS(u64 insn); + void TLD(u64 insn); + void TLD_b(u64 insn); + void TLD4(u64 insn); + void TLD4_b(u64 insn); + void TLD4S(u64 insn); + void TLDS(u64 insn); + void TMML(u64 insn); + void TMML_b(u64 insn); + void TXA(u64 insn); + void TXD(u64 insn); + void TXD_b(u64 insn); + void TXQ(u64 insn); + void TXQ_b(u64 insn); + void VABSDIFF(u64 insn); + void VABSDIFF4(u64 insn); + void VADD(u64 insn); + void VMAD(u64 insn); + void VMNMX(u64 insn); + void VOTE(u64 insn); + void VOTE_vtg(u64 insn); + void VSET(u64 insn); + void VSETP(u64 insn); + void VSHL(u64 insn); + void VSHR(u64 insn); + void XMAD_reg(u64 insn); + void XMAD_rc(u64 insn); + void XMAD_cr(u64 insn); + void XMAD_imm(u64 insn); + + [[nodiscard]] IR::U32 X(IR::Reg reg); + void X(IR::Reg dest_reg, const IR::U32& value); + + [[nodiscard]] IR::U32 GetCbuf(u64 insn); + + [[nodiscard]] IR::U32 GetImm(u64 insn); + + void SetZFlag(const IR::U1& value); + void SetSFlag(const IR::U1& value); + void SetCFlag(const IR::U1& value); + void SetOFlag(const IR::U1& value); + + void ResetZero(); + void ResetSFlag(); + void ResetCFlag(); + void ResetOFlag(); +}; + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp new file mode 100644 index 000000000..23512db1a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -0,0 +1,92 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class InterpolationMode : u64 { + Pass = 0, + Multiply = 1, + Constant = 2, + Sc = 3, +}; + +enum class SampleMode : u64 { + Default = 0, + Centroid = 1, + Offset = 2, +}; +} // Anonymous namespace + +void TranslatorVisitor::IPA(u64 insn) { + // IPA is the instruction used to read varyings from a fragment shader. + // gl_FragCoord is mapped to the gl_Position attribute. + // It yields unknown results when used outside of the fragment shader stage. + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 8, IR::Reg> multiplier; + BitField<30, 8, IR::Attribute> attribute; + BitField<38, 1, u64> idx; + BitField<51, 1, u64> sat; + BitField<52, 2, SampleMode> sample_mode; + BitField<54, 2, InterpolationMode> interpolation_mode; + } const ipa{insn}; + + // Indexed IPAs are used for indexed varyings. + // For example: + // + // in vec4 colors[4]; + // uniform int idx; + // void main() { + // gl_FragColor = colors[idx]; + // } + const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ}; + if (is_indexed) { + throw NotImplementedException("IPA.IDX"); + } + + const IR::Attribute attribute{ipa.attribute}; + IR::U32 value{ir.GetAttribute(attribute)}; + if (IR::IsGeneric(attribute)) { + // const bool is_perspective{UnimplementedReadHeader(GenericAttributeIndex(attribute))}; + const bool is_perspective{false}; + if (is_perspective) { + const IR::U32 rcp_position_w{ir.FPRecip(ir.GetAttribute(IR::Attribute::PositionW))}; + value = ir.FPMul(value, rcp_position_w); + } + } + + switch (ipa.interpolation_mode) { + case InterpolationMode::Pass: + break; + case InterpolationMode::Multiply: + value = ir.FPMul(value, ir.GetReg(ipa.multiplier)); + break; + case InterpolationMode::Constant: + throw NotImplementedException("IPA.CONSTANT"); + case InterpolationMode::Sc: + throw NotImplementedException("IPA.SC"); + } + + // Saturated IPAs are generally generated out of clamped varyings. + // For example: clamp(some_varying, 0.0, 1.0) + const bool is_saturated{ipa.sat != 0}; + if (is_saturated) { + if (attribute == IR::Attribute::FrontFace) { + throw NotImplementedException("IPA.SAT on FrontFace"); + } + value = ir.FPSaturate(value); + } + + ir.SetReg(ipa.dest_reg, value); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp new file mode 100644 index 000000000..d8fd387cf --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -0,0 +1,90 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class StoreSize : u64 { + U8, + S8, + U16, + S16, + B32, + B64, + B128, +}; + +// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html +enum class StoreCache : u64 { + WB, // Cache write-back all coherent levels + CG, // Cache at global level + CS, // Cache streaming, likely to be accessed once + WT, // Cache write-through (to system memory) +}; +} // Anonymous namespace + +void TranslatorVisitor::STG(u64 insn) { + // STG stores registers into global memory. + union { + u64 raw; + BitField<0, 8, IR::Reg> data_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<45, 1, u64> e; + BitField<46, 2, StoreCache> cache; + BitField<48, 3, StoreSize> size; + } const stg{insn}; + + const IR::U64 address{[&]() -> IR::U64 { + if (stg.e == 0) { + // STG without .E uses a 32-bit pointer, zero-extend it + return ir.ConvertU(64, X(stg.addr_reg)); + } + if (!IR::IsAligned(stg.addr_reg, 2)) { + throw NotImplementedException("Unaligned address register"); + } + // Pack two registers to build the 32-bit address + return ir.PackUint2x32(ir.CompositeConstruct(X(stg.addr_reg), X(stg.addr_reg + 1))); + }()}; + + switch (stg.size) { + case StoreSize::U8: + ir.WriteGlobalU8(address, X(stg.data_reg)); + break; + case StoreSize::S8: + ir.WriteGlobalS8(address, X(stg.data_reg)); + break; + case StoreSize::U16: + ir.WriteGlobalU16(address, X(stg.data_reg)); + break; + case StoreSize::S16: + ir.WriteGlobalS16(address, X(stg.data_reg)); + break; + case StoreSize::B32: + ir.WriteGlobal32(address, X(stg.data_reg)); + break; + case StoreSize::B64: { + if (!IR::IsAligned(stg.data_reg, 2)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1))}; + ir.WriteGlobal64(address, vector); + break; + } + case StoreSize::B128: + if (!IR::IsAligned(stg.data_reg, 4)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1), + X(stg.data_reg + 2), X(stg.data_reg + 3))}; + ir.WriteGlobal128(address, vector); + break; + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp new file mode 100644 index 000000000..c907c1ffb --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -0,0 +1,1105 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Maxwell { + +[[maybe_unused]] static inline void DumpOptimized(IR::Block& block) { + auto raw{IR::DumpBlock(block)}; + + Optimization::GetSetElimination(block); + Optimization::DeadCodeEliminationPass(block); + Optimization::IdentityRemovalPass(block); + auto dumped{IR::DumpBlock(block)}; + + fmt::print(stderr, "{}", dumped); +} + +[[noreturn]] static void ThrowNotImplemented(Opcode opcode) { + throw NotImplementedException("Instruction {} is not implemented", opcode); +} + +void TranslatorVisitor::AL2P(u64) { + ThrowNotImplemented(Opcode::AL2P); +} + +void TranslatorVisitor::ALD(u64) { + ThrowNotImplemented(Opcode::ALD); +} + +void TranslatorVisitor::AST(u64) { + ThrowNotImplemented(Opcode::AST); +} + +void TranslatorVisitor::ATOM_cas(u64) { + ThrowNotImplemented(Opcode::ATOM_cas); +} + +void TranslatorVisitor::ATOM(u64) { + ThrowNotImplemented(Opcode::ATOM); +} + +void TranslatorVisitor::ATOMS_cas(u64) { + ThrowNotImplemented(Opcode::ATOMS_cas); +} + +void TranslatorVisitor::ATOMS(u64) { + ThrowNotImplemented(Opcode::ATOMS); +} + +void TranslatorVisitor::B2R(u64) { + ThrowNotImplemented(Opcode::B2R); +} + +void TranslatorVisitor::BAR(u64) { + ThrowNotImplemented(Opcode::BAR); +} + +void TranslatorVisitor::BFE_reg(u64) { + ThrowNotImplemented(Opcode::BFE_reg); +} + +void TranslatorVisitor::BFE_cbuf(u64) { + ThrowNotImplemented(Opcode::BFE_cbuf); +} + +void TranslatorVisitor::BFE_imm(u64) { + ThrowNotImplemented(Opcode::BFE_imm); +} + +void TranslatorVisitor::BFI_reg(u64) { + ThrowNotImplemented(Opcode::BFI_reg); +} + +void TranslatorVisitor::BFI_rc(u64) { + ThrowNotImplemented(Opcode::BFI_rc); +} + +void TranslatorVisitor::BFI_cr(u64) { + ThrowNotImplemented(Opcode::BFI_cr); +} + +void TranslatorVisitor::BFI_imm(u64) { + ThrowNotImplemented(Opcode::BFI_imm); +} + +void TranslatorVisitor::BPT(u64) { + ThrowNotImplemented(Opcode::BPT); +} + +void TranslatorVisitor::BRA(u64) { + ThrowNotImplemented(Opcode::BRA); +} + +void TranslatorVisitor::BRK(u64) { + ThrowNotImplemented(Opcode::BRK); +} + +void TranslatorVisitor::BRX(u64) { + ThrowNotImplemented(Opcode::BRX); +} + +void TranslatorVisitor::CAL(u64) { + ThrowNotImplemented(Opcode::CAL); +} + +void TranslatorVisitor::CCTL(u64) { + ThrowNotImplemented(Opcode::CCTL); +} + +void TranslatorVisitor::CCTLL(u64) { + ThrowNotImplemented(Opcode::CCTLL); +} + +void TranslatorVisitor::CONT(u64) { + ThrowNotImplemented(Opcode::CONT); +} + +void TranslatorVisitor::CS2R(u64) { + ThrowNotImplemented(Opcode::CS2R); +} + +void TranslatorVisitor::CSET(u64) { + ThrowNotImplemented(Opcode::CSET); +} + +void TranslatorVisitor::CSETP(u64) { + ThrowNotImplemented(Opcode::CSETP); +} + +void TranslatorVisitor::DADD_reg(u64) { + ThrowNotImplemented(Opcode::DADD_reg); +} + +void TranslatorVisitor::DADD_cbuf(u64) { + ThrowNotImplemented(Opcode::DADD_cbuf); +} + +void TranslatorVisitor::DADD_imm(u64) { + ThrowNotImplemented(Opcode::DADD_imm); +} + +void TranslatorVisitor::DEPBAR(u64) { + ThrowNotImplemented(Opcode::DEPBAR); +} + +void TranslatorVisitor::DFMA_reg(u64) { + ThrowNotImplemented(Opcode::DFMA_reg); +} + +void TranslatorVisitor::DFMA_rc(u64) { + ThrowNotImplemented(Opcode::DFMA_rc); +} + +void TranslatorVisitor::DFMA_cr(u64) { + ThrowNotImplemented(Opcode::DFMA_cr); +} + +void TranslatorVisitor::DFMA_imm(u64) { + ThrowNotImplemented(Opcode::DFMA_imm); +} + +void TranslatorVisitor::DMNMX_reg(u64) { + ThrowNotImplemented(Opcode::DMNMX_reg); +} + +void TranslatorVisitor::DMNMX_cbuf(u64) { + ThrowNotImplemented(Opcode::DMNMX_cbuf); +} + +void TranslatorVisitor::DMNMX_imm(u64) { + ThrowNotImplemented(Opcode::DMNMX_imm); +} + +void TranslatorVisitor::DMUL_reg(u64) { + ThrowNotImplemented(Opcode::DMUL_reg); +} + +void TranslatorVisitor::DMUL_cbuf(u64) { + ThrowNotImplemented(Opcode::DMUL_cbuf); +} + +void TranslatorVisitor::DMUL_imm(u64) { + ThrowNotImplemented(Opcode::DMUL_imm); +} + +void TranslatorVisitor::DSET_reg(u64) { + ThrowNotImplemented(Opcode::DSET_reg); +} + +void TranslatorVisitor::DSET_cbuf(u64) { + ThrowNotImplemented(Opcode::DSET_cbuf); +} + +void TranslatorVisitor::DSET_imm(u64) { + ThrowNotImplemented(Opcode::DSET_imm); +} + +void TranslatorVisitor::DSETP_reg(u64) { + ThrowNotImplemented(Opcode::DSETP_reg); +} + +void TranslatorVisitor::DSETP_cbuf(u64) { + ThrowNotImplemented(Opcode::DSETP_cbuf); +} + +void TranslatorVisitor::DSETP_imm(u64) { + ThrowNotImplemented(Opcode::DSETP_imm); +} + +void TranslatorVisitor::EXIT(u64) { + throw LogicError("Visting EXIT instruction"); +} + +void TranslatorVisitor::F2F_reg(u64) { + ThrowNotImplemented(Opcode::F2F_reg); +} + +void TranslatorVisitor::F2F_cbuf(u64) { + ThrowNotImplemented(Opcode::F2F_cbuf); +} + +void TranslatorVisitor::F2F_imm(u64) { + ThrowNotImplemented(Opcode::F2F_imm); +} + +void TranslatorVisitor::FADD_reg(u64) { + ThrowNotImplemented(Opcode::FADD_reg); +} + +void TranslatorVisitor::FADD_cbuf(u64) { + ThrowNotImplemented(Opcode::FADD_cbuf); +} + +void TranslatorVisitor::FADD_imm(u64) { + ThrowNotImplemented(Opcode::FADD_imm); +} + +void TranslatorVisitor::FADD32I(u64) { + ThrowNotImplemented(Opcode::FADD32I); +} + +void TranslatorVisitor::FCHK_reg(u64) { + ThrowNotImplemented(Opcode::FCHK_reg); +} + +void TranslatorVisitor::FCHK_cbuf(u64) { + ThrowNotImplemented(Opcode::FCHK_cbuf); +} + +void TranslatorVisitor::FCHK_imm(u64) { + ThrowNotImplemented(Opcode::FCHK_imm); +} + +void TranslatorVisitor::FCMP_reg(u64) { + ThrowNotImplemented(Opcode::FCMP_reg); +} + +void TranslatorVisitor::FCMP_rc(u64) { + ThrowNotImplemented(Opcode::FCMP_rc); +} + +void TranslatorVisitor::FCMP_cr(u64) { + ThrowNotImplemented(Opcode::FCMP_cr); +} + +void TranslatorVisitor::FCMP_imm(u64) { + ThrowNotImplemented(Opcode::FCMP_imm); +} + +void TranslatorVisitor::FFMA_reg(u64) { + ThrowNotImplemented(Opcode::FFMA_reg); +} + +void TranslatorVisitor::FFMA_rc(u64) { + ThrowNotImplemented(Opcode::FFMA_rc); +} + +void TranslatorVisitor::FFMA_cr(u64) { + ThrowNotImplemented(Opcode::FFMA_cr); +} + +void TranslatorVisitor::FFMA_imm(u64) { + ThrowNotImplemented(Opcode::FFMA_imm); +} + +void TranslatorVisitor::FFMA32I(u64) { + ThrowNotImplemented(Opcode::FFMA32I); +} + +void TranslatorVisitor::FLO_reg(u64) { + ThrowNotImplemented(Opcode::FLO_reg); +} + +void TranslatorVisitor::FLO_cbuf(u64) { + ThrowNotImplemented(Opcode::FLO_cbuf); +} + +void TranslatorVisitor::FLO_imm(u64) { + ThrowNotImplemented(Opcode::FLO_imm); +} + +void TranslatorVisitor::FMNMX_reg(u64) { + ThrowNotImplemented(Opcode::FMNMX_reg); +} + +void TranslatorVisitor::FMNMX_cbuf(u64) { + ThrowNotImplemented(Opcode::FMNMX_cbuf); +} + +void TranslatorVisitor::FMNMX_imm(u64) { + ThrowNotImplemented(Opcode::FMNMX_imm); +} + +void TranslatorVisitor::FMUL_reg(u64) { + ThrowNotImplemented(Opcode::FMUL_reg); +} + +void TranslatorVisitor::FMUL_cbuf(u64) { + ThrowNotImplemented(Opcode::FMUL_cbuf); +} + +void TranslatorVisitor::FMUL_imm(u64) { + ThrowNotImplemented(Opcode::FMUL_imm); +} + +void TranslatorVisitor::FMUL32I(u64) { + ThrowNotImplemented(Opcode::FMUL32I); +} + +void TranslatorVisitor::FSET_reg(u64) { + ThrowNotImplemented(Opcode::FSET_reg); +} + +void TranslatorVisitor::FSET_cbuf(u64) { + ThrowNotImplemented(Opcode::FSET_cbuf); +} + +void TranslatorVisitor::FSET_imm(u64) { + ThrowNotImplemented(Opcode::FSET_imm); +} + +void TranslatorVisitor::FSETP_reg(u64) { + ThrowNotImplemented(Opcode::FSETP_reg); +} + +void TranslatorVisitor::FSETP_cbuf(u64) { + ThrowNotImplemented(Opcode::FSETP_cbuf); +} + +void TranslatorVisitor::FSETP_imm(u64) { + ThrowNotImplemented(Opcode::FSETP_imm); +} + +void TranslatorVisitor::FSWZADD(u64) { + ThrowNotImplemented(Opcode::FSWZADD); +} + +void TranslatorVisitor::GETCRSPTR(u64) { + ThrowNotImplemented(Opcode::GETCRSPTR); +} + +void TranslatorVisitor::GETLMEMBASE(u64) { + ThrowNotImplemented(Opcode::GETLMEMBASE); +} + +void TranslatorVisitor::HADD2_reg(u64) { + ThrowNotImplemented(Opcode::HADD2_reg); +} + +void TranslatorVisitor::HADD2_cbuf(u64) { + ThrowNotImplemented(Opcode::HADD2_cbuf); +} + +void TranslatorVisitor::HADD2_imm(u64) { + ThrowNotImplemented(Opcode::HADD2_imm); +} + +void TranslatorVisitor::HADD2_32I(u64) { + ThrowNotImplemented(Opcode::HADD2_32I); +} + +void TranslatorVisitor::HFMA2_reg(u64) { + ThrowNotImplemented(Opcode::HFMA2_reg); +} + +void TranslatorVisitor::HFMA2_rc(u64) { + ThrowNotImplemented(Opcode::HFMA2_rc); +} + +void TranslatorVisitor::HFMA2_cr(u64) { + ThrowNotImplemented(Opcode::HFMA2_cr); +} + +void TranslatorVisitor::HFMA2_imm(u64) { + ThrowNotImplemented(Opcode::HFMA2_imm); +} + +void TranslatorVisitor::HFMA2_32I(u64) { + ThrowNotImplemented(Opcode::HFMA2_32I); +} + +void TranslatorVisitor::HMUL2_reg(u64) { + ThrowNotImplemented(Opcode::HMUL2_reg); +} + +void TranslatorVisitor::HMUL2_cbuf(u64) { + ThrowNotImplemented(Opcode::HMUL2_cbuf); +} + +void TranslatorVisitor::HMUL2_imm(u64) { + ThrowNotImplemented(Opcode::HMUL2_imm); +} + +void TranslatorVisitor::HMUL2_32I(u64) { + ThrowNotImplemented(Opcode::HMUL2_32I); +} + +void TranslatorVisitor::HSET2_reg(u64) { + ThrowNotImplemented(Opcode::HSET2_reg); +} + +void TranslatorVisitor::HSET2_cbuf(u64) { + ThrowNotImplemented(Opcode::HSET2_cbuf); +} + +void TranslatorVisitor::HSET2_imm(u64) { + ThrowNotImplemented(Opcode::HSET2_imm); +} + +void TranslatorVisitor::HSETP2_reg(u64) { + ThrowNotImplemented(Opcode::HSETP2_reg); +} + +void TranslatorVisitor::HSETP2_cbuf(u64) { + ThrowNotImplemented(Opcode::HSETP2_cbuf); +} + +void TranslatorVisitor::HSETP2_imm(u64) { + ThrowNotImplemented(Opcode::HSETP2_imm); +} + +void TranslatorVisitor::I2F_reg(u64) { + ThrowNotImplemented(Opcode::I2F_reg); +} + +void TranslatorVisitor::I2F_cbuf(u64) { + ThrowNotImplemented(Opcode::I2F_cbuf); +} + +void TranslatorVisitor::I2F_imm(u64) { + ThrowNotImplemented(Opcode::I2F_imm); +} + +void TranslatorVisitor::I2I_reg(u64) { + ThrowNotImplemented(Opcode::I2I_reg); +} + +void TranslatorVisitor::I2I_cbuf(u64) { + ThrowNotImplemented(Opcode::I2I_cbuf); +} + +void TranslatorVisitor::I2I_imm(u64) { + ThrowNotImplemented(Opcode::I2I_imm); +} + +void TranslatorVisitor::IADD_reg(u64) { + ThrowNotImplemented(Opcode::IADD_reg); +} + +void TranslatorVisitor::IADD_cbuf(u64) { + ThrowNotImplemented(Opcode::IADD_cbuf); +} + +void TranslatorVisitor::IADD_imm(u64) { + ThrowNotImplemented(Opcode::IADD_imm); +} + +void TranslatorVisitor::IADD3_reg(u64) { + ThrowNotImplemented(Opcode::IADD3_reg); +} + +void TranslatorVisitor::IADD3_cbuf(u64) { + ThrowNotImplemented(Opcode::IADD3_cbuf); +} + +void TranslatorVisitor::IADD3_imm(u64) { + ThrowNotImplemented(Opcode::IADD3_imm); +} + +void TranslatorVisitor::IADD32I(u64) { + ThrowNotImplemented(Opcode::IADD32I); +} + +void TranslatorVisitor::ICMP_reg(u64) { + ThrowNotImplemented(Opcode::ICMP_reg); +} + +void TranslatorVisitor::ICMP_rc(u64) { + ThrowNotImplemented(Opcode::ICMP_rc); +} + +void TranslatorVisitor::ICMP_cr(u64) { + ThrowNotImplemented(Opcode::ICMP_cr); +} + +void TranslatorVisitor::ICMP_imm(u64) { + ThrowNotImplemented(Opcode::ICMP_imm); +} + +void TranslatorVisitor::IDE(u64) { + ThrowNotImplemented(Opcode::IDE); +} + +void TranslatorVisitor::IDP_reg(u64) { + ThrowNotImplemented(Opcode::IDP_reg); +} + +void TranslatorVisitor::IDP_imm(u64) { + ThrowNotImplemented(Opcode::IDP_imm); +} + +void TranslatorVisitor::IMAD_reg(u64) { + ThrowNotImplemented(Opcode::IMAD_reg); +} + +void TranslatorVisitor::IMAD_rc(u64) { + ThrowNotImplemented(Opcode::IMAD_rc); +} + +void TranslatorVisitor::IMAD_cr(u64) { + ThrowNotImplemented(Opcode::IMAD_cr); +} + +void TranslatorVisitor::IMAD_imm(u64) { + ThrowNotImplemented(Opcode::IMAD_imm); +} + +void TranslatorVisitor::IMAD32I(u64) { + ThrowNotImplemented(Opcode::IMAD32I); +} + +void TranslatorVisitor::IMADSP_reg(u64) { + ThrowNotImplemented(Opcode::IMADSP_reg); +} + +void TranslatorVisitor::IMADSP_rc(u64) { + ThrowNotImplemented(Opcode::IMADSP_rc); +} + +void TranslatorVisitor::IMADSP_cr(u64) { + ThrowNotImplemented(Opcode::IMADSP_cr); +} + +void TranslatorVisitor::IMADSP_imm(u64) { + ThrowNotImplemented(Opcode::IMADSP_imm); +} + +void TranslatorVisitor::IMNMX_reg(u64) { + ThrowNotImplemented(Opcode::IMNMX_reg); +} + +void TranslatorVisitor::IMNMX_cbuf(u64) { + ThrowNotImplemented(Opcode::IMNMX_cbuf); +} + +void TranslatorVisitor::IMNMX_imm(u64) { + ThrowNotImplemented(Opcode::IMNMX_imm); +} + +void TranslatorVisitor::IMUL_reg(u64) { + ThrowNotImplemented(Opcode::IMUL_reg); +} + +void TranslatorVisitor::IMUL_cbuf(u64) { + ThrowNotImplemented(Opcode::IMUL_cbuf); +} + +void TranslatorVisitor::IMUL_imm(u64) { + ThrowNotImplemented(Opcode::IMUL_imm); +} + +void TranslatorVisitor::IMUL32I(u64) { + ThrowNotImplemented(Opcode::IMUL32I); +} + +void TranslatorVisitor::ISBERD(u64) { + ThrowNotImplemented(Opcode::ISBERD); +} + +void TranslatorVisitor::ISCADD_reg(u64) { + ThrowNotImplemented(Opcode::ISCADD_reg); +} + +void TranslatorVisitor::ISCADD_cbuf(u64) { + ThrowNotImplemented(Opcode::ISCADD_cbuf); +} + +void TranslatorVisitor::ISCADD_imm(u64) { + ThrowNotImplemented(Opcode::ISCADD_imm); +} + +void TranslatorVisitor::ISCADD32I(u64) { + ThrowNotImplemented(Opcode::ISCADD32I); +} + +void TranslatorVisitor::ISET_reg(u64) { + ThrowNotImplemented(Opcode::ISET_reg); +} + +void TranslatorVisitor::ISET_cbuf(u64) { + ThrowNotImplemented(Opcode::ISET_cbuf); +} + +void TranslatorVisitor::ISET_imm(u64) { + ThrowNotImplemented(Opcode::ISET_imm); +} + +void TranslatorVisitor::ISETP_reg(u64) { + ThrowNotImplemented(Opcode::ISETP_reg); +} + +void TranslatorVisitor::ISETP_cbuf(u64) { + ThrowNotImplemented(Opcode::ISETP_cbuf); +} + +void TranslatorVisitor::ISETP_imm(u64) { + ThrowNotImplemented(Opcode::ISETP_imm); +} + +void TranslatorVisitor::JCAL(u64) { + ThrowNotImplemented(Opcode::JCAL); +} + +void TranslatorVisitor::JMP(u64) { + ThrowNotImplemented(Opcode::JMP); +} + +void TranslatorVisitor::JMX(u64) { + ThrowNotImplemented(Opcode::JMX); +} + +void TranslatorVisitor::KIL(u64) { + ThrowNotImplemented(Opcode::KIL); +} + +void TranslatorVisitor::LD(u64) { + ThrowNotImplemented(Opcode::LD); +} + +void TranslatorVisitor::LDC(u64) { + ThrowNotImplemented(Opcode::LDC); +} + +void TranslatorVisitor::LDG(u64) { + ThrowNotImplemented(Opcode::LDG); +} + +void TranslatorVisitor::LDL(u64) { + ThrowNotImplemented(Opcode::LDL); +} + +void TranslatorVisitor::LDS(u64) { + ThrowNotImplemented(Opcode::LDS); +} + +void TranslatorVisitor::LEA_hi_reg(u64) { + ThrowNotImplemented(Opcode::LEA_hi_reg); +} + +void TranslatorVisitor::LEA_hi_cbuf(u64) { + ThrowNotImplemented(Opcode::LEA_hi_cbuf); +} + +void TranslatorVisitor::LEA_lo_reg(u64) { + ThrowNotImplemented(Opcode::LEA_lo_reg); +} + +void TranslatorVisitor::LEA_lo_cbuf(u64) { + ThrowNotImplemented(Opcode::LEA_lo_cbuf); +} + +void TranslatorVisitor::LEA_lo_imm(u64) { + ThrowNotImplemented(Opcode::LEA_lo_imm); +} + +void TranslatorVisitor::LEPC(u64) { + ThrowNotImplemented(Opcode::LEPC); +} + +void TranslatorVisitor::LONGJMP(u64) { + ThrowNotImplemented(Opcode::LONGJMP); +} + +void TranslatorVisitor::LOP_reg(u64) { + ThrowNotImplemented(Opcode::LOP_reg); +} + +void TranslatorVisitor::LOP_cbuf(u64) { + ThrowNotImplemented(Opcode::LOP_cbuf); +} + +void TranslatorVisitor::LOP_imm(u64) { + ThrowNotImplemented(Opcode::LOP_imm); +} + +void TranslatorVisitor::LOP3_reg(u64) { + ThrowNotImplemented(Opcode::LOP3_reg); +} + +void TranslatorVisitor::LOP3_cbuf(u64) { + ThrowNotImplemented(Opcode::LOP3_cbuf); +} + +void TranslatorVisitor::LOP3_imm(u64) { + ThrowNotImplemented(Opcode::LOP3_imm); +} + +void TranslatorVisitor::LOP32I(u64) { + ThrowNotImplemented(Opcode::LOP32I); +} + +void TranslatorVisitor::MEMBAR(u64) { + ThrowNotImplemented(Opcode::MEMBAR); +} + +void TranslatorVisitor::MOV32I(u64) { + ThrowNotImplemented(Opcode::MOV32I); +} + +void TranslatorVisitor::NOP(u64) { + ThrowNotImplemented(Opcode::NOP); +} + +void TranslatorVisitor::OUT_reg(u64) { + ThrowNotImplemented(Opcode::OUT_reg); +} + +void TranslatorVisitor::OUT_cbuf(u64) { + ThrowNotImplemented(Opcode::OUT_cbuf); +} + +void TranslatorVisitor::OUT_imm(u64) { + ThrowNotImplemented(Opcode::OUT_imm); +} + +void TranslatorVisitor::P2R_reg(u64) { + ThrowNotImplemented(Opcode::P2R_reg); +} + +void TranslatorVisitor::P2R_cbuf(u64) { + ThrowNotImplemented(Opcode::P2R_cbuf); +} + +void TranslatorVisitor::P2R_imm(u64) { + ThrowNotImplemented(Opcode::P2R_imm); +} + +void TranslatorVisitor::PBK(u64) { + // PBK is a no-op +} + +void TranslatorVisitor::PCNT(u64) { + ThrowNotImplemented(Opcode::PCNT); +} + +void TranslatorVisitor::PEXIT(u64) { + ThrowNotImplemented(Opcode::PEXIT); +} + +void TranslatorVisitor::PIXLD(u64) { + ThrowNotImplemented(Opcode::PIXLD); +} + +void TranslatorVisitor::PLONGJMP(u64) { + ThrowNotImplemented(Opcode::PLONGJMP); +} + +void TranslatorVisitor::POPC_reg(u64) { + ThrowNotImplemented(Opcode::POPC_reg); +} + +void TranslatorVisitor::POPC_cbuf(u64) { + ThrowNotImplemented(Opcode::POPC_cbuf); +} + +void TranslatorVisitor::POPC_imm(u64) { + ThrowNotImplemented(Opcode::POPC_imm); +} + +void TranslatorVisitor::PRET(u64) { + ThrowNotImplemented(Opcode::PRET); +} + +void TranslatorVisitor::PRMT_reg(u64) { + ThrowNotImplemented(Opcode::PRMT_reg); +} + +void TranslatorVisitor::PRMT_rc(u64) { + ThrowNotImplemented(Opcode::PRMT_rc); +} + +void TranslatorVisitor::PRMT_cr(u64) { + ThrowNotImplemented(Opcode::PRMT_cr); +} + +void TranslatorVisitor::PRMT_imm(u64) { + ThrowNotImplemented(Opcode::PRMT_imm); +} + +void TranslatorVisitor::PSET(u64) { + ThrowNotImplemented(Opcode::PSET); +} + +void TranslatorVisitor::PSETP(u64) { + ThrowNotImplemented(Opcode::PSETP); +} + +void TranslatorVisitor::R2B(u64) { + ThrowNotImplemented(Opcode::R2B); +} + +void TranslatorVisitor::R2P_reg(u64) { + ThrowNotImplemented(Opcode::R2P_reg); +} + +void TranslatorVisitor::R2P_cbuf(u64) { + ThrowNotImplemented(Opcode::R2P_cbuf); +} + +void TranslatorVisitor::R2P_imm(u64) { + ThrowNotImplemented(Opcode::R2P_imm); +} + +void TranslatorVisitor::RAM(u64) { + ThrowNotImplemented(Opcode::RAM); +} + +void TranslatorVisitor::RED(u64) { + ThrowNotImplemented(Opcode::RED); +} + +void TranslatorVisitor::RET(u64) { + ThrowNotImplemented(Opcode::RET); +} + +void TranslatorVisitor::RRO_reg(u64) { + ThrowNotImplemented(Opcode::RRO_reg); +} + +void TranslatorVisitor::RRO_cbuf(u64) { + ThrowNotImplemented(Opcode::RRO_cbuf); +} + +void TranslatorVisitor::RRO_imm(u64) { + ThrowNotImplemented(Opcode::RRO_imm); +} + +void TranslatorVisitor::RTT(u64) { + ThrowNotImplemented(Opcode::RTT); +} + +void TranslatorVisitor::S2R(u64) { + ThrowNotImplemented(Opcode::S2R); +} + +void TranslatorVisitor::SAM(u64) { + ThrowNotImplemented(Opcode::SAM); +} + +void TranslatorVisitor::SEL_reg(u64) { + ThrowNotImplemented(Opcode::SEL_reg); +} + +void TranslatorVisitor::SEL_cbuf(u64) { + ThrowNotImplemented(Opcode::SEL_cbuf); +} + +void TranslatorVisitor::SEL_imm(u64) { + ThrowNotImplemented(Opcode::SEL_imm); +} + +void TranslatorVisitor::SETCRSPTR(u64) { + ThrowNotImplemented(Opcode::SETCRSPTR); +} + +void TranslatorVisitor::SETLMEMBASE(u64) { + ThrowNotImplemented(Opcode::SETLMEMBASE); +} + +void TranslatorVisitor::SHF_l_reg(u64) { + ThrowNotImplemented(Opcode::SHF_l_reg); +} + +void TranslatorVisitor::SHF_l_imm(u64) { + ThrowNotImplemented(Opcode::SHF_l_imm); +} + +void TranslatorVisitor::SHF_r_reg(u64) { + ThrowNotImplemented(Opcode::SHF_r_reg); +} + +void TranslatorVisitor::SHF_r_imm(u64) { + ThrowNotImplemented(Opcode::SHF_r_imm); +} + +void TranslatorVisitor::SHFL(u64) { + ThrowNotImplemented(Opcode::SHFL); +} + +void TranslatorVisitor::SHL_reg(u64) { + ThrowNotImplemented(Opcode::SHL_reg); +} + +void TranslatorVisitor::SHL_cbuf(u64) { + ThrowNotImplemented(Opcode::SHL_cbuf); +} + +void TranslatorVisitor::SHL_imm(u64) { + ThrowNotImplemented(Opcode::SHL_imm); +} + +void TranslatorVisitor::SHR_reg(u64) { + ThrowNotImplemented(Opcode::SHR_reg); +} + +void TranslatorVisitor::SHR_cbuf(u64) { + ThrowNotImplemented(Opcode::SHR_cbuf); +} + +void TranslatorVisitor::SHR_imm(u64) { + ThrowNotImplemented(Opcode::SHR_imm); +} + +void TranslatorVisitor::SSY(u64) { + ThrowNotImplemented(Opcode::SSY); +} + +void TranslatorVisitor::ST(u64) { + ThrowNotImplemented(Opcode::ST); +} + +void TranslatorVisitor::STL(u64) { + ThrowNotImplemented(Opcode::STL); +} + +void TranslatorVisitor::STP(u64) { + ThrowNotImplemented(Opcode::STP); +} + +void TranslatorVisitor::STS(u64) { + ThrowNotImplemented(Opcode::STS); +} + +void TranslatorVisitor::SUATOM_cas(u64) { + ThrowNotImplemented(Opcode::SUATOM_cas); +} + +void TranslatorVisitor::SULD(u64) { + ThrowNotImplemented(Opcode::SULD); +} + +void TranslatorVisitor::SURED(u64) { + ThrowNotImplemented(Opcode::SURED); +} + +void TranslatorVisitor::SUST(u64) { + ThrowNotImplemented(Opcode::SUST); +} + +void TranslatorVisitor::SYNC(u64) { + ThrowNotImplemented(Opcode::SYNC); +} + +void TranslatorVisitor::TEX(u64) { + ThrowNotImplemented(Opcode::TEX); +} + +void TranslatorVisitor::TEX_b(u64) { + ThrowNotImplemented(Opcode::TEX_b); +} + +void TranslatorVisitor::TEXS(u64) { + ThrowNotImplemented(Opcode::TEXS); +} + +void TranslatorVisitor::TLD(u64) { + ThrowNotImplemented(Opcode::TLD); +} + +void TranslatorVisitor::TLD_b(u64) { + ThrowNotImplemented(Opcode::TLD_b); +} + +void TranslatorVisitor::TLD4(u64) { + ThrowNotImplemented(Opcode::TLD4); +} + +void TranslatorVisitor::TLD4_b(u64) { + ThrowNotImplemented(Opcode::TLD4_b); +} + +void TranslatorVisitor::TLD4S(u64) { + ThrowNotImplemented(Opcode::TLD4S); +} + +void TranslatorVisitor::TLDS(u64) { + ThrowNotImplemented(Opcode::TLDS); +} + +void TranslatorVisitor::TMML(u64) { + ThrowNotImplemented(Opcode::TMML); +} + +void TranslatorVisitor::TMML_b(u64) { + ThrowNotImplemented(Opcode::TMML_b); +} + +void TranslatorVisitor::TXA(u64) { + ThrowNotImplemented(Opcode::TXA); +} + +void TranslatorVisitor::TXD(u64) { + ThrowNotImplemented(Opcode::TXD); +} + +void TranslatorVisitor::TXD_b(u64) { + ThrowNotImplemented(Opcode::TXD_b); +} + +void TranslatorVisitor::TXQ(u64) { + ThrowNotImplemented(Opcode::TXQ); +} + +void TranslatorVisitor::TXQ_b(u64) { + ThrowNotImplemented(Opcode::TXQ_b); +} + +void TranslatorVisitor::VABSDIFF(u64) { + ThrowNotImplemented(Opcode::VABSDIFF); +} + +void TranslatorVisitor::VABSDIFF4(u64) { + ThrowNotImplemented(Opcode::VABSDIFF4); +} + +void TranslatorVisitor::VADD(u64) { + ThrowNotImplemented(Opcode::VADD); +} + +void TranslatorVisitor::VMAD(u64) { + ThrowNotImplemented(Opcode::VMAD); +} + +void TranslatorVisitor::VMNMX(u64) { + ThrowNotImplemented(Opcode::VMNMX); +} + +void TranslatorVisitor::VOTE(u64) { + ThrowNotImplemented(Opcode::VOTE); +} + +void TranslatorVisitor::VOTE_vtg(u64) { + ThrowNotImplemented(Opcode::VOTE_vtg); +} + +void TranslatorVisitor::VSET(u64) { + ThrowNotImplemented(Opcode::VSET); +} + +void TranslatorVisitor::VSETP(u64) { + ThrowNotImplemented(Opcode::VSETP); +} + +void TranslatorVisitor::VSHL(u64) { + ThrowNotImplemented(Opcode::VSHL); +} + +void TranslatorVisitor::VSHR(u64) { + ThrowNotImplemented(Opcode::VSHR); +} + +void TranslatorVisitor::XMAD_reg(u64) { + ThrowNotImplemented(Opcode::XMAD_reg); +} + +void TranslatorVisitor::XMAD_rc(u64) { + ThrowNotImplemented(Opcode::XMAD_rc); +} + +void TranslatorVisitor::XMAD_cr(u64) { + ThrowNotImplemented(Opcode::XMAD_cr); +} + +void TranslatorVisitor::XMAD_imm(u64) { + ThrowNotImplemented(Opcode::XMAD_imm); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp new file mode 100644 index 000000000..7fa35ba3a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp @@ -0,0 +1,45 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +union MOV { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 8, IR::Reg> src_reg; + BitField<39, 4, u64> mask; +}; + +void CheckMask(MOV mov) { + if (mov.mask != 0xf) { + throw NotImplementedException("Non-full move mask"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::MOV_reg(u64 insn) { + const MOV mov{insn}; + CheckMask(mov); + X(mov.dest_reg, X(mov.src_reg)); +} + +void TranslatorVisitor::MOV_cbuf(u64 insn) { + const MOV mov{insn}; + CheckMask(mov); + X(mov.dest_reg, GetCbuf(insn)); +} + +void TranslatorVisitor::MOV_imm(u64 insn) { + const MOV mov{insn}; + CheckMask(mov); + X(mov.dest_reg, GetImm(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp new file mode 100644 index 000000000..66a306745 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -0,0 +1,50 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/location.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/translate.h" + +namespace Shader::Maxwell { + +template +static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { + using MethodType = decltype(visitor_method); + if constexpr (std::is_invocable_r_v) { + (visitor.*visitor_method)(pc, insn); + } else if constexpr (std::is_invocable_r_v) { + (visitor.*visitor_method)(insn); + } else { + (visitor.*visitor_method)(); + } +} + +IR::Block Translate(Environment& env, const Flow::Block& flow_block) { + IR::Block block{flow_block.begin.Offset(), flow_block.end.Offset()}; + TranslatorVisitor visitor{env, block}; + + const Location pc_end{flow_block.end}; + Location pc{flow_block.begin}; + while (pc != pc_end) { + const u64 insn{env.ReadInstruction(pc.Offset())}; + const Opcode opcode{Decode(insn)}; + switch (opcode) { +#define INST(name, cute, mask) \ + case Opcode::name: \ + Invoke<&TranslatorVisitor::name>(visitor, pc, insn); \ + break; +#include "shader_recompiler/frontend/maxwell/maxwell.inc" +#undef OPCODE + default: + throw LogicError("Invalid opcode {}", opcode); + } + ++pc; + } + return block; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h new file mode 100644 index 000000000..788742dea --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h @@ -0,0 +1,16 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/maxwell/location.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" + +namespace Shader::Maxwell { + +[[nodiscard]] IR::Block Translate(Environment& env, const Flow::Block& flow_block); + +} // namespace Shader::Maxwell -- cgit v1.2.3 From 6c4cc0cd062fbbba5349da1108d3c23cb330ca8a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 2 Feb 2021 21:07:00 -0300 Subject: shader: SSA and dominance --- src/shader_recompiler/frontend/ir/basic_block.cpp | 51 +++++--- src/shader_recompiler/frontend/ir/basic_block.h | 20 +++- src/shader_recompiler/frontend/ir/function.cpp | 5 + src/shader_recompiler/frontend/ir/function.h | 25 ++++ .../frontend/ir/microinstruction.cpp | 22 ++++ .../frontend/ir/microinstruction.h | 10 ++ src/shader_recompiler/frontend/ir/opcode.inc | 8 ++ src/shader_recompiler/frontend/ir/pred.h | 7 ++ src/shader_recompiler/frontend/ir/reg.h | 9 +- src/shader_recompiler/frontend/ir/value.cpp | 37 ++++++ src/shader_recompiler/frontend/ir/value.h | 3 + .../frontend/maxwell/control_flow.cpp | 130 ++++++++++++++++++++- .../frontend/maxwell/control_flow.h | 44 ++++++- src/shader_recompiler/frontend/maxwell/program.cpp | 75 +++++++----- src/shader_recompiler/frontend/maxwell/program.h | 11 +- .../frontend/maxwell/termination_code.cpp | 7 ++ .../frontend/maxwell/termination_code.h | 1 + .../frontend/maxwell/translate/impl/impl.h | 4 +- .../maxwell/translate/impl/not_implemented.cpp | 6 +- 19 files changed, 401 insertions(+), 74 deletions(-) create mode 100644 src/shader_recompiler/frontend/ir/function.cpp create mode 100644 src/shader_recompiler/frontend/ir/function.h (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index 0406726ad..e795618fc 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -37,6 +37,10 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, return result_it; } +void Block::AddImmediatePredecessor(IR::Block* immediate_predecessor) { + imm_predecessors.push_back(immediate_predecessor); +} + u32 Block::LocationBegin() const noexcept { return location_begin; } @@ -53,6 +57,18 @@ const Block::InstructionList& Block::Instructions() const noexcept { return instructions; } +std::span Block::ImmediatePredecessors() const noexcept { + return imm_predecessors; +} + +static std::string BlockToIndex(const std::map& block_to_index, + Block* block) { + if (const auto it{block_to_index.find(block)}; it != block_to_index.end()) { + return fmt::format("{{Block ${}}}", it->second); + } + return fmt::format("$", reinterpret_cast(block)); +} + static std::string ArgToIndex(const std::map& block_to_index, const std::map& inst_to_index, const Value& arg) { @@ -60,10 +76,7 @@ static std::string ArgToIndex(const std::map& block_to_ind return ""; } if (arg.IsLabel()) { - if (const auto it{block_to_index.find(arg.Label())}; it != block_to_index.end()) { - return fmt::format("{{Block ${}}}", it->second); - } - return fmt::format("$", reinterpret_cast(arg.Label())); + return BlockToIndex(block_to_index, arg.Label()); } if (!arg.IsImmediate()) { if (const auto it{inst_to_index.find(arg.Inst())}; it != inst_to_index.end()) { @@ -115,16 +128,26 @@ std::string DumpBlock(const Block& block, const std::map& } else { ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces } - const size_t arg_count{NumArgsOf(op)}; - for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { - const Value arg{inst.Arg(arg_index)}; - ret += arg_index != 0 ? ", " : " "; - ret += ArgToIndex(block_to_index, inst_to_index, arg); - - const Type actual_type{arg.Type()}; - const Type expected_type{ArgTypeOf(op, arg_index)}; - if (!AreTypesCompatible(actual_type, expected_type)) { - ret += fmt::format("", actual_type, expected_type); + if (op == Opcode::Phi) { + size_t val_index{0}; + for (const auto& [phi_block, phi_val] : inst.PhiOperands()) { + ret += val_index != 0 ? ", " : " "; + ret += fmt::format("[ {}, {} ]", ArgToIndex(block_to_index, inst_to_index, phi_val), + BlockToIndex(block_to_index, phi_block)); + ++val_index; + } + } else { + const size_t arg_count{NumArgsOf(op)}; + for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { + const Value arg{inst.Arg(arg_index)}; + ret += arg_index != 0 ? ", " : " "; + ret += ArgToIndex(block_to_index, inst_to_index, arg); + + const Type actual_type{arg.Type()}; + const Type expected_type{ArgTypeOf(op, arg_index)}; + if (!AreTypesCompatible(actual_type, expected_type)) { + ret += fmt::format("", actual_type, expected_type); + } } } if (TypeOf(op) != Type::Void) { diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 3ed2eb957..4b6b80c4b 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -6,6 +6,8 @@ #include #include +#include +#include #include #include @@ -36,7 +38,11 @@ public: void AppendNewInst(Opcode op, std::initializer_list args); /// Prepends a new instruction to this basic block before the insertion point. - iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list args); + iterator PrependNewInst(iterator insertion_point, Opcode op, + std::initializer_list args = {}); + + /// Adds a new immediate predecessor to the basic block. + void AddImmediatePredecessor(IR::Block* immediate_predecessor); /// Gets the starting location of this basic block. [[nodiscard]] u32 LocationBegin() const noexcept; @@ -44,9 +50,12 @@ public: [[nodiscard]] u32 LocationEnd() const noexcept; /// Gets a mutable reference to the instruction list for this basic block. - InstructionList& Instructions() noexcept; + [[nodiscard]] InstructionList& Instructions() noexcept; /// Gets an immutable reference to the instruction list for this basic block. - const InstructionList& Instructions() const noexcept; + [[nodiscard]] const InstructionList& Instructions() const noexcept; + + /// Gets an immutable span to the immediate predecessors. + [[nodiscard]] std::span ImmediatePredecessors() const noexcept; [[nodiscard]] bool empty() const { return instructions.empty(); @@ -115,13 +124,16 @@ private: /// End location of this block u32 location_end; - /// List of instructions in this block. + /// List of instructions in this block InstructionList instructions; /// Memory pool for instruction list boost::fast_pool_allocator instruction_alloc_pool; + + /// Block immediate predecessors + std::vector imm_predecessors; }; [[nodiscard]] std::string DumpBlock(const Block& block); diff --git a/src/shader_recompiler/frontend/ir/function.cpp b/src/shader_recompiler/frontend/ir/function.cpp new file mode 100644 index 000000000..d1fc9461d --- /dev/null +++ b/src/shader_recompiler/frontend/ir/function.cpp @@ -0,0 +1,5 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/ir/function.h" diff --git a/src/shader_recompiler/frontend/ir/function.h b/src/shader_recompiler/frontend/ir/function.h new file mode 100644 index 000000000..2d4dc5b98 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/function.h @@ -0,0 +1,25 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "shader_recompiler/frontend/ir/basic_block.h" + +namespace Shader::IR { + +struct Function { + struct InplaceDelete { + void operator()(IR::Block* block) const noexcept { + std::destroy_at(block); + } + }; + using UniqueBlock = std::unique_ptr; + + std::vector blocks; +}; + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 553fec3b7..ecf76e23d 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -30,6 +30,11 @@ static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) bool Inst::MayHaveSideEffects() const noexcept { switch (op) { + case Opcode::Branch: + case Opcode::BranchConditional: + case Opcode::Exit: + case Opcode::Return: + case Opcode::Unreachable: case Opcode::SetAttribute: case Opcode::SetAttributeIndexed: case Opcode::WriteGlobalU8: @@ -113,6 +118,17 @@ void Inst::SetArg(size_t index, Value value) { args[index] = value; } +std::span> Inst::PhiOperands() const noexcept { + return phi_operands; +} + +void Inst::AddPhiOperand(Block* predecessor, const Value& value) { + if (!value.IsImmediate()) { + Use(value); + } + phi_operands.emplace_back(predecessor, value); +} + void Inst::Invalidate() { ClearArgs(); op = Opcode::Void; @@ -125,6 +141,12 @@ void Inst::ClearArgs() { } value = {}; } + for (auto& [phi_block, phi_op] : phi_operands) { + if (!phi_op.IsImmediate()) { + UndoUse(phi_op); + } + } + phi_operands.clear(); } void Inst::ReplaceUsesWith(Value replacement) { diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 43460b950..7f1ed6710 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -5,6 +5,8 @@ #pragma once #include +#include +#include #include @@ -15,6 +17,8 @@ namespace Shader::IR { +class Block; + constexpr size_t MAX_ARG_COUNT = 4; class Inst : public boost::intrusive::list_base_hook<> { @@ -59,6 +63,11 @@ public: /// Set the value of a given argument index. void SetArg(size_t index, Value value); + /// Get an immutable span to the phi operands. + [[nodiscard]] std::span> PhiOperands() const noexcept; + /// Add phi operand to a phi instruction. + void AddPhiOperand(Block* predecessor, const Value& value); + void Invalidate(); void ClearArgs(); @@ -76,6 +85,7 @@ private: Inst* carry_inst{}; Inst* overflow_inst{}; Inst* zsco_inst{}; + std::vector> phi_operands; u64 flags{}; }; diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc index 371064bf3..40759e96a 100644 --- a/src/shader_recompiler/frontend/ir/opcode.inc +++ b/src/shader_recompiler/frontend/ir/opcode.inc @@ -5,6 +5,7 @@ // opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... OPCODE(Void, Void, ) OPCODE(Identity, Opaque, Opaque, ) +OPCODE(Phi, Opaque, /*todo*/ ) // Control flow OPCODE(Branch, Void, Label, ) @@ -35,6 +36,13 @@ OPCODE(SetSFlag, Void, U1, OPCODE(SetCFlag, Void, U1, ) OPCODE(SetOFlag, Void, U1, ) +// Undefined +OPCODE(Undef1, U1, ) +OPCODE(Undef8, U8, ) +OPCODE(Undef16, U16, ) +OPCODE(Undef32, U32, ) +OPCODE(Undef64, U64, ) + // Memory operations OPCODE(WriteGlobalU8, Void, U64, U32, ) OPCODE(WriteGlobalS8, Void, U64, U32, ) diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h index 37cc53006..daf23193f 100644 --- a/src/shader_recompiler/frontend/ir/pred.h +++ b/src/shader_recompiler/frontend/ir/pred.h @@ -10,6 +10,13 @@ namespace Shader::IR { enum class Pred { P0, P1, P2, P3, P4, P5, P6, PT }; +constexpr size_t NUM_USER_PREDS = 6; +constexpr size_t NUM_PREDS = 7; + +[[nodiscard]] constexpr size_t PredIndex(Pred pred) noexcept { + return static_cast(pred); +} + } // namespace Shader::IR template <> diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h index 316fc4be8..771094eb9 100644 --- a/src/shader_recompiler/frontend/ir/reg.h +++ b/src/shader_recompiler/frontend/ir/reg.h @@ -271,6 +271,9 @@ enum class Reg : u64 { }; static_assert(static_cast(Reg::RZ) == 255); +constexpr size_t NUM_USER_REGS = 255; +constexpr size_t NUM_REGS = 256; + [[nodiscard]] constexpr Reg operator+(Reg reg, int num) { if (reg == Reg::RZ) { // Adding or subtracting registers from RZ yields RZ @@ -290,8 +293,12 @@ static_assert(static_cast(Reg::RZ) == 255); return reg + (-num); } +[[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept { + return static_cast(reg); +} + [[nodiscard]] constexpr bool IsAligned(Reg reg, size_t align) { - return (static_cast(reg) / align) * align == static_cast(reg); + return (RegIndex(reg) / align) * align == RegIndex(reg); } } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 7b5b35d6c..1e974e88c 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -115,6 +115,43 @@ u64 Value::U64() const { return imm_u64; } +bool Value::operator==(const Value& other) const { + if (type != other.type) { + return false; + } + switch (type) { + case Type::Void: + return true; + case Type::Opaque: + return inst == other.inst; + case Type::Label: + return label == other.label; + case Type::Reg: + return reg == other.reg; + case Type::Pred: + return pred == other.pred; + case Type::Attribute: + return attribute == other.attribute; + case Type::U1: + return imm_u1 == other.imm_u1; + case Type::U8: + return imm_u8 == other.imm_u8; + case Type::U16: + return imm_u16 == other.imm_u16; + case Type::U32: + return imm_u32 == other.imm_u32; + case Type::U64: + return imm_u64 == other.imm_u64; + case Type::ZSCO: + throw NotImplementedException("ZSCO comparison"); + } + throw LogicError("Invalid type {}", type); +} + +bool Value::operator!=(const Value& other) const { + return !operator==(other); +} + void Value::ValidateAccess(IR::Type expected) const { if (type != expected) { throw LogicError("Reading {} out of {}", expected, type); diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 664dacf9d..368119921 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -48,6 +48,9 @@ public: [[nodiscard]] u32 U32() const; [[nodiscard]] u64 U64() const; + [[nodiscard]] bool operator==(const Value& other) const; + [[nodiscard]] bool operator!=(const Value& other) const; + private: void ValidateAccess(IR::Type expected) const; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index fc4dba826..21ee98137 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -36,6 +36,7 @@ static std::array Split(Block&& block, Location pc, BlockId new_id) { .cond{true}, .branch_true{new_id}, .branch_false{UNREACHABLE_BLOCK_ID}, + .imm_predecessors{}, }, Block{ .begin{pc}, @@ -46,6 +47,7 @@ static std::array Split(Block&& block, Location pc, BlockId new_id) { .cond{block.cond}, .branch_true{block.branch_true}, .branch_false{block.branch_false}, + .imm_predecessors{}, }, }; } @@ -108,7 +110,7 @@ static bool HasFlowTest(Opcode opcode) { } } -static std::string Name(const Block& block) { +static std::string NameOf(const Block& block) { if (block.begin.IsVirtual()) { return fmt::format("\"Virtual {}\"", block.id); } else { @@ -154,13 +156,127 @@ bool Block::Contains(Location pc) const noexcept { } Function::Function(Location start_address) - : entrypoint{start_address}, labels{Label{ + : entrypoint{start_address}, labels{{ .address{start_address}, .block_id{0}, .stack{}, }} {} +void Function::BuildBlocksMap() { + const size_t num_blocks{NumBlocks()}; + blocks_map.resize(num_blocks); + for (size_t block_index = 0; block_index < num_blocks; ++block_index) { + Block& block{blocks_data[block_index]}; + blocks_map[block.id] = █ + } +} + +void Function::BuildImmediatePredecessors() { + for (const Block& block : blocks_data) { + if (block.branch_true != UNREACHABLE_BLOCK_ID) { + blocks_map[block.branch_true]->imm_predecessors.push_back(block.id); + } + if (block.branch_false != UNREACHABLE_BLOCK_ID) { + blocks_map[block.branch_false]->imm_predecessors.push_back(block.id); + } + } +} + +void Function::BuildPostOrder() { + boost::container::small_vector block_stack; + post_order_map.resize(NumBlocks()); + + Block& first_block{blocks_data[blocks.front()]}; + first_block.post_order_visited = true; + block_stack.push_back(first_block.id); + + const auto visit_branch = [&](BlockId block_id, BlockId branch_id) { + if (branch_id == UNREACHABLE_BLOCK_ID) { + return false; + } + if (blocks_map[branch_id]->post_order_visited) { + return false; + } + blocks_map[branch_id]->post_order_visited = true; + + // Calling push_back twice is faster than insert on msvc + block_stack.push_back(block_id); + block_stack.push_back(branch_id); + return true; + }; + while (!block_stack.empty()) { + const Block* const block{blocks_map[block_stack.back()]}; + block_stack.pop_back(); + + if (!visit_branch(block->id, block->branch_true) && + !visit_branch(block->id, block->branch_false)) { + post_order_map[block->id] = static_cast(post_order_blocks.size()); + post_order_blocks.push_back(block->id); + } + } +} + +void Function::BuildImmediateDominators() { + auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })}; + auto reverse_order_but_first{std::views::reverse | std::views::drop(1) | transform_block_id}; + auto has_idom{std::views::filter([](Block* block) { return block->imm_dominator; })}; + auto intersect{[this](Block* finger1, Block* finger2) { + while (finger1 != finger2) { + while (post_order_map[finger1->id] < post_order_map[finger2->id]) { + finger1 = finger1->imm_dominator; + } + while (post_order_map[finger2->id] < post_order_map[finger1->id]) { + finger2 = finger2->imm_dominator; + } + } + return finger1; + }}; + for (Block& block : blocks_data) { + block.imm_dominator = nullptr; + } + Block* const start_block{&blocks_data[blocks.front()]}; + start_block->imm_dominator = start_block; + + bool changed{true}; + while (changed) { + changed = false; + for (Block* const block : post_order_blocks | reverse_order_but_first) { + Block* new_idom{}; + for (Block* predecessor : block->imm_predecessors | transform_block_id | has_idom) { + new_idom = new_idom ? intersect(predecessor, new_idom) : predecessor; + } + changed |= block->imm_dominator != new_idom; + block->imm_dominator = new_idom; + } + } +} + +void Function::BuildDominanceFrontier() { + auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })}; + auto has_enough_predecessors{[](Block& block) { return block.imm_predecessors.size() >= 2; }}; + for (Block& block : blocks_data | std::views::filter(has_enough_predecessors)) { + for (Block* current : block.imm_predecessors | transform_block_id) { + while (current != block.imm_dominator) { + current->dominance_frontiers.push_back(current->id); + current = current->imm_dominator; + } + } + } +} + CFG::CFG(Environment& env_, Location start_address) : env{env_} { + VisitFunctions(start_address); + + for (Function& function : functions) { + function.BuildBlocksMap(); + function.BuildImmediatePredecessors(); + function.BuildPostOrder(); + function.BuildImmediateDominators(); + function.BuildDominanceFrontier(); + } +} + +void CFG::VisitFunctions(Location start_address) { functions.emplace_back(start_address); for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { while (!functions[function_id].labels.empty()) { @@ -202,6 +318,7 @@ void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { .cond{true}, .branch_true{UNREACHABLE_BLOCK_ID}, .branch_false{UNREACHABLE_BLOCK_ID}, + .imm_predecessors{}, }; // Analyze instructions until it reaches an already visited block or there's a branch bool is_branch{false}; @@ -310,7 +427,7 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati // Technically CAL pushes into PRET, but that's implicit in the function call for us // Insert the function into the list if it doesn't exist if (std::ranges::find(functions, cal_pc, &Function::entrypoint) == functions.end()) { - functions.push_back(cal_pc); + functions.emplace_back(cal_pc); } // Handle CAL like a regular instruction break; @@ -352,6 +469,7 @@ void CFG::AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, .cond{cond}, .branch_true{conditional_block_id}, .branch_false{UNREACHABLE_BLOCK_ID}, + .imm_predecessors{}, })}; // Set the end properties of the conditional instruction and give it a new identity Block& conditional_block{block}; @@ -465,14 +583,14 @@ std::string CFG::Dot() const { dot += fmt::format("\t\tnode [style=filled];\n"); for (const u32 block_index : function.blocks) { const Block& block{function.blocks_data[block_index]}; - const std::string name{Name(block)}; + const std::string name{NameOf(block)}; const auto add_branch = [&](BlockId branch_id, bool add_label) { const auto it{std::ranges::find(function.blocks_data, branch_id, &Block::id)}; dot += fmt::format("\t\t{}->", name); if (it == function.blocks_data.end()) { dot += fmt::format("\"Unknown label {}\"", branch_id); } else { - dot += Name(*it); + dot += NameOf(*it); }; if (add_label && block.cond != true && block.cond != false) { dot += fmt::format(" [label=\"{}\"]", block.cond); @@ -520,7 +638,7 @@ std::string CFG::Dot() const { if (functions.front().blocks.empty()) { dot += "Start;\n"; } else { - dot += fmt::format("\tStart -> {};\n", Name(functions.front().blocks_data.front())); + dot += fmt::format("\tStart -> {};\n", NameOf(functions.front().blocks_data.front())); } dot += fmt::format("\tStart [shape=diamond];\n"); } diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index b2ab0cdc3..20ada8afd 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -70,6 +70,12 @@ struct Block { IR::Condition cond; BlockId branch_true; BlockId branch_false; + boost::container::small_vector imm_predecessors; + boost::container::small_vector dominance_frontiers; + union { + bool post_order_visited{false}; + Block* imm_dominator; + }; }; struct Label { @@ -81,11 +87,30 @@ struct Label { struct Function { Function(Location start_address); + void BuildBlocksMap(); + + void BuildImmediatePredecessors(); + + void BuildPostOrder(); + + void BuildImmediateDominators(); + + void BuildDominanceFrontier(); + + [[nodiscard]] size_t NumBlocks() const noexcept { + return static_cast(current_block_id) + 1; + } + Location entrypoint; BlockId current_block_id{0}; boost::container::small_vector labels; boost::container::small_vector blocks; boost::container::small_vector blocks_data; + // Translates from BlockId to block index + boost::container::small_vector blocks_map; + + boost::container::small_vector post_order_blocks; + boost::container::small_vector post_order_map; }; class CFG { @@ -97,6 +122,12 @@ class CFG { public: explicit CFG(Environment& env, Location start_address); + CFG& operator=(const CFG&) = delete; + CFG(const CFG&) = delete; + + CFG& operator=(CFG&&) = delete; + CFG(CFG&&) = delete; + [[nodiscard]] std::string Dot() const; [[nodiscard]] std::span Functions() const noexcept { @@ -104,20 +135,22 @@ public: } private: + void VisitFunctions(Location start_address); + void AnalyzeLabel(FunctionId function_id, Label& label); /// Inspect already visited blocks. /// Return true when the block has already been visited - [[nodiscard]] bool InspectVisitedBlocks(FunctionId function_id, const Label& label); + bool InspectVisitedBlocks(FunctionId function_id, const Label& label); - [[nodiscard]] AnalysisState AnalyzeInst(Block& block, FunctionId function_id, Location pc); + AnalysisState AnalyzeInst(Block& block, FunctionId function_id, Location pc); void AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, EndClass insn_end_class, IR::Condition cond); /// Return true when the branch instruction is confirmed to be a branch - [[nodiscard]] bool AnalyzeBranch(Block& block, FunctionId function_id, Location pc, - Instruction inst, Opcode opcode); + bool AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst, + Opcode opcode); void AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute); @@ -126,8 +159,7 @@ private: AnalysisState AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, Instruction inst); /// Return the branch target block id - [[nodiscard]] BlockId AddLabel(const Block& block, Stack stack, Location pc, - FunctionId function_id); + BlockId AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id); Environment& env; boost::container::small_vector functions; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 67a98ba57..49d1f4bfb 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -8,40 +8,53 @@ #include "shader_recompiler/frontend/maxwell/program.h" #include "shader_recompiler/frontend/maxwell/termination_code.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { +namespace { +void TranslateCode(Environment& env, const Flow::Function& cfg_function, IR::Function& function, + std::span block_map, IR::Block* block_memory) { + const size_t num_blocks{cfg_function.blocks.size()}; + function.blocks.reserve(num_blocks); -Program::Function::~Function() { - std::ranges::for_each(blocks, &std::destroy_at); -} - -Program::Program(Environment& env, const Flow::CFG& cfg) { - std::vector block_map; - functions.reserve(cfg.Functions().size()); + for (const Flow::BlockId block_id : cfg_function.blocks) { + const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - for (const Flow::Function& cfg_function : cfg.Functions()) { - Function& function{functions.emplace_back()}; + function.blocks.emplace_back(std::construct_at(block_memory, Translate(env, flow_block))); + block_map[flow_block.id] = function.blocks.back().get(); + ++block_memory; + } +} - const size_t num_blocks{cfg_function.blocks.size()}; - IR::Block* block_memory{block_alloc_pool.allocate(num_blocks)}; - function.blocks.reserve(num_blocks); +void EmitTerminationInsts(const Flow::Function& cfg_function, + std::span block_map) { + for (const Flow::BlockId block_id : cfg_function.blocks) { + const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; + EmitTerminationCode(flow_block, block_map); + } +} - block_map.resize(cfg_function.blocks_data.size()); +void TranslateFunction(Environment& env, const Flow::Function& cfg_function, IR::Function& function, + IR::Block* block_memory) { + std::vector block_map; + block_map.resize(cfg_function.blocks_data.size()); - // Visit the instructions of all blocks - for (const Flow::BlockId block_id : cfg_function.blocks) { - const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; + TranslateCode(env, cfg_function, function, block_map, block_memory); + EmitTerminationInsts(cfg_function, block_map); +} +} // Anonymous namespace - IR::Block* const block{std::construct_at(block_memory, Translate(env, flow_block))}; - ++block_memory; - function.blocks.push_back(block); - block_map[flow_block.id] = block; - } - // Now that all blocks are defined, emit the termination instructions - for (const Flow::BlockId block_id : cfg_function.blocks) { - const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - EmitTerminationCode(flow_block, block_map); - } +Program::Program(Environment& env, const Flow::CFG& cfg) { + functions.reserve(cfg.Functions().size()); + for (const Flow::Function& cfg_function : cfg.Functions()) { + TranslateFunction(env, cfg_function, functions.emplace_back(), + block_alloc_pool.allocate(cfg_function.blocks.size())); + } + std::ranges::for_each(functions, Optimization::SsaRewritePass); + for (IR::Function& function : functions) { + Optimization::Invoke(Optimization::DeadCodeEliminationPass, function); + Optimization::Invoke(Optimization::IdentityRemovalPass, function); + // Optimization::Invoke(Optimization::VerificationPass, function); } } @@ -50,16 +63,16 @@ std::string DumpProgram(const Program& program) { std::map inst_to_index; std::map block_to_index; - for (const Program::Function& function : program.functions) { - for (const IR::Block* const block : function.blocks) { - block_to_index.emplace(block, index); + for (const IR::Function& function : program.functions) { + for (const auto& block : function.blocks) { + block_to_index.emplace(block.get(), index); ++index; } } std::string ret; - for (const Program::Function& function : program.functions) { + for (const IR::Function& function : program.functions) { ret += fmt::format("Function\n"); - for (const IR::Block* const block : function.blocks) { + for (const auto& block : function.blocks) { ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; } } diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h index 7814b2c01..36e678a9e 100644 --- a/src/shader_recompiler/frontend/maxwell/program.h +++ b/src/shader_recompiler/frontend/maxwell/program.h @@ -4,13 +4,16 @@ #pragma once +#include #include #include +#include #include #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/function.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" namespace Shader::Maxwell { @@ -22,16 +25,10 @@ public: explicit Program(Environment& env, const Flow::CFG& cfg); private: - struct Function { - ~Function(); - - std::vector blocks; - }; - boost::pool_allocator block_alloc_pool; - std::vector functions; + boost::container::small_vector functions; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.cpp b/src/shader_recompiler/frontend/maxwell/termination_code.cpp index a4ea5c5e3..ed5137f20 100644 --- a/src/shader_recompiler/frontend/maxwell/termination_code.cpp +++ b/src/shader_recompiler/frontend/maxwell/termination_code.cpp @@ -47,12 +47,19 @@ static IR::U1 GetCond(IR::Condition cond, IR::IREmitter& ir) { static void EmitBranch(const Flow::Block& flow_block, std::span block_map, IR::IREmitter& ir) { + const auto add_immediate_predecessor = [&](Flow::BlockId label) { + block_map[label]->AddImmediatePredecessor(&ir.block); + }; if (flow_block.cond == true) { + add_immediate_predecessor(flow_block.branch_true); return ir.Branch(block_map[flow_block.branch_true]); } if (flow_block.cond == false) { + add_immediate_predecessor(flow_block.branch_false); return ir.Branch(block_map[flow_block.branch_false]); } + add_immediate_predecessor(flow_block.branch_true); + add_immediate_predecessor(flow_block.branch_false); return ir.BranchConditional(GetCond(flow_block.cond, ir), block_map[flow_block.branch_true], block_map[flow_block.branch_false]); } diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.h b/src/shader_recompiler/frontend/maxwell/termination_code.h index b0d667942..04e044534 100644 --- a/src/shader_recompiler/frontend/maxwell/termination_code.h +++ b/src/shader_recompiler/frontend/maxwell/termination_code.h @@ -11,6 +11,7 @@ namespace Shader::Maxwell { +/// Emit termination instructions and collect immediate predecessors void EmitTerminationCode(const Flow::Block& flow_block, std::span block_map); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index bc607b002..8be7d6ff1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -208,7 +208,7 @@ public: void P2R_reg(u64 insn); void P2R_cbuf(u64 insn); void P2R_imm(u64 insn); - void PBK(u64 insn); + void PBK(); void PCNT(u64 insn); void PEXIT(u64 insn); void PIXLD(u64 insn); @@ -252,7 +252,7 @@ public: void SHR_reg(u64 insn); void SHR_cbuf(u64 insn); void SHR_imm(u64 insn); - void SSY(u64 insn); + void SSY(); void ST(u64 insn); void STG(u64 insn); void STL(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index c907c1ffb..0f52696d1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -762,7 +762,7 @@ void TranslatorVisitor::P2R_imm(u64) { ThrowNotImplemented(Opcode::P2R_imm); } -void TranslatorVisitor::PBK(u64) { +void TranslatorVisitor::PBK() { // PBK is a no-op } @@ -938,8 +938,8 @@ void TranslatorVisitor::SHR_imm(u64) { ThrowNotImplemented(Opcode::SHR_imm); } -void TranslatorVisitor::SSY(u64) { - ThrowNotImplemented(Opcode::SSY); +void TranslatorVisitor::SSY() { + // SSY is a no-op } void TranslatorVisitor::ST(u64) { -- cgit v1.2.3 From d24a16045f0f6b0b873d5e3b5bf187c1a8c4343f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 3 Feb 2021 16:43:04 -0300 Subject: shader: Initial instruction support --- src/shader_recompiler/frontend/ir/basic_block.cpp | 4 +- src/shader_recompiler/frontend/ir/basic_block.h | 2 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 200 +++++++++++++++++++-- src/shader_recompiler/frontend/ir/ir_emitter.h | 67 ++++++- .../frontend/ir/microinstruction.h | 12 +- src/shader_recompiler/frontend/ir/modifiers.h | 28 +++ src/shader_recompiler/frontend/ir/opcode.inc | 139 +++++++++----- src/shader_recompiler/frontend/ir/pred.h | 11 +- src/shader_recompiler/frontend/maxwell/program.cpp | 1 + .../maxwell/translate/impl/common_encoding.h | 56 ++++++ .../maxwell/translate/impl/floating_point_add.cpp | 71 ++++++++ .../impl/floating_point_fused_multiply_add.cpp | 73 ++++++++ .../translate/impl/floating_point_multiply.cpp | 108 +++++++++++ .../frontend/maxwell/translate/impl/impl.cpp | 26 ++- .../frontend/maxwell/translate/impl/impl.h | 9 +- .../maxwell/translate/impl/integer_add.cpp | 106 +++++++++++ .../maxwell/translate/impl/integer_scaled_add.cpp | 73 ++++++++ .../translate/impl/integer_set_predicate.cpp | 99 ++++++++++ .../maxwell/translate/impl/integer_shift_left.cpp | 71 ++++++++ .../translate/impl/integer_short_multiply_add.cpp | 110 ++++++++++++ .../maxwell/translate/impl/load_store_memory.cpp | 149 ++++++++++++--- .../maxwell/translate/impl/move_register.cpp | 45 +++++ .../translate/impl/move_special_register.cpp | 114 ++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 149 +-------------- .../maxwell/translate/impl/register_move.cpp | 45 ----- 25 files changed, 1481 insertions(+), 287 deletions(-) create mode 100644 src/shader_recompiler/frontend/ir/modifiers.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp delete mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index e795618fc..249251dd0 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -23,8 +23,8 @@ void Block::AppendNewInst(Opcode op, std::initializer_list args) { } Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, - std::initializer_list args) { - Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op)}; + std::initializer_list args, u64 flags) { + Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op, flags)}; const auto result_it{instructions.insert(insertion_point, *inst)}; if (inst->NumArgs() != args.size()) { diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 4b6b80c4b..ec4a41cb1 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -39,7 +39,7 @@ public: /// Prepends a new instruction to this basic block before the insertion point. iterator PrependNewInst(iterator insertion_point, Opcode op, - std::initializer_list args = {}); + std::initializer_list args = {}, u64 flags = 0); /// Adds a new immediate predecessor to the basic block. void AddImmediatePredecessor(IR::Block* immediate_predecessor); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 6450e4b2c..87b253c9a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -129,6 +129,58 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const U32& value) { Inst(Opcode::SetAttribute, attribute, value); } +U32 IREmitter::WorkgroupIdX() { + return Inst(Opcode::WorkgroupIdX); +} + +U32 IREmitter::WorkgroupIdY() { + return Inst(Opcode::WorkgroupIdY); +} + +U32 IREmitter::WorkgroupIdZ() { + return Inst(Opcode::WorkgroupIdZ); +} + +U32 IREmitter::LocalInvocationIdX() { + return Inst(Opcode::LocalInvocationIdX); +} + +U32 IREmitter::LocalInvocationIdY() { + return Inst(Opcode::LocalInvocationIdY); +} + +U32 IREmitter::LocalInvocationIdZ() { + return Inst(Opcode::LocalInvocationIdZ); +} + +U32 IREmitter::LoadGlobalU8(const U64& address) { + return Inst(Opcode::LoadGlobalU8, address); +} + +U32 IREmitter::LoadGlobalS8(const U64& address) { + return Inst(Opcode::LoadGlobalS8, address); +} + +U32 IREmitter::LoadGlobalU16(const U64& address) { + return Inst(Opcode::LoadGlobalU16, address); +} + +U32 IREmitter::LoadGlobalS16(const U64& address) { + return Inst(Opcode::LoadGlobalS16, address); +} + +U32 IREmitter::LoadGlobal32(const U64& address) { + return Inst(Opcode::LoadGlobal32, address); +} + +Value IREmitter::LoadGlobal64(const U64& address) { + return Inst(Opcode::LoadGlobal64, address); +} + +Value IREmitter::LoadGlobal128(const U64& address) { + return Inst(Opcode::LoadGlobal128, address); +} + void IREmitter::WriteGlobalU8(const U64& address, const U32& value) { Inst(Opcode::WriteGlobalU8, address, value); } @@ -173,17 +225,17 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) { return Inst(Opcode::GetOverflowFromOp, op); } -U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b) { +U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control) { if (a.Type() != a.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); } switch (a.Type()) { case Type::U16: - return Inst(Opcode::FPAdd16, a, b); + return Inst(Opcode::FPAdd16, Flags{control}, a, b); case Type::U32: - return Inst(Opcode::FPAdd32, a, b); + return Inst(Opcode::FPAdd32, Flags{control}, a, b); case Type::U64: - return Inst(Opcode::FPAdd64, a, b); + return Inst(Opcode::FPAdd64, Flags{control}, a, b); default: ThrowInvalidType(a.Type()); } @@ -191,14 +243,14 @@ U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b) { Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2) { if (e1.Type() != e2.Type()) { - throw InvalidArgument("Incompatible types {} {}", e1.Type(), e2.Type()); + throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); } return Inst(Opcode::CompositeConstruct2, e1, e2); } Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3) { if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) { - throw InvalidArgument("Incompatible types {} {} {}", e1.Type(), e2.Type(), e3.Type()); + throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type()); } return Inst(Opcode::CompositeConstruct3, e1, e2, e3); } @@ -206,8 +258,8 @@ Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3, const UAny& e4) { if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) { - throw InvalidArgument("Incompatible types {} {} {}", e1.Type(), e2.Type(), e3.Type(), - e4.Type()); + throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(), + e3.Type(), e4.Type()); } return Inst(Opcode::CompositeConstruct4, e1, e2, e3, e4); } @@ -219,6 +271,24 @@ UAny IREmitter::CompositeExtract(const Value& vector, size_t element) { return Inst(Opcode::CompositeExtract, vector, Imm32(static_cast(element))); } +UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& false_value) { + if (true_value.Type() != false_value.Type()) { + throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); + } + switch (true_value.Type()) { + case Type::U8: + return Inst(Opcode::Select8, condition, true_value, false_value); + case Type::U16: + return Inst(Opcode::Select16, condition, true_value, false_value); + case Type::U32: + return Inst(Opcode::Select32, condition, true_value, false_value); + case Type::U64: + return Inst(Opcode::Select64, condition, true_value, false_value); + default: + throw InvalidArgument("Invalid type {}", true_value.Type()); + } +} + U64 IREmitter::PackUint2x32(const Value& vector) { return Inst(Opcode::PackUint2x32, vector); } @@ -243,17 +313,34 @@ Value IREmitter::UnpackDouble2x32(const U64& value) { return Inst(Opcode::UnpackDouble2x32, value); } -U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b) { +U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control) { if (a.Type() != b.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); } switch (a.Type()) { case Type::U16: - return Inst(Opcode::FPMul16, a, b); + return Inst(Opcode::FPMul16, Flags{control}, a, b); case Type::U32: - return Inst(Opcode::FPMul32, a, b); + return Inst(Opcode::FPMul32, Flags{control}, a, b); case Type::U64: - return Inst(Opcode::FPMul64, a, b); + return Inst(Opcode::FPMul64, Flags{control}, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + +U16U32U64 IREmitter::FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c, + FpControl control) { + if (a.Type() != b.Type() || a.Type() != c.Type()) { + throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type()); + } + switch (a.Type()) { + case Type::U16: + return Inst(Opcode::FPFma16, Flags{control}, a, b, c); + case Type::U32: + return Inst(Opcode::FPFma32, Flags{control}, a, b, c); + case Type::U64: + return Inst(Opcode::FPFma64, Flags{control}, a, b, c); default: ThrowInvalidType(a.Type()); } @@ -403,6 +490,91 @@ U16U32U64 IREmitter::FPTrunc(const U16U32U64& value) { } } +U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { + if (a.Type() != b.Type()) { + throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::U32: + return Inst(Opcode::IAdd32, a, b); + case Type::U64: + return Inst(Opcode::IAdd64, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + +U32 IREmitter::IMul(const U32& a, const U32& b) { + return Inst(Opcode::IMul32, a, b); +} + +U32 IREmitter::INeg(const U32& value) { + return Inst(Opcode::INeg32, value); +} + +U32 IREmitter::IAbs(const U32& value) { + return Inst(Opcode::IAbs32, value); +} + +U32 IREmitter::ShiftLeftLogical(const U32& base, const U32& shift) { + return Inst(Opcode::ShiftLeftLogical32, base, shift); +} + +U32 IREmitter::ShiftRightLogical(const U32& base, const U32& shift) { + return Inst(Opcode::ShiftRightLogical32, base, shift); +} + +U32 IREmitter::ShiftRightArithmetic(const U32& base, const U32& shift) { + return Inst(Opcode::ShiftRightArithmetic32, base, shift); +} + +U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) { + return Inst(Opcode::BitwiseAnd32, a, b); +} + +U32 IREmitter::BitwiseOr(const U32& a, const U32& b) { + return Inst(Opcode::BitwiseOr32, a, b); +} + +U32 IREmitter::BitwiseXor(const U32& a, const U32& b) { + return Inst(Opcode::BitwiseXor32, a, b); +} + +U32 IREmitter::BitFieldInsert(const U32& base, const U32& insert, const U32& offset, + const U32& count) { + return Inst(Opcode::BitFieldInsert, base, insert, offset, count); +} + +U32 IREmitter::BitFieldExtract(const U32& base, const U32& offset, const U32& count, + bool is_signed) { + return Inst(is_signed ? Opcode::BitFieldSExtract : Opcode::BitFieldUExtract, base, offset, + count); +} + +U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); +} + +U1 IREmitter::IEqual(const U32& lhs, const U32& rhs) { + return Inst(Opcode::IEqual, lhs, rhs); +} + +U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs); +} + +U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs); +} + +U1 IREmitter::INotEqual(const U32& lhs, const U32& rhs) { + return Inst(Opcode::INotEqual, lhs, rhs); +} + +U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { + return Inst(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs); +} + U1 IREmitter::LogicalOr(const U1& a, const U1& b) { return Inst(Opcode::LogicalOr, a, b); } @@ -411,6 +583,10 @@ U1 IREmitter::LogicalAnd(const U1& a, const U1& b) { return Inst(Opcode::LogicalAnd, a, b); } +U1 IREmitter::LogicalXor(const U1& a, const U1& b) { + return Inst(Opcode::LogicalXor, a, b); +} + U1 IREmitter::LogicalNot(const U1& value) { return Inst(Opcode::LogicalNot, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 1af79f41c..7ff763ecf 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -4,8 +4,12 @@ #pragma once +#include +#include + #include "shader_recompiler/frontend/ir/attribute.h" #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::IR { @@ -52,6 +56,22 @@ public: [[nodiscard]] U32 GetAttribute(IR::Attribute attribute); void SetAttribute(IR::Attribute attribute, const U32& value); + [[nodiscard]] U32 WorkgroupIdX(); + [[nodiscard]] U32 WorkgroupIdY(); + [[nodiscard]] U32 WorkgroupIdZ(); + + [[nodiscard]] U32 LocalInvocationIdX(); + [[nodiscard]] U32 LocalInvocationIdY(); + [[nodiscard]] U32 LocalInvocationIdZ(); + + [[nodiscard]] U32 LoadGlobalU8(const U64& address); + [[nodiscard]] U32 LoadGlobalS8(const U64& address); + [[nodiscard]] U32 LoadGlobalU16(const U64& address); + [[nodiscard]] U32 LoadGlobalS16(const U64& address); + [[nodiscard]] U32 LoadGlobal32(const U64& address); + [[nodiscard]] Value LoadGlobal64(const U64& address); + [[nodiscard]] Value LoadGlobal128(const U64& address); + void WriteGlobalU8(const U64& address, const U32& value); void WriteGlobalS8(const U64& address, const U32& value); void WriteGlobalU16(const U64& address, const U32& value); @@ -71,6 +91,8 @@ public: const UAny& e4); [[nodiscard]] UAny CompositeExtract(const Value& vector, size_t element); + [[nodiscard]] UAny Select(const U1& condition, const UAny& true_value, const UAny& false_value); + [[nodiscard]] U64 PackUint2x32(const Value& vector); [[nodiscard]] Value UnpackUint2x32(const U64& value); @@ -80,8 +102,10 @@ public: [[nodiscard]] U64 PackDouble2x32(const Value& vector); [[nodiscard]] Value UnpackDouble2x32(const U64& value); - [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b); - [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b); + [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control = {}); + [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control = {}); + [[nodiscard]] U16U32U64 FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c, + FpControl control = {}); [[nodiscard]] U16U32U64 FPAbs(const U16U32U64& value); [[nodiscard]] U16U32U64 FPNeg(const U16U32U64& value); @@ -100,8 +124,31 @@ public: [[nodiscard]] U16U32U64 FPCeil(const U16U32U64& value); [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value); + [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); + [[nodiscard]] U32 IMul(const U32& a, const U32& b); + [[nodiscard]] U32 INeg(const U32& value); + [[nodiscard]] U32 IAbs(const U32& value); + [[nodiscard]] U32 ShiftLeftLogical(const U32& base, const U32& shift); + [[nodiscard]] U32 ShiftRightLogical(const U32& base, const U32& shift); + [[nodiscard]] U32 ShiftRightArithmetic(const U32& base, const U32& shift); + [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b); + [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b); + [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b); + [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, + const U32& count); + [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, + bool is_signed); + + [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 IEqual(const U32& lhs, const U32& rhs); + [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); + [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); + [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b); [[nodiscard]] U1 LogicalNot(const U1& value); [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const U16U32U64& value); @@ -118,6 +165,22 @@ private: auto it{block.PrependNewInst(insertion_point, op, {Value{args}...})}; return T{Value{&*it}}; } + + template + requires(sizeof(T) <= sizeof(u64) && std::is_trivially_copyable_v) struct Flags { + Flags() = default; + Flags(T proxy_) : proxy{proxy_} {} + + T proxy; + }; + + template + T Inst(Opcode op, Flags flags, Args... args) { + u64 raw_flags{}; + std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy)); + auto it{block.PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; + return T{Value{&*it}}; + } }; } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 7f1ed6710..61849695a 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -5,7 +5,9 @@ #pragma once #include +#include #include +#include #include #include @@ -23,7 +25,7 @@ constexpr size_t MAX_ARG_COUNT = 4; class Inst : public boost::intrusive::list_base_hook<> { public: - explicit Inst(Opcode op_) noexcept : op(op_) {} + explicit Inst(Opcode op_, u64 flags_) noexcept : op{op_}, flags{flags_} {} /// Get the number of uses this instruction has. [[nodiscard]] int UseCount() const noexcept { @@ -73,6 +75,14 @@ public: void ReplaceUsesWith(Value replacement); + template + requires(sizeof(FlagsType) <= sizeof(u64) && std::is_trivially_copyable_v) + [[nodiscard]] FlagsType Flags() const noexcept { + FlagsType ret; + std::memcpy(&ret, &flags, sizeof(ret)); + return ret; + } + private: void Use(const Value& value); void UndoUse(const Value& value); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h new file mode 100644 index 000000000..28bb9e798 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +namespace Shader::IR { + +enum class FmzMode { + None, // Denorms are not flushed, NAN is propagated (nouveau) + FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) + FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) +}; + +enum class FpRounding { + RN, // Round to nearest even, + RM, // Round towards negative infinity + RP, // Round towards positive infinity + RZ, // Round towards zero +}; + +struct FpControl { + bool no_contraction{false}; + FpRounding rounding : 8 = FpRounding::RN; + FmzMode fmz_mode : 8 = FmzMode::FTZ; +}; +static_assert(sizeof(FpControl) <= sizeof(u64)); +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc index 40759e96a..4ecb5e936 100644 --- a/src/shader_recompiler/frontend/ir/opcode.inc +++ b/src/shader_recompiler/frontend/ir/opcode.inc @@ -35,6 +35,12 @@ OPCODE(SetZFlag, Void, U1, OPCODE(SetSFlag, Void, U1, ) OPCODE(SetCFlag, Void, U1, ) OPCODE(SetOFlag, Void, U1, ) +OPCODE(WorkgroupIdX, U32, ) +OPCODE(WorkgroupIdY, U32, ) +OPCODE(WorkgroupIdZ, U32, ) +OPCODE(LocalInvocationIdX, U32, ) +OPCODE(LocalInvocationIdY, U32, ) +OPCODE(LocalInvocationIdZ, U32, ) // Undefined OPCODE(Undef1, U1, ) @@ -44,6 +50,13 @@ OPCODE(Undef32, U32, OPCODE(Undef64, U64, ) // Memory operations +OPCODE(LoadGlobalU8, U32, U64, ) +OPCODE(LoadGlobalS8, U32, U64, ) +OPCODE(LoadGlobalU16, U32, U64, ) +OPCODE(LoadGlobalS16, U32, U64, ) +OPCODE(LoadGlobal32, U32, U64, ) +OPCODE(LoadGlobal64, Opaque, U64, ) +OPCODE(LoadGlobal128, Opaque, U64, ) OPCODE(WriteGlobalU8, Void, U64, U32, ) OPCODE(WriteGlobalS8, Void, U64, U32, ) OPCODE(WriteGlobalU16, Void, U64, U32, ) @@ -58,6 +71,12 @@ OPCODE(CompositeConstruct3, Opaque, Opaq OPCODE(CompositeConstruct4, Opaque, Opaque, Opaque, Opaque, Opaque, ) OPCODE(CompositeExtract, Opaque, Opaque, U32, ) +// Select operations +OPCODE(Select8, U8, U1, U8, U8, ) +OPCODE(Select16, U16, U1, U16, U16, ) +OPCODE(Select32, U32, U1, U32, U32, ) +OPCODE(Select64, U64, U1, U64, U64, ) + // Bitwise conversions OPCODE(PackUint2x32, U64, Opaque, ) OPCODE(UnpackUint2x32, Opaque, U64, ) @@ -74,56 +93,84 @@ OPCODE(GetOverflowFromOp, U1, Opaq OPCODE(GetZSCOFromOp, ZSCO, Opaque, ) // Floating-point operations -OPCODE(FPAbs16, U16, U16 ) -OPCODE(FPAbs32, U32, U32 ) -OPCODE(FPAbs64, U64, U64 ) -OPCODE(FPAdd16, U16, U16, U16 ) -OPCODE(FPAdd32, U32, U32, U32 ) -OPCODE(FPAdd64, U64, U64, U64 ) -OPCODE(FPFma16, U16, U16, U16 ) -OPCODE(FPFma32, U32, U32, U32 ) -OPCODE(FPFma64, U64, U64, U64 ) -OPCODE(FPMax32, U32, U32, U32 ) -OPCODE(FPMax64, U64, U64, U64 ) -OPCODE(FPMin32, U32, U32, U32 ) -OPCODE(FPMin64, U64, U64, U64 ) -OPCODE(FPMul16, U16, U16, U16 ) -OPCODE(FPMul32, U32, U32, U32 ) -OPCODE(FPMul64, U64, U64, U64 ) -OPCODE(FPNeg16, U16, U16 ) -OPCODE(FPNeg32, U32, U32 ) -OPCODE(FPNeg64, U64, U64 ) -OPCODE(FPRecip32, U32, U32 ) -OPCODE(FPRecip64, U64, U64 ) -OPCODE(FPRecipSqrt32, U32, U32 ) -OPCODE(FPRecipSqrt64, U64, U64 ) -OPCODE(FPSqrt, U32, U32 ) -OPCODE(FPSin, U32, U32 ) -OPCODE(FPSinNotReduced, U32, U32 ) -OPCODE(FPExp2, U32, U32 ) -OPCODE(FPExp2NotReduced, U32, U32 ) -OPCODE(FPCos, U32, U32 ) -OPCODE(FPCosNotReduced, U32, U32 ) -OPCODE(FPLog2, U32, U32 ) -OPCODE(FPSaturate16, U16, U16 ) -OPCODE(FPSaturate32, U32, U32 ) -OPCODE(FPSaturate64, U64, U64 ) -OPCODE(FPRoundEven16, U16, U16 ) -OPCODE(FPRoundEven32, U32, U32 ) -OPCODE(FPRoundEven64, U64, U64 ) -OPCODE(FPFloor16, U16, U16 ) -OPCODE(FPFloor32, U32, U32 ) -OPCODE(FPFloor64, U64, U64 ) -OPCODE(FPCeil16, U16, U16 ) -OPCODE(FPCeil32, U32, U32 ) -OPCODE(FPCeil64, U64, U64 ) -OPCODE(FPTrunc16, U16, U16 ) -OPCODE(FPTrunc32, U32, U32 ) -OPCODE(FPTrunc64, U64, U64 ) +OPCODE(FPAbs16, U16, U16, ) +OPCODE(FPAbs32, U32, U32, ) +OPCODE(FPAbs64, U64, U64, ) +OPCODE(FPAdd16, U16, U16, U16, ) +OPCODE(FPAdd32, U32, U32, U32, ) +OPCODE(FPAdd64, U64, U64, U64, ) +OPCODE(FPFma16, U16, U16, U16, U16, ) +OPCODE(FPFma32, U32, U32, U32, U32, ) +OPCODE(FPFma64, U64, U64, U64, U64, ) +OPCODE(FPMax32, U32, U32, U32, ) +OPCODE(FPMax64, U64, U64, U64, ) +OPCODE(FPMin32, U32, U32, U32, ) +OPCODE(FPMin64, U64, U64, U64, ) +OPCODE(FPMul16, U16, U16, U16, ) +OPCODE(FPMul32, U32, U32, U32, ) +OPCODE(FPMul64, U64, U64, U64, ) +OPCODE(FPNeg16, U16, U16, ) +OPCODE(FPNeg32, U32, U32, ) +OPCODE(FPNeg64, U64, U64, ) +OPCODE(FPRecip32, U32, U32, ) +OPCODE(FPRecip64, U64, U64, ) +OPCODE(FPRecipSqrt32, U32, U32, ) +OPCODE(FPRecipSqrt64, U64, U64, ) +OPCODE(FPSqrt, U32, U32, ) +OPCODE(FPSin, U32, U32, ) +OPCODE(FPSinNotReduced, U32, U32, ) +OPCODE(FPExp2, U32, U32, ) +OPCODE(FPExp2NotReduced, U32, U32, ) +OPCODE(FPCos, U32, U32, ) +OPCODE(FPCosNotReduced, U32, U32, ) +OPCODE(FPLog2, U32, U32, ) +OPCODE(FPSaturate16, U16, U16, ) +OPCODE(FPSaturate32, U32, U32, ) +OPCODE(FPSaturate64, U64, U64, ) +OPCODE(FPRoundEven16, U16, U16, ) +OPCODE(FPRoundEven32, U32, U32, ) +OPCODE(FPRoundEven64, U64, U64, ) +OPCODE(FPFloor16, U16, U16, ) +OPCODE(FPFloor32, U32, U32, ) +OPCODE(FPFloor64, U64, U64, ) +OPCODE(FPCeil16, U16, U16, ) +OPCODE(FPCeil32, U32, U32, ) +OPCODE(FPCeil64, U64, U64, ) +OPCODE(FPTrunc16, U16, U16, ) +OPCODE(FPTrunc32, U32, U32, ) +OPCODE(FPTrunc64, U64, U64, ) + +// Integer operations +OPCODE(IAdd32, U32, U32, U32, ) +OPCODE(IAdd64, U64, U64, U64, ) +OPCODE(IMul32, U32, U32, U32, ) +OPCODE(INeg32, U32, U32, ) +OPCODE(IAbs32, U32, U32, ) +OPCODE(ShiftLeftLogical32, U32, U32, U32, ) +OPCODE(ShiftRightLogical32, U32, U32, U32, ) +OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) +OPCODE(BitwiseAnd32, U32, U32, U32, ) +OPCODE(BitwiseOr32, U32, U32, U32, ) +OPCODE(BitwiseXor32, U32, U32, U32, ) +OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) +OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) +OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) + +OPCODE(SLessThan, U1, U32, U32, ) +OPCODE(ULessThan, U1, U32, U32, ) +OPCODE(IEqual, U1, U32, U32, ) +OPCODE(SLessThanEqual, U1, U32, U32, ) +OPCODE(ULessThanEqual, U1, U32, U32, ) +OPCODE(SGreaterThan, U1, U32, U32, ) +OPCODE(UGreaterThan, U1, U32, U32, ) +OPCODE(INotEqual, U1, U32, U32, ) +OPCODE(SGreaterThanEqual, U1, U32, U32, ) +OPCODE(UGreaterThanEqual, U1, U32, U32, ) // Logical operations OPCODE(LogicalOr, U1, U1, U1, ) OPCODE(LogicalAnd, U1, U1, U1, ) +OPCODE(LogicalXor, U1, U1, U1, ) OPCODE(LogicalNot, U1, U1, ) // Conversion operations diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h index daf23193f..c6f2f82bf 100644 --- a/src/shader_recompiler/frontend/ir/pred.h +++ b/src/shader_recompiler/frontend/ir/pred.h @@ -8,7 +8,16 @@ namespace Shader::IR { -enum class Pred { P0, P1, P2, P3, P4, P5, P6, PT }; +enum class Pred : u64 { + P0, + P1, + P2, + P3, + P4, + P5, + P6, + PT, +}; constexpr size_t NUM_USER_PREDS = 6; constexpr size_t NUM_PREDS = 7; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 49d1f4bfb..bd1f96c07 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -56,6 +56,7 @@ Program::Program(Environment& env, const Flow::CFG& cfg) { Optimization::Invoke(Optimization::IdentityRemovalPass, function); // Optimization::Invoke(Optimization::VerificationPass, function); } + //*/ } std::string DumpProgram(const Program& program) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h new file mode 100644 index 000000000..3da37a2bb --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h @@ -0,0 +1,56 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" + +namespace Shader::Maxwell { + +enum class FpRounding : u64 { + RN, + RM, + RP, + RZ, +}; + +enum class FmzMode : u64 { + None, + FTZ, + FMZ, + INVALIDFMZ3, +}; + +inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) { + switch (fp_rounding) { + case FpRounding::RN: + return IR::FpRounding::RN; + case FpRounding::RM: + return IR::FpRounding::RM; + case FpRounding::RP: + return IR::FpRounding::RP; + case FpRounding::RZ: + return IR::FpRounding::RZ; + } + throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding); +} + +inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) { + switch (fmz_mode) { + case FmzMode::None: + return IR::FmzMode::None; + case FmzMode::FTZ: + return IR::FmzMode::FTZ; + case FmzMode::FMZ: + return IR::FmzMode::FMZ; + case FmzMode::INVALIDFMZ3: + break; + } + throw NotImplementedException("Invalid FMZ mode {}", fmz_mode); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp new file mode 100644 index 000000000..d2c44b9cc --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -0,0 +1,71 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding, + const IR::U32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const fadd{insn}; + + if (sat) { + throw NotImplementedException("FADD SAT"); + } + if (cc) { + throw NotImplementedException("FADD CC"); + } + const IR::U32 op_a{v.ir.FPAbsNeg(v.X(fadd.src_a), abs_a, neg_a)}; + const IR::U32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; + IR::FpControl control{ + .no_contraction{true}, + .rounding{CastFpRounding(fp_rounding)}, + .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + v.X(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); +} + +void FADD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { + union { + u64 raw; + BitField<39, 2, FpRounding> fp_rounding; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> neg_b; + BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_a; + BitField<49, 1, u64> abs_b; + BitField<50, 1, u64> sat; + } const fadd{insn}; + + FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b, + fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0); +} +} // Anonymous namespace + +void TranslatorVisitor::FADD_reg(u64 insn) { + FADD(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::FADD_cbuf(u64) { + throw NotImplementedException("FADD (cbuf)"); +} + +void TranslatorVisitor::FADD_imm(u64) { + throw NotImplementedException("FADD (imm)"); +} + +void TranslatorVisitor::FADD32I(u64) { + throw NotImplementedException("FADD32I"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..30ca052ec --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -0,0 +1,73 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, bool neg_a, + bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const ffma{insn}; + + if (sat) { + throw NotImplementedException("FFMA SAT"); + } + if (cc) { + throw NotImplementedException("FFMA CC"); + } + const IR::U32 op_a{v.ir.FPAbsNeg(v.X(ffma.src_a), false, neg_a)}; + const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; + const IR::U32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{CastFpRounding(fp_rounding)}, + .fmz_mode{CastFmzMode(fmz_mode)}, + }; + v.X(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control)); +} + +void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c) { + union { + u64 raw; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_c; + BitField<50, 1, u64> sat; + BitField<51, 2, FpRounding> fp_rounding; + BitField<53, 2, FmzMode> fmz_mode; + } const ffma{insn}; + + FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0, + ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding); +} +} // Anonymous namespace + +void TranslatorVisitor::FFMA_reg(u64 insn) { + FFMA(*this, insn, GetReg20(insn), GetReg39(insn)); +} + +void TranslatorVisitor::FFMA_rc(u64) { + throw NotImplementedException("FFMA (rc)"); +} + +void TranslatorVisitor::FFMA_cr(u64 insn) { + FFMA(*this, insn, GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::FFMA_imm(u64) { + throw NotImplementedException("FFMA (imm)"); +} + +void TranslatorVisitor::FFMA32I(u64) { + throw NotImplementedException("FFMA32I"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp new file mode 100644 index 000000000..743a1e2f0 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -0,0 +1,108 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Scale : u64 { + None, + D2, + D4, + D8, + M8, + M4, + M2, + INVALIDSCALE37, +}; + +float ScaleFactor(Scale scale) { + switch (scale) { + case Scale::None: + return 1.0f; + case Scale::D2: + return 1.0f / 2.0f; + case Scale::D4: + return 1.0f / 4.0f; + case Scale::D8: + return 1.0f / 8.0f; + case Scale::M8: + return 8.0f; + case Scale::M4: + return 4.0f; + case Scale::M2: + return 2.0f; + case Scale::INVALIDSCALE37: + break; + } + throw NotImplementedException("Invalid FMUL scale {}", scale); +} + +void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, FmzMode fmz_mode, + FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const fmul{insn}; + + if (cc) { + throw NotImplementedException("FMUL CC"); + } + if (sat) { + throw NotImplementedException("FMUL SAT"); + } + IR::U32 op_a{v.X(fmul.src_a)}; + if (scale != Scale::None) { + if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) { + throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers"); + } + op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale))); + } + const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{CastFpRounding(fp_rounding)}, + .fmz_mode{CastFmzMode(fmz_mode)}, + }; + v.X(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control)); +} + +void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { + union { + u64 raw; + BitField<39, 2, FpRounding> fp_rounding; + BitField<41, 3, Scale> scale; + BitField<44, 2, FmzMode> fmz; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<50, 1, u64> sat; + } fmul{insn}; + + FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, + fmul.neg_b != 0); +} +} // Anonymous namespace + +void TranslatorVisitor::FMUL_reg(u64 insn) { + return FMUL(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::FMUL_cbuf(u64) { + throw NotImplementedException("FMUL (cbuf)"); +} + +void TranslatorVisitor::FMUL_imm(u64) { + throw NotImplementedException("FMUL (imm)"); +} + +void TranslatorVisitor::FMUL32I(u64) { + throw NotImplementedException("FMUL32I"); +} + +} // namespace Shader::Maxwell \ No newline at end of file diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 7bc7ce9f2..548c7f611 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -16,6 +16,22 @@ void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { ir.SetReg(dest_reg, value); } +IR::U32 TranslatorVisitor::GetReg20(u64 insn) { + union { + u64 raw; + BitField<20, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + +IR::U32 TranslatorVisitor::GetReg39(u64 insn) { + union { + u64 raw; + BitField<39, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { union { u64 raw; @@ -33,7 +49,7 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { return ir.GetCbuf(binding, byte_offset); } -IR::U32 TranslatorVisitor::GetImm(u64 insn) { +IR::U32 TranslatorVisitor::GetImm20(u64 insn) { union { u64 raw; BitField<20, 19, u64> value; @@ -44,6 +60,14 @@ IR::U32 TranslatorVisitor::GetImm(u64 insn) { return ir.Imm32(value); } +IR::U32 TranslatorVisitor::GetImm32(u64 insn) { + union { + u64 raw; + BitField<20, 32, u64> value; + } const imm{insn}; + return ir.Imm32(static_cast(imm.value)); +} + void TranslatorVisitor::SetZFlag(const IR::U1& value) { ir.SetZFlag(value); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 8be7d6ff1..ef6d977fe 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -46,7 +46,7 @@ public: void DADD_reg(u64 insn); void DADD_cbuf(u64 insn); void DADD_imm(u64 insn); - void DEPBAR(u64 insn); + void DEPBAR(); void DFMA_reg(u64 insn); void DFMA_rc(u64 insn); void DFMA_cr(u64 insn); @@ -298,9 +298,14 @@ public: [[nodiscard]] IR::U32 X(IR::Reg reg); void X(IR::Reg dest_reg, const IR::U32& value); + [[nodiscard]] IR::U32 GetReg20(u64 insn); + [[nodiscard]] IR::U32 GetReg39(u64 insn); + [[nodiscard]] IR::U32 GetCbuf(u64 insn); - [[nodiscard]] IR::U32 GetImm(u64 insn); + [[nodiscard]] IR::U32 GetImm20(u64 insn); + + [[nodiscard]] IR::U32 GetImm32(u64 insn); void SetZFlag(const IR::U1& value); void SetSFlag(const IR::U1& value); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp new file mode 100644 index 000000000..60f79b160 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -0,0 +1,106 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x, + bool cc) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const iadd{insn}; + + if (sat) { + throw NotImplementedException("IADD SAT"); + } + if (x && po) { + throw NotImplementedException("IADD X+PO"); + } + // Operand A is always read from here, negated if needed + IR::U32 op_a{v.X(iadd.src_a)}; + if (neg_a) { + op_a = v.ir.INeg(op_a); + } + // Add both operands + IR::U32 result{v.ir.IAdd(op_a, op_b)}; + if (x) { + const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; + result = v.ir.IAdd(result, carry); + } + if (po) { + // .PO adds one to the result + result = v.ir.IAdd(result, v.ir.Imm32(1)); + } + if (cc) { + // Store flags + // TODO: Does this grab the result pre-PO or after? + if (po) { + throw NotImplementedException("IADD CC+PO"); + } + // TODO: How does CC behave when X is set? + if (x) { + throw NotImplementedException("IADD X+CC"); + } + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + v.SetCFlag(v.ir.GetCarryFromOp(result)); + v.SetOFlag(v.ir.GetOverflowFromOp(result)); + } + // Store result + v.X(iadd.dest_reg, result); +} + +void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { + union { + u64 insn; + BitField<43, 1, u64> x; + BitField<47, 1, u64> cc; + BitField<48, 2, u64> three_for_po; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_a; + BitField<50, 1, u64> sat; + } const iadd{insn}; + + const bool po{iadd.three_for_po == 3}; + const bool neg_a{!po && iadd.neg_a != 0}; + if (!po && iadd.neg_b != 0) { + op_b = v.ir.INeg(op_b); + } + IADD(v, insn, op_b, iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0); +} +} // Anonymous namespace + +void TranslatorVisitor::IADD_reg(u64) { + throw NotImplementedException("IADD (reg)"); +} + +void TranslatorVisitor::IADD_cbuf(u64 insn) { + IADD(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::IADD_imm(u64) { + throw NotImplementedException("IADD (imm)"); +} + +void TranslatorVisitor::IADD32I(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> cc; + BitField<53, 1, u64> x; + BitField<54, 1, u64> sat; + BitField<55, 2, u64> three_for_po; + BitField<56, 1, u64> neg_a; + } const iadd32i{insn}; + + const bool po{iadd32i.three_for_po == 3}; + const bool neg_a{!po && iadd32i.neg_a != 0}; + IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp new file mode 100644 index 000000000..f92c0bbd6 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -0,0 +1,73 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> op_a; + BitField<47, 1, u64> cc; + BitField<48, 2, u64> three_for_po; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_a; + BitField<39, 5, u64> scale; + } const iscadd{insn}; + + const bool po{iscadd.three_for_po == 3}; + IR::U32 op_a{v.X(iscadd.op_a)}; + if (!po) { + // When PO is not present, the bits are interpreted as negation + if (iscadd.neg_a != 0) { + op_a = v.ir.INeg(op_a); + } + if (iscadd.neg_b != 0) { + op_b = v.ir.INeg(op_b); + } + } + // With the operands already processed, scale A + const IR::U32 scale{v.ir.Imm32(static_cast(iscadd.scale))}; + const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)}; + + IR::U32 result{v.ir.IAdd(scaled_a, op_b)}; + if (po) { + // .PO adds one to the final result + result = v.ir.IAdd(result, v.ir.Imm32(1)); + } + v.X(iscadd.dest_reg, result); + + if (iscadd.cc != 0) { + throw NotImplementedException("ISCADD CC"); + } +} + +} // Anonymous namespace + +void TranslatorVisitor::ISCADD_reg(u64 insn) { + union { + u64 raw; + BitField<20, 8, IR::Reg> op_b; + } const iscadd{insn}; + + ISCADD(*this, insn, X(iscadd.op_b)); +} + +void TranslatorVisitor::ISCADD_cbuf(u64) { + throw NotImplementedException("ISCADD (cbuf)"); +} + +void TranslatorVisitor::ISCADD_imm(u64) { + throw NotImplementedException("ISCADD (imm)"); +} + +void TranslatorVisitor::ISCADD32I(u64) { + throw NotImplementedException("ISCADD32I"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp new file mode 100644 index 000000000..76c6b5291 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp @@ -0,0 +1,99 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class CompareOp : u64 { + F, // Always false + LT, // Less than + EQ, // Equal + LE, // Less than or equal + GT, // Greater than + NE, // Not equal + GE, // Greater than or equal + T, // Always true +}; + +enum class Bop : u64 { + AND, + OR, + XOR, +}; + +IR::U1 Compare(IR::IREmitter& ir, CompareOp op, const IR::U32& lhs, const IR::U32& rhs, + bool is_signed) { + switch (op) { + case CompareOp::F: + return ir.Imm1(false); + case CompareOp::LT: + return ir.ILessThan(lhs, rhs, is_signed); + case CompareOp::EQ: + return ir.IEqual(lhs, rhs); + case CompareOp::LE: + return ir.ILessThanEqual(lhs, rhs, is_signed); + case CompareOp::GT: + return ir.IGreaterThan(lhs, rhs, is_signed); + case CompareOp::NE: + return ir.INotEqual(lhs, rhs); + case CompareOp::GE: + return ir.IGreaterThanEqual(lhs, rhs, is_signed); + case CompareOp::T: + return ir.Imm1(true); + } + throw NotImplementedException("Invalid ISETP compare op {}", op); +} + +IR::U1 Combine(IR::IREmitter& ir, Bop bop, const IR::U1& comparison, const IR::U1& bop_pred) { + switch (bop) { + case Bop::AND: + return ir.LogicalAnd(comparison, bop_pred); + case Bop::OR: + return ir.LogicalOr(comparison, bop_pred); + case Bop::XOR: + return ir.LogicalXor(comparison, bop_pred); + } + throw NotImplementedException("Invalid ISETP bop {}", bop); +} + +void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<45, 2, Bop> bop; + BitField<48, 1, u64> is_signed; + BitField<49, 3, CompareOp> compare_op; + } const isetp{insn}; + + const Bop bop{isetp.bop}; + const IR::U32 op_a{v.X(isetp.src_reg_a)}; + const IR::U1 comparison{Compare(v.ir, isetp.compare_op, op_a, op_b, isetp.is_signed != 0)}; + const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)}; + const IR::U1 result_a{Combine(v.ir, bop, comparison, bop_pred)}; + const IR::U1 result_b{Combine(v.ir, bop, v.ir.LogicalNot(comparison), bop_pred)}; + v.ir.SetPred(isetp.dest_pred_a, result_a); + v.ir.SetPred(isetp.dest_pred_b, result_b); +} +} // Anonymous namespace + +void TranslatorVisitor::ISETP_reg(u64 insn) { + ISETP(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::ISETP_cbuf(u64 insn) { + ISETP(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::ISETP_imm(u64) { + throw NotImplementedException("ISETP_imm"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp new file mode 100644 index 000000000..d4b417d14 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp @@ -0,0 +1,71 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<39, 1, u64> w; + BitField<43, 1, u64> x; + BitField<47, 1, u64> cc; + } const shl{insn}; + + if (shl.x != 0) { + throw NotImplementedException("SHL.X"); + } + if (shl.cc != 0) { + throw NotImplementedException("SHL.CC"); + } + const IR::U32 base{v.X(shl.src_reg_a)}; + IR::U32 result; + if (shl.w != 0) { + // When .W is set, the shift value is wrapped + // To emulate this we just have to clamp it ourselves. + const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))}; + result = v.ir.ShiftLeftLogical(base, shift); + } else { + // When .W is not set, the shift value is clamped between 0 and 32. + // To emulate this we have to have in mind the special shift of 32, that evaluates as 0. + // We can safely evaluate an out of bounds shift according to the SPIR-V specification: + // + // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical + // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than + // or equal to the bit width of the components of Base." + // + // And on the GLASM specification it is also safe to evaluate out of bounds: + // + // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt + // "The results of a shift operation ("<<") are undefined if the value of the second operand + // is negative, or greater than or equal to the number of bits in the first operand." + // + // Emphasis on undefined results in contrast to undefined behavior. + // + const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)}; + const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)}; + result = v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0)); + } + v.X(shl.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHL_reg(u64) { + throw NotImplementedException("SHL_reg"); +} + +void TranslatorVisitor::SHL_cbuf(u64) { + throw NotImplementedException("SHL_cbuf"); +} + +void TranslatorVisitor::SHL_imm(u64 insn) { + SHL(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp new file mode 100644 index 000000000..70a7c76c5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp @@ -0,0 +1,110 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class SelectMode : u64 { + Default, + CLO, + CHI, + CSFU, + CBCC, +}; + +enum class Half : u64 { + H0, // Least-significant bits (15:0) + H1, // Most-significant bits (31:16) +}; + +IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) { + const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)}; + return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed); +} + +void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, + SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> is_a_signed; + BitField<49, 1, u64> is_b_signed; + BitField<53, 1, Half> half_a; + } const xmad{insn}; + + if (x) { + throw NotImplementedException("XMAD X"); + } + const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)}; + const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)}; + + IR::U32 product{v.ir.IMul(op_a, op_b)}; + if (psl) { + // .PSL shifts the product 16 bits + product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16)); + } + const IR::U32 op_c{[&]() -> IR::U32 { + switch (select_mode) { + case SelectMode::Default: + return src_c; + case SelectMode::CLO: + return ExtractHalf(v, src_c, Half::H0, false); + case SelectMode::CHI: + return ExtractHalf(v, src_c, Half::H1, false); + case SelectMode::CBCC: + return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_b); + case SelectMode::CSFU: + throw NotImplementedException("XMAD CSFU"); + } + throw NotImplementedException("Invalid XMAD select mode {}", select_mode); + }()}; + IR::U32 result{v.ir.IAdd(product, op_c)}; + if (mrg) { + // .MRG inserts src_b [15:0] into result's [31:16]. + const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)}; + result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16)); + } + if (xmad.cc) { + throw NotImplementedException("XMAD CC"); + } + // Store result + v.X(xmad.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::XMAD_reg(u64) { + throw NotImplementedException("XMAD (reg)"); +} + +void TranslatorVisitor::XMAD_rc(u64) { + throw NotImplementedException("XMAD (rc)"); +} + +void TranslatorVisitor::XMAD_cr(u64) { + throw NotImplementedException("XMAD (cr)"); +} + +void TranslatorVisitor::XMAD_imm(u64 insn) { + union { + u64 raw; + BitField<20, 16, u64> src_b; + BitField<36, 1, u64> psl; + BitField<37, 1, u64> mrg; + BitField<38, 1, u64> x; + BitField<39, 8, IR::Reg> src_c; + BitField<50, 3, SelectMode> select_mode; + } const xmad{insn}; + + const IR::U32 src_b{ir.Imm32(static_cast(xmad.src_b))}; + const IR::U32 src_c{X(xmad.src_c)}; + XMAD(*this, insn, src_b, src_c, xmad.select_mode, Half::H0, xmad.psl != 0, xmad.mrg != 0, + xmad.x != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index d8fd387cf..c9669c617 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -10,16 +10,35 @@ namespace Shader::Maxwell { namespace { +enum class LoadSize : u64 { + U8, // Zero-extend + S8, // Sign-extend + U16, // Zero-extend + S16, // Sign-extend + B32, + B64, + B128, + U128, // ??? +}; + enum class StoreSize : u64 { - U8, - S8, - U16, - S16, + U8, // Zero-extend + S8, // Sign-extend + U16, // Zero-extend + S16, // Sign-extend B32, B64, B128, }; +// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html +enum class LoadCache : u64 { + CA, // Cache at all levels, likely to be accessed again + CG, // Cache at global level (cache in L2 and below, not L1) + CI, // ??? + CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again) +}; + // See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html enum class StoreCache : u64 { WB, // Cache write-back all coherent levels @@ -27,61 +46,137 @@ enum class StoreCache : u64 { CS, // Cache streaming, likely to be accessed once WT, // Cache write-through (to system memory) }; -} // Anonymous namespace -void TranslatorVisitor::STG(u64 insn) { - // STG stores registers into global memory. +IR::U64 Address(TranslatorVisitor& v, u64 insn) { union { u64 raw; - BitField<0, 8, IR::Reg> data_reg; BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 24, s64> addr_offset; + BitField<20, 24, u64> rz_addr_offset; BitField<45, 1, u64> e; - BitField<46, 2, StoreCache> cache; - BitField<48, 3, StoreSize> size; - } const stg{insn}; + } const mem{insn}; const IR::U64 address{[&]() -> IR::U64 { - if (stg.e == 0) { - // STG without .E uses a 32-bit pointer, zero-extend it - return ir.ConvertU(64, X(stg.addr_reg)); + if (mem.e == 0) { + // LDG/STG without .E uses a 32-bit pointer, zero-extend it + return v.ir.ConvertU(64, v.X(mem.addr_reg)); } - if (!IR::IsAligned(stg.addr_reg, 2)) { + if (!IR::IsAligned(mem.addr_reg, 2)) { throw NotImplementedException("Unaligned address register"); } - // Pack two registers to build the 32-bit address - return ir.PackUint2x32(ir.CompositeConstruct(X(stg.addr_reg), X(stg.addr_reg + 1))); + // Pack two registers to build the 64-bit address + return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1))); + }()}; + const u64 addr_offset{[&]() -> u64 { + if (mem.addr_reg == IR::Reg::RZ) { + // When RZ is used, the address is an absolute address + return static_cast(mem.rz_addr_offset.Value()); + } else { + return static_cast(mem.addr_offset.Value()); + } }()}; + // Apply the offset + return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); +} +} // Anonymous namespace + +void TranslatorVisitor::LDG(u64 insn) { + // LDG loads global memory into registers + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<46, 2, LoadCache> cache; + BitField<48, 3, LoadSize> size; + } const ldg{insn}; + + // Pointer to load data from + const IR::U64 address{Address(*this, insn)}; + const IR::Reg dest_reg{ldg.dest_reg}; + switch (ldg.size) { + case LoadSize::U8: + X(dest_reg, ir.LoadGlobalU8(address)); + break; + case LoadSize::S8: + X(dest_reg, ir.LoadGlobalS8(address)); + break; + case LoadSize::U16: + X(dest_reg, ir.LoadGlobalU16(address)); + break; + case LoadSize::S16: + X(dest_reg, ir.LoadGlobalS16(address)); + break; + case LoadSize::B32: + X(dest_reg, ir.LoadGlobal32(address)); + break; + case LoadSize::B64: { + if (!IR::IsAligned(dest_reg, 2)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ir.LoadGlobal64(address)}; + for (int i = 0; i < 2; ++i) { + X(dest_reg + i, ir.CompositeExtract(vector, i)); + } + break; + } + case LoadSize::B128: { + if (!IR::IsAligned(dest_reg, 4)) { + throw NotImplementedException("Unaligned data registers"); + } + const IR::Value vector{ir.LoadGlobal128(address)}; + for (int i = 0; i < 4; ++i) { + X(dest_reg + i, ir.CompositeExtract(vector, i)); + } + break; + } + case LoadSize::U128: + throw NotImplementedException("LDG U.128"); + default: + throw NotImplementedException("Invalid LDG size {}", ldg.size.Value()); + } +} + +void TranslatorVisitor::STG(u64 insn) { + // STG stores registers into global memory. + union { + u64 raw; + BitField<0, 8, IR::Reg> data_reg; + BitField<46, 2, StoreCache> cache; + BitField<48, 3, StoreSize> size; + } const stg{insn}; + // Pointer to store data into + const IR::U64 address{Address(*this, insn)}; + const IR::Reg data_reg{stg.data_reg}; switch (stg.size) { case StoreSize::U8: - ir.WriteGlobalU8(address, X(stg.data_reg)); + ir.WriteGlobalU8(address, X(data_reg)); break; case StoreSize::S8: - ir.WriteGlobalS8(address, X(stg.data_reg)); + ir.WriteGlobalS8(address, X(data_reg)); break; case StoreSize::U16: - ir.WriteGlobalU16(address, X(stg.data_reg)); + ir.WriteGlobalU16(address, X(data_reg)); break; case StoreSize::S16: - ir.WriteGlobalS16(address, X(stg.data_reg)); + ir.WriteGlobalS16(address, X(data_reg)); break; case StoreSize::B32: - ir.WriteGlobal32(address, X(stg.data_reg)); + ir.WriteGlobal32(address, X(data_reg)); break; case StoreSize::B64: { - if (!IR::IsAligned(stg.data_reg, 2)) { + if (!IR::IsAligned(data_reg, 2)) { throw NotImplementedException("Unaligned data registers"); } - const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1))}; + const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))}; ir.WriteGlobal64(address, vector); break; } case StoreSize::B128: - if (!IR::IsAligned(stg.data_reg, 4)) { + if (!IR::IsAligned(data_reg, 4)) { throw NotImplementedException("Unaligned data registers"); } - const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1), - X(stg.data_reg + 2), X(stg.data_reg + 3))}; + const IR::Value vector{ + ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))}; ir.WriteGlobal128(address, vector); break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp new file mode 100644 index 000000000..1711d3f48 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp @@ -0,0 +1,45 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +union MOV { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 8, IR::Reg> src_reg; + BitField<39, 4, u64> mask; +}; + +void CheckMask(MOV mov) { + if (mov.mask != 0xf) { + throw NotImplementedException("Non-full move mask"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::MOV_reg(u64 insn) { + const MOV mov{insn}; + CheckMask(mov); + X(mov.dest_reg, X(mov.src_reg)); +} + +void TranslatorVisitor::MOV_cbuf(u64 insn) { + const MOV mov{insn}; + CheckMask(mov); + X(mov.dest_reg, GetCbuf(insn)); +} + +void TranslatorVisitor::MOV_imm(u64 insn) { + const MOV mov{insn}; + CheckMask(mov); + X(mov.dest_reg, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp new file mode 100644 index 000000000..93cea302a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -0,0 +1,114 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class SpecialRegister : u64 { + SR_LANEID = 0, + SR_VIRTCFG = 2, + SR_VIRTID = 3, + SR_PM0 = 4, + SR_PM1 = 5, + SR_PM2 = 6, + SR_PM3 = 7, + SR_PM4 = 8, + SR_PM5 = 9, + SR_PM6 = 10, + SR_PM7 = 11, + SR_ORDERING_TICKET = 15, + SR_PRIM_TYPE = 16, + SR_INVOCATION_ID = 17, + SR_Y_DIRECTION = 18, + SR_THREAD_KILL = 19, + SM_SHADER_TYPE = 20, + SR_DIRECTCBEWRITEADDRESSLOW = 21, + SR_DIRECTCBEWRITEADDRESSHIGH = 22, + SR_DIRECTCBEWRITEENABLE = 23, + SR_MACHINE_ID_0 = 24, + SR_MACHINE_ID_1 = 25, + SR_MACHINE_ID_2 = 26, + SR_MACHINE_ID_3 = 27, + SR_AFFINITY = 28, + SR_INVOCATION_INFO = 29, + SR_WSCALEFACTOR_XY = 30, + SR_WSCALEFACTOR_Z = 31, + SR_TID = 32, + SR_TID_X = 33, + SR_TID_Y = 34, + SR_TID_Z = 35, + SR_CTAID_X = 37, + SR_CTAID_Y = 38, + SR_CTAID_Z = 39, + SR_NTID = 49, + SR_CirQueueIncrMinusOne = 50, + SR_NLATC = 51, + SR_SWINLO = 57, + SR_SWINSZ = 58, + SR_SMEMSZ = 59, + SR_SMEMBANKS = 60, + SR_LWINLO = 61, + SR_LWINSZ = 62, + SR_LMEMLOSZ = 63, + SR_LMEMHIOFF = 64, + SR_EQMASK = 65, + SR_LTMASK = 66, + SR_LEMASK = 67, + SR_GTMASK = 68, + SR_GEMASK = 69, + SR_REGALLOC = 70, + SR_GLOBALERRORSTATUS = 73, + SR_WARPERRORSTATUS = 75, + SR_PM_HI0 = 81, + SR_PM_HI1 = 82, + SR_PM_HI2 = 83, + SR_PM_HI3 = 84, + SR_PM_HI4 = 85, + SR_PM_HI5 = 86, + SR_PM_HI6 = 87, + SR_PM_HI7 = 88, + SR_CLOCKLO = 89, + SR_CLOCKHI = 90, + SR_GLOBALTIMERLO = 91, + SR_GLOBALTIMERHI = 92, + SR_HWTASKID = 105, + SR_CIRCULARQUEUEENTRYINDEX = 106, + SR_CIRCULARQUEUEENTRYADDRESSLOW = 107, + SR_CIRCULARQUEUEENTRYADDRESSHIGH = 108, +}; + +[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { + switch (special_register) { + case SpecialRegister::SR_TID_X: + return ir.LocalInvocationIdX(); + case SpecialRegister::SR_TID_Y: + return ir.LocalInvocationIdY(); + case SpecialRegister::SR_TID_Z: + return ir.LocalInvocationIdZ(); + case SpecialRegister::SR_CTAID_X: + return ir.WorkgroupIdX(); + case SpecialRegister::SR_CTAID_Y: + return ir.WorkgroupIdY(); + case SpecialRegister::SR_CTAID_Z: + return ir.WorkgroupIdZ(); + default: + throw NotImplementedException("S2R special register {}", special_register); + } +} +} // Anonymous namespace + +void TranslatorVisitor::S2R(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 8, SpecialRegister> src_reg; + } const s2r{insn}; + + X(s2r.dest_reg, Read(ir, s2r.src_reg)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 0f52696d1..d70399f6b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -7,21 +7,8 @@ #include "shader_recompiler/frontend/maxwell/opcode.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" -#include "shader_recompiler/ir_opt/passes.h" - namespace Shader::Maxwell { -[[maybe_unused]] static inline void DumpOptimized(IR::Block& block) { - auto raw{IR::DumpBlock(block)}; - - Optimization::GetSetElimination(block); - Optimization::DeadCodeEliminationPass(block); - Optimization::IdentityRemovalPass(block); - auto dumped{IR::DumpBlock(block)}; - - fmt::print(stderr, "{}", dumped); -} - [[noreturn]] static void ThrowNotImplemented(Opcode opcode) { throw NotImplementedException("Instruction {} is not implemented", opcode); } @@ -146,8 +133,8 @@ void TranslatorVisitor::DADD_imm(u64) { ThrowNotImplemented(Opcode::DADD_imm); } -void TranslatorVisitor::DEPBAR(u64) { - ThrowNotImplemented(Opcode::DEPBAR); +void TranslatorVisitor::DEPBAR() { + // DEPBAR is a no-op } void TranslatorVisitor::DFMA_reg(u64) { @@ -230,22 +217,6 @@ void TranslatorVisitor::F2F_imm(u64) { ThrowNotImplemented(Opcode::F2F_imm); } -void TranslatorVisitor::FADD_reg(u64) { - ThrowNotImplemented(Opcode::FADD_reg); -} - -void TranslatorVisitor::FADD_cbuf(u64) { - ThrowNotImplemented(Opcode::FADD_cbuf); -} - -void TranslatorVisitor::FADD_imm(u64) { - ThrowNotImplemented(Opcode::FADD_imm); -} - -void TranslatorVisitor::FADD32I(u64) { - ThrowNotImplemented(Opcode::FADD32I); -} - void TranslatorVisitor::FCHK_reg(u64) { ThrowNotImplemented(Opcode::FCHK_reg); } @@ -274,26 +245,6 @@ void TranslatorVisitor::FCMP_imm(u64) { ThrowNotImplemented(Opcode::FCMP_imm); } -void TranslatorVisitor::FFMA_reg(u64) { - ThrowNotImplemented(Opcode::FFMA_reg); -} - -void TranslatorVisitor::FFMA_rc(u64) { - ThrowNotImplemented(Opcode::FFMA_rc); -} - -void TranslatorVisitor::FFMA_cr(u64) { - ThrowNotImplemented(Opcode::FFMA_cr); -} - -void TranslatorVisitor::FFMA_imm(u64) { - ThrowNotImplemented(Opcode::FFMA_imm); -} - -void TranslatorVisitor::FFMA32I(u64) { - ThrowNotImplemented(Opcode::FFMA32I); -} - void TranslatorVisitor::FLO_reg(u64) { ThrowNotImplemented(Opcode::FLO_reg); } @@ -318,22 +269,6 @@ void TranslatorVisitor::FMNMX_imm(u64) { ThrowNotImplemented(Opcode::FMNMX_imm); } -void TranslatorVisitor::FMUL_reg(u64) { - ThrowNotImplemented(Opcode::FMUL_reg); -} - -void TranslatorVisitor::FMUL_cbuf(u64) { - ThrowNotImplemented(Opcode::FMUL_cbuf); -} - -void TranslatorVisitor::FMUL_imm(u64) { - ThrowNotImplemented(Opcode::FMUL_imm); -} - -void TranslatorVisitor::FMUL32I(u64) { - ThrowNotImplemented(Opcode::FMUL32I); -} - void TranslatorVisitor::FSET_reg(u64) { ThrowNotImplemented(Opcode::FSET_reg); } @@ -470,18 +405,6 @@ void TranslatorVisitor::I2I_imm(u64) { ThrowNotImplemented(Opcode::I2I_imm); } -void TranslatorVisitor::IADD_reg(u64) { - ThrowNotImplemented(Opcode::IADD_reg); -} - -void TranslatorVisitor::IADD_cbuf(u64) { - ThrowNotImplemented(Opcode::IADD_cbuf); -} - -void TranslatorVisitor::IADD_imm(u64) { - ThrowNotImplemented(Opcode::IADD_imm); -} - void TranslatorVisitor::IADD3_reg(u64) { ThrowNotImplemented(Opcode::IADD3_reg); } @@ -494,10 +417,6 @@ void TranslatorVisitor::IADD3_imm(u64) { ThrowNotImplemented(Opcode::IADD3_imm); } -void TranslatorVisitor::IADD32I(u64) { - ThrowNotImplemented(Opcode::IADD32I); -} - void TranslatorVisitor::ICMP_reg(u64) { ThrowNotImplemented(Opcode::ICMP_reg); } @@ -594,22 +513,6 @@ void TranslatorVisitor::ISBERD(u64) { ThrowNotImplemented(Opcode::ISBERD); } -void TranslatorVisitor::ISCADD_reg(u64) { - ThrowNotImplemented(Opcode::ISCADD_reg); -} - -void TranslatorVisitor::ISCADD_cbuf(u64) { - ThrowNotImplemented(Opcode::ISCADD_cbuf); -} - -void TranslatorVisitor::ISCADD_imm(u64) { - ThrowNotImplemented(Opcode::ISCADD_imm); -} - -void TranslatorVisitor::ISCADD32I(u64) { - ThrowNotImplemented(Opcode::ISCADD32I); -} - void TranslatorVisitor::ISET_reg(u64) { ThrowNotImplemented(Opcode::ISET_reg); } @@ -622,18 +525,6 @@ void TranslatorVisitor::ISET_imm(u64) { ThrowNotImplemented(Opcode::ISET_imm); } -void TranslatorVisitor::ISETP_reg(u64) { - ThrowNotImplemented(Opcode::ISETP_reg); -} - -void TranslatorVisitor::ISETP_cbuf(u64) { - ThrowNotImplemented(Opcode::ISETP_cbuf); -} - -void TranslatorVisitor::ISETP_imm(u64) { - ThrowNotImplemented(Opcode::ISETP_imm); -} - void TranslatorVisitor::JCAL(u64) { ThrowNotImplemented(Opcode::JCAL); } @@ -658,10 +549,6 @@ void TranslatorVisitor::LDC(u64) { ThrowNotImplemented(Opcode::LDC); } -void TranslatorVisitor::LDG(u64) { - ThrowNotImplemented(Opcode::LDG); -} - void TranslatorVisitor::LDL(u64) { ThrowNotImplemented(Opcode::LDL); } @@ -866,10 +753,6 @@ void TranslatorVisitor::RTT(u64) { ThrowNotImplemented(Opcode::RTT); } -void TranslatorVisitor::S2R(u64) { - ThrowNotImplemented(Opcode::S2R); -} - void TranslatorVisitor::SAM(u64) { ThrowNotImplemented(Opcode::SAM); } @@ -914,18 +797,6 @@ void TranslatorVisitor::SHFL(u64) { ThrowNotImplemented(Opcode::SHFL); } -void TranslatorVisitor::SHL_reg(u64) { - ThrowNotImplemented(Opcode::SHL_reg); -} - -void TranslatorVisitor::SHL_cbuf(u64) { - ThrowNotImplemented(Opcode::SHL_cbuf); -} - -void TranslatorVisitor::SHL_imm(u64) { - ThrowNotImplemented(Opcode::SHL_imm); -} - void TranslatorVisitor::SHR_reg(u64) { ThrowNotImplemented(Opcode::SHR_reg); } @@ -1086,20 +957,4 @@ void TranslatorVisitor::VSHR(u64) { ThrowNotImplemented(Opcode::VSHR); } -void TranslatorVisitor::XMAD_reg(u64) { - ThrowNotImplemented(Opcode::XMAD_reg); -} - -void TranslatorVisitor::XMAD_rc(u64) { - ThrowNotImplemented(Opcode::XMAD_rc); -} - -void TranslatorVisitor::XMAD_cr(u64) { - ThrowNotImplemented(Opcode::XMAD_cr); -} - -void TranslatorVisitor::XMAD_imm(u64) { - ThrowNotImplemented(Opcode::XMAD_imm); -} - } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp deleted file mode 100644 index 7fa35ba3a..000000000 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/register_move.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/bit_field.h" -#include "common/common_types.h" -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" - -namespace Shader::Maxwell { -namespace { -union MOV { - u64 raw; - BitField<0, 8, IR::Reg> dest_reg; - BitField<20, 8, IR::Reg> src_reg; - BitField<39, 4, u64> mask; -}; - -void CheckMask(MOV mov) { - if (mov.mask != 0xf) { - throw NotImplementedException("Non-full move mask"); - } -} -} // Anonymous namespace - -void TranslatorVisitor::MOV_reg(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, X(mov.src_reg)); -} - -void TranslatorVisitor::MOV_cbuf(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, GetCbuf(insn)); -} - -void TranslatorVisitor::MOV_imm(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, GetImm(insn)); -} - -} // namespace Shader::Maxwell -- cgit v1.2.3 From e81739493a0cacc1efe3295f9d287d5d31b1a989 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 5 Feb 2021 05:58:02 -0300 Subject: shader: Constant propagation and global memory to storage buffer --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 20 ++++++++++++++--- src/shader_recompiler/frontend/ir/ir_emitter.h | 5 ++++- .../frontend/ir/microinstruction.cpp | 26 +++++++++++++--------- .../frontend/ir/microinstruction.h | 4 +++- src/shader_recompiler/frontend/ir/opcode.inc | 22 ++++++++++++++---- src/shader_recompiler/frontend/ir/type.cpp | 2 +- src/shader_recompiler/frontend/ir/type.h | 1 - src/shader_recompiler/frontend/ir/value.cpp | 17 ++++++++++++-- src/shader_recompiler/frontend/ir/value.h | 1 - src/shader_recompiler/frontend/maxwell/program.cpp | 6 +++-- 10 files changed, 77 insertions(+), 27 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 87b253c9a..1c5ae0109 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -504,6 +504,20 @@ U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { } } +U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) { + if (a.Type() != b.Type()) { + throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::U32: + return Inst(Opcode::ISub32, a, b); + case Type::U64: + return Inst(Opcode::ISub64, a, b); + default: + ThrowInvalidType(a.Type()); + } +} + U32 IREmitter::IMul(const U32& a, const U32& b) { return Inst(Opcode::IMul32, a, b); } @@ -679,8 +693,8 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& v } } -U32U64 IREmitter::ConvertU(size_t bitsize, const U32U64& value) { - switch (bitsize) { +U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) { + switch (result_bitsize) { case 32: switch (value.Type()) { case Type::U32: @@ -703,7 +717,7 @@ U32U64 IREmitter::ConvertU(size_t bitsize, const U32U64& value) { break; } } - throw NotImplementedException("Conversion from {} to {} bits", value.Type(), bitsize); + throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); } } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 7ff763ecf..84b844898 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -17,6 +17,8 @@ namespace Shader::IR { class IREmitter { public: explicit IREmitter(Block& block_) : block{block_}, insertion_point{block.end()} {} + explicit IREmitter(Block& block_, Block::iterator insertion_point_) + : block{block_}, insertion_point{insertion_point_} {} Block& block; @@ -125,6 +127,7 @@ public: [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); + [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); [[nodiscard]] U32 IMul(const U32& a, const U32& b); [[nodiscard]] U32 INeg(const U32& value); [[nodiscard]] U32 IAbs(const U32& value); @@ -155,7 +158,7 @@ public: [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const U16U32U64& value); [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& value); - [[nodiscard]] U32U64 ConvertU(size_t bitsize, const U32U64& value); + [[nodiscard]] U32U64 ConvertU(size_t result_bitsize, const U32U64& value); private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index ecf76e23d..de953838c 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include + #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/type.h" @@ -44,6 +46,13 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::WriteGlobal32: case Opcode::WriteGlobal64: case Opcode::WriteGlobal128: + case Opcode::WriteStorageU8: + case Opcode::WriteStorageS8: + case Opcode::WriteStorageU16: + case Opcode::WriteStorageS16: + case Opcode::WriteStorage32: + case Opcode::WriteStorage64: + case Opcode::WriteStorage128: return true; default: return false; @@ -56,15 +65,19 @@ bool Inst::IsPseudoInstruction() const noexcept { case Opcode::GetSignFromOp: case Opcode::GetCarryFromOp: case Opcode::GetOverflowFromOp: - case Opcode::GetZSCOFromOp: return true; default: return false; } } +bool Inst::AreAllArgsImmediates() const noexcept { + return std::all_of(args.begin(), args.begin() + NumArgs(), + [](const IR::Value& value) { return value.IsImmediate(); }); +} + bool Inst::HasAssociatedPseudoOperation() const noexcept { - return zero_inst || sign_inst || carry_inst || overflow_inst || zsco_inst; + return zero_inst || sign_inst || carry_inst || overflow_inst; } Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { @@ -82,9 +95,6 @@ Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { case Opcode::GetOverflowFromOp: CheckPseudoInstruction(overflow_inst, Opcode::GetOverflowFromOp); return overflow_inst; - case Opcode::GetZSCOFromOp: - CheckPseudoInstruction(zsco_inst, Opcode::GetZSCOFromOp); - return zsco_inst; default: throw InvalidArgument("{} is not a pseudo-instruction", opcode); } @@ -176,9 +186,6 @@ void Inst::Use(const Value& value) { case Opcode::GetOverflowFromOp: SetPseudoInstruction(value.Inst()->overflow_inst, this); break; - case Opcode::GetZSCOFromOp: - SetPseudoInstruction(value.Inst()->zsco_inst, this); - break; default: break; } @@ -200,9 +207,6 @@ void Inst::UndoUse(const Value& value) { case Opcode::GetOverflowFromOp: RemovePseudoInstruction(value.Inst()->overflow_inst, Opcode::GetOverflowFromOp); break; - case Opcode::GetZSCOFromOp: - RemovePseudoInstruction(value.Inst()->zsco_inst, Opcode::GetZSCOFromOp); - break; default: break; } diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 61849695a..22101c9e2 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -49,6 +49,9 @@ public: /// Pseudo-instructions depend on their parent instructions for their semantics. [[nodiscard]] bool IsPseudoInstruction() const noexcept; + /// Determines if all arguments of this instruction are immediates. + [[nodiscard]] bool AreAllArgsImmediates() const noexcept; + /// Determines if there is a pseudo-operation associated with this instruction. [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept; /// Gets a pseudo-operation associated with this instruction @@ -94,7 +97,6 @@ private: Inst* sign_inst{}; Inst* carry_inst{}; Inst* overflow_inst{}; - Inst* zsco_inst{}; std::vector> phi_operands; u64 flags{}; }; diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc index 4ecb5e936..4596bf39f 100644 --- a/src/shader_recompiler/frontend/ir/opcode.inc +++ b/src/shader_recompiler/frontend/ir/opcode.inc @@ -24,9 +24,6 @@ OPCODE(GetAttribute, U32, Attr OPCODE(SetAttribute, U32, Attribute, ) OPCODE(GetAttributeIndexed, U32, U32, ) OPCODE(SetAttributeIndexed, U32, U32, ) -OPCODE(GetZSCORaw, U32, ) -OPCODE(SetZSCORaw, Void, U32, ) -OPCODE(SetZSCO, Void, ZSCO, ) OPCODE(GetZFlag, U1, Void, ) OPCODE(GetSFlag, U1, Void, ) OPCODE(GetCFlag, U1, Void, ) @@ -65,6 +62,22 @@ OPCODE(WriteGlobal32, Void, U64, OPCODE(WriteGlobal64, Void, U64, Opaque, ) OPCODE(WriteGlobal128, Void, U64, Opaque, ) +// Storage buffer operations +OPCODE(LoadStorageU8, U32, U32, U32, ) +OPCODE(LoadStorageS8, U32, U32, U32, ) +OPCODE(LoadStorageU16, U32, U32, U32, ) +OPCODE(LoadStorageS16, U32, U32, U32, ) +OPCODE(LoadStorage32, U32, U32, U32, ) +OPCODE(LoadStorage64, Opaque, U32, U32, ) +OPCODE(LoadStorage128, Opaque, U32, U32, ) +OPCODE(WriteStorageU8, Void, U32, U32, U32, ) +OPCODE(WriteStorageS8, Void, U32, U32, U32, ) +OPCODE(WriteStorageU16, Void, U32, U32, U32, ) +OPCODE(WriteStorageS16, Void, U32, U32, U32, ) +OPCODE(WriteStorage32, Void, U32, U32, U32, ) +OPCODE(WriteStorage64, Void, U32, U32, Opaque, ) +OPCODE(WriteStorage128, Void, U32, U32, Opaque, ) + // Vector utility OPCODE(CompositeConstruct2, Opaque, Opaque, Opaque, ) OPCODE(CompositeConstruct3, Opaque, Opaque, Opaque, Opaque, ) @@ -90,7 +103,6 @@ OPCODE(GetZeroFromOp, U1, Opaq OPCODE(GetSignFromOp, U1, Opaque, ) OPCODE(GetCarryFromOp, U1, Opaque, ) OPCODE(GetOverflowFromOp, U1, Opaque, ) -OPCODE(GetZSCOFromOp, ZSCO, Opaque, ) // Floating-point operations OPCODE(FPAbs16, U16, U16, ) @@ -143,6 +155,8 @@ OPCODE(FPTrunc64, U64, U64, // Integer operations OPCODE(IAdd32, U32, U32, U32, ) OPCODE(IAdd64, U64, U64, U64, ) +OPCODE(ISub32, U32, U32, U32, ) +OPCODE(ISub64, U64, U64, U64, ) OPCODE(IMul32, U32, U32, U32, ) OPCODE(INeg32, U32, U32, ) OPCODE(IAbs32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/ir/type.cpp b/src/shader_recompiler/frontend/ir/type.cpp index da1e2a0f6..13cc09195 100644 --- a/src/shader_recompiler/frontend/ir/type.cpp +++ b/src/shader_recompiler/frontend/ir/type.cpp @@ -11,7 +11,7 @@ namespace Shader::IR { std::string NameOf(Type type) { static constexpr std::array names{ - "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", "U64", "ZSCO", + "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", "U64", }; const size_t bits{static_cast(type)}; if (bits == 0) { diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h index f753628e8..397875018 100644 --- a/src/shader_recompiler/frontend/ir/type.h +++ b/src/shader_recompiler/frontend/ir/type.h @@ -25,7 +25,6 @@ enum class Type { U16 = 1 << 7, U32 = 1 << 8, U64 = 1 << 9, - ZSCO = 1 << 10, }; DECLARE_ENUM_FLAG_OPERATORS(Type) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 1e974e88c..59a9b10dc 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -91,26 +91,41 @@ IR::Attribute Value::Attribute() const { } bool Value::U1() const { + if (IsIdentity()) { + return inst->Arg(0).U1(); + } ValidateAccess(Type::U1); return imm_u1; } u8 Value::U8() const { + if (IsIdentity()) { + return inst->Arg(0).U8(); + } ValidateAccess(Type::U8); return imm_u8; } u16 Value::U16() const { + if (IsIdentity()) { + return inst->Arg(0).U16(); + } ValidateAccess(Type::U16); return imm_u16; } u32 Value::U32() const { + if (IsIdentity()) { + return inst->Arg(0).U32(); + } ValidateAccess(Type::U32); return imm_u32; } u64 Value::U64() const { + if (IsIdentity()) { + return inst->Arg(0).U64(); + } ValidateAccess(Type::U64); return imm_u64; } @@ -142,8 +157,6 @@ bool Value::operator==(const Value& other) const { return imm_u32 == other.imm_u32; case Type::U64: return imm_u64 == other.imm_u64; - case Type::ZSCO: - throw NotImplementedException("ZSCO comparison"); } throw LogicError("Invalid type {}", type); } diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 368119921..31f831794 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -96,6 +96,5 @@ using U64 = TypedValue; using U32U64 = TypedValue; using U16U32U64 = TypedValue; using UAny = TypedValue; -using ZSCO = TypedValue; } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index bd1f96c07..b3f2de852 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -52,9 +52,11 @@ Program::Program(Environment& env, const Flow::CFG& cfg) { } std::ranges::for_each(functions, Optimization::SsaRewritePass); for (IR::Function& function : functions) { + Optimization::Invoke(Optimization::GlobalMemoryToStorageBufferPass, function); + Optimization::Invoke(Optimization::ConstantPropagationPass, function); Optimization::Invoke(Optimization::DeadCodeEliminationPass, function); - Optimization::Invoke(Optimization::IdentityRemovalPass, function); - // Optimization::Invoke(Optimization::VerificationPass, function); + Optimization::IdentityRemovalPass(function); + Optimization::VerificationPass(function); } //*/ } -- cgit v1.2.3 From be94ee88d227d0d3dbeabe9ade98bacd910c7a7e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 5 Feb 2021 19:19:36 -0300 Subject: shader: Make typed IR --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 275 ++++++++++++++------- src/shader_recompiler/frontend/ir/ir_emitter.h | 69 +++--- src/shader_recompiler/frontend/ir/opcode.inc | 200 ++++++++------- src/shader_recompiler/frontend/ir/type.cpp | 4 +- src/shader_recompiler/frontend/ir/type.h | 15 ++ src/shader_recompiler/frontend/ir/value.cpp | 28 +++ src/shader_recompiler/frontend/ir/value.h | 10 + .../maxwell/translate/impl/floating_point_add.cpp | 12 +- .../impl/floating_point_conversion_integer.cpp | 20 +- .../impl/floating_point_fused_multiply_add.cpp | 16 +- .../impl/floating_point_multi_function.cpp | 6 +- .../translate/impl/floating_point_multiply.cpp | 13 +- .../frontend/maxwell/translate/impl/impl.cpp | 20 ++ .../frontend/maxwell/translate/impl/impl.h | 6 + .../translate/impl/load_store_attribute.cpp | 23 +- .../maxwell/translate/impl/load_store_memory.cpp | 4 +- 16 files changed, 463 insertions(+), 258 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 1c5ae0109..9d7dc034c 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -32,16 +32,16 @@ U32 IREmitter::Imm32(s32 value) const { return U32{Value{static_cast(value)}}; } -U32 IREmitter::Imm32(f32 value) const { - return U32{Value{Common::BitCast(value)}}; +F32 IREmitter::Imm32(f32 value) const { + return F32{Value{value}}; } U64 IREmitter::Imm64(u64 value) const { return U64{Value{value}}; } -U64 IREmitter::Imm64(f64 value) const { - return U64{Value{Common::BitCast(value)}}; +F64 IREmitter::Imm64(f64 value) const { + return F64{Value{value}}; } void IREmitter::Branch(IR::Block* label) { @@ -121,11 +121,11 @@ void IREmitter::SetOFlag(const U1& value) { Inst(Opcode::SetOFlag, value); } -U32 IREmitter::GetAttribute(IR::Attribute attribute) { - return Inst(Opcode::GetAttribute, attribute); +F32 IREmitter::GetAttribute(IR::Attribute attribute) { + return Inst(Opcode::GetAttribute, attribute); } -void IREmitter::SetAttribute(IR::Attribute attribute, const U32& value) { +void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) { Inst(Opcode::SetAttribute, attribute, value); } @@ -225,50 +225,113 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) { return Inst(Opcode::GetOverflowFromOp, op); } -U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control) { +F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { if (a.Type() != a.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); } switch (a.Type()) { - case Type::U16: - return Inst(Opcode::FPAdd16, Flags{control}, a, b); - case Type::U32: - return Inst(Opcode::FPAdd32, Flags{control}, a, b); - case Type::U64: - return Inst(Opcode::FPAdd64, Flags{control}, a, b); + case Type::F16: + return Inst(Opcode::FPAdd16, Flags{control}, a, b); + case Type::F32: + return Inst(Opcode::FPAdd32, Flags{control}, a, b); + case Type::F64: + return Inst(Opcode::FPAdd64, Flags{control}, a, b); default: ThrowInvalidType(a.Type()); } } -Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2) { +Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) { if (e1.Type() != e2.Type()) { throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); } - return Inst(Opcode::CompositeConstruct2, e1, e2); + switch (e1.Type()) { + case Type::U32: + return Inst(Opcode::CompositeConstructU32x2, e1, e2); + case Type::F16: + return Inst(Opcode::CompositeConstructF16x2, e1, e2); + case Type::F32: + return Inst(Opcode::CompositeConstructF32x2, e1, e2); + case Type::F64: + return Inst(Opcode::CompositeConstructF64x2, e1, e2); + default: + ThrowInvalidType(e1.Type()); + } } -Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3) { +Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3) { if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) { throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type()); } - return Inst(Opcode::CompositeConstruct3, e1, e2, e3); + switch (e1.Type()) { + case Type::U32: + return Inst(Opcode::CompositeConstructU32x3, e1, e2, e3); + case Type::F16: + return Inst(Opcode::CompositeConstructF16x3, e1, e2, e3); + case Type::F32: + return Inst(Opcode::CompositeConstructF32x3, e1, e2, e3); + case Type::F64: + return Inst(Opcode::CompositeConstructF64x3, e1, e2, e3); + default: + ThrowInvalidType(e1.Type()); + } } -Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3, - const UAny& e4) { +Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, + const Value& e4) { if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) { throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(), e3.Type(), e4.Type()); } - return Inst(Opcode::CompositeConstruct4, e1, e2, e3, e4); + switch (e1.Type()) { + case Type::U32: + return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4); + case Type::F16: + return Inst(Opcode::CompositeConstructF16x4, e1, e2, e3, e4); + case Type::F32: + return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4); + case Type::F64: + return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4); + default: + ThrowInvalidType(e1.Type()); + } } -UAny IREmitter::CompositeExtract(const Value& vector, size_t element) { - if (element >= 4) { - throw InvalidArgument("Out of bounds element {}", element); +Value IREmitter::CompositeExtract(const Value& vector, size_t element) { + const auto read = [&](Opcode opcode, size_t limit) -> Value { + if (element >= limit) { + throw InvalidArgument("Out of bounds element {}", element); + } + return Inst(opcode, vector, Value{static_cast(element)}); + }; + switch (vector.Type()) { + case Type::U32x2: + return read(Opcode::CompositeExtractU32x2, 2); + case Type::U32x3: + return read(Opcode::CompositeExtractU32x3, 3); + case Type::U32x4: + return read(Opcode::CompositeExtractU32x4, 4); + case Type::F16x2: + return read(Opcode::CompositeExtractF16x2, 2); + case Type::F16x3: + return read(Opcode::CompositeExtractF16x3, 3); + case Type::F16x4: + return read(Opcode::CompositeExtractF16x4, 4); + case Type::F32x2: + return read(Opcode::CompositeExtractF32x2, 2); + case Type::F32x3: + return read(Opcode::CompositeExtractF32x3, 3); + case Type::F32x4: + return read(Opcode::CompositeExtractF32x4, 4); + case Type::F64x2: + return read(Opcode::CompositeExtractF64x2, 2); + case Type::F64x3: + return read(Opcode::CompositeExtractF64x3, 3); + case Type::F64x4: + return read(Opcode::CompositeExtractF64x4, 4); + default: + ThrowInvalidType(vector.Type()); } - return Inst(Opcode::CompositeExtract, vector, Imm32(static_cast(element))); } UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& false_value) { @@ -289,6 +352,36 @@ UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& } } +template <> +IR::U32 IREmitter::BitCast(const IR::F32& value) { + return Inst(Opcode::BitCastU32F32, value); +} + +template <> +IR::F32 IREmitter::BitCast(const IR::U32& value) { + return Inst(Opcode::BitCastF32U32, value); +} + +template <> +IR::U16 IREmitter::BitCast(const IR::F16& value) { + return Inst(Opcode::BitCastU16F16, value); +} + +template <> +IR::F16 IREmitter::BitCast(const IR::U16& value) { + return Inst(Opcode::BitCastF16U16, value); +} + +template <> +IR::U64 IREmitter::BitCast(const IR::F64& value) { + return Inst(Opcode::BitCastU64F64, value); +} + +template <> +IR::F64 IREmitter::BitCast(const IR::U64& value) { + return Inst(Opcode::BitCastF64U64, value); +} + U64 IREmitter::PackUint2x32(const Value& vector) { return Inst(Opcode::PackUint2x32, vector); } @@ -305,75 +398,75 @@ Value IREmitter::UnpackFloat2x16(const U32& value) { return Inst(Opcode::UnpackFloat2x16, value); } -U64 IREmitter::PackDouble2x32(const Value& vector) { - return Inst(Opcode::PackDouble2x32, vector); +F64 IREmitter::PackDouble2x32(const Value& vector) { + return Inst(Opcode::PackDouble2x32, vector); } -Value IREmitter::UnpackDouble2x32(const U64& value) { +Value IREmitter::UnpackDouble2x32(const F64& value) { return Inst(Opcode::UnpackDouble2x32, value); } -U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control) { +F16F32F64 IREmitter::FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control) { if (a.Type() != b.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); } switch (a.Type()) { - case Type::U16: - return Inst(Opcode::FPMul16, Flags{control}, a, b); - case Type::U32: - return Inst(Opcode::FPMul32, Flags{control}, a, b); - case Type::U64: - return Inst(Opcode::FPMul64, Flags{control}, a, b); + case Type::F16: + return Inst(Opcode::FPMul16, Flags{control}, a, b); + case Type::F32: + return Inst(Opcode::FPMul32, Flags{control}, a, b); + case Type::F64: + return Inst(Opcode::FPMul64, Flags{control}, a, b); default: ThrowInvalidType(a.Type()); } } -U16U32U64 IREmitter::FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c, +F16F32F64 IREmitter::FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c, FpControl control) { if (a.Type() != b.Type() || a.Type() != c.Type()) { throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type()); } switch (a.Type()) { - case Type::U16: - return Inst(Opcode::FPFma16, Flags{control}, a, b, c); - case Type::U32: - return Inst(Opcode::FPFma32, Flags{control}, a, b, c); - case Type::U64: - return Inst(Opcode::FPFma64, Flags{control}, a, b, c); + case Type::F16: + return Inst(Opcode::FPFma16, Flags{control}, a, b, c); + case Type::F32: + return Inst(Opcode::FPFma32, Flags{control}, a, b, c); + case Type::F64: + return Inst(Opcode::FPFma64, Flags{control}, a, b, c); default: ThrowInvalidType(a.Type()); } } -U16U32U64 IREmitter::FPAbs(const U16U32U64& value) { +F16F32F64 IREmitter::FPAbs(const F16F32F64& value) { switch (value.Type()) { case Type::U16: - return Inst(Opcode::FPAbs16, value); + return Inst(Opcode::FPAbs16, value); case Type::U32: - return Inst(Opcode::FPAbs32, value); + return Inst(Opcode::FPAbs32, value); case Type::U64: - return Inst(Opcode::FPAbs64, value); + return Inst(Opcode::FPAbs64, value); default: ThrowInvalidType(value.Type()); } } -U16U32U64 IREmitter::FPNeg(const U16U32U64& value) { +F16F32F64 IREmitter::FPNeg(const F16F32F64& value) { switch (value.Type()) { case Type::U16: - return Inst(Opcode::FPNeg16, value); + return Inst(Opcode::FPNeg16, value); case Type::U32: - return Inst(Opcode::FPNeg32, value); + return Inst(Opcode::FPNeg32, value); case Type::U64: - return Inst(Opcode::FPNeg64, value); + return Inst(Opcode::FPNeg64, value); default: ThrowInvalidType(value.Type()); } } -U16U32U64 IREmitter::FPAbsNeg(const U16U32U64& value, bool abs, bool neg) { - U16U32U64 result{value}; +F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) { + F16F32F64 result{value}; if (abs) { result = FPAbs(value); } @@ -383,108 +476,108 @@ U16U32U64 IREmitter::FPAbsNeg(const U16U32U64& value, bool abs, bool neg) { return result; } -U32 IREmitter::FPCosNotReduced(const U32& value) { - return Inst(Opcode::FPCosNotReduced, value); +F32 IREmitter::FPCosNotReduced(const F32& value) { + return Inst(Opcode::FPCosNotReduced, value); } -U32 IREmitter::FPExp2NotReduced(const U32& value) { - return Inst(Opcode::FPExp2NotReduced, value); +F32 IREmitter::FPExp2NotReduced(const F32& value) { + return Inst(Opcode::FPExp2NotReduced, value); } -U32 IREmitter::FPLog2(const U32& value) { - return Inst(Opcode::FPLog2, value); +F32 IREmitter::FPLog2(const F32& value) { + return Inst(Opcode::FPLog2, value); } -U32U64 IREmitter::FPRecip(const U32U64& value) { +F32F64 IREmitter::FPRecip(const F32F64& value) { switch (value.Type()) { case Type::U32: - return Inst(Opcode::FPRecip32, value); + return Inst(Opcode::FPRecip32, value); case Type::U64: - return Inst(Opcode::FPRecip64, value); + return Inst(Opcode::FPRecip64, value); default: ThrowInvalidType(value.Type()); } } -U32U64 IREmitter::FPRecipSqrt(const U32U64& value) { +F32F64 IREmitter::FPRecipSqrt(const F32F64& value) { switch (value.Type()) { case Type::U32: - return Inst(Opcode::FPRecipSqrt32, value); + return Inst(Opcode::FPRecipSqrt32, value); case Type::U64: - return Inst(Opcode::FPRecipSqrt64, value); + return Inst(Opcode::FPRecipSqrt64, value); default: ThrowInvalidType(value.Type()); } } -U32 IREmitter::FPSinNotReduced(const U32& value) { - return Inst(Opcode::FPSinNotReduced, value); +F32 IREmitter::FPSinNotReduced(const F32& value) { + return Inst(Opcode::FPSinNotReduced, value); } -U32 IREmitter::FPSqrt(const U32& value) { - return Inst(Opcode::FPSqrt, value); +F32 IREmitter::FPSqrt(const F32& value) { + return Inst(Opcode::FPSqrt, value); } -U16U32U64 IREmitter::FPSaturate(const U16U32U64& value) { +F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { switch (value.Type()) { case Type::U16: - return Inst(Opcode::FPSaturate16, value); + return Inst(Opcode::FPSaturate16, value); case Type::U32: - return Inst(Opcode::FPSaturate32, value); + return Inst(Opcode::FPSaturate32, value); case Type::U64: - return Inst(Opcode::FPSaturate64, value); + return Inst(Opcode::FPSaturate64, value); default: ThrowInvalidType(value.Type()); } } -U16U32U64 IREmitter::FPRoundEven(const U16U32U64& value) { +F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) { switch (value.Type()) { case Type::U16: - return Inst(Opcode::FPRoundEven16, value); + return Inst(Opcode::FPRoundEven16, value); case Type::U32: - return Inst(Opcode::FPRoundEven32, value); + return Inst(Opcode::FPRoundEven32, value); case Type::U64: - return Inst(Opcode::FPRoundEven64, value); + return Inst(Opcode::FPRoundEven64, value); default: ThrowInvalidType(value.Type()); } } -U16U32U64 IREmitter::FPFloor(const U16U32U64& value) { +F16F32F64 IREmitter::FPFloor(const F16F32F64& value) { switch (value.Type()) { case Type::U16: - return Inst(Opcode::FPFloor16, value); + return Inst(Opcode::FPFloor16, value); case Type::U32: - return Inst(Opcode::FPFloor32, value); + return Inst(Opcode::FPFloor32, value); case Type::U64: - return Inst(Opcode::FPFloor64, value); + return Inst(Opcode::FPFloor64, value); default: ThrowInvalidType(value.Type()); } } -U16U32U64 IREmitter::FPCeil(const U16U32U64& value) { +F16F32F64 IREmitter::FPCeil(const F16F32F64& value) { switch (value.Type()) { case Type::U16: - return Inst(Opcode::FPCeil16, value); + return Inst(Opcode::FPCeil16, value); case Type::U32: - return Inst(Opcode::FPCeil32, value); + return Inst(Opcode::FPCeil32, value); case Type::U64: - return Inst(Opcode::FPCeil64, value); + return Inst(Opcode::FPCeil64, value); default: ThrowInvalidType(value.Type()); } } -U16U32U64 IREmitter::FPTrunc(const U16U32U64& value) { +F16F32F64 IREmitter::FPTrunc(const F16F32F64& value) { switch (value.Type()) { case Type::U16: - return Inst(Opcode::FPTrunc16, value); + return Inst(Opcode::FPTrunc16, value); case Type::U32: - return Inst(Opcode::FPTrunc32, value); + return Inst(Opcode::FPTrunc32, value); case Type::U64: - return Inst(Opcode::FPTrunc64, value); + return Inst(Opcode::FPTrunc64, value); default: ThrowInvalidType(value.Type()); } @@ -605,7 +698,7 @@ U1 IREmitter::LogicalNot(const U1& value) { return Inst(Opcode::LogicalNot, value); } -U32U64 IREmitter::ConvertFToS(size_t bitsize, const U16U32U64& value) { +U32U64 IREmitter::ConvertFToS(size_t bitsize, const F16F32F64& value) { switch (bitsize) { case 16: switch (value.Type()) { @@ -645,7 +738,7 @@ U32U64 IREmitter::ConvertFToS(size_t bitsize, const U16U32U64& value) { } } -U32U64 IREmitter::ConvertFToU(size_t bitsize, const U16U32U64& value) { +U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) { switch (bitsize) { case 16: switch (value.Type()) { @@ -685,7 +778,7 @@ U32U64 IREmitter::ConvertFToU(size_t bitsize, const U16U32U64& value) { } } -U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& value) { +U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value) { if (is_signed) { return ConvertFToS(bitsize, value); } else { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 84b844898..bfd9916cc 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -27,9 +27,9 @@ public: [[nodiscard]] U16 Imm16(u16 value) const; [[nodiscard]] U32 Imm32(u32 value) const; [[nodiscard]] U32 Imm32(s32 value) const; - [[nodiscard]] U32 Imm32(f32 value) const; + [[nodiscard]] F32 Imm32(f32 value) const; [[nodiscard]] U64 Imm64(u64 value) const; - [[nodiscard]] U64 Imm64(f64 value) const; + [[nodiscard]] F64 Imm64(f64 value) const; void Branch(IR::Block* label); void BranchConditional(const U1& cond, IR::Block* true_label, IR::Block* false_label); @@ -55,8 +55,8 @@ public: void SetCFlag(const U1& value); void SetOFlag(const U1& value); - [[nodiscard]] U32 GetAttribute(IR::Attribute attribute); - void SetAttribute(IR::Attribute attribute, const U32& value); + [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); + void SetAttribute(IR::Attribute attribute, const F32& value); [[nodiscard]] U32 WorkgroupIdX(); [[nodiscard]] U32 WorkgroupIdY(); @@ -87,44 +87,47 @@ public: [[nodiscard]] U1 GetCarryFromOp(const Value& op); [[nodiscard]] U1 GetOverflowFromOp(const Value& op); - [[nodiscard]] Value CompositeConstruct(const UAny& e1, const UAny& e2); - [[nodiscard]] Value CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3); - [[nodiscard]] Value CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3, - const UAny& e4); - [[nodiscard]] UAny CompositeExtract(const Value& vector, size_t element); + [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); + [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); + [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, + const Value& e4); + [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); [[nodiscard]] UAny Select(const U1& condition, const UAny& true_value, const UAny& false_value); + template + [[nodiscard]] Dest BitCast(const Source& value); + [[nodiscard]] U64 PackUint2x32(const Value& vector); [[nodiscard]] Value UnpackUint2x32(const U64& value); [[nodiscard]] U32 PackFloat2x16(const Value& vector); [[nodiscard]] Value UnpackFloat2x16(const U32& value); - [[nodiscard]] U64 PackDouble2x32(const Value& vector); - [[nodiscard]] Value UnpackDouble2x32(const U64& value); + [[nodiscard]] F64 PackDouble2x32(const Value& vector); + [[nodiscard]] Value UnpackDouble2x32(const F64& value); - [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control = {}); - [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control = {}); - [[nodiscard]] U16U32U64 FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c, + [[nodiscard]] F16F32F64 FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control = {}); + [[nodiscard]] F16F32F64 FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control = {}); + [[nodiscard]] F16F32F64 FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c, FpControl control = {}); - [[nodiscard]] U16U32U64 FPAbs(const U16U32U64& value); - [[nodiscard]] U16U32U64 FPNeg(const U16U32U64& value); - [[nodiscard]] U16U32U64 FPAbsNeg(const U16U32U64& value, bool abs, bool neg); - - [[nodiscard]] U32 FPCosNotReduced(const U32& value); - [[nodiscard]] U32 FPExp2NotReduced(const U32& value); - [[nodiscard]] U32 FPLog2(const U32& value); - [[nodiscard]] U32U64 FPRecip(const U32U64& value); - [[nodiscard]] U32U64 FPRecipSqrt(const U32U64& value); - [[nodiscard]] U32 FPSinNotReduced(const U32& value); - [[nodiscard]] U32 FPSqrt(const U32& value); - [[nodiscard]] U16U32U64 FPSaturate(const U16U32U64& value); - [[nodiscard]] U16U32U64 FPRoundEven(const U16U32U64& value); - [[nodiscard]] U16U32U64 FPFloor(const U16U32U64& value); - [[nodiscard]] U16U32U64 FPCeil(const U16U32U64& value); - [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value); + [[nodiscard]] F16F32F64 FPAbs(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg); + + [[nodiscard]] F32 FPCosNotReduced(const F32& value); + [[nodiscard]] F32 FPExp2NotReduced(const F32& value); + [[nodiscard]] F32 FPLog2(const F32& value); + [[nodiscard]] F32F64 FPRecip(const F32F64& value); + [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); + [[nodiscard]] F32 FPSinNotReduced(const F32& value); + [[nodiscard]] F32 FPSqrt(const F32& value); + [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); @@ -154,9 +157,9 @@ public: [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b); [[nodiscard]] U1 LogicalNot(const U1& value); - [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const U16U32U64& value); - [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const U16U32U64& value); - [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& value); + [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); + [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); + [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); [[nodiscard]] U32U64 ConvertU(size_t result_bitsize, const U32U64& value); diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc index 4596bf39f..6eb105d92 100644 --- a/src/shader_recompiler/frontend/ir/opcode.inc +++ b/src/shader_recompiler/frontend/ir/opcode.inc @@ -52,15 +52,15 @@ OPCODE(LoadGlobalS8, U32, U64, OPCODE(LoadGlobalU16, U32, U64, ) OPCODE(LoadGlobalS16, U32, U64, ) OPCODE(LoadGlobal32, U32, U64, ) -OPCODE(LoadGlobal64, Opaque, U64, ) -OPCODE(LoadGlobal128, Opaque, U64, ) +OPCODE(LoadGlobal64, U32x2, U64, ) +OPCODE(LoadGlobal128, U32x4, U64, ) OPCODE(WriteGlobalU8, Void, U64, U32, ) OPCODE(WriteGlobalS8, Void, U64, U32, ) OPCODE(WriteGlobalU16, Void, U64, U32, ) OPCODE(WriteGlobalS16, Void, U64, U32, ) OPCODE(WriteGlobal32, Void, U64, U32, ) -OPCODE(WriteGlobal64, Void, U64, Opaque, ) -OPCODE(WriteGlobal128, Void, U64, Opaque, ) +OPCODE(WriteGlobal64, Void, U64, U32x2, ) +OPCODE(WriteGlobal128, Void, U64, U32x4, ) // Storage buffer operations OPCODE(LoadStorageU8, U32, U32, U32, ) @@ -68,21 +68,41 @@ OPCODE(LoadStorageS8, U32, U32, OPCODE(LoadStorageU16, U32, U32, U32, ) OPCODE(LoadStorageS16, U32, U32, U32, ) OPCODE(LoadStorage32, U32, U32, U32, ) -OPCODE(LoadStorage64, Opaque, U32, U32, ) -OPCODE(LoadStorage128, Opaque, U32, U32, ) -OPCODE(WriteStorageU8, Void, U32, U32, U32, ) -OPCODE(WriteStorageS8, Void, U32, U32, U32, ) -OPCODE(WriteStorageU16, Void, U32, U32, U32, ) -OPCODE(WriteStorageS16, Void, U32, U32, U32, ) -OPCODE(WriteStorage32, Void, U32, U32, U32, ) -OPCODE(WriteStorage64, Void, U32, U32, Opaque, ) -OPCODE(WriteStorage128, Void, U32, U32, Opaque, ) +OPCODE(LoadStorage64, U32x2, U32, U32, ) +OPCODE(LoadStorage128, U32x4, U32, U32, ) +OPCODE(WriteStorageU8, Void, U32, U32, U32, ) +OPCODE(WriteStorageS8, Void, U32, U32, U32, ) +OPCODE(WriteStorageU16, Void, U32, U32, U32, ) +OPCODE(WriteStorageS16, Void, U32, U32, U32, ) +OPCODE(WriteStorage32, Void, U32, U32, U32, ) +OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) +OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) // Vector utility -OPCODE(CompositeConstruct2, Opaque, Opaque, Opaque, ) -OPCODE(CompositeConstruct3, Opaque, Opaque, Opaque, Opaque, ) -OPCODE(CompositeConstruct4, Opaque, Opaque, Opaque, Opaque, Opaque, ) -OPCODE(CompositeExtract, Opaque, Opaque, U32, ) +OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) +OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) +OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) +OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) +OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) +OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) +OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) +OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) +OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) +OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) +OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) +OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) +OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) +OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) +OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) +OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) +OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) +OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) +OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) +OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) +OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) +OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) +OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) +OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) // Select operations OPCODE(Select8, U8, U1, U8, U8, ) @@ -91,12 +111,18 @@ OPCODE(Select32, U32, U1, OPCODE(Select64, U64, U1, U64, U64, ) // Bitwise conversions -OPCODE(PackUint2x32, U64, Opaque, ) -OPCODE(UnpackUint2x32, Opaque, U64, ) -OPCODE(PackFloat2x16, U32, Opaque, ) -OPCODE(UnpackFloat2x16, Opaque, U32, ) -OPCODE(PackDouble2x32, U64, Opaque, ) -OPCODE(UnpackDouble2x32, Opaque, U64, ) +OPCODE(BitCastU16F16, U16, F16, ) +OPCODE(BitCastU32F32, U32, F32, ) +OPCODE(BitCastU64F64, U64, F64, ) +OPCODE(BitCastF16U16, F16, U16, ) +OPCODE(BitCastF32U32, F32, U32, ) +OPCODE(BitCastF64U64, F64, U64, ) +OPCODE(PackUint2x32, U64, U32x2, ) +OPCODE(UnpackUint2x32, U32x2, U64, ) +OPCODE(PackFloat2x16, U32, F16x2, ) +OPCODE(UnpackFloat2x16, F16x2, U32, ) +OPCODE(PackDouble2x32, U64, U32x2, ) +OPCODE(UnpackDouble2x32, U32x2, U64, ) // Pseudo-operation, handled specially at final emit OPCODE(GetZeroFromOp, U1, Opaque, ) @@ -105,52 +131,52 @@ OPCODE(GetCarryFromOp, U1, Opaq OPCODE(GetOverflowFromOp, U1, Opaque, ) // Floating-point operations -OPCODE(FPAbs16, U16, U16, ) -OPCODE(FPAbs32, U32, U32, ) -OPCODE(FPAbs64, U64, U64, ) -OPCODE(FPAdd16, U16, U16, U16, ) -OPCODE(FPAdd32, U32, U32, U32, ) -OPCODE(FPAdd64, U64, U64, U64, ) -OPCODE(FPFma16, U16, U16, U16, U16, ) -OPCODE(FPFma32, U32, U32, U32, U32, ) -OPCODE(FPFma64, U64, U64, U64, U64, ) -OPCODE(FPMax32, U32, U32, U32, ) -OPCODE(FPMax64, U64, U64, U64, ) -OPCODE(FPMin32, U32, U32, U32, ) -OPCODE(FPMin64, U64, U64, U64, ) -OPCODE(FPMul16, U16, U16, U16, ) -OPCODE(FPMul32, U32, U32, U32, ) -OPCODE(FPMul64, U64, U64, U64, ) -OPCODE(FPNeg16, U16, U16, ) -OPCODE(FPNeg32, U32, U32, ) -OPCODE(FPNeg64, U64, U64, ) -OPCODE(FPRecip32, U32, U32, ) -OPCODE(FPRecip64, U64, U64, ) -OPCODE(FPRecipSqrt32, U32, U32, ) -OPCODE(FPRecipSqrt64, U64, U64, ) -OPCODE(FPSqrt, U32, U32, ) -OPCODE(FPSin, U32, U32, ) -OPCODE(FPSinNotReduced, U32, U32, ) -OPCODE(FPExp2, U32, U32, ) -OPCODE(FPExp2NotReduced, U32, U32, ) -OPCODE(FPCos, U32, U32, ) -OPCODE(FPCosNotReduced, U32, U32, ) -OPCODE(FPLog2, U32, U32, ) -OPCODE(FPSaturate16, U16, U16, ) -OPCODE(FPSaturate32, U32, U32, ) -OPCODE(FPSaturate64, U64, U64, ) -OPCODE(FPRoundEven16, U16, U16, ) -OPCODE(FPRoundEven32, U32, U32, ) -OPCODE(FPRoundEven64, U64, U64, ) -OPCODE(FPFloor16, U16, U16, ) -OPCODE(FPFloor32, U32, U32, ) -OPCODE(FPFloor64, U64, U64, ) -OPCODE(FPCeil16, U16, U16, ) -OPCODE(FPCeil32, U32, U32, ) -OPCODE(FPCeil64, U64, U64, ) -OPCODE(FPTrunc16, U16, U16, ) -OPCODE(FPTrunc32, U32, U32, ) -OPCODE(FPTrunc64, U64, U64, ) +OPCODE(FPAbs16, F16, F16, ) +OPCODE(FPAbs32, F32, F32, ) +OPCODE(FPAbs64, F64, F64, ) +OPCODE(FPAdd16, F16, F16, F16, ) +OPCODE(FPAdd32, F32, F32, F32, ) +OPCODE(FPAdd64, F64, F64, F64, ) +OPCODE(FPFma16, F16, F16, F16, F16, ) +OPCODE(FPFma32, F32, F32, F32, F32, ) +OPCODE(FPFma64, F64, F64, F64, F64, ) +OPCODE(FPMax32, F32, F32, F32, ) +OPCODE(FPMax64, F64, F64, F64, ) +OPCODE(FPMin32, F32, F32, F32, ) +OPCODE(FPMin64, F64, F64, F64, ) +OPCODE(FPMul16, F16, F16, F16, ) +OPCODE(FPMul32, F32, F32, F32, ) +OPCODE(FPMul64, F64, F64, F64, ) +OPCODE(FPNeg16, F16, F16, ) +OPCODE(FPNeg32, F32, F32, ) +OPCODE(FPNeg64, F64, F64, ) +OPCODE(FPRecip32, F32, F32, ) +OPCODE(FPRecip64, F64, F64, ) +OPCODE(FPRecipSqrt32, F32, F32, ) +OPCODE(FPRecipSqrt64, F64, F64, ) +OPCODE(FPSqrt, F32, F32, ) +OPCODE(FPSin, F32, F32, ) +OPCODE(FPSinNotReduced, F32, F32, ) +OPCODE(FPExp2, F32, F32, ) +OPCODE(FPExp2NotReduced, F32, F32, ) +OPCODE(FPCos, F32, F32, ) +OPCODE(FPCosNotReduced, F32, F32, ) +OPCODE(FPLog2, F32, F32, ) +OPCODE(FPSaturate16, F16, F16, ) +OPCODE(FPSaturate32, F32, F32, ) +OPCODE(FPSaturate64, F64, F64, ) +OPCODE(FPRoundEven16, F16, F16, ) +OPCODE(FPRoundEven32, F32, F32, ) +OPCODE(FPRoundEven64, F64, F64, ) +OPCODE(FPFloor16, F16, F16, ) +OPCODE(FPFloor32, F32, F32, ) +OPCODE(FPFloor64, F64, F64, ) +OPCODE(FPCeil16, F16, F16, ) +OPCODE(FPCeil32, F32, F32, ) +OPCODE(FPCeil64, F64, F64, ) +OPCODE(FPTrunc16, F16, F16, ) +OPCODE(FPTrunc32, F32, F32, ) +OPCODE(FPTrunc64, F64, F64, ) // Integer operations OPCODE(IAdd32, U32, U32, U32, ) @@ -188,24 +214,24 @@ OPCODE(LogicalXor, U1, U1, OPCODE(LogicalNot, U1, U1, ) // Conversion operations -OPCODE(ConvertS16F16, U32, U16, ) -OPCODE(ConvertS16F32, U32, U32, ) -OPCODE(ConvertS16F64, U32, U64, ) -OPCODE(ConvertS32F16, U32, U16, ) -OPCODE(ConvertS32F32, U32, U32, ) -OPCODE(ConvertS32F64, U32, U64, ) -OPCODE(ConvertS64F16, U64, U16, ) -OPCODE(ConvertS64F32, U64, U32, ) -OPCODE(ConvertS64F64, U64, U64, ) -OPCODE(ConvertU16F16, U32, U16, ) -OPCODE(ConvertU16F32, U32, U32, ) -OPCODE(ConvertU16F64, U32, U64, ) -OPCODE(ConvertU32F16, U32, U16, ) -OPCODE(ConvertU32F32, U32, U32, ) -OPCODE(ConvertU32F64, U32, U64, ) -OPCODE(ConvertU64F16, U64, U16, ) -OPCODE(ConvertU64F32, U64, U32, ) -OPCODE(ConvertU64F64, U64, U64, ) +OPCODE(ConvertS16F16, U32, F16, ) +OPCODE(ConvertS16F32, U32, F32, ) +OPCODE(ConvertS16F64, U32, F64, ) +OPCODE(ConvertS32F16, U32, F16, ) +OPCODE(ConvertS32F32, U32, F32, ) +OPCODE(ConvertS32F64, U32, F64, ) +OPCODE(ConvertS64F16, U64, F16, ) +OPCODE(ConvertS64F32, U64, F32, ) +OPCODE(ConvertS64F64, U64, F64, ) +OPCODE(ConvertU16F16, U32, F16, ) +OPCODE(ConvertU16F32, U32, F32, ) +OPCODE(ConvertU16F64, U32, F64, ) +OPCODE(ConvertU32F16, U32, F16, ) +OPCODE(ConvertU32F32, U32, F32, ) +OPCODE(ConvertU32F64, U32, F64, ) +OPCODE(ConvertU64F16, U64, F16, ) +OPCODE(ConvertU64F32, U64, F32, ) +OPCODE(ConvertU64F64, U64, F64, ) OPCODE(ConvertU64U32, U64, U32, ) OPCODE(ConvertU32U64, U32, U64, ) diff --git a/src/shader_recompiler/frontend/ir/type.cpp b/src/shader_recompiler/frontend/ir/type.cpp index 13cc09195..f28341bfe 100644 --- a/src/shader_recompiler/frontend/ir/type.cpp +++ b/src/shader_recompiler/frontend/ir/type.cpp @@ -11,7 +11,9 @@ namespace Shader::IR { std::string NameOf(Type type) { static constexpr std::array names{ - "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", "U64", + "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", + "U64", "F16", "F32", "F64", "U32x2", "U32x3", "U32x4", "F16x2", "F16x3", + "F16x4", "F32x2", "F32x3", "F32x4", "F64x2", "F64x3", "F64x4", }; const size_t bits{static_cast(type)}; if (bits == 0) { diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h index 397875018..9a32ca1e8 100644 --- a/src/shader_recompiler/frontend/ir/type.h +++ b/src/shader_recompiler/frontend/ir/type.h @@ -25,6 +25,21 @@ enum class Type { U16 = 1 << 7, U32 = 1 << 8, U64 = 1 << 9, + F16 = 1 << 10, + F32 = 1 << 11, + F64 = 1 << 12, + U32x2 = 1 << 13, + U32x3 = 1 << 14, + U32x4 = 1 << 15, + F16x2 = 1 << 16, + F16x3 = 1 << 17, + F16x4 = 1 << 18, + F32x2 = 1 << 19, + F32x3 = 1 << 20, + F32x4 = 1 << 21, + F64x2 = 1 << 22, + F64x3 = 1 << 23, + F64x4 = 1 << 24, }; DECLARE_ENUM_FLAG_OPERATORS(Type) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 59a9b10dc..93ff8ccf1 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -26,8 +26,12 @@ Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {} Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {} +Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {} + Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} +Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {} + bool Value::IsIdentity() const noexcept { return type == Type::Opaque && inst->Opcode() == Opcode::Identity; } @@ -122,6 +126,14 @@ u32 Value::U32() const { return imm_u32; } +f32 Value::F32() const { + if (IsIdentity()) { + return inst->Arg(0).F32(); + } + ValidateAccess(Type::F32); + return imm_f32; +} + u64 Value::U64() const { if (IsIdentity()) { return inst->Arg(0).U64(); @@ -152,11 +164,27 @@ bool Value::operator==(const Value& other) const { case Type::U8: return imm_u8 == other.imm_u8; case Type::U16: + case Type::F16: return imm_u16 == other.imm_u16; case Type::U32: + case Type::F32: return imm_u32 == other.imm_u32; case Type::U64: + case Type::F64: return imm_u64 == other.imm_u64; + case Type::U32x2: + case Type::U32x3: + case Type::U32x4: + case Type::F16x2: + case Type::F16x3: + case Type::F16x4: + case Type::F32x2: + case Type::F32x3: + case Type::F32x4: + case Type::F64x2: + case Type::F64x3: + case Type::F64x4: + break; } throw LogicError("Invalid type {}", type); } diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 31f831794..2f3688c73 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -28,7 +28,9 @@ public: explicit Value(u8 value) noexcept; explicit Value(u16 value) noexcept; explicit Value(u32 value) noexcept; + explicit Value(f32 value) noexcept; explicit Value(u64 value) noexcept; + explicit Value(f64 value) noexcept; [[nodiscard]] bool IsIdentity() const noexcept; [[nodiscard]] bool IsEmpty() const noexcept; @@ -46,6 +48,7 @@ public: [[nodiscard]] u8 U8() const; [[nodiscard]] u16 U16() const; [[nodiscard]] u32 U32() const; + [[nodiscard]] f32 F32() const; [[nodiscard]] u64 U64() const; [[nodiscard]] bool operator==(const Value& other) const; @@ -65,7 +68,9 @@ private: u8 imm_u8; u16 imm_u16; u32 imm_u32; + f32 imm_f32; u64 imm_u64; + f64 imm_f64; }; }; @@ -93,8 +98,13 @@ using U8 = TypedValue; using U16 = TypedValue; using U32 = TypedValue; using U64 = TypedValue; +using F16 = TypedValue; +using F32 = TypedValue; +using F64 = TypedValue; using U32U64 = TypedValue; +using F32F64 = TypedValue; using U16U32U64 = TypedValue; +using F16F32F64 = TypedValue; using UAny = TypedValue; } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp index d2c44b9cc..cb3a326cf 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -11,7 +11,7 @@ namespace Shader::Maxwell { namespace { void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding, - const IR::U32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) { + const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) { union { u64 raw; BitField<0, 8, IR::Reg> dest_reg; @@ -24,17 +24,17 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin if (cc) { throw NotImplementedException("FADD CC"); } - const IR::U32 op_a{v.ir.FPAbsNeg(v.X(fadd.src_a), abs_a, neg_a)}; - const IR::U32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)}; + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; IR::FpControl control{ .no_contraction{true}, .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, }; - v.X(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); + v.F(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); } -void FADD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { +void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { union { u64 raw; BitField<39, 2, FpRounding> fp_rounding; @@ -53,7 +53,7 @@ void FADD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { } // Anonymous namespace void TranslatorVisitor::FADD_reg(u64 insn) { - FADD(*this, insn, GetReg20(insn)); + FADD(*this, insn, GetReg20F(insn)); } void TranslatorVisitor::FADD_cbuf(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index c4288d9a8..acd8445ad 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -55,21 +55,21 @@ size_t BitSize(DestFormat dest_format) { } } -void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::U16U32U64& op_a) { +void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { // F2I is used to convert from a floating point value to an integer const F2I f2i{insn}; - const IR::U16U32U64 float_value{v.ir.FPAbsNeg(op_a, f2i.abs != 0, f2i.neg != 0)}; - const IR::U16U32U64 rounded_value{[&] { + const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; + const IR::F16F32F64 rounded_value{[&] { switch (f2i.rounding) { case Rounding::Round: - return v.ir.FPRoundEven(float_value); + return v.ir.FPRoundEven(op_a); case Rounding::Floor: - return v.ir.FPFloor(float_value); + return v.ir.FPFloor(op_a); case Rounding::Ceil: - return v.ir.FPCeil(float_value); + return v.ir.FPCeil(op_a); case Rounding::Trunc: - return v.ir.FPTrunc(float_value); + return v.ir.FPTrunc(op_a); default: throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); } @@ -105,12 +105,12 @@ void TranslatorVisitor::F2I_reg(u64 insn) { BitField<20, 8, IR::Reg> src_reg; } const f2i{insn}; - const IR::U16U32U64 op_a{[&]() -> IR::U16U32U64 { + const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 { switch (f2i.base.src_format) { case SrcFormat::F16: - return ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half); + return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)}; case SrcFormat::F32: - return X(f2i.src_reg); + return F(f2i.src_reg); case SrcFormat::F64: return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1))); default: diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index 30ca052ec..1464f2807 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -9,7 +9,7 @@ namespace Shader::Maxwell { namespace { -void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, bool neg_a, +void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a, bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) { union { u64 raw; @@ -23,18 +23,18 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s if (cc) { throw NotImplementedException("FFMA CC"); } - const IR::U32 op_a{v.ir.FPAbsNeg(v.X(ffma.src_a), false, neg_a)}; - const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; - const IR::U32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)}; + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; + const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; const IR::FpControl fp_control{ .no_contraction{true}, .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{CastFmzMode(fmz_mode)}, }; - v.X(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control)); + v.F(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control)); } -void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c) { +void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) { union { u64 raw; BitField<47, 1, u64> cc; @@ -51,7 +51,7 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s } // Anonymous namespace void TranslatorVisitor::FFMA_reg(u64 insn) { - FFMA(*this, insn, GetReg20(insn), GetReg39(insn)); + FFMA(*this, insn, GetReg20F(insn), GetReg39F(insn)); } void TranslatorVisitor::FFMA_rc(u64) { @@ -59,7 +59,7 @@ void TranslatorVisitor::FFMA_rc(u64) { } void TranslatorVisitor::FFMA_cr(u64 insn) { - FFMA(*this, insn, GetCbuf(insn), GetReg39(insn)); + FFMA(*this, insn, GetCbufF(insn), GetReg39F(insn)); } void TranslatorVisitor::FFMA_imm(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp index e2ab0dab2..90cddb18b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp @@ -35,8 +35,8 @@ void TranslatorVisitor::MUFU(u64 insn) { BitField<50, 1, u64> sat; } const mufu{insn}; - const IR::U32 op_a{ir.FPAbsNeg(X(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)}; - IR::U32 value{[&]() -> IR::U32 { + const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)}; + IR::F32 value{[&]() -> IR::F32 { switch (mufu.operation) { case Operation::Cos: return ir.FPCosNotReduced(op_a); @@ -65,7 +65,7 @@ void TranslatorVisitor::MUFU(u64 insn) { value = ir.FPSaturate(value); } - X(mufu.dest_reg, value); + F(mufu.dest_reg, value); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index 743a1e2f0..1b1d38be7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -4,6 +4,7 @@ #include "common/bit_field.h" #include "common/common_types.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" @@ -43,7 +44,7 @@ float ScaleFactor(Scale scale) { throw NotImplementedException("Invalid FMUL scale {}", scale); } -void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, FmzMode fmz_mode, +void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode, FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) { union { u64 raw; @@ -57,23 +58,23 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, FmzMode fmz_mode if (sat) { throw NotImplementedException("FMUL SAT"); } - IR::U32 op_a{v.X(fmul.src_a)}; + IR::F32 op_a{v.F(fmul.src_a)}; if (scale != Scale::None) { if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) { throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers"); } op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale))); } - const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; const IR::FpControl fp_control{ .no_contraction{true}, .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{CastFmzMode(fmz_mode)}, }; - v.X(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control)); + v.F(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control)); } -void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { +void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { union { u64 raw; BitField<39, 2, FpRounding> fp_rounding; @@ -90,7 +91,7 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { } // Anonymous namespace void TranslatorVisitor::FMUL_reg(u64 insn) { - return FMUL(*this, insn, GetReg20(insn)); + return FMUL(*this, insn, GetReg20F(insn)); } void TranslatorVisitor::FMUL_cbuf(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 548c7f611..3c9eaddd9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -12,10 +12,18 @@ IR::U32 TranslatorVisitor::X(IR::Reg reg) { return ir.GetReg(reg); } +IR::F32 TranslatorVisitor::F(IR::Reg reg) { + return ir.BitCast(X(reg)); +} + void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { ir.SetReg(dest_reg, value); } +void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) { + X(dest_reg, ir.BitCast(value)); +} + IR::U32 TranslatorVisitor::GetReg20(u64 insn) { union { u64 raw; @@ -32,6 +40,14 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) { return X(reg.index); } +IR::F32 TranslatorVisitor::GetReg20F(u64 insn) { + return ir.BitCast(GetReg20(insn)); +} + +IR::F32 TranslatorVisitor::GetReg39F(u64 insn) { + return ir.BitCast(GetReg39(insn)); +} + IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { union { u64 raw; @@ -49,6 +65,10 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { return ir.GetCbuf(binding, byte_offset); } +IR::F32 TranslatorVisitor::GetCbufF(u64 insn) { + return ir.BitCast(GetCbuf(insn)); +} + IR::U32 TranslatorVisitor::GetImm20(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index ef6d977fe..b701605d7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -296,12 +296,18 @@ public: void XMAD_imm(u64 insn); [[nodiscard]] IR::U32 X(IR::Reg reg); + [[nodiscard]] IR::F32 F(IR::Reg reg); + void X(IR::Reg dest_reg, const IR::U32& value); + void F(IR::Reg dest_reg, const IR::F32& value); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); + [[nodiscard]] IR::F32 GetReg20F(u64 insn); + [[nodiscard]] IR::F32 GetReg39F(u64 insn); [[nodiscard]] IR::U32 GetCbuf(u64 insn); + [[nodiscard]] IR::F32 GetCbufF(u64 insn); [[nodiscard]] IR::U32 GetImm20(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index 23512db1a..de65173e8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -5,22 +5,23 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/maxwell/opcode.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { namespace { enum class InterpolationMode : u64 { - Pass = 0, - Multiply = 1, - Constant = 2, - Sc = 3, + Pass, + Multiply, + Constant, + Sc, }; enum class SampleMode : u64 { - Default = 0, - Centroid = 1, - Offset = 2, + Default, + Centroid, + Offset, }; } // Anonymous namespace @@ -54,12 +55,12 @@ void TranslatorVisitor::IPA(u64 insn) { } const IR::Attribute attribute{ipa.attribute}; - IR::U32 value{ir.GetAttribute(attribute)}; + IR::F32 value{ir.GetAttribute(attribute)}; if (IR::IsGeneric(attribute)) { // const bool is_perspective{UnimplementedReadHeader(GenericAttributeIndex(attribute))}; const bool is_perspective{false}; if (is_perspective) { - const IR::U32 rcp_position_w{ir.FPRecip(ir.GetAttribute(IR::Attribute::PositionW))}; + const IR::F32 rcp_position_w{ir.FPRecip(ir.GetAttribute(IR::Attribute::PositionW))}; value = ir.FPMul(value, rcp_position_w); } } @@ -68,7 +69,7 @@ void TranslatorVisitor::IPA(u64 insn) { case InterpolationMode::Pass: break; case InterpolationMode::Multiply: - value = ir.FPMul(value, ir.GetReg(ipa.multiplier)); + value = ir.FPMul(value, F(ipa.multiplier)); break; case InterpolationMode::Constant: throw NotImplementedException("IPA.CONSTANT"); @@ -86,7 +87,7 @@ void TranslatorVisitor::IPA(u64 insn) { value = ir.FPSaturate(value); } - ir.SetReg(ipa.dest_reg, value); + F(ipa.dest_reg, value); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index c9669c617..9f1570479 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -114,7 +114,7 @@ void TranslatorVisitor::LDG(u64 insn) { } const IR::Value vector{ir.LoadGlobal64(address)}; for (int i = 0; i < 2; ++i) { - X(dest_reg + i, ir.CompositeExtract(vector, i)); + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); } break; } @@ -124,7 +124,7 @@ void TranslatorVisitor::LDG(u64 insn) { } const IR::Value vector{ir.LoadGlobal128(address)}; for (int i = 0; i < 4; ++i) { - X(dest_reg + i, ir.CompositeExtract(vector, i)); + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); } break; } -- cgit v1.2.3 From 16cb00c521cae6e93ec49d10e15b575b7bc4857e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 5 Feb 2021 23:11:23 -0300 Subject: shader: Add pools and rename files --- src/shader_recompiler/frontend/ir/basic_block.cpp | 5 +- src/shader_recompiler/frontend/ir/basic_block.h | 11 +- src/shader_recompiler/frontend/ir/function.h | 12 +- .../frontend/ir/microinstruction.h | 2 +- src/shader_recompiler/frontend/ir/opcode.cpp | 67 ------ src/shader_recompiler/frontend/ir/opcode.h | 44 ---- src/shader_recompiler/frontend/ir/opcode.inc | 237 --------------------- src/shader_recompiler/frontend/ir/opcodes.cpp | 67 ++++++ src/shader_recompiler/frontend/ir/opcodes.h | 44 ++++ src/shader_recompiler/frontend/ir/opcodes.inc | 237 +++++++++++++++++++++ src/shader_recompiler/frontend/ir/program.cpp | 38 ++++ src/shader_recompiler/frontend/ir/program.h | 21 ++ src/shader_recompiler/frontend/ir/value.cpp | 2 +- .../frontend/maxwell/control_flow.h | 2 +- src/shader_recompiler/frontend/maxwell/decode.cpp | 2 +- src/shader_recompiler/frontend/maxwell/decode.h | 2 +- src/shader_recompiler/frontend/maxwell/opcode.cpp | 26 --- src/shader_recompiler/frontend/maxwell/opcode.h | 30 --- src/shader_recompiler/frontend/maxwell/opcodes.cpp | 26 +++ src/shader_recompiler/frontend/maxwell/opcodes.h | 30 +++ src/shader_recompiler/frontend/maxwell/program.cpp | 49 ++--- src/shader_recompiler/frontend/maxwell/program.h | 22 +- .../impl/floating_point_conversion_integer.cpp | 2 +- .../impl/floating_point_multi_function.cpp | 2 +- .../translate/impl/load_store_attribute.cpp | 2 +- .../maxwell/translate/impl/load_store_memory.cpp | 2 +- .../maxwell/translate/impl/move_register.cpp | 2 +- .../maxwell/translate/impl/not_implemented.cpp | 2 +- .../frontend/maxwell/translate/translate.cpp | 5 +- .../frontend/maxwell/translate/translate.h | 7 +- 30 files changed, 513 insertions(+), 487 deletions(-) delete mode 100644 src/shader_recompiler/frontend/ir/opcode.cpp delete mode 100644 src/shader_recompiler/frontend/ir/opcode.h delete mode 100644 src/shader_recompiler/frontend/ir/opcode.inc create mode 100644 src/shader_recompiler/frontend/ir/opcodes.cpp create mode 100644 src/shader_recompiler/frontend/ir/opcodes.h create mode 100644 src/shader_recompiler/frontend/ir/opcodes.inc create mode 100644 src/shader_recompiler/frontend/ir/program.cpp create mode 100644 src/shader_recompiler/frontend/ir/program.h delete mode 100644 src/shader_recompiler/frontend/maxwell/opcode.cpp delete mode 100644 src/shader_recompiler/frontend/maxwell/opcode.h create mode 100644 src/shader_recompiler/frontend/maxwell/opcodes.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/opcodes.h (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index 249251dd0..1a5d82135 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -14,7 +14,8 @@ namespace Shader::IR { -Block::Block(u32 begin, u32 end) : location_begin{begin}, location_end{end} {} +Block::Block(ObjectPool& inst_pool_, u32 begin, u32 end) + : inst_pool{&inst_pool_}, location_begin{begin}, location_end{end} {} Block::~Block() = default; @@ -24,7 +25,7 @@ void Block::AppendNewInst(Opcode op, std::initializer_list args) { Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list args, u64 flags) { - Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op, flags)}; + Inst* const inst{inst_pool->Create(op, flags)}; const auto result_it{instructions.insert(insertion_point, *inst)}; if (inst->NumArgs() != args.size()) { diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index ec4a41cb1..ec3ad6263 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -10,9 +10,9 @@ #include #include -#include #include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/object_pool.h" namespace Shader::IR { @@ -25,7 +25,7 @@ public: using reverse_iterator = InstructionList::reverse_iterator; using const_reverse_iterator = InstructionList::const_reverse_iterator; - explicit Block(u32 begin, u32 end); + explicit Block(ObjectPool& inst_pool_, u32 begin, u32 end); ~Block(); Block(const Block&) = delete; @@ -119,6 +119,8 @@ public: } private: + /// Memory pool for instruction list + ObjectPool* inst_pool; /// Starting location of this block u32 location_begin; /// End location of this block @@ -127,11 +129,6 @@ private: /// List of instructions in this block InstructionList instructions; - /// Memory pool for instruction list - boost::fast_pool_allocator - instruction_alloc_pool; - /// Block immediate predecessors std::vector imm_predecessors; }; diff --git a/src/shader_recompiler/frontend/ir/function.h b/src/shader_recompiler/frontend/ir/function.h index 2d4dc5b98..bba7d1d39 100644 --- a/src/shader_recompiler/frontend/ir/function.h +++ b/src/shader_recompiler/frontend/ir/function.h @@ -4,22 +4,14 @@ #pragma once -#include -#include +#include #include "shader_recompiler/frontend/ir/basic_block.h" namespace Shader::IR { struct Function { - struct InplaceDelete { - void operator()(IR::Block* block) const noexcept { - std::destroy_at(block); - } - }; - using UniqueBlock = std::unique_ptr; - - std::vector blocks; + boost::container::small_vector blocks; }; } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 22101c9e2..80baffb2e 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -13,7 +13,7 @@ #include #include "common/common_types.h" -#include "shader_recompiler/frontend/ir/opcode.h" +#include "shader_recompiler/frontend/ir/opcodes.h" #include "shader_recompiler/frontend/ir/type.h" #include "shader_recompiler/frontend/ir/value.h" diff --git a/src/shader_recompiler/frontend/ir/opcode.cpp b/src/shader_recompiler/frontend/ir/opcode.cpp deleted file mode 100644 index 65d074029..000000000 --- a/src/shader_recompiler/frontend/ir/opcode.cpp +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/ir/opcode.h" - -namespace Shader::IR { -namespace { -struct OpcodeMeta { - std::string_view name; - Type type; - std::array arg_types; -}; - -using enum Type; - -constexpr std::array META_TABLE{ -#define OPCODE(name_token, type_token, ...) \ - OpcodeMeta{ \ - .name{#name_token}, \ - .type{type_token}, \ - .arg_types{__VA_ARGS__}, \ - }, -#include "opcode.inc" -#undef OPCODE -}; - -void ValidateOpcode(Opcode op) { - const size_t raw{static_cast(op)}; - if (raw >= META_TABLE.size()) { - throw InvalidArgument("Invalid opcode with raw value {}", raw); - } -} -} // Anonymous namespace - -Type TypeOf(Opcode op) { - ValidateOpcode(op); - return META_TABLE[static_cast(op)].type; -} - -size_t NumArgsOf(Opcode op) { - ValidateOpcode(op); - const auto& arg_types{META_TABLE[static_cast(op)].arg_types}; - const auto distance{std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void))}; - return static_cast(distance); -} - -Type ArgTypeOf(Opcode op, size_t arg_index) { - ValidateOpcode(op); - const auto& arg_types{META_TABLE[static_cast(op)].arg_types}; - if (arg_index >= arg_types.size() || arg_types[arg_index] == Type::Void) { - throw InvalidArgument("Out of bounds argument"); - } - return arg_types[arg_index]; -} - -std::string_view NameOf(Opcode op) { - ValidateOpcode(op); - return META_TABLE[static_cast(op)].name; -} - -} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/opcode.h b/src/shader_recompiler/frontend/ir/opcode.h deleted file mode 100644 index 1f4440379..000000000 --- a/src/shader_recompiler/frontend/ir/opcode.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include - -#include - -#include "shader_recompiler/frontend/ir/type.h" - -namespace Shader::IR { - -enum class Opcode { -#define OPCODE(name, ...) name, -#include "opcode.inc" -#undef OPCODE -}; - -/// Get return type of an opcode -[[nodiscard]] Type TypeOf(Opcode op); - -/// Get the number of arguments an opcode accepts -[[nodiscard]] size_t NumArgsOf(Opcode op); - -/// Get the required type of an argument of an opcode -[[nodiscard]] Type ArgTypeOf(Opcode op, size_t arg_index); - -/// Get the name of an opcode -[[nodiscard]] std::string_view NameOf(Opcode op); - -} // namespace Shader::IR - -template <> -struct fmt::formatter { - constexpr auto parse(format_parse_context& ctx) { - return ctx.begin(); - } - template - auto format(const Shader::IR::Opcode& op, FormatContext& ctx) { - return format_to(ctx.out(), "{}", Shader::IR::NameOf(op)); - } -}; diff --git a/src/shader_recompiler/frontend/ir/opcode.inc b/src/shader_recompiler/frontend/ir/opcode.inc deleted file mode 100644 index 6eb105d92..000000000 --- a/src/shader_recompiler/frontend/ir/opcode.inc +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... -OPCODE(Void, Void, ) -OPCODE(Identity, Opaque, Opaque, ) -OPCODE(Phi, Opaque, /*todo*/ ) - -// Control flow -OPCODE(Branch, Void, Label, ) -OPCODE(BranchConditional, Void, U1, Label, Label, ) -OPCODE(Exit, Void, ) -OPCODE(Return, Void, ) -OPCODE(Unreachable, Void, ) - -// Context getters/setters -OPCODE(GetRegister, U32, Reg, ) -OPCODE(SetRegister, Void, Reg, U32, ) -OPCODE(GetPred, U1, Pred, ) -OPCODE(SetPred, Void, Pred, U1, ) -OPCODE(GetCbuf, U32, U32, U32, ) -OPCODE(GetAttribute, U32, Attribute, ) -OPCODE(SetAttribute, U32, Attribute, ) -OPCODE(GetAttributeIndexed, U32, U32, ) -OPCODE(SetAttributeIndexed, U32, U32, ) -OPCODE(GetZFlag, U1, Void, ) -OPCODE(GetSFlag, U1, Void, ) -OPCODE(GetCFlag, U1, Void, ) -OPCODE(GetOFlag, U1, Void, ) -OPCODE(SetZFlag, Void, U1, ) -OPCODE(SetSFlag, Void, U1, ) -OPCODE(SetCFlag, Void, U1, ) -OPCODE(SetOFlag, Void, U1, ) -OPCODE(WorkgroupIdX, U32, ) -OPCODE(WorkgroupIdY, U32, ) -OPCODE(WorkgroupIdZ, U32, ) -OPCODE(LocalInvocationIdX, U32, ) -OPCODE(LocalInvocationIdY, U32, ) -OPCODE(LocalInvocationIdZ, U32, ) - -// Undefined -OPCODE(Undef1, U1, ) -OPCODE(Undef8, U8, ) -OPCODE(Undef16, U16, ) -OPCODE(Undef32, U32, ) -OPCODE(Undef64, U64, ) - -// Memory operations -OPCODE(LoadGlobalU8, U32, U64, ) -OPCODE(LoadGlobalS8, U32, U64, ) -OPCODE(LoadGlobalU16, U32, U64, ) -OPCODE(LoadGlobalS16, U32, U64, ) -OPCODE(LoadGlobal32, U32, U64, ) -OPCODE(LoadGlobal64, U32x2, U64, ) -OPCODE(LoadGlobal128, U32x4, U64, ) -OPCODE(WriteGlobalU8, Void, U64, U32, ) -OPCODE(WriteGlobalS8, Void, U64, U32, ) -OPCODE(WriteGlobalU16, Void, U64, U32, ) -OPCODE(WriteGlobalS16, Void, U64, U32, ) -OPCODE(WriteGlobal32, Void, U64, U32, ) -OPCODE(WriteGlobal64, Void, U64, U32x2, ) -OPCODE(WriteGlobal128, Void, U64, U32x4, ) - -// Storage buffer operations -OPCODE(LoadStorageU8, U32, U32, U32, ) -OPCODE(LoadStorageS8, U32, U32, U32, ) -OPCODE(LoadStorageU16, U32, U32, U32, ) -OPCODE(LoadStorageS16, U32, U32, U32, ) -OPCODE(LoadStorage32, U32, U32, U32, ) -OPCODE(LoadStorage64, U32x2, U32, U32, ) -OPCODE(LoadStorage128, U32x4, U32, U32, ) -OPCODE(WriteStorageU8, Void, U32, U32, U32, ) -OPCODE(WriteStorageS8, Void, U32, U32, U32, ) -OPCODE(WriteStorageU16, Void, U32, U32, U32, ) -OPCODE(WriteStorageS16, Void, U32, U32, U32, ) -OPCODE(WriteStorage32, Void, U32, U32, U32, ) -OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) -OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) - -// Vector utility -OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) -OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) -OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) -OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) -OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) -OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) -OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) -OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) -OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) -OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) -OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) -OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) -OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) -OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) -OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) -OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) -OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) -OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) -OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) -OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) -OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) -OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) -OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) -OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) - -// Select operations -OPCODE(Select8, U8, U1, U8, U8, ) -OPCODE(Select16, U16, U1, U16, U16, ) -OPCODE(Select32, U32, U1, U32, U32, ) -OPCODE(Select64, U64, U1, U64, U64, ) - -// Bitwise conversions -OPCODE(BitCastU16F16, U16, F16, ) -OPCODE(BitCastU32F32, U32, F32, ) -OPCODE(BitCastU64F64, U64, F64, ) -OPCODE(BitCastF16U16, F16, U16, ) -OPCODE(BitCastF32U32, F32, U32, ) -OPCODE(BitCastF64U64, F64, U64, ) -OPCODE(PackUint2x32, U64, U32x2, ) -OPCODE(UnpackUint2x32, U32x2, U64, ) -OPCODE(PackFloat2x16, U32, F16x2, ) -OPCODE(UnpackFloat2x16, F16x2, U32, ) -OPCODE(PackDouble2x32, U64, U32x2, ) -OPCODE(UnpackDouble2x32, U32x2, U64, ) - -// Pseudo-operation, handled specially at final emit -OPCODE(GetZeroFromOp, U1, Opaque, ) -OPCODE(GetSignFromOp, U1, Opaque, ) -OPCODE(GetCarryFromOp, U1, Opaque, ) -OPCODE(GetOverflowFromOp, U1, Opaque, ) - -// Floating-point operations -OPCODE(FPAbs16, F16, F16, ) -OPCODE(FPAbs32, F32, F32, ) -OPCODE(FPAbs64, F64, F64, ) -OPCODE(FPAdd16, F16, F16, F16, ) -OPCODE(FPAdd32, F32, F32, F32, ) -OPCODE(FPAdd64, F64, F64, F64, ) -OPCODE(FPFma16, F16, F16, F16, F16, ) -OPCODE(FPFma32, F32, F32, F32, F32, ) -OPCODE(FPFma64, F64, F64, F64, F64, ) -OPCODE(FPMax32, F32, F32, F32, ) -OPCODE(FPMax64, F64, F64, F64, ) -OPCODE(FPMin32, F32, F32, F32, ) -OPCODE(FPMin64, F64, F64, F64, ) -OPCODE(FPMul16, F16, F16, F16, ) -OPCODE(FPMul32, F32, F32, F32, ) -OPCODE(FPMul64, F64, F64, F64, ) -OPCODE(FPNeg16, F16, F16, ) -OPCODE(FPNeg32, F32, F32, ) -OPCODE(FPNeg64, F64, F64, ) -OPCODE(FPRecip32, F32, F32, ) -OPCODE(FPRecip64, F64, F64, ) -OPCODE(FPRecipSqrt32, F32, F32, ) -OPCODE(FPRecipSqrt64, F64, F64, ) -OPCODE(FPSqrt, F32, F32, ) -OPCODE(FPSin, F32, F32, ) -OPCODE(FPSinNotReduced, F32, F32, ) -OPCODE(FPExp2, F32, F32, ) -OPCODE(FPExp2NotReduced, F32, F32, ) -OPCODE(FPCos, F32, F32, ) -OPCODE(FPCosNotReduced, F32, F32, ) -OPCODE(FPLog2, F32, F32, ) -OPCODE(FPSaturate16, F16, F16, ) -OPCODE(FPSaturate32, F32, F32, ) -OPCODE(FPSaturate64, F64, F64, ) -OPCODE(FPRoundEven16, F16, F16, ) -OPCODE(FPRoundEven32, F32, F32, ) -OPCODE(FPRoundEven64, F64, F64, ) -OPCODE(FPFloor16, F16, F16, ) -OPCODE(FPFloor32, F32, F32, ) -OPCODE(FPFloor64, F64, F64, ) -OPCODE(FPCeil16, F16, F16, ) -OPCODE(FPCeil32, F32, F32, ) -OPCODE(FPCeil64, F64, F64, ) -OPCODE(FPTrunc16, F16, F16, ) -OPCODE(FPTrunc32, F32, F32, ) -OPCODE(FPTrunc64, F64, F64, ) - -// Integer operations -OPCODE(IAdd32, U32, U32, U32, ) -OPCODE(IAdd64, U64, U64, U64, ) -OPCODE(ISub32, U32, U32, U32, ) -OPCODE(ISub64, U64, U64, U64, ) -OPCODE(IMul32, U32, U32, U32, ) -OPCODE(INeg32, U32, U32, ) -OPCODE(IAbs32, U32, U32, ) -OPCODE(ShiftLeftLogical32, U32, U32, U32, ) -OPCODE(ShiftRightLogical32, U32, U32, U32, ) -OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) -OPCODE(BitwiseAnd32, U32, U32, U32, ) -OPCODE(BitwiseOr32, U32, U32, U32, ) -OPCODE(BitwiseXor32, U32, U32, U32, ) -OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) -OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) -OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) - -OPCODE(SLessThan, U1, U32, U32, ) -OPCODE(ULessThan, U1, U32, U32, ) -OPCODE(IEqual, U1, U32, U32, ) -OPCODE(SLessThanEqual, U1, U32, U32, ) -OPCODE(ULessThanEqual, U1, U32, U32, ) -OPCODE(SGreaterThan, U1, U32, U32, ) -OPCODE(UGreaterThan, U1, U32, U32, ) -OPCODE(INotEqual, U1, U32, U32, ) -OPCODE(SGreaterThanEqual, U1, U32, U32, ) -OPCODE(UGreaterThanEqual, U1, U32, U32, ) - -// Logical operations -OPCODE(LogicalOr, U1, U1, U1, ) -OPCODE(LogicalAnd, U1, U1, U1, ) -OPCODE(LogicalXor, U1, U1, U1, ) -OPCODE(LogicalNot, U1, U1, ) - -// Conversion operations -OPCODE(ConvertS16F16, U32, F16, ) -OPCODE(ConvertS16F32, U32, F32, ) -OPCODE(ConvertS16F64, U32, F64, ) -OPCODE(ConvertS32F16, U32, F16, ) -OPCODE(ConvertS32F32, U32, F32, ) -OPCODE(ConvertS32F64, U32, F64, ) -OPCODE(ConvertS64F16, U64, F16, ) -OPCODE(ConvertS64F32, U64, F32, ) -OPCODE(ConvertS64F64, U64, F64, ) -OPCODE(ConvertU16F16, U32, F16, ) -OPCODE(ConvertU16F32, U32, F32, ) -OPCODE(ConvertU16F64, U32, F64, ) -OPCODE(ConvertU32F16, U32, F16, ) -OPCODE(ConvertU32F32, U32, F32, ) -OPCODE(ConvertU32F64, U32, F64, ) -OPCODE(ConvertU64F16, U64, F16, ) -OPCODE(ConvertU64F32, U64, F32, ) -OPCODE(ConvertU64F64, U64, F64, ) - -OPCODE(ConvertU64U32, U64, U32, ) -OPCODE(ConvertU32U64, U32, U64, ) diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp new file mode 100644 index 000000000..1f188411a --- /dev/null +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -0,0 +1,67 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/opcodes.h" + +namespace Shader::IR { +namespace { +struct OpcodeMeta { + std::string_view name; + Type type; + std::array arg_types; +}; + +using enum Type; + +constexpr std::array META_TABLE{ +#define OPCODE(name_token, type_token, ...) \ + OpcodeMeta{ \ + .name{#name_token}, \ + .type{type_token}, \ + .arg_types{__VA_ARGS__}, \ + }, +#include "opcodes.inc" +#undef OPCODE +}; + +void ValidateOpcode(Opcode op) { + const size_t raw{static_cast(op)}; + if (raw >= META_TABLE.size()) { + throw InvalidArgument("Invalid opcode with raw value {}", raw); + } +} +} // Anonymous namespace + +Type TypeOf(Opcode op) { + ValidateOpcode(op); + return META_TABLE[static_cast(op)].type; +} + +size_t NumArgsOf(Opcode op) { + ValidateOpcode(op); + const auto& arg_types{META_TABLE[static_cast(op)].arg_types}; + const auto distance{std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void))}; + return static_cast(distance); +} + +Type ArgTypeOf(Opcode op, size_t arg_index) { + ValidateOpcode(op); + const auto& arg_types{META_TABLE[static_cast(op)].arg_types}; + if (arg_index >= arg_types.size() || arg_types[arg_index] == Type::Void) { + throw InvalidArgument("Out of bounds argument"); + } + return arg_types[arg_index]; +} + +std::string_view NameOf(Opcode op) { + ValidateOpcode(op); + return META_TABLE[static_cast(op)].name; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h new file mode 100644 index 000000000..999fb2e77 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/opcodes.h @@ -0,0 +1,44 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include + +#include "shader_recompiler/frontend/ir/type.h" + +namespace Shader::IR { + +enum class Opcode { +#define OPCODE(name, ...) name, +#include "opcodes.inc" +#undef OPCODE +}; + +/// Get return type of an opcode +[[nodiscard]] Type TypeOf(Opcode op); + +/// Get the number of arguments an opcode accepts +[[nodiscard]] size_t NumArgsOf(Opcode op); + +/// Get the required type of an argument of an opcode +[[nodiscard]] Type ArgTypeOf(Opcode op, size_t arg_index); + +/// Get the name of an opcode +[[nodiscard]] std::string_view NameOf(Opcode op); + +} // namespace Shader::IR + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template + auto format(const Shader::IR::Opcode& op, FormatContext& ctx) { + return format_to(ctx.out(), "{}", Shader::IR::NameOf(op)); + } +}; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc new file mode 100644 index 000000000..6eb105d92 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -0,0 +1,237 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... +OPCODE(Void, Void, ) +OPCODE(Identity, Opaque, Opaque, ) +OPCODE(Phi, Opaque, /*todo*/ ) + +// Control flow +OPCODE(Branch, Void, Label, ) +OPCODE(BranchConditional, Void, U1, Label, Label, ) +OPCODE(Exit, Void, ) +OPCODE(Return, Void, ) +OPCODE(Unreachable, Void, ) + +// Context getters/setters +OPCODE(GetRegister, U32, Reg, ) +OPCODE(SetRegister, Void, Reg, U32, ) +OPCODE(GetPred, U1, Pred, ) +OPCODE(SetPred, Void, Pred, U1, ) +OPCODE(GetCbuf, U32, U32, U32, ) +OPCODE(GetAttribute, U32, Attribute, ) +OPCODE(SetAttribute, U32, Attribute, ) +OPCODE(GetAttributeIndexed, U32, U32, ) +OPCODE(SetAttributeIndexed, U32, U32, ) +OPCODE(GetZFlag, U1, Void, ) +OPCODE(GetSFlag, U1, Void, ) +OPCODE(GetCFlag, U1, Void, ) +OPCODE(GetOFlag, U1, Void, ) +OPCODE(SetZFlag, Void, U1, ) +OPCODE(SetSFlag, Void, U1, ) +OPCODE(SetCFlag, Void, U1, ) +OPCODE(SetOFlag, Void, U1, ) +OPCODE(WorkgroupIdX, U32, ) +OPCODE(WorkgroupIdY, U32, ) +OPCODE(WorkgroupIdZ, U32, ) +OPCODE(LocalInvocationIdX, U32, ) +OPCODE(LocalInvocationIdY, U32, ) +OPCODE(LocalInvocationIdZ, U32, ) + +// Undefined +OPCODE(Undef1, U1, ) +OPCODE(Undef8, U8, ) +OPCODE(Undef16, U16, ) +OPCODE(Undef32, U32, ) +OPCODE(Undef64, U64, ) + +// Memory operations +OPCODE(LoadGlobalU8, U32, U64, ) +OPCODE(LoadGlobalS8, U32, U64, ) +OPCODE(LoadGlobalU16, U32, U64, ) +OPCODE(LoadGlobalS16, U32, U64, ) +OPCODE(LoadGlobal32, U32, U64, ) +OPCODE(LoadGlobal64, U32x2, U64, ) +OPCODE(LoadGlobal128, U32x4, U64, ) +OPCODE(WriteGlobalU8, Void, U64, U32, ) +OPCODE(WriteGlobalS8, Void, U64, U32, ) +OPCODE(WriteGlobalU16, Void, U64, U32, ) +OPCODE(WriteGlobalS16, Void, U64, U32, ) +OPCODE(WriteGlobal32, Void, U64, U32, ) +OPCODE(WriteGlobal64, Void, U64, U32x2, ) +OPCODE(WriteGlobal128, Void, U64, U32x4, ) + +// Storage buffer operations +OPCODE(LoadStorageU8, U32, U32, U32, ) +OPCODE(LoadStorageS8, U32, U32, U32, ) +OPCODE(LoadStorageU16, U32, U32, U32, ) +OPCODE(LoadStorageS16, U32, U32, U32, ) +OPCODE(LoadStorage32, U32, U32, U32, ) +OPCODE(LoadStorage64, U32x2, U32, U32, ) +OPCODE(LoadStorage128, U32x4, U32, U32, ) +OPCODE(WriteStorageU8, Void, U32, U32, U32, ) +OPCODE(WriteStorageS8, Void, U32, U32, U32, ) +OPCODE(WriteStorageU16, Void, U32, U32, U32, ) +OPCODE(WriteStorageS16, Void, U32, U32, U32, ) +OPCODE(WriteStorage32, Void, U32, U32, U32, ) +OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) +OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) + +// Vector utility +OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) +OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) +OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) +OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) +OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) +OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) +OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) +OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) +OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) +OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) +OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) +OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) +OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) +OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) +OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) +OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) +OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) +OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) +OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) +OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) +OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) +OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) +OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) +OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) + +// Select operations +OPCODE(Select8, U8, U1, U8, U8, ) +OPCODE(Select16, U16, U1, U16, U16, ) +OPCODE(Select32, U32, U1, U32, U32, ) +OPCODE(Select64, U64, U1, U64, U64, ) + +// Bitwise conversions +OPCODE(BitCastU16F16, U16, F16, ) +OPCODE(BitCastU32F32, U32, F32, ) +OPCODE(BitCastU64F64, U64, F64, ) +OPCODE(BitCastF16U16, F16, U16, ) +OPCODE(BitCastF32U32, F32, U32, ) +OPCODE(BitCastF64U64, F64, U64, ) +OPCODE(PackUint2x32, U64, U32x2, ) +OPCODE(UnpackUint2x32, U32x2, U64, ) +OPCODE(PackFloat2x16, U32, F16x2, ) +OPCODE(UnpackFloat2x16, F16x2, U32, ) +OPCODE(PackDouble2x32, U64, U32x2, ) +OPCODE(UnpackDouble2x32, U32x2, U64, ) + +// Pseudo-operation, handled specially at final emit +OPCODE(GetZeroFromOp, U1, Opaque, ) +OPCODE(GetSignFromOp, U1, Opaque, ) +OPCODE(GetCarryFromOp, U1, Opaque, ) +OPCODE(GetOverflowFromOp, U1, Opaque, ) + +// Floating-point operations +OPCODE(FPAbs16, F16, F16, ) +OPCODE(FPAbs32, F32, F32, ) +OPCODE(FPAbs64, F64, F64, ) +OPCODE(FPAdd16, F16, F16, F16, ) +OPCODE(FPAdd32, F32, F32, F32, ) +OPCODE(FPAdd64, F64, F64, F64, ) +OPCODE(FPFma16, F16, F16, F16, F16, ) +OPCODE(FPFma32, F32, F32, F32, F32, ) +OPCODE(FPFma64, F64, F64, F64, F64, ) +OPCODE(FPMax32, F32, F32, F32, ) +OPCODE(FPMax64, F64, F64, F64, ) +OPCODE(FPMin32, F32, F32, F32, ) +OPCODE(FPMin64, F64, F64, F64, ) +OPCODE(FPMul16, F16, F16, F16, ) +OPCODE(FPMul32, F32, F32, F32, ) +OPCODE(FPMul64, F64, F64, F64, ) +OPCODE(FPNeg16, F16, F16, ) +OPCODE(FPNeg32, F32, F32, ) +OPCODE(FPNeg64, F64, F64, ) +OPCODE(FPRecip32, F32, F32, ) +OPCODE(FPRecip64, F64, F64, ) +OPCODE(FPRecipSqrt32, F32, F32, ) +OPCODE(FPRecipSqrt64, F64, F64, ) +OPCODE(FPSqrt, F32, F32, ) +OPCODE(FPSin, F32, F32, ) +OPCODE(FPSinNotReduced, F32, F32, ) +OPCODE(FPExp2, F32, F32, ) +OPCODE(FPExp2NotReduced, F32, F32, ) +OPCODE(FPCos, F32, F32, ) +OPCODE(FPCosNotReduced, F32, F32, ) +OPCODE(FPLog2, F32, F32, ) +OPCODE(FPSaturate16, F16, F16, ) +OPCODE(FPSaturate32, F32, F32, ) +OPCODE(FPSaturate64, F64, F64, ) +OPCODE(FPRoundEven16, F16, F16, ) +OPCODE(FPRoundEven32, F32, F32, ) +OPCODE(FPRoundEven64, F64, F64, ) +OPCODE(FPFloor16, F16, F16, ) +OPCODE(FPFloor32, F32, F32, ) +OPCODE(FPFloor64, F64, F64, ) +OPCODE(FPCeil16, F16, F16, ) +OPCODE(FPCeil32, F32, F32, ) +OPCODE(FPCeil64, F64, F64, ) +OPCODE(FPTrunc16, F16, F16, ) +OPCODE(FPTrunc32, F32, F32, ) +OPCODE(FPTrunc64, F64, F64, ) + +// Integer operations +OPCODE(IAdd32, U32, U32, U32, ) +OPCODE(IAdd64, U64, U64, U64, ) +OPCODE(ISub32, U32, U32, U32, ) +OPCODE(ISub64, U64, U64, U64, ) +OPCODE(IMul32, U32, U32, U32, ) +OPCODE(INeg32, U32, U32, ) +OPCODE(IAbs32, U32, U32, ) +OPCODE(ShiftLeftLogical32, U32, U32, U32, ) +OPCODE(ShiftRightLogical32, U32, U32, U32, ) +OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) +OPCODE(BitwiseAnd32, U32, U32, U32, ) +OPCODE(BitwiseOr32, U32, U32, U32, ) +OPCODE(BitwiseXor32, U32, U32, U32, ) +OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) +OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) +OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) + +OPCODE(SLessThan, U1, U32, U32, ) +OPCODE(ULessThan, U1, U32, U32, ) +OPCODE(IEqual, U1, U32, U32, ) +OPCODE(SLessThanEqual, U1, U32, U32, ) +OPCODE(ULessThanEqual, U1, U32, U32, ) +OPCODE(SGreaterThan, U1, U32, U32, ) +OPCODE(UGreaterThan, U1, U32, U32, ) +OPCODE(INotEqual, U1, U32, U32, ) +OPCODE(SGreaterThanEqual, U1, U32, U32, ) +OPCODE(UGreaterThanEqual, U1, U32, U32, ) + +// Logical operations +OPCODE(LogicalOr, U1, U1, U1, ) +OPCODE(LogicalAnd, U1, U1, U1, ) +OPCODE(LogicalXor, U1, U1, U1, ) +OPCODE(LogicalNot, U1, U1, ) + +// Conversion operations +OPCODE(ConvertS16F16, U32, F16, ) +OPCODE(ConvertS16F32, U32, F32, ) +OPCODE(ConvertS16F64, U32, F64, ) +OPCODE(ConvertS32F16, U32, F16, ) +OPCODE(ConvertS32F32, U32, F32, ) +OPCODE(ConvertS32F64, U32, F64, ) +OPCODE(ConvertS64F16, U64, F16, ) +OPCODE(ConvertS64F32, U64, F32, ) +OPCODE(ConvertS64F64, U64, F64, ) +OPCODE(ConvertU16F16, U32, F16, ) +OPCODE(ConvertU16F32, U32, F32, ) +OPCODE(ConvertU16F64, U32, F64, ) +OPCODE(ConvertU32F16, U32, F16, ) +OPCODE(ConvertU32F32, U32, F32, ) +OPCODE(ConvertU32F64, U32, F64, ) +OPCODE(ConvertU64F16, U64, F16, ) +OPCODE(ConvertU64F32, U64, F32, ) +OPCODE(ConvertU64F64, U64, F64, ) + +OPCODE(ConvertU64U32, U64, U32, ) +OPCODE(ConvertU32U64, U32, U64, ) diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp new file mode 100644 index 000000000..0ce99ef2a --- /dev/null +++ b/src/shader_recompiler/frontend/ir/program.cpp @@ -0,0 +1,38 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include + +#include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/frontend/ir/program.h" + +namespace Shader::IR { + +std::string DumpProgram(const Program& program) { + size_t index{0}; + std::map inst_to_index; + std::map block_to_index; + + for (const IR::Function& function : program.functions) { + for (const IR::Block* const block : function.blocks) { + block_to_index.emplace(block, index); + ++index; + } + } + std::string ret; + for (const IR::Function& function : program.functions) { + ret += fmt::format("Function\n"); + for (const auto& block : function.blocks) { + ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; + } + } + return ret; +} + +} // namespace Shader::IR \ No newline at end of file diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h new file mode 100644 index 000000000..efaf1aa1e --- /dev/null +++ b/src/shader_recompiler/frontend/ir/program.h @@ -0,0 +1,21 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include + +#include "shader_recompiler/frontend/ir/function.h" + +namespace Shader::IR { + +struct Program { + boost::container::small_vector functions; +}; + +[[nodiscard]] std::string DumpProgram(const Program& program); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 93ff8ccf1..9ea61813b 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -3,7 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/frontend/ir/microinstruction.h" -#include "shader_recompiler/frontend/ir/opcode.h" +#include "shader_recompiler/frontend/ir/opcodes.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::IR { diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 20ada8afd..49b369282 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -16,7 +16,7 @@ #include "shader_recompiler/frontend/ir/condition.h" #include "shader_recompiler/frontend/maxwell/instruction.h" #include "shader_recompiler/frontend/maxwell/location.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" namespace Shader::Maxwell::Flow { diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp index ab1cc6c8d..bd85afa1e 100644 --- a/src/shader_recompiler/frontend/maxwell/decode.cpp +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp @@ -11,7 +11,7 @@ #include "common/common_types.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/maxwell/decode.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" namespace Shader::Maxwell { namespace { diff --git a/src/shader_recompiler/frontend/maxwell/decode.h b/src/shader_recompiler/frontend/maxwell/decode.h index 2a3dd28e8..b4f080fd7 100644 --- a/src/shader_recompiler/frontend/maxwell/decode.h +++ b/src/shader_recompiler/frontend/maxwell/decode.h @@ -5,7 +5,7 @@ #pragma once #include "common/common_types.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/opcode.cpp b/src/shader_recompiler/frontend/maxwell/opcode.cpp deleted file mode 100644 index 8a7bdb611..000000000 --- a/src/shader_recompiler/frontend/maxwell/opcode.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" - -namespace Shader::Maxwell { -namespace { -constexpr std::array NAME_TABLE{ -#define INST(name, cute, encode) #cute, -#include "maxwell.inc" -#undef INST -}; -} // Anonymous namespace - -const char* NameOf(Opcode opcode) { - if (static_cast(opcode) >= NAME_TABLE.size()) { - throw InvalidArgument("Invalid opcode with raw value {}", static_cast(opcode)); - } - return NAME_TABLE[static_cast(opcode)]; -} - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/opcode.h b/src/shader_recompiler/frontend/maxwell/opcode.h deleted file mode 100644 index cd574f29d..000000000 --- a/src/shader_recompiler/frontend/maxwell/opcode.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include - -namespace Shader::Maxwell { - -enum class Opcode { -#define INST(name, cute, encode) name, -#include "maxwell.inc" -#undef INST -}; - -const char* NameOf(Opcode opcode); - -} // namespace Shader::Maxwell - -template <> -struct fmt::formatter { - constexpr auto parse(format_parse_context& ctx) { - return ctx.begin(); - } - template - auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) { - return format_to(ctx.out(), "{}", NameOf(opcode)); - } -}; diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.cpp b/src/shader_recompiler/frontend/maxwell/opcodes.cpp new file mode 100644 index 000000000..12ddf2ac9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/opcodes.cpp @@ -0,0 +1,26 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" + +namespace Shader::Maxwell { +namespace { +constexpr std::array NAME_TABLE{ +#define INST(name, cute, encode) #cute, +#include "maxwell.inc" +#undef INST +}; +} // Anonymous namespace + +const char* NameOf(Opcode opcode) { + if (static_cast(opcode) >= NAME_TABLE.size()) { + throw InvalidArgument("Invalid opcode with raw value {}", static_cast(opcode)); + } + return NAME_TABLE[static_cast(opcode)]; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.h b/src/shader_recompiler/frontend/maxwell/opcodes.h new file mode 100644 index 000000000..cd574f29d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/opcodes.h @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +namespace Shader::Maxwell { + +enum class Opcode { +#define INST(name, cute, encode) name, +#include "maxwell.inc" +#undef INST +}; + +const char* NameOf(Opcode opcode); + +} // namespace Shader::Maxwell + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + template + auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) { + return format_to(ctx.out(), "{}", NameOf(opcode)); + } +}; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index b3f2de852..8cdd20804 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -5,6 +5,7 @@ #include #include +#include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/maxwell/program.h" #include "shader_recompiler/frontend/maxwell/termination_code.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" @@ -12,17 +13,18 @@ namespace Shader::Maxwell { namespace { -void TranslateCode(Environment& env, const Flow::Function& cfg_function, IR::Function& function, - std::span block_map, IR::Block* block_memory) { +void TranslateCode(ObjectPool& inst_pool, ObjectPool& block_pool, + Environment& env, const Flow::Function& cfg_function, IR::Function& function, + std::span block_map) { const size_t num_blocks{cfg_function.blocks.size()}; function.blocks.reserve(num_blocks); for (const Flow::BlockId block_id : cfg_function.blocks) { const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - function.blocks.emplace_back(std::construct_at(block_memory, Translate(env, flow_block))); - block_map[flow_block.id] = function.blocks.back().get(); - ++block_memory; + IR::Block* const ir_block{block_pool.Create(Translate(inst_pool, env, flow_block))}; + block_map[flow_block.id] = ir_block; + function.blocks.emplace_back(ir_block); } } @@ -34,21 +36,24 @@ void EmitTerminationInsts(const Flow::Function& cfg_function, } } -void TranslateFunction(Environment& env, const Flow::Function& cfg_function, IR::Function& function, - IR::Block* block_memory) { +void TranslateFunction(ObjectPool& inst_pool, ObjectPool& block_pool, + Environment& env, const Flow::Function& cfg_function, + IR::Function& function) { std::vector block_map; block_map.resize(cfg_function.blocks_data.size()); - TranslateCode(env, cfg_function, function, block_map, block_memory); + TranslateCode(inst_pool, block_pool, env, cfg_function, function, block_map); EmitTerminationInsts(cfg_function, block_map); } } // Anonymous namespace -Program::Program(Environment& env, const Flow::CFG& cfg) { +IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, + Environment& env, const Flow::CFG& cfg) { + IR::Program program; + auto& functions{program.functions}; functions.reserve(cfg.Functions().size()); for (const Flow::Function& cfg_function : cfg.Functions()) { - TranslateFunction(env, cfg_function, functions.emplace_back(), - block_alloc_pool.allocate(cfg_function.blocks.size())); + TranslateFunction(inst_pool, block_pool, env, cfg_function, functions.emplace_back()); } std::ranges::for_each(functions, Optimization::SsaRewritePass); for (IR::Function& function : functions) { @@ -59,27 +64,7 @@ Program::Program(Environment& env, const Flow::CFG& cfg) { Optimization::VerificationPass(function); } //*/ -} - -std::string DumpProgram(const Program& program) { - size_t index{0}; - std::map inst_to_index; - std::map block_to_index; - - for (const IR::Function& function : program.functions) { - for (const auto& block : function.blocks) { - block_to_index.emplace(block.get(), index); - ++index; - } - } - std::string ret; - for (const IR::Function& function : program.functions) { - ret += fmt::format("Function\n"); - for (const auto& block : function.blocks) { - ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; - } - } - return ret; + return program; } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h index 36e678a9e..3355ab129 100644 --- a/src/shader_recompiler/frontend/maxwell/program.h +++ b/src/shader_recompiler/frontend/maxwell/program.h @@ -9,28 +9,16 @@ #include #include -#include #include "shader_recompiler/environment.h" -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" namespace Shader::Maxwell { -class Program { - friend std::string DumpProgram(const Program& program); - -public: - explicit Program(Environment& env, const Flow::CFG& cfg); - -private: - boost::pool_allocator - block_alloc_pool; - boost::container::small_vector functions; -}; - -[[nodiscard]] std::string DumpProgram(const Program& program); +[[nodiscard]] IR::Program TranslateProgram(ObjectPool& inst_pool, + ObjectPool& block_pool, Environment& env, + const Flow::CFG& cfg); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index acd8445ad..3d0c48457 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -4,7 +4,7 @@ #include "common/common_types.h" #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp index 90cddb18b..ba005fbf4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp @@ -5,7 +5,7 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index de65173e8..ad97786d4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -6,7 +6,7 @@ #include "common/common_types.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index 9f1570479..727524284 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -5,7 +5,7 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp index 1711d3f48..1f83d1068 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp @@ -5,7 +5,7 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index d70399f6b..1bb160acb 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -4,7 +4,7 @@ #include "common/common_types.h" #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/opcode.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp index 66a306745..dcc3f6c0e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -23,8 +23,9 @@ static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { } } -IR::Block Translate(Environment& env, const Flow::Block& flow_block) { - IR::Block block{flow_block.begin.Offset(), flow_block.end.Offset()}; +IR::Block Translate(ObjectPool& inst_pool, Environment& env, + const Flow::Block& flow_block) { + IR::Block block{inst_pool, flow_block.begin.Offset(), flow_block.end.Offset()}; TranslatorVisitor visitor{env, block}; const Location pc_end{flow_block.end}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h index 788742dea..c1c21b278 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.h +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h @@ -6,11 +6,14 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/maxwell/location.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/location.h" +#include "shader_recompiler/object_pool.h" namespace Shader::Maxwell { -[[nodiscard]] IR::Block Translate(Environment& env, const Flow::Block& flow_block); +[[nodiscard]] IR::Block Translate(ObjectPool& inst_pool, Environment& env, + const Flow::Block& flow_block); } // namespace Shader::Maxwell -- cgit v1.2.3 From da8096e6e35af250dcc56a1af76b8a211df63a90 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 6 Feb 2021 02:38:22 -0300 Subject: shader: Properly store phi on Inst --- src/shader_recompiler/frontend/ir/basic_block.cpp | 33 +++---- .../frontend/ir/microinstruction.cpp | 102 +++++++++++++++------ .../frontend/ir/microinstruction.h | 37 +++++--- src/shader_recompiler/frontend/ir/opcodes.inc | 2 +- 4 files changed, 115 insertions(+), 59 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index 1a5d82135..50c6a83cd 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -129,26 +129,21 @@ std::string DumpBlock(const Block& block, const std::map& } else { ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces } - if (op == Opcode::Phi) { - size_t val_index{0}; - for (const auto& [phi_block, phi_val] : inst.PhiOperands()) { - ret += val_index != 0 ? ", " : " "; - ret += fmt::format("[ {}, {} ]", ArgToIndex(block_to_index, inst_to_index, phi_val), - BlockToIndex(block_to_index, phi_block)); - ++val_index; + const size_t arg_count{NumArgsOf(op)}; + for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { + const Value arg{inst.Arg(arg_index)}; + const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, arg)}; + ret += arg_index != 0 ? ", " : " "; + if (op == Opcode::Phi) { + ret += fmt::format("[ {}, {} ]", arg_index, + BlockToIndex(block_to_index, inst.PhiBlock(arg_index))); + } else { + ret += arg_str; } - } else { - const size_t arg_count{NumArgsOf(op)}; - for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { - const Value arg{inst.Arg(arg_index)}; - ret += arg_index != 0 ? ", " : " "; - ret += ArgToIndex(block_to_index, inst_to_index, arg); - - const Type actual_type{arg.Type()}; - const Type expected_type{ArgTypeOf(op, arg_index)}; - if (!AreTypesCompatible(actual_type, expected_type)) { - ret += fmt::format("", actual_type, expected_type); - } + const Type actual_type{arg.Type()}; + const Type expected_type{ArgTypeOf(op, arg_index)}; + if (!AreTypesCompatible(actual_type, expected_type)) { + ret += fmt::format("", actual_type, expected_type); } } if (TypeOf(op) != Type::Void) { diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index de953838c..e7ca92039 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include +#include #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/microinstruction.h" @@ -30,6 +31,22 @@ static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) inst = nullptr; } +Inst::Inst(IR::Opcode op_, u64 flags_) noexcept : op{op_}, flags{flags_} { + if (op == Opcode::Phi) { + std::construct_at(&phi_args); + } else { + std::construct_at(&args); + } +} + +Inst::~Inst() { + if (op == Opcode::Phi) { + std::destroy_at(&phi_args); + } else { + std::destroy_at(&args); + } +} + bool Inst::MayHaveSideEffects() const noexcept { switch (op) { case Opcode::Branch: @@ -71,7 +88,10 @@ bool Inst::IsPseudoInstruction() const noexcept { } } -bool Inst::AreAllArgsImmediates() const noexcept { +bool Inst::AreAllArgsImmediates() const { + if (op == Opcode::Phi) { + throw LogicError("Testing for all arguments are immediates on phi instruction"); + } return std::all_of(args.begin(), args.begin() + NumArgs(), [](const IR::Value& value) { return value.IsImmediate(); }); } @@ -101,7 +121,7 @@ Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { } size_t Inst::NumArgs() const { - return NumArgsOf(op); + return op == Opcode::Phi ? phi_args.size() : NumArgsOf(op); } IR::Type Inst::Type() const { @@ -109,13 +129,23 @@ IR::Type Inst::Type() const { } Value Inst::Arg(size_t index) const { - if (index >= NumArgsOf(op)) { - throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); + if (op == Opcode::Phi) { + if (index >= phi_args.size()) { + throw InvalidArgument("Out of bounds argument index {} in phi instruction", index); + } + return phi_args[index].second; + } else { + if (index >= NumArgsOf(op)) { + throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); + } + return args[index]; } - return args[index]; } void Inst::SetArg(size_t index, Value value) { + if (op == Opcode::Phi) { + throw LogicError("Setting argument on a phi instruction"); + } if (index >= NumArgsOf(op)) { throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); } @@ -128,15 +158,21 @@ void Inst::SetArg(size_t index, Value value) { args[index] = value; } -std::span> Inst::PhiOperands() const noexcept { - return phi_operands; +Block* Inst::PhiBlock(size_t index) const { + if (op != Opcode::Phi) { + throw LogicError("{} is not a Phi instruction", op); + } + if (index >= phi_args.size()) { + throw InvalidArgument("Out of bounds argument index {} in phi instruction"); + } + return phi_args[index].first; } void Inst::AddPhiOperand(Block* predecessor, const Value& value) { if (!value.IsImmediate()) { Use(value); } - phi_operands.emplace_back(predecessor, value); + phi_args.emplace_back(predecessor, value); } void Inst::Invalidate() { @@ -145,18 +181,22 @@ void Inst::Invalidate() { } void Inst::ClearArgs() { - for (auto& value : args) { - if (!value.IsImmediate()) { - UndoUse(value); + if (op == Opcode::Phi) { + for (auto& pair : phi_args) { + IR::Value& value{pair.second}; + if (!value.IsImmediate()) { + UndoUse(value); + } } - value = {}; - } - for (auto& [phi_block, phi_op] : phi_operands) { - if (!phi_op.IsImmediate()) { - UndoUse(phi_op); + phi_args.clear(); + } else { + for (auto& value : args) { + if (!value.IsImmediate()) { + UndoUse(value); + } + value = {}; } } - phi_operands.clear(); } void Inst::ReplaceUsesWith(Value replacement) { @@ -167,24 +207,29 @@ void Inst::ReplaceUsesWith(Value replacement) { if (!replacement.IsImmediate()) { Use(replacement); } - args[0] = replacement; + if (op == Opcode::Phi) { + phi_args[0].second = replacement; + } else { + args[0] = replacement; + } } void Inst::Use(const Value& value) { - ++value.Inst()->use_count; + Inst* const inst{value.Inst()}; + ++inst->use_count; switch (op) { case Opcode::GetZeroFromOp: - SetPseudoInstruction(value.Inst()->zero_inst, this); + SetPseudoInstruction(inst->zero_inst, this); break; case Opcode::GetSignFromOp: - SetPseudoInstruction(value.Inst()->sign_inst, this); + SetPseudoInstruction(inst->sign_inst, this); break; case Opcode::GetCarryFromOp: - SetPseudoInstruction(value.Inst()->carry_inst, this); + SetPseudoInstruction(inst->carry_inst, this); break; case Opcode::GetOverflowFromOp: - SetPseudoInstruction(value.Inst()->overflow_inst, this); + SetPseudoInstruction(inst->overflow_inst, this); break; default: break; @@ -192,20 +237,21 @@ void Inst::Use(const Value& value) { } void Inst::UndoUse(const Value& value) { - --value.Inst()->use_count; + Inst* const inst{value.Inst()}; + --inst->use_count; switch (op) { case Opcode::GetZeroFromOp: - RemovePseudoInstruction(value.Inst()->zero_inst, Opcode::GetZeroFromOp); + RemovePseudoInstruction(inst->zero_inst, Opcode::GetZeroFromOp); break; case Opcode::GetSignFromOp: - RemovePseudoInstruction(value.Inst()->sign_inst, Opcode::GetSignFromOp); + RemovePseudoInstruction(inst->sign_inst, Opcode::GetSignFromOp); break; case Opcode::GetCarryFromOp: - RemovePseudoInstruction(value.Inst()->carry_inst, Opcode::GetCarryFromOp); + RemovePseudoInstruction(inst->carry_inst, Opcode::GetCarryFromOp); break; case Opcode::GetOverflowFromOp: - RemovePseudoInstruction(value.Inst()->overflow_inst, Opcode::GetOverflowFromOp); + RemovePseudoInstruction(inst->overflow_inst, Opcode::GetOverflowFromOp); break; default: break; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 80baffb2e..ddf0f90a9 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -6,8 +6,8 @@ #include #include -#include #include +#include #include #include @@ -25,7 +25,14 @@ constexpr size_t MAX_ARG_COUNT = 4; class Inst : public boost::intrusive::list_base_hook<> { public: - explicit Inst(Opcode op_, u64 flags_) noexcept : op{op_}, flags{flags_} {} + explicit Inst(Opcode op_, u64 flags_) noexcept; + ~Inst(); + + Inst& operator=(const Inst&) = delete; + Inst(const Inst&) = delete; + + Inst& operator=(Inst&&) = delete; + Inst(Inst&&) = delete; /// Get the number of uses this instruction has. [[nodiscard]] int UseCount() const noexcept { @@ -50,26 +57,26 @@ public: [[nodiscard]] bool IsPseudoInstruction() const noexcept; /// Determines if all arguments of this instruction are immediates. - [[nodiscard]] bool AreAllArgsImmediates() const noexcept; + [[nodiscard]] bool AreAllArgsImmediates() const; /// Determines if there is a pseudo-operation associated with this instruction. [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept; /// Gets a pseudo-operation associated with this instruction [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); - /// Get the number of arguments this instruction has. - [[nodiscard]] size_t NumArgs() const; - /// Get the type this instruction returns. [[nodiscard]] IR::Type Type() const; + /// Get the number of arguments this instruction has. + [[nodiscard]] size_t NumArgs() const; + /// Get the value of a given argument index. [[nodiscard]] Value Arg(size_t index) const; /// Set the value of a given argument index. void SetArg(size_t index, Value value); - /// Get an immutable span to the phi operands. - [[nodiscard]] std::span> PhiOperands() const noexcept; + /// Get a pointer to the block of a phi argument. + [[nodiscard]] Block* PhiBlock(size_t index) const; /// Add phi operand to a phi instruction. void AddPhiOperand(Block* predecessor, const Value& value); @@ -87,18 +94,26 @@ public: } private: + struct NonTriviallyDummy { + NonTriviallyDummy() noexcept {} + }; + void Use(const Value& value); void UndoUse(const Value& value); IR::Opcode op{}; int use_count{}; - std::array args{}; + u64 flags{}; + union { + NonTriviallyDummy dummy{}; + std::array args; + std::vector> phi_args; + }; Inst* zero_inst{}; Inst* sign_inst{}; Inst* carry_inst{}; Inst* overflow_inst{}; - std::vector> phi_operands; - u64 flags{}; }; +static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased its size"); } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 6eb105d92..82b04f37c 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -3,9 +3,9 @@ // Refer to the license.txt file included. // opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... +OPCODE(Phi, Opaque, ) OPCODE(Void, Void, ) OPCODE(Identity, Opaque, Opaque, ) -OPCODE(Phi, Opaque, /*todo*/ ) // Control flow OPCODE(Branch, Void, Label, ) -- cgit v1.2.3 From 6dafb08f52ac78119669a698c4b9a39bffd48f8f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 6 Feb 2021 04:47:53 -0300 Subject: shader: Better constant folding --- src/shader_recompiler/frontend/ir/basic_block.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index 50c6a83cd..da33ff6f1 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -87,7 +87,7 @@ static std::string ArgToIndex(const std::map& block_to_ind } switch (arg.Type()) { case Type::U1: - return fmt::format("#{}", arg.U1() ? '1' : '0'); + return fmt::format("#{}", arg.U1() ? "true" : "false"); case Type::U8: return fmt::format("#{}", arg.U8()); case Type::U16: -- cgit v1.2.3 From 2930dccecc933d6748772e9f51a5724fe1e6771b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Feb 2021 02:54:35 -0300 Subject: spirv: Initial SPIR-V support --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 12 ++++++------ src/shader_recompiler/frontend/ir/opcodes.inc | 12 ++++-------- .../frontend/maxwell/translate/translate.cpp | 10 +++++----- 3 files changed, 15 insertions(+), 19 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 9d7dc034c..ada0be834 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -130,27 +130,27 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) { } U32 IREmitter::WorkgroupIdX() { - return Inst(Opcode::WorkgroupIdX); + return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 0)}; } U32 IREmitter::WorkgroupIdY() { - return Inst(Opcode::WorkgroupIdY); + return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 1)}; } U32 IREmitter::WorkgroupIdZ() { - return Inst(Opcode::WorkgroupIdZ); + return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)}; } U32 IREmitter::LocalInvocationIdX() { - return Inst(Opcode::LocalInvocationIdX); + return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)}; } U32 IREmitter::LocalInvocationIdY() { - return Inst(Opcode::LocalInvocationIdY); + return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 1)}; } U32 IREmitter::LocalInvocationIdZ() { - return Inst(Opcode::LocalInvocationIdZ); + return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 2)}; } U32 IREmitter::LoadGlobalU8(const U64& address) { diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 82b04f37c..5dc65f2df 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -21,9 +21,9 @@ OPCODE(GetPred, U1, Pred OPCODE(SetPred, Void, Pred, U1, ) OPCODE(GetCbuf, U32, U32, U32, ) OPCODE(GetAttribute, U32, Attribute, ) -OPCODE(SetAttribute, U32, Attribute, ) +OPCODE(SetAttribute, Void, Attribute, U32, ) OPCODE(GetAttributeIndexed, U32, U32, ) -OPCODE(SetAttributeIndexed, U32, U32, ) +OPCODE(SetAttributeIndexed, Void, U32, U32, ) OPCODE(GetZFlag, U1, Void, ) OPCODE(GetSFlag, U1, Void, ) OPCODE(GetCFlag, U1, Void, ) @@ -32,12 +32,8 @@ OPCODE(SetZFlag, Void, U1, OPCODE(SetSFlag, Void, U1, ) OPCODE(SetCFlag, Void, U1, ) OPCODE(SetOFlag, Void, U1, ) -OPCODE(WorkgroupIdX, U32, ) -OPCODE(WorkgroupIdY, U32, ) -OPCODE(WorkgroupIdZ, U32, ) -OPCODE(LocalInvocationIdX, U32, ) -OPCODE(LocalInvocationIdY, U32, ) -OPCODE(LocalInvocationIdZ, U32, ) +OPCODE(WorkgroupId, U32x3, ) +OPCODE(LocalInvocationId, U32x3, ) // Undefined OPCODE(Undef1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp index dcc3f6c0e..7e6bb07a2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -11,15 +11,15 @@ namespace Shader::Maxwell { -template +template static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { - using MethodType = decltype(visitor_method); + using MethodType = decltype(method); if constexpr (std::is_invocable_r_v) { - (visitor.*visitor_method)(pc, insn); + (visitor.*method)(pc, insn); } else if constexpr (std::is_invocable_r_v) { - (visitor.*visitor_method)(insn); + (visitor.*method)(insn); } else { - (visitor.*visitor_method)(); + (visitor.*method)(); } } -- cgit v1.2.3 From 9170200a11715d131645d1ffb92e86e6ef0d7e88 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 11 Feb 2021 16:39:06 -0300 Subject: shader: Initial implementation of an AST --- src/shader_recompiler/frontend/ir/basic_block.cpp | 64 +- src/shader_recompiler/frontend/ir/basic_block.h | 40 +- src/shader_recompiler/frontend/ir/condition.cpp | 14 +- src/shader_recompiler/frontend/ir/condition.h | 2 +- src/shader_recompiler/frontend/ir/function.h | 2 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 43 +- src/shader_recompiler/frontend/ir/ir_emitter.h | 23 +- .../frontend/ir/microinstruction.cpp | 4 +- src/shader_recompiler/frontend/ir/opcodes.inc | 16 +- .../frontend/ir/structured_control_flow.cpp | 742 +++++++++++++++++++++ .../frontend/ir/structured_control_flow.h | 22 + .../frontend/maxwell/control_flow.cpp | 426 +++++------- .../frontend/maxwell/control_flow.h | 77 +-- src/shader_recompiler/frontend/maxwell/location.h | 12 +- src/shader_recompiler/frontend/maxwell/program.cpp | 69 +- src/shader_recompiler/frontend/maxwell/program.h | 2 +- .../frontend/maxwell/termination_code.cpp | 86 --- .../frontend/maxwell/termination_code.h | 17 - .../maxwell/translate/impl/integer_shift_left.cpp | 2 +- .../frontend/maxwell/translate/translate.cpp | 17 +- .../frontend/maxwell/translate/translate.h | 7 +- 21 files changed, 1152 insertions(+), 535 deletions(-) create mode 100644 src/shader_recompiler/frontend/ir/structured_control_flow.cpp create mode 100644 src/shader_recompiler/frontend/ir/structured_control_flow.h delete mode 100644 src/shader_recompiler/frontend/maxwell/termination_code.cpp delete mode 100644 src/shader_recompiler/frontend/maxwell/termination_code.h (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index da33ff6f1..b5616f394 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -17,6 +17,8 @@ namespace Shader::IR { Block::Block(ObjectPool& inst_pool_, u32 begin, u32 end) : inst_pool{&inst_pool_}, location_begin{begin}, location_end{end} {} +Block::Block(ObjectPool& inst_pool_) : Block{inst_pool_, 0, 0} {} + Block::~Block() = default; void Block::AppendNewInst(Opcode op, std::initializer_list args) { @@ -38,8 +40,25 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, return result_it; } -void Block::AddImmediatePredecessor(IR::Block* immediate_predecessor) { - imm_predecessors.push_back(immediate_predecessor); +void Block::SetBranches(Condition cond, Block* branch_true_, Block* branch_false_) { + branch_cond = cond; + branch_true = branch_true_; + branch_false = branch_false_; +} + +void Block::SetBranch(Block* branch) { + branch_cond = Condition{true}; + branch_true = branch; +} + +void Block::SetReturn() { + branch_cond = Condition{true}; + branch_true = nullptr; + branch_false = nullptr; +} + +bool Block::IsVirtual() const noexcept { + return location_begin == location_end; } u32 Block::LocationBegin() const noexcept { @@ -58,6 +77,12 @@ const Block::InstructionList& Block::Instructions() const noexcept { return instructions; } +void Block::AddImmediatePredecessor(Block* block) { + if (std::ranges::find(imm_predecessors, block) == imm_predecessors.end()) { + imm_predecessors.push_back(block); + } +} + std::span Block::ImmediatePredecessors() const noexcept { return imm_predecessors; } @@ -70,8 +95,17 @@ static std::string BlockToIndex(const std::map& block_to_i return fmt::format("$", reinterpret_cast(block)); } +static size_t InstIndex(std::map& inst_to_index, size_t& inst_index, + const Inst* inst) { + const auto [it, is_inserted]{inst_to_index.emplace(inst, inst_index + 1)}; + if (is_inserted) { + ++inst_index; + } + return it->second; +} + static std::string ArgToIndex(const std::map& block_to_index, - const std::map& inst_to_index, + std::map& inst_to_index, size_t& inst_index, const Value& arg) { if (arg.IsEmpty()) { return ""; @@ -80,10 +114,7 @@ static std::string ArgToIndex(const std::map& block_to_ind return BlockToIndex(block_to_index, arg.Label()); } if (!arg.IsImmediate()) { - if (const auto it{inst_to_index.find(arg.Inst())}; it != inst_to_index.end()) { - return fmt::format("%{}", it->second); - } - return fmt::format("%", reinterpret_cast(arg.Inst())); + return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst())); } switch (arg.Type()) { case Type::U1: @@ -125,14 +156,14 @@ std::string DumpBlock(const Block& block, const std::map& const Opcode op{inst.Opcode()}; ret += fmt::format("[{:016x}] ", reinterpret_cast(&inst)); if (TypeOf(op) != Type::Void) { - ret += fmt::format("%{:<5} = {}", inst_index, op); + ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op); } else { ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces } - const size_t arg_count{NumArgsOf(op)}; + const size_t arg_count{inst.NumArgs()}; for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { const Value arg{inst.Arg(arg_index)}; - const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, arg)}; + const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, inst_index, arg)}; ret += arg_index != 0 ? ", " : " "; if (op == Opcode::Phi) { ret += fmt::format("[ {}, {} ]", arg_index, @@ -140,10 +171,12 @@ std::string DumpBlock(const Block& block, const std::map& } else { ret += arg_str; } - const Type actual_type{arg.Type()}; - const Type expected_type{ArgTypeOf(op, arg_index)}; - if (!AreTypesCompatible(actual_type, expected_type)) { - ret += fmt::format("", actual_type, expected_type); + if (op != Opcode::Phi) { + const Type actual_type{arg.Type()}; + const Type expected_type{ArgTypeOf(op, arg_index)}; + if (!AreTypesCompatible(actual_type, expected_type)) { + ret += fmt::format("", actual_type, expected_type); + } } } if (TypeOf(op) != Type::Void) { @@ -151,9 +184,6 @@ std::string DumpBlock(const Block& block, const std::map& } else { ret += '\n'; } - - inst_to_index.emplace(&inst, inst_index); - ++inst_index; } return ret; } diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index ec3ad6263..3205705e7 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -11,7 +11,9 @@ #include +#include "shader_recompiler/frontend/ir/condition.h" #include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/object_pool.h" namespace Shader::IR { @@ -26,6 +28,7 @@ public: using const_reverse_iterator = InstructionList::const_reverse_iterator; explicit Block(ObjectPool& inst_pool_, u32 begin, u32 end); + explicit Block(ObjectPool& inst_pool_); ~Block(); Block(const Block&) = delete; @@ -41,9 +44,15 @@ public: iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list args = {}, u64 flags = 0); - /// Adds a new immediate predecessor to the basic block. - void AddImmediatePredecessor(IR::Block* immediate_predecessor); + /// Set the branches to jump to when all instructions have executed. + void SetBranches(Condition cond, Block* branch_true, Block* branch_false); + /// Set the branch to unconditionally jump to when all instructions have executed. + void SetBranch(Block* branch); + /// Mark the block as a return block. + void SetReturn(); + /// Returns true when the block does not implement any guest instructions directly. + [[nodiscard]] bool IsVirtual() const noexcept; /// Gets the starting location of this basic block. [[nodiscard]] u32 LocationBegin() const noexcept; /// Gets the end location for this basic block. @@ -54,8 +63,23 @@ public: /// Gets an immutable reference to the instruction list for this basic block. [[nodiscard]] const InstructionList& Instructions() const noexcept; + /// Adds a new immediate predecessor to this basic block. + void AddImmediatePredecessor(Block* block); /// Gets an immutable span to the immediate predecessors. - [[nodiscard]] std::span ImmediatePredecessors() const noexcept; + [[nodiscard]] std::span ImmediatePredecessors() const noexcept; + + [[nodiscard]] Condition BranchCondition() const noexcept { + return branch_cond; + } + [[nodiscard]] bool IsTerminationBlock() const noexcept { + return !branch_true && !branch_false; + } + [[nodiscard]] Block* TrueBranch() const noexcept { + return branch_true; + } + [[nodiscard]] Block* FalseBranch() const noexcept { + return branch_false; + } [[nodiscard]] bool empty() const { return instructions.empty(); @@ -129,10 +153,18 @@ private: /// List of instructions in this block InstructionList instructions; + /// Condition to choose the branch to take + Condition branch_cond{true}; + /// Block to jump into when the branch condition evaluates as true + Block* branch_true{nullptr}; + /// Block to jump into when the branch condition evaluates as false + Block* branch_false{nullptr}; /// Block immediate predecessors - std::vector imm_predecessors; + std::vector imm_predecessors; }; +using BlockList = std::vector; + [[nodiscard]] std::string DumpBlock(const Block& block); [[nodiscard]] std::string DumpBlock(const Block& block, diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp index edff35dc7..ec1659e2b 100644 --- a/src/shader_recompiler/frontend/ir/condition.cpp +++ b/src/shader_recompiler/frontend/ir/condition.cpp @@ -16,15 +16,13 @@ std::string NameOf(Condition condition) { ret = fmt::to_string(condition.FlowTest()); } const auto [pred, negated]{condition.Pred()}; - if (pred != Pred::PT || negated) { - if (!ret.empty()) { - ret += '&'; - } - if (negated) { - ret += '!'; - } - ret += fmt::to_string(pred); + if (!ret.empty()) { + ret += '&'; } + if (negated) { + ret += '!'; + } + ret += fmt::to_string(pred); return ret; } diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h index 52737025c..16b4ae888 100644 --- a/src/shader_recompiler/frontend/ir/condition.h +++ b/src/shader_recompiler/frontend/ir/condition.h @@ -26,7 +26,7 @@ public: explicit Condition(Pred pred_, bool pred_negated_ = false) noexcept : Condition(FlowTest::T, pred_, pred_negated_) {} - Condition(bool value) : Condition(Pred::PT, !value) {} + explicit Condition(bool value) : Condition(Pred::PT, !value) {} auto operator<=>(const Condition&) const noexcept = default; diff --git a/src/shader_recompiler/frontend/ir/function.h b/src/shader_recompiler/frontend/ir/function.h index bba7d1d39..fd7d56419 100644 --- a/src/shader_recompiler/frontend/ir/function.h +++ b/src/shader_recompiler/frontend/ir/function.h @@ -11,7 +11,7 @@ namespace Shader::IR { struct Function { - boost::container::small_vector blocks; + BlockList blocks; }; } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ada0be834..30932043f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -44,24 +44,27 @@ F64 IREmitter::Imm64(f64 value) const { return F64{Value{value}}; } -void IREmitter::Branch(IR::Block* label) { +void IREmitter::Branch(Block* label) { + label->AddImmediatePredecessor(block); Inst(Opcode::Branch, label); } -void IREmitter::BranchConditional(const U1& cond, IR::Block* true_label, IR::Block* false_label) { - Inst(Opcode::BranchConditional, cond, true_label, false_label); +void IREmitter::BranchConditional(const U1& condition, Block* true_label, Block* false_label) { + true_label->AddImmediatePredecessor(block); + false_label->AddImmediatePredecessor(block); + Inst(Opcode::BranchConditional, condition, true_label, false_label); } -void IREmitter::Exit() { - Inst(Opcode::Exit); +void IREmitter::LoopMerge(Block* merge_block, Block* continue_target) { + Inst(Opcode::LoopMerge, merge_block, continue_target); } -void IREmitter::Return() { - Inst(Opcode::Return); +void IREmitter::SelectionMerge(Block* merge_block) { + Inst(Opcode::SelectionMerge, merge_block); } -void IREmitter::Unreachable() { - Inst(Opcode::Unreachable); +void IREmitter::Return() { + Inst(Opcode::Return); } U32 IREmitter::GetReg(IR::Reg reg) { @@ -81,6 +84,14 @@ U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) { } } +U1 IREmitter::GetGotoVariable(u32 id) { + return Inst(Opcode::GetGotoVariable, id); +} + +void IREmitter::SetGotoVariable(u32 id, const U1& value) { + Inst(Opcode::SetGotoVariable, id, value); +} + void IREmitter::SetPred(IR::Pred pred, const U1& value) { Inst(Opcode::SetPred, pred, value); } @@ -121,6 +132,20 @@ void IREmitter::SetOFlag(const U1& value) { Inst(Opcode::SetOFlag, value); } +U1 IREmitter::Condition(IR::Condition cond) { + if (cond == IR::Condition{true}) { + return Imm1(true); + } else if (cond == IR::Condition{false}) { + return Imm1(false); + } + const FlowTest flow_test{cond.FlowTest()}; + const auto [pred, is_negated]{cond.Pred()}; + if (flow_test == FlowTest::T) { + return GetPred(pred, is_negated); + } + throw NotImplementedException("Condition {}", cond); +} + F32 IREmitter::GetAttribute(IR::Attribute attribute) { return Inst(Opcode::GetAttribute, attribute); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index bfd9916cc..4decb46bc 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -16,11 +16,11 @@ namespace Shader::IR { class IREmitter { public: - explicit IREmitter(Block& block_) : block{block_}, insertion_point{block.end()} {} + explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {} explicit IREmitter(Block& block_, Block::iterator insertion_point_) - : block{block_}, insertion_point{insertion_point_} {} + : block{&block_}, insertion_point{insertion_point_} {} - Block& block; + Block* block; [[nodiscard]] U1 Imm1(bool value) const; [[nodiscard]] U8 Imm8(u8 value) const; @@ -31,11 +31,11 @@ public: [[nodiscard]] U64 Imm64(u64 value) const; [[nodiscard]] F64 Imm64(f64 value) const; - void Branch(IR::Block* label); - void BranchConditional(const U1& cond, IR::Block* true_label, IR::Block* false_label); - void Exit(); + void Branch(Block* label); + void BranchConditional(const U1& condition, Block* true_label, Block* false_label); + void LoopMerge(Block* merge_block, Block* continue_target); + void SelectionMerge(Block* merge_block); void Return(); - void Unreachable(); [[nodiscard]] U32 GetReg(IR::Reg reg); void SetReg(IR::Reg reg, const U32& value); @@ -43,6 +43,9 @@ public: [[nodiscard]] U1 GetPred(IR::Pred pred, bool is_negated = false); void SetPred(IR::Pred pred, const U1& value); + [[nodiscard]] U1 GetGotoVariable(u32 id); + void SetGotoVariable(u32 id, const U1& value); + [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); [[nodiscard]] U1 GetZFlag(); @@ -55,6 +58,8 @@ public: void SetCFlag(const U1& value); void SetOFlag(const U1& value); + [[nodiscard]] U1 Condition(IR::Condition cond); + [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); void SetAttribute(IR::Attribute attribute, const F32& value); @@ -168,7 +173,7 @@ private: template T Inst(Opcode op, Args... args) { - auto it{block.PrependNewInst(insertion_point, op, {Value{args}...})}; + auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})}; return T{Value{&*it}}; } @@ -184,7 +189,7 @@ private: T Inst(Opcode op, Flags flags, Args... args) { u64 raw_flags{}; std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy)); - auto it{block.PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; + auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; return T{Value{&*it}}; } }; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index e7ca92039..b4ae371bd 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -51,9 +51,9 @@ bool Inst::MayHaveSideEffects() const noexcept { switch (op) { case Opcode::Branch: case Opcode::BranchConditional: - case Opcode::Exit: + case Opcode::LoopMerge: + case Opcode::SelectionMerge: case Opcode::Return: - case Opcode::Unreachable: case Opcode::SetAttribute: case Opcode::SetAttributeIndexed: case Opcode::WriteGlobalU8: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 5dc65f2df..ede5e20c2 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -10,15 +10,17 @@ OPCODE(Identity, Opaque, Opaq // Control flow OPCODE(Branch, Void, Label, ) OPCODE(BranchConditional, Void, U1, Label, Label, ) -OPCODE(Exit, Void, ) +OPCODE(LoopMerge, Void, Label, Label, ) +OPCODE(SelectionMerge, Void, Label, ) OPCODE(Return, Void, ) -OPCODE(Unreachable, Void, ) // Context getters/setters OPCODE(GetRegister, U32, Reg, ) OPCODE(SetRegister, Void, Reg, U32, ) OPCODE(GetPred, U1, Pred, ) OPCODE(SetPred, Void, Pred, U1, ) +OPCODE(GetGotoVariable, U1, U32, ) +OPCODE(SetGotoVariable, Void, U32, U1, ) OPCODE(GetCbuf, U32, U32, U32, ) OPCODE(GetAttribute, U32, Attribute, ) OPCODE(SetAttribute, Void, Attribute, U32, ) @@ -36,11 +38,11 @@ OPCODE(WorkgroupId, U32x3, OPCODE(LocalInvocationId, U32x3, ) // Undefined -OPCODE(Undef1, U1, ) -OPCODE(Undef8, U8, ) -OPCODE(Undef16, U16, ) -OPCODE(Undef32, U32, ) -OPCODE(Undef64, U64, ) +OPCODE(UndefU1, U1, ) +OPCODE(UndefU8, U8, ) +OPCODE(UndefU16, U16, ) +OPCODE(UndefU32, U32, ) +OPCODE(UndefU64, U64, ) // Memory operations OPCODE(LoadGlobalU8, U32, U64, ) diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp new file mode 100644 index 000000000..2e9ce2525 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp @@ -0,0 +1,742 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::IR { +namespace { +struct Statement; + +// Use normal_link because we are not guaranteed to destroy the tree in order +using ListBaseHook = + boost::intrusive::list_base_hook>; + +using Tree = boost::intrusive::list, + // Avoid linear complexity on splice, size is never called + boost::intrusive::constant_time_size>; +using Node = Tree::iterator; +using ConstNode = Tree::const_iterator; + +enum class StatementType { + Code, + Goto, + Label, + If, + Loop, + Break, + Return, + Function, + Identity, + Not, + Or, + SetVariable, + Variable, +}; + +bool HasChildren(StatementType type) { + switch (type) { + case StatementType::If: + case StatementType::Loop: + case StatementType::Function: + return true; + default: + return false; + } +} + +struct Goto {}; +struct Label {}; +struct If {}; +struct Loop {}; +struct Break {}; +struct Return {}; +struct FunctionTag {}; +struct Identity {}; +struct Not {}; +struct Or {}; +struct SetVariable {}; +struct Variable {}; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement +#endif +struct Statement : ListBaseHook { + Statement(Block* code_, Statement* up_) : code{code_}, up{up_}, type{StatementType::Code} {} + Statement(Goto, Statement* cond_, Node label_, Statement* up_) + : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {} + Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {} + Statement(If, Statement* cond_, Tree&& children_, Statement* up_) + : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {} + Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_) + : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {} + Statement(Break, Statement* cond_, Statement* up_) + : cond{cond_}, up{up_}, type{StatementType::Break} {} + Statement(Return) : type{StatementType::Return} {} + Statement(FunctionTag) : children{}, type{StatementType::Function} {} + Statement(Identity, Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {} + Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {} + Statement(Or, Statement* op_a_, Statement* op_b_) + : op_a{op_a_}, op_b{op_b_}, type{StatementType::Or} {} + Statement(SetVariable, u32 id_, Statement* op_, Statement* up_) + : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} + Statement(Variable, u32 id_) : id{id_}, type{StatementType::Variable} {} + + ~Statement() { + if (HasChildren(type)) { + std::destroy_at(&children); + } + } + + union { + Block* code; + Node label; + Tree children; + Condition guest_cond; + Statement* op; + Statement* op_a; + }; + union { + Statement* cond; + Statement* op_b; + u32 id; + }; + Statement* up{}; + StatementType type; +}; +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +std::string DumpExpr(const Statement* stmt) { + switch (stmt->type) { + case StatementType::Identity: + return fmt::format("{}", stmt->guest_cond); + case StatementType::Not: + return fmt::format("!{}", DumpExpr(stmt->op)); + case StatementType::Or: + return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b)); + case StatementType::Variable: + return fmt::format("goto_L{}", stmt->id); + default: + return ""; + } +} + +std::string DumpTree(const Tree& tree, u32 indentation = 0) { + std::string ret; + std::string indent(indentation, ' '); + for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { + switch (stmt->type) { + case StatementType::Code: + ret += fmt::format("{} Block {:04x};\n", indent, stmt->code->LocationBegin()); + break; + case StatementType::Goto: + ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond), + stmt->label->id); + break; + case StatementType::Label: + ret += fmt::format("{}L{}:\n", indent, stmt->id); + break; + case StatementType::If: + ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond)); + ret += DumpTree(stmt->children, indentation + 4); + ret += fmt::format("{} }}\n", indent); + break; + case StatementType::Loop: + ret += fmt::format("{} do {{\n", indent); + ret += DumpTree(stmt->children, indentation + 4); + ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond)); + break; + case StatementType::Break: + ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond)); + break; + case StatementType::Return: + ret += fmt::format("{} return;\n", indent); + break; + case StatementType::SetVariable: + ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op)); + break; + case StatementType::Function: + case StatementType::Identity: + case StatementType::Not: + case StatementType::Or: + case StatementType::Variable: + throw LogicError("Statement can't be printed"); + } + } + return ret; +} + +bool HasNode(const Tree& tree, ConstNode stmt) { + const auto end{tree.end()}; + for (auto it = tree.begin(); it != end; ++it) { + if (it == stmt || (HasChildren(it->type) && HasNode(it->children, stmt))) { + return true; + } + } + return false; +} + +Node FindStatementWithLabel(Tree& tree, ConstNode goto_stmt) { + const ConstNode label_stmt{goto_stmt->label}; + const ConstNode end{tree.end()}; + for (auto it = tree.begin(); it != end; ++it) { + if (it == label_stmt || (HasChildren(it->type) && HasNode(it->children, label_stmt))) { + return it; + } + } + throw LogicError("Lift label not in tree"); +} + +void SanitizeNoBreaks(const Tree& tree) { + if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) { + throw NotImplementedException("Capturing statement with break nodes"); + } +} + +size_t Level(Node stmt) { + size_t level{0}; + Statement* node{stmt->up}; + while (node) { + ++level; + node = node->up; + } + return level; +} + +bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) { + const size_t goto_level{Level(goto_stmt)}; + const size_t label_level{Level(label_stmt)}; + size_t min_level; + size_t max_level; + Node min; + Node max; + if (label_level < goto_level) { + min_level = label_level; + max_level = goto_level; + min = label_stmt; + max = goto_stmt; + } else { // goto_level < label_level + min_level = goto_level; + max_level = label_level; + min = goto_stmt; + max = label_stmt; + } + while (max_level > min_level) { + --max_level; + max = max->up; + } + return min->up == max->up; +} + +bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) { + return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt); +} + +bool SearchNode(const Tree& tree, ConstNode stmt, size_t& offset) { + ++offset; + + const auto end = tree.end(); + for (ConstNode it = tree.begin(); it != end; ++it) { + ++offset; + if (stmt == it) { + return true; + } + if (HasChildren(it->type) && SearchNode(it->children, stmt, offset)) { + return true; + } + } + return false; +} + +class GotoPass { +public: + explicit GotoPass(std::span blocks, ObjectPool& stmt_pool) + : pool{stmt_pool} { + std::vector gotos{BuildUnorderedTreeGetGotos(blocks)}; + fmt::print(stdout, "BEFORE\n{}\n", DumpTree(root_stmt.children)); + for (const Node& goto_stmt : gotos | std::views::reverse) { + RemoveGoto(goto_stmt); + } + fmt::print(stdout, "AFTER\n{}\n", DumpTree(root_stmt.children)); + } + + Statement& RootStatement() noexcept { + return root_stmt; + } + +private: + void RemoveGoto(Node goto_stmt) { + // Force goto_stmt and label_stmt to be directly related + const Node label_stmt{goto_stmt->label}; + if (IsIndirectlyRelated(goto_stmt, label_stmt)) { + // Move goto_stmt out using outward-movement transformation until it becomes + // directly related to label_stmt + while (!IsDirectlyRelated(goto_stmt, label_stmt)) { + goto_stmt = MoveOutward(goto_stmt); + } + } + // Force goto_stmt and label_stmt to be siblings + if (IsDirectlyRelated(goto_stmt, label_stmt)) { + const size_t label_level{Level(label_stmt)}; + size_t goto_level{Level(goto_stmt)}; + if (goto_level > label_level) { + // Move goto_stmt out of its level using outward-movement transformations + while (goto_level > label_level) { + goto_stmt = MoveOutward(goto_stmt); + --goto_level; + } + } else { // Level(goto_stmt) < Level(label_stmt) + if (Offset(goto_stmt) > Offset(label_stmt)) { + // Lift goto_stmt to above stmt containing label_stmt using goto-lifting + // transformations + goto_stmt = Lift(goto_stmt); + } + // Move goto_stmt into label_stmt's level using inward-movement transformation + while (goto_level < label_level) { + goto_stmt = MoveInward(goto_stmt); + ++goto_level; + } + } + } + // TODO: Remove this + Node it{goto_stmt}; + bool sibling{false}; + do { + sibling |= it == label_stmt; + --it; + } while (it != goto_stmt->up->children.begin()); + while (it != goto_stmt->up->children.end()) { + sibling |= it == label_stmt; + ++it; + } + if (!sibling) { + throw LogicError("Not siblings"); + } + + // goto_stmt and label_stmt are guaranteed to be siblings, eliminate + if (std::next(goto_stmt) == label_stmt) { + // Simply eliminate the goto if the label is next to it + goto_stmt->up->children.erase(goto_stmt); + } else if (Offset(goto_stmt) < Offset(label_stmt)) { + // Eliminate goto_stmt with a conditional + EliminateAsConditional(goto_stmt, label_stmt); + } else { + // Eliminate goto_stmt with a loop + EliminateAsLoop(goto_stmt, label_stmt); + } + } + + std::vector BuildUnorderedTreeGetGotos(std::span blocks) { + // Assume all blocks have two branches + std::vector gotos; + gotos.reserve(blocks.size() * 2); + + const std::unordered_map labels_map{BuildLabels(blocks)}; + Tree& root{root_stmt.children}; + auto insert_point{root.begin()}; + for (Block* const block : blocks) { + ++insert_point; // Skip label + ++insert_point; // Skip set variable + root.insert(insert_point, *pool.Create(block, &root_stmt)); + + if (block->IsTerminationBlock()) { + root.insert(insert_point, *pool.Create(Return{})); + continue; + } + const Condition cond{block->BranchCondition()}; + Statement* const true_cond{pool.Create(Identity{}, Condition{true})}; + if (cond == Condition{true} || cond == Condition{false}) { + const bool is_true{cond == Condition{true}}; + const Block* const branch{is_true ? block->TrueBranch() : block->FalseBranch()}; + const Node label{labels_map.at(branch)}; + Statement* const goto_stmt{pool.Create(Goto{}, true_cond, label, &root_stmt)}; + gotos.push_back(root.insert(insert_point, *goto_stmt)); + } else { + Statement* const ident_cond{pool.Create(Identity{}, cond)}; + const Node true_label{labels_map.at(block->TrueBranch())}; + const Node false_label{labels_map.at(block->FalseBranch())}; + Statement* goto_true{pool.Create(Goto{}, ident_cond, true_label, &root_stmt)}; + Statement* goto_false{pool.Create(Goto{}, true_cond, false_label, &root_stmt)}; + gotos.push_back(root.insert(insert_point, *goto_true)); + gotos.push_back(root.insert(insert_point, *goto_false)); + } + } + return gotos; + } + + std::unordered_map BuildLabels(std::span blocks) { + // TODO: Consider storing labels intrusively inside the block + std::unordered_map labels_map; + Tree& root{root_stmt.children}; + u32 label_id{0}; + for (const Block* const block : blocks) { + Statement* const label{pool.Create(Label{}, label_id, &root_stmt)}; + labels_map.emplace(block, root.insert(root.end(), *label)); + Statement* const false_stmt{pool.Create(Identity{}, Condition{false})}; + root.push_back(*pool.Create(SetVariable{}, label_id, false_stmt, &root_stmt)); + ++label_id; + } + return labels_map; + } + + void UpdateTreeUp(Statement* tree) { + for (Statement& stmt : tree->children) { + stmt.up = tree; + } + } + + void EliminateAsConditional(Node goto_stmt, Node label_stmt) { + Tree& body{goto_stmt->up->children}; + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt); + Statement* const cond{pool.Create(Not{}, goto_stmt->cond)}; + Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + body.erase(goto_stmt); + } + + void EliminateAsLoop(Node goto_stmt, Node label_stmt) { + Tree& body{goto_stmt->up->children}; + Tree loop_body; + loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt); + Statement* const cond{goto_stmt->cond}; + Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)}; + UpdateTreeUp(loop); + body.insert(goto_stmt, *loop); + body.erase(goto_stmt); + } + + [[nodiscard]] Node MoveOutward(Node goto_stmt) { + switch (goto_stmt->up->type) { + case StatementType::If: + return MoveOutwardIf(goto_stmt); + case StatementType::Loop: + return MoveOutwardLoop(goto_stmt); + default: + throw LogicError("Invalid outward movement"); + } + } + + [[nodiscard]] Node MoveInward(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; + const Node label{goto_stmt->label}; + const u32 label_id{label->id}; + + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; + body.insert(goto_stmt, *set_var); + + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt); + Statement* const variable{pool.Create(Variable{}, label_id)}; + Statement* const neg_var{pool.Create(Not{}, variable)}; + if (!if_body.empty()) { + Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + } + body.erase(goto_stmt); + + // Update nested if condition + switch (label_nested_stmt->type) { + case StatementType::If: + label_nested_stmt->cond = pool.Create(Or{}, neg_var, label_nested_stmt->cond); + break; + case StatementType::Loop: + break; + default: + throw LogicError("Invalid inward movement"); + } + Tree& nested_tree{label_nested_stmt->children}; + Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)}; + return nested_tree.insert(nested_tree.begin(), *new_goto); + } + + [[nodiscard]] Node Lift(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const Node label{goto_stmt->label}; + const u32 label_id{label->id}; + const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; + const auto type{label_nested_stmt->type}; + + Tree loop_body; + loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); + SanitizeNoBreaks(loop_body); + Statement* const variable{pool.Create(Variable{}, label_id)}; + Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; + UpdateTreeUp(loop_stmt); + const Node loop_node{body.insert(goto_stmt, *loop_stmt)}; + + Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; + loop_stmt->children.push_front(*new_goto); + const Node new_goto_node{loop_stmt->children.begin()}; + + Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)}; + loop_stmt->children.push_back(*set_var); + + body.erase(goto_stmt); + return new_goto_node; + } + + Node MoveOutwardIf(Node goto_stmt) { + const Node parent{Tree::s_iterator_to(*goto_stmt->up)}; + Tree& body{parent->children}; + const u32 label_id{goto_stmt->label->id}; + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)}; + body.insert(goto_stmt, *set_goto_var); + + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end()); + if_body.pop_front(); + Statement* const cond{pool.Create(Variable{}, label_id)}; + Statement* const neg_cond{pool.Create(Not{}, cond)}; + Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + + body.erase(goto_stmt); + + Statement* const new_cond{pool.Create(Variable{}, label_id)}; + Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)}; + Tree& parent_tree{parent->up->children}; + return parent_tree.insert(std::next(parent), *new_goto); + } + + Node MoveOutwardLoop(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const u32 label_id{goto_stmt->label->id}; + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; + Statement* const cond{pool.Create(Variable{}, label_id)}; + Statement* const break_stmt{pool.Create(Break{}, cond, parent)}; + body.insert(goto_stmt, *set_goto_var); + body.insert(goto_stmt, *break_stmt); + body.erase(goto_stmt); + + const Node loop{Tree::s_iterator_to(*goto_stmt->up)}; + Statement* const new_goto_cond{pool.Create(Variable{}, label_id)}; + Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)}; + Tree& parent_tree{loop->up->children}; + return parent_tree.insert(std::next(loop), *new_goto); + } + + size_t Offset(ConstNode stmt) const { + size_t offset{0}; + if (!SearchNode(root_stmt.children, stmt, offset)) { + fmt::print(stdout, "{}\n", DumpTree(root_stmt.children)); + throw LogicError("Node not found in tree"); + } + return offset; + } + + ObjectPool& pool; + Statement root_stmt{FunctionTag{}}; +}; + +Block* TryFindForwardBlock(const Statement& stmt) { + const Tree& tree{stmt.up->children}; + const ConstNode end{tree.cend()}; + ConstNode forward_node{std::next(Tree::s_iterator_to(stmt))}; + while (forward_node != end && !HasChildren(forward_node->type)) { + if (forward_node->type == StatementType::Code) { + return forward_node->code; + } + ++forward_node; + } + return nullptr; +} + +[[nodiscard]] U1 VisitExpr(IREmitter& ir, const Statement& stmt) { + switch (stmt.type) { + case StatementType::Identity: + return ir.Condition(stmt.guest_cond); + case StatementType::Not: + return ir.LogicalNot(U1{VisitExpr(ir, *stmt.op)}); + case StatementType::Or: + return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b)); + case StatementType::Variable: + return ir.GetGotoVariable(stmt.id); + default: + throw NotImplementedException("Statement type {}", stmt.type); + } +} + +class TranslatePass { +public: + TranslatePass(ObjectPool& inst_pool_, ObjectPool& block_pool_, + ObjectPool& stmt_pool_, Statement& root_stmt, + const std::function& func_, BlockList& block_list_) + : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, func{func_}, + block_list{block_list_} { + Visit(root_stmt, nullptr, nullptr); + } + +private: + void Visit(Statement& parent, Block* continue_block, Block* break_block) { + Tree& tree{parent.children}; + Block* current_block{nullptr}; + + for (auto it = tree.begin(); it != tree.end(); ++it) { + Statement& stmt{*it}; + switch (stmt.type) { + case StatementType::Label: + // Labels can be ignored + break; + case StatementType::Code: { + if (current_block && current_block != stmt.code) { + IREmitter ir{*current_block}; + ir.Branch(stmt.code); + } + current_block = stmt.code; + func(stmt.code); + block_list.push_back(stmt.code); + break; + } + case StatementType::SetVariable: { + if (!current_block) { + current_block = MergeBlock(parent, stmt); + } + IREmitter ir{*current_block}; + ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); + break; + } + case StatementType::If: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + Block* const merge_block{MergeBlock(parent, stmt)}; + + // Visit children + const size_t first_block_index{block_list.size()}; + Visit(stmt, merge_block, break_block); + + // Implement if header block + Block* const first_if_block{block_list.at(first_block_index)}; + IREmitter ir{*current_block}; + const U1 cond{VisitExpr(ir, *stmt.cond)}; + ir.SelectionMerge(merge_block); + ir.BranchConditional(cond, first_if_block, merge_block); + + current_block = merge_block; + break; + } + case StatementType::Loop: { + Block* const loop_header_block{block_pool.Create(inst_pool)}; + if (current_block) { + IREmitter{*current_block}.Branch(loop_header_block); + } + block_list.push_back(loop_header_block); + + Block* const new_continue_block{block_pool.Create(inst_pool)}; + Block* const merge_block{MergeBlock(parent, stmt)}; + + // Visit children + const size_t first_block_index{block_list.size()}; + Visit(stmt, new_continue_block, merge_block); + + // The continue block is located at the end of the loop + block_list.push_back(new_continue_block); + + // Implement loop header block + Block* const first_loop_block{block_list.at(first_block_index)}; + IREmitter ir{*loop_header_block}; + ir.LoopMerge(merge_block, new_continue_block); + ir.Branch(first_loop_block); + + // Implement continue block + IREmitter continue_ir{*new_continue_block}; + const U1 continue_cond{VisitExpr(continue_ir, *stmt.cond)}; + continue_ir.BranchConditional(continue_cond, ir.block, merge_block); + + current_block = merge_block; + break; + } + case StatementType::Break: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + Block* const skip_block{MergeBlock(parent, stmt)}; + + IREmitter ir{*current_block}; + ir.BranchConditional(VisitExpr(ir, *stmt.cond), break_block, skip_block); + + current_block = skip_block; + break; + } + case StatementType::Return: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IREmitter{*current_block}.Return(); + current_block = nullptr; + break; + } + default: + throw NotImplementedException("Statement type {}", stmt.type); + } + } + if (current_block && continue_block) { + IREmitter ir{*current_block}; + ir.Branch(continue_block); + } + } + + Block* MergeBlock(Statement& parent, Statement& stmt) { + if (Block* const block{TryFindForwardBlock(stmt)}) { + return block; + } + // Create a merge block we can visit later + Block* const block{block_pool.Create(inst_pool)}; + Statement* const merge_stmt{stmt_pool.Create(block, &parent)}; + parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); + return block; + } + + ObjectPool& stmt_pool; + ObjectPool& inst_pool; + ObjectPool& block_pool; + const std::function& func; + BlockList& block_list; +}; +} // Anonymous namespace + +BlockList VisitAST(ObjectPool& inst_pool, ObjectPool& block_pool, + std::span unordered_blocks, + const std::function& func) { + ObjectPool stmt_pool; + GotoPass goto_pass{unordered_blocks, stmt_pool}; + BlockList block_list; + TranslatePass translate_pass{inst_pool, block_pool, stmt_pool, goto_pass.RootStatement(), + func, block_list}; + return block_list; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.h b/src/shader_recompiler/frontend/ir/structured_control_flow.h new file mode 100644 index 000000000..a574c24f7 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/structured_control_flow.h @@ -0,0 +1,22 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::IR { + +[[nodiscard]] BlockList VisitAST(ObjectPool& inst_pool, ObjectPool& block_pool, + std::span unordered_blocks, + const std::function& func); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 21ee98137..e766b555b 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -17,38 +17,49 @@ #include "shader_recompiler/frontend/maxwell/location.h" namespace Shader::Maxwell::Flow { +namespace { +struct Compare { + bool operator()(const Block& lhs, Location rhs) const noexcept { + return lhs.begin < rhs; + } + + bool operator()(Location lhs, const Block& rhs) const noexcept { + return lhs < rhs.begin; + } + + bool operator()(const Block& lhs, const Block& rhs) const noexcept { + return lhs.begin < rhs.begin; + } +}; +} // Anonymous namespace static u32 BranchOffset(Location pc, Instruction inst) { return pc.Offset() + inst.branch.Offset() + 8; } -static std::array Split(Block&& block, Location pc, BlockId new_id) { - if (pc <= block.begin || pc >= block.end) { +static void Split(Block* old_block, Block* new_block, Location pc) { + if (pc <= old_block->begin || pc >= old_block->end) { throw InvalidArgument("Invalid address to split={}", pc); } - return { - Block{ - .begin{block.begin}, - .end{pc}, - .end_class{EndClass::Branch}, - .id{block.id}, - .stack{block.stack}, - .cond{true}, - .branch_true{new_id}, - .branch_false{UNREACHABLE_BLOCK_ID}, - .imm_predecessors{}, - }, - Block{ - .begin{pc}, - .end{block.end}, - .end_class{block.end_class}, - .id{new_id}, - .stack{std::move(block.stack)}, - .cond{block.cond}, - .branch_true{block.branch_true}, - .branch_false{block.branch_false}, - .imm_predecessors{}, - }, + *new_block = Block{ + .begin{pc}, + .end{old_block->end}, + .end_class{old_block->end_class}, + .stack{old_block->stack}, + .cond{old_block->cond}, + .branch_true{old_block->branch_true}, + .branch_false{old_block->branch_false}, + .ir{nullptr}, + }; + *old_block = Block{ + .begin{old_block->begin}, + .end{pc}, + .end_class{EndClass::Branch}, + .stack{std::move(old_block->stack)}, + .cond{IR::Condition{true}}, + .branch_true{new_block}, + .branch_false{nullptr}, + .ir{nullptr}, }; } @@ -112,7 +123,7 @@ static bool HasFlowTest(Opcode opcode) { static std::string NameOf(const Block& block) { if (block.begin.IsVirtual()) { - return fmt::format("\"Virtual {}\"", block.id); + return fmt::format("\"Virtual {}\"", block.begin); } else { return fmt::format("\"{}\"", block.begin); } @@ -158,126 +169,23 @@ bool Block::Contains(Location pc) const noexcept { Function::Function(Location start_address) : entrypoint{start_address}, labels{{ .address{start_address}, - .block_id{0}, + .block{nullptr}, .stack{}, }} {} -void Function::BuildBlocksMap() { - const size_t num_blocks{NumBlocks()}; - blocks_map.resize(num_blocks); - for (size_t block_index = 0; block_index < num_blocks; ++block_index) { - Block& block{blocks_data[block_index]}; - blocks_map[block.id] = █ - } -} - -void Function::BuildImmediatePredecessors() { - for (const Block& block : blocks_data) { - if (block.branch_true != UNREACHABLE_BLOCK_ID) { - blocks_map[block.branch_true]->imm_predecessors.push_back(block.id); - } - if (block.branch_false != UNREACHABLE_BLOCK_ID) { - blocks_map[block.branch_false]->imm_predecessors.push_back(block.id); - } - } -} - -void Function::BuildPostOrder() { - boost::container::small_vector block_stack; - post_order_map.resize(NumBlocks()); - - Block& first_block{blocks_data[blocks.front()]}; - first_block.post_order_visited = true; - block_stack.push_back(first_block.id); - - const auto visit_branch = [&](BlockId block_id, BlockId branch_id) { - if (branch_id == UNREACHABLE_BLOCK_ID) { - return false; - } - if (blocks_map[branch_id]->post_order_visited) { - return false; - } - blocks_map[branch_id]->post_order_visited = true; - - // Calling push_back twice is faster than insert on msvc - block_stack.push_back(block_id); - block_stack.push_back(branch_id); - return true; - }; - while (!block_stack.empty()) { - const Block* const block{blocks_map[block_stack.back()]}; - block_stack.pop_back(); - - if (!visit_branch(block->id, block->branch_true) && - !visit_branch(block->id, block->branch_false)) { - post_order_map[block->id] = static_cast(post_order_blocks.size()); - post_order_blocks.push_back(block->id); - } - } -} - -void Function::BuildImmediateDominators() { - auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })}; - auto reverse_order_but_first{std::views::reverse | std::views::drop(1) | transform_block_id}; - auto has_idom{std::views::filter([](Block* block) { return block->imm_dominator; })}; - auto intersect{[this](Block* finger1, Block* finger2) { - while (finger1 != finger2) { - while (post_order_map[finger1->id] < post_order_map[finger2->id]) { - finger1 = finger1->imm_dominator; - } - while (post_order_map[finger2->id] < post_order_map[finger1->id]) { - finger2 = finger2->imm_dominator; - } - } - return finger1; - }}; - for (Block& block : blocks_data) { - block.imm_dominator = nullptr; - } - Block* const start_block{&blocks_data[blocks.front()]}; - start_block->imm_dominator = start_block; - - bool changed{true}; - while (changed) { - changed = false; - for (Block* const block : post_order_blocks | reverse_order_but_first) { - Block* new_idom{}; - for (Block* predecessor : block->imm_predecessors | transform_block_id | has_idom) { - new_idom = new_idom ? intersect(predecessor, new_idom) : predecessor; - } - changed |= block->imm_dominator != new_idom; - block->imm_dominator = new_idom; - } - } -} - -void Function::BuildDominanceFrontier() { - auto transform_block_id{std::views::transform([this](BlockId id) { return blocks_map[id]; })}; - auto has_enough_predecessors{[](Block& block) { return block.imm_predecessors.size() >= 2; }}; - for (Block& block : blocks_data | std::views::filter(has_enough_predecessors)) { - for (Block* current : block.imm_predecessors | transform_block_id) { - while (current != block.imm_dominator) { - current->dominance_frontiers.push_back(current->id); - current = current->imm_dominator; - } - } - } -} - -CFG::CFG(Environment& env_, Location start_address) : env{env_} { - VisitFunctions(start_address); - - for (Function& function : functions) { - function.BuildBlocksMap(); - function.BuildImmediatePredecessors(); - function.BuildPostOrder(); - function.BuildImmediateDominators(); - function.BuildDominanceFrontier(); - } -} - -void CFG::VisitFunctions(Location start_address) { +CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_address) + : env{env_}, block_pool{block_pool_} { functions.emplace_back(start_address); + functions.back().labels.back().block = block_pool.Create(Block{ + .begin{start_address}, + .end{start_address}, + .end_class{EndClass::Branch}, + .stack{}, + .cond{IR::Condition{true}}, + .branch_true{nullptr}, + .branch_false{nullptr}, + .ir{nullptr}, + }); for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { while (!functions[function_id].labels.empty()) { Function& function{functions[function_id]}; @@ -294,35 +202,16 @@ void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { return; } // Try to find the next block - Function* function{&functions[function_id]}; + Function* const function{&functions[function_id]}; Location pc{label.address}; - const auto next{std::upper_bound(function->blocks.begin(), function->blocks.end(), pc, - [function](Location pc, u32 block_index) { - return pc < function->blocks_data[block_index].begin; - })}; - const auto next_index{std::distance(function->blocks.begin(), next)}; - const bool is_last{next == function->blocks.end()}; - Location next_pc; - BlockId next_id{UNREACHABLE_BLOCK_ID}; - if (!is_last) { - next_pc = function->blocks_data[*next].begin; - next_id = function->blocks_data[*next].id; - } + const auto next_it{function->blocks.upper_bound(pc, Compare{})}; + const bool is_last{next_it == function->blocks.end()}; + Block* const next{is_last ? nullptr : &*next_it}; // Insert before the next block - Block block{ - .begin{pc}, - .end{pc}, - .end_class{EndClass::Branch}, - .id{label.block_id}, - .stack{std::move(label.stack)}, - .cond{true}, - .branch_true{UNREACHABLE_BLOCK_ID}, - .branch_false{UNREACHABLE_BLOCK_ID}, - .imm_predecessors{}, - }; + Block* const block{label.block}; // Analyze instructions until it reaches an already visited block or there's a branch bool is_branch{false}; - while (is_last || pc < next_pc) { + while (!next || pc < next->begin) { is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch; if (is_branch) { break; @@ -332,43 +221,36 @@ void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { if (!is_branch) { // If the block finished without a branch, // it means that the next instruction is already visited, jump to it - block.end = pc; - block.cond = true; - block.branch_true = next_id; - block.branch_false = UNREACHABLE_BLOCK_ID; + block->end = pc; + block->cond = IR::Condition{true}; + block->branch_true = next; + block->branch_false = nullptr; } // Function's pointer might be invalid, resolve it again - function = &functions[function_id]; - const u32 new_block_index = static_cast(function->blocks_data.size()); - function->blocks.insert(function->blocks.begin() + next_index, new_block_index); - function->blocks_data.push_back(std::move(block)); + // Insert the new block + functions[function_id].blocks.insert(*block); } bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) { const Location pc{label.address}; Function& function{functions[function_id]}; - const auto it{std::ranges::find_if(function.blocks, [&function, pc](u32 block_index) { - return function.blocks_data[block_index].Contains(pc); - })}; + const auto it{ + std::ranges::find_if(function.blocks, [pc](auto& block) { return block.Contains(pc); })}; if (it == function.blocks.end()) { // Address has not been visited return false; } - Block& block{function.blocks_data[*it]}; - if (block.begin == pc) { - throw LogicError("Dangling branch"); - } - const u32 first_index{*it}; - const u32 second_index{static_cast(function.blocks_data.size())}; - const std::array new_indices{first_index, second_index}; - std::array split_blocks{Split(std::move(block), pc, label.block_id)}; - function.blocks_data[*it] = std::move(split_blocks[0]); - function.blocks_data.push_back(std::move(split_blocks[1])); - function.blocks.insert(function.blocks.erase(it), new_indices.begin(), new_indices.end()); + Block* const visited_block{&*it}; + if (visited_block->begin == pc) { + throw LogicError("Dangling block"); + } + Block* const new_block{label.block}; + Split(visited_block, new_block, pc); + function.blocks.insert(it, *new_block); return true; } -CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Location pc) { +CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Location pc) { const Instruction inst{env.ReadInstruction(pc.Offset())}; const Opcode opcode{Decode(inst.raw)}; switch (opcode) { @@ -390,12 +272,12 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode)); break; case Opcode::RET: - block.end_class = EndClass::Return; + block->end_class = EndClass::Return; break; default: break; } - block.end = pc; + block->end = pc; return AnalysisState::Branch; case Opcode::BRK: case Opcode::CONT: @@ -404,9 +286,9 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { return AnalysisState::Continue; } - const auto [stack_pc, new_stack]{block.stack.Pop(OpcodeToken(opcode))}; - block.branch_true = AddLabel(block, new_stack, stack_pc, function_id); - block.end = pc; + const auto [stack_pc, new_stack]{block->stack.Pop(OpcodeToken(opcode))}; + block->branch_true = AddLabel(block, new_stack, stack_pc, function_id); + block->end = pc; return AnalysisState::Branch; } case Opcode::PBK: @@ -414,7 +296,7 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati case Opcode::PEXIT: case Opcode::PLONGJMP: case Opcode::SSY: - block.stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); + block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); return AnalysisState::Continue; case Opcode::EXIT: return AnalyzeEXIT(block, function_id, pc, inst); @@ -444,51 +326,51 @@ CFG::AnalysisState CFG::AnalyzeInst(Block& block, FunctionId function_id, Locati return AnalysisState::Branch; } -void CFG::AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, +void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, IR::Condition cond) { - if (block.begin != pc) { + if (block->begin != pc) { // If the block doesn't start in the conditional instruction // mark it as a label to visit it later - block.end = pc; - block.cond = true; - block.branch_true = AddLabel(block, block.stack, pc, function_id); - block.branch_false = UNREACHABLE_BLOCK_ID; + block->end = pc; + block->cond = IR::Condition{true}; + block->branch_true = AddLabel(block, block->stack, pc, function_id); + block->branch_false = nullptr; return; } - // Impersonate the visited block with a virtual block - // Jump from this virtual to the real conditional instruction and the next instruction - Function& function{functions[function_id]}; - const BlockId conditional_block_id{++function.current_block_id}; - function.blocks.push_back(static_cast(function.blocks_data.size())); - Block& virtual_block{function.blocks_data.emplace_back(Block{ - .begin{}, // Virtual block - .end{}, + // Create a virtual block and a conditional block + Block* const conditional_block{block_pool.Create()}; + Block virtual_block{ + .begin{block->begin.Virtual()}, + .end{block->begin.Virtual()}, .end_class{EndClass::Branch}, - .id{block.id}, // Impersonating - .stack{block.stack}, + .stack{block->stack}, .cond{cond}, - .branch_true{conditional_block_id}, - .branch_false{UNREACHABLE_BLOCK_ID}, - .imm_predecessors{}, - })}; - // Set the end properties of the conditional instruction and give it a new identity - Block& conditional_block{block}; - conditional_block.end = pc; - conditional_block.end_class = insn_end_class; - conditional_block.id = conditional_block_id; + .branch_true{conditional_block}, + .branch_false{nullptr}, + .ir{nullptr}, + }; + // Save the contents of the visited block in the conditional block + *conditional_block = std::move(*block); + // Impersonate the visited block with a virtual block + *block = std::move(virtual_block); + // Set the end properties of the conditional instruction + conditional_block->end = pc; + conditional_block->end_class = insn_end_class; // Add a label to the instruction after the conditional instruction - const BlockId endif_block_id{AddLabel(conditional_block, block.stack, pc + 1, function_id)}; + Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; // Branch to the next instruction from the virtual block - virtual_block.branch_false = endif_block_id; + block->branch_false = endif_block; // And branch to it from the conditional instruction if it is a branch if (insn_end_class == EndClass::Branch) { - conditional_block.cond = true; - conditional_block.branch_true = endif_block_id; - conditional_block.branch_false = UNREACHABLE_BLOCK_ID; + conditional_block->cond = IR::Condition{true}; + conditional_block->branch_true = endif_block; + conditional_block->branch_false = nullptr; } + // Finally insert the condition block into the list of blocks + functions[function_id].blocks.insert(*conditional_block); } -bool CFG::AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst, +bool CFG::AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, Opcode opcode) { if (inst.branch.is_cbuf) { throw NotImplementedException("Branch with constant buffer offset"); @@ -500,21 +382,21 @@ bool CFG::AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instr const bool has_flow_test{HasFlowTest(opcode)}; const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T}; if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { - block.cond = IR::Condition(flow_test, static_cast(pred.index), pred.negated); - block.branch_false = AddLabel(block, block.stack, pc + 1, function_id); + block->cond = IR::Condition(flow_test, static_cast(pred.index), pred.negated); + block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); } else { - block.cond = true; + block->cond = IR::Condition{true}; } return true; } -void CFG::AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst, +void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute) { const Location bra_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; - block.branch_true = AddLabel(block, block.stack, bra_pc, function_id); + block->branch_true = AddLabel(block, block->stack, bra_pc, function_id); } -void CFG::AnalyzeBRX(Block&, Location, Instruction, bool is_absolute) { +void CFG::AnalyzeBRX(Block*, Location, Instruction, bool is_absolute) { throw NotImplementedException("{}", is_absolute ? "JMX" : "BRX"); } @@ -528,7 +410,7 @@ void CFG::AnalyzeCAL(Location pc, Instruction inst, bool is_absolute) { } } -CFG::AnalysisState CFG::AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, +CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst) { const IR::FlowTest flow_test{inst.branch.flow_test}; const Predicate pred{inst.Pred()}; @@ -537,41 +419,52 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block& block, FunctionId function_id, Locati return AnalysisState::Continue; } if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { - if (block.stack.Peek(Token::PEXIT).has_value()) { + if (block->stack.Peek(Token::PEXIT).has_value()) { throw NotImplementedException("Conditional EXIT with PEXIT token"); } const IR::Condition cond{flow_test, static_cast(pred.index), pred.negated}; AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); return AnalysisState::Branch; } - if (const std::optional exit_pc{block.stack.Peek(Token::PEXIT)}) { - const Stack popped_stack{block.stack.Remove(Token::PEXIT)}; - block.cond = true; - block.branch_true = AddLabel(block, popped_stack, *exit_pc, function_id); - block.branch_false = UNREACHABLE_BLOCK_ID; + if (const std::optional exit_pc{block->stack.Peek(Token::PEXIT)}) { + const Stack popped_stack{block->stack.Remove(Token::PEXIT)}; + block->cond = IR::Condition{true}; + block->branch_true = AddLabel(block, popped_stack, *exit_pc, function_id); + block->branch_false = nullptr; return AnalysisState::Branch; } - block.end = pc; - block.end_class = EndClass::Exit; + block->end = pc; + block->end_class = EndClass::Exit; return AnalysisState::Branch; } -BlockId CFG::AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id) { +Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id) { Function& function{functions[function_id]}; - if (block.begin == pc) { - return block.id; + if (block->begin == pc) { + // Jumps to itself + return block; } - const auto target{std::ranges::find(function.blocks_data, pc, &Block::begin)}; - if (target != function.blocks_data.end()) { - return target->id; + if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) { + // Block already exists and it has been visited + return &*it; } - const BlockId block_id{++function.current_block_id}; + // TODO: FIX DANGLING BLOCKS + Block* const new_block{block_pool.Create(Block{ + .begin{pc}, + .end{pc}, + .end_class{EndClass::Branch}, + .stack{stack}, + .cond{IR::Condition{true}}, + .branch_true{nullptr}, + .branch_false{nullptr}, + .ir{nullptr}, + })}; function.labels.push_back(Label{ .address{pc}, - .block_id{block_id}, + .block{new_block}, .stack{std::move(stack)}, }); - return block_id; + return new_block; } std::string CFG::Dot() const { @@ -581,18 +474,12 @@ std::string CFG::Dot() const { for (const Function& function : functions) { dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint); dot += fmt::format("\t\tnode [style=filled];\n"); - for (const u32 block_index : function.blocks) { - const Block& block{function.blocks_data[block_index]}; + for (const Block& block : function.blocks) { const std::string name{NameOf(block)}; - const auto add_branch = [&](BlockId branch_id, bool add_label) { - const auto it{std::ranges::find(function.blocks_data, branch_id, &Block::id)}; - dot += fmt::format("\t\t{}->", name); - if (it == function.blocks_data.end()) { - dot += fmt::format("\"Unknown label {}\"", branch_id); - } else { - dot += NameOf(*it); - }; - if (add_label && block.cond != true && block.cond != false) { + const auto add_branch = [&](Block* branch, bool add_label) { + dot += fmt::format("\t\t{}->{}", name, NameOf(*branch)); + if (add_label && block.cond != IR::Condition{true} && + block.cond != IR::Condition{false}) { dot += fmt::format(" [label=\"{}\"]", block.cond); } dot += '\n'; @@ -600,10 +487,10 @@ std::string CFG::Dot() const { dot += fmt::format("\t\t{};\n", name); switch (block.end_class) { case EndClass::Branch: - if (block.cond != false) { + if (block.cond != IR::Condition{false}) { add_branch(block.branch_true, true); } - if (block.cond != true) { + if (block.cond != IR::Condition{true}) { add_branch(block.branch_false, false); } break; @@ -619,12 +506,6 @@ std::string CFG::Dot() const { node_uid); ++node_uid; break; - case EndClass::Unreachable: - dot += fmt::format("\t\t{}->N{};\n", name, node_uid); - dot += fmt::format( - "\t\tN{} [label=\"Unreachable\"][shape=square][style=stripped];\n", node_uid); - ++node_uid; - break; } } if (function.entrypoint == 8) { @@ -635,10 +516,11 @@ std::string CFG::Dot() const { dot += "\t}\n"; } if (!functions.empty()) { - if (functions.front().blocks.empty()) { + auto& function{functions.front()}; + if (function.blocks.empty()) { dot += "Start;\n"; } else { - dot += fmt::format("\tStart -> {};\n", NameOf(functions.front().blocks_data.front())); + dot += fmt::format("\tStart -> {};\n", NameOf(*function.blocks.begin())); } dot += fmt::format("\tStart [shape=diamond];\n"); } diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 49b369282..8179787b8 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -11,25 +11,27 @@ #include #include +#include #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/condition.h" #include "shader_recompiler/frontend/maxwell/instruction.h" #include "shader_recompiler/frontend/maxwell/location.h" #include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::IR { +class Block; +} namespace Shader::Maxwell::Flow { -using BlockId = u32; using FunctionId = size_t; -constexpr BlockId UNREACHABLE_BLOCK_ID{static_cast(-1)}; - enum class EndClass { Branch, Exit, Return, - Unreachable, }; enum class Token { @@ -59,58 +61,37 @@ private: boost::container::small_vector entries; }; -struct Block { +struct Block : boost::intrusive::set_base_hook< + // Normal link is ~2.5% faster compared to safe link + boost::intrusive::link_mode> { [[nodiscard]] bool Contains(Location pc) const noexcept; + bool operator<(const Block& rhs) const noexcept { + return begin < rhs.begin; + } + Location begin; Location end; EndClass end_class; - BlockId id; Stack stack; IR::Condition cond; - BlockId branch_true; - BlockId branch_false; - boost::container::small_vector imm_predecessors; - boost::container::small_vector dominance_frontiers; - union { - bool post_order_visited{false}; - Block* imm_dominator; - }; + Block* branch_true; + Block* branch_false; + IR::Block* ir; }; struct Label { Location address; - BlockId block_id; + Block* block; Stack stack; }; struct Function { Function(Location start_address); - void BuildBlocksMap(); - - void BuildImmediatePredecessors(); - - void BuildPostOrder(); - - void BuildImmediateDominators(); - - void BuildDominanceFrontier(); - - [[nodiscard]] size_t NumBlocks() const noexcept { - return static_cast(current_block_id) + 1; - } - Location entrypoint; - BlockId current_block_id{0}; boost::container::small_vector labels; - boost::container::small_vector blocks; - boost::container::small_vector blocks_data; - // Translates from BlockId to block index - boost::container::small_vector blocks_map; - - boost::container::small_vector post_order_blocks; - boost::container::small_vector post_order_map; + boost::intrusive::set blocks; }; class CFG { @@ -120,7 +101,7 @@ class CFG { }; public: - explicit CFG(Environment& env, Location start_address); + explicit CFG(Environment& env, ObjectPool& block_pool, Location start_address); CFG& operator=(const CFG&) = delete; CFG(const CFG&) = delete; @@ -133,35 +114,37 @@ public: [[nodiscard]] std::span Functions() const noexcept { return std::span(functions.data(), functions.size()); } + [[nodiscard]] std::span Functions() noexcept { + return std::span(functions.data(), functions.size()); + } private: - void VisitFunctions(Location start_address); - void AnalyzeLabel(FunctionId function_id, Label& label); /// Inspect already visited blocks. /// Return true when the block has already been visited bool InspectVisitedBlocks(FunctionId function_id, const Label& label); - AnalysisState AnalyzeInst(Block& block, FunctionId function_id, Location pc); + AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc); - void AnalyzeCondInst(Block& block, FunctionId function_id, Location pc, EndClass insn_end_class, + void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, IR::Condition cond); /// Return true when the branch instruction is confirmed to be a branch - bool AnalyzeBranch(Block& block, FunctionId function_id, Location pc, Instruction inst, + bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, Opcode opcode); - void AnalyzeBRA(Block& block, FunctionId function_id, Location pc, Instruction inst, + void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute); - void AnalyzeBRX(Block& block, Location pc, Instruction inst, bool is_absolute); + void AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute); void AnalyzeCAL(Location pc, Instruction inst, bool is_absolute); - AnalysisState AnalyzeEXIT(Block& block, FunctionId function_id, Location pc, Instruction inst); + AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst); /// Return the branch target block id - BlockId AddLabel(const Block& block, Stack stack, Location pc, FunctionId function_id); + Block* AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id); Environment& env; + ObjectPool& block_pool; boost::container::small_vector functions; FunctionId current_function_id{0}; }; diff --git a/src/shader_recompiler/frontend/maxwell/location.h b/src/shader_recompiler/frontend/maxwell/location.h index 66b51a19e..26d29eae2 100644 --- a/src/shader_recompiler/frontend/maxwell/location.h +++ b/src/shader_recompiler/frontend/maxwell/location.h @@ -15,7 +15,7 @@ namespace Shader::Maxwell { class Location { - static constexpr u32 VIRTUAL_OFFSET{std::numeric_limits::max()}; + static constexpr u32 VIRTUAL_BIAS{4}; public: constexpr Location() = default; @@ -27,12 +27,18 @@ public: Align(); } + constexpr Location Virtual() const noexcept { + Location virtual_location; + virtual_location.offset = offset - VIRTUAL_BIAS; + return virtual_location; + } + [[nodiscard]] constexpr u32 Offset() const noexcept { return offset; } [[nodiscard]] constexpr bool IsVirtual() const { - return offset == VIRTUAL_OFFSET; + return offset % 8 == VIRTUAL_BIAS; } constexpr auto operator<=>(const Location&) const noexcept = default; @@ -89,7 +95,7 @@ private: offset -= 8 + (offset % 32 == 8 ? 8 : 0); } - u32 offset{VIRTUAL_OFFSET}; + u32 offset{0xcccccccc}; }; } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8cdd20804..9fa912ed8 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -4,57 +4,58 @@ #include #include +#include #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/program.h" -#include "shader_recompiler/frontend/maxwell/termination_code.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { namespace { -void TranslateCode(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, const Flow::Function& cfg_function, IR::Function& function, - std::span block_map) { +IR::BlockList TranslateCode(ObjectPool& inst_pool, ObjectPool& block_pool, + Environment& env, Flow::Function& cfg_function) { const size_t num_blocks{cfg_function.blocks.size()}; - function.blocks.reserve(num_blocks); - - for (const Flow::BlockId block_id : cfg_function.blocks) { - const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - - IR::Block* const ir_block{block_pool.Create(Translate(inst_pool, env, flow_block))}; - block_map[flow_block.id] = ir_block; - function.blocks.emplace_back(ir_block); - } -} - -void EmitTerminationInsts(const Flow::Function& cfg_function, - std::span block_map) { - for (const Flow::BlockId block_id : cfg_function.blocks) { - const Flow::Block& flow_block{cfg_function.blocks_data[block_id]}; - EmitTerminationCode(flow_block, block_map); - } -} - -void TranslateFunction(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, const Flow::Function& cfg_function, - IR::Function& function) { - std::vector block_map; - block_map.resize(cfg_function.blocks_data.size()); - - TranslateCode(inst_pool, block_pool, env, cfg_function, function, block_map); - EmitTerminationInsts(cfg_function, block_map); + std::vector blocks(cfg_function.blocks.size()); + std::ranges::for_each(cfg_function.blocks, [&, i = size_t{0}](auto& cfg_block) mutable { + const u32 begin{cfg_block.begin.Offset()}; + const u32 end{cfg_block.end.Offset()}; + blocks[i] = block_pool.Create(inst_pool, begin, end); + cfg_block.ir = blocks[i]; + ++i; + }); + std::ranges::for_each(cfg_function.blocks, [&, i = size_t{0}](auto& cfg_block) mutable { + IR::Block* const block{blocks[i]}; + ++i; + if (cfg_block.end_class != Flow::EndClass::Branch) { + block->SetReturn(); + } else if (cfg_block.cond == IR::Condition{true}) { + block->SetBranch(cfg_block.branch_true->ir); + } else if (cfg_block.cond == IR::Condition{false}) { + block->SetBranch(cfg_block.branch_false->ir); + } else { + block->SetBranches(cfg_block.cond, cfg_block.branch_true->ir, + cfg_block.branch_false->ir); + } + }); + return IR::VisitAST(inst_pool, block_pool, blocks, + [&](IR::Block* block) { Translate(env, block); }); } } // Anonymous namespace IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, const Flow::CFG& cfg) { + Environment& env, Flow::CFG& cfg) { IR::Program program; auto& functions{program.functions}; functions.reserve(cfg.Functions().size()); - for (const Flow::Function& cfg_function : cfg.Functions()) { - TranslateFunction(inst_pool, block_pool, env, cfg_function, functions.emplace_back()); + for (Flow::Function& cfg_function : cfg.Functions()) { + functions.push_back(IR::Function{ + .blocks{TranslateCode(inst_pool, block_pool, env, cfg_function)}, + }); } + + fmt::print(stdout, "No optimizations: {}", IR::DumpProgram(program)); std::ranges::for_each(functions, Optimization::SsaRewritePass); for (IR::Function& function : functions) { Optimization::Invoke(Optimization::GlobalMemoryToStorageBufferPass, function); diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h index 3355ab129..542621a1d 100644 --- a/src/shader_recompiler/frontend/maxwell/program.h +++ b/src/shader_recompiler/frontend/maxwell/program.h @@ -19,6 +19,6 @@ namespace Shader::Maxwell { [[nodiscard]] IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, - const Flow::CFG& cfg); + Flow::CFG& cfg); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.cpp b/src/shader_recompiler/frontend/maxwell/termination_code.cpp deleted file mode 100644 index ed5137f20..000000000 --- a/src/shader_recompiler/frontend/maxwell/termination_code.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/termination_code.h" - -namespace Shader::Maxwell { - -static void EmitExit(IR::IREmitter& ir) { - ir.Exit(); -} - -static IR::U1 GetFlowTest(IR::FlowTest flow_test, IR::IREmitter& ir) { - switch (flow_test) { - case IR::FlowTest::T: - return ir.Imm1(true); - case IR::FlowTest::F: - return ir.Imm1(false); - case IR::FlowTest::NE: - // FIXME: Verify this - return ir.LogicalNot(ir.GetZFlag()); - case IR::FlowTest::NaN: - // FIXME: Verify this - return ir.LogicalAnd(ir.GetSFlag(), ir.GetZFlag()); - default: - throw NotImplementedException("Flow test {}", flow_test); - } -} - -static IR::U1 GetCond(IR::Condition cond, IR::IREmitter& ir) { - const IR::FlowTest flow_test{cond.FlowTest()}; - const auto [pred, pred_negated]{cond.Pred()}; - if (pred == IR::Pred::PT && !pred_negated) { - return GetFlowTest(flow_test, ir); - } - if (flow_test == IR::FlowTest::T) { - return ir.GetPred(pred, pred_negated); - } - return ir.LogicalAnd(ir.GetPred(pred, pred_negated), GetFlowTest(flow_test, ir)); -} - -static void EmitBranch(const Flow::Block& flow_block, std::span block_map, - IR::IREmitter& ir) { - const auto add_immediate_predecessor = [&](Flow::BlockId label) { - block_map[label]->AddImmediatePredecessor(&ir.block); - }; - if (flow_block.cond == true) { - add_immediate_predecessor(flow_block.branch_true); - return ir.Branch(block_map[flow_block.branch_true]); - } - if (flow_block.cond == false) { - add_immediate_predecessor(flow_block.branch_false); - return ir.Branch(block_map[flow_block.branch_false]); - } - add_immediate_predecessor(flow_block.branch_true); - add_immediate_predecessor(flow_block.branch_false); - return ir.BranchConditional(GetCond(flow_block.cond, ir), block_map[flow_block.branch_true], - block_map[flow_block.branch_false]); -} - -void EmitTerminationCode(const Flow::Block& flow_block, std::span block_map) { - IR::Block* const block{block_map[flow_block.id]}; - IR::IREmitter ir(*block); - switch (flow_block.end_class) { - case Flow::EndClass::Branch: - EmitBranch(flow_block, block_map, ir); - break; - case Flow::EndClass::Exit: - EmitExit(ir); - break; - case Flow::EndClass::Return: - ir.Return(); - break; - case Flow::EndClass::Unreachable: - ir.Unreachable(); - break; - } -} - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/termination_code.h b/src/shader_recompiler/frontend/maxwell/termination_code.h deleted file mode 100644 index 04e044534..000000000 --- a/src/shader_recompiler/frontend/maxwell/termination_code.h +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include - -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" - -namespace Shader::Maxwell { - -/// Emit termination instructions and collect immediate predecessors -void EmitTerminationCode(const Flow::Block& flow_block, std::span block_map); - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp index d4b417d14..b752785d4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp @@ -28,7 +28,7 @@ void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { IR::U32 result; if (shl.w != 0) { // When .W is set, the shift value is wrapped - // To emulate this we just have to clamp it ourselves. + // To emulate this we just have to wrap it ourselves. const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))}; result = v.ir.ShiftLeftLogical(base, shift); } else { diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp index 7e6bb07a2..f1230f58f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -23,14 +23,13 @@ static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { } } -IR::Block Translate(ObjectPool& inst_pool, Environment& env, - const Flow::Block& flow_block) { - IR::Block block{inst_pool, flow_block.begin.Offset(), flow_block.end.Offset()}; - TranslatorVisitor visitor{env, block}; - - const Location pc_end{flow_block.end}; - Location pc{flow_block.begin}; - while (pc != pc_end) { +void Translate(Environment& env, IR::Block* block) { + if (block->IsVirtual()) { + return; + } + TranslatorVisitor visitor{env, *block}; + const Location pc_end{block->LocationEnd()}; + for (Location pc = block->LocationBegin(); pc != pc_end; ++pc) { const u64 insn{env.ReadInstruction(pc.Offset())}; const Opcode opcode{Decode(insn)}; switch (opcode) { @@ -43,9 +42,7 @@ IR::Block Translate(ObjectPool& inst_pool, Environment& env, default: throw LogicError("Invalid opcode {}", opcode); } - ++pc; } - return block; } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h index c1c21b278..e1aa2e0f4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.h +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h @@ -6,14 +6,9 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/location.h" -#include "shader_recompiler/object_pool.h" namespace Shader::Maxwell { -[[nodiscard]] IR::Block Translate(ObjectPool& inst_pool, Environment& env, - const Flow::Block& flow_block); +void Translate(Environment& env, IR::Block* block); } // namespace Shader::Maxwell -- cgit v1.2.3 From 8af9297f0972d0aaa8306369c5d04926b886a89e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 14 Feb 2021 01:24:32 -0300 Subject: shader: Misc fixes --- src/shader_recompiler/frontend/ir/basic_block.cpp | 4 ++-- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 2 ++ src/shader_recompiler/frontend/ir/microinstruction.cpp | 16 +++++++++------- .../frontend/maxwell/translate/impl/integer_add.cpp | 4 ++-- .../maxwell/translate/impl/integer_set_predicate.cpp | 4 ++-- 5 files changed, 17 insertions(+), 13 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index b5616f394..c97626712 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -113,7 +113,7 @@ static std::string ArgToIndex(const std::map& block_to_ind if (arg.IsLabel()) { return BlockToIndex(block_to_index, arg.Label()); } - if (!arg.IsImmediate()) { + if (!arg.IsImmediate() || arg.IsIdentity()) { return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst())); } switch (arg.Type()) { @@ -166,7 +166,7 @@ std::string DumpBlock(const Block& block, const std::map& const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, inst_index, arg)}; ret += arg_index != 0 ? ", " : " "; if (op == Opcode::Phi) { - ret += fmt::format("[ {}, {} ]", arg_index, + ret += fmt::format("[ {}, {} ]", arg_str, BlockToIndex(block_to_index, inst.PhiBlock(arg_index))); } else { ret += arg_str; diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 30932043f..f42489d41 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -46,10 +46,12 @@ F64 IREmitter::Imm64(f64 value) const { void IREmitter::Branch(Block* label) { label->AddImmediatePredecessor(block); + block->SetBranch(label); Inst(Opcode::Branch, label); } void IREmitter::BranchConditional(const U1& condition, Block* true_label, Block* false_label) { + block->SetBranches(IR::Condition{true}, true_label, false_label); true_label->AddImmediatePredecessor(block); false_label->AddImmediatePredecessor(block); Inst(Opcode::BranchConditional, condition, true_label, false_label); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index b4ae371bd..9279b9692 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -143,19 +143,21 @@ Value Inst::Arg(size_t index) const { } void Inst::SetArg(size_t index, Value value) { - if (op == Opcode::Phi) { - throw LogicError("Setting argument on a phi instruction"); - } - if (index >= NumArgsOf(op)) { + if (index >= NumArgs()) { throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); } - if (!args[index].IsImmediate()) { - UndoUse(args[index]); + const IR::Value arg{Arg(index)}; + if (!arg.IsImmediate()) { + UndoUse(arg); } if (!value.IsImmediate()) { Use(value); } - args[index] = value; + if (op == Opcode::Phi) { + phi_args[index].second = value; + } else { + args[index] = value; + } } Block* Inst::PhiBlock(size_t index) const { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp index 60f79b160..623e78ff8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -76,8 +76,8 @@ void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { } } // Anonymous namespace -void TranslatorVisitor::IADD_reg(u64) { - throw NotImplementedException("IADD (reg)"); +void TranslatorVisitor::IADD_reg(u64 insn) { + IADD(*this, insn, GetReg20(insn)); } void TranslatorVisitor::IADD_cbuf(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp index 76c6b5291..1bc9ef363 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp @@ -92,8 +92,8 @@ void TranslatorVisitor::ISETP_cbuf(u64 insn) { ISETP(*this, insn, GetCbuf(insn)); } -void TranslatorVisitor::ISETP_imm(u64) { - throw NotImplementedException("ISETP_imm"); +void TranslatorVisitor::ISETP_imm(u64 insn) { + ISETP(*this, insn, GetImm20(insn)); } } // namespace Shader::Maxwell -- cgit v1.2.3 From cbfb7d182a4e90e4e263696d1fca35e47d3eabb4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 14 Feb 2021 20:15:42 -0300 Subject: shader: Support SSA loops on IR --- src/shader_recompiler/frontend/ir/function.h | 1 + src/shader_recompiler/frontend/ir/post_order.cpp | 48 ++++++++++++++++++++++ src/shader_recompiler/frontend/ir/post_order.h | 13 ++++++ src/shader_recompiler/frontend/maxwell/program.cpp | 12 ++++-- 4 files changed, 70 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/frontend/ir/post_order.cpp create mode 100644 src/shader_recompiler/frontend/ir/post_order.h (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/function.h b/src/shader_recompiler/frontend/ir/function.h index fd7d56419..d1f061146 100644 --- a/src/shader_recompiler/frontend/ir/function.h +++ b/src/shader_recompiler/frontend/ir/function.h @@ -12,6 +12,7 @@ namespace Shader::IR { struct Function { BlockList blocks; + BlockList post_order_blocks; }; } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp new file mode 100644 index 000000000..a48b8dec5 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/post_order.cpp @@ -0,0 +1,48 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/post_order.h" + +namespace Shader::IR { + +BlockList PostOrder(const BlockList& blocks) { + boost::container::small_vector block_stack; + boost::container::flat_set visited; + + BlockList post_order_blocks; + post_order_blocks.reserve(blocks.size()); + + Block* const first_block{blocks.front()}; + visited.insert(first_block); + block_stack.push_back(first_block); + + const auto visit_branch = [&](Block* block, Block* branch) { + if (!branch) { + return false; + } + if (!visited.insert(branch).second) { + return false; + } + // Calling push_back twice is faster than insert on msvc + block_stack.push_back(block); + block_stack.push_back(branch); + return true; + }; + while (!block_stack.empty()) { + Block* const block{block_stack.back()}; + block_stack.pop_back(); + + if (!visit_branch(block, block->TrueBranch()) && + !visit_branch(block, block->FalseBranch())) { + post_order_blocks.push_back(block); + } + } + return post_order_blocks; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/post_order.h b/src/shader_recompiler/frontend/ir/post_order.h new file mode 100644 index 000000000..30137ff57 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/post_order.h @@ -0,0 +1,13 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "shader_recompiler/frontend/ir/basic_block.h" + +namespace Shader::IR { + +BlockList PostOrder(const BlockList& blocks); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 9fa912ed8..dab6d68c0 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -7,6 +7,7 @@ #include #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/post_order.h" #include "shader_recompiler/frontend/ir/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/program.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" @@ -56,11 +57,14 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Sun, 14 Feb 2021 22:46:40 -0300 Subject: shader: Add support for forward declarations --- src/shader_recompiler/frontend/ir/basic_block.cpp | 2 +- src/shader_recompiler/frontend/ir/basic_block.h | 2 +- src/shader_recompiler/frontend/ir/ir_emitter.h | 4 ++-- .../frontend/ir/microinstruction.cpp | 2 +- src/shader_recompiler/frontend/ir/microinstruction.h | 20 +++++++++++++++++--- src/shader_recompiler/frontend/ir/modifiers.h | 10 +++++----- 6 files changed, 27 insertions(+), 13 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index c97626712..5ae91dd7d 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -26,7 +26,7 @@ void Block::AppendNewInst(Opcode op, std::initializer_list args) { } Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, - std::initializer_list args, u64 flags) { + std::initializer_list args, u32 flags) { Inst* const inst{inst_pool->Create(op, flags)}; const auto result_it{instructions.insert(insertion_point, *inst)}; diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 3205705e7..778b32e43 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -42,7 +42,7 @@ public: /// Prepends a new instruction to this basic block before the insertion point. iterator PrependNewInst(iterator insertion_point, Opcode op, - std::initializer_list args = {}, u64 flags = 0); + std::initializer_list args = {}, u32 flags = 0); /// Set the branches to jump to when all instructions have executed. void SetBranches(Condition cond, Block* branch_true, Block* branch_false); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 4decb46bc..24b012a39 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -178,7 +178,7 @@ private: } template - requires(sizeof(T) <= sizeof(u64) && std::is_trivially_copyable_v) struct Flags { + requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v) struct Flags { Flags() = default; Flags(T proxy_) : proxy{proxy_} {} @@ -187,7 +187,7 @@ private: template T Inst(Opcode op, Flags flags, Args... args) { - u64 raw_flags{}; + u32 raw_flags{}; std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy)); auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; return T{Value{&*it}}; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 9279b9692..ee76db9ad 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -31,7 +31,7 @@ static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) inst = nullptr; } -Inst::Inst(IR::Opcode op_, u64 flags_) noexcept : op{op_}, flags{flags_} { +Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { if (op == Opcode::Phi) { std::construct_at(&phi_args); } else { diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index ddf0f90a9..5b244fa0b 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -12,6 +12,7 @@ #include +#include "common/bit_cast.h" #include "common/common_types.h" #include "shader_recompiler/frontend/ir/opcodes.h" #include "shader_recompiler/frontend/ir/type.h" @@ -25,7 +26,7 @@ constexpr size_t MAX_ARG_COUNT = 4; class Inst : public boost::intrusive::list_base_hook<> { public: - explicit Inst(Opcode op_, u64 flags_) noexcept; + explicit Inst(Opcode op_, u32 flags_) noexcept; ~Inst(); Inst& operator=(const Inst&) = delete; @@ -86,13 +87,25 @@ public: void ReplaceUsesWith(Value replacement); template - requires(sizeof(FlagsType) <= sizeof(u64) && std::is_trivially_copyable_v) + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) [[nodiscard]] FlagsType Flags() const noexcept { FlagsType ret; std::memcpy(&ret, &flags, sizeof(ret)); return ret; } + /// Intrusively store the host definition of this instruction. + template + void SetDefinition(DefinitionType def) { + definition = Common::BitCast(def); + } + + /// Return the intrusively stored host definition of this instruction. + template + [[nodiscard]] DefinitionType Definition() const noexcept { + return Common::BitCast(definition); + } + private: struct NonTriviallyDummy { NonTriviallyDummy() noexcept {} @@ -103,7 +116,8 @@ private: IR::Opcode op{}; int use_count{}; - u64 flags{}; + u32 flags{}; + u32 definition{}; union { NonTriviallyDummy dummy{}; std::array args; diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 28bb9e798..c288eede0 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -6,13 +6,13 @@ namespace Shader::IR { -enum class FmzMode { +enum class FmzMode : u8 { None, // Denorms are not flushed, NAN is propagated (nouveau) FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) }; -enum class FpRounding { +enum class FpRounding : u8 { RN, // Round to nearest even, RM, // Round towards negative infinity RP, // Round towards positive infinity @@ -21,8 +21,8 @@ enum class FpRounding { struct FpControl { bool no_contraction{false}; - FpRounding rounding : 8 = FpRounding::RN; - FmzMode fmz_mode : 8 = FmzMode::FTZ; + FpRounding rounding{FpRounding::RN}; + FmzMode fmz_mode{FmzMode::FTZ}; }; -static_assert(sizeof(FpControl) <= sizeof(u64)); +static_assert(sizeof(FpControl) <= sizeof(u32)); } // namespace Shader::IR -- cgit v1.2.3 From d5d468cf2cbe235ee149dbd37951389d2a7e61da Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 15 Feb 2021 00:09:11 -0300 Subject: shader: Improve object pool --- src/shader_recompiler/frontend/ir/structured_control_flow.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp index 2e9ce2525..d145095d1 100644 --- a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp @@ -269,7 +269,7 @@ bool SearchNode(const Tree& tree, ConstNode stmt, size_t& offset) { class GotoPass { public: - explicit GotoPass(std::span blocks, ObjectPool& stmt_pool) + explicit GotoPass(std::span blocks, ObjectPool& stmt_pool) : pool{stmt_pool} { std::vector gotos{BuildUnorderedTreeGetGotos(blocks)}; fmt::print(stdout, "BEFORE\n{}\n", DumpTree(root_stmt.children)); @@ -554,7 +554,7 @@ private: return offset; } - ObjectPool& pool; + ObjectPool& pool; Statement root_stmt{FunctionTag{}}; }; @@ -589,7 +589,7 @@ Block* TryFindForwardBlock(const Statement& stmt) { class TranslatePass { public: TranslatePass(ObjectPool& inst_pool_, ObjectPool& block_pool_, - ObjectPool& stmt_pool_, Statement& root_stmt, + ObjectPool& stmt_pool_, Statement& root_stmt, const std::function& func_, BlockList& block_list_) : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, func{func_}, block_list{block_list_} { @@ -720,7 +720,7 @@ private: return block; } - ObjectPool& stmt_pool; + ObjectPool& stmt_pool; ObjectPool& inst_pool; ObjectPool& block_pool; const std::function& func; @@ -731,7 +731,7 @@ private: BlockList VisitAST(ObjectPool& inst_pool, ObjectPool& block_pool, std::span unordered_blocks, const std::function& func) { - ObjectPool stmt_pool; + ObjectPool stmt_pool{64}; GotoPass goto_pass{unordered_blocks, stmt_pool}; BlockList block_list; TranslatePass translate_pass{inst_pool, block_pool, stmt_pool, goto_pass.RootStatement(), -- cgit v1.2.3 From b5d7279d878211654b4abb165d94af763a365f47 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 16 Feb 2021 04:10:22 -0300 Subject: spirv: Initial bindings support --- src/shader_recompiler/frontend/ir/basic_block.h | 16 ++++++++++++++++ src/shader_recompiler/frontend/ir/program.h | 2 ++ src/shader_recompiler/frontend/maxwell/program.cpp | 7 ++++--- 3 files changed, 22 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 778b32e43..b14a35ec5 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -11,6 +11,7 @@ #include +#include "common/bit_cast.h" #include "shader_recompiler/frontend/ir/condition.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/value.h" @@ -68,6 +69,18 @@ public: /// Gets an immutable span to the immediate predecessors. [[nodiscard]] std::span ImmediatePredecessors() const noexcept; + /// Intrusively store the host definition of this instruction. + template + void SetDefinition(DefinitionType def) { + definition = Common::BitCast(def); + } + + /// Return the intrusively stored host definition of this instruction. + template + [[nodiscard]] DefinitionType Definition() const noexcept { + return Common::BitCast(definition); + } + [[nodiscard]] Condition BranchCondition() const noexcept { return branch_cond; } @@ -161,6 +174,9 @@ private: Block* branch_false{nullptr}; /// Block immediate predecessors std::vector imm_predecessors; + + /// Intrusively stored host definition of this block. + u32 definition{}; }; using BlockList = std::vector; diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index efaf1aa1e..98aab2dc6 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -9,11 +9,13 @@ #include #include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/shader_info.h" namespace Shader::IR { struct Program { boost::container::small_vector functions; + Info info; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index dab6d68c0..8331d576c 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -53,21 +53,22 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Tue, 16 Feb 2021 19:49:24 -0300 Subject: shader: Add utility to resolve identities on a value --- src/shader_recompiler/frontend/ir/value.cpp | 7 +++++++ src/shader_recompiler/frontend/ir/value.h | 1 + 2 files changed, 8 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 9ea61813b..718314213 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -79,6 +79,13 @@ IR::Inst* Value::InstRecursive() const { return inst; } +IR::Value Value::Resolve() const { + if (IsIdentity()) { + return inst->Arg(0).Resolve(); + } + return *this; +} + IR::Reg Value::Reg() const { ValidateAccess(Type::Reg); return reg; diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 2f3688c73..8aba0bbf6 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -41,6 +41,7 @@ public: [[nodiscard]] IR::Inst* Inst() const; [[nodiscard]] IR::Block* Label() const; [[nodiscard]] IR::Inst* InstRecursive() const; + [[nodiscard]] IR::Value Resolve() const; [[nodiscard]] IR::Reg Reg() const; [[nodiscard]] IR::Pred Pred() const; [[nodiscard]] IR::Attribute Attribute() const; -- cgit v1.2.3 From 4b438f94cf56ac065cb3682a8e6ce1ea8df0ae7e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 16 Feb 2021 19:49:45 -0300 Subject: shader: Simplify ISCADD --- .../frontend/maxwell/translate/impl/integer_scaled_add.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp index f92c0bbd6..f06046d4d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -50,12 +50,7 @@ void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { } // Anonymous namespace void TranslatorVisitor::ISCADD_reg(u64 insn) { - union { - u64 raw; - BitField<20, 8, IR::Reg> op_b; - } const iscadd{insn}; - - ISCADD(*this, insn, X(iscadd.op_b)); + ISCADD(*this, insn, GetReg20(insn)); } void TranslatorVisitor::ISCADD_cbuf(u64) { -- cgit v1.2.3 From 85cce78583bc2232428a8fb39e43182877c8d5ad Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 17 Feb 2021 00:59:28 -0300 Subject: shader: Primitive Vulkan integration --- src/shader_recompiler/frontend/ir/basic_block.cpp | 2 ++ src/shader_recompiler/frontend/ir/post_order.cpp | 2 +- src/shader_recompiler/frontend/maxwell/program.cpp | 2 +- .../frontend/maxwell/translate/impl/impl.cpp | 8 +++++ .../frontend/maxwell/translate/impl/impl.h | 1 + .../maxwell/translate/impl/move_register.cpp | 35 +++++++++++----------- .../maxwell/translate/impl/not_implemented.cpp | 4 --- 7 files changed, 30 insertions(+), 24 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index 5ae91dd7d..ec029dfd6 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -127,6 +127,8 @@ static std::string ArgToIndex(const std::map& block_to_ind return fmt::format("#{}", arg.U32()); case Type::U64: return fmt::format("#{}", arg.U64()); + case Type::F32: + return fmt::format("#{}", arg.F32()); case Type::Reg: return fmt::format("{}", arg.Reg()); case Type::Pred: diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp index a48b8dec5..8709a2ea1 100644 --- a/src/shader_recompiler/frontend/ir/post_order.cpp +++ b/src/shader_recompiler/frontend/ir/post_order.cpp @@ -28,7 +28,7 @@ BlockList PostOrder(const BlockList& blocks) { if (!visited.insert(branch).second) { return false; } - // Calling push_back twice is faster than insert on msvc + // Calling push_back twice is faster than insert on MSVC block_stack.push_back(block); block_stack.push_back(branch); return true; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8331d576c..8c44ebb29 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -69,7 +69,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool(value)); } +IR::U32 TranslatorVisitor::GetReg8(u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + IR::U32 TranslatorVisitor::GetReg20(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index b701605d7..8bd468244 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -301,6 +301,7 @@ public: void X(IR::Reg dest_reg, const IR::U32& value); void F(IR::Reg dest_reg, const IR::F32& value); + [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); [[nodiscard]] IR::F32 GetReg20F(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp index 1f83d1068..c3c4b9abd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp @@ -10,36 +10,35 @@ namespace Shader::Maxwell { namespace { -union MOV { - u64 raw; - BitField<0, 8, IR::Reg> dest_reg; - BitField<20, 8, IR::Reg> src_reg; - BitField<39, 4, u64> mask; -}; - -void CheckMask(MOV mov) { - if (mov.mask != 0xf) { +void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 4, u64> mask; + BitField<12, 4, u64> mov32i_mask; + } const mov{insn}; + + if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) { throw NotImplementedException("Non-full move mask"); } + v.X(mov.dest_reg, src); } } // Anonymous namespace void TranslatorVisitor::MOV_reg(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, X(mov.src_reg)); + MOV(*this, insn, GetReg8(insn)); } void TranslatorVisitor::MOV_cbuf(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, GetCbuf(insn)); + MOV(*this, insn, GetCbuf(insn)); } void TranslatorVisitor::MOV_imm(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, GetImm20(insn)); + MOV(*this, insn, GetImm20(insn)); +} + +void TranslatorVisitor::MOV32I(u64 insn) { + MOV(*this, insn, GetImm32(insn), true); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 1bb160acb..6b2a1356b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -617,10 +617,6 @@ void TranslatorVisitor::MEMBAR(u64) { ThrowNotImplemented(Opcode::MEMBAR); } -void TranslatorVisitor::MOV32I(u64) { - ThrowNotImplemented(Opcode::MOV32I); -} - void TranslatorVisitor::NOP(u64) { ThrowNotImplemented(Opcode::NOP); } -- cgit v1.2.3 From 6db69990da9f232e6d982cdcb69c2e27d93075cf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Feb 2021 18:10:18 -0300 Subject: spirv: Add lower fp16 to fp32 pass --- src/shader_recompiler/frontend/ir/condition.h | 2 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 70 +++++++++++----------- .../frontend/ir/microinstruction.cpp | 4 ++ .../frontend/ir/microinstruction.h | 2 + src/shader_recompiler/frontend/ir/opcodes.inc | 6 +- src/shader_recompiler/frontend/ir/program.cpp | 2 +- src/shader_recompiler/frontend/maxwell/program.cpp | 2 + .../impl/floating_point_conversion_integer.cpp | 62 ++++++++++++++----- .../frontend/maxwell/translate/impl/impl.h | 2 +- 9 files changed, 97 insertions(+), 55 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h index 16b4ae888..51c2f15cf 100644 --- a/src/shader_recompiler/frontend/ir/condition.h +++ b/src/shader_recompiler/frontend/ir/condition.h @@ -4,8 +4,8 @@ #pragma once -#include #include +#include #include diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index f42489d41..559ab9cca 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -547,11 +547,11 @@ F32 IREmitter::FPSqrt(const F32& value) { F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPSaturate16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPSaturate32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPSaturate64, value); default: ThrowInvalidType(value.Type()); @@ -560,11 +560,11 @@ F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPRoundEven16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPRoundEven32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPRoundEven64, value); default: ThrowInvalidType(value.Type()); @@ -573,11 +573,11 @@ F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) { F16F32F64 IREmitter::FPFloor(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPFloor16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPFloor32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPFloor64, value); default: ThrowInvalidType(value.Type()); @@ -586,11 +586,11 @@ F16F32F64 IREmitter::FPFloor(const F16F32F64& value) { F16F32F64 IREmitter::FPCeil(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPCeil16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPCeil32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPCeil64, value); default: ThrowInvalidType(value.Type()); @@ -599,11 +599,11 @@ F16F32F64 IREmitter::FPCeil(const F16F32F64& value) { F16F32F64 IREmitter::FPTrunc(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPTrunc16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPTrunc32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPTrunc64, value); default: ThrowInvalidType(value.Type()); @@ -729,33 +729,33 @@ U32U64 IREmitter::ConvertFToS(size_t bitsize, const F16F32F64& value) { switch (bitsize) { case 16: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertS16F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertS16F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertS16F64, value); default: ThrowInvalidType(value.Type()); } case 32: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertS32F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertS32F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertS32F64, value); default: ThrowInvalidType(value.Type()); } case 64: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertS64F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertS64F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertS64F64, value); default: ThrowInvalidType(value.Type()); @@ -769,33 +769,33 @@ U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) { switch (bitsize) { case 16: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertU16F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertU16F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertU16F64, value); default: ThrowInvalidType(value.Type()); } case 32: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertU32F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertU32F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertU32F64, value); default: ThrowInvalidType(value.Type()); } case 64: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertU64F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertU64F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertU64F64, value); default: ThrowInvalidType(value.Type()); @@ -829,10 +829,10 @@ U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) { case 64: switch (value.Type()) { case Type::U32: + return Inst(Opcode::ConvertU64U32, value); + case Type::U64: // Nothing to do return value; - case Type::U64: - return Inst(Opcode::ConvertU64U32, value); default: break; } diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index ee76db9ad..d6a9be87d 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -216,6 +216,10 @@ void Inst::ReplaceUsesWith(Value replacement) { } } +void Inst::ReplaceOpcode(IR::Opcode opcode) { + op = opcode; +} + void Inst::Use(const Value& value) { Inst* const inst{value.Inst()}; ++inst->use_count; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 5b244fa0b..321393dd7 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -86,6 +86,8 @@ public: void ReplaceUsesWith(Value replacement); + void ReplaceOpcode(IR::Opcode opcode); + template requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) [[nodiscard]] FlagsType Flags() const noexcept { diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index ede5e20c2..50da77535 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -119,8 +119,10 @@ OPCODE(PackUint2x32, U64, U32x OPCODE(UnpackUint2x32, U32x2, U64, ) OPCODE(PackFloat2x16, U32, F16x2, ) OPCODE(UnpackFloat2x16, F16x2, U32, ) -OPCODE(PackDouble2x32, U64, U32x2, ) -OPCODE(UnpackDouble2x32, U32x2, U64, ) +OPCODE(PackHalf2x16, U32, F32x2, ) +OPCODE(UnpackHalf2x16, F32x2, U32, ) +OPCODE(PackDouble2x32, F64, U32x2, ) +OPCODE(UnpackDouble2x32, U32x2, F64, ) // Pseudo-operation, handled specially at final emit OPCODE(GetZeroFromOp, U1, Opaque, ) diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp index 0ce99ef2a..8c301c3a1 100644 --- a/src/shader_recompiler/frontend/ir/program.cpp +++ b/src/shader_recompiler/frontend/ir/program.cpp @@ -35,4 +35,4 @@ std::string DumpProgram(const Program& program) { return ret; } -} // namespace Shader::IR \ No newline at end of file +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8c44ebb29..16cdc12e2 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -56,6 +56,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool dest_format; BitField<10, 2, SrcFormat> src_format; BitField<12, 1, u64> is_signed; - BitField<39, 1, Rounding> rounding; + BitField<39, 2, Rounding> rounding; BitField<49, 1, u64> half; BitField<44, 1, u64> ftz; BitField<45, 1, u64> abs; @@ -55,6 +55,28 @@ size_t BitSize(DestFormat dest_format) { } } +IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<20, 14, s64> offset; + BitField<34, 5, u64> binding; + } const cbuf{insn}; + if (cbuf.binding >= 18) { + throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); + } + if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) { + throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4); + } + if (cbuf.offset % 2 != 0) { + throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4); + } + const IR::U32 binding{v.ir.Imm32(static_cast(cbuf.binding))}; + const IR::U32 byte_offset{v.ir.Imm32(static_cast(cbuf.offset) * 4 + 4)}; + const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)}; + const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)}; + return v.ir.PackDouble2x32(vector); +} + void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { // F2I is used to convert from a floating point value to an integer const F2I f2i{insn}; @@ -82,19 +104,16 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { const size_t bitsize{BitSize(f2i.dest_format)}; const IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, rounded_value)}; - v.X(f2i.dest_reg, result); + if (bitsize == 64) { + const IR::Value vector{v.ir.UnpackUint2x32(result)}; + v.X(f2i.dest_reg + 0, IR::U32{v.ir.CompositeExtract(vector, 0)}); + v.X(f2i.dest_reg + 1, IR::U32{v.ir.CompositeExtract(vector, 1)}); + } else { + v.X(f2i.dest_reg, result); + } if (f2i.cc != 0) { - v.SetZFlag(v.ir.GetZeroFromOp(result)); - if (is_signed) { - v.SetSFlag(v.ir.GetSignFromOp(result)); - } else { - v.ResetSFlag(); - } - v.ResetCFlag(); - - // TODO: Investigate if out of bound conversions sets the overflow flag - v.ResetOFlag(); + throw NotImplementedException("F2I CC"); } } } // Anonymous namespace @@ -118,12 +137,25 @@ void TranslatorVisitor::F2I_reg(u64 insn) { f2i.base.src_format.Value()); } }()}; - TranslateF2I(*this, insn, op_a); } -void TranslatorVisitor::F2I_cbuf(u64) { - throw NotImplementedException("{}", Opcode::F2I_cbuf); +void TranslatorVisitor::F2I_cbuf(u64 insn) { + const F2I f2i{insn}; + const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 { + switch (f2i.src_format) { + case SrcFormat::F16: + return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)}; + case SrcFormat::F32: + return GetCbufF(insn); + case SrcFormat::F64: { + return UnpackCbuf(*this, insn); + } + default: + throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value()); + } + }()}; + TranslateF2I(*this, insn, op_a); } void TranslatorVisitor::F2I_imm(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 8bd468244..27aba2cf8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -11,7 +11,7 @@ namespace Shader::Maxwell { class TranslatorVisitor { public: - explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_} ,ir(block) {} + explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} Environment& env; IR::IREmitter ir; -- cgit v1.2.3 From e2bc05b17d91854cbb9c0ce3647141bf7d33143e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Feb 2021 03:30:13 -0300 Subject: shader: Add denorm flush support --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 32 +++++++++++----------- src/shader_recompiler/frontend/ir/ir_emitter.h | 8 +++--- src/shader_recompiler/frontend/ir/modifiers.h | 23 ++++++++++------ .../impl/floating_point_conversion_integer.cpp | 19 ++++++++++--- 4 files changed, 49 insertions(+), 33 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 559ab9cca..8f120a2f6 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -558,53 +558,53 @@ F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { } } -F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) { +F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst(Opcode::FPRoundEven16, value); + return Inst(Opcode::FPRoundEven16, Flags{control}, value); case Type::F32: - return Inst(Opcode::FPRoundEven32, value); + return Inst(Opcode::FPRoundEven32, Flags{control}, value); case Type::F64: - return Inst(Opcode::FPRoundEven64, value); + return Inst(Opcode::FPRoundEven64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } } -F16F32F64 IREmitter::FPFloor(const F16F32F64& value) { +F16F32F64 IREmitter::FPFloor(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst(Opcode::FPFloor16, value); + return Inst(Opcode::FPFloor16, Flags{control}, value); case Type::F32: - return Inst(Opcode::FPFloor32, value); + return Inst(Opcode::FPFloor32, Flags{control}, value); case Type::F64: - return Inst(Opcode::FPFloor64, value); + return Inst(Opcode::FPFloor64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } } -F16F32F64 IREmitter::FPCeil(const F16F32F64& value) { +F16F32F64 IREmitter::FPCeil(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst(Opcode::FPCeil16, value); + return Inst(Opcode::FPCeil16, Flags{control}, value); case Type::F32: - return Inst(Opcode::FPCeil32, value); + return Inst(Opcode::FPCeil32, Flags{control}, value); case Type::F64: - return Inst(Opcode::FPCeil64, value); + return Inst(Opcode::FPCeil64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } } -F16F32F64 IREmitter::FPTrunc(const F16F32F64& value) { +F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst(Opcode::FPTrunc16, value); + return Inst(Opcode::FPTrunc16, Flags{control}, value); case Type::F32: - return Inst(Opcode::FPTrunc32, value); + return Inst(Opcode::FPTrunc32, Flags{control}, value); case Type::F64: - return Inst(Opcode::FPTrunc64, value); + return Inst(Opcode::FPTrunc64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 24b012a39..959f4f9da 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -129,10 +129,10 @@ public: [[nodiscard]] F32 FPSinNotReduced(const F32& value); [[nodiscard]] F32 FPSqrt(const F32& value); [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {}); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index c288eede0..44652eae7 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -4,25 +4,30 @@ #pragma once +#include "common/common_types.h" + namespace Shader::IR { enum class FmzMode : u8 { - None, // Denorms are not flushed, NAN is propagated (nouveau) - FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) - FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) + DontCare, // Not specified for this instruction + FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) + FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) + None, // Denorms are not flushed, NAN is propagated (nouveau) }; enum class FpRounding : u8 { - RN, // Round to nearest even, - RM, // Round towards negative infinity - RP, // Round towards positive infinity - RZ, // Round towards zero + DontCare, // Not specified for this instruction + RN, // Round to nearest even, + RM, // Round towards negative infinity + RP, // Round towards positive infinity + RZ, // Round towards zero }; struct FpControl { bool no_contraction{false}; - FpRounding rounding{FpRounding::RN}; - FmzMode fmz_mode{FmzMode::FTZ}; + FpRounding rounding{FpRounding::DontCare}; + FmzMode fmz_mode{FmzMode::DontCare}; }; static_assert(sizeof(FpControl) <= sizeof(u32)); + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index ae2d37405..4d82a0009 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -81,17 +81,28 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { // F2I is used to convert from a floating point value to an integer const F2I f2i{insn}; + const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 && + f2i.dest_format != DestFormat::I64}; + IR::FmzMode fmz_mode{IR::FmzMode::DontCare}; + if (denorm_cares) { + fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None; + } + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{fmz_mode}, + }; const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; const IR::F16F32F64 rounded_value{[&] { switch (f2i.rounding) { case Rounding::Round: - return v.ir.FPRoundEven(op_a); + return v.ir.FPRoundEven(op_a, fp_control); case Rounding::Floor: - return v.ir.FPFloor(op_a); + return v.ir.FPFloor(op_a, fp_control); case Rounding::Ceil: - return v.ir.FPCeil(op_a); + return v.ir.FPCeil(op_a, fp_control); case Rounding::Trunc: - return v.ir.FPTrunc(op_a); + return v.ir.FPTrunc(op_a, fp_control); default: throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); } -- cgit v1.2.3 From 704c6f353f68745168902c6c66c04bb730bd30e6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Feb 2021 17:50:14 -0300 Subject: shader: Rename, implement FADD.SAT and P2R (imm) --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 16 +++--- src/shader_recompiler/frontend/ir/pred.h | 4 +- src/shader_recompiler/frontend/maxwell/program.cpp | 2 +- .../maxwell/translate/impl/floating_point_add.cpp | 20 +++---- .../impl/floating_point_conversion_integer.cpp | 2 +- .../impl/floating_point_fused_multiply_add.cpp | 4 +- .../translate/impl/floating_point_multiply.cpp | 2 +- .../frontend/maxwell/translate/impl/impl.cpp | 17 +++++- .../frontend/maxwell/translate/impl/impl.h | 7 ++- .../maxwell/translate/impl/integer_add.cpp | 4 +- .../translate/impl/move_predicate_to_register.cpp | 66 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 ---- 12 files changed, 111 insertions(+), 45 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 8f120a2f6..34c2f67fb 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -468,11 +468,11 @@ F16F32F64 IREmitter::FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F F16F32F64 IREmitter::FPAbs(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPAbs16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPAbs32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPAbs64, value); default: ThrowInvalidType(value.Type()); @@ -481,11 +481,11 @@ F16F32F64 IREmitter::FPAbs(const F16F32F64& value) { F16F32F64 IREmitter::FPNeg(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPNeg16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPNeg32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPNeg64, value); default: ThrowInvalidType(value.Type()); @@ -495,10 +495,10 @@ F16F32F64 IREmitter::FPNeg(const F16F32F64& value) { F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) { F16F32F64 result{value}; if (abs) { - result = FPAbs(value); + result = FPAbs(result); } if (neg) { - result = FPNeg(value); + result = FPNeg(result); } return result; } diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h index c6f2f82bf..4e7f32423 100644 --- a/src/shader_recompiler/frontend/ir/pred.h +++ b/src/shader_recompiler/frontend/ir/pred.h @@ -19,8 +19,8 @@ enum class Pred : u64 { PT, }; -constexpr size_t NUM_USER_PREDS = 6; -constexpr size_t NUM_PREDS = 7; +constexpr size_t NUM_USER_PREDS = 7; +constexpr size_t NUM_PREDS = 8; [[nodiscard]] constexpr size_t PredIndex(Pred pred) noexcept { return static_cast(pred); diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 16cdc12e2..ed5dbf41f 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -56,12 +56,12 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool src_a; } const fadd{insn}; - if (sat) { - throw NotImplementedException("FADD SAT"); - } if (cc) { throw NotImplementedException("FADD CC"); } @@ -31,7 +27,11 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, }; - v.F(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); + IR::F32 value{v.ir.FPAdd(op_a, op_b, control)}; + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(fadd.dest_reg, value); } void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { @@ -53,15 +53,15 @@ void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { } // Anonymous namespace void TranslatorVisitor::FADD_reg(u64 insn) { - FADD(*this, insn, GetReg20F(insn)); + FADD(*this, insn, GetRegFloat20(insn)); } -void TranslatorVisitor::FADD_cbuf(u64) { - throw NotImplementedException("FADD (cbuf)"); +void TranslatorVisitor::FADD_cbuf(u64 insn) { + FADD(*this, insn, GetFloatCbuf(insn)); } -void TranslatorVisitor::FADD_imm(u64) { - throw NotImplementedException("FADD (imm)"); +void TranslatorVisitor::FADD_imm(u64 insn) { + FADD(*this, insn, GetFloatImm20(insn)); } void TranslatorVisitor::FADD32I(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 4d82a0009..81175627f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -158,7 +158,7 @@ void TranslatorVisitor::F2I_cbuf(u64 insn) { case SrcFormat::F16: return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)}; case SrcFormat::F32: - return GetCbufF(insn); + return GetFloatCbuf(insn); case SrcFormat::F64: { return UnpackCbuf(*this, insn); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index 1464f2807..758700d3c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -51,7 +51,7 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s } // Anonymous namespace void TranslatorVisitor::FFMA_reg(u64 insn) { - FFMA(*this, insn, GetReg20F(insn), GetReg39F(insn)); + FFMA(*this, insn, GetRegFloat20(insn), GetRegFloat39(insn)); } void TranslatorVisitor::FFMA_rc(u64) { @@ -59,7 +59,7 @@ void TranslatorVisitor::FFMA_rc(u64) { } void TranslatorVisitor::FFMA_cr(u64 insn) { - FFMA(*this, insn, GetCbufF(insn), GetReg39F(insn)); + FFMA(*this, insn, GetFloatCbuf(insn), GetRegFloat39(insn)); } void TranslatorVisitor::FFMA_imm(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index 1b1d38be7..5c38d3fc1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -91,7 +91,7 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { } // Anonymous namespace void TranslatorVisitor::FMUL_reg(u64 insn) { - return FMUL(*this, insn, GetReg20F(insn)); + return FMUL(*this, insn, GetRegFloat20(insn)); } void TranslatorVisitor::FMUL_cbuf(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 079e3497f..be17bb0d9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -48,11 +48,11 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) { return X(reg.index); } -IR::F32 TranslatorVisitor::GetReg20F(u64 insn) { +IR::F32 TranslatorVisitor::GetRegFloat20(u64 insn) { return ir.BitCast(GetReg20(insn)); } -IR::F32 TranslatorVisitor::GetReg39F(u64 insn) { +IR::F32 TranslatorVisitor::GetRegFloat39(u64 insn) { return ir.BitCast(GetReg39(insn)); } @@ -73,7 +73,7 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { return ir.GetCbuf(binding, byte_offset); } -IR::F32 TranslatorVisitor::GetCbufF(u64 insn) { +IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) { return ir.BitCast(GetCbuf(insn)); } @@ -88,6 +88,17 @@ IR::U32 TranslatorVisitor::GetImm20(u64 insn) { return ir.Imm32(value); } +IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const imm{insn}; + const f32 positive_value{Common::BitCast(static_cast(imm.value) << 12)}; + const f32 value{imm.is_negative != 0 ? -positive_value : positive_value}; + return ir.Imm32(value); +} + IR::U32 TranslatorVisitor::GetImm32(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 27aba2cf8..4d4cf2ebf 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -304,13 +304,14 @@ public: [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); - [[nodiscard]] IR::F32 GetReg20F(u64 insn); - [[nodiscard]] IR::F32 GetReg39F(u64 insn); + [[nodiscard]] IR::F32 GetRegFloat20(u64 insn); + [[nodiscard]] IR::F32 GetRegFloat39(u64 insn); [[nodiscard]] IR::U32 GetCbuf(u64 insn); - [[nodiscard]] IR::F32 GetCbufF(u64 insn); + [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); [[nodiscard]] IR::U32 GetImm20(u64 insn); + [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); [[nodiscard]] IR::U32 GetImm32(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp index 623e78ff8..1493e1815 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -84,8 +84,8 @@ void TranslatorVisitor::IADD_cbuf(u64 insn) { IADD(*this, insn, GetCbuf(insn)); } -void TranslatorVisitor::IADD_imm(u64) { - throw NotImplementedException("IADD (imm)"); +void TranslatorVisitor::IADD_imm(u64 insn) { + IADD(*this, insn, GetImm20(insn)); } void TranslatorVisitor::IADD32I(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp new file mode 100644 index 000000000..4324fd443 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + PR, + CC, +}; +} // Anonymous namespace + +void TranslatorVisitor::P2R_reg(u64) { + throw NotImplementedException("P2R (reg)"); +} + +void TranslatorVisitor::P2R_cbuf(u64) { + throw NotImplementedException("P2R (cbuf)"); +} + +void TranslatorVisitor::P2R_imm(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src; + BitField<40, 1, Mode> mode; + BitField<41, 2, u64> byte_selector; + } const p2r{insn}; + + const u32 mask{GetImm20(insn).U32()}; + const bool pr_mode{p2r.mode == Mode::PR}; + const u32 num_items{pr_mode ? 7U : 4U}; + const u32 offset{static_cast(p2r.byte_selector) * 8}; + IR::U32 insert{ir.Imm32(0)}; + for (u32 index = 0; index < num_items; ++index) { + if (((mask >> index) & 1) == 0) { + continue; + } + const IR::U1 cond{[this, index, pr_mode] { + if (pr_mode) { + return ir.GetPred(IR::Pred{index}); + } + switch (index) { + case 0: + return ir.GetZFlag(); + case 1: + return ir.GetSFlag(); + case 2: + return ir.GetCFlag(); + case 3: + return ir.GetOFlag(); + } + throw LogicError("Unreachable P2R index"); + }()}; + const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))}; + insert = ir.BitwiseOr(insert, bit); + } + const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))}; + X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 6b2a1356b..628cf1c14 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -633,18 +633,6 @@ void TranslatorVisitor::OUT_imm(u64) { ThrowNotImplemented(Opcode::OUT_imm); } -void TranslatorVisitor::P2R_reg(u64) { - ThrowNotImplemented(Opcode::P2R_reg); -} - -void TranslatorVisitor::P2R_cbuf(u64) { - ThrowNotImplemented(Opcode::P2R_cbuf); -} - -void TranslatorVisitor::P2R_imm(u64) { - ThrowNotImplemented(Opcode::P2R_imm); -} - void TranslatorVisitor::PBK() { // PBK is a no-op } -- cgit v1.2.3 From 274897dfd59b4d08029ab7e93be4f84654abcdc8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Feb 2021 23:42:38 -0300 Subject: spirv: Fixes and Intel specific workarounds --- .../frontend/ir/structured_control_flow.cpp | 3 --- src/shader_recompiler/frontend/maxwell/program.cpp | 3 --- .../frontend/maxwell/translate/impl/impl.cpp | 15 +++++++++------ 3 files changed, 9 insertions(+), 12 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp index d145095d1..032ac8fda 100644 --- a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp @@ -272,11 +272,9 @@ public: explicit GotoPass(std::span blocks, ObjectPool& stmt_pool) : pool{stmt_pool} { std::vector gotos{BuildUnorderedTreeGetGotos(blocks)}; - fmt::print(stdout, "BEFORE\n{}\n", DumpTree(root_stmt.children)); for (const Node& goto_stmt : gotos | std::views::reverse) { RemoveGoto(goto_stmt); } - fmt::print(stdout, "AFTER\n{}\n", DumpTree(root_stmt.children)); } Statement& RootStatement() noexcept { @@ -548,7 +546,6 @@ private: size_t Offset(ConstNode stmt) const { size_t offset{0}; if (!SearchNode(root_stmt.children, stmt, offset)) { - fmt::print(stdout, "{}\n", DumpTree(root_stmt.children)); throw LogicError("Node not found in tree"); } return offset; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index ed5dbf41f..dbfc04f75 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -56,7 +56,6 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool value; BitField<56, 1, u64> is_negative; } const imm{insn}; - const s32 positive_value{static_cast(imm.value)}; - const s32 value{imm.is_negative != 0 ? -positive_value : positive_value}; - return ir.Imm32(value); + if (imm.is_negative != 0) { + const s64 raw{static_cast(imm.value)}; + return ir.Imm32(static_cast(-(1LL << 19) + raw)); + } else { + return ir.Imm32(static_cast(imm.value)); + } } IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { @@ -94,9 +97,9 @@ IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { BitField<20, 19, u64> value; BitField<56, 1, u64> is_negative; } const imm{insn}; - const f32 positive_value{Common::BitCast(static_cast(imm.value) << 12)}; - const f32 value{imm.is_negative != 0 ? -positive_value : positive_value}; - return ir.Imm32(value); + const u32 sign_bit{imm.is_negative != 0 ? (1ULL << 31) : 0}; + const u32 value{static_cast(imm.value) << 12}; + return ir.Imm32(Common::BitCast(value | sign_bit)); } IR::U32 TranslatorVisitor::GetImm32(u64 insn) { -- cgit v1.2.3 From 18a766b3622baa40596490dbd4912f94e9980a76 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 22 Feb 2021 02:45:50 -0300 Subject: shader: Fix MOV(reg), add SHL variants and emit neg and abs instructions --- .../frontend/maxwell/translate/impl/integer_shift_left.cpp | 8 ++++---- .../frontend/maxwell/translate/impl/move_register.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp index b752785d4..d8a5158b5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp @@ -56,12 +56,12 @@ void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { } } // Anonymous namespace -void TranslatorVisitor::SHL_reg(u64) { - throw NotImplementedException("SHL_reg"); +void TranslatorVisitor::SHL_reg(u64 insn) { + SHL(*this, insn, GetReg20(insn)); } -void TranslatorVisitor::SHL_cbuf(u64) { - throw NotImplementedException("SHL_cbuf"); +void TranslatorVisitor::SHL_cbuf(u64 insn) { + SHL(*this, insn, GetCbuf(insn)); } void TranslatorVisitor::SHL_imm(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp index c3c4b9abd..6bb08db8a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp @@ -26,7 +26,7 @@ void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = fa } // Anonymous namespace void TranslatorVisitor::MOV_reg(u64 insn) { - MOV(*this, insn, GetReg8(insn)); + MOV(*this, insn, GetReg20(insn)); } void TranslatorVisitor::MOV_cbuf(u64 insn) { -- cgit v1.2.3 From e44752ddc8804961eb84f8c225bb36d5b4c77bc1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 22 Feb 2021 22:59:16 -0300 Subject: shader: FMUL, select, RRO, and MUFU fixes --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 144 ++++++++++++++++++--- src/shader_recompiler/frontend/ir/ir_emitter.h | 18 ++- src/shader_recompiler/frontend/ir/opcodes.inc | 50 ++++++- .../maxwell/translate/impl/common_encoding.h | 3 +- .../maxwell/translate/impl/floating_point_add.cpp | 2 +- .../impl/floating_point_fused_multiply_add.cpp | 4 +- .../impl/floating_point_multi_function.cpp | 8 +- .../translate/impl/floating_point_multiply.cpp | 42 ++++-- .../impl/floating_point_range_reduction.cpp | 41 ++++++ .../frontend/maxwell/translate/impl/impl.cpp | 12 +- .../frontend/maxwell/translate/impl/impl.h | 5 +- .../maxwell/translate/impl/integer_shift_left.cpp | 2 +- .../maxwell/translate/impl/not_implemented.cpp | 12 -- 13 files changed, 277 insertions(+), 66 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 34c2f67fb..8ba86e614 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -361,19 +361,21 @@ Value IREmitter::CompositeExtract(const Value& vector, size_t element) { } } -UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& false_value) { +Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { if (true_value.Type() != false_value.Type()) { throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); } switch (true_value.Type()) { case Type::U8: - return Inst(Opcode::Select8, condition, true_value, false_value); + return Inst(Opcode::SelectU8, condition, true_value, false_value); case Type::U16: - return Inst(Opcode::Select16, condition, true_value, false_value); + return Inst(Opcode::SelectU16, condition, true_value, false_value); case Type::U32: - return Inst(Opcode::Select32, condition, true_value, false_value); + return Inst(Opcode::SelectU32, condition, true_value, false_value); case Type::U64: - return Inst(Opcode::Select64, condition, true_value, false_value); + return Inst(Opcode::SelectU64, condition, true_value, false_value); + case Type::F32: + return Inst(Opcode::SelectF32, condition, true_value, false_value); default: throw InvalidArgument("Invalid type {}", true_value.Type()); } @@ -503,12 +505,16 @@ F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) { return result; } -F32 IREmitter::FPCosNotReduced(const F32& value) { - return Inst(Opcode::FPCosNotReduced, value); +F32 IREmitter::FPCos(const F32& value) { + return Inst(Opcode::FPCos, value); +} + +F32 IREmitter::FPSin(const F32& value) { + return Inst(Opcode::FPSin, value); } -F32 IREmitter::FPExp2NotReduced(const F32& value) { - return Inst(Opcode::FPExp2NotReduced, value); +F32 IREmitter::FPExp2(const F32& value) { + return Inst(Opcode::FPExp2, value); } F32 IREmitter::FPLog2(const F32& value) { @@ -517,9 +523,9 @@ F32 IREmitter::FPLog2(const F32& value) { F32F64 IREmitter::FPRecip(const F32F64& value) { switch (value.Type()) { - case Type::U32: + case Type::F32: return Inst(Opcode::FPRecip32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPRecip64, value); default: ThrowInvalidType(value.Type()); @@ -528,19 +534,15 @@ F32F64 IREmitter::FPRecip(const F32F64& value) { F32F64 IREmitter::FPRecipSqrt(const F32F64& value) { switch (value.Type()) { - case Type::U32: + case Type::F32: return Inst(Opcode::FPRecipSqrt32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPRecipSqrt64, value); default: ThrowInvalidType(value.Type()); } } -F32 IREmitter::FPSinNotReduced(const F32& value) { - return Inst(Opcode::FPSinNotReduced, value); -} - F32 IREmitter::FPSqrt(const F32& value) { return Inst(Opcode::FPSqrt, value); } @@ -610,6 +612,114 @@ F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) { } } +U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, lhs, rhs); + case Type::F32: + return Inst(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, lhs, rhs); + case Type::F64: + return Inst(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16, lhs, rhs); + case Type::F32: + return Inst(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32, lhs, rhs); + case Type::F64: + return Inst(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16, lhs, rhs); + case Type::F32: + return Inst(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32, lhs, rhs); + case Type::F64: + return Inst(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16, lhs, + rhs); + case Type::F32: + return Inst(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32, lhs, + rhs); + case Type::F64: + return Inst(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64, lhs, + rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst(ordered ? Opcode::FPOrdLessThanEqual16 : Opcode::FPUnordLessThanEqual16, + lhs, rhs); + case Type::F32: + return Inst(ordered ? Opcode::FPOrdLessThanEqual32 : Opcode::FPUnordLessThanEqual32, + lhs, rhs); + case Type::F64: + return Inst(ordered ? Opcode::FPOrdLessThanEqual64 : Opcode::FPUnordLessThanEqual64, + lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst(ordered ? Opcode::FPOrdGreaterThanEqual16 + : Opcode::FPUnordGreaterThanEqual16, + lhs, rhs); + case Type::F32: + return Inst(ordered ? Opcode::FPOrdGreaterThanEqual32 + : Opcode::FPUnordGreaterThanEqual32, + lhs, rhs); + case Type::F64: + return Inst(ordered ? Opcode::FPOrdGreaterThanEqual64 + : Opcode::FPUnordGreaterThanEqual64, + lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { if (a.Type() != b.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 959f4f9da..2c923716a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -98,7 +98,8 @@ public: const Value& e4); [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); - [[nodiscard]] UAny Select(const U1& condition, const UAny& true_value, const UAny& false_value); + [[nodiscard]] Value Select(const U1& condition, const Value& true_value, + const Value& false_value); template [[nodiscard]] Dest BitCast(const Source& value); @@ -121,12 +122,12 @@ public: [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value); [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg); - [[nodiscard]] F32 FPCosNotReduced(const F32& value); - [[nodiscard]] F32 FPExp2NotReduced(const F32& value); + [[nodiscard]] F32 FPCos(const F32& value); + [[nodiscard]] F32 FPSin(const F32& value); + [[nodiscard]] F32 FPExp2(const F32& value); [[nodiscard]] F32 FPLog2(const F32& value); [[nodiscard]] F32F64 FPRecip(const F32F64& value); [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); - [[nodiscard]] F32 FPSinNotReduced(const F32& value); [[nodiscard]] F32 FPSqrt(const F32& value); [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); @@ -134,6 +135,15 @@ public: [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); + [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); + [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); + [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); + [[nodiscard]] U1 FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, + bool ordered = true); + [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, + bool ordered = true); + [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); [[nodiscard]] U32 IMul(const U32& a, const U32& b); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 50da77535..f2d71144a 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -103,10 +103,12 @@ OPCODE(CompositeExtractF64x3, F64, F64x OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) // Select operations -OPCODE(Select8, U8, U1, U8, U8, ) -OPCODE(Select16, U16, U1, U16, U16, ) -OPCODE(Select32, U32, U1, U32, U32, ) -OPCODE(Select64, U64, U1, U64, U64, ) +OPCODE(SelectU8, U8, U1, U8, U8, ) +OPCODE(SelectU16, U16, U1, U16, U16, ) +OPCODE(SelectU32, U32, U1, U32, U32, ) +OPCODE(SelectU64, U64, U1, U64, U64, ) +OPCODE(SelectF16, F16, U1, F16, F16, ) +OPCODE(SelectF32, F32, U1, F32, F32, ) // Bitwise conversions OPCODE(BitCastU16F16, U16, F16, ) @@ -156,11 +158,8 @@ OPCODE(FPRecipSqrt32, F32, F32, OPCODE(FPRecipSqrt64, F64, F64, ) OPCODE(FPSqrt, F32, F32, ) OPCODE(FPSin, F32, F32, ) -OPCODE(FPSinNotReduced, F32, F32, ) OPCODE(FPExp2, F32, F32, ) -OPCODE(FPExp2NotReduced, F32, F32, ) OPCODE(FPCos, F32, F32, ) -OPCODE(FPCosNotReduced, F32, F32, ) OPCODE(FPLog2, F32, F32, ) OPCODE(FPSaturate16, F16, F16, ) OPCODE(FPSaturate32, F32, F32, ) @@ -178,6 +177,43 @@ OPCODE(FPTrunc16, F16, F16, OPCODE(FPTrunc32, F32, F32, ) OPCODE(FPTrunc64, F64, F64, ) +OPCODE(FPOrdEqual16, U1, F16, F16, ) +OPCODE(FPOrdEqual32, U1, F32, F32, ) +OPCODE(FPOrdEqual64, U1, F64, F64, ) +OPCODE(FPUnordEqual16, U1, F16, F16, ) +OPCODE(FPUnordEqual32, U1, F32, F32, ) +OPCODE(FPUnordEqual64, U1, F64, F64, ) +OPCODE(FPOrdNotEqual16, U1, F16, F16, ) +OPCODE(FPOrdNotEqual32, U1, F32, F32, ) +OPCODE(FPOrdNotEqual64, U1, F64, F64, ) +OPCODE(FPUnordNotEqual16, U1, F16, F16, ) +OPCODE(FPUnordNotEqual32, U1, F32, F32, ) +OPCODE(FPUnordNotEqual64, U1, F64, F64, ) +OPCODE(FPOrdLessThan16, U1, F16, F16, ) +OPCODE(FPOrdLessThan32, U1, F32, F32, ) +OPCODE(FPOrdLessThan64, U1, F64, F64, ) +OPCODE(FPUnordLessThan16, U1, F16, F16, ) +OPCODE(FPUnordLessThan32, U1, F32, F32, ) +OPCODE(FPUnordLessThan64, U1, F64, F64, ) +OPCODE(FPOrdGreaterThan16, U1, F16, F16, ) +OPCODE(FPOrdGreaterThan32, U1, F32, F32, ) +OPCODE(FPOrdGreaterThan64, U1, F64, F64, ) +OPCODE(FPUnordGreaterThan16, U1, F16, F16, ) +OPCODE(FPUnordGreaterThan32, U1, F32, F32, ) +OPCODE(FPUnordGreaterThan64, U1, F64, F64, ) +OPCODE(FPOrdLessThanEqual16, U1, F16, F16, ) +OPCODE(FPOrdLessThanEqual32, U1, F32, F32, ) +OPCODE(FPOrdLessThanEqual64, U1, F64, F64, ) +OPCODE(FPUnordLessThanEqual16, U1, F16, F16, ) +OPCODE(FPUnordLessThanEqual32, U1, F32, F32, ) +OPCODE(FPUnordLessThanEqual64, U1, F64, F64, ) +OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, ) +OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, ) +OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) +OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) +OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) + // Integer operations OPCODE(IAdd32, U32, U32, U32, ) OPCODE(IAdd64, U64, U64, U64, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h index 3da37a2bb..fd73f656c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h @@ -46,7 +46,8 @@ inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) { case FmzMode::FTZ: return IR::FmzMode::FTZ; case FmzMode::FMZ: - return IR::FmzMode::FMZ; + // FMZ is manually handled in the instruction + return IR::FmzMode::FTZ; case FmzMode::INVALIDFMZ3: break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp index 219ffcc6a..76a807d4e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -53,7 +53,7 @@ void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { } // Anonymous namespace void TranslatorVisitor::FADD_reg(u64 insn) { - FADD(*this, insn, GetRegFloat20(insn)); + FADD(*this, insn, GetFloatReg20(insn)); } void TranslatorVisitor::FADD_cbuf(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index 758700d3c..c2ca0873b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -51,7 +51,7 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s } // Anonymous namespace void TranslatorVisitor::FFMA_reg(u64 insn) { - FFMA(*this, insn, GetRegFloat20(insn), GetRegFloat39(insn)); + FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn)); } void TranslatorVisitor::FFMA_rc(u64) { @@ -59,7 +59,7 @@ void TranslatorVisitor::FFMA_rc(u64) { } void TranslatorVisitor::FFMA_cr(u64 insn) { - FFMA(*this, insn, GetFloatCbuf(insn), GetRegFloat39(insn)); + FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn)); } void TranslatorVisitor::FFMA_imm(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp index ba005fbf4..2f8605619 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp @@ -10,7 +10,7 @@ namespace Shader::Maxwell { namespace { -enum class Operation { +enum class Operation : u64 { Cos = 0, Sin = 1, Ex2 = 2, // Base 2 exponent @@ -39,11 +39,11 @@ void TranslatorVisitor::MUFU(u64 insn) { IR::F32 value{[&]() -> IR::F32 { switch (mufu.operation) { case Operation::Cos: - return ir.FPCosNotReduced(op_a); + return ir.FPCos(op_a); case Operation::Sin: - return ir.FPSinNotReduced(op_a); + return ir.FPSin(op_a); case Operation::Ex2: - return ir.FPExp2NotReduced(op_a); + return ir.FPExp2(op_a); case Operation::Lg2: return ir.FPLog2(op_a); case Operation::Rcp: diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index 5c38d3fc1..edf2cadae 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -55,9 +55,6 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode if (cc) { throw NotImplementedException("FMUL CC"); } - if (sat) { - throw NotImplementedException("FMUL SAT"); - } IR::F32 op_a{v.F(fmul.src_a)}; if (scale != Scale::None) { if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) { @@ -71,7 +68,20 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{CastFmzMode(fmz_mode)}, }; - v.F(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control)); + IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)}; + if (fmz_mode == FmzMode::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. + // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)}; + const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)}; + const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)}; + value = IR::F32{v.ir.Select(any_zero, zero, value)}; + } + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(fmul.dest_reg, value); } void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { @@ -83,27 +93,33 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { BitField<47, 1, u64> cc; BitField<48, 1, u64> neg_b; BitField<50, 1, u64> sat; - } fmul{insn}; - + } const fmul{insn}; FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, fmul.neg_b != 0); } } // Anonymous namespace void TranslatorVisitor::FMUL_reg(u64 insn) { - return FMUL(*this, insn, GetRegFloat20(insn)); + return FMUL(*this, insn, GetFloatReg20(insn)); } -void TranslatorVisitor::FMUL_cbuf(u64) { - throw NotImplementedException("FMUL (cbuf)"); +void TranslatorVisitor::FMUL_cbuf(u64 insn) { + return FMUL(*this, insn, GetFloatCbuf(insn)); } -void TranslatorVisitor::FMUL_imm(u64) { - throw NotImplementedException("FMUL (imm)"); +void TranslatorVisitor::FMUL_imm(u64 insn) { + return FMUL(*this, insn, GetFloatImm20(insn)); } -void TranslatorVisitor::FMUL32I(u64) { - throw NotImplementedException("FMUL32I"); +void TranslatorVisitor::FMUL32I(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> cc; + BitField<53, 2, FmzMode> fmz; + BitField<55, 1, u64> sat; + } const fmul32i{insn}; + FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None, + fmul32i.sat != 0, fmul32i.cc != 0, false); } } // namespace Shader::Maxwell \ No newline at end of file diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp new file mode 100644 index 000000000..f91b93fad --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp @@ -0,0 +1,41 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + SINCOS, + EX2, +}; + +void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 1, Mode> mode; + BitField<45, 1, u64> neg; + BitField<49, 1, u64> abs; + } const rro{insn}; + + v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0)); +} +} // Anonymous namespace + +void TranslatorVisitor::RRO_reg(u64 insn) { + RRO(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::RRO_cbuf(u64 insn) { + RRO(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::RRO_imm(u64) { + throw NotImplementedException("RRO (imm)"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 165d475b9..a5a0e1a9b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -48,11 +48,11 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) { return X(reg.index); } -IR::F32 TranslatorVisitor::GetRegFloat20(u64 insn) { +IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) { return ir.BitCast(GetReg20(insn)); } -IR::F32 TranslatorVisitor::GetRegFloat39(u64 insn) { +IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) { return ir.BitCast(GetReg39(insn)); } @@ -110,6 +110,14 @@ IR::U32 TranslatorVisitor::GetImm32(u64 insn) { return ir.Imm32(static_cast(imm.value)); } +IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) { + union { + u64 raw; + BitField<20, 32, u64> value; + } const imm{insn}; + return ir.Imm32(Common::BitCast(static_cast(imm.value))); +} + void TranslatorVisitor::SetZFlag(const IR::U1& value) { ir.SetZFlag(value); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 4d4cf2ebf..4e722e205 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -304,8 +304,8 @@ public: [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); - [[nodiscard]] IR::F32 GetRegFloat20(u64 insn); - [[nodiscard]] IR::F32 GetRegFloat39(u64 insn); + [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); + [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); [[nodiscard]] IR::U32 GetCbuf(u64 insn); [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); @@ -314,6 +314,7 @@ public: [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); [[nodiscard]] IR::U32 GetImm32(u64 insn); + [[nodiscard]] IR::F32 GetFloatImm32(u64 insn); void SetZFlag(const IR::U1& value); void SetSFlag(const IR::U1& value); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp index d8a5158b5..20af68852 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp @@ -50,7 +50,7 @@ void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { // const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)}; const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)}; - result = v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0)); + result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))}; } v.X(shl.dest_reg, result); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 628cf1c14..4114e10be 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -721,18 +721,6 @@ void TranslatorVisitor::RET(u64) { ThrowNotImplemented(Opcode::RET); } -void TranslatorVisitor::RRO_reg(u64) { - ThrowNotImplemented(Opcode::RRO_reg); -} - -void TranslatorVisitor::RRO_cbuf(u64) { - ThrowNotImplemented(Opcode::RRO_cbuf); -} - -void TranslatorVisitor::RRO_imm(u64) { - ThrowNotImplemented(Opcode::RRO_imm); -} - void TranslatorVisitor::RTT(u64) { ThrowNotImplemented(Opcode::RTT); } -- cgit v1.2.3 From 9d6a98d950da39dd2a7ca5ad25525de4fb825415 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 23 Feb 2021 04:46:39 -0300 Subject: shader: Implement more of XMAD and FFMA32I and fix XMAD.CBCC --- .../impl/floating_point_fused_multiply_add.cpp | 41 +++++++++++++----- .../translate/impl/floating_point_multiply.cpp | 2 + .../translate/impl/integer_short_multiply_add.cpp | 49 ++++++++++++++++------ 3 files changed, 70 insertions(+), 22 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index c2ca0873b..18561bc9c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -17,9 +17,6 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s BitField<8, 8, IR::Reg> src_a; } const ffma{insn}; - if (sat) { - throw NotImplementedException("FFMA SAT"); - } if (cc) { throw NotImplementedException("FFMA CC"); } @@ -31,7 +28,20 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{CastFmzMode(fmz_mode)}, }; - v.F(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control)); + IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)}; + if (fmz_mode == FmzMode::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. + // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)}; + const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)}; + const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)}; + value = IR::F32{v.ir.Select(any_zero, op_c, value)}; + } + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(ffma.dest_reg, value); } void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) { @@ -54,20 +64,31 @@ void TranslatorVisitor::FFMA_reg(u64 insn) { FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn)); } -void TranslatorVisitor::FFMA_rc(u64) { - throw NotImplementedException("FFMA (rc)"); +void TranslatorVisitor::FFMA_rc(u64 insn) { + FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn)); } void TranslatorVisitor::FFMA_cr(u64 insn) { FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn)); } -void TranslatorVisitor::FFMA_imm(u64) { - throw NotImplementedException("FFMA (imm)"); +void TranslatorVisitor::FFMA_imm(u64 insn) { + FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn)); } -void TranslatorVisitor::FFMA32I(u64) { - throw NotImplementedException("FFMA32I"); +void TranslatorVisitor::FFMA32I(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_c; // FFMA32I mirrors the destination and addition register + BitField<52, 1, u64> cc; + BitField<53, 2, FmzMode> fmz_mode; + BitField<55, 1, u64> sat; + BitField<56, 1, u64> neg_a; + BitField<57, 1, u64> neg_c; + } const ffma32i{insn}; + + FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), ffma32i.neg_a != 0, false, + ffma32i.neg_c != 0, ffma32i.sat != 0, ffma32i.cc != 0, ffma32i.fmz_mode, FpRounding::RN); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index edf2cadae..72f0a18ae 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -94,6 +94,7 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { BitField<48, 1, u64> neg_b; BitField<50, 1, u64> sat; } const fmul{insn}; + FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, fmul.neg_b != 0); } @@ -118,6 +119,7 @@ void TranslatorVisitor::FMUL32I(u64 insn) { BitField<53, 2, FmzMode> fmz; BitField<55, 1, u64> sat; } const fmul32i{insn}; + FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None, fmul32i.sat != 0, fmul32i.cc != 0, false); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp index 70a7c76c5..2932cdc42 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp @@ -58,7 +58,7 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s case SelectMode::CHI: return ExtractHalf(v, src_c, Half::H1, false); case SelectMode::CBCC: - return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_b); + return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c); case SelectMode::CSFU: throw NotImplementedException("XMAD CSFU"); } @@ -78,16 +78,44 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s } } // Anonymous namespace -void TranslatorVisitor::XMAD_reg(u64) { - throw NotImplementedException("XMAD (reg)"); +void TranslatorVisitor::XMAD_reg(u64 insn) { + union { + u64 raw; + BitField<35, 1, Half> half_b; + BitField<36, 1, u64> psl; + BitField<37, 1, u64> mrg; + BitField<38, 1, u64> x; + BitField<50, 3, SelectMode> select_mode; + } const xmad{insn}; + + XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, + xmad.mrg != 0, xmad.x != 0); } -void TranslatorVisitor::XMAD_rc(u64) { - throw NotImplementedException("XMAD (rc)"); +void TranslatorVisitor::XMAD_rc(u64 insn) { + union { + u64 raw; + BitField<50, 2, SelectMode> select_mode; + BitField<52, 1, Half> half_b; + BitField<54, 1, u64> x; + } const xmad{insn}; + + XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false, + xmad.x != 0); } -void TranslatorVisitor::XMAD_cr(u64) { - throw NotImplementedException("XMAD (cr)"); +void TranslatorVisitor::XMAD_cr(u64 insn) { + union { + u64 raw; + BitField<50, 2, SelectMode> select_mode; + BitField<52, 1, Half> half_b; + BitField<54, 1, u64> x; + BitField<55, 1, u64> psl; + BitField<56, 1, u64> mrg; + } const xmad{insn}; + + XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, + xmad.mrg != 0, xmad.x != 0); } void TranslatorVisitor::XMAD_imm(u64 insn) { @@ -97,14 +125,11 @@ void TranslatorVisitor::XMAD_imm(u64 insn) { BitField<36, 1, u64> psl; BitField<37, 1, u64> mrg; BitField<38, 1, u64> x; - BitField<39, 8, IR::Reg> src_c; BitField<50, 3, SelectMode> select_mode; } const xmad{insn}; - const IR::U32 src_b{ir.Imm32(static_cast(xmad.src_b))}; - const IR::U32 src_c{X(xmad.src_c)}; - XMAD(*this, insn, src_b, src_c, xmad.select_mode, Half::H0, xmad.psl != 0, xmad.mrg != 0, - xmad.x != 0); + XMAD(*this, insn, ir.Imm32(static_cast(xmad.src_b)), GetReg39(insn), xmad.select_mode, + Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0); } } // namespace Shader::Maxwell -- cgit v1.2.3 From e87a502da2d5a8356a639d53c0a16a77890de4c7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Feb 2021 05:21:30 -0300 Subject: shader: Fix control flow --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 27 ++++++++++++++-------- src/shader_recompiler/frontend/ir/opcodes.inc | 2 +- .../frontend/ir/structured_control_flow.cpp | 15 ++++++++---- src/shader_recompiler/frontend/ir/value.cpp | 6 ++++- src/shader_recompiler/frontend/ir/value.h | 1 + .../frontend/maxwell/control_flow.cpp | 2 +- .../frontend/maxwell/translate/impl/impl.h | 2 +- .../maxwell/translate/impl/not_implemented.cpp | 4 ++-- 8 files changed, 39 insertions(+), 20 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 8ba86e614..0209d5540 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -134,18 +134,27 @@ void IREmitter::SetOFlag(const U1& value) { Inst(Opcode::SetOFlag, value); } -U1 IREmitter::Condition(IR::Condition cond) { - if (cond == IR::Condition{true}) { - return Imm1(true); - } else if (cond == IR::Condition{false}) { - return Imm1(false); +static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { + switch (flow_test) { + case FlowTest::T: + return ir.Imm1(true); + case FlowTest::F: + return ir.Imm1(false); + case FlowTest::EQ: + // TODO: Test this + return ir.GetZFlag(); + case FlowTest::NE: + // TODO: Test this + return ir.LogicalNot(ir.GetZFlag()); + default: + throw NotImplementedException("Flow test {}", flow_test); } +} + +U1 IREmitter::Condition(IR::Condition cond) { const FlowTest flow_test{cond.FlowTest()}; const auto [pred, is_negated]{cond.Pred()}; - if (flow_test == FlowTest::T) { - return GetPred(pred, is_negated); - } - throw NotImplementedException("Condition {}", cond); + return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test)); } F32 IREmitter::GetAttribute(IR::Attribute attribute) { diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index f2d71144a..289e76f32 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -4,8 +4,8 @@ // opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... OPCODE(Phi, Opaque, ) -OPCODE(Void, Void, ) OPCODE(Identity, Opaque, Opaque, ) +OPCODE(Void, Void, ) // Control flow OPCODE(Branch, Void, Label, ) diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp index 032ac8fda..bfba55a7e 100644 --- a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp @@ -329,7 +329,6 @@ private: if (!sibling) { throw LogicError("Not siblings"); } - // goto_stmt and label_stmt are guaranteed to be siblings, eliminate if (std::next(goto_stmt) == label_stmt) { // Simply eliminate the goto if the label is next to it @@ -351,9 +350,14 @@ private: const std::unordered_map labels_map{BuildLabels(blocks)}; Tree& root{root_stmt.children}; auto insert_point{root.begin()}; + // Skip all goto variables zero-initialization + std::advance(insert_point, labels_map.size()); + for (Block* const block : blocks) { - ++insert_point; // Skip label - ++insert_point; // Skip set variable + // Skip label + ++insert_point; + // Skip set variable + ++insert_point; root.insert(insert_point, *pool.Create(block, &root_stmt)); if (block->IsTerminationBlock()) { @@ -391,6 +395,7 @@ private: labels_map.emplace(block, root.insert(root.end(), *label)); Statement* const false_stmt{pool.Create(Identity{}, Condition{false})}; root.push_back(*pool.Create(SetVariable{}, label_id, false_stmt, &root_stmt)); + root.push_front(*pool.Create(SetVariable{}, label_id, false_stmt, &root_stmt)); ++label_id; } return labels_map; @@ -457,10 +462,10 @@ private: } body.erase(goto_stmt); - // Update nested if condition switch (label_nested_stmt->type) { case StatementType::If: - label_nested_stmt->cond = pool.Create(Or{}, neg_var, label_nested_stmt->cond); + // Update nested if condition + label_nested_stmt->cond = pool.Create(Or{}, variable, label_nested_stmt->cond); break; case StatementType::Loop: break; diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 718314213..791ba2690 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -36,6 +36,10 @@ bool Value::IsIdentity() const noexcept { return type == Type::Opaque && inst->Opcode() == Opcode::Identity; } +bool Value::IsPhi() const noexcept { + return type == Type::Opaque && inst->Opcode() == Opcode::Phi; +} + bool Value::IsEmpty() const noexcept { return type == Type::Void; } @@ -52,7 +56,7 @@ bool Value::IsLabel() const noexcept { } IR::Type Value::Type() const noexcept { - if (IsIdentity()) { + if (IsIdentity() || IsPhi()) { return inst->Arg(0).Type(); } if (type == Type::Opaque) { diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 8aba0bbf6..9b7e1480b 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -33,6 +33,7 @@ public: explicit Value(f64 value) noexcept; [[nodiscard]] bool IsIdentity() const noexcept; + [[nodiscard]] bool IsPhi() const noexcept; [[nodiscard]] bool IsEmpty() const noexcept; [[nodiscard]] bool IsImmediate() const noexcept; [[nodiscard]] bool IsLabel() const noexcept; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index e766b555b..52be41b84 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -354,7 +354,7 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, // Impersonate the visited block with a virtual block *block = std::move(virtual_block); // Set the end properties of the conditional instruction - conditional_block->end = pc; + conditional_block->end = pc + 1; conditional_block->end_class = insn_end_class; // Add a label to the instruction after the conditional instruction Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 4e722e205..672e140b2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -209,7 +209,7 @@ public: void P2R_cbuf(u64 insn); void P2R_imm(u64 insn); void PBK(); - void PCNT(u64 insn); + void PCNT(); void PEXIT(u64 insn); void PIXLD(u64 insn); void PLONGJMP(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 4114e10be..3f6dedfdd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -637,8 +637,8 @@ void TranslatorVisitor::PBK() { // PBK is a no-op } -void TranslatorVisitor::PCNT(u64) { - ThrowNotImplemented(Opcode::PCNT); +void TranslatorVisitor::PCNT() { + // PCNT is a no-op } void TranslatorVisitor::PEXIT(u64) { -- cgit v1.2.3 From 622d676202bad317a58529efc3c15d08fd04aad1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Feb 2021 18:32:00 -0300 Subject: shader: Fix conditional execution of exit instructions --- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 9 +++++---- src/shader_recompiler/frontend/maxwell/control_flow.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 52be41b84..d0dc66330 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -322,12 +322,13 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati return AnalysisState::Continue; } const IR::Condition cond{static_cast(pred.index), pred.negated}; - AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond); + AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond, true); return AnalysisState::Branch; } void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, - EndClass insn_end_class, IR::Condition cond) { + EndClass insn_end_class, IR::Condition cond, + bool visit_conditional_inst) { if (block->begin != pc) { // If the block doesn't start in the conditional instruction // mark it as a label to visit it later @@ -354,7 +355,7 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, // Impersonate the visited block with a virtual block *block = std::move(virtual_block); // Set the end properties of the conditional instruction - conditional_block->end = pc + 1; + conditional_block->end = visit_conditional_inst ? (pc + 1) : pc; conditional_block->end_class = insn_end_class; // Add a label to the instruction after the conditional instruction Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; @@ -423,7 +424,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati throw NotImplementedException("Conditional EXIT with PEXIT token"); } const IR::Condition cond{flow_test, static_cast(pred.index), pred.negated}; - AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); + AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond, false); return AnalysisState::Branch; } if (const std::optional exit_pc{block->stack.Peek(Token::PEXIT)}) { diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 8179787b8..209c9e551 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -128,7 +128,7 @@ private: AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc); void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, - IR::Condition cond); + IR::Condition cond, bool visit_conditional_inst); /// Return true when the branch instruction is confirmed to be a branch bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, -- cgit v1.2.3 From 8810c88b7e3de2766bf47e07e941fb2c58c6b4b0 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 24 Feb 2021 20:31:15 -0500 Subject: shader: Implement SEL --- .../maxwell/translate/impl/not_implemented.cpp | 12 ------ .../impl/select_source_with_predicate.cpp | 44 ++++++++++++++++++++++ 2 files changed, 44 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 3f6dedfdd..82c73bf8c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -729,18 +729,6 @@ void TranslatorVisitor::SAM(u64) { ThrowNotImplemented(Opcode::SAM); } -void TranslatorVisitor::SEL_reg(u64) { - ThrowNotImplemented(Opcode::SEL_reg); -} - -void TranslatorVisitor::SEL_cbuf(u64) { - ThrowNotImplemented(Opcode::SEL_cbuf); -} - -void TranslatorVisitor::SEL_imm(u64) { - ThrowNotImplemented(Opcode::SEL_imm); -} - void TranslatorVisitor::SETCRSPTR(u64) { ThrowNotImplemented(Opcode::SETCRSPTR); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp new file mode 100644 index 000000000..25fc6b437 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp @@ -0,0 +1,44 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> op_a; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + } const sel{insn}; + + const IR::U1 pred = v.ir.GetPred(sel.pred); + IR::U32 op_a{v.X(sel.op_a)}; + IR::U32 op_b{src}; + if (sel.neg_pred != 0) { + std::swap(op_a, op_b); + } + const IR::U32 result{v.ir.Select(pred, op_a, op_b)}; + + v.X(sel.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SEL_reg(u64 insn) { + SEL(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::SEL_cbuf(u64 insn) { + SEL(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::SEL_imm(u64 insn) { + SEL(*this, insn, GetImm20(insn)); +} +} // namespace Shader::Maxwell -- cgit v1.2.3 From cc55d289494c991e7e0e456e428a110569708c2e Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 25 Feb 2021 00:46:40 -0500 Subject: shader: Implement SHR --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 ++ src/shader_recompiler/frontend/ir/ir_emitter.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../maxwell/translate/impl/integer_shift_right.cpp | 62 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 ----- 5 files changed, 68 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 0209d5540..7c3908398 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -804,6 +804,10 @@ U32 IREmitter::BitFieldExtract(const U32& base, const U32& offset, const U32& co count); } +U32 IREmitter::BitReverse(const U32& value) { + return Inst(Opcode::BitReverse32, value); +} + U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { return Inst(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 2c923716a..f7998e156 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -159,6 +159,7 @@ public: const U32& count); [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, bool is_signed); + [[nodiscard]] U32 BitReverse(const U32& value); [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 IEqual(const U32& lhs, const U32& rhs); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 289e76f32..f420f1161 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -231,6 +231,7 @@ OPCODE(BitwiseXor32, U32, U32, OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) +OPCODE(BitReverse32, U32, U32, ) OPCODE(SLessThan, U1, U32, U32, ) OPCODE(ULessThan, U1, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp new file mode 100644 index 000000000..a34ccb851 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<39, 1, u64> is_wrapped; + BitField<40, 1, u64> brev; + BitField<43, 1, u64> xmode; + BitField<48, 1, u64> is_arithmetic; + } const shr{insn}; + + if (shr.xmode != 0) { + throw NotImplementedException("SHR.XMODE"); + } + + IR::U32 base{v.X(shr.src_reg_a)}; + if (shr.brev == 1) { + base = v.ir.BitReverse(base); + } + IR::U32 result; + const IR::U32 safe_shift = shr.is_wrapped == 0 ? shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31)); + if (shr.is_arithmetic == 1) { + result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)}; + } else { + result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)}; + } + + if (shr.is_wrapped == 0) { + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 safe_bits{v.ir.Imm32(32)}; + + const IR::U1 is_negative{v.ir.ILessThan(result, zero, true)}; + const IR::U1 is_safe{v.ir.ILessThan(shift, safe_bits, false)}; + const IR::U32 clamped_value{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)}; + result = IR::U32{v.ir.Select(is_safe, result, clamped_value)}; + } + v.X(shr.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHR_reg(u64 insn) { + SHR(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::SHR_cbuf(u64 insn) { + SHR(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::SHR_imm(u64 insn) { + SHR(*this, insn, GetImm20(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 82c73bf8c..45ed04e25 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -757,18 +757,6 @@ void TranslatorVisitor::SHFL(u64) { ThrowNotImplemented(Opcode::SHFL); } -void TranslatorVisitor::SHR_reg(u64) { - ThrowNotImplemented(Opcode::SHR_reg); -} - -void TranslatorVisitor::SHR_cbuf(u64) { - ThrowNotImplemented(Opcode::SHR_cbuf); -} - -void TranslatorVisitor::SHR_imm(u64) { - ThrowNotImplemented(Opcode::SHR_imm); -} - void TranslatorVisitor::SSY() { // SSY is a no-op } -- cgit v1.2.3 From a8c41c50d3f7a1c2871487862f68925db8b5e27f Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 26 Feb 2021 21:41:46 -0500 Subject: shader: Implement POPC --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 8 +++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 2 ++ src/shader_recompiler/frontend/ir/opcodes.inc | 2 ++ .../maxwell/translate/impl/integer_popcount.cpp | 36 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 -------- 5 files changed, 48 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 7c3908398..54fdf9559 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -808,6 +808,14 @@ U32 IREmitter::BitReverse(const U32& value) { return Inst(Opcode::BitReverse32, value); } +U32 IREmitter::BitCount(const U32& value) { + return Inst(Opcode::BitCount32, value); +} + +U32 IREmitter::BitwiseNot(const U32& a) { + return Inst(Opcode::BitwiseNot32, a); +} + U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { return Inst(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index f7998e156..9dec22145 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -160,6 +160,8 @@ public: [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, bool is_signed); [[nodiscard]] U32 BitReverse(const U32& value); + [[nodiscard]] U32 BitCount(const U32& value); + [[nodiscard]] U32 BitwiseNot(const U32& a); [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 IEqual(const U32& lhs, const U32& rhs); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index f420f1161..59a13e911 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -232,6 +232,8 @@ OPCODE(BitFieldInsert, U32, U32, OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) OPCODE(BitReverse32, U32, U32, ) +OPCODE(BitCount32, U32, U32, ) +OPCODE(BitwiseNot32, U32, U32, ) OPCODE(SLessThan, U1, U32, U32, ) OPCODE(ULessThan, U1, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp new file mode 100644 index 000000000..5ece7678d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp @@ -0,0 +1,36 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<40, 1, u64> tilde; + } const popc{insn}; + + const IR::U32 operand = popc.tilde == 0 ? src : v.ir.BitwiseNot(src); + const IR::U32 result = v.ir.BitCount(operand); + v.X(popc.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::POPC_reg(u64 insn) { + POPC(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::POPC_cbuf(u64 insn) { + POPC(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::POPC_imm(u64 insn) { + POPC(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 45ed04e25..127686b43 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -653,18 +653,6 @@ void TranslatorVisitor::PLONGJMP(u64) { ThrowNotImplemented(Opcode::PLONGJMP); } -void TranslatorVisitor::POPC_reg(u64) { - ThrowNotImplemented(Opcode::POPC_reg); -} - -void TranslatorVisitor::POPC_cbuf(u64) { - ThrowNotImplemented(Opcode::POPC_cbuf); -} - -void TranslatorVisitor::POPC_imm(u64) { - ThrowNotImplemented(Opcode::POPC_imm); -} - void TranslatorVisitor::PRET(u64) { ThrowNotImplemented(Opcode::PRET); } -- cgit v1.2.3 From 34ac9b4d7e71198503d7fca88c0494e1f97701e7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 27 Feb 2021 01:45:13 -0500 Subject: shader: Implement BFE --- .../maxwell/translate/impl/bitfield_extract.cpp | 66 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 ---- 2 files changed, 66 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp new file mode 100644 index 000000000..4a03e6939 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> offset_reg; + BitField<40, 1, u64> brev; + BitField<48, 1, u64> is_signed; + } const bfe{insn}; + + const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)}; + const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)}; + + // Common constants + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 one{v.ir.Imm32(1)}; + const IR::U32 max_size{v.ir.Imm32(32)}; + // Edge case conditions + const IR::U1 zero_count{v.ir.IEqual(count, zero)}; + const IR::U1 exceed_count{v.ir.IGreaterThanEqual(v.ir.IAdd(offset, count), max_size, false)}; + const IR::U1 replicate{v.ir.IGreaterThanEqual(offset, max_size, false)}; + + IR::U32 base{v.X(bfe.offset_reg)}; + if (bfe.brev != 0) { + base = v.ir.BitReverse(base); + } + IR::U32 result{v.ir.BitFieldExtract(base, offset, count, bfe.is_signed != 0)}; + if (bfe.is_signed != 0) { + const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)}; + const IR::U32 replicated_bit{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)}; + const IR::U32 exceed_bit{v.ir.BitFieldExtract(base, v.ir.Imm32(31), one, false)}; + // Replicate condition + result = IR::U32{v.ir.Select(replicate, replicated_bit, result)}; + // Exceeding condition + const IR::U32 exceed_result{v.ir.BitFieldInsert(result, exceed_bit, v.ir.Imm32(31), one)}; + result = IR::U32{v.ir.Select(exceed_count, exceed_result, result)}; + } + // Zero count condition + result = IR::U32{v.ir.Select(zero_count, zero, result)}; + + v.X(bfe.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::BFE_reg(u64 insn) { + BFE(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::BFE_cbuf(u64 insn) { + BFE(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::BFE_imm(u64 insn) { + BFE(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 127686b43..3714f5f4f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -49,18 +49,6 @@ void TranslatorVisitor::BAR(u64) { ThrowNotImplemented(Opcode::BAR); } -void TranslatorVisitor::BFE_reg(u64) { - ThrowNotImplemented(Opcode::BFE_reg); -} - -void TranslatorVisitor::BFE_cbuf(u64) { - ThrowNotImplemented(Opcode::BFE_cbuf); -} - -void TranslatorVisitor::BFE_imm(u64) { - ThrowNotImplemented(Opcode::BFE_imm); -} - void TranslatorVisitor::BFI_reg(u64) { ThrowNotImplemented(Opcode::BFI_reg); } -- cgit v1.2.3 From 08a9e95905fa90d9d2455c9aedf66cebcfc6f6ba Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 27 Feb 2021 16:37:49 -0500 Subject: shader: Implement BFI --- .../maxwell/translate/impl/bitfield_insert.cpp | 56 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 16 ------- 2 files changed, 56 insertions(+), 16 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp new file mode 100644 index 000000000..ee312c30d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp @@ -0,0 +1,56 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> insert_reg; + } const bfi{insn}; + + const IR::U32 offset{v.ir.BitFieldExtract(src_a, v.ir.Imm32(0), v.ir.Imm32(8), false)}; + const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)}; + const IR::U32 max_size{v.ir.Imm32(32)}; + + // Edge case conditions + const IR::U1 zero_offset{v.ir.IEqual(offset, v.ir.Imm32(0))}; + const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)}; + const IR::U1 exceed_count{v.ir.IGreaterThanEqual(unsafe_count, max_size, false)}; + + const IR::U32 remaining_size{v.ir.ISub(max_size, offset)}; + const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)}; + + const IR::U32 insert{v.X(bfi.insert_reg)}; + IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)}; + + result = IR::U32{v.ir.Select(exceed_offset, base, result)}; + result = IR::U32{v.ir.Select(zero_offset, base, result)}; + + v.X(bfi.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::BFI_reg(u64 insn) { + BFI(*this, insn, GetReg20(insn), GetReg39(insn)); +} + +void TranslatorVisitor::BFI_rc(u64 insn) { + BFI(*this, insn, GetReg39(insn), GetCbuf(insn)); +} + +void TranslatorVisitor::BFI_cr(u64 insn) { + BFI(*this, insn, GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::BFI_imm(u64 insn) { + BFI(*this, insn, GetImm20(insn), GetReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 3714f5f4f..ed2cfac60 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -49,22 +49,6 @@ void TranslatorVisitor::BAR(u64) { ThrowNotImplemented(Opcode::BAR); } -void TranslatorVisitor::BFI_reg(u64) { - ThrowNotImplemented(Opcode::BFI_reg); -} - -void TranslatorVisitor::BFI_rc(u64) { - ThrowNotImplemented(Opcode::BFI_rc); -} - -void TranslatorVisitor::BFI_cr(u64) { - ThrowNotImplemented(Opcode::BFI_cr); -} - -void TranslatorVisitor::BFI_imm(u64) { - ThrowNotImplemented(Opcode::BFI_imm); -} - void TranslatorVisitor::BPT(u64) { ThrowNotImplemented(Opcode::BPT); } -- cgit v1.2.3 From 20390c0548d6eef2af67a363ee120a630267b741 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 28 Feb 2021 23:33:53 -0500 Subject: shader: Implement IMNMX --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 16 ++++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 5 ++ src/shader_recompiler/frontend/ir/opcodes.inc | 4 ++ .../translate/impl/integer_minimum_maximum.cpp | 59 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 ----- 5 files changed, 84 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 54fdf9559..04edcdfd8 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -816,6 +816,22 @@ U32 IREmitter::BitwiseNot(const U32& a) { return Inst(Opcode::BitwiseNot32, a); } +U32 IREmitter::SMin(const U32& a, const U32& b) { + return Inst(Opcode::SMin32, a, b); +} + +U32 IREmitter::UMin(const U32& a, const U32& b) { + return Inst(Opcode::UMin32, a, b); +} + +U32 IREmitter::SMax(const U32& a, const U32& b) { + return Inst(Opcode::SMax32, a, b); +} + +U32 IREmitter::UMax(const U32& a, const U32& b) { + return Inst(Opcode::UMax32, a, b); +} + U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { return Inst(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 9dec22145..00ba2e4cd 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -163,6 +163,11 @@ public: [[nodiscard]] U32 BitCount(const U32& value); [[nodiscard]] U32 BitwiseNot(const U32& a); + [[nodiscard]] U32 SMin(const U32& a, const U32& b); + [[nodiscard]] U32 UMin(const U32& a, const U32& b); + [[nodiscard]] U32 SMax(const U32& a, const U32& b); + [[nodiscard]] U32 UMax(const U32& a, const U32& b); + [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 IEqual(const U32& lhs, const U32& rhs); [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 59a13e911..2c4a997dc 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -235,6 +235,10 @@ OPCODE(BitReverse32, U32, U32, OPCODE(BitCount32, U32, U32, ) OPCODE(BitwiseNot32, U32, U32, ) +OPCODE(SMin32, U32, U32, U32, ) +OPCODE(UMin32, U32, U32, U32, ) +OPCODE(SMax32, U32, U32, U32, ) +OPCODE(UMax32, U32, U32, U32, ) OPCODE(SLessThan, U1, U32, U32, ) OPCODE(ULessThan, U1, U32, U32, ) OPCODE(IEqual, U1, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp new file mode 100644 index 000000000..12c6aae3d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp @@ -0,0 +1,59 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 2, u64> mode; + BitField<48, 1, u64> is_signed; + } const imnmx{insn}; + + if (imnmx.mode != 0) { + throw NotImplementedException("IMNMX.MODE"); + } + + IR::U1 pred = v.ir.GetPred(imnmx.pred); + const IR::U32 op_a{v.X(imnmx.src_reg)}; + IR::U32 min; + IR::U32 max; + + if (imnmx.is_signed != 0) { + min = IR::U32{v.ir.SMin(op_a, op_b)}; + max = IR::U32{v.ir.SMax(op_a, op_b)}; + } else { + min = IR::U32{v.ir.UMin(op_a, op_b)}; + max = IR::U32{v.ir.UMax(op_a, op_b)}; + } + if (imnmx.neg_pred != 0) { + std::swap(min, max); + } + + const IR::U32 result{v.ir.Select(pred, min, max)}; + v.X(imnmx.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::IMNMX_reg(u64 insn) { + IMNMX(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::IMNMX_cbuf(u64 insn) { + IMNMX(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::IMNMX_imm(u64 insn) { + IMNMX(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index ed2cfac60..615e3c3b5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -453,18 +453,6 @@ void TranslatorVisitor::IMADSP_imm(u64) { ThrowNotImplemented(Opcode::IMADSP_imm); } -void TranslatorVisitor::IMNMX_reg(u64) { - ThrowNotImplemented(Opcode::IMNMX_reg); -} - -void TranslatorVisitor::IMNMX_cbuf(u64) { - ThrowNotImplemented(Opcode::IMNMX_cbuf); -} - -void TranslatorVisitor::IMNMX_imm(u64) { - ThrowNotImplemented(Opcode::IMNMX_imm); -} - void TranslatorVisitor::IMUL_reg(u64) { ThrowNotImplemented(Opcode::IMUL_reg); } -- cgit v1.2.3 From bce0b1dcca4e83ab8bb6692e98d021ded5c0ad5f Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 1 Mar 2021 00:18:30 -0500 Subject: shader: Implement ICMP --- .../maxwell/translate/impl/integer_compare.cpp | 83 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 16 ----- 2 files changed, 83 insertions(+), 16 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp new file mode 100644 index 000000000..1f604b0ee --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp @@ -0,0 +1,83 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class ComparisonOp : u64 { + False, + LessThan, + Equal, + LessThanEqual, + GreaterThan, + NotEqual, + GreaterThanEqual, + True, +}; + +[[nodiscard]] IR::U1 CompareToZero(TranslatorVisitor& v, const IR::U32& operand, + ComparisonOp compare_op, bool is_signed) { + const IR::U32 zero{v.ir.Imm32(0)}; + switch (compare_op) { + case ComparisonOp::False: + return v.ir.Imm1(false); + case ComparisonOp::LessThan: + return v.ir.ILessThan(operand, zero, is_signed); + case ComparisonOp::Equal: + return v.ir.IEqual(operand, zero); + case ComparisonOp::LessThanEqual: + return v.ir.ILessThanEqual(operand, zero, is_signed); + case ComparisonOp::GreaterThan: + return v.ir.IGreaterThan(operand, zero, is_signed); + case ComparisonOp::NotEqual: + return v.ir.INotEqual(operand, zero); + case ComparisonOp::GreaterThanEqual: + return v.ir.IGreaterThanEqual(operand, zero, is_signed); + case ComparisonOp::True: + return v.ir.Imm1(true); + default: + throw NotImplementedException("ICMP.CMP"); + } +} + +void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<48, 1, u64> is_signed; + BitField<49, 3, ComparisonOp> compare_op; + } const icmp{insn}; + + const IR::U32 zero{v.ir.Imm32(0)}; + const bool is_signed{icmp.is_signed != 0}; + const IR::U1 cmp_result{CompareToZero(v, operand, icmp.compare_op, is_signed)}; + + const IR::U32 src_reg{v.X(icmp.src_reg)}; + const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; + + v.X(icmp.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::ICMP_reg(u64 insn) { + ICMP(*this, insn, GetReg20(insn), GetReg39(insn)); +} + +void TranslatorVisitor::ICMP_rc(u64 insn) { + ICMP(*this, insn, GetReg39(insn), GetCbuf(insn)); +} + +void TranslatorVisitor::ICMP_cr(u64 insn) { + ICMP(*this, insn, GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::ICMP_imm(u64 insn) { + ICMP(*this, insn, GetImm20(insn), GetReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 615e3c3b5..8d4044ee8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -389,22 +389,6 @@ void TranslatorVisitor::IADD3_imm(u64) { ThrowNotImplemented(Opcode::IADD3_imm); } -void TranslatorVisitor::ICMP_reg(u64) { - ThrowNotImplemented(Opcode::ICMP_reg); -} - -void TranslatorVisitor::ICMP_rc(u64) { - ThrowNotImplemented(Opcode::ICMP_rc); -} - -void TranslatorVisitor::ICMP_cr(u64) { - ThrowNotImplemented(Opcode::ICMP_cr); -} - -void TranslatorVisitor::ICMP_imm(u64) { - ThrowNotImplemented(Opcode::ICMP_imm); -} - void TranslatorVisitor::IDE(u64) { ThrowNotImplemented(Opcode::IDE); } -- cgit v1.2.3 From bec7d3111d3de2a7a8384b1e761bc3692afef9c7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 1 Mar 2021 00:25:15 -0500 Subject: shader: Make IMNMX, SHR, SEL stylistically more consistent --- .../frontend/maxwell/translate/impl/integer_minimum_maximum.cpp | 2 +- .../frontend/maxwell/translate/impl/integer_shift_right.cpp | 4 ++-- .../frontend/maxwell/translate/impl/select_source_with_predicate.cpp | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp index 12c6aae3d..5303db612 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp @@ -23,7 +23,7 @@ void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { throw NotImplementedException("IMNMX.MODE"); } - IR::U1 pred = v.ir.GetPred(imnmx.pred); + IR::U1 pred{v.ir.GetPred(imnmx.pred)}; const IR::U32 op_a{v.X(imnmx.src_reg)}; IR::U32 min; IR::U32 max; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp index a34ccb851..4025b1358 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp @@ -16,7 +16,7 @@ void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) { BitField<39, 1, u64> is_wrapped; BitField<40, 1, u64> brev; BitField<43, 1, u64> xmode; - BitField<48, 1, u64> is_arithmetic; + BitField<48, 1, u64> is_signed; } const shr{insn}; if (shr.xmode != 0) { @@ -29,7 +29,7 @@ void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) { } IR::U32 result; const IR::U32 safe_shift = shr.is_wrapped == 0 ? shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31)); - if (shr.is_arithmetic == 1) { + if (shr.is_signed == 1) { result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)}; } else { result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp index 25fc6b437..93baa75a9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp @@ -13,13 +13,13 @@ void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) { union { u64 raw; BitField<0, 8, IR::Reg> dest_reg; - BitField<8, 8, IR::Reg> op_a; + BitField<8, 8, IR::Reg> src_reg; BitField<39, 3, IR::Pred> pred; BitField<42, 1, u64> neg_pred; } const sel{insn}; const IR::U1 pred = v.ir.GetPred(sel.pred); - IR::U32 op_a{v.X(sel.op_a)}; + IR::U32 op_a{v.X(sel.src_reg)}; IR::U32 op_b{src}; if (sel.neg_pred != 0) { std::swap(op_a, op_b); -- cgit v1.2.3 From e0389286165805258fa2e54014c2dc506ffb9f35 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 1 Mar 2021 01:30:09 -0500 Subject: shader: Implement ISET, add common_funcs --- .../maxwell/translate/impl/common_funcs.cpp | 46 ++++++++++++++++ .../frontend/maxwell/translate/impl/common_funcs.h | 17 ++++++ .../frontend/maxwell/translate/impl/impl.h | 19 +++++++ .../maxwell/translate/impl/integer_compare.cpp | 39 +------------- .../translate/impl/integer_compare_and_set.cpp | 62 ++++++++++++++++++++++ .../translate/impl/integer_minimum_maximum.cpp | 2 +- .../maxwell/translate/impl/not_implemented.cpp | 12 ----- 7 files changed, 147 insertions(+), 50 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp new file mode 100644 index 000000000..3ec146b1a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" + +namespace Shader::Maxwell { +[[nodiscard]] IR::U1 IntegerCompare(TranslatorVisitor& v, const IR::U32& operand_1, + const IR::U32& operand_2, ComparisonOp compare_op, + bool is_signed) { + switch (compare_op) { + case ComparisonOp::False: + return v.ir.Imm1(false); + case ComparisonOp::LessThan: + return v.ir.ILessThan(operand_1, operand_2, is_signed); + case ComparisonOp::Equal: + return v.ir.IEqual(operand_1, operand_2); + case ComparisonOp::LessThanEqual: + return v.ir.ILessThanEqual(operand_1, operand_2, is_signed); + case ComparisonOp::GreaterThan: + return v.ir.IGreaterThan(operand_1, operand_2, is_signed); + case ComparisonOp::NotEqual: + return v.ir.INotEqual(operand_1, operand_2); + case ComparisonOp::GreaterThanEqual: + return v.ir.IGreaterThanEqual(operand_1, operand_2, is_signed); + case ComparisonOp::True: + return v.ir.Imm1(true); + default: + throw NotImplementedException("CMP"); + } +} + +[[nodiscard]] IR::U1 PredicateCombine(TranslatorVisitor& v, const IR::U1& predicate_1, + const IR::U1& predicate_2, BooleanOp bop) { + switch (bop) { + case BooleanOp::And: + return v.ir.LogicalAnd(predicate_1, predicate_2); + case BooleanOp::Or: + return v.ir.LogicalOr(predicate_1, predicate_2); + case BooleanOp::Xor: + return v.ir.LogicalXor(predicate_1, predicate_2); + default: + throw NotImplementedException("BOP"); + } +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h new file mode 100644 index 000000000..293fcce2e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -0,0 +1,17 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +[[nodiscard]] IR::U1 IntegerCompare(TranslatorVisitor& v, const IR::U32& operand_1, + const IR::U32& operand_2, ComparisonOp compare_op, + bool is_signed); + +[[nodiscard]] IR::U1 PredicateCombine(TranslatorVisitor& v, const IR::U1& predicate_1, + const IR::U1& predicate_2, BooleanOp bop); +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 672e140b2..232f8c894 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#pragma once + #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" @@ -9,6 +11,23 @@ namespace Shader::Maxwell { +enum class ComparisonOp : u64 { + False, + LessThan, + Equal, + LessThanEqual, + GreaterThan, + NotEqual, + GreaterThanEqual, + True, +}; + +enum class BooleanOp : u64 { + And, + Or, + Xor, +}; + class TranslatorVisitor { public: explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp index 1f604b0ee..d844974d8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp @@ -4,46 +4,11 @@ #include "common/bit_field.h" #include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { namespace { -enum class ComparisonOp : u64 { - False, - LessThan, - Equal, - LessThanEqual, - GreaterThan, - NotEqual, - GreaterThanEqual, - True, -}; - -[[nodiscard]] IR::U1 CompareToZero(TranslatorVisitor& v, const IR::U32& operand, - ComparisonOp compare_op, bool is_signed) { - const IR::U32 zero{v.ir.Imm32(0)}; - switch (compare_op) { - case ComparisonOp::False: - return v.ir.Imm1(false); - case ComparisonOp::LessThan: - return v.ir.ILessThan(operand, zero, is_signed); - case ComparisonOp::Equal: - return v.ir.IEqual(operand, zero); - case ComparisonOp::LessThanEqual: - return v.ir.ILessThanEqual(operand, zero, is_signed); - case ComparisonOp::GreaterThan: - return v.ir.IGreaterThan(operand, zero, is_signed); - case ComparisonOp::NotEqual: - return v.ir.INotEqual(operand, zero); - case ComparisonOp::GreaterThanEqual: - return v.ir.IGreaterThanEqual(operand, zero, is_signed); - case ComparisonOp::True: - return v.ir.Imm1(true); - default: - throw NotImplementedException("ICMP.CMP"); - } -} - void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) { union { u64 insn; @@ -55,7 +20,7 @@ void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& o const IR::U32 zero{v.ir.Imm32(0)}; const bool is_signed{icmp.is_signed != 0}; - const IR::U1 cmp_result{CompareToZero(v, operand, icmp.compare_op, is_signed)}; + const IR::U1 cmp_result{IntegerCompare(v, operand, zero, icmp.compare_op, is_signed)}; const IR::U32 src_reg{v.X(icmp.src_reg)}; const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp new file mode 100644 index 000000000..b6a7b593d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> x; + BitField<44, 1, u64> bf; + BitField<45, 2, BooleanOp> bop; + BitField<48, 1, u64> is_signed; + BitField<49, 3, ComparisonOp> compare_op; + } const iset{insn}; + + if (iset.x != 0) { + throw NotImplementedException("ISET.X"); + } + + const IR::U32 src_reg{v.X(iset.src_reg)}; + const bool is_signed{iset.is_signed != 0}; + IR::U1 pred{v.ir.GetPred(iset.pred)}; + if (iset.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result{IntegerCompare(v, src_reg, src_a, iset.compare_op, is_signed)}; + const IR::U1 bop_result{PredicateCombine(v, cmp_result, pred, iset.bop)}; + + const IR::U32 one_mask{v.ir.Imm32(-1)}; + const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; + const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one}; + + const IR::U32 result{v.ir.Select(bop_result, pass_result, fail_result)}; + + v.X(iset.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::ISET_reg(u64 insn) { + ISET(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::ISET_cbuf(u64 insn) { + ISET(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::ISET_imm(u64 insn) { + ISET(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp index 5303db612..40f14ab8a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp @@ -23,7 +23,7 @@ void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { throw NotImplementedException("IMNMX.MODE"); } - IR::U1 pred{v.ir.GetPred(imnmx.pred)}; + const IR::U1 pred{v.ir.GetPred(imnmx.pred)}; const IR::U32 op_a{v.X(imnmx.src_reg)}; IR::U32 min; IR::U32 max; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 8d4044ee8..f327e6fa5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -457,18 +457,6 @@ void TranslatorVisitor::ISBERD(u64) { ThrowNotImplemented(Opcode::ISBERD); } -void TranslatorVisitor::ISET_reg(u64) { - ThrowNotImplemented(Opcode::ISET_reg); -} - -void TranslatorVisitor::ISET_cbuf(u64) { - ThrowNotImplemented(Opcode::ISET_cbuf); -} - -void TranslatorVisitor::ISET_imm(u64) { - ThrowNotImplemented(Opcode::ISET_imm); -} - void TranslatorVisitor::JCAL(u64) { ThrowNotImplemented(Opcode::JCAL); } -- cgit v1.2.3 From 103b9da4f7115ff47eee52d0dbd31b5b7a18b257 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 1 Mar 2021 15:58:16 -0500 Subject: shader: Implement FLO --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 12 +++++- src/shader_recompiler/frontend/ir/ir_emitter.h | 4 +- src/shader_recompiler/frontend/ir/opcodes.inc | 2 + .../maxwell/translate/impl/find_leading_one.cpp | 46 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 ------ 5 files changed, 61 insertions(+), 15 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 04edcdfd8..0f1cab57a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -812,8 +812,16 @@ U32 IREmitter::BitCount(const U32& value) { return Inst(Opcode::BitCount32, value); } -U32 IREmitter::BitwiseNot(const U32& a) { - return Inst(Opcode::BitwiseNot32, a); +U32 IREmitter::BitwiseNot(const U32& value) { + return Inst(Opcode::BitwiseNot32, value); +} + +U32 IREmitter::FindSMsb(const U32& value) { + return Inst(Opcode::FindSMsb32, value); +} + +U32 IREmitter::FindUMsb(const U32& value) { + return Inst(Opcode::FindUMsb32, value); } U32 IREmitter::SMin(const U32& a, const U32& b) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 00ba2e4cd..03a67985f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -161,8 +161,10 @@ public: bool is_signed); [[nodiscard]] U32 BitReverse(const U32& value); [[nodiscard]] U32 BitCount(const U32& value); - [[nodiscard]] U32 BitwiseNot(const U32& a); + [[nodiscard]] U32 BitwiseNot(const U32& value); + [[nodiscard]] U32 FindSMsb(const U32& value); + [[nodiscard]] U32 FindUMsb(const U32& value); [[nodiscard]] U32 SMin(const U32& a, const U32& b); [[nodiscard]] U32 UMin(const U32& a, const U32& b); [[nodiscard]] U32 SMax(const U32& a, const U32& b); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 2c4a997dc..aedbc5c3e 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -235,6 +235,8 @@ OPCODE(BitReverse32, U32, U32, OPCODE(BitCount32, U32, U32, ) OPCODE(BitwiseNot32, U32, U32, ) +OPCODE(FindSMsb32, U32, U32, ) +OPCODE(FindUMsb32, U32, U32, ) OPCODE(SMin32, U32, U32, U32, ) OPCODE(UMin32, U32, U32, U32, ) OPCODE(SMax32, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp new file mode 100644 index 000000000..d5361bec5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FLO(TranslatorVisitor& v, u64 insn, const IR::U32& src) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<40, 1, u64> tilde; + BitField<41, 1, u64> shift; + BitField<48, 1, u64> is_signed; + } const flo{insn}; + + const bool invert{flo.tilde != 0}; + const bool is_signed{flo.is_signed != 0}; + const bool shift_op{flo.shift != 0}; + + const IR::U32 operand{invert ? v.ir.BitwiseNot(src) : src}; + const IR::U32 find_result{is_signed ? v.ir.FindSMsb(operand) : v.ir.FindUMsb(operand)}; + const IR::U1 find_fail{v.ir.IEqual(find_result, v.ir.Imm32(-1))}; + const IR::U32 offset{v.ir.Imm32(31)}; + const IR::U32 success_result{shift_op ? IR::U32{v.ir.ISub(offset, find_result)} : find_result}; + + const IR::U32 result{v.ir.Select(find_fail, find_result, success_result)}; + v.X(flo.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::FLO_reg(u64 insn) { + FLO(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::FLO_cbuf(u64 insn) { + FLO(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::FLO_imm(u64 insn) { + FLO(*this, insn, GetImm20(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index f327e6fa5..2da0b87c4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -217,18 +217,6 @@ void TranslatorVisitor::FCMP_imm(u64) { ThrowNotImplemented(Opcode::FCMP_imm); } -void TranslatorVisitor::FLO_reg(u64) { - ThrowNotImplemented(Opcode::FLO_reg); -} - -void TranslatorVisitor::FLO_cbuf(u64) { - ThrowNotImplemented(Opcode::FLO_cbuf); -} - -void TranslatorVisitor::FLO_imm(u64) { - ThrowNotImplemented(Opcode::FLO_imm); -} - void TranslatorVisitor::FMNMX_reg(u64) { ThrowNotImplemented(Opcode::FMNMX_reg); } -- cgit v1.2.3 From ce9b116cfe4fcd96df889ed8997c93c6cd2a502c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 2 Mar 2021 01:05:57 -0500 Subject: Implement PSET, refactor common comparison funcs --- .../maxwell/translate/impl/common_funcs.cpp | 54 +++++++++--------- .../frontend/maxwell/translate/impl/common_funcs.h | 7 +-- .../frontend/maxwell/translate/impl/impl.h | 8 +-- .../maxwell/translate/impl/integer_compare.cpp | 4 +- .../translate/impl/integer_compare_and_set.cpp | 6 +- .../translate/impl/integer_set_predicate.cpp | 64 +++------------------- .../maxwell/translate/impl/not_implemented.cpp | 4 -- .../maxwell/translate/impl/predicate_set.cpp | 41 ++++++++++++++ 8 files changed, 87 insertions(+), 101 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index 3ec146b1a..62f825a92 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -5,42 +5,42 @@ #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" namespace Shader::Maxwell { -[[nodiscard]] IR::U1 IntegerCompare(TranslatorVisitor& v, const IR::U32& operand_1, - const IR::U32& operand_2, ComparisonOp compare_op, +[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, CompareOp compare_op, bool is_signed) { switch (compare_op) { - case ComparisonOp::False: - return v.ir.Imm1(false); - case ComparisonOp::LessThan: - return v.ir.ILessThan(operand_1, operand_2, is_signed); - case ComparisonOp::Equal: - return v.ir.IEqual(operand_1, operand_2); - case ComparisonOp::LessThanEqual: - return v.ir.ILessThanEqual(operand_1, operand_2, is_signed); - case ComparisonOp::GreaterThan: - return v.ir.IGreaterThan(operand_1, operand_2, is_signed); - case ComparisonOp::NotEqual: - return v.ir.INotEqual(operand_1, operand_2); - case ComparisonOp::GreaterThanEqual: - return v.ir.IGreaterThanEqual(operand_1, operand_2, is_signed); - case ComparisonOp::True: - return v.ir.Imm1(true); + case CompareOp::False: + return ir.Imm1(false); + case CompareOp::LessThan: + return ir.ILessThan(operand_1, operand_2, is_signed); + case CompareOp::Equal: + return ir.IEqual(operand_1, operand_2); + case CompareOp::LessThanEqual: + return ir.ILessThanEqual(operand_1, operand_2, is_signed); + case CompareOp::GreaterThan: + return ir.IGreaterThan(operand_1, operand_2, is_signed); + case CompareOp::NotEqual: + return ir.INotEqual(operand_1, operand_2); + case CompareOp::GreaterThanEqual: + return ir.IGreaterThanEqual(operand_1, operand_2, is_signed); + case CompareOp::True: + return ir.Imm1(true); default: - throw NotImplementedException("CMP"); + throw NotImplementedException("Invalid compare op {}", compare_op); } } -[[nodiscard]] IR::U1 PredicateCombine(TranslatorVisitor& v, const IR::U1& predicate_1, +[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, BooleanOp bop) { switch (bop) { - case BooleanOp::And: - return v.ir.LogicalAnd(predicate_1, predicate_2); - case BooleanOp::Or: - return v.ir.LogicalOr(predicate_1, predicate_2); - case BooleanOp::Xor: - return v.ir.LogicalXor(predicate_1, predicate_2); + case BooleanOp::AND: + return ir.LogicalAnd(predicate_1, predicate_2); + case BooleanOp::OR: + return ir.LogicalOr(predicate_1, predicate_2); + case BooleanOp::XOR: + return ir.LogicalXor(predicate_1, predicate_2); default: - throw NotImplementedException("BOP"); + throw NotImplementedException("Invalid bop {}", bop); } } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h index 293fcce2e..61e13fa18 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -8,10 +8,9 @@ #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { -[[nodiscard]] IR::U1 IntegerCompare(TranslatorVisitor& v, const IR::U32& operand_1, - const IR::U32& operand_2, ComparisonOp compare_op, - bool is_signed); +[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, CompareOp compare_op, bool is_signed); -[[nodiscard]] IR::U1 PredicateCombine(TranslatorVisitor& v, const IR::U1& predicate_1, +[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, BooleanOp bop); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 232f8c894..ad09ade7c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -11,7 +11,7 @@ namespace Shader::Maxwell { -enum class ComparisonOp : u64 { +enum class CompareOp : u64 { False, LessThan, Equal, @@ -23,9 +23,9 @@ enum class ComparisonOp : u64 { }; enum class BooleanOp : u64 { - And, - Or, - Xor, + AND, + OR, + XOR, }; class TranslatorVisitor { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp index d844974d8..ba6e01926 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp @@ -15,12 +15,12 @@ void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& o BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> src_reg; BitField<48, 1, u64> is_signed; - BitField<49, 3, ComparisonOp> compare_op; + BitField<49, 3, CompareOp> compare_op; } const icmp{insn}; const IR::U32 zero{v.ir.Imm32(0)}; const bool is_signed{icmp.is_signed != 0}; - const IR::U1 cmp_result{IntegerCompare(v, operand, zero, icmp.compare_op, is_signed)}; + const IR::U1 cmp_result{IntegerCompare(v.ir, operand, zero, icmp.compare_op, is_signed)}; const IR::U32 src_reg{v.X(icmp.src_reg)}; const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp index b6a7b593d..914af010f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp @@ -20,7 +20,7 @@ void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { BitField<44, 1, u64> bf; BitField<45, 2, BooleanOp> bop; BitField<48, 1, u64> is_signed; - BitField<49, 3, ComparisonOp> compare_op; + BitField<49, 3, CompareOp> compare_op; } const iset{insn}; if (iset.x != 0) { @@ -33,8 +33,8 @@ void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { if (iset.neg_pred != 0) { pred = v.ir.LogicalNot(pred); } - const IR::U1 cmp_result{IntegerCompare(v, src_reg, src_a, iset.compare_op, is_signed)}; - const IR::U1 bop_result{PredicateCombine(v, cmp_result, pred, iset.bop)}; + const IR::U1 cmp_result{IntegerCompare(v.ir, src_reg, src_a, iset.compare_op, is_signed)}; + const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)}; const IR::U32 one_mask{v.ir.Imm32(-1)}; const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp index 1bc9ef363..7743701d0 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp @@ -4,62 +4,11 @@ #include "common/bit_field.h" #include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { namespace { -enum class CompareOp : u64 { - F, // Always false - LT, // Less than - EQ, // Equal - LE, // Less than or equal - GT, // Greater than - NE, // Not equal - GE, // Greater than or equal - T, // Always true -}; - -enum class Bop : u64 { - AND, - OR, - XOR, -}; - -IR::U1 Compare(IR::IREmitter& ir, CompareOp op, const IR::U32& lhs, const IR::U32& rhs, - bool is_signed) { - switch (op) { - case CompareOp::F: - return ir.Imm1(false); - case CompareOp::LT: - return ir.ILessThan(lhs, rhs, is_signed); - case CompareOp::EQ: - return ir.IEqual(lhs, rhs); - case CompareOp::LE: - return ir.ILessThanEqual(lhs, rhs, is_signed); - case CompareOp::GT: - return ir.IGreaterThan(lhs, rhs, is_signed); - case CompareOp::NE: - return ir.INotEqual(lhs, rhs); - case CompareOp::GE: - return ir.IGreaterThanEqual(lhs, rhs, is_signed); - case CompareOp::T: - return ir.Imm1(true); - } - throw NotImplementedException("Invalid ISETP compare op {}", op); -} - -IR::U1 Combine(IR::IREmitter& ir, Bop bop, const IR::U1& comparison, const IR::U1& bop_pred) { - switch (bop) { - case Bop::AND: - return ir.LogicalAnd(comparison, bop_pred); - case Bop::OR: - return ir.LogicalOr(comparison, bop_pred); - case Bop::XOR: - return ir.LogicalXor(comparison, bop_pred); - } - throw NotImplementedException("Invalid ISETP bop {}", bop); -} - void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { union { u64 raw; @@ -68,17 +17,18 @@ void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { BitField<8, 8, IR::Reg> src_reg_a; BitField<39, 3, IR::Pred> bop_pred; BitField<42, 1, u64> neg_bop_pred; - BitField<45, 2, Bop> bop; + BitField<45, 2, BooleanOp> bop; BitField<48, 1, u64> is_signed; BitField<49, 3, CompareOp> compare_op; } const isetp{insn}; - const Bop bop{isetp.bop}; + const BooleanOp bop{isetp.bop}; + const CompareOp compare_op{isetp.compare_op}; const IR::U32 op_a{v.X(isetp.src_reg_a)}; - const IR::U1 comparison{Compare(v.ir, isetp.compare_op, op_a, op_b, isetp.is_signed != 0)}; + const IR::U1 comparison{IntegerCompare(v.ir, op_a, op_b, compare_op, isetp.is_signed != 0)}; const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)}; - const IR::U1 result_a{Combine(v.ir, bop, comparison, bop_pred)}; - const IR::U1 result_b{Combine(v.ir, bop, v.ir.LogicalNot(comparison), bop_pred)}; + const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; v.ir.SetPred(isetp.dest_pred_a, result_a); v.ir.SetPred(isetp.dest_pred_b, result_b); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 2da0b87c4..291d7a4bc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -593,10 +593,6 @@ void TranslatorVisitor::PRMT_imm(u64) { ThrowNotImplemented(Opcode::PRMT_imm); } -void TranslatorVisitor::PSET(u64) { - ThrowNotImplemented(Opcode::PSET); -} - void TranslatorVisitor::PSETP(u64) { ThrowNotImplemented(Opcode::PSETP); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp new file mode 100644 index 000000000..6c15963fa --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp @@ -0,0 +1,41 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::PSET(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<12, 3, IR::Pred> pred_a; + BitField<15, 1, u64> neg_pred_a; + BitField<24, 2, BooleanOp> bop_1; + BitField<29, 3, IR::Pred> pred_b; + BitField<32, 1, u64> neg_pred_b; + BitField<39, 3, IR::Pred> pred_c; + BitField<42, 1, u64> neg_pred_c; + BitField<44, 1, u64> bf; + BitField<45, 2, BooleanOp> bop_2; + } const pset{insn}; + + const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; + const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; + const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; + + const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; + const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)}; + + const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)}; + const IR::U32 false_result{ir.Imm32(0)}; + + const IR::U32 result{ir.Select(res_2, true_result, false_result)}; + + X(pset.dest_reg, result); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From c2155f04d4220b71432b046694983963036ab6e2 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 2 Mar 2021 13:42:09 -0500 Subject: shader: Implement PSETP --- .../maxwell/translate/impl/not_implemented.cpp | 4 --- .../maxwell/translate/impl/predicate_set.cpp | 41 ---------------------- .../translate/impl/predicate_set_predicate.cpp | 38 ++++++++++++++++++++ .../translate/impl/predicate_set_register.cpp | 41 ++++++++++++++++++++++ 4 files changed, 79 insertions(+), 45 deletions(-) delete mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 291d7a4bc..91a9858c6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -593,10 +593,6 @@ void TranslatorVisitor::PRMT_imm(u64) { ThrowNotImplemented(Opcode::PRMT_imm); } -void TranslatorVisitor::PSETP(u64) { - ThrowNotImplemented(Opcode::PSETP); -} - void TranslatorVisitor::R2B(u64) { ThrowNotImplemented(Opcode::R2B); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp deleted file mode 100644 index 6c15963fa..000000000 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/bit_field.h" -#include "common/common_types.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" - -namespace Shader::Maxwell { -void TranslatorVisitor::PSET(u64 insn) { - union { - u64 raw; - BitField<0, 8, IR::Reg> dest_reg; - BitField<12, 3, IR::Pred> pred_a; - BitField<15, 1, u64> neg_pred_a; - BitField<24, 2, BooleanOp> bop_1; - BitField<29, 3, IR::Pred> pred_b; - BitField<32, 1, u64> neg_pred_b; - BitField<39, 3, IR::Pred> pred_c; - BitField<42, 1, u64> neg_pred_c; - BitField<44, 1, u64> bf; - BitField<45, 2, BooleanOp> bop_2; - } const pset{insn}; - - const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; - const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; - const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; - - const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; - const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)}; - - const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)}; - const IR::U32 false_result{ir.Imm32(0)}; - - const IR::U32 result{ir.Select(res_2, true_result, false_result)}; - - X(pset.dest_reg, result); -} - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp new file mode 100644 index 000000000..75d1fa8c1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp @@ -0,0 +1,38 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::PSETP(u64 insn) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<12, 3, IR::Pred> pred_a; + BitField<15, 1, u64> neg_pred_a; + BitField<24, 2, BooleanOp> bop_1; + BitField<29, 3, IR::Pred> pred_b; + BitField<32, 1, u64> neg_pred_b; + BitField<39, 3, IR::Pred> pred_c; + BitField<42, 1, u64> neg_pred_c; + BitField<45, 2, BooleanOp> bop_2; + } const pset{insn}; + + const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; + const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; + const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; + + const IR::U1 lhs_a{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; + const IR::U1 lhs_b{PredicateCombine(ir, ir.LogicalNot(pred_a), pred_b, pset.bop_1)}; + const IR::U1 result_a{PredicateCombine(ir, lhs_a, pred_c, pset.bop_2)}; + const IR::U1 result_b{PredicateCombine(ir, lhs_b, pred_c, pset.bop_2)}; + + ir.SetPred(pset.dest_pred_a, result_a); + ir.SetPred(pset.dest_pred_b, result_b); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp new file mode 100644 index 000000000..6c15963fa --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp @@ -0,0 +1,41 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::PSET(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<12, 3, IR::Pred> pred_a; + BitField<15, 1, u64> neg_pred_a; + BitField<24, 2, BooleanOp> bop_1; + BitField<29, 3, IR::Pred> pred_b; + BitField<32, 1, u64> neg_pred_b; + BitField<39, 3, IR::Pred> pred_c; + BitField<42, 1, u64> neg_pred_c; + BitField<44, 1, u64> bf; + BitField<45, 2, BooleanOp> bop_2; + } const pset{insn}; + + const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; + const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; + const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; + + const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; + const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)}; + + const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)}; + const IR::U32 false_result{ir.Imm32(0)}; + + const IR::U32 result{ir.Select(res_2, true_result, false_result)}; + + X(pset.dest_reg, result); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From 382cba94ed52f4fae7db437a3056563ba2110e8b Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 2 Mar 2021 14:59:28 -0500 Subject: shader: Implement IADD3 --- .../translate/impl/integer_add_three_input.cpp | 103 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 --- 2 files changed, 103 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp new file mode 100644 index 000000000..c2dbd7998 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -0,0 +1,103 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Shift : u64 { + None, + Right, + Left, +}; +enum class Half : u64 { + All, + Lower, + Upper, +}; + +[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) { + constexpr bool is_signed{false}; + switch (half) { + case Half::Lower: + return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed); + case Half::Upper: + return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed); + default: + return value; + } +} + +[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) { + switch (shift) { + case Shift::Right: + return ir.ShiftRightLogical(value, ir.Imm32(16)); + case Shift::Left: + return ir.ShiftLeftLogical(value, ir.Imm32(16)); + default: + return value; + } +} + +void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + BitField<31, 2, Half> half_c; + BitField<33, 2, Half> half_b; + BitField<35, 2, Half> half_a; + BitField<37, 2, Shift> shift; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> x; + BitField<49, 1, u64> neg_c; + BitField<50, 1, u64> neg_b; + BitField<51, 1, u64> neg_a; + } iadd3{insn}; + + if (iadd3.x != 0) { + throw NotImplementedException("IADD3 X"); + } + if (iadd3.cc != 0) { + throw NotImplementedException("IADD3 CC"); + } + + IR::U32 op_a{v.X(iadd3.src_a)}; + op_a = IntegerHalf(v.ir, op_a, iadd3.half_a); + op_b = IntegerHalf(v.ir, op_b, iadd3.half_b); + op_c = IntegerHalf(v.ir, op_c, iadd3.half_c); + + if (iadd3.neg_a != 0) { + op_a = v.ir.INeg(op_a); + } + if (iadd3.neg_b != 0) { + op_b = v.ir.INeg(op_b); + } + if (iadd3.neg_c != 0) { + op_c = v.ir.INeg(op_c); + } + + IR::U32 lhs{v.ir.IAdd(op_a, op_b)}; + lhs = IntegerShift(v.ir, lhs, iadd3.shift); + const IR::U32 result{v.ir.IAdd(lhs, op_c)}; + + v.X(iadd3.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::IADD3_reg(u64 insn) { + IADD3(*this, insn, GetReg20(insn), GetReg39(insn)); +} + +void TranslatorVisitor::IADD3_cbuf(u64 insn) { + IADD3(*this, insn, GetCbuf(insn), GetReg39(insn)); +} + +void TranslatorVisitor::IADD3_imm(u64 insn) { + IADD3(*this, insn, GetImm20(insn), GetReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 91a9858c6..c93304a67 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -365,18 +365,6 @@ void TranslatorVisitor::I2I_imm(u64) { ThrowNotImplemented(Opcode::I2I_imm); } -void TranslatorVisitor::IADD3_reg(u64) { - ThrowNotImplemented(Opcode::IADD3_reg); -} - -void TranslatorVisitor::IADD3_cbuf(u64) { - ThrowNotImplemented(Opcode::IADD3_cbuf); -} - -void TranslatorVisitor::IADD3_imm(u64) { - ThrowNotImplemented(Opcode::IADD3_imm); -} - void TranslatorVisitor::IDE(u64) { ThrowNotImplemented(Opcode::IDE); } -- cgit v1.2.3 From 980cafdc27444484a2a2794be5de92ea18de6e27 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 3 Mar 2021 00:41:05 -0500 Subject: shader: Implement LOP and LOP3 --- src/shader_recompiler/frontend/maxwell/maxwell.inc | 4 +- .../maxwell/translate/impl/common_funcs.cpp | 25 ++++- .../frontend/maxwell/translate/impl/common_funcs.h | 2 + .../frontend/maxwell/translate/impl/impl.h | 7 ++ .../maxwell/translate/impl/logic_operation.cpp | 77 ++++++++++++++ .../translate/impl/logic_operation_three_input.cpp | 117 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 24 ----- 7 files changed, 225 insertions(+), 31 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index 1515285bf..5d0b91598 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -178,8 +178,8 @@ INST(LOP_reg, "LOP (reg)", "0101 1100 0100 0---") INST(LOP_cbuf, "LOP (cbuf)", "0100 1100 0100 0---") INST(LOP_imm, "LOP (imm)", "0011 100- 0100 0---") INST(LOP3_reg, "LOP3 (reg)", "0101 1011 1110 0---") -INST(LOP3_cbuf, "LOP3 (cbuf)", "0011 11-- ---- ----") -INST(LOP3_imm, "LOP3 (imm)", "0000 001- ---- ----") +INST(LOP3_cbuf, "LOP3 (cbuf)", "0000 001- ---- ----") +INST(LOP3_imm, "LOP3 (imm)", "0011 11-- ---- ----") INST(LOP32I, "LOP32I", "0000 01-- ---- ----") INST(MEMBAR, "MEMBAR", "1110 1111 1001 1---") INST(MOV_reg, "MOV (reg)", "0101 1100 1001 1---") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index 62f825a92..9d4ac2e36 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -5,9 +5,8 @@ #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" namespace Shader::Maxwell { -[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, - const IR::U32& operand_2, CompareOp compare_op, - bool is_signed) { +IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed) { switch (compare_op) { case CompareOp::False: return ir.Imm1(false); @@ -30,8 +29,8 @@ namespace Shader::Maxwell { } } -[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, - const IR::U1& predicate_2, BooleanOp bop) { +IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, + BooleanOp bop) { switch (bop) { case BooleanOp::AND: return ir.LogicalAnd(predicate_1, predicate_2); @@ -43,4 +42,20 @@ namespace Shader::Maxwell { throw NotImplementedException("Invalid bop {}", bop); } } + +IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) { + switch (op) { + case PredicateOp::False: + return ir.Imm1(false); + case PredicateOp::True: + return ir.Imm1(true); + case PredicateOp::Zero: + return ir.IEqual(result, ir.Imm32(0)); + case PredicateOp::NonZero: + return ir.INotEqual(result, ir.Imm32(0)); + default: + throw NotImplementedException("Invalid Predicate operation {}", op); + } +} + } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h index 61e13fa18..c9ae5c500 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -13,4 +13,6 @@ namespace Shader::Maxwell { [[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, BooleanOp bop); + +[[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index ad09ade7c..c6253c40c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -28,6 +28,13 @@ enum class BooleanOp : u64 { XOR, }; +enum class PredicateOp : u64 { + False, + True, + Zero, + NonZero, +}; + class TranslatorVisitor { public: explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp new file mode 100644 index 000000000..e786a388e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp @@ -0,0 +1,77 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class LogicalOp : u64 { + AND, + OR, + XOR, + PASS_B, +}; + +[[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, LogicalOp op) { + switch (op) { + case LogicalOp::AND: + return ir.BitwiseAnd(operand_1, operand_2); + case LogicalOp::OR: + return ir.BitwiseOr(operand_1, operand_2); + case LogicalOp::XOR: + return ir.BitwiseXor(operand_1, operand_2); + case LogicalOp::PASS_B: + return operand_2; + default: + throw NotImplementedException("Invalid Logical operation {}", op); + } +} + +void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<39, 1, u64> neg_a; + BitField<40, 1, u64> neg_b; + BitField<41, 2, LogicalOp> bit_op; + BitField<43, 1, u64> x; + BitField<44, 2, PredicateOp> pred_op; + BitField<48, 3, IR::Pred> pred; + } const lop{insn}; + + if (lop.x != 0) { + throw NotImplementedException("LOP X"); + } + IR::U32 op_a{v.X(lop.src_reg)}; + if (lop.neg_a != 0) { + op_a = v.ir.BitwiseNot(op_a); + } + if (lop.neg_b != 0) { + op_b = v.ir.BitwiseNot(op_b); + } + + const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, lop.bit_op)}; + const IR::U1 pred_result{PredicateOperation(v.ir, result, lop.pred_op)}; + v.X(lop.dest_reg, result); + v.ir.SetPred(lop.pred, pred_result); +} +} // Anonymous namespace + +void TranslatorVisitor::LOP_reg(u64 insn) { + LOP(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::LOP_cbuf(u64 insn) { + LOP(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::LOP_imm(u64 insn) { + LOP(*this, insn, GetImm20(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp new file mode 100644 index 000000000..256c47504 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp @@ -0,0 +1,117 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +// https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651 +// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table) +IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c, + u64 ttbl) { + IR::U32 r{ir.Imm32(0)}; + const IR::U32 not_a{ir.BitwiseNot(a)}; + const IR::U32 not_b{ir.BitwiseNot(b)}; + const IR::U32 not_c{ir.BitwiseNot(c)}; + if (ttbl & 0x01) { + // r |= ~a & ~b & ~c; + const auto lhs{ir.BitwiseAnd(not_a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x02) { + // r |= ~a & ~b & c; + const auto lhs{ir.BitwiseAnd(not_a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x04) { + // r |= ~a & b & ~c; + const auto lhs{ir.BitwiseAnd(not_a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x08) { + // r |= ~a & b & c; + const auto lhs{ir.BitwiseAnd(not_a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x10) { + // r |= a & ~b & ~c; + const auto lhs{ir.BitwiseAnd(a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x20) { + // r |= a & ~b & c; + const auto lhs{ir.BitwiseAnd(a, not_b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x40) { + // r |= a & b & ~c; + const auto lhs{ir.BitwiseAnd(a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, not_c)}; + r = ir.BitwiseOr(r, rhs); + } + if (ttbl & 0x80) { + // r |= a & b & c; + const auto lhs{ir.BitwiseAnd(a, b)}; + const auto rhs{ir.BitwiseAnd(lhs, c)}; + r = ir.BitwiseOr(r, rhs); + } + return r; +} + +IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + } const lop3{insn}; + + const IR::U32 op_a{v.X(lop3.src_reg)}; + const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)}; + v.X(lop3.dest_reg, result); + return result; +} + +u64 GetLut48(u64 insn) { + union { + u64 raw; + BitField<48, 8, u64> lut; + } const lut{insn}; + return lut.lut; +} +} // Anonymous namespace + +void TranslatorVisitor::LOP3_reg(u64 insn) { + union { + u64 insn; + BitField<28, 8, u64> lut; + BitField<38, 1, u64> x; + BitField<36, 2, PredicateOp> pred_op; + BitField<48, 3, IR::Pred> pred; + } const lop3{insn}; + + if (lop3.x != 0) { + throw NotImplementedException("LOP3 X"); + } + const IR::U32 result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), lop3.lut)}; + const IR::U1 pred_result{PredicateOperation(ir, result, lop3.pred_op)}; + ir.SetPred(lop3.pred, pred_result); +} + +void TranslatorVisitor::LOP3_cbuf(u64 insn) { + LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn)); +} + +void TranslatorVisitor::LOP3_imm(u64 insn) { + LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index c93304a67..a0535f1c2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -493,30 +493,6 @@ void TranslatorVisitor::LONGJMP(u64) { ThrowNotImplemented(Opcode::LONGJMP); } -void TranslatorVisitor::LOP_reg(u64) { - ThrowNotImplemented(Opcode::LOP_reg); -} - -void TranslatorVisitor::LOP_cbuf(u64) { - ThrowNotImplemented(Opcode::LOP_cbuf); -} - -void TranslatorVisitor::LOP_imm(u64) { - ThrowNotImplemented(Opcode::LOP_imm); -} - -void TranslatorVisitor::LOP3_reg(u64) { - ThrowNotImplemented(Opcode::LOP3_reg); -} - -void TranslatorVisitor::LOP3_cbuf(u64) { - ThrowNotImplemented(Opcode::LOP3_cbuf); -} - -void TranslatorVisitor::LOP3_imm(u64) { - ThrowNotImplemented(Opcode::LOP3_imm); -} - void TranslatorVisitor::LOP32I(u64) { ThrowNotImplemented(Opcode::LOP32I); } -- cgit v1.2.3 From 4006929c986a2e0e52429fe21201a7ad5ca3fea9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 3 Mar 2021 03:07:19 -0300 Subject: shader: Implement HADD2 --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 90 +++++++++- src/shader_recompiler/frontend/ir/ir_emitter.h | 4 +- src/shader_recompiler/frontend/ir/opcodes.inc | 17 +- .../translate/impl/half_floating_point_add.cpp | 184 +++++++++++++++++++++ .../maxwell/translate/impl/load_store_memory.cpp | 2 +- .../maxwell/translate/impl/not_implemented.cpp | 16 -- 6 files changed, 290 insertions(+), 23 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 0f1cab57a..186920d8f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -334,12 +334,12 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu } Value IREmitter::CompositeExtract(const Value& vector, size_t element) { - const auto read = [&](Opcode opcode, size_t limit) -> Value { + const auto read{[&](Opcode opcode, size_t limit) -> Value { if (element >= limit) { throw InvalidArgument("Out of bounds element {}", element); } return Inst(opcode, vector, Value{static_cast(element)}); - }; + }}; switch (vector.Type()) { case Type::U32x2: return read(Opcode::CompositeExtractU32x2, 2); @@ -370,6 +370,43 @@ Value IREmitter::CompositeExtract(const Value& vector, size_t element) { } } +Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_t element) { + const auto insert{[&](Opcode opcode, size_t limit) { + if (element >= limit) { + throw InvalidArgument("Out of bounds element {}", element); + } + return Inst(opcode, vector, object, Value{static_cast(element)}); + }}; + switch (vector.Type()) { + case Type::U32x2: + return insert(Opcode::CompositeInsertU32x2, 2); + case Type::U32x3: + return insert(Opcode::CompositeInsertU32x3, 3); + case Type::U32x4: + return insert(Opcode::CompositeInsertU32x4, 4); + case Type::F16x2: + return insert(Opcode::CompositeInsertF16x2, 2); + case Type::F16x3: + return insert(Opcode::CompositeInsertF16x3, 3); + case Type::F16x4: + return insert(Opcode::CompositeInsertF16x4, 4); + case Type::F32x2: + return insert(Opcode::CompositeInsertF32x2, 2); + case Type::F32x3: + return insert(Opcode::CompositeInsertF32x3, 3); + case Type::F32x4: + return insert(Opcode::CompositeInsertF32x4, 4); + case Type::F64x2: + return insert(Opcode::CompositeInsertF64x2, 2); + case Type::F64x3: + return insert(Opcode::CompositeInsertF64x3, 3); + case Type::F64x4: + return insert(Opcode::CompositeInsertF64x4, 4); + default: + ThrowInvalidType(vector.Type()); + } +} + Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { if (true_value.Type() != false_value.Type()) { throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); @@ -433,7 +470,7 @@ U32 IREmitter::PackFloat2x16(const Value& vector) { } Value IREmitter::UnpackFloat2x16(const U32& value) { - return Inst(Opcode::UnpackFloat2x16, value); + return Inst(Opcode::UnpackFloat2x16, value); } F64 IREmitter::PackDouble2x32(const Value& vector) { @@ -968,7 +1005,7 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v } } -U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) { +U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { switch (result_bitsize) { case 32: switch (value.Type()) { @@ -995,4 +1032,49 @@ U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) { throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); } +F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { + switch (result_bitsize) { + case 16: + switch (value.Type()) { + case Type::F16: + // Nothing to do + return value; + case Type::F32: + return Inst(Opcode::ConvertF16F32, value); + case Type::F64: + throw LogicError("Illegal conversion from F64 to F16"); + default: + break; + } + break; + case 32: + switch (value.Type()) { + case Type::F16: + return Inst(Opcode::ConvertF32F16, value); + case Type::F32: + // Nothing to do + return value; + case Type::F64: + return Inst(Opcode::ConvertF32F64, value); + default: + break; + } + break; + case 64: + switch (value.Type()) { + case Type::F16: + throw LogicError("Illegal conversion from F16 to F64"); + case Type::F32: + // Nothing to do + return value; + case Type::F64: + return Inst(Opcode::ConvertF32F64, value); + default: + break; + } + break; + } + throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); +} + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 03a67985f..5beb99895 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -97,6 +97,7 @@ public: [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, const Value& e4); [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); + [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element); [[nodiscard]] Value Select(const U1& condition, const Value& true_value, const Value& false_value); @@ -186,7 +187,8 @@ public: [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); - [[nodiscard]] U32U64 ConvertU(size_t result_bitsize, const U32U64& value); + [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); + [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index aedbc5c3e..acfc0a829 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -83,24 +83,36 @@ OPCODE(CompositeConstructU32x4, U32x4, U32, OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) +OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) +OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) +OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) +OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) +OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) +OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) +OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) +OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) +OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) +OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) +OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) +OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) // Select operations OPCODE(SelectU8, U8, U1, U8, U8, ) @@ -277,6 +289,9 @@ OPCODE(ConvertU32F64, U32, F64, OPCODE(ConvertU64F16, U64, F16, ) OPCODE(ConvertU64F32, U64, F32, ) OPCODE(ConvertU64F64, U64, F64, ) - OPCODE(ConvertU64U32, U64, U32, ) OPCODE(ConvertU32U64, U32, U64, ) +OPCODE(ConvertF16F32, F16, F32, ) +OPCODE(ConvertF32F16, F32, F16, ) +OPCODE(ConvertF32F64, F32, F64, ) +OPCODE(ConvertF64F32, F64, F32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp new file mode 100644 index 000000000..6965adfb3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -0,0 +1,184 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Merge : u64 { + H1_H0, + F32, + MRG_H0, + MRG_H1, +}; + +enum class Swizzle : u64 { + H1_H0, + F32, + H0_H0, + H1_H1, +}; + +std::pair Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { + switch (swizzle) { + case Swizzle::H1_H0: { + const IR::Value vector{ir.UnpackFloat2x16(value)}; + return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}}; + } + case Swizzle::H0_H0: { + const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)}; + return {scalar, scalar}; + } + case Swizzle::H1_H1: { + const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)}; + return {scalar, scalar}; + } + case Swizzle::F32: { + const IR::F32 scalar{ir.BitCast(value)}; + return {scalar, scalar}; + } + } + throw InvalidArgument("Invalid swizzle {}", swizzle); +} + +IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, + Merge merge) { + switch (merge) { + case Merge::H1_H0: + return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs)); + case Merge::F32: + return ir.BitCast(ir.FPConvert(32, lhs)); + case Merge::MRG_H0: + case Merge::MRG_H1: { + const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))}; + const bool h0{merge == Merge::MRG_H0}; + const IR::F16& insert{h0 ? lhs : rhs}; + return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, h0 ? 0 : 1)); + } + } + throw InvalidArgument("Invalid merge {}", merge); +} + +void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, + Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const hadd2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + const bool promotion{lhs_a.Type() != lhs_b.Type()}; + if (promotion) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + } + lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); + rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)}; + IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)}; + if (sat) { + lhs = v.ir.FPSaturate(lhs); + rhs = v.ir.FPSaturate(rhs); + } + if (promotion) { + lhs = v.ir.FPConvert(16, lhs); + rhs = v.ir.FPConvert(16, rhs); + } + v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge)); +} +} // Anonymous namespace + +void TranslatorVisitor::HADD2_reg(u64 insn) { + union { + u64 raw; + BitField<49, 2, Merge> merge; + BitField<39, 1, u64> ftz; + BitField<32, 1, u64> sat; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + BitField<47, 2, Swizzle> swizzle_a; + BitField<31, 1, u64> neg_b; + BitField<30, 1, u64> abs_b; + BitField<28, 2, Swizzle> swizzle_b; + } const hadd2{insn}; + + HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, + hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b, + GetReg20(insn)); +} + +void TranslatorVisitor::HADD2_cbuf(u64 insn) { + union { + u64 raw; + BitField<49, 2, Merge> merge; + BitField<39, 1, u64> ftz; + BitField<52, 1, u64> sat; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + BitField<47, 2, Swizzle> swizzle_a; + BitField<56, 1, u64> neg_b; + BitField<54, 1, u64> abs_b; + } const hadd2{insn}; + + HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, + hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32, + GetCbuf(insn)); +} + +void TranslatorVisitor::HADD2_imm(u64 insn) { + union { + u64 raw; + BitField<49, 2, Merge> merge; + BitField<39, 1, u64> ftz; + BitField<52, 1, u64> sat; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + BitField<47, 2, Swizzle> swizzle_a; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + } const hadd2{insn}; + + const u32 imm{static_cast(hadd2.low << 6) | ((hadd2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hadd2.high << 22) | ((hadd2.neg_high != 0 ? 1 : 0) << 31)}; + HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, + hadd2.neg_a != 0, hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm)); +} + +void TranslatorVisitor::HADD2_32I(u64 insn) { + union { + u64 raw; + BitField<55, 1, u64> ftz; + BitField<52, 1, u64> sat; + BitField<56, 1, u64> neg_a; + BitField<53, 2, Swizzle> swizzle_a; + BitField<20, 32, u64> imm32; + } const hadd2{insn}; + + const u32 imm{static_cast(hadd2.imm32)}; + HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0, + hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm)); +} +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index 727524284..748b856c9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -59,7 +59,7 @@ IR::U64 Address(TranslatorVisitor& v, u64 insn) { const IR::U64 address{[&]() -> IR::U64 { if (mem.e == 0) { // LDG/STG without .E uses a 32-bit pointer, zero-extend it - return v.ir.ConvertU(64, v.X(mem.addr_reg)); + return v.ir.UConvert(64, v.X(mem.addr_reg)); } if (!IR::IsAligned(mem.addr_reg, 2)) { throw NotImplementedException("Unaligned address register"); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a0535f1c2..c24f29ff7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -265,22 +265,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) { ThrowNotImplemented(Opcode::GETLMEMBASE); } -void TranslatorVisitor::HADD2_reg(u64) { - ThrowNotImplemented(Opcode::HADD2_reg); -} - -void TranslatorVisitor::HADD2_cbuf(u64) { - ThrowNotImplemented(Opcode::HADD2_cbuf); -} - -void TranslatorVisitor::HADD2_imm(u64) { - ThrowNotImplemented(Opcode::HADD2_imm); -} - -void TranslatorVisitor::HADD2_32I(u64) { - ThrowNotImplemented(Opcode::HADD2_32I); -} - void TranslatorVisitor::HFMA2_reg(u64) { ThrowNotImplemented(Opcode::HFMA2_reg); } -- cgit v1.2.3 From 81f72471e831a0bc4205df6df61e5b510a5c25ac Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 4 Mar 2021 01:02:44 -0500 Subject: shader: Implement I2I --- .../impl/integer_to_integer_conversion.cpp | 99 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 --- 2 files changed, 99 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp new file mode 100644 index 000000000..ca28c6dd9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp @@ -0,0 +1,99 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class IntegerWidth : u64 { + Byte, + Short, + Word, +}; + +[[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) { + switch (width) { + case IntegerWidth::Byte: + return ir.Imm32(8); + case IntegerWidth::Short: + return ir.Imm32(16); + case IntegerWidth::Word: + return ir.Imm32(32); + default: + throw NotImplementedException("Invalid width {}", width); + } +} + +[[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src, + IntegerWidth dst_width) { + const IR::U32 zero{ir.Imm32(0)}; + switch (dst_width) { + case IntegerWidth::Byte: + return ir.BitFieldExtract(src, zero, ir.Imm32(8), false); + case IntegerWidth::Short: + return ir.BitFieldExtract(src, zero, ir.Imm32(16), false); + case IntegerWidth::Word: + return ir.BitFieldExtract(src, zero, ir.Imm32(32), false); + default: + throw NotImplementedException("Invalid width {}", dst_width); + } +} + +void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 2, IntegerWidth> dst_fmt; + BitField<12, 1, u64> dst_fmt_sign; + BitField<10, 2, IntegerWidth> src_fmt; + BitField<13, 1, u64> src_fmt_sign; + BitField<41, 3, u64> selector; + BitField<45, 1, u64> neg; + BitField<49, 1, u64> abs; + BitField<50, 1, u64> sat; + } const i2i{insn}; + + if (i2i.sat != 0) { + throw NotImplementedException("I2I SAT"); + } + if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) { + throw NotImplementedException("16-bit source format incompatible with selector {}", + i2i.selector); + } + if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) { + throw NotImplementedException("32-bit source format incompatible with selector {}", + i2i.selector); + } + + const s32 selector{static_cast(i2i.selector)}; + const IR::U32 offset{v.ir.Imm32(selector * 8)}; + const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)}; + IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, i2i.src_fmt_sign != 0)}; + if (i2i.abs) { + src_values = v.ir.IAbs(src_values); + } + if (i2i.neg) { + src_values = v.ir.INeg(src_values); + } + + const IR::U32 result{ConvertInteger(v.ir, src_values, i2i.dst_fmt)}; + v.X(i2i.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::I2I_reg(u64 insn) { + I2I(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::I2I_cbuf(u64 insn) { + I2I(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::I2I_imm(u64 insn) { + I2I(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index c24f29ff7..bd7a7a8b7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -337,18 +337,6 @@ void TranslatorVisitor::I2F_imm(u64) { ThrowNotImplemented(Opcode::I2F_imm); } -void TranslatorVisitor::I2I_reg(u64) { - ThrowNotImplemented(Opcode::I2I_reg); -} - -void TranslatorVisitor::I2I_cbuf(u64) { - ThrowNotImplemented(Opcode::I2I_cbuf); -} - -void TranslatorVisitor::I2I_imm(u64) { - ThrowNotImplemented(Opcode::I2I_imm); -} - void TranslatorVisitor::IDE(u64) { ThrowNotImplemented(Opcode::IDE); } -- cgit v1.2.3 From d1edc16ba87f3247ad220042050bfea2999067ff Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 4 Mar 2021 20:12:44 -0300 Subject: shader: Deduplicate HADD2 code --- .../translate/impl/half_floating_point_add.cpp | 35 ++++++++++------------ 1 file changed, 16 insertions(+), 19 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp index 6965adfb3..c292d5e87 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -107,54 +107,52 @@ void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool } v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge)); } -} // Anonymous namespace -void TranslatorVisitor::HADD2_reg(u64 insn) { +void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b, + const IR::U32& src_b) { union { u64 raw; BitField<49, 2, Merge> merge; BitField<39, 1, u64> ftz; - BitField<32, 1, u64> sat; BitField<43, 1, u64> neg_a; BitField<44, 1, u64> abs_a; BitField<47, 2, Swizzle> swizzle_a; + } const hadd2{insn}; + + HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0, + hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b); +} +} // Anonymous namespace + +void TranslatorVisitor::HADD2_reg(u64 insn) { + union { + u64 raw; + BitField<32, 1, u64> sat; BitField<31, 1, u64> neg_b; BitField<30, 1, u64> abs_b; BitField<28, 2, Swizzle> swizzle_b; } const hadd2{insn}; - HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, - hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b, + HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b, GetReg20(insn)); } void TranslatorVisitor::HADD2_cbuf(u64 insn) { union { u64 raw; - BitField<49, 2, Merge> merge; - BitField<39, 1, u64> ftz; BitField<52, 1, u64> sat; - BitField<43, 1, u64> neg_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, Swizzle> swizzle_a; BitField<56, 1, u64> neg_b; BitField<54, 1, u64> abs_b; } const hadd2{insn}; - HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, - hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32, + HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32, GetCbuf(insn)); } void TranslatorVisitor::HADD2_imm(u64 insn) { union { u64 raw; - BitField<49, 2, Merge> merge; - BitField<39, 1, u64> ftz; BitField<52, 1, u64> sat; - BitField<43, 1, u64> neg_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, Swizzle> swizzle_a; BitField<56, 1, u64> neg_high; BitField<30, 9, u64> high; BitField<29, 1, u64> neg_low; @@ -163,8 +161,7 @@ void TranslatorVisitor::HADD2_imm(u64 insn) { const u32 imm{static_cast(hadd2.low << 6) | ((hadd2.neg_low != 0 ? 1 : 0) << 15) | static_cast(hadd2.high << 22) | ((hadd2.neg_high != 0 ? 1 : 0) << 31)}; - HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, - hadd2.neg_a != 0, hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm)); + HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); } void TranslatorVisitor::HADD2_32I(u64 insn) { -- cgit v1.2.3 From 5465cb156107a27df525dfedbfd4e920b7f71253 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 5 Mar 2021 01:15:16 -0500 Subject: shader: Implement LEA --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 22 ++++- src/shader_recompiler/frontend/ir/ir_emitter.h | 4 +- src/shader_recompiler/frontend/ir/opcodes.inc | 2 + .../translate/impl/load_effective_address.cpp | 100 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 20 ----- 5 files changed, 122 insertions(+), 26 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 186920d8f..01f52183c 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -798,8 +798,15 @@ U32 IREmitter::IMul(const U32& a, const U32& b) { return Inst(Opcode::IMul32, a, b); } -U32 IREmitter::INeg(const U32& value) { - return Inst(Opcode::INeg32, value); +U32U64 IREmitter::INeg(const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::INeg32, value); + case Type::U64: + return Inst(Opcode::INeg64, value); + default: + ThrowInvalidType(value.Type()); + } } U32 IREmitter::IAbs(const U32& value) { @@ -810,8 +817,15 @@ U32 IREmitter::ShiftLeftLogical(const U32& base, const U32& shift) { return Inst(Opcode::ShiftLeftLogical32, base, shift); } -U32 IREmitter::ShiftRightLogical(const U32& base, const U32& shift) { - return Inst(Opcode::ShiftRightLogical32, base, shift); +U32U64 IREmitter::ShiftRightLogical(const U32U64& base, const U32& shift) { + switch (base.Type()) { + case Type::U32: + return Inst(Opcode::ShiftRightLogical32, base, shift); + case Type::U64: + return Inst(Opcode::ShiftRightLogical64, base, shift); + default: + ThrowInvalidType(base.Type()); + } } U32 IREmitter::ShiftRightArithmetic(const U32& base, const U32& shift) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 5beb99895..33bf2a7d0 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -148,10 +148,10 @@ public: [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); [[nodiscard]] U32 IMul(const U32& a, const U32& b); - [[nodiscard]] U32 INeg(const U32& value); + [[nodiscard]] U32U64 INeg(const U32U64& value); [[nodiscard]] U32 IAbs(const U32& value); [[nodiscard]] U32 ShiftLeftLogical(const U32& base, const U32& shift); - [[nodiscard]] U32 ShiftRightLogical(const U32& base, const U32& shift); + [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift); [[nodiscard]] U32 ShiftRightArithmetic(const U32& base, const U32& shift); [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b); [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index acfc0a829..b51aaaef5 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -233,9 +233,11 @@ OPCODE(ISub32, U32, U32, OPCODE(ISub64, U64, U64, U64, ) OPCODE(IMul32, U32, U32, U32, ) OPCODE(INeg32, U32, U32, ) +OPCODE(INeg64, U64, U64, ) OPCODE(IAbs32, U32, U32, ) OPCODE(ShiftLeftLogical32, U32, U32, U32, ) OPCODE(ShiftRightLogical32, U32, U32, U32, ) +OPCODE(ShiftRightLogical64, U64, U64, U32, ) OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) OPCODE(BitwiseAnd32, U32, U32, U32, ) OPCODE(BitwiseOr32, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp new file mode 100644 index 000000000..784588e83 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp @@ -0,0 +1,100 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale, + bool neg, bool x) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> offset_lo_reg; + BitField<48, 3, IR::Pred> pred; + } const lea{insn}; + + if (x) { + throw NotImplementedException("LEA.HI X"); + } + if (lea.pred != IR::Pred::PT) { + throw NotImplementedException("LEA.LO Pred"); + } + + const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; + const IR::U64 packed_offset{v.ir.PackUint2x32(v.ir.CompositeConstruct(offset_lo, offset_hi))}; + const IR::U64 offset{neg ? IR::U64{v.ir.INeg(packed_offset)} : packed_offset}; + + const s32 hi_scale{32 - static_cast(scale)}; + const IR::U64 scaled_offset{v.ir.ShiftRightLogical(offset, v.ir.Imm32(hi_scale))}; + const IR::U32 scaled_offset_w0{v.ir.CompositeExtract(v.ir.UnpackUint2x32(scaled_offset), 0)}; + + IR::U32 result{v.ir.IAdd(base, scaled_offset_w0)}; + v.X(lea.dest_reg, result); +} + +void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> offset_lo_reg; + BitField<39, 5, u64> scale; + BitField<45, 1, u64> neg; + BitField<46, 1, u64> x; + BitField<48, 3, IR::Pred> pred; + } const lea{insn}; + if (lea.x != 0) { + throw NotImplementedException("LEA.LO X"); + } + if (lea.pred != IR::Pred::PT) { + throw NotImplementedException("LEA.LO Pred"); + } + + const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; + const s32 scale{static_cast(lea.scale)}; + const IR::U32 offset{lea.neg != 0 ? IR::U32{v.ir.INeg(offset_lo)} : offset_lo}; + const IR::U32 scaled_offset{v.ir.ShiftLeftLogical(offset, v.ir.Imm32(scale))}; + + IR::U32 result{v.ir.IAdd(base, scaled_offset)}; + v.X(lea.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::LEA_hi_reg(u64 insn) { + union { + u64 insn; + BitField<28, 5, u64> scale; + BitField<37, 1, u64> neg; + BitField<38, 1, u64> x; + } const lea{insn}; + + LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0); +} + +void TranslatorVisitor::LEA_hi_cbuf(u64 insn) { + union { + u64 insn; + BitField<51, 5, u64> scale; + BitField<56, 1, u64> neg; + BitField<57, 1, u64> x; + } const lea{insn}; + + LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0); +} + +void TranslatorVisitor::LEA_lo_reg(u64 insn) { + LEA_lo(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::LEA_lo_cbuf(u64 insn) { + LEA_lo(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::LEA_lo_imm(u64 insn) { + LEA_lo(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index bd7a7a8b7..62863aff6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -437,26 +437,6 @@ void TranslatorVisitor::LDS(u64) { ThrowNotImplemented(Opcode::LDS); } -void TranslatorVisitor::LEA_hi_reg(u64) { - ThrowNotImplemented(Opcode::LEA_hi_reg); -} - -void TranslatorVisitor::LEA_hi_cbuf(u64) { - ThrowNotImplemented(Opcode::LEA_hi_cbuf); -} - -void TranslatorVisitor::LEA_lo_reg(u64) { - ThrowNotImplemented(Opcode::LEA_lo_reg); -} - -void TranslatorVisitor::LEA_lo_cbuf(u64) { - ThrowNotImplemented(Opcode::LEA_lo_cbuf); -} - -void TranslatorVisitor::LEA_lo_imm(u64) { - ThrowNotImplemented(Opcode::LEA_lo_imm); -} - void TranslatorVisitor::LEPC(u64) { ThrowNotImplemented(Opcode::LEPC); } -- cgit v1.2.3 From 924f0a9149b6777782347be3d2c833a5f8e90058 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 7 Mar 2021 14:48:03 -0500 Subject: shader: Implement SHF --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 22 +++++-- src/shader_recompiler/frontend/ir/ir_emitter.h | 4 +- src/shader_recompiler/frontend/ir/opcodes.inc | 2 + .../translate/impl/integer_funnel_shift.cpp | 77 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 16 ----- 5 files changed, 99 insertions(+), 22 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 01f52183c..1659b7f3b 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -813,8 +813,15 @@ U32 IREmitter::IAbs(const U32& value) { return Inst(Opcode::IAbs32, value); } -U32 IREmitter::ShiftLeftLogical(const U32& base, const U32& shift) { - return Inst(Opcode::ShiftLeftLogical32, base, shift); +U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) { + switch (base.Type()) { + case Type::U32: + return Inst(Opcode::ShiftLeftLogical32, base, shift); + case Type::U64: + return Inst(Opcode::ShiftLeftLogical64, base, shift); + default: + ThrowInvalidType(base.Type()); + } } U32U64 IREmitter::ShiftRightLogical(const U32U64& base, const U32& shift) { @@ -828,8 +835,15 @@ U32U64 IREmitter::ShiftRightLogical(const U32U64& base, const U32& shift) { } } -U32 IREmitter::ShiftRightArithmetic(const U32& base, const U32& shift) { - return Inst(Opcode::ShiftRightArithmetic32, base, shift); +U32U64 IREmitter::ShiftRightArithmetic(const U32U64& base, const U32& shift) { + switch (base.Type()) { + case Type::U32: + return Inst(Opcode::ShiftRightArithmetic32, base, shift); + case Type::U64: + return Inst(Opcode::ShiftRightArithmetic64, base, shift); + default: + ThrowInvalidType(base.Type()); + } } U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 33bf2a7d0..6e29bf0e2 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -150,9 +150,9 @@ public: [[nodiscard]] U32 IMul(const U32& a, const U32& b); [[nodiscard]] U32U64 INeg(const U32U64& value); [[nodiscard]] U32 IAbs(const U32& value); - [[nodiscard]] U32 ShiftLeftLogical(const U32& base, const U32& shift); + [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift); [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift); - [[nodiscard]] U32 ShiftRightArithmetic(const U32& base, const U32& shift); + [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift); [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b); [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b); [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index b51aaaef5..75f09ebfc 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -236,9 +236,11 @@ OPCODE(INeg32, U32, U32, OPCODE(INeg64, U64, U64, ) OPCODE(IAbs32, U32, U32, ) OPCODE(ShiftLeftLogical32, U32, U32, U32, ) +OPCODE(ShiftLeftLogical64, U64, U64, U32, ) OPCODE(ShiftRightLogical32, U32, U32, U32, ) OPCODE(ShiftRightLogical64, U64, U64, U32, ) OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) +OPCODE(ShiftRightArithmetic64, U64, U64, U32, ) OPCODE(BitwiseAnd32, U32, U32, U32, ) OPCODE(BitwiseOr32, U32, U32, U32, ) OPCODE(BitwiseXor32, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp new file mode 100644 index 000000000..d8d6c939e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp @@ -0,0 +1,77 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class MaxShift : u64 { + U32, + Undefined, + U64, + S64, +}; + +IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift, + bool right_shift, bool is_signed) { + if (!right_shift) { + return ir.ShiftLeftLogical(packed_int, safe_shift); + } + if (is_signed) { + return ir.ShiftRightArithmetic(packed_int, safe_shift); + } + return ir.ShiftRightLogical(packed_int, safe_shift); +} + +void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits, + bool right_shift) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<0, 8, IR::Reg> lo_bits_reg; + BitField<37, 2, MaxShift> max_shift; + BitField<48, 2, u64> x_mode; + BitField<50, 1, u64> wrap; + } const shf{insn}; + if (shf.x_mode != 0) { + throw NotImplementedException("SHF X Mode"); + } + if (shf.max_shift == MaxShift::Undefined) { + throw NotImplementedException("SHF Use of undefined MaxShift value"); + } + const IR::U32 low_bits{v.X(shf.lo_bits_reg)}; + const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))}; + const IR::U32 max_shift{shf.max_shift == MaxShift::U32 ? v.ir.Imm32(32) : v.ir.Imm32(63)}; + const IR::U32 safe_shift{shf.wrap != 0 + ? v.ir.BitwiseAnd(shift, v.ir.ISub(max_shift, v.ir.Imm32(1))) + : v.ir.UMin(shift, max_shift)}; + + const bool is_signed{shf.max_shift == MaxShift::S64}; + const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)}; + const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)}; + + const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ? 0 : 1)}; + v.X(shf.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHF_l_reg(u64 insn) { + SHF(*this, insn, GetReg20(insn), GetReg39(insn), false); +} + +void TranslatorVisitor::SHF_l_imm(u64 insn) { + SHF(*this, insn, GetImm20(insn), GetReg39(insn), false); +} + +void TranslatorVisitor::SHF_r_reg(u64 insn) { + SHF(*this, insn, GetReg20(insn), GetReg39(insn), true); +} + +void TranslatorVisitor::SHF_r_imm(u64 insn) { + SHF(*this, insn, GetImm20(insn), GetReg39(insn), true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 62863aff6..2ab90d1bf 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -553,22 +553,6 @@ void TranslatorVisitor::SETLMEMBASE(u64) { ThrowNotImplemented(Opcode::SETLMEMBASE); } -void TranslatorVisitor::SHF_l_reg(u64) { - ThrowNotImplemented(Opcode::SHF_l_reg); -} - -void TranslatorVisitor::SHF_l_imm(u64) { - ThrowNotImplemented(Opcode::SHF_l_imm); -} - -void TranslatorVisitor::SHF_r_reg(u64) { - ThrowNotImplemented(Opcode::SHF_r_reg); -} - -void TranslatorVisitor::SHF_r_imm(u64) { - ThrowNotImplemented(Opcode::SHF_r_imm); -} - void TranslatorVisitor::SHFL(u64) { ThrowNotImplemented(Opcode::SHFL); } -- cgit v1.2.3 From 7d6ba5b9840a4ba00a9b0f207c1c119d60dcf8b7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 7 Mar 2021 22:01:22 -0500 Subject: shader: Implement R2P --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 2 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../translate/impl/move_register_to_predicate.cpp | 71 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 ---- 4 files changed, 74 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 1659b7f3b..f38b46bac 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -412,6 +412,8 @@ Value IREmitter::Select(const U1& condition, const Value& true_value, const Valu throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); } switch (true_value.Type()) { + case Type::U1: + return Inst(Opcode::SelectU1, condition, true_value, false_value); case Type::U8: return Inst(Opcode::SelectU8, condition, true_value, false_value); case Type::U16: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 75f09ebfc..c4e72c84d 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -115,6 +115,7 @@ OPCODE(CompositeInsertF64x3, F64x3, F64x OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) // Select operations +OPCODE(SelectU1, U1, U1, U1, U1, ) OPCODE(SelectU8, U8, U1, U8, U8, ) OPCODE(SelectU16, U16, U1, U16, U16, ) OPCODE(SelectU32, U32, U1, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp new file mode 100644 index 000000000..eda5f177b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp @@ -0,0 +1,71 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + PR, + CC, +}; + +void SetFlag(IR::IREmitter& ir, const IR::U1& inv_mask_bit, const IR::U1& src_bit, u32 index) { + switch (index) { + case 0: + return ir.SetZFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetZFlag(), src_bit)}); + case 1: + return ir.SetSFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetSFlag(), src_bit)}); + case 2: + return ir.SetCFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetCFlag(), src_bit)}); + case 3: + return ir.SetOFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetOFlag(), src_bit)}); + default: + throw LogicError("Unreachable R2P index"); + } +} + +void R2P(TranslatorVisitor& v, u64 insn, const IR::U32& mask) { + union { + u64 raw; + BitField<8, 8, IR::Reg> src_reg; + BitField<40, 1, Mode> mode; + BitField<41, 2, u64> byte_selector; + } const r2p{insn}; + const IR::U32 src{v.X(r2p.src_reg)}; + const IR::U32 count{v.ir.Imm32(1)}; + const bool pr_mode{r2p.mode == Mode::PR}; + const u32 num_items{pr_mode ? 7U : 4U}; + const u32 offset_base{static_cast(r2p.byte_selector) * 8}; + for (u32 index = 0; index < num_items; ++index) { + const IR::U32 offset{v.ir.Imm32(offset_base + index)}; + const IR::U1 src_zero{v.ir.GetZeroFromOp(v.ir.BitFieldExtract(src, offset, count, false))}; + const IR::U1 src_bit{v.ir.LogicalNot(src_zero)}; + const IR::U32 mask_bfe{v.ir.BitFieldExtract(mask, v.ir.Imm32(index), count, false)}; + const IR::U1 inv_mask_bit{v.ir.GetZeroFromOp(mask_bfe)}; + if (pr_mode) { + const IR::Pred pred{index}; + v.ir.SetPred(pred, IR::U1{v.ir.Select(inv_mask_bit, v.ir.GetPred(pred), src_bit)}); + } else { + SetFlag(v.ir, inv_mask_bit, src_bit, index); + } + } +} +} // Anonymous namespace + +void TranslatorVisitor::R2P_reg(u64 insn) { + R2P(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::R2P_cbuf(u64 insn) { + R2P(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::R2P_imm(u64 insn) { + R2P(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 2ab90d1bf..fc6030e04 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -513,18 +513,6 @@ void TranslatorVisitor::R2B(u64) { ThrowNotImplemented(Opcode::R2B); } -void TranslatorVisitor::R2P_reg(u64) { - ThrowNotImplemented(Opcode::R2P_reg); -} - -void TranslatorVisitor::R2P_cbuf(u64) { - ThrowNotImplemented(Opcode::R2P_cbuf); -} - -void TranslatorVisitor::R2P_imm(u64) { - ThrowNotImplemented(Opcode::R2P_imm); -} - void TranslatorVisitor::RAM(u64) { ThrowNotImplemented(Opcode::RAM); } -- cgit v1.2.3 From ab463712474de5f99eec137a9c6233e55fe184f0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Mar 2021 18:31:53 -0300 Subject: shader: Initial support for textures and TEX --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 133 ++++- src/shader_recompiler/frontend/ir/ir_emitter.h | 21 +- .../frontend/ir/microinstruction.cpp | 73 ++- .../frontend/ir/microinstruction.h | 22 +- src/shader_recompiler/frontend/ir/modifiers.h | 10 + src/shader_recompiler/frontend/ir/opcodes.cpp | 2 +- src/shader_recompiler/frontend/ir/opcodes.inc | 569 +++++++++++---------- src/shader_recompiler/frontend/ir/reg.h | 11 + src/shader_recompiler/frontend/ir/value.h | 1 + src/shader_recompiler/frontend/maxwell/maxwell.inc | 4 +- src/shader_recompiler/frontend/maxwell/program.cpp | 1 + .../maxwell/translate/impl/not_implemented.cpp | 8 - .../maxwell/translate/impl/texture_sample.cpp | 232 +++++++++ 13 files changed, 772 insertions(+), 315 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index f38b46bac..ae3354c66 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -7,11 +7,24 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::IR { - -[[noreturn]] static void ThrowInvalidType(Type type) { +namespace { +[[noreturn]] void ThrowInvalidType(Type type) { throw InvalidArgument("Invalid type {}", type); } +Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) { + if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) { + return ir.CompositeConstruct(bias_lod, lod_clamp); + } else if (!bias_lod.IsEmpty()) { + return bias_lod; + } else if (!lod_clamp.IsEmpty()) { + return lod_clamp; + } else { + return Value{}; + } +} +} // Anonymous namespace + U1 IREmitter::Imm1(bool value) const { return U1{Value{value}}; } @@ -261,6 +274,10 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) { return Inst(Opcode::GetOverflowFromOp, op); } +U1 IREmitter::GetSparseFromOp(const Value& op) { + return Inst(Opcode::GetSparseFromOp, op); +} + F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { if (a.Type() != a.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); @@ -1035,6 +1052,82 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v } } +F16F32F64 IREmitter::ConvertSToF(size_t bitsize, const U32U64& value) { + switch (bitsize) { + case 16: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF16S32, value); + case Type::U64: + return Inst(Opcode::ConvertF16S64, value); + default: + ThrowInvalidType(value.Type()); + } + case 32: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF32S32, value); + case Type::U64: + return Inst(Opcode::ConvertF32S64, value); + default: + ThrowInvalidType(value.Type()); + } + case 64: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF64S32, value); + case Type::U64: + return Inst(Opcode::ConvertF64S64, value); + default: + ThrowInvalidType(value.Type()); + } + default: + throw InvalidArgument("Invalid destination bitsize {}", bitsize); + } +} + +F16F32F64 IREmitter::ConvertUToF(size_t bitsize, const U32U64& value) { + switch (bitsize) { + case 16: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF16U32, value); + case Type::U64: + return Inst(Opcode::ConvertF16U64, value); + default: + ThrowInvalidType(value.Type()); + } + case 32: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF32U32, value); + case Type::U64: + return Inst(Opcode::ConvertF32U64, value); + default: + ThrowInvalidType(value.Type()); + } + case 64: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF64U32, value); + case Type::U64: + return Inst(Opcode::ConvertF64U64, value); + default: + ThrowInvalidType(value.Type()); + } + default: + throw InvalidArgument("Invalid destination bitsize {}", bitsize); + } +} + +F16F32F64 IREmitter::ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value) { + if (is_signed) { + return ConvertSToF(bitsize, value); + } else { + return ConvertUToF(bitsize, value); + } +} + U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { switch (result_bitsize) { case 32: @@ -1107,4 +1200,40 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); } +Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias, + const Value& offset, const F32& lod_clamp, + TextureInstInfo info) { + const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleImplicitLod + : Opcode::BindlessImageSampleImplicitLod}; + return Inst(op, Flags{info}, handle, coords, bias_lc, offset); +} + +Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod, + const Value& offset, const F32& lod_clamp, + TextureInstInfo info) { + const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)}; + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleExplicitLod + : Opcode::BindlessImageSampleExplicitLod}; + return Inst(op, Flags{info}, handle, coords, lod_lc, offset); +} + +F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref, + const F32& bias, const Value& offset, + const F32& lod_clamp, TextureInstInfo info) { + const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefImplicitLod + : Opcode::BindlessImageSampleDrefImplicitLod}; + return Inst(op, Flags{info}, handle, coords, dref, bias_lc, offset); +} + +F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, + const F32& lod, const Value& offset, const F32& lod_clamp, + TextureInstInfo info) { + const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)}; + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefExplicitLod + : Opcode::BindlessImageSampleDrefExplicitLod}; + return Inst(op, Flags{info}, handle, coords, dref, lod_lc, offset); +} + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 6e29bf0e2..cb2a7710a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -91,6 +91,7 @@ public: [[nodiscard]] U1 GetSignFromOp(const Value& op); [[nodiscard]] U1 GetCarryFromOp(const Value& op); [[nodiscard]] U1 GetOverflowFromOp(const Value& op); + [[nodiscard]] U1 GetSparseFromOp(const Value& op); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); @@ -159,7 +160,7 @@ public: [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, const U32& count); [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, - bool is_signed); + bool is_signed = false); [[nodiscard]] U32 BitReverse(const U32& value); [[nodiscard]] U32 BitCount(const U32& value); [[nodiscard]] U32 BitwiseNot(const U32& value); @@ -186,10 +187,28 @@ public: [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); + [[nodiscard]] F16F32F64 ConvertSToF(size_t bitsize, const U32U64& value); + [[nodiscard]] F16F32F64 ConvertUToF(size_t bitsize, const U32U64& value); + [[nodiscard]] F16F32F64 ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value); [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); + [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords, + const F32& bias, const Value& offset, + const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords, + const F32& lod, const Value& offset, + const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, + const F32& dref, const F32& bias, + const Value& offset, const F32& lod_clamp, + TextureInstInfo info); + [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, + const F32& dref, const F32& lod, + const Value& offset, const F32& lod_clamp, + TextureInstInfo info); + private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index d6a9be87d..88e186f21 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -10,26 +10,27 @@ #include "shader_recompiler/frontend/ir/type.h" namespace Shader::IR { - -static void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { +namespace { +void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { if (inst && inst->Opcode() != opcode) { throw LogicError("Invalid pseudo-instruction"); } } -static void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { +void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { if (dest_inst) { throw LogicError("Only one of each type of pseudo-op allowed"); } dest_inst = pseudo_inst; } -static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { +void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { if (inst->Opcode() != expected_opcode) { throw LogicError("Undoing use of invalid pseudo-op"); } inst = nullptr; } +} // Anonymous namespace Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { if (op == Opcode::Phi) { @@ -82,6 +83,7 @@ bool Inst::IsPseudoInstruction() const noexcept { case Opcode::GetSignFromOp: case Opcode::GetCarryFromOp: case Opcode::GetOverflowFromOp: + case Opcode::GetSparseFromOp: return true; default: return false; @@ -96,25 +98,26 @@ bool Inst::AreAllArgsImmediates() const { [](const IR::Value& value) { return value.IsImmediate(); }); } -bool Inst::HasAssociatedPseudoOperation() const noexcept { - return zero_inst || sign_inst || carry_inst || overflow_inst; -} - Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { - // This is faster than doing a search through the block. + if (!associated_insts) { + return nullptr; + } switch (opcode) { case Opcode::GetZeroFromOp: - CheckPseudoInstruction(zero_inst, Opcode::GetZeroFromOp); - return zero_inst; + CheckPseudoInstruction(associated_insts->zero_inst, Opcode::GetZeroFromOp); + return associated_insts->zero_inst; case Opcode::GetSignFromOp: - CheckPseudoInstruction(sign_inst, Opcode::GetSignFromOp); - return sign_inst; + CheckPseudoInstruction(associated_insts->sign_inst, Opcode::GetSignFromOp); + return associated_insts->sign_inst; case Opcode::GetCarryFromOp: - CheckPseudoInstruction(carry_inst, Opcode::GetCarryFromOp); - return carry_inst; + CheckPseudoInstruction(associated_insts->carry_inst, Opcode::GetCarryFromOp); + return associated_insts->carry_inst; case Opcode::GetOverflowFromOp: - CheckPseudoInstruction(overflow_inst, Opcode::GetOverflowFromOp); - return overflow_inst; + CheckPseudoInstruction(associated_insts->overflow_inst, Opcode::GetOverflowFromOp); + return associated_insts->overflow_inst; + case Opcode::GetSparseFromOp: + CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp); + return associated_insts->sparse_inst; default: throw InvalidArgument("{} is not a pseudo-instruction", opcode); } @@ -220,22 +223,37 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) { op = opcode; } +void AllocAssociatedInsts(std::unique_ptr& associated_insts) { + if (!associated_insts) { + associated_insts = std::make_unique(); + } +} + void Inst::Use(const Value& value) { Inst* const inst{value.Inst()}; ++inst->use_count; + std::unique_ptr& assoc_inst{inst->associated_insts}; switch (op) { case Opcode::GetZeroFromOp: - SetPseudoInstruction(inst->zero_inst, this); + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->zero_inst, this); break; case Opcode::GetSignFromOp: - SetPseudoInstruction(inst->sign_inst, this); + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->sign_inst, this); break; case Opcode::GetCarryFromOp: - SetPseudoInstruction(inst->carry_inst, this); + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->carry_inst, this); break; case Opcode::GetOverflowFromOp: - SetPseudoInstruction(inst->overflow_inst, this); + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->overflow_inst, this); + break; + case Opcode::GetSparseFromOp: + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->sparse_inst, this); break; default: break; @@ -246,18 +264,23 @@ void Inst::UndoUse(const Value& value) { Inst* const inst{value.Inst()}; --inst->use_count; + std::unique_ptr& assoc_inst{inst->associated_insts}; switch (op) { case Opcode::GetZeroFromOp: - RemovePseudoInstruction(inst->zero_inst, Opcode::GetZeroFromOp); + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->zero_inst, Opcode::GetZeroFromOp); break; case Opcode::GetSignFromOp: - RemovePseudoInstruction(inst->sign_inst, Opcode::GetSignFromOp); + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->sign_inst, Opcode::GetSignFromOp); break; case Opcode::GetCarryFromOp: - RemovePseudoInstruction(inst->carry_inst, Opcode::GetCarryFromOp); + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->carry_inst, Opcode::GetCarryFromOp); break; case Opcode::GetOverflowFromOp: - RemovePseudoInstruction(inst->overflow_inst, Opcode::GetOverflowFromOp); + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp); break; default: break; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 321393dd7..d5336c438 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -22,7 +22,7 @@ namespace Shader::IR { class Block; -constexpr size_t MAX_ARG_COUNT = 4; +struct AssociatedInsts; class Inst : public boost::intrusive::list_base_hook<> { public: @@ -50,6 +50,11 @@ public: return op; } + /// Determines if there is a pseudo-operation associated with this instruction. + [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept { + return associated_insts != nullptr; + } + /// Determines whether or not this instruction may have side effects. [[nodiscard]] bool MayHaveSideEffects() const noexcept; @@ -60,8 +65,6 @@ public: /// Determines if all arguments of this instruction are immediates. [[nodiscard]] bool AreAllArgsImmediates() const; - /// Determines if there is a pseudo-operation associated with this instruction. - [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept; /// Gets a pseudo-operation associated with this instruction [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); @@ -122,14 +125,21 @@ private: u32 definition{}; union { NonTriviallyDummy dummy{}; - std::array args; std::vector> phi_args; + std::array args; + }; + std::unique_ptr associated_insts; +}; +static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); + +struct AssociatedInsts { + union { + Inst* sparse_inst; + Inst* zero_inst{}; }; - Inst* zero_inst{}; Inst* sign_inst{}; Inst* carry_inst{}; Inst* overflow_inst{}; }; -static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased its size"); } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 44652eae7..ad07700ae 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -4,7 +4,9 @@ #pragma once +#include "common/bit_field.h" #include "common/common_types.h" +#include "shader_recompiler/shader_info.h" namespace Shader::IR { @@ -30,4 +32,12 @@ struct FpControl { }; static_assert(sizeof(FpControl) <= sizeof(u32)); +union TextureInstInfo { + u32 raw; + BitField<0, 8, TextureType> type; + BitField<8, 1, u32> has_bias; + BitField<16, 1, u32> has_lod_clamp; +}; +static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 1f188411a..8492a13d5 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -14,7 +14,7 @@ namespace { struct OpcodeMeta { std::string_view name; Type type; - std::array arg_types; + std::array arg_types; }; using enum Type; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index c4e72c84d..aa011fab1 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -2,301 +2,330 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... -OPCODE(Phi, Opaque, ) -OPCODE(Identity, Opaque, Opaque, ) -OPCODE(Void, Void, ) +// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, arg4 type, ... +OPCODE(Phi, Opaque, ) +OPCODE(Identity, Opaque, Opaque, ) +OPCODE(Void, Void, ) // Control flow -OPCODE(Branch, Void, Label, ) -OPCODE(BranchConditional, Void, U1, Label, Label, ) -OPCODE(LoopMerge, Void, Label, Label, ) -OPCODE(SelectionMerge, Void, Label, ) -OPCODE(Return, Void, ) +OPCODE(Branch, Void, Label, ) +OPCODE(BranchConditional, Void, U1, Label, Label, ) +OPCODE(LoopMerge, Void, Label, Label, ) +OPCODE(SelectionMerge, Void, Label, ) +OPCODE(Return, Void, ) // Context getters/setters -OPCODE(GetRegister, U32, Reg, ) -OPCODE(SetRegister, Void, Reg, U32, ) -OPCODE(GetPred, U1, Pred, ) -OPCODE(SetPred, Void, Pred, U1, ) -OPCODE(GetGotoVariable, U1, U32, ) -OPCODE(SetGotoVariable, Void, U32, U1, ) -OPCODE(GetCbuf, U32, U32, U32, ) -OPCODE(GetAttribute, U32, Attribute, ) -OPCODE(SetAttribute, Void, Attribute, U32, ) -OPCODE(GetAttributeIndexed, U32, U32, ) -OPCODE(SetAttributeIndexed, Void, U32, U32, ) -OPCODE(GetZFlag, U1, Void, ) -OPCODE(GetSFlag, U1, Void, ) -OPCODE(GetCFlag, U1, Void, ) -OPCODE(GetOFlag, U1, Void, ) -OPCODE(SetZFlag, Void, U1, ) -OPCODE(SetSFlag, Void, U1, ) -OPCODE(SetCFlag, Void, U1, ) -OPCODE(SetOFlag, Void, U1, ) -OPCODE(WorkgroupId, U32x3, ) -OPCODE(LocalInvocationId, U32x3, ) +OPCODE(GetRegister, U32, Reg, ) +OPCODE(SetRegister, Void, Reg, U32, ) +OPCODE(GetPred, U1, Pred, ) +OPCODE(SetPred, Void, Pred, U1, ) +OPCODE(GetGotoVariable, U1, U32, ) +OPCODE(SetGotoVariable, Void, U32, U1, ) +OPCODE(GetCbuf, U32, U32, U32, ) +OPCODE(GetAttribute, U32, Attribute, ) +OPCODE(SetAttribute, Void, Attribute, U32, ) +OPCODE(GetAttributeIndexed, U32, U32, ) +OPCODE(SetAttributeIndexed, Void, U32, U32, ) +OPCODE(GetZFlag, U1, Void, ) +OPCODE(GetSFlag, U1, Void, ) +OPCODE(GetCFlag, U1, Void, ) +OPCODE(GetOFlag, U1, Void, ) +OPCODE(SetZFlag, Void, U1, ) +OPCODE(SetSFlag, Void, U1, ) +OPCODE(SetCFlag, Void, U1, ) +OPCODE(SetOFlag, Void, U1, ) +OPCODE(WorkgroupId, U32x3, ) +OPCODE(LocalInvocationId, U32x3, ) // Undefined -OPCODE(UndefU1, U1, ) -OPCODE(UndefU8, U8, ) -OPCODE(UndefU16, U16, ) -OPCODE(UndefU32, U32, ) -OPCODE(UndefU64, U64, ) +OPCODE(UndefU1, U1, ) +OPCODE(UndefU8, U8, ) +OPCODE(UndefU16, U16, ) +OPCODE(UndefU32, U32, ) +OPCODE(UndefU64, U64, ) // Memory operations -OPCODE(LoadGlobalU8, U32, U64, ) -OPCODE(LoadGlobalS8, U32, U64, ) -OPCODE(LoadGlobalU16, U32, U64, ) -OPCODE(LoadGlobalS16, U32, U64, ) -OPCODE(LoadGlobal32, U32, U64, ) -OPCODE(LoadGlobal64, U32x2, U64, ) -OPCODE(LoadGlobal128, U32x4, U64, ) -OPCODE(WriteGlobalU8, Void, U64, U32, ) -OPCODE(WriteGlobalS8, Void, U64, U32, ) -OPCODE(WriteGlobalU16, Void, U64, U32, ) -OPCODE(WriteGlobalS16, Void, U64, U32, ) -OPCODE(WriteGlobal32, Void, U64, U32, ) -OPCODE(WriteGlobal64, Void, U64, U32x2, ) -OPCODE(WriteGlobal128, Void, U64, U32x4, ) +OPCODE(LoadGlobalU8, U32, U64, ) +OPCODE(LoadGlobalS8, U32, U64, ) +OPCODE(LoadGlobalU16, U32, U64, ) +OPCODE(LoadGlobalS16, U32, U64, ) +OPCODE(LoadGlobal32, U32, U64, ) +OPCODE(LoadGlobal64, U32x2, U64, ) +OPCODE(LoadGlobal128, U32x4, U64, ) +OPCODE(WriteGlobalU8, Void, U64, U32, ) +OPCODE(WriteGlobalS8, Void, U64, U32, ) +OPCODE(WriteGlobalU16, Void, U64, U32, ) +OPCODE(WriteGlobalS16, Void, U64, U32, ) +OPCODE(WriteGlobal32, Void, U64, U32, ) +OPCODE(WriteGlobal64, Void, U64, U32x2, ) +OPCODE(WriteGlobal128, Void, U64, U32x4, ) // Storage buffer operations -OPCODE(LoadStorageU8, U32, U32, U32, ) -OPCODE(LoadStorageS8, U32, U32, U32, ) -OPCODE(LoadStorageU16, U32, U32, U32, ) -OPCODE(LoadStorageS16, U32, U32, U32, ) -OPCODE(LoadStorage32, U32, U32, U32, ) -OPCODE(LoadStorage64, U32x2, U32, U32, ) -OPCODE(LoadStorage128, U32x4, U32, U32, ) -OPCODE(WriteStorageU8, Void, U32, U32, U32, ) -OPCODE(WriteStorageS8, Void, U32, U32, U32, ) -OPCODE(WriteStorageU16, Void, U32, U32, U32, ) -OPCODE(WriteStorageS16, Void, U32, U32, U32, ) -OPCODE(WriteStorage32, Void, U32, U32, U32, ) -OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) -OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) +OPCODE(LoadStorageU8, U32, U32, U32, ) +OPCODE(LoadStorageS8, U32, U32, U32, ) +OPCODE(LoadStorageU16, U32, U32, U32, ) +OPCODE(LoadStorageS16, U32, U32, U32, ) +OPCODE(LoadStorage32, U32, U32, U32, ) +OPCODE(LoadStorage64, U32x2, U32, U32, ) +OPCODE(LoadStorage128, U32x4, U32, U32, ) +OPCODE(WriteStorageU8, Void, U32, U32, U32, ) +OPCODE(WriteStorageS8, Void, U32, U32, U32, ) +OPCODE(WriteStorageU16, Void, U32, U32, U32, ) +OPCODE(WriteStorageS16, Void, U32, U32, U32, ) +OPCODE(WriteStorage32, Void, U32, U32, U32, ) +OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) +OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) // Vector utility -OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) -OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) -OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) -OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) -OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) -OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) -OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) -OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) -OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) -OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) -OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) -OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) -OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) -OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) -OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) -OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) -OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) -OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) -OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) -OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) -OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) -OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) -OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) -OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) -OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) -OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) -OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) -OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) -OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) -OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) -OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) -OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) -OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) -OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) -OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) -OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) +OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) +OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) +OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) +OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) +OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) +OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) +OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) +OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) +OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) +OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) +OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) +OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) +OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) +OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) +OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) +OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) +OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) +OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) +OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) +OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) +OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) +OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) +OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) +OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) +OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) +OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) +OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) +OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) +OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) +OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) +OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) +OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) +OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) +OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) +OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) +OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) // Select operations -OPCODE(SelectU1, U1, U1, U1, U1, ) -OPCODE(SelectU8, U8, U1, U8, U8, ) -OPCODE(SelectU16, U16, U1, U16, U16, ) -OPCODE(SelectU32, U32, U1, U32, U32, ) -OPCODE(SelectU64, U64, U1, U64, U64, ) -OPCODE(SelectF16, F16, U1, F16, F16, ) -OPCODE(SelectF32, F32, U1, F32, F32, ) +OPCODE(SelectU1, U1, U1, U1, U1, ) +OPCODE(SelectU8, U8, U1, U8, U8, ) +OPCODE(SelectU16, U16, U1, U16, U16, ) +OPCODE(SelectU32, U32, U1, U32, U32, ) +OPCODE(SelectU64, U64, U1, U64, U64, ) +OPCODE(SelectF16, F16, U1, F16, F16, ) +OPCODE(SelectF32, F32, U1, F32, F32, ) // Bitwise conversions -OPCODE(BitCastU16F16, U16, F16, ) -OPCODE(BitCastU32F32, U32, F32, ) -OPCODE(BitCastU64F64, U64, F64, ) -OPCODE(BitCastF16U16, F16, U16, ) -OPCODE(BitCastF32U32, F32, U32, ) -OPCODE(BitCastF64U64, F64, U64, ) -OPCODE(PackUint2x32, U64, U32x2, ) -OPCODE(UnpackUint2x32, U32x2, U64, ) -OPCODE(PackFloat2x16, U32, F16x2, ) -OPCODE(UnpackFloat2x16, F16x2, U32, ) -OPCODE(PackHalf2x16, U32, F32x2, ) -OPCODE(UnpackHalf2x16, F32x2, U32, ) -OPCODE(PackDouble2x32, F64, U32x2, ) -OPCODE(UnpackDouble2x32, U32x2, F64, ) +OPCODE(BitCastU16F16, U16, F16, ) +OPCODE(BitCastU32F32, U32, F32, ) +OPCODE(BitCastU64F64, U64, F64, ) +OPCODE(BitCastF16U16, F16, U16, ) +OPCODE(BitCastF32U32, F32, U32, ) +OPCODE(BitCastF64U64, F64, U64, ) +OPCODE(PackUint2x32, U64, U32x2, ) +OPCODE(UnpackUint2x32, U32x2, U64, ) +OPCODE(PackFloat2x16, U32, F16x2, ) +OPCODE(UnpackFloat2x16, F16x2, U32, ) +OPCODE(PackHalf2x16, U32, F32x2, ) +OPCODE(UnpackHalf2x16, F32x2, U32, ) +OPCODE(PackDouble2x32, F64, U32x2, ) +OPCODE(UnpackDouble2x32, U32x2, F64, ) // Pseudo-operation, handled specially at final emit -OPCODE(GetZeroFromOp, U1, Opaque, ) -OPCODE(GetSignFromOp, U1, Opaque, ) -OPCODE(GetCarryFromOp, U1, Opaque, ) -OPCODE(GetOverflowFromOp, U1, Opaque, ) +OPCODE(GetZeroFromOp, U1, Opaque, ) +OPCODE(GetSignFromOp, U1, Opaque, ) +OPCODE(GetCarryFromOp, U1, Opaque, ) +OPCODE(GetOverflowFromOp, U1, Opaque, ) +OPCODE(GetSparseFromOp, U1, Opaque, ) // Floating-point operations -OPCODE(FPAbs16, F16, F16, ) -OPCODE(FPAbs32, F32, F32, ) -OPCODE(FPAbs64, F64, F64, ) -OPCODE(FPAdd16, F16, F16, F16, ) -OPCODE(FPAdd32, F32, F32, F32, ) -OPCODE(FPAdd64, F64, F64, F64, ) -OPCODE(FPFma16, F16, F16, F16, F16, ) -OPCODE(FPFma32, F32, F32, F32, F32, ) -OPCODE(FPFma64, F64, F64, F64, F64, ) -OPCODE(FPMax32, F32, F32, F32, ) -OPCODE(FPMax64, F64, F64, F64, ) -OPCODE(FPMin32, F32, F32, F32, ) -OPCODE(FPMin64, F64, F64, F64, ) -OPCODE(FPMul16, F16, F16, F16, ) -OPCODE(FPMul32, F32, F32, F32, ) -OPCODE(FPMul64, F64, F64, F64, ) -OPCODE(FPNeg16, F16, F16, ) -OPCODE(FPNeg32, F32, F32, ) -OPCODE(FPNeg64, F64, F64, ) -OPCODE(FPRecip32, F32, F32, ) -OPCODE(FPRecip64, F64, F64, ) -OPCODE(FPRecipSqrt32, F32, F32, ) -OPCODE(FPRecipSqrt64, F64, F64, ) -OPCODE(FPSqrt, F32, F32, ) -OPCODE(FPSin, F32, F32, ) -OPCODE(FPExp2, F32, F32, ) -OPCODE(FPCos, F32, F32, ) -OPCODE(FPLog2, F32, F32, ) -OPCODE(FPSaturate16, F16, F16, ) -OPCODE(FPSaturate32, F32, F32, ) -OPCODE(FPSaturate64, F64, F64, ) -OPCODE(FPRoundEven16, F16, F16, ) -OPCODE(FPRoundEven32, F32, F32, ) -OPCODE(FPRoundEven64, F64, F64, ) -OPCODE(FPFloor16, F16, F16, ) -OPCODE(FPFloor32, F32, F32, ) -OPCODE(FPFloor64, F64, F64, ) -OPCODE(FPCeil16, F16, F16, ) -OPCODE(FPCeil32, F32, F32, ) -OPCODE(FPCeil64, F64, F64, ) -OPCODE(FPTrunc16, F16, F16, ) -OPCODE(FPTrunc32, F32, F32, ) -OPCODE(FPTrunc64, F64, F64, ) +OPCODE(FPAbs16, F16, F16, ) +OPCODE(FPAbs32, F32, F32, ) +OPCODE(FPAbs64, F64, F64, ) +OPCODE(FPAdd16, F16, F16, F16, ) +OPCODE(FPAdd32, F32, F32, F32, ) +OPCODE(FPAdd64, F64, F64, F64, ) +OPCODE(FPFma16, F16, F16, F16, F16, ) +OPCODE(FPFma32, F32, F32, F32, F32, ) +OPCODE(FPFma64, F64, F64, F64, F64, ) +OPCODE(FPMax32, F32, F32, F32, ) +OPCODE(FPMax64, F64, F64, F64, ) +OPCODE(FPMin32, F32, F32, F32, ) +OPCODE(FPMin64, F64, F64, F64, ) +OPCODE(FPMul16, F16, F16, F16, ) +OPCODE(FPMul32, F32, F32, F32, ) +OPCODE(FPMul64, F64, F64, F64, ) +OPCODE(FPNeg16, F16, F16, ) +OPCODE(FPNeg32, F32, F32, ) +OPCODE(FPNeg64, F64, F64, ) +OPCODE(FPRecip32, F32, F32, ) +OPCODE(FPRecip64, F64, F64, ) +OPCODE(FPRecipSqrt32, F32, F32, ) +OPCODE(FPRecipSqrt64, F64, F64, ) +OPCODE(FPSqrt, F32, F32, ) +OPCODE(FPSin, F32, F32, ) +OPCODE(FPExp2, F32, F32, ) +OPCODE(FPCos, F32, F32, ) +OPCODE(FPLog2, F32, F32, ) +OPCODE(FPSaturate16, F16, F16, ) +OPCODE(FPSaturate32, F32, F32, ) +OPCODE(FPSaturate64, F64, F64, ) +OPCODE(FPRoundEven16, F16, F16, ) +OPCODE(FPRoundEven32, F32, F32, ) +OPCODE(FPRoundEven64, F64, F64, ) +OPCODE(FPFloor16, F16, F16, ) +OPCODE(FPFloor32, F32, F32, ) +OPCODE(FPFloor64, F64, F64, ) +OPCODE(FPCeil16, F16, F16, ) +OPCODE(FPCeil32, F32, F32, ) +OPCODE(FPCeil64, F64, F64, ) +OPCODE(FPTrunc16, F16, F16, ) +OPCODE(FPTrunc32, F32, F32, ) +OPCODE(FPTrunc64, F64, F64, ) -OPCODE(FPOrdEqual16, U1, F16, F16, ) -OPCODE(FPOrdEqual32, U1, F32, F32, ) -OPCODE(FPOrdEqual64, U1, F64, F64, ) -OPCODE(FPUnordEqual16, U1, F16, F16, ) -OPCODE(FPUnordEqual32, U1, F32, F32, ) -OPCODE(FPUnordEqual64, U1, F64, F64, ) -OPCODE(FPOrdNotEqual16, U1, F16, F16, ) -OPCODE(FPOrdNotEqual32, U1, F32, F32, ) -OPCODE(FPOrdNotEqual64, U1, F64, F64, ) -OPCODE(FPUnordNotEqual16, U1, F16, F16, ) -OPCODE(FPUnordNotEqual32, U1, F32, F32, ) -OPCODE(FPUnordNotEqual64, U1, F64, F64, ) -OPCODE(FPOrdLessThan16, U1, F16, F16, ) -OPCODE(FPOrdLessThan32, U1, F32, F32, ) -OPCODE(FPOrdLessThan64, U1, F64, F64, ) -OPCODE(FPUnordLessThan16, U1, F16, F16, ) -OPCODE(FPUnordLessThan32, U1, F32, F32, ) -OPCODE(FPUnordLessThan64, U1, F64, F64, ) -OPCODE(FPOrdGreaterThan16, U1, F16, F16, ) -OPCODE(FPOrdGreaterThan32, U1, F32, F32, ) -OPCODE(FPOrdGreaterThan64, U1, F64, F64, ) -OPCODE(FPUnordGreaterThan16, U1, F16, F16, ) -OPCODE(FPUnordGreaterThan32, U1, F32, F32, ) -OPCODE(FPUnordGreaterThan64, U1, F64, F64, ) -OPCODE(FPOrdLessThanEqual16, U1, F16, F16, ) -OPCODE(FPOrdLessThanEqual32, U1, F32, F32, ) -OPCODE(FPOrdLessThanEqual64, U1, F64, F64, ) -OPCODE(FPUnordLessThanEqual16, U1, F16, F16, ) -OPCODE(FPUnordLessThanEqual32, U1, F32, F32, ) -OPCODE(FPUnordLessThanEqual64, U1, F64, F64, ) -OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, ) -OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, ) -OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, ) -OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) -OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) -OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPOrdEqual16, U1, F16, F16, ) +OPCODE(FPOrdEqual32, U1, F32, F32, ) +OPCODE(FPOrdEqual64, U1, F64, F64, ) +OPCODE(FPUnordEqual16, U1, F16, F16, ) +OPCODE(FPUnordEqual32, U1, F32, F32, ) +OPCODE(FPUnordEqual64, U1, F64, F64, ) +OPCODE(FPOrdNotEqual16, U1, F16, F16, ) +OPCODE(FPOrdNotEqual32, U1, F32, F32, ) +OPCODE(FPOrdNotEqual64, U1, F64, F64, ) +OPCODE(FPUnordNotEqual16, U1, F16, F16, ) +OPCODE(FPUnordNotEqual32, U1, F32, F32, ) +OPCODE(FPUnordNotEqual64, U1, F64, F64, ) +OPCODE(FPOrdLessThan16, U1, F16, F16, ) +OPCODE(FPOrdLessThan32, U1, F32, F32, ) +OPCODE(FPOrdLessThan64, U1, F64, F64, ) +OPCODE(FPUnordLessThan16, U1, F16, F16, ) +OPCODE(FPUnordLessThan32, U1, F32, F32, ) +OPCODE(FPUnordLessThan64, U1, F64, F64, ) +OPCODE(FPOrdGreaterThan16, U1, F16, F16, ) +OPCODE(FPOrdGreaterThan32, U1, F32, F32, ) +OPCODE(FPOrdGreaterThan64, U1, F64, F64, ) +OPCODE(FPUnordGreaterThan16, U1, F16, F16, ) +OPCODE(FPUnordGreaterThan32, U1, F32, F32, ) +OPCODE(FPUnordGreaterThan64, U1, F64, F64, ) +OPCODE(FPOrdLessThanEqual16, U1, F16, F16, ) +OPCODE(FPOrdLessThanEqual32, U1, F32, F32, ) +OPCODE(FPOrdLessThanEqual64, U1, F64, F64, ) +OPCODE(FPUnordLessThanEqual16, U1, F16, F16, ) +OPCODE(FPUnordLessThanEqual32, U1, F32, F32, ) +OPCODE(FPUnordLessThanEqual64, U1, F64, F64, ) +OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, ) +OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, ) +OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) +OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) +OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) // Integer operations -OPCODE(IAdd32, U32, U32, U32, ) -OPCODE(IAdd64, U64, U64, U64, ) -OPCODE(ISub32, U32, U32, U32, ) -OPCODE(ISub64, U64, U64, U64, ) -OPCODE(IMul32, U32, U32, U32, ) -OPCODE(INeg32, U32, U32, ) -OPCODE(INeg64, U64, U64, ) -OPCODE(IAbs32, U32, U32, ) -OPCODE(ShiftLeftLogical32, U32, U32, U32, ) -OPCODE(ShiftLeftLogical64, U64, U64, U32, ) -OPCODE(ShiftRightLogical32, U32, U32, U32, ) -OPCODE(ShiftRightLogical64, U64, U64, U32, ) -OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) -OPCODE(ShiftRightArithmetic64, U64, U64, U32, ) -OPCODE(BitwiseAnd32, U32, U32, U32, ) -OPCODE(BitwiseOr32, U32, U32, U32, ) -OPCODE(BitwiseXor32, U32, U32, U32, ) -OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) -OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) -OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) -OPCODE(BitReverse32, U32, U32, ) -OPCODE(BitCount32, U32, U32, ) -OPCODE(BitwiseNot32, U32, U32, ) +OPCODE(IAdd32, U32, U32, U32, ) +OPCODE(IAdd64, U64, U64, U64, ) +OPCODE(ISub32, U32, U32, U32, ) +OPCODE(ISub64, U64, U64, U64, ) +OPCODE(IMul32, U32, U32, U32, ) +OPCODE(INeg32, U32, U32, ) +OPCODE(INeg64, U64, U64, ) +OPCODE(IAbs32, U32, U32, ) +OPCODE(ShiftLeftLogical32, U32, U32, U32, ) +OPCODE(ShiftLeftLogical64, U64, U64, U32, ) +OPCODE(ShiftRightLogical32, U32, U32, U32, ) +OPCODE(ShiftRightLogical64, U64, U64, U32, ) +OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) +OPCODE(ShiftRightArithmetic64, U64, U64, U32, ) +OPCODE(BitwiseAnd32, U32, U32, U32, ) +OPCODE(BitwiseOr32, U32, U32, U32, ) +OPCODE(BitwiseXor32, U32, U32, U32, ) +OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) +OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) +OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) +OPCODE(BitReverse32, U32, U32, ) +OPCODE(BitCount32, U32, U32, ) +OPCODE(BitwiseNot32, U32, U32, ) -OPCODE(FindSMsb32, U32, U32, ) -OPCODE(FindUMsb32, U32, U32, ) -OPCODE(SMin32, U32, U32, U32, ) -OPCODE(UMin32, U32, U32, U32, ) -OPCODE(SMax32, U32, U32, U32, ) -OPCODE(UMax32, U32, U32, U32, ) -OPCODE(SLessThan, U1, U32, U32, ) -OPCODE(ULessThan, U1, U32, U32, ) -OPCODE(IEqual, U1, U32, U32, ) -OPCODE(SLessThanEqual, U1, U32, U32, ) -OPCODE(ULessThanEqual, U1, U32, U32, ) -OPCODE(SGreaterThan, U1, U32, U32, ) -OPCODE(UGreaterThan, U1, U32, U32, ) -OPCODE(INotEqual, U1, U32, U32, ) -OPCODE(SGreaterThanEqual, U1, U32, U32, ) -OPCODE(UGreaterThanEqual, U1, U32, U32, ) +OPCODE(FindSMsb32, U32, U32, ) +OPCODE(FindUMsb32, U32, U32, ) +OPCODE(SMin32, U32, U32, U32, ) +OPCODE(UMin32, U32, U32, U32, ) +OPCODE(SMax32, U32, U32, U32, ) +OPCODE(UMax32, U32, U32, U32, ) +OPCODE(SLessThan, U1, U32, U32, ) +OPCODE(ULessThan, U1, U32, U32, ) +OPCODE(IEqual, U1, U32, U32, ) +OPCODE(SLessThanEqual, U1, U32, U32, ) +OPCODE(ULessThanEqual, U1, U32, U32, ) +OPCODE(SGreaterThan, U1, U32, U32, ) +OPCODE(UGreaterThan, U1, U32, U32, ) +OPCODE(INotEqual, U1, U32, U32, ) +OPCODE(SGreaterThanEqual, U1, U32, U32, ) +OPCODE(UGreaterThanEqual, U1, U32, U32, ) // Logical operations -OPCODE(LogicalOr, U1, U1, U1, ) -OPCODE(LogicalAnd, U1, U1, U1, ) -OPCODE(LogicalXor, U1, U1, U1, ) -OPCODE(LogicalNot, U1, U1, ) +OPCODE(LogicalOr, U1, U1, U1, ) +OPCODE(LogicalAnd, U1, U1, U1, ) +OPCODE(LogicalXor, U1, U1, U1, ) +OPCODE(LogicalNot, U1, U1, ) // Conversion operations -OPCODE(ConvertS16F16, U32, F16, ) -OPCODE(ConvertS16F32, U32, F32, ) -OPCODE(ConvertS16F64, U32, F64, ) -OPCODE(ConvertS32F16, U32, F16, ) -OPCODE(ConvertS32F32, U32, F32, ) -OPCODE(ConvertS32F64, U32, F64, ) -OPCODE(ConvertS64F16, U64, F16, ) -OPCODE(ConvertS64F32, U64, F32, ) -OPCODE(ConvertS64F64, U64, F64, ) -OPCODE(ConvertU16F16, U32, F16, ) -OPCODE(ConvertU16F32, U32, F32, ) -OPCODE(ConvertU16F64, U32, F64, ) -OPCODE(ConvertU32F16, U32, F16, ) -OPCODE(ConvertU32F32, U32, F32, ) -OPCODE(ConvertU32F64, U32, F64, ) -OPCODE(ConvertU64F16, U64, F16, ) -OPCODE(ConvertU64F32, U64, F32, ) -OPCODE(ConvertU64F64, U64, F64, ) -OPCODE(ConvertU64U32, U64, U32, ) -OPCODE(ConvertU32U64, U32, U64, ) -OPCODE(ConvertF16F32, F16, F32, ) -OPCODE(ConvertF32F16, F32, F16, ) -OPCODE(ConvertF32F64, F32, F64, ) -OPCODE(ConvertF64F32, F64, F32, ) +OPCODE(ConvertS16F16, U32, F16, ) +OPCODE(ConvertS16F32, U32, F32, ) +OPCODE(ConvertS16F64, U32, F64, ) +OPCODE(ConvertS32F16, U32, F16, ) +OPCODE(ConvertS32F32, U32, F32, ) +OPCODE(ConvertS32F64, U32, F64, ) +OPCODE(ConvertS64F16, U64, F16, ) +OPCODE(ConvertS64F32, U64, F32, ) +OPCODE(ConvertS64F64, U64, F64, ) +OPCODE(ConvertU16F16, U32, F16, ) +OPCODE(ConvertU16F32, U32, F32, ) +OPCODE(ConvertU16F64, U32, F64, ) +OPCODE(ConvertU32F16, U32, F16, ) +OPCODE(ConvertU32F32, U32, F32, ) +OPCODE(ConvertU32F64, U32, F64, ) +OPCODE(ConvertU64F16, U64, F16, ) +OPCODE(ConvertU64F32, U64, F32, ) +OPCODE(ConvertU64F64, U64, F64, ) +OPCODE(ConvertU64U32, U64, U32, ) +OPCODE(ConvertU32U64, U32, U64, ) +OPCODE(ConvertF16F32, F16, F32, ) +OPCODE(ConvertF32F16, F32, F16, ) +OPCODE(ConvertF32F64, F32, F64, ) +OPCODE(ConvertF64F32, F64, F32, ) +OPCODE(ConvertF16S32, F16, U32, ) +OPCODE(ConvertF16S64, F16, U64, ) +OPCODE(ConvertF16U32, F16, U32, ) +OPCODE(ConvertF16U64, F16, U64, ) +OPCODE(ConvertF32S32, F32, U32, ) +OPCODE(ConvertF32S64, F32, U64, ) +OPCODE(ConvertF32U32, F32, U32, ) +OPCODE(ConvertF32U64, F32, U64, ) +OPCODE(ConvertF64S32, F64, U32, ) +OPCODE(ConvertF64S64, F64, U64, ) +OPCODE(ConvertF64U32, F64, U32, ) +OPCODE(ConvertF64U64, F64, U64, ) + +// Image operations +OPCODE(BindlessImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) + +OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) + +OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h index 771094eb9..8fea05f7b 100644 --- a/src/shader_recompiler/frontend/ir/reg.h +++ b/src/shader_recompiler/frontend/ir/reg.h @@ -293,6 +293,17 @@ constexpr size_t NUM_REGS = 256; return reg + (-num); } +[[nodiscard]] constexpr Reg operator++(Reg& reg) { + reg = reg + 1; + return reg; +} + +[[nodiscard]] constexpr Reg operator++(Reg& reg, int) { + const Reg copy{reg}; + reg = reg + 1; + return copy; +} + [[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept { return static_cast(reg); } diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 9b7e1480b..3602883d6 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -75,6 +75,7 @@ private: f64 imm_f64; }; }; +static_assert(std::is_trivially_copyable_v); template class TypedValue : public Value { diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index 5d0b91598..f2a2ff331 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -249,8 +249,8 @@ INST(SULD, "SULD", "1110 1011 000- ----") INST(SURED, "SURED", "1110 1011 010- ----") INST(SUST, "SUST", "1110 1011 001- ----") INST(SYNC, "SYNC", "1111 0000 1111 1---") -INST(TEX, "TEX", "1100 00-- --11 1---") -INST(TEX_b, "TEX (b)", "1101 1110 1011 1---") +INST(TEX, "TEX", "1100 0--- ---- ----") +INST(TEX_b, "TEX (b)", "1101 1110 10-- ----") INST(TEXS, "TEXS", "1101 -00- ---- ----") INST(TLD, "TLD", "1101 1100 --11 1---") INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index dbfc04f75..b270bbccd 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -62,6 +62,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Blod : u64 { + None, + LZ, + LB, + LL, + INVALIDBLOD4, + INVALIDBLOD5, + LBA, + LLA, +}; + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type, bool dc) { + switch (type) { + case TextureType::_1D: + return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }}; + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) { + switch (blod) { + case Blod::None: + return v.ir.Imm32(0.0f); + case Blod::LZ: + return v.ir.Imm32(0.0f); + case Blod::LB: + case Blod::LL: + case Blod::LBA: + case Blod::LLA: + return v.F(reg++); + case Blod::INVALIDBLOD4: + case Blod::INVALIDBLOD5: + break; + } + throw NotImplementedException("Invalid blod {}", blod); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4))); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4))); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +bool HasExplicitLod(Blod blod) { + switch (blod) { + case Blod::LL: + case Blod::LLA: + case Blod::LZ: + return true; + default: + return false; + } +} + +void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, + std::optional cbuf_offset) { + union { + u64 raw; + BitField<35, 1, u64> ndv; + BitField<49, 1, u64> nodep; + BitField<50, 1, u64> dc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + } const tex{insn}; + + if (lc) { + throw NotImplementedException("LC"); + } + const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)}; + + IR::Reg meta_reg{tex.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::F32 dref; + IR::F32 lod_clamp; + if (cbuf_offset) { + handle = v.ir.Imm32(*cbuf_offset); + } else { + handle = v.X(meta_reg++); + } + const IR::F32 lod{MakeLod(v, meta_reg, blod)}; + if (aoffi) { + offset = MakeOffset(v, meta_reg, tex.type); + } + if (tex.dc != 0) { + dref = v.F(meta_reg++); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tex.type, tex.dc != 0)); + info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); + info.has_lod_clamp.Assign(lc ? 1 : 0); + + const IR::Value sample{[&]() -> IR::Value { + if (tex.dc == 0) { + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, lod_clamp, info); + } else { + return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); + } + } + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, lod_clamp, + info); + } else { + return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, + info); + } + }()}; + + for (int element = 0; element < 4; ++element) { + if (((tex.mask >> element) & 1) == 0) { + continue; + } + IR::F32 value; + if (tex.dc != 0) { + value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f); + } else { + value = IR::F32{v.ir.CompositeExtract(sample, element)}; + } + v.F(tex.dest_reg + element, value); + } + if (tex.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TEX(u64 insn) { + union { + u64 raw; + BitField<54, 1, u64> aoffi; + BitField<55, 3, Blod> blod; + BitField<58, 1, u64> lc; + BitField<36, 13, u64> cbuf_offset; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast(tex.cbuf_offset)); +} + +void TranslatorVisitor::TEX_b(u64 insn) { + union { + u64 raw; + BitField<36, 1, u64> aoffi; + BitField<37, 3, Blod> blod; + BitField<40, 1, u64> lc; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From 3a63fa0477ea8297c80133d35494e1dfdc012f95 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 9 Mar 2021 17:14:57 -0300 Subject: shader: Partial implementation of LDC --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 22 +++++- src/shader_recompiler/frontend/ir/ir_emitter.h | 3 + src/shader_recompiler/frontend/ir/opcodes.inc | 8 +- .../frontend/maxwell/translate/impl/impl.cpp | 16 +++- .../maxwell/translate/impl/load_constant.cpp | 85 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 4 - 6 files changed, 128 insertions(+), 10 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ae3354c66..33819dd36 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -112,7 +112,27 @@ void IREmitter::SetPred(IR::Pred pred, const U1& value) { } U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) { - return Inst(Opcode::GetCbuf, binding, byte_offset); + return Inst(Opcode::GetCbufU32, binding, byte_offset); +} + +UAny IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, + bool is_signed) { + switch (bitsize) { + case 8: + return Inst(is_signed ? Opcode::GetCbufS8 : Opcode::GetCbufU8, binding, byte_offset); + case 16: + return Inst(is_signed ? Opcode::GetCbufS16 : Opcode::GetCbufU16, binding, byte_offset); + case 32: + return Inst(Opcode::GetCbufU32, binding, byte_offset); + case 64: + return Inst(Opcode::GetCbufU64, binding, byte_offset); + default: + throw InvalidArgument("Invalid bit size {}", bitsize); + } +} + +F32 IREmitter::GetFloatCbuf(const U32& binding, const U32& byte_offset) { + return Inst(Opcode::GetCbufF32, binding, byte_offset); } U1 IREmitter::GetZFlag() { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index cb2a7710a..e4d110540 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -47,6 +47,9 @@ public: void SetGotoVariable(u32 id, const U1& value); [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); + [[nodiscard]] UAny GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, + bool is_signed); + [[nodiscard]] F32 GetFloatCbuf(const U32& binding, const U32& byte_offset); [[nodiscard]] U1 GetZFlag(); [[nodiscard]] U1 GetSFlag(); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index aa011fab1..64bd495ed 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -21,7 +21,13 @@ OPCODE(GetPred, U1, Pred OPCODE(SetPred, Void, Pred, U1, ) OPCODE(GetGotoVariable, U1, U32, ) OPCODE(SetGotoVariable, Void, U32, U1, ) -OPCODE(GetCbuf, U32, U32, U32, ) +OPCODE(GetCbufU8, U32, U32, U32, ) +OPCODE(GetCbufS8, U32, U32, U32, ) +OPCODE(GetCbufU16, U32, U32, U32, ) +OPCODE(GetCbufS16, U32, U32, U32, ) +OPCODE(GetCbufU32, U32, U32, U32, ) +OPCODE(GetCbufF32, F32, U32, U32, ) +OPCODE(GetCbufU64, U64, U32, U32, ) OPCODE(GetAttribute, U32, Attribute, ) OPCODE(SetAttribute, Void, Attribute, U32, ) OPCODE(GetAttributeIndexed, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index a5a0e1a9b..7564aeeb2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -56,25 +56,32 @@ IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) { return ir.BitCast(GetReg39(insn)); } -IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { +static std::pair CbufAddr(u64 insn) { union { u64 raw; BitField<20, 14, s64> offset; BitField<34, 5, u64> binding; } const cbuf{insn}; + if (cbuf.binding >= 18) { throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); } if (cbuf.offset >= 0x10'000 || cbuf.offset < 0) { throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset); } - const IR::U32 binding{ir.Imm32(static_cast(cbuf.binding))}; - const IR::U32 byte_offset{ir.Imm32(static_cast(cbuf.offset) * 4)}; + const IR::Value binding{static_cast(cbuf.binding)}; + const IR::Value byte_offset{static_cast(cbuf.offset) * 4}; + return {IR::U32{binding}, IR::U32{byte_offset}}; +} + +IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { + const auto[binding, byte_offset]{CbufAddr(insn)}; return ir.GetCbuf(binding, byte_offset); } IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) { - return ir.BitCast(GetCbuf(insn)); + const auto[binding, byte_offset]{CbufAddr(insn)}; + return ir.GetFloatCbuf(binding, byte_offset); } IR::U32 TranslatorVisitor::GetImm20(u64 insn) { @@ -83,6 +90,7 @@ IR::U32 TranslatorVisitor::GetImm20(u64 insn) { BitField<20, 19, u64> value; BitField<56, 1, u64> is_negative; } const imm{insn}; + if (imm.is_negative != 0) { const s64 raw{static_cast(imm.value)}; return ir.Imm32(static_cast(-(1LL << 19) + raw)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp new file mode 100644 index 000000000..39becf93c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -0,0 +1,85 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Default, + IL, + IS, + ISL, +}; + +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, +}; + +std::pair Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index, + const IR::U32& reg, const IR::U32& imm) { + switch (mode) { + case Mode::Default: + return {imm_index, ir.IAdd(reg, imm)}; + default: + break; + } + throw NotImplementedException("Mode {}", mode); +} +} // Anonymous namespace + +void TranslatorVisitor::LDC(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 16, s64> offset; + BitField<36, 5, u64> index; + BitField<44, 2, Mode> mode; + BitField<48, 3, Size> size; + } const ldc{insn}; + + const IR::U32 imm_index{ir.Imm32(static_cast(ldc.index))}; + const IR::U32 reg{X(ldc.src_reg)}; + const IR::U32 imm{ir.Imm32(static_cast(ldc.offset))}; + const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)}; + switch (ldc.size) { + case Size::U8: + X(ldc.dest_reg, ir.GetCbuf(index, offset, 8, false)); + break; + case Size::S8: + X(ldc.dest_reg, ir.GetCbuf(index, offset, 8, true)); + break; + case Size::U16: + X(ldc.dest_reg, ir.GetCbuf(index, offset, 16, false)); + break; + case Size::S16: + X(ldc.dest_reg, ir.GetCbuf(index, offset, 16, true)); + break; + case Size::B32: + X(ldc.dest_reg, ir.GetCbuf(index, offset, 32, false)); + break; + case Size::B64: { + if (!IR::IsAligned(ldc.dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register"); + } + const IR::Value vector{ir.UnpackUint2x32(ir.GetCbuf(index, offset, 64, false))}; + for (int i = 0; i < 2; ++i) { + X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + } + break; + } + default: + throw NotImplementedException("Invalid size {}", ldc.size.Value()); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index ff429c126..5b153acff 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -425,10 +425,6 @@ void TranslatorVisitor::LD(u64) { ThrowNotImplemented(Opcode::LD); } -void TranslatorVisitor::LDC(u64) { - ThrowNotImplemented(Opcode::LDC); -} - void TranslatorVisitor::LDL(u64) { ThrowNotImplemented(Opcode::LDL); } -- cgit v1.2.3 From ba8c1d2eb479d04b2b0d847efd67468b688765d4 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 10 Mar 2021 22:42:17 -0500 Subject: shader: Implement FCMP still need to configure some settings for NV denorm flush and intel NaN --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 80 +++++++++----- src/shader_recompiler/frontend/ir/ir_emitter.h | 19 ++-- src/shader_recompiler/frontend/ir/opcodes.inc | 2 + .../translate/impl/floating_point_compare.cpp | 116 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 16 --- 5 files changed, 184 insertions(+), 49 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 33819dd36..5d475207e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -697,93 +697,107 @@ F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) { } } -U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { +U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, bool ordered) { if (lhs.Type() != rhs.Type()) { throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } switch (lhs.Type()) { case Type::F16: - return Inst(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, Flags{control}, + lhs, rhs); case Type::F32: - return Inst(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, Flags{control}, + lhs, rhs); case Type::F64: - return Inst(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, Flags{control}, + lhs, rhs); default: ThrowInvalidType(lhs.Type()); } } -U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { +U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { if (lhs.Type() != rhs.Type()) { throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } switch (lhs.Type()) { case Type::F16: - return Inst(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16, + Flags{control}, lhs, rhs); case Type::F32: - return Inst(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32, + Flags{control}, lhs, rhs); case Type::F64: - return Inst(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64, + Flags{control}, lhs, rhs); default: ThrowInvalidType(lhs.Type()); } } -U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { +U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { if (lhs.Type() != rhs.Type()) { throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } switch (lhs.Type()) { case Type::F16: - return Inst(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16, + Flags{control}, lhs, rhs); case Type::F32: - return Inst(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32, + Flags{control}, lhs, rhs); case Type::F64: - return Inst(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64, + Flags{control}, lhs, rhs); default: ThrowInvalidType(lhs.Type()); } } -U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { +U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { if (lhs.Type() != rhs.Type()) { throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } switch (lhs.Type()) { case Type::F16: - return Inst(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16, lhs, - rhs); + return Inst(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16, + Flags{control}, lhs, rhs); case Type::F32: - return Inst(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32, lhs, - rhs); + return Inst(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32, + Flags{control}, lhs, rhs); case Type::F64: - return Inst(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64, lhs, - rhs); + return Inst(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64, + Flags{control}, lhs, rhs); default: ThrowInvalidType(lhs.Type()); } } -U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { +U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { if (lhs.Type() != rhs.Type()) { throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } switch (lhs.Type()) { case Type::F16: return Inst(ordered ? Opcode::FPOrdLessThanEqual16 : Opcode::FPUnordLessThanEqual16, - lhs, rhs); + Flags{control}, lhs, rhs); case Type::F32: return Inst(ordered ? Opcode::FPOrdLessThanEqual32 : Opcode::FPUnordLessThanEqual32, - lhs, rhs); + Flags{control}, lhs, rhs); case Type::F64: return Inst(ordered ? Opcode::FPOrdLessThanEqual64 : Opcode::FPUnordLessThanEqual64, - lhs, rhs); + Flags{control}, lhs, rhs); default: ThrowInvalidType(lhs.Type()); } } -U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { +U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { if (lhs.Type() != rhs.Type()) { throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } @@ -791,20 +805,32 @@ U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, boo case Type::F16: return Inst(ordered ? Opcode::FPOrdGreaterThanEqual16 : Opcode::FPUnordGreaterThanEqual16, - lhs, rhs); + Flags{control}, lhs, rhs); case Type::F32: return Inst(ordered ? Opcode::FPOrdGreaterThanEqual32 : Opcode::FPUnordGreaterThanEqual32, - lhs, rhs); + Flags{control}, lhs, rhs); case Type::F64: return Inst(ordered ? Opcode::FPOrdGreaterThanEqual64 : Opcode::FPUnordGreaterThanEqual64, - lhs, rhs); + Flags{control}, lhs, rhs); default: ThrowInvalidType(lhs.Type()); } } +U1 IREmitter::FPIsNan(const F32& value) { + return Inst(Opcode::FPIsNan32, value); +} + +U1 IREmitter::FPOrdered(const F32& lhs, const F32& rhs) { + return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs))); +} + +U1 IREmitter::FPUnordered(const F32& lhs, const F32& rhs) { + return LogicalOr(FPIsNan(lhs), FPIsNan(rhs)); +} + U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { if (a.Type() != b.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index e4d110540..5cfe1a54a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -140,14 +140,21 @@ public: [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {}); - [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); - [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); - [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); - [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); + [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, + bool ordered = true); + [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, + bool ordered = true); + [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, + bool ordered = true); + [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, + FpControl control = {}, bool ordered = true); [[nodiscard]] U1 FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, - bool ordered = true); + FpControl control = {}, bool ordered = true); [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, - bool ordered = true); + FpControl control = {}, bool ordered = true); + [[nodiscard]] U1 FPIsNan(const F32& value); + [[nodiscard]] U1 FPOrdered(const F32& lhs, const F32& rhs); + [[nodiscard]] U1 FPUnordered(const F32& lhs, const F32& rhs); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 64bd495ed..476281789 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -197,6 +197,7 @@ OPCODE(FPTrunc16, F16, F16, OPCODE(FPTrunc32, F32, F32, ) OPCODE(FPTrunc64, F64, F64, ) +<<<<<<< HEAD OPCODE(FPOrdEqual16, U1, F16, F16, ) OPCODE(FPOrdEqual32, U1, F32, F32, ) OPCODE(FPOrdEqual64, U1, F64, F64, ) @@ -233,6 +234,7 @@ OPCODE(FPOrdGreaterThanEqual64, U1, F64, OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPIsNan32, U1, F32, ) // Integer operations OPCODE(IAdd32, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp new file mode 100644 index 000000000..21cb80d67 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -0,0 +1,116 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class FPCompareOp : u64 { + F, + LT, + EQ, + LE, + GT, + NE, + GE, + NUM, + Nan, + LTU, + EQU, + LEU, + GTU, + NEU, + GEU, + T, +}; + +bool IsCompareOpOrdered(FPCompareOp op) { + switch (op) { + case FPCompareOp::LTU: + case FPCompareOp::EQU: + case FPCompareOp::LEU: + case FPCompareOp::GTU: + case FPCompareOp::NEU: + case FPCompareOp::GEU: + return false; + default: + return true; + } +} + +IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2, + FPCompareOp compare_op, IR::FpControl control) { + const bool ordered{IsCompareOpOrdered(compare_op)}; + switch (compare_op) { + case FPCompareOp::F: + return ir.Imm1(false); + case FPCompareOp::LT: + case FPCompareOp::LTU: + return ir.FPLessThan(operand_1, operand_2, control, ordered); + case FPCompareOp::EQ: + case FPCompareOp::EQU: + return ir.FPEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::LE: + case FPCompareOp::LEU: + return ir.FPLessThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::GT: + case FPCompareOp::GTU: + return ir.FPGreaterThan(operand_1, operand_2, control, ordered); + case FPCompareOp::NE: + case FPCompareOp::NEU: + return ir.FPNotEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::GE: + case FPCompareOp::GEU: + return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::NUM: + return ir.FPOrdered(operand_1, operand_2); + case FPCompareOp::Nan: + return ir.FPUnordered(operand_1, operand_2); + case FPCompareOp::T: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid compare op {}", compare_op); + } +} + +void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<47, 1, u64> ftz; + BitField<48, 4, FPCompareOp> compare_op; + } const fcmp{insn}; + + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::F32 neg_zero{v.ir.Imm32(-0.0f)}; + IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}}; + const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)}; + const IR::U32 src_reg{v.X(fcmp.src_reg)}; + const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; + + v.X(fcmp.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::FCMP_reg(u64 insn) { + FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FCMP_rc(u64 insn) { + FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FCMP_cr(u64 insn) { + FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FCMP_imm(u64 insn) { + FCMP(*this, insn, GetReg39(insn), GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 5b153acff..e1904472f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -201,22 +201,6 @@ void TranslatorVisitor::FCHK_imm(u64) { ThrowNotImplemented(Opcode::FCHK_imm); } -void TranslatorVisitor::FCMP_reg(u64) { - ThrowNotImplemented(Opcode::FCMP_reg); -} - -void TranslatorVisitor::FCMP_rc(u64) { - ThrowNotImplemented(Opcode::FCMP_rc); -} - -void TranslatorVisitor::FCMP_cr(u64) { - ThrowNotImplemented(Opcode::FCMP_cr); -} - -void TranslatorVisitor::FCMP_imm(u64) { - ThrowNotImplemented(Opcode::FCMP_imm); -} - void TranslatorVisitor::FMNMX_reg(u64) { ThrowNotImplemented(Opcode::FMNMX_reg); } -- cgit v1.2.3 From 2d422b2498868e297939c6907a7ef1386ceb1d57 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 13 Mar 2021 02:23:26 -0300 Subject: shader: Fix rebase issue --- src/shader_recompiler/frontend/ir/opcodes.inc | 1 - 1 file changed, 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 476281789..9052a4903 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -197,7 +197,6 @@ OPCODE(FPTrunc16, F16, F16, OPCODE(FPTrunc32, F32, F32, ) OPCODE(FPTrunc64, F64, F64, ) -<<<<<<< HEAD OPCODE(FPOrdEqual16, U1, F16, F16, ) OPCODE(FPOrdEqual32, U1, F32, F32, ) OPCODE(FPOrdEqual64, U1, F64, F64, ) -- cgit v1.2.3 From 8d470c2e63c2dac334ccff2bcda9a0607ce76377 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 14 Mar 2021 01:23:56 -0500 Subject: shader: Implement FMNMX And add a const in FCMP --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 28 +++++++++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 2 + .../translate/impl/floating_point_compare.cpp | 2 +- .../translate/impl/floating_point_min_max.cpp | 57 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 ----- 5 files changed, 88 insertions(+), 13 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 5d475207e..556961fa4 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -831,6 +831,34 @@ U1 IREmitter::FPUnordered(const F32& lhs, const F32& rhs) { return LogicalOr(FPIsNan(lhs), FPIsNan(rhs)); } +F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F32: + return Inst(Opcode::FPMax32, Flags{control}, lhs, rhs); + case Type::F64: + return Inst(Opcode::FPMax64, Flags{control}, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F32: + return Inst(Opcode::FPMin32, Flags{control}, lhs, rhs); + case Type::F64: + return Inst(Opcode::FPMin64, Flags{control}, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { if (a.Type() != b.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 5cfe1a54a..74fb3dbcb 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -155,6 +155,8 @@ public: [[nodiscard]] U1 FPIsNan(const F32& value); [[nodiscard]] U1 FPOrdered(const F32& lhs, const F32& rhs); [[nodiscard]] U1 FPUnordered(const F32& lhs, const F32& rhs); + [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); + [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp index 21cb80d67..f254ecb3a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -88,7 +88,7 @@ void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& o const IR::F32 zero{v.ir.Imm32(0.0f)}; const IR::F32 neg_zero{v.ir.Imm32(-0.0f)}; - IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}}; + const IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}}; const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)}; const IR::U32 src_reg{v.X(fcmp.src_reg)}; const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp new file mode 100644 index 000000000..c3180a9bd --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp @@ -0,0 +1,57 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> negate_b; + BitField<46, 1, u64> abs_a; + BitField<48, 1, u64> negate_a; + BitField<49, 1, u64> abs_b; + } const fmnmx{insn}; + + const IR::U1 pred{v.ir.GetPred(fmnmx.pred)}; + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)}; + const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0); + + const IR::FpControl control{ + .no_contraction{false}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + IR::F32 max{v.ir.FPMax(op_a, op_b, control)}; + IR::F32 min{v.ir.FPMin(op_a, op_b, control)}; + + if (fmnmx.neg_pred != 0) { + std::swap(min, max); + } + + v.F(fmnmx.dest_reg, IR::F32{v.ir.Select(pred, min, max)}); +} +} // Anonymous namespace + +void TranslatorVisitor::FMNMX_reg(u64 insn) { + FMNMX(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FMNMX_cbuf(u64 insn) { + FMNMX(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FMNMX_imm(u64 insn) { + FMNMX(*this, insn, GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index e1904472f..01ecbb4cc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -201,18 +201,6 @@ void TranslatorVisitor::FCHK_imm(u64) { ThrowNotImplemented(Opcode::FCHK_imm); } -void TranslatorVisitor::FMNMX_reg(u64) { - ThrowNotImplemented(Opcode::FMNMX_reg); -} - -void TranslatorVisitor::FMNMX_cbuf(u64) { - ThrowNotImplemented(Opcode::FMNMX_cbuf); -} - -void TranslatorVisitor::FMNMX_imm(u64) { - ThrowNotImplemented(Opcode::FMNMX_imm); -} - void TranslatorVisitor::FSET_reg(u64) { ThrowNotImplemented(Opcode::FSET_reg); } -- cgit v1.2.3 From 71f96fa6366dc6dd306a953bca1b958fb32bc55a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 14 Mar 2021 03:41:05 -0300 Subject: shader: Implement CAL inlining function calls --- src/shader_recompiler/frontend/ir/function.cpp | 5 - src/shader_recompiler/frontend/ir/function.h | 18 - src/shader_recompiler/frontend/ir/program.cpp | 18 +- src/shader_recompiler/frontend/ir/program.h | 5 +- .../frontend/ir/structured_control_flow.cpp | 744 -------------------- .../frontend/ir/structured_control_flow.h | 22 - .../frontend/maxwell/control_flow.cpp | 78 +-- .../frontend/maxwell/control_flow.h | 19 +- src/shader_recompiler/frontend/maxwell/program.cpp | 71 +- .../frontend/maxwell/structured_control_flow.cpp | 770 +++++++++++++++++++++ .../frontend/maxwell/structured_control_flow.h | 24 + .../frontend/maxwell/translate/impl/impl.h | 2 +- .../maxwell/translate/impl/not_implemented.cpp | 4 +- 13 files changed, 879 insertions(+), 901 deletions(-) delete mode 100644 src/shader_recompiler/frontend/ir/function.cpp delete mode 100644 src/shader_recompiler/frontend/ir/function.h delete mode 100644 src/shader_recompiler/frontend/ir/structured_control_flow.cpp delete mode 100644 src/shader_recompiler/frontend/ir/structured_control_flow.h create mode 100644 src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/structured_control_flow.h (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/function.cpp b/src/shader_recompiler/frontend/ir/function.cpp deleted file mode 100644 index d1fc9461d..000000000 --- a/src/shader_recompiler/frontend/ir/function.cpp +++ /dev/null @@ -1,5 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "shader_recompiler/frontend/ir/function.h" diff --git a/src/shader_recompiler/frontend/ir/function.h b/src/shader_recompiler/frontend/ir/function.h deleted file mode 100644 index d1f061146..000000000 --- a/src/shader_recompiler/frontend/ir/function.h +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include - -#include "shader_recompiler/frontend/ir/basic_block.h" - -namespace Shader::IR { - -struct Function { - BlockList blocks; - BlockList post_order_blocks; -}; - -} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp index 8c301c3a1..5f51aeb5f 100644 --- a/src/shader_recompiler/frontend/ir/program.cpp +++ b/src/shader_recompiler/frontend/ir/program.cpp @@ -9,7 +9,8 @@ #include -#include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" namespace Shader::IR { @@ -19,18 +20,13 @@ std::string DumpProgram(const Program& program) { std::map inst_to_index; std::map block_to_index; - for (const IR::Function& function : program.functions) { - for (const IR::Block* const block : function.blocks) { - block_to_index.emplace(block, index); - ++index; - } + for (const IR::Block* const block : program.blocks) { + block_to_index.emplace(block, index); + ++index; } std::string ret; - for (const IR::Function& function : program.functions) { - ret += fmt::format("Function\n"); - for (const auto& block : function.blocks) { - ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; - } + for (const auto& block : program.blocks) { + ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; } return ret; } diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 98aab2dc6..bce8b19b3 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -8,13 +8,14 @@ #include -#include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/shader_info.h" namespace Shader::IR { struct Program { - boost::container::small_vector functions; + BlockList blocks; + BlockList post_order_blocks; Info info; }; diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp deleted file mode 100644 index bfba55a7e..000000000 --- a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp +++ /dev/null @@ -1,744 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/object_pool.h" - -namespace Shader::IR { -namespace { -struct Statement; - -// Use normal_link because we are not guaranteed to destroy the tree in order -using ListBaseHook = - boost::intrusive::list_base_hook>; - -using Tree = boost::intrusive::list, - // Avoid linear complexity on splice, size is never called - boost::intrusive::constant_time_size>; -using Node = Tree::iterator; -using ConstNode = Tree::const_iterator; - -enum class StatementType { - Code, - Goto, - Label, - If, - Loop, - Break, - Return, - Function, - Identity, - Not, - Or, - SetVariable, - Variable, -}; - -bool HasChildren(StatementType type) { - switch (type) { - case StatementType::If: - case StatementType::Loop: - case StatementType::Function: - return true; - default: - return false; - } -} - -struct Goto {}; -struct Label {}; -struct If {}; -struct Loop {}; -struct Break {}; -struct Return {}; -struct FunctionTag {}; -struct Identity {}; -struct Not {}; -struct Or {}; -struct SetVariable {}; -struct Variable {}; - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement -#endif -struct Statement : ListBaseHook { - Statement(Block* code_, Statement* up_) : code{code_}, up{up_}, type{StatementType::Code} {} - Statement(Goto, Statement* cond_, Node label_, Statement* up_) - : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {} - Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {} - Statement(If, Statement* cond_, Tree&& children_, Statement* up_) - : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {} - Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_) - : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {} - Statement(Break, Statement* cond_, Statement* up_) - : cond{cond_}, up{up_}, type{StatementType::Break} {} - Statement(Return) : type{StatementType::Return} {} - Statement(FunctionTag) : children{}, type{StatementType::Function} {} - Statement(Identity, Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {} - Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {} - Statement(Or, Statement* op_a_, Statement* op_b_) - : op_a{op_a_}, op_b{op_b_}, type{StatementType::Or} {} - Statement(SetVariable, u32 id_, Statement* op_, Statement* up_) - : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} - Statement(Variable, u32 id_) : id{id_}, type{StatementType::Variable} {} - - ~Statement() { - if (HasChildren(type)) { - std::destroy_at(&children); - } - } - - union { - Block* code; - Node label; - Tree children; - Condition guest_cond; - Statement* op; - Statement* op_a; - }; - union { - Statement* cond; - Statement* op_b; - u32 id; - }; - Statement* up{}; - StatementType type; -}; -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -std::string DumpExpr(const Statement* stmt) { - switch (stmt->type) { - case StatementType::Identity: - return fmt::format("{}", stmt->guest_cond); - case StatementType::Not: - return fmt::format("!{}", DumpExpr(stmt->op)); - case StatementType::Or: - return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b)); - case StatementType::Variable: - return fmt::format("goto_L{}", stmt->id); - default: - return ""; - } -} - -std::string DumpTree(const Tree& tree, u32 indentation = 0) { - std::string ret; - std::string indent(indentation, ' '); - for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { - switch (stmt->type) { - case StatementType::Code: - ret += fmt::format("{} Block {:04x};\n", indent, stmt->code->LocationBegin()); - break; - case StatementType::Goto: - ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond), - stmt->label->id); - break; - case StatementType::Label: - ret += fmt::format("{}L{}:\n", indent, stmt->id); - break; - case StatementType::If: - ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond)); - ret += DumpTree(stmt->children, indentation + 4); - ret += fmt::format("{} }}\n", indent); - break; - case StatementType::Loop: - ret += fmt::format("{} do {{\n", indent); - ret += DumpTree(stmt->children, indentation + 4); - ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond)); - break; - case StatementType::Break: - ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond)); - break; - case StatementType::Return: - ret += fmt::format("{} return;\n", indent); - break; - case StatementType::SetVariable: - ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op)); - break; - case StatementType::Function: - case StatementType::Identity: - case StatementType::Not: - case StatementType::Or: - case StatementType::Variable: - throw LogicError("Statement can't be printed"); - } - } - return ret; -} - -bool HasNode(const Tree& tree, ConstNode stmt) { - const auto end{tree.end()}; - for (auto it = tree.begin(); it != end; ++it) { - if (it == stmt || (HasChildren(it->type) && HasNode(it->children, stmt))) { - return true; - } - } - return false; -} - -Node FindStatementWithLabel(Tree& tree, ConstNode goto_stmt) { - const ConstNode label_stmt{goto_stmt->label}; - const ConstNode end{tree.end()}; - for (auto it = tree.begin(); it != end; ++it) { - if (it == label_stmt || (HasChildren(it->type) && HasNode(it->children, label_stmt))) { - return it; - } - } - throw LogicError("Lift label not in tree"); -} - -void SanitizeNoBreaks(const Tree& tree) { - if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) { - throw NotImplementedException("Capturing statement with break nodes"); - } -} - -size_t Level(Node stmt) { - size_t level{0}; - Statement* node{stmt->up}; - while (node) { - ++level; - node = node->up; - } - return level; -} - -bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) { - const size_t goto_level{Level(goto_stmt)}; - const size_t label_level{Level(label_stmt)}; - size_t min_level; - size_t max_level; - Node min; - Node max; - if (label_level < goto_level) { - min_level = label_level; - max_level = goto_level; - min = label_stmt; - max = goto_stmt; - } else { // goto_level < label_level - min_level = goto_level; - max_level = label_level; - min = goto_stmt; - max = label_stmt; - } - while (max_level > min_level) { - --max_level; - max = max->up; - } - return min->up == max->up; -} - -bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) { - return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt); -} - -bool SearchNode(const Tree& tree, ConstNode stmt, size_t& offset) { - ++offset; - - const auto end = tree.end(); - for (ConstNode it = tree.begin(); it != end; ++it) { - ++offset; - if (stmt == it) { - return true; - } - if (HasChildren(it->type) && SearchNode(it->children, stmt, offset)) { - return true; - } - } - return false; -} - -class GotoPass { -public: - explicit GotoPass(std::span blocks, ObjectPool& stmt_pool) - : pool{stmt_pool} { - std::vector gotos{BuildUnorderedTreeGetGotos(blocks)}; - for (const Node& goto_stmt : gotos | std::views::reverse) { - RemoveGoto(goto_stmt); - } - } - - Statement& RootStatement() noexcept { - return root_stmt; - } - -private: - void RemoveGoto(Node goto_stmt) { - // Force goto_stmt and label_stmt to be directly related - const Node label_stmt{goto_stmt->label}; - if (IsIndirectlyRelated(goto_stmt, label_stmt)) { - // Move goto_stmt out using outward-movement transformation until it becomes - // directly related to label_stmt - while (!IsDirectlyRelated(goto_stmt, label_stmt)) { - goto_stmt = MoveOutward(goto_stmt); - } - } - // Force goto_stmt and label_stmt to be siblings - if (IsDirectlyRelated(goto_stmt, label_stmt)) { - const size_t label_level{Level(label_stmt)}; - size_t goto_level{Level(goto_stmt)}; - if (goto_level > label_level) { - // Move goto_stmt out of its level using outward-movement transformations - while (goto_level > label_level) { - goto_stmt = MoveOutward(goto_stmt); - --goto_level; - } - } else { // Level(goto_stmt) < Level(label_stmt) - if (Offset(goto_stmt) > Offset(label_stmt)) { - // Lift goto_stmt to above stmt containing label_stmt using goto-lifting - // transformations - goto_stmt = Lift(goto_stmt); - } - // Move goto_stmt into label_stmt's level using inward-movement transformation - while (goto_level < label_level) { - goto_stmt = MoveInward(goto_stmt); - ++goto_level; - } - } - } - // TODO: Remove this - Node it{goto_stmt}; - bool sibling{false}; - do { - sibling |= it == label_stmt; - --it; - } while (it != goto_stmt->up->children.begin()); - while (it != goto_stmt->up->children.end()) { - sibling |= it == label_stmt; - ++it; - } - if (!sibling) { - throw LogicError("Not siblings"); - } - // goto_stmt and label_stmt are guaranteed to be siblings, eliminate - if (std::next(goto_stmt) == label_stmt) { - // Simply eliminate the goto if the label is next to it - goto_stmt->up->children.erase(goto_stmt); - } else if (Offset(goto_stmt) < Offset(label_stmt)) { - // Eliminate goto_stmt with a conditional - EliminateAsConditional(goto_stmt, label_stmt); - } else { - // Eliminate goto_stmt with a loop - EliminateAsLoop(goto_stmt, label_stmt); - } - } - - std::vector BuildUnorderedTreeGetGotos(std::span blocks) { - // Assume all blocks have two branches - std::vector gotos; - gotos.reserve(blocks.size() * 2); - - const std::unordered_map labels_map{BuildLabels(blocks)}; - Tree& root{root_stmt.children}; - auto insert_point{root.begin()}; - // Skip all goto variables zero-initialization - std::advance(insert_point, labels_map.size()); - - for (Block* const block : blocks) { - // Skip label - ++insert_point; - // Skip set variable - ++insert_point; - root.insert(insert_point, *pool.Create(block, &root_stmt)); - - if (block->IsTerminationBlock()) { - root.insert(insert_point, *pool.Create(Return{})); - continue; - } - const Condition cond{block->BranchCondition()}; - Statement* const true_cond{pool.Create(Identity{}, Condition{true})}; - if (cond == Condition{true} || cond == Condition{false}) { - const bool is_true{cond == Condition{true}}; - const Block* const branch{is_true ? block->TrueBranch() : block->FalseBranch()}; - const Node label{labels_map.at(branch)}; - Statement* const goto_stmt{pool.Create(Goto{}, true_cond, label, &root_stmt)}; - gotos.push_back(root.insert(insert_point, *goto_stmt)); - } else { - Statement* const ident_cond{pool.Create(Identity{}, cond)}; - const Node true_label{labels_map.at(block->TrueBranch())}; - const Node false_label{labels_map.at(block->FalseBranch())}; - Statement* goto_true{pool.Create(Goto{}, ident_cond, true_label, &root_stmt)}; - Statement* goto_false{pool.Create(Goto{}, true_cond, false_label, &root_stmt)}; - gotos.push_back(root.insert(insert_point, *goto_true)); - gotos.push_back(root.insert(insert_point, *goto_false)); - } - } - return gotos; - } - - std::unordered_map BuildLabels(std::span blocks) { - // TODO: Consider storing labels intrusively inside the block - std::unordered_map labels_map; - Tree& root{root_stmt.children}; - u32 label_id{0}; - for (const Block* const block : blocks) { - Statement* const label{pool.Create(Label{}, label_id, &root_stmt)}; - labels_map.emplace(block, root.insert(root.end(), *label)); - Statement* const false_stmt{pool.Create(Identity{}, Condition{false})}; - root.push_back(*pool.Create(SetVariable{}, label_id, false_stmt, &root_stmt)); - root.push_front(*pool.Create(SetVariable{}, label_id, false_stmt, &root_stmt)); - ++label_id; - } - return labels_map; - } - - void UpdateTreeUp(Statement* tree) { - for (Statement& stmt : tree->children) { - stmt.up = tree; - } - } - - void EliminateAsConditional(Node goto_stmt, Node label_stmt) { - Tree& body{goto_stmt->up->children}; - Tree if_body; - if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt); - Statement* const cond{pool.Create(Not{}, goto_stmt->cond)}; - Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)}; - UpdateTreeUp(if_stmt); - body.insert(goto_stmt, *if_stmt); - body.erase(goto_stmt); - } - - void EliminateAsLoop(Node goto_stmt, Node label_stmt) { - Tree& body{goto_stmt->up->children}; - Tree loop_body; - loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt); - Statement* const cond{goto_stmt->cond}; - Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)}; - UpdateTreeUp(loop); - body.insert(goto_stmt, *loop); - body.erase(goto_stmt); - } - - [[nodiscard]] Node MoveOutward(Node goto_stmt) { - switch (goto_stmt->up->type) { - case StatementType::If: - return MoveOutwardIf(goto_stmt); - case StatementType::Loop: - return MoveOutwardLoop(goto_stmt); - default: - throw LogicError("Invalid outward movement"); - } - } - - [[nodiscard]] Node MoveInward(Node goto_stmt) { - Statement* const parent{goto_stmt->up}; - Tree& body{parent->children}; - const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; - const Node label{goto_stmt->label}; - const u32 label_id{label->id}; - - Statement* const goto_cond{goto_stmt->cond}; - Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; - body.insert(goto_stmt, *set_var); - - Tree if_body; - if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt); - Statement* const variable{pool.Create(Variable{}, label_id)}; - Statement* const neg_var{pool.Create(Not{}, variable)}; - if (!if_body.empty()) { - Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)}; - UpdateTreeUp(if_stmt); - body.insert(goto_stmt, *if_stmt); - } - body.erase(goto_stmt); - - switch (label_nested_stmt->type) { - case StatementType::If: - // Update nested if condition - label_nested_stmt->cond = pool.Create(Or{}, variable, label_nested_stmt->cond); - break; - case StatementType::Loop: - break; - default: - throw LogicError("Invalid inward movement"); - } - Tree& nested_tree{label_nested_stmt->children}; - Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)}; - return nested_tree.insert(nested_tree.begin(), *new_goto); - } - - [[nodiscard]] Node Lift(Node goto_stmt) { - Statement* const parent{goto_stmt->up}; - Tree& body{parent->children}; - const Node label{goto_stmt->label}; - const u32 label_id{label->id}; - const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; - const auto type{label_nested_stmt->type}; - - Tree loop_body; - loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); - SanitizeNoBreaks(loop_body); - Statement* const variable{pool.Create(Variable{}, label_id)}; - Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; - UpdateTreeUp(loop_stmt); - const Node loop_node{body.insert(goto_stmt, *loop_stmt)}; - - Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; - loop_stmt->children.push_front(*new_goto); - const Node new_goto_node{loop_stmt->children.begin()}; - - Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)}; - loop_stmt->children.push_back(*set_var); - - body.erase(goto_stmt); - return new_goto_node; - } - - Node MoveOutwardIf(Node goto_stmt) { - const Node parent{Tree::s_iterator_to(*goto_stmt->up)}; - Tree& body{parent->children}; - const u32 label_id{goto_stmt->label->id}; - Statement* const goto_cond{goto_stmt->cond}; - Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)}; - body.insert(goto_stmt, *set_goto_var); - - Tree if_body; - if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end()); - if_body.pop_front(); - Statement* const cond{pool.Create(Variable{}, label_id)}; - Statement* const neg_cond{pool.Create(Not{}, cond)}; - Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)}; - UpdateTreeUp(if_stmt); - body.insert(goto_stmt, *if_stmt); - - body.erase(goto_stmt); - - Statement* const new_cond{pool.Create(Variable{}, label_id)}; - Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)}; - Tree& parent_tree{parent->up->children}; - return parent_tree.insert(std::next(parent), *new_goto); - } - - Node MoveOutwardLoop(Node goto_stmt) { - Statement* const parent{goto_stmt->up}; - Tree& body{parent->children}; - const u32 label_id{goto_stmt->label->id}; - Statement* const goto_cond{goto_stmt->cond}; - Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; - Statement* const cond{pool.Create(Variable{}, label_id)}; - Statement* const break_stmt{pool.Create(Break{}, cond, parent)}; - body.insert(goto_stmt, *set_goto_var); - body.insert(goto_stmt, *break_stmt); - body.erase(goto_stmt); - - const Node loop{Tree::s_iterator_to(*goto_stmt->up)}; - Statement* const new_goto_cond{pool.Create(Variable{}, label_id)}; - Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)}; - Tree& parent_tree{loop->up->children}; - return parent_tree.insert(std::next(loop), *new_goto); - } - - size_t Offset(ConstNode stmt) const { - size_t offset{0}; - if (!SearchNode(root_stmt.children, stmt, offset)) { - throw LogicError("Node not found in tree"); - } - return offset; - } - - ObjectPool& pool; - Statement root_stmt{FunctionTag{}}; -}; - -Block* TryFindForwardBlock(const Statement& stmt) { - const Tree& tree{stmt.up->children}; - const ConstNode end{tree.cend()}; - ConstNode forward_node{std::next(Tree::s_iterator_to(stmt))}; - while (forward_node != end && !HasChildren(forward_node->type)) { - if (forward_node->type == StatementType::Code) { - return forward_node->code; - } - ++forward_node; - } - return nullptr; -} - -[[nodiscard]] U1 VisitExpr(IREmitter& ir, const Statement& stmt) { - switch (stmt.type) { - case StatementType::Identity: - return ir.Condition(stmt.guest_cond); - case StatementType::Not: - return ir.LogicalNot(U1{VisitExpr(ir, *stmt.op)}); - case StatementType::Or: - return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b)); - case StatementType::Variable: - return ir.GetGotoVariable(stmt.id); - default: - throw NotImplementedException("Statement type {}", stmt.type); - } -} - -class TranslatePass { -public: - TranslatePass(ObjectPool& inst_pool_, ObjectPool& block_pool_, - ObjectPool& stmt_pool_, Statement& root_stmt, - const std::function& func_, BlockList& block_list_) - : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, func{func_}, - block_list{block_list_} { - Visit(root_stmt, nullptr, nullptr); - } - -private: - void Visit(Statement& parent, Block* continue_block, Block* break_block) { - Tree& tree{parent.children}; - Block* current_block{nullptr}; - - for (auto it = tree.begin(); it != tree.end(); ++it) { - Statement& stmt{*it}; - switch (stmt.type) { - case StatementType::Label: - // Labels can be ignored - break; - case StatementType::Code: { - if (current_block && current_block != stmt.code) { - IREmitter ir{*current_block}; - ir.Branch(stmt.code); - } - current_block = stmt.code; - func(stmt.code); - block_list.push_back(stmt.code); - break; - } - case StatementType::SetVariable: { - if (!current_block) { - current_block = MergeBlock(parent, stmt); - } - IREmitter ir{*current_block}; - ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); - break; - } - case StatementType::If: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } - Block* const merge_block{MergeBlock(parent, stmt)}; - - // Visit children - const size_t first_block_index{block_list.size()}; - Visit(stmt, merge_block, break_block); - - // Implement if header block - Block* const first_if_block{block_list.at(first_block_index)}; - IREmitter ir{*current_block}; - const U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.SelectionMerge(merge_block); - ir.BranchConditional(cond, first_if_block, merge_block); - - current_block = merge_block; - break; - } - case StatementType::Loop: { - Block* const loop_header_block{block_pool.Create(inst_pool)}; - if (current_block) { - IREmitter{*current_block}.Branch(loop_header_block); - } - block_list.push_back(loop_header_block); - - Block* const new_continue_block{block_pool.Create(inst_pool)}; - Block* const merge_block{MergeBlock(parent, stmt)}; - - // Visit children - const size_t first_block_index{block_list.size()}; - Visit(stmt, new_continue_block, merge_block); - - // The continue block is located at the end of the loop - block_list.push_back(new_continue_block); - - // Implement loop header block - Block* const first_loop_block{block_list.at(first_block_index)}; - IREmitter ir{*loop_header_block}; - ir.LoopMerge(merge_block, new_continue_block); - ir.Branch(first_loop_block); - - // Implement continue block - IREmitter continue_ir{*new_continue_block}; - const U1 continue_cond{VisitExpr(continue_ir, *stmt.cond)}; - continue_ir.BranchConditional(continue_cond, ir.block, merge_block); - - current_block = merge_block; - break; - } - case StatementType::Break: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } - Block* const skip_block{MergeBlock(parent, stmt)}; - - IREmitter ir{*current_block}; - ir.BranchConditional(VisitExpr(ir, *stmt.cond), break_block, skip_block); - - current_block = skip_block; - break; - } - case StatementType::Return: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } - IREmitter{*current_block}.Return(); - current_block = nullptr; - break; - } - default: - throw NotImplementedException("Statement type {}", stmt.type); - } - } - if (current_block && continue_block) { - IREmitter ir{*current_block}; - ir.Branch(continue_block); - } - } - - Block* MergeBlock(Statement& parent, Statement& stmt) { - if (Block* const block{TryFindForwardBlock(stmt)}) { - return block; - } - // Create a merge block we can visit later - Block* const block{block_pool.Create(inst_pool)}; - Statement* const merge_stmt{stmt_pool.Create(block, &parent)}; - parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); - return block; - } - - ObjectPool& stmt_pool; - ObjectPool& inst_pool; - ObjectPool& block_pool; - const std::function& func; - BlockList& block_list; -}; -} // Anonymous namespace - -BlockList VisitAST(ObjectPool& inst_pool, ObjectPool& block_pool, - std::span unordered_blocks, - const std::function& func) { - ObjectPool stmt_pool{64}; - GotoPass goto_pass{unordered_blocks, stmt_pool}; - BlockList block_list; - TranslatePass translate_pass{inst_pool, block_pool, stmt_pool, goto_pass.RootStatement(), - func, block_list}; - return block_list; -} - -} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.h b/src/shader_recompiler/frontend/ir/structured_control_flow.h deleted file mode 100644 index a574c24f7..000000000 --- a/src/shader_recompiler/frontend/ir/structured_control_flow.h +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include - -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" -#include "shader_recompiler/object_pool.h" - -namespace Shader::IR { - -[[nodiscard]] BlockList VisitAST(ObjectPool& inst_pool, ObjectPool& block_pool, - std::span unordered_blocks, - const std::function& func); - -} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index d0dc66330..715c0e92d 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -31,13 +31,12 @@ struct Compare { return lhs.begin < rhs.begin; } }; -} // Anonymous namespace -static u32 BranchOffset(Location pc, Instruction inst) { +u32 BranchOffset(Location pc, Instruction inst) { return pc.Offset() + inst.branch.Offset() + 8; } -static void Split(Block* old_block, Block* new_block, Location pc) { +void Split(Block* old_block, Block* new_block, Location pc) { if (pc <= old_block->begin || pc >= old_block->end) { throw InvalidArgument("Invalid address to split={}", pc); } @@ -49,21 +48,19 @@ static void Split(Block* old_block, Block* new_block, Location pc) { .cond{old_block->cond}, .branch_true{old_block->branch_true}, .branch_false{old_block->branch_false}, - .ir{nullptr}, }; *old_block = Block{ .begin{old_block->begin}, .end{pc}, .end_class{EndClass::Branch}, .stack{std::move(old_block->stack)}, - .cond{IR::Condition{true}}, + .cond{true}, .branch_true{new_block}, .branch_false{nullptr}, - .ir{nullptr}, }; } -static Token OpcodeToken(Opcode opcode) { +Token OpcodeToken(Opcode opcode) { switch (opcode) { case Opcode::PBK: case Opcode::BRK: @@ -89,7 +86,7 @@ static Token OpcodeToken(Opcode opcode) { } } -static bool IsAbsoluteJump(Opcode opcode) { +bool IsAbsoluteJump(Opcode opcode) { switch (opcode) { case Opcode::JCAL: case Opcode::JMP: @@ -100,7 +97,7 @@ static bool IsAbsoluteJump(Opcode opcode) { } } -static bool HasFlowTest(Opcode opcode) { +bool HasFlowTest(Opcode opcode) { switch (opcode) { case Opcode::BRA: case Opcode::BRX: @@ -121,13 +118,14 @@ static bool HasFlowTest(Opcode opcode) { } } -static std::string NameOf(const Block& block) { +std::string NameOf(const Block& block) { if (block.begin.IsVirtual()) { return fmt::format("\"Virtual {}\"", block.begin); } else { return fmt::format("\"{}\"", block.begin); } } +} // Anonymous namespace void Stack::Push(Token token, Location target) { entries.push_back({ @@ -166,26 +164,24 @@ bool Block::Contains(Location pc) const noexcept { return pc >= begin && pc < end; } -Function::Function(Location start_address) +Function::Function(ObjectPool& block_pool, Location start_address) : entrypoint{start_address}, labels{{ .address{start_address}, - .block{nullptr}, + .block{block_pool.Create(Block{ + .begin{start_address}, + .end{start_address}, + .end_class{EndClass::Branch}, + .stack{}, + .cond{true}, + .branch_true{nullptr}, + .branch_false{nullptr}, + })}, .stack{}, }} {} CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_address) : env{env_}, block_pool{block_pool_} { - functions.emplace_back(start_address); - functions.back().labels.back().block = block_pool.Create(Block{ - .begin{start_address}, - .end{start_address}, - .end_class{EndClass::Branch}, - .stack{}, - .cond{IR::Condition{true}}, - .branch_true{nullptr}, - .branch_false{nullptr}, - .ir{nullptr}, - }); + functions.emplace_back(block_pool, start_address); for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { while (!functions[function_id].labels.empty()) { Function& function{functions[function_id]}; @@ -308,11 +304,17 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; // Technically CAL pushes into PRET, but that's implicit in the function call for us // Insert the function into the list if it doesn't exist - if (std::ranges::find(functions, cal_pc, &Function::entrypoint) == functions.end()) { - functions.emplace_back(cal_pc); + const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; + const bool exists{it != functions.end()}; + const FunctionId call_id{exists ? std::distance(functions.begin(), it) : functions.size()}; + if (!exists) { + functions.emplace_back(block_pool, cal_pc); } - // Handle CAL like a regular instruction - break; + block->end_class = EndClass::Call; + block->function_call = call_id; + block->return_block = AddLabel(block, block->stack, pc + 1, function_id); + block->end = pc; + return AnalysisState::Branch; } default: break; @@ -348,7 +350,6 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, .cond{cond}, .branch_true{conditional_block}, .branch_false{nullptr}, - .ir{nullptr}, }; // Save the contents of the visited block in the conditional block *conditional_block = std::move(*block); @@ -401,16 +402,6 @@ void CFG::AnalyzeBRX(Block*, Location, Instruction, bool is_absolute) { throw NotImplementedException("{}", is_absolute ? "JMX" : "BRX"); } -void CFG::AnalyzeCAL(Location pc, Instruction inst, bool is_absolute) { - const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; - // Technically CAL pushes into PRET, but that's implicit in the function call for us - // Insert the function to the function list if it doesn't exist - const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; - if (it == functions.end()) { - functions.emplace_back(cal_pc); - } -} - CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst) { const IR::FlowTest flow_test{inst.branch.flow_test}; @@ -455,10 +446,9 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function .end{pc}, .end_class{EndClass::Branch}, .stack{stack}, - .cond{IR::Condition{true}}, + .cond{true}, .branch_true{nullptr}, .branch_false{nullptr}, - .ir{nullptr}, })}; function.labels.push_back(Label{ .address{pc}, @@ -495,6 +485,14 @@ std::string CFG::Dot() const { add_branch(block.branch_false, false); } break; + case EndClass::Call: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block)); + dot += fmt::format("\t\tN{} [label=\"Call {}\"][shape=square][style=stripped];\n", + node_uid, block.function_call); + dot += '\n'; + ++node_uid; + break; case EndClass::Exit: dot += fmt::format("\t\t{}->N{};\n", name, node_uid); dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 209c9e551..fe74f210f 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -20,16 +20,13 @@ #include "shader_recompiler/frontend/maxwell/opcodes.h" #include "shader_recompiler/object_pool.h" -namespace Shader::IR { -class Block; -} - namespace Shader::Maxwell::Flow { using FunctionId = size_t; enum class EndClass { Branch, + Call, Exit, Return, }; @@ -75,9 +72,14 @@ struct Block : boost::intrusive::set_base_hook< EndClass end_class; Stack stack; IR::Condition cond; - Block* branch_true; - Block* branch_false; - IR::Block* ir; + union { + Block* branch_true; + FunctionId function_call; + }; + union { + Block* branch_false; + Block* return_block; + }; }; struct Label { @@ -87,7 +89,7 @@ struct Label { }; struct Function { - Function(Location start_address); + explicit Function(ObjectPool& block_pool, Location start_address); Location entrypoint; boost::container::small_vector labels; @@ -137,7 +139,6 @@ private: void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute); void AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute); - void AnalyzeCAL(Location pc, Instruction inst, bool is_absolute); AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst); /// Return the branch target block id diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index b270bbccd..8bfa64326 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -8,67 +8,44 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/post_order.h" -#include "shader_recompiler/frontend/ir/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { -namespace { -IR::BlockList TranslateCode(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, Flow::Function& cfg_function) { - const size_t num_blocks{cfg_function.blocks.size()}; - std::vector blocks(cfg_function.blocks.size()); - std::ranges::for_each(cfg_function.blocks, [&, i = size_t{0}](auto& cfg_block) mutable { - const u32 begin{cfg_block.begin.Offset()}; - const u32 end{cfg_block.end.Offset()}; - blocks[i] = block_pool.Create(inst_pool, begin, end); - cfg_block.ir = blocks[i]; - ++i; - }); - std::ranges::for_each(cfg_function.blocks, [&, i = size_t{0}](auto& cfg_block) mutable { - IR::Block* const block{blocks[i]}; - ++i; - if (cfg_block.end_class != Flow::EndClass::Branch) { - block->SetReturn(); - } else if (cfg_block.cond == IR::Condition{true}) { - block->SetBranch(cfg_block.branch_true->ir); - } else if (cfg_block.cond == IR::Condition{false}) { - block->SetBranch(cfg_block.branch_false->ir); - } else { - block->SetBranches(cfg_block.cond, cfg_block.branch_true->ir, - cfg_block.branch_false->ir); - } + +static void RemoveUnreachableBlocks(IR::Program& program) { + // Some blocks might be unreachable if a function call exists unconditionally + // If this happens the number of blocks and post order blocks will mismatch + if (program.blocks.size() == program.post_order_blocks.size()) { + return; + } + const IR::BlockList& post_order{program.post_order_blocks}; + std::erase_if(program.blocks, [&](IR::Block* block) { + return std::ranges::find(post_order, block) == post_order.end(); }); - return IR::VisitAST(inst_pool, block_pool, blocks, - [&](IR::Block* block) { Translate(env, block); }); } -} // Anonymous namespace IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, Flow::CFG& cfg) { IR::Program program; - auto& functions{program.functions}; - functions.reserve(cfg.Functions().size()); - for (Flow::Function& cfg_function : cfg.Functions()) { - functions.push_back(IR::Function{ - .blocks{TranslateCode(inst_pool, block_pool, env, cfg_function)}, - .post_order_blocks{}, - }); - } + program.blocks = VisitAST(inst_pool, block_pool, env, cfg); + program.post_order_blocks = PostOrder(program.blocks); + RemoveUnreachableBlocks(program); + + // Replace instructions before the SSA rewrite Optimization::LowerFp16ToFp32(program); - for (IR::Function& function : functions) { - function.post_order_blocks = PostOrder(function.blocks); - Optimization::SsaRewritePass(function.post_order_blocks); - } + + Optimization::SsaRewritePass(program); + Optimization::GlobalMemoryToStorageBufferPass(program); Optimization::TexturePass(env, program); - for (IR::Function& function : functions) { - Optimization::PostOrderInvoke(Optimization::ConstantPropagationPass, function); - Optimization::PostOrderInvoke(Optimization::DeadCodeEliminationPass, function); - Optimization::IdentityRemovalPass(function); - Optimization::VerificationPass(function); - } + + Optimization::ConstantPropagationPass(program); + Optimization::DeadCodeEliminationPass(program); + Optimization::IdentityRemovalPass(program); + Optimization::VerificationPass(program); Optimization::CollectShaderInfoPass(program); return program; } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp new file mode 100644 index 000000000..5f5d9cf17 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -0,0 +1,770 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" +#include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::Maxwell { +namespace { +struct Statement; + +// Use normal_link because we are not guaranteed to destroy the tree in order +using ListBaseHook = + boost::intrusive::list_base_hook>; + +using Tree = boost::intrusive::list, + // Avoid linear complexity on splice, size is never called + boost::intrusive::constant_time_size>; +using Node = Tree::iterator; +using ConstNode = Tree::const_iterator; + +enum class StatementType { + Code, + Goto, + Label, + If, + Loop, + Break, + Return, + Function, + Identity, + Not, + Or, + SetVariable, + Variable, +}; + +bool HasChildren(StatementType type) { + switch (type) { + case StatementType::If: + case StatementType::Loop: + case StatementType::Function: + return true; + default: + return false; + } +} + +struct Goto {}; +struct Label {}; +struct If {}; +struct Loop {}; +struct Break {}; +struct Return {}; +struct FunctionTag {}; +struct Identity {}; +struct Not {}; +struct Or {}; +struct SetVariable {}; +struct Variable {}; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement +#endif +struct Statement : ListBaseHook { + Statement(IR::Block* code_, Statement* up_) : code{code_}, up{up_}, type{StatementType::Code} {} + Statement(Goto, Statement* cond_, Node label_, Statement* up_) + : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {} + Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {} + Statement(If, Statement* cond_, Tree&& children_, Statement* up_) + : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {} + Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_) + : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {} + Statement(Break, Statement* cond_, Statement* up_) + : cond{cond_}, up{up_}, type{StatementType::Break} {} + Statement(Return) : type{StatementType::Return} {} + Statement(FunctionTag) : children{}, type{StatementType::Function} {} + Statement(Identity, IR::Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {} + Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {} + Statement(Or, Statement* op_a_, Statement* op_b_) + : op_a{op_a_}, op_b{op_b_}, type{StatementType::Or} {} + Statement(SetVariable, u32 id_, Statement* op_, Statement* up_) + : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} + Statement(Variable, u32 id_) : id{id_}, type{StatementType::Variable} {} + + ~Statement() { + if (HasChildren(type)) { + std::destroy_at(&children); + } + } + + union { + IR::Block* code; + Node label; + Tree children; + IR::Condition guest_cond; + Statement* op; + Statement* op_a; + }; + union { + Statement* cond; + Statement* op_b; + u32 id; + }; + Statement* up{}; + StatementType type; +}; +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +std::string DumpExpr(const Statement* stmt) { + switch (stmt->type) { + case StatementType::Identity: + return fmt::format("{}", stmt->guest_cond); + case StatementType::Not: + return fmt::format("!{}", DumpExpr(stmt->op)); + case StatementType::Or: + return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b)); + case StatementType::Variable: + return fmt::format("goto_L{}", stmt->id); + default: + return ""; + } +} + +std::string DumpTree(const Tree& tree, u32 indentation = 0) { + std::string ret; + std::string indent(indentation, ' '); + for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { + switch (stmt->type) { + case StatementType::Code: + ret += fmt::format("{} Block {:04x};\n", indent, stmt->code->LocationBegin()); + break; + case StatementType::Goto: + ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond), + stmt->label->id); + break; + case StatementType::Label: + ret += fmt::format("{}L{}:\n", indent, stmt->id); + break; + case StatementType::If: + ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond)); + ret += DumpTree(stmt->children, indentation + 4); + ret += fmt::format("{} }}\n", indent); + break; + case StatementType::Loop: + ret += fmt::format("{} do {{\n", indent); + ret += DumpTree(stmt->children, indentation + 4); + ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond)); + break; + case StatementType::Break: + ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond)); + break; + case StatementType::Return: + ret += fmt::format("{} return;\n", indent); + break; + case StatementType::SetVariable: + ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op)); + break; + case StatementType::Function: + case StatementType::Identity: + case StatementType::Not: + case StatementType::Or: + case StatementType::Variable: + throw LogicError("Statement can't be printed"); + } + } + return ret; +} + +bool HasNode(const Tree& tree, ConstNode stmt) { + const auto end{tree.end()}; + for (auto it = tree.begin(); it != end; ++it) { + if (it == stmt || (HasChildren(it->type) && HasNode(it->children, stmt))) { + return true; + } + } + return false; +} + +Node FindStatementWithLabel(Tree& tree, ConstNode goto_stmt) { + const ConstNode label_stmt{goto_stmt->label}; + const ConstNode end{tree.end()}; + for (auto it = tree.begin(); it != end; ++it) { + if (it == label_stmt || (HasChildren(it->type) && HasNode(it->children, label_stmt))) { + return it; + } + } + throw LogicError("Lift label not in tree"); +} + +void SanitizeNoBreaks(const Tree& tree) { + if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) { + throw NotImplementedException("Capturing statement with break nodes"); + } +} + +size_t Level(Node stmt) { + size_t level{0}; + Statement* node{stmt->up}; + while (node) { + ++level; + node = node->up; + } + return level; +} + +bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) { + const size_t goto_level{Level(goto_stmt)}; + const size_t label_level{Level(label_stmt)}; + size_t min_level; + size_t max_level; + Node min; + Node max; + if (label_level < goto_level) { + min_level = label_level; + max_level = goto_level; + min = label_stmt; + max = goto_stmt; + } else { // goto_level < label_level + min_level = goto_level; + max_level = label_level; + min = goto_stmt; + max = label_stmt; + } + while (max_level > min_level) { + --max_level; + max = max->up; + } + return min->up == max->up; +} + +bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) { + return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt); +} + +bool SearchNode(const Tree& tree, ConstNode stmt, size_t& offset) { + ++offset; + + const auto end = tree.end(); + for (ConstNode it = tree.begin(); it != end; ++it) { + ++offset; + if (stmt == it) { + return true; + } + if (HasChildren(it->type) && SearchNode(it->children, stmt, offset)) { + return true; + } + } + return false; +} + +class GotoPass { +public: + explicit GotoPass(Flow::CFG& cfg, ObjectPool& inst_pool_, + ObjectPool& block_pool_, ObjectPool& stmt_pool) + : inst_pool{inst_pool_}, block_pool{block_pool_}, pool{stmt_pool} { + std::vector gotos{BuildTree(cfg)}; + for (const Node& goto_stmt : gotos | std::views::reverse) { + RemoveGoto(goto_stmt); + } + } + + Statement& RootStatement() noexcept { + return root_stmt; + } + +private: + void RemoveGoto(Node goto_stmt) { + // Force goto_stmt and label_stmt to be directly related + const Node label_stmt{goto_stmt->label}; + if (IsIndirectlyRelated(goto_stmt, label_stmt)) { + // Move goto_stmt out using outward-movement transformation until it becomes + // directly related to label_stmt + while (!IsDirectlyRelated(goto_stmt, label_stmt)) { + goto_stmt = MoveOutward(goto_stmt); + } + } + // Force goto_stmt and label_stmt to be siblings + if (IsDirectlyRelated(goto_stmt, label_stmt)) { + const size_t label_level{Level(label_stmt)}; + size_t goto_level{Level(goto_stmt)}; + if (goto_level > label_level) { + // Move goto_stmt out of its level using outward-movement transformations + while (goto_level > label_level) { + goto_stmt = MoveOutward(goto_stmt); + --goto_level; + } + } else { // Level(goto_stmt) < Level(label_stmt) + if (Offset(goto_stmt) > Offset(label_stmt)) { + // Lift goto_stmt to above stmt containing label_stmt using goto-lifting + // transformations + goto_stmt = Lift(goto_stmt); + } + // Move goto_stmt into label_stmt's level using inward-movement transformation + while (goto_level < label_level) { + goto_stmt = MoveInward(goto_stmt); + ++goto_level; + } + } + } + // TODO: Remove this + { + Node it{goto_stmt}; + bool sibling{false}; + do { + sibling |= it == label_stmt; + --it; + } while (it != goto_stmt->up->children.begin()); + while (it != goto_stmt->up->children.end()) { + sibling |= it == label_stmt; + ++it; + } + if (!sibling) { + throw LogicError("Not siblings"); + } + } + // goto_stmt and label_stmt are guaranteed to be siblings, eliminate + if (std::next(goto_stmt) == label_stmt) { + // Simply eliminate the goto if the label is next to it + goto_stmt->up->children.erase(goto_stmt); + } else if (Offset(goto_stmt) < Offset(label_stmt)) { + // Eliminate goto_stmt with a conditional + EliminateAsConditional(goto_stmt, label_stmt); + } else { + // Eliminate goto_stmt with a loop + EliminateAsLoop(goto_stmt, label_stmt); + } + } + + std::vector BuildTree(Flow::CFG& cfg) { + u32 label_id{0}; + std::vector gotos; + Flow::Function& first_function{cfg.Functions().front()}; + BuildTree(cfg, first_function, label_id, gotos, root_stmt.children.end(), std::nullopt); + return gotos; + } + + void BuildTree(Flow::CFG& cfg, Flow::Function& function, u32& label_id, + std::vector& gotos, Node function_insert_point, + std::optional return_label) { + Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false})}; + Tree& root{root_stmt.children}; + std::unordered_map local_labels; + local_labels.reserve(function.blocks.size()); + + for (Flow::Block& block : function.blocks) { + Statement* const label{pool.Create(Label{}, label_id, &root_stmt)}; + const Node label_it{root.insert(function_insert_point, *label)}; + local_labels.emplace(&block, label_it); + ++label_id; + } + for (Flow::Block& block : function.blocks) { + const Node label{local_labels.at(&block)}; + // Insertion point + const Node ip{std::next(label)}; + + // Reset goto variables before the first block and after its respective label + const auto make_reset_variable{[&]() -> Statement& { + return *pool.Create(SetVariable{}, label->id, false_stmt, &root_stmt); + }}; + root.push_front(make_reset_variable()); + root.insert(ip, make_reset_variable()); + + const u32 begin_offset{block.begin.Offset()}; + const u32 end_offset{block.end.Offset()}; + IR::Block* const ir_block{block_pool.Create(inst_pool, begin_offset, end_offset)}; + root.insert(ip, *pool.Create(ir_block, &root_stmt)); + + switch (block.end_class) { + case Flow::EndClass::Branch: { + Statement* const always_cond{pool.Create(Identity{}, IR::Condition{true})}; + if (block.cond == IR::Condition{true}) { + const Node true_label{local_labels.at(block.branch_true)}; + gotos.push_back( + root.insert(ip, *pool.Create(Goto{}, always_cond, true_label, &root_stmt))); + } else if (block.cond == IR::Condition{false}) { + const Node false_label{local_labels.at(block.branch_false)}; + gotos.push_back(root.insert( + ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt))); + } else { + const Node true_label{local_labels.at(block.branch_true)}; + const Node false_label{local_labels.at(block.branch_false)}; + Statement* const true_cond{pool.Create(Identity{}, block.cond)}; + gotos.push_back( + root.insert(ip, *pool.Create(Goto{}, true_cond, true_label, &root_stmt))); + gotos.push_back(root.insert( + ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt))); + } + break; + } + case Flow::EndClass::Call: { + Flow::Function& call{cfg.Functions()[block.function_call]}; + const Node call_return_label{local_labels.at(block.return_block)}; + BuildTree(cfg, call, label_id, gotos, ip, call_return_label); + break; + } + case Flow::EndClass::Exit: + root.insert(ip, *pool.Create(Return{})); + break; + case Flow::EndClass::Return: { + Statement* const always_cond{pool.Create(Identity{}, block.cond)}; + auto goto_stmt{pool.Create(Goto{}, always_cond, return_label.value(), &root_stmt)}; + gotos.push_back(root.insert(ip, *goto_stmt)); + break; + } + } + } + } + + void UpdateTreeUp(Statement* tree) { + for (Statement& stmt : tree->children) { + stmt.up = tree; + } + } + + void EliminateAsConditional(Node goto_stmt, Node label_stmt) { + Tree& body{goto_stmt->up->children}; + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt); + Statement* const cond{pool.Create(Not{}, goto_stmt->cond)}; + Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + body.erase(goto_stmt); + } + + void EliminateAsLoop(Node goto_stmt, Node label_stmt) { + Tree& body{goto_stmt->up->children}; + Tree loop_body; + loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt); + Statement* const cond{goto_stmt->cond}; + Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)}; + UpdateTreeUp(loop); + body.insert(goto_stmt, *loop); + body.erase(goto_stmt); + } + + [[nodiscard]] Node MoveOutward(Node goto_stmt) { + switch (goto_stmt->up->type) { + case StatementType::If: + return MoveOutwardIf(goto_stmt); + case StatementType::Loop: + return MoveOutwardLoop(goto_stmt); + default: + throw LogicError("Invalid outward movement"); + } + } + + [[nodiscard]] Node MoveInward(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; + const Node label{goto_stmt->label}; + const u32 label_id{label->id}; + + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; + body.insert(goto_stmt, *set_var); + + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt); + Statement* const variable{pool.Create(Variable{}, label_id)}; + Statement* const neg_var{pool.Create(Not{}, variable)}; + if (!if_body.empty()) { + Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + } + body.erase(goto_stmt); + + switch (label_nested_stmt->type) { + case StatementType::If: + // Update nested if condition + label_nested_stmt->cond = pool.Create(Or{}, variable, label_nested_stmt->cond); + break; + case StatementType::Loop: + break; + default: + throw LogicError("Invalid inward movement"); + } + Tree& nested_tree{label_nested_stmt->children}; + Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)}; + return nested_tree.insert(nested_tree.begin(), *new_goto); + } + + [[nodiscard]] Node Lift(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const Node label{goto_stmt->label}; + const u32 label_id{label->id}; + const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; + const auto type{label_nested_stmt->type}; + + Tree loop_body; + loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); + SanitizeNoBreaks(loop_body); + Statement* const variable{pool.Create(Variable{}, label_id)}; + Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; + UpdateTreeUp(loop_stmt); + const Node loop_node{body.insert(goto_stmt, *loop_stmt)}; + + Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; + loop_stmt->children.push_front(*new_goto); + const Node new_goto_node{loop_stmt->children.begin()}; + + Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)}; + loop_stmt->children.push_back(*set_var); + + body.erase(goto_stmt); + return new_goto_node; + } + + Node MoveOutwardIf(Node goto_stmt) { + const Node parent{Tree::s_iterator_to(*goto_stmt->up)}; + Tree& body{parent->children}; + const u32 label_id{goto_stmt->label->id}; + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)}; + body.insert(goto_stmt, *set_goto_var); + + Tree if_body; + if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end()); + if_body.pop_front(); + Statement* const cond{pool.Create(Variable{}, label_id)}; + Statement* const neg_cond{pool.Create(Not{}, cond)}; + Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)}; + UpdateTreeUp(if_stmt); + body.insert(goto_stmt, *if_stmt); + + body.erase(goto_stmt); + + Statement* const new_cond{pool.Create(Variable{}, label_id)}; + Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)}; + Tree& parent_tree{parent->up->children}; + return parent_tree.insert(std::next(parent), *new_goto); + } + + Node MoveOutwardLoop(Node goto_stmt) { + Statement* const parent{goto_stmt->up}; + Tree& body{parent->children}; + const u32 label_id{goto_stmt->label->id}; + Statement* const goto_cond{goto_stmt->cond}; + Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; + Statement* const cond{pool.Create(Variable{}, label_id)}; + Statement* const break_stmt{pool.Create(Break{}, cond, parent)}; + body.insert(goto_stmt, *set_goto_var); + body.insert(goto_stmt, *break_stmt); + body.erase(goto_stmt); + + const Node loop{Tree::s_iterator_to(*goto_stmt->up)}; + Statement* const new_goto_cond{pool.Create(Variable{}, label_id)}; + Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)}; + Tree& parent_tree{loop->up->children}; + return parent_tree.insert(std::next(loop), *new_goto); + } + + size_t Offset(ConstNode stmt) const { + size_t offset{0}; + if (!SearchNode(root_stmt.children, stmt, offset)) { + throw LogicError("Node not found in tree"); + } + return offset; + } + + ObjectPool& inst_pool; + ObjectPool& block_pool; + ObjectPool& pool; + Statement root_stmt{FunctionTag{}}; +}; + +IR::Block* TryFindForwardBlock(const Statement& stmt) { + const Tree& tree{stmt.up->children}; + const ConstNode end{tree.cend()}; + ConstNode forward_node{std::next(Tree::s_iterator_to(stmt))}; + while (forward_node != end && !HasChildren(forward_node->type)) { + if (forward_node->type == StatementType::Code) { + return forward_node->code; + } + ++forward_node; + } + return nullptr; +} + +[[nodiscard]] IR::U1 VisitExpr(IR::IREmitter& ir, const Statement& stmt) { + switch (stmt.type) { + case StatementType::Identity: + return ir.Condition(stmt.guest_cond); + case StatementType::Not: + return ir.LogicalNot(IR::U1{VisitExpr(ir, *stmt.op)}); + case StatementType::Or: + return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b)); + case StatementType::Variable: + return ir.GetGotoVariable(stmt.id); + default: + throw NotImplementedException("Statement type {}", stmt.type); + } +} + +class TranslatePass { +public: + TranslatePass(ObjectPool& inst_pool_, ObjectPool& block_pool_, + ObjectPool& stmt_pool_, Environment& env_, Statement& root_stmt, + IR::BlockList& block_list_) + : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, + block_list{block_list_} { + Visit(root_stmt, nullptr, nullptr); + } + +private: + void Visit(Statement& parent, IR::Block* continue_block, IR::Block* break_block) { + Tree& tree{parent.children}; + IR::Block* current_block{nullptr}; + + for (auto it = tree.begin(); it != tree.end(); ++it) { + Statement& stmt{*it}; + switch (stmt.type) { + case StatementType::Label: + // Labels can be ignored + break; + case StatementType::Code: { + if (current_block && current_block != stmt.code) { + IR::IREmitter{*current_block}.Branch(stmt.code); + } + current_block = stmt.code; + Translate(env, stmt.code); + block_list.push_back(stmt.code); + break; + } + case StatementType::SetVariable: { + if (!current_block) { + current_block = MergeBlock(parent, stmt); + } + IR::IREmitter ir{*current_block}; + ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); + break; + } + case StatementType::If: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IR::Block* const merge_block{MergeBlock(parent, stmt)}; + + // Visit children + const size_t first_block_index{block_list.size()}; + Visit(stmt, merge_block, break_block); + + // Implement if header block + IR::Block* const first_if_block{block_list.at(first_block_index)}; + IR::IREmitter ir{*current_block}; + const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; + ir.SelectionMerge(merge_block); + ir.BranchConditional(cond, first_if_block, merge_block); + + current_block = merge_block; + break; + } + case StatementType::Loop: { + IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; + if (current_block) { + IR::IREmitter{*current_block}.Branch(loop_header_block); + } + block_list.push_back(loop_header_block); + + IR::Block* const new_continue_block{block_pool.Create(inst_pool)}; + IR::Block* const merge_block{MergeBlock(parent, stmt)}; + + // Visit children + const size_t first_block_index{block_list.size()}; + Visit(stmt, new_continue_block, merge_block); + + // The continue block is located at the end of the loop + block_list.push_back(new_continue_block); + + // Implement loop header block + IR::Block* const first_loop_block{block_list.at(first_block_index)}; + IR::IREmitter ir{*loop_header_block}; + ir.LoopMerge(merge_block, new_continue_block); + ir.Branch(first_loop_block); + + // Implement continue block + IR::IREmitter continue_ir{*new_continue_block}; + const IR::U1 continue_cond{VisitExpr(continue_ir, *stmt.cond)}; + continue_ir.BranchConditional(continue_cond, ir.block, merge_block); + + current_block = merge_block; + break; + } + case StatementType::Break: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IR::Block* const skip_block{MergeBlock(parent, stmt)}; + + IR::IREmitter ir{*current_block}; + ir.BranchConditional(VisitExpr(ir, *stmt.cond), break_block, skip_block); + + current_block = skip_block; + break; + } + case StatementType::Return: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IR::IREmitter{*current_block}.Return(); + current_block = nullptr; + break; + } + default: + throw NotImplementedException("Statement type {}", stmt.type); + } + } + if (current_block && continue_block) { + IR::IREmitter{*current_block}.Branch(continue_block); + } + } + + IR::Block* MergeBlock(Statement& parent, Statement& stmt) { + if (IR::Block* const block{TryFindForwardBlock(stmt)}) { + return block; + } + // Create a merge block we can visit later + IR::Block* const block{block_pool.Create(inst_pool)}; + Statement* const merge_stmt{stmt_pool.Create(block, &parent)}; + parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); + return block; + } + + ObjectPool& stmt_pool; + ObjectPool& inst_pool; + ObjectPool& block_pool; + Environment& env; + IR::BlockList& block_list; +}; +} // Anonymous namespace + +IR::BlockList VisitAST(ObjectPool& inst_pool, ObjectPool& block_pool, + Environment& env, Flow::CFG& cfg) { + ObjectPool stmt_pool{64}; + GotoPass goto_pass{cfg, inst_pool, block_pool, stmt_pool}; + Statement& root{goto_pass.RootStatement()}; + IR::BlockList block_list; + TranslatePass{inst_pool, block_pool, stmt_pool, env, root, block_list}; + return block_list; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h new file mode 100644 index 000000000..e4797291e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h @@ -0,0 +1,24 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::Maxwell { + +[[nodiscard]] IR::BlockList VisitAST(ObjectPool& inst_pool, + ObjectPool& block_pool, Environment& env, + Flow::CFG& cfg); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index c6253c40c..45d6f5e06 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -62,7 +62,7 @@ public: void BRA(u64 insn); void BRK(u64 insn); void BRX(u64 insn); - void CAL(u64 insn); + void CAL(); void CCTL(u64 insn); void CCTLL(u64 insn); void CONT(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 01ecbb4cc..92da5c7e8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -65,8 +65,8 @@ void TranslatorVisitor::BRX(u64) { ThrowNotImplemented(Opcode::BRX); } -void TranslatorVisitor::CAL(u64) { - ThrowNotImplemented(Opcode::CAL); +void TranslatorVisitor::CAL() { + // CAL is a no-op } void TranslatorVisitor::CCTL(u64) { -- cgit v1.2.3 From 17a82b56d74afcebaad78ce4754d8ee99ea66f93 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 15 Mar 2021 04:54:43 -0300 Subject: shader: Implement TEXS --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 8 + src/shader_recompiler/frontend/ir/ir_emitter.h | 3 + src/shader_recompiler/frontend/ir/modifiers.h | 3 +- .../maxwell/translate/impl/not_implemented.cpp | 4 - .../maxwell/translate/impl/texture_fetch.cpp | 232 ++++++++++++++++++ .../translate/impl/texture_fetch_swizzled.cpp | 262 +++++++++++++++++++++ .../maxwell/translate/impl/texture_sample.cpp | 232 ------------------ 7 files changed, 507 insertions(+), 237 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp delete mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 556961fa4..d94596ee9 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -512,6 +512,14 @@ Value IREmitter::UnpackFloat2x16(const U32& value) { return Inst(Opcode::UnpackFloat2x16, value); } +U32 IREmitter::PackHalf2x16(const Value& vector) { + return Inst(Opcode::PackHalf2x16, vector); +} + +Value IREmitter::UnpackHalf2x16(const U32& value) { + return Inst(Opcode::UnpackHalf2x16, value); +} + F64 IREmitter::PackDouble2x32(const Value& vector) { return Inst(Opcode::PackDouble2x32, vector); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 74fb3dbcb..27ff5a29d 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -115,6 +115,9 @@ public: [[nodiscard]] U32 PackFloat2x16(const Value& vector); [[nodiscard]] Value UnpackFloat2x16(const U32& value); + [[nodiscard]] U32 PackHalf2x16(const Value& vector); + [[nodiscard]] Value UnpackHalf2x16(const U32& value); + [[nodiscard]] F64 PackDouble2x32(const Value& vector); [[nodiscard]] Value UnpackDouble2x32(const F64& value); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index ad07700ae..308c00153 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -36,7 +36,8 @@ union TextureInstInfo { u32 raw; BitField<0, 8, TextureType> type; BitField<8, 1, u32> has_bias; - BitField<16, 1, u32> has_lod_clamp; + BitField<9, 1, u32> has_lod_clamp; + BitField<10, 1, u32> relaxed_precision; }; static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 92da5c7e8..9aa7b836c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -553,10 +553,6 @@ void TranslatorVisitor::SYNC(u64) { ThrowNotImplemented(Opcode::SYNC); } -void TranslatorVisitor::TEXS(u64) { - ThrowNotImplemented(Opcode::TEXS); -} - void TranslatorVisitor::TLD(u64) { ThrowNotImplemented(Opcode::TLD); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp new file mode 100644 index 000000000..98d9f4c64 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -0,0 +1,232 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Blod : u64 { + None, + LZ, + LB, + LL, + INVALIDBLOD4, + INVALIDBLOD5, + LBA, + LLA, +}; + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type, bool dc) { + switch (type) { + case TextureType::_1D: + return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }}; + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) { + switch (blod) { + case Blod::None: + return v.ir.Imm32(0.0f); + case Blod::LZ: + return v.ir.Imm32(0.0f); + case Blod::LB: + case Blod::LL: + case Blod::LBA: + case Blod::LLA: + return v.F(reg++); + case Blod::INVALIDBLOD4: + case Blod::INVALIDBLOD5: + break; + } + throw NotImplementedException("Invalid blod {}", blod); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4))); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4))); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +bool HasExplicitLod(Blod blod) { + switch (blod) { + case Blod::LL: + case Blod::LLA: + case Blod::LZ: + return true; + default: + return false; + } +} + +void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, + std::optional cbuf_offset) { + union { + u64 raw; + BitField<35, 1, u64> ndv; + BitField<49, 1, u64> nodep; + BitField<50, 1, u64> dc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + } const tex{insn}; + + if (lc) { + throw NotImplementedException("LC"); + } + const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)}; + + IR::Reg meta_reg{tex.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::F32 dref; + IR::F32 lod_clamp; + if (cbuf_offset) { + handle = v.ir.Imm32(*cbuf_offset); + } else { + handle = v.X(meta_reg++); + } + const IR::F32 lod{MakeLod(v, meta_reg, blod)}; + if (aoffi) { + offset = MakeOffset(v, meta_reg, tex.type); + } + if (tex.dc != 0) { + dref = v.F(meta_reg++); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tex.type, tex.dc != 0)); + info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); + info.has_lod_clamp.Assign(lc ? 1 : 0); + + const IR::Value sample{[&]() -> IR::Value { + if (tex.dc == 0) { + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, lod_clamp, info); + } else { + return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); + } + } + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, lod_clamp, + info); + } else { + return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, + info); + } + }()}; + + for (int element = 0; element < 4; ++element) { + if (((tex.mask >> element) & 1) == 0) { + continue; + } + IR::F32 value; + if (tex.dc != 0) { + value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f); + } else { + value = IR::F32{v.ir.CompositeExtract(sample, element)}; + } + v.F(tex.dest_reg + element, value); + } + if (tex.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TEX(u64 insn) { + union { + u64 raw; + BitField<54, 1, u64> aoffi; + BitField<55, 3, Blod> blod; + BitField<58, 1, u64> lc; + BitField<36, 13, u64> cbuf_offset; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast(tex.cbuf_offset)); +} + +void TranslatorVisitor::TEX_b(u64 insn) { + union { + u64 raw; + BitField<36, 1, u64> aoffi; + BitField<37, 3, Blod> blod; + BitField<40, 1, u64> lc; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp new file mode 100644 index 000000000..ac1615b00 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -0,0 +1,262 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Precision : u64 { + F16, + F32, +}; + +union Encoding { + u64 raw; + BitField<59, 1, Precision> precision; + BitField<53, 4, u64> encoding; + BitField<49, 1, u64> nodep; + BitField<28, 8, IR::Reg> dest_reg_b; + BitField<0, 8, IR::Reg> dest_reg_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<36, 13, u64> cbuf_offset; + BitField<50, 3, u64> swizzle; +}; + +constexpr unsigned R = 1; +constexpr unsigned G = 2; +constexpr unsigned B = 4; +constexpr unsigned A = 8; + +constexpr std::array RG_LUT{ + R, // + G, // + B, // + A, // + R | G, // + R | A, // + G | A, // + B | A, // +}; + +constexpr std::array RGBA_LUT{ + R | G | B, // + R | G | A, // + R | B | A, // + G | B | A, // + R | G | B | A, // +}; + +void CheckAlignment(IR::Reg reg, int alignment) { + if (!IR::IsAligned(reg, alignment)) { + throw NotImplementedException("Unaligned source register {}", reg); + } +} + +template +IR::Value Composite(TranslatorVisitor& v, Args... regs) { + return v.ir.CompositeConstruct(v.F(regs)...); +} + +IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { + return v.ir.ConvertUToF(32, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))); +} + +IR::Value Sample(TranslatorVisitor& v, u64 insn) { + const Encoding texs{insn}; + const IR::U32 handle{v.ir.Imm32(static_cast(texs.cbuf_offset))}; + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::Reg reg_a{texs.src_reg_a}; + const IR::Reg reg_b{texs.src_reg_b}; + IR::TextureInstInfo info{}; + if (texs.precision == Precision::F16) { + info.relaxed_precision.Assign(1); + } + switch (texs.encoding) { + case 0: // 1D.LZ + info.type.Assign(TextureType::Color1D); + return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, {}, info); + case 1: // 2D + info.type.Assign(TextureType::Color2D); + return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info); + case 2: // 2D.LZ + info.type.Assign(TextureType::Color2D); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, {}, info); + case 3: // 2D.LL + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color2D); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {}, + {}, info); + case 4: // 2D.DC + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Shadow2D); + return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), + {}, {}, {}, info); + case 5: // 2D.LL.DC + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + info.type.Assign(TextureType::Shadow2D); + return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), + v.F(reg_b + 1), v.F(reg_b), {}, {}, info); + case 6: // 2D.LZ.DC + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Shadow2D); + return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), + zero, {}, {}, info); + case 7: // ARRAY_2D + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::ColorArray2D); + return v.ir.ImageSampleImplicitLod( + handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), + {}, {}, {}, info); + case 8: // ARRAY_2D.LZ + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::ColorArray2D); + return v.ir.ImageSampleExplicitLod( + handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), + zero, {}, {}, info); + case 9: // ARRAY_2D.LZ.DC + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + info.type.Assign(TextureType::ShadowArray2D); + return v.ir.ImageSampleDrefExplicitLod( + handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), + v.F(reg_b + 1), zero, {}, {}, info); + case 10: // 3D + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color3D); + return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {}, + {}, info); + case 11: // 3D.LZ + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::Color3D); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {}, + {}, info); + case 12: // CUBE + CheckAlignment(reg_a, 2); + info.type.Assign(TextureType::ColorCube); + return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {}, + {}, info); + case 13: // CUBE.LL + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + info.type.Assign(TextureType::ColorCube); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), + v.F(reg_b + 1), {}, {}, info); + default: + throw NotImplementedException("Illegal encoding {}", texs.encoding.Value()); + } +} + +unsigned Swizzle(u64 insn) { + const Encoding texs{insn}; + const size_t encoding{texs.swizzle}; + if (texs.dest_reg_b == IR::Reg::RZ) { + if (encoding >= RG_LUT.size()) { + throw NotImplementedException("Illegal RG encoding {}", encoding); + } + return RG_LUT[encoding]; + } else { + if (encoding >= RGBA_LUT.size()) { + throw NotImplementedException("Illegal RGBA encoding {}", encoding); + } + return RGBA_LUT[encoding]; + } +} + +IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { + const bool is_shadow{sample.Type() == IR::Type::F32}; + if (is_shadow) { + const bool is_alpha{component == 3}; + return is_alpha ? v.ir.Imm32(1.0f) : IR::F32{sample}; + } else { + return IR::F32{v.ir.CompositeExtract(sample, component)}; + } +} + +IR::Reg RegStoreComponent32(u64 insn, unsigned index) { + const Encoding texs{insn}; + switch (index) { + case 0: + return texs.dest_reg_a; + case 1: + CheckAlignment(texs.dest_reg_a, 2); + return texs.dest_reg_a + 1; + case 2: + return texs.dest_reg_b; + case 3: + CheckAlignment(texs.dest_reg_b, 2); + return texs.dest_reg_b + 1; + } + throw LogicError("Invalid store index {}", index); +} + +void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + const IR::Reg dest{RegStoreComponent32(insn, store_index)}; + v.F(dest, Extract(v, sample, component)); + ++store_index; + } +} + +IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { + return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); +} + +void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + std::array swizzled; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + swizzled[store_index] = Extract(v, sample, component); + ++store_index; + } + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const Encoding texs{insn}; + switch (store_index) { + case 1: + v.X(texs.dest_reg_a, Pack(v, swizzled[0], zero)); + break; + case 2: + case 3: + case 4: + v.X(texs.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); + switch (store_index) { + case 2: + break; + case 3: + v.X(texs.dest_reg_b, Pack(v, swizzled[2], zero)); + break; + case 4: + v.X(texs.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); + break; + } + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TEXS(u64 insn) { + const IR::Value sample{Sample(*this, insn)}; + if (Encoding{insn}.precision == Precision::F32) { + Store32(*this, insn, sample); + } else { + Store16(*this, insn, sample); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp deleted file mode 100644 index 98d9f4c64..000000000 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp +++ /dev/null @@ -1,232 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include "common/bit_field.h" -#include "common/common_types.h" -#include "shader_recompiler/frontend/ir/modifiers.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" - -namespace Shader::Maxwell { -namespace { -enum class Blod : u64 { - None, - LZ, - LB, - LL, - INVALIDBLOD4, - INVALIDBLOD5, - LBA, - LLA, -}; - -enum class TextureType : u64 { - _1D, - ARRAY_1D, - _2D, - ARRAY_2D, - _3D, - ARRAY_3D, - CUBE, - ARRAY_CUBE, -}; - -Shader::TextureType GetType(TextureType type, bool dc) { - switch (type) { - case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; - case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; - case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; - case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; - case TextureType::_3D: - return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; - case TextureType::ARRAY_3D: - throw NotImplementedException("3D array texture type"); - case TextureType::CUBE: - return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; - case TextureType::ARRAY_CUBE: - return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; - } - throw NotImplementedException("Invalid texture type {}", type); -} - -IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { - const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }}; - switch (type) { - case TextureType::_1D: - return v.F(reg); - case TextureType::ARRAY_1D: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); - case TextureType::_2D: - return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); - case TextureType::ARRAY_2D: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); - case TextureType::_3D: - return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); - case TextureType::ARRAY_3D: - throw NotImplementedException("3D array texture type"); - case TextureType::CUBE: - return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); - case TextureType::ARRAY_CUBE: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); - } - throw NotImplementedException("Invalid texture type {}", type); -} - -IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) { - switch (blod) { - case Blod::None: - return v.ir.Imm32(0.0f); - case Blod::LZ: - return v.ir.Imm32(0.0f); - case Blod::LB: - case Blod::LL: - case Blod::LBA: - case Blod::LLA: - return v.F(reg++); - case Blod::INVALIDBLOD4: - case Blod::INVALIDBLOD5: - break; - } - throw NotImplementedException("Invalid blod {}", blod); -} - -IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { - const IR::U32 value{v.X(reg++)}; - switch (type) { - case TextureType::_1D: - case TextureType::ARRAY_1D: - return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)); - case TextureType::_2D: - case TextureType::ARRAY_2D: - return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), - v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4))); - case TextureType::_3D: - case TextureType::ARRAY_3D: - return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), - v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)), - v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4))); - case TextureType::CUBE: - case TextureType::ARRAY_CUBE: - throw NotImplementedException("Illegal offset on CUBE sample"); - } - throw NotImplementedException("Invalid texture type {}", type); -} - -bool HasExplicitLod(Blod blod) { - switch (blod) { - case Blod::LL: - case Blod::LLA: - case Blod::LZ: - return true; - default: - return false; - } -} - -void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, - std::optional cbuf_offset) { - union { - u64 raw; - BitField<35, 1, u64> ndv; - BitField<49, 1, u64> nodep; - BitField<50, 1, u64> dc; - BitField<51, 3, IR::Pred> sparse_pred; - BitField<0, 8, IR::Reg> dest_reg; - BitField<8, 8, IR::Reg> coord_reg; - BitField<20, 8, IR::Reg> meta_reg; - BitField<28, 3, TextureType> type; - BitField<31, 4, u64> mask; - } const tex{insn}; - - if (lc) { - throw NotImplementedException("LC"); - } - const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)}; - - IR::Reg meta_reg{tex.meta_reg}; - IR::Value handle; - IR::Value offset; - IR::F32 dref; - IR::F32 lod_clamp; - if (cbuf_offset) { - handle = v.ir.Imm32(*cbuf_offset); - } else { - handle = v.X(meta_reg++); - } - const IR::F32 lod{MakeLod(v, meta_reg, blod)}; - if (aoffi) { - offset = MakeOffset(v, meta_reg, tex.type); - } - if (tex.dc != 0) { - dref = v.F(meta_reg++); - } - IR::TextureInstInfo info{}; - info.type.Assign(GetType(tex.type, tex.dc != 0)); - info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); - info.has_lod_clamp.Assign(lc ? 1 : 0); - - const IR::Value sample{[&]() -> IR::Value { - if (tex.dc == 0) { - if (HasExplicitLod(blod)) { - return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, lod_clamp, info); - } else { - return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); - } - } - if (HasExplicitLod(blod)) { - return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, lod_clamp, - info); - } else { - return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, - info); - } - }()}; - - for (int element = 0; element < 4; ++element) { - if (((tex.mask >> element) & 1) == 0) { - continue; - } - IR::F32 value; - if (tex.dc != 0) { - value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f); - } else { - value = IR::F32{v.ir.CompositeExtract(sample, element)}; - } - v.F(tex.dest_reg + element, value); - } - if (tex.sparse_pred != IR::Pred::PT) { - v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); - } -} -} // Anonymous namespace - -void TranslatorVisitor::TEX(u64 insn) { - union { - u64 raw; - BitField<54, 1, u64> aoffi; - BitField<55, 3, Blod> blod; - BitField<58, 1, u64> lc; - BitField<36, 13, u64> cbuf_offset; - } const tex{insn}; - - Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast(tex.cbuf_offset)); -} - -void TranslatorVisitor::TEX_b(u64 insn) { - union { - u64 raw; - BitField<36, 1, u64> aoffi; - BitField<37, 3, Blod> blod; - BitField<40, 1, u64> lc; - } const tex{insn}; - - Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt); -} - -} // namespace Shader::Maxwell -- cgit v1.2.3 From fa2f6e38f4d465ba6e5efe6c6bd23d8ef39b080d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 16 Mar 2021 00:57:07 -0400 Subject: shader: Implement FSET and FSETP Also fix oversight with adding SignedZeroInfNanPreserve execution mode. --- .../maxwell/translate/impl/common_funcs.cpp | 48 +++++++++++++++ .../frontend/maxwell/translate/impl/common_funcs.h | 6 ++ .../translate/impl/floating_point_compare.cpp | 68 ---------------------- .../impl/floating_point_compare_and_set.cpp | 65 +++++++++++++++++++++ .../impl/floating_point_set_predicate.cpp | 60 +++++++++++++++++++ .../frontend/maxwell/translate/impl/impl.h | 19 ++++++ .../maxwell/translate/impl/not_implemented.cpp | 24 -------- 7 files changed, 198 insertions(+), 92 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index 9d4ac2e36..af9a8f82c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -58,4 +58,52 @@ IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp } } +bool IsCompareOpOrdered(FPCompareOp op) { + switch (op) { + case FPCompareOp::LTU: + case FPCompareOp::EQU: + case FPCompareOp::LEU: + case FPCompareOp::GTU: + case FPCompareOp::NEU: + case FPCompareOp::GEU: + return false; + default: + return true; + } +} + +IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2, + FPCompareOp compare_op, IR::FpControl control) { + const bool ordered{IsCompareOpOrdered(compare_op)}; + switch (compare_op) { + case FPCompareOp::F: + return ir.Imm1(false); + case FPCompareOp::LT: + case FPCompareOp::LTU: + return ir.FPLessThan(operand_1, operand_2, control, ordered); + case FPCompareOp::EQ: + case FPCompareOp::EQU: + return ir.FPEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::LE: + case FPCompareOp::LEU: + return ir.FPLessThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::GT: + case FPCompareOp::GTU: + return ir.FPGreaterThan(operand_1, operand_2, control, ordered); + case FPCompareOp::NE: + case FPCompareOp::NEU: + return ir.FPNotEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::GE: + case FPCompareOp::GEU: + return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::NUM: + return ir.FPOrdered(operand_1, operand_2); + case FPCompareOp::Nan: + return ir.FPUnordered(operand_1, operand_2); + case FPCompareOp::T: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid FP compare op {}", compare_op); + } +} } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h index c9ae5c500..f8add3c34 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -15,4 +15,10 @@ namespace Shader::Maxwell { const IR::U1& predicate_2, BooleanOp bop); [[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op); + +[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op); + +[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, + const IR::F32& operand_2, FPCompareOp compare_op, + IR::FpControl control = {}); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp index f254ecb3a..e78e9c4e1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -9,74 +9,6 @@ namespace Shader::Maxwell { namespace { -enum class FPCompareOp : u64 { - F, - LT, - EQ, - LE, - GT, - NE, - GE, - NUM, - Nan, - LTU, - EQU, - LEU, - GTU, - NEU, - GEU, - T, -}; - -bool IsCompareOpOrdered(FPCompareOp op) { - switch (op) { - case FPCompareOp::LTU: - case FPCompareOp::EQU: - case FPCompareOp::LEU: - case FPCompareOp::GTU: - case FPCompareOp::NEU: - case FPCompareOp::GEU: - return false; - default: - return true; - } -} - -IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2, - FPCompareOp compare_op, IR::FpControl control) { - const bool ordered{IsCompareOpOrdered(compare_op)}; - switch (compare_op) { - case FPCompareOp::F: - return ir.Imm1(false); - case FPCompareOp::LT: - case FPCompareOp::LTU: - return ir.FPLessThan(operand_1, operand_2, control, ordered); - case FPCompareOp::EQ: - case FPCompareOp::EQU: - return ir.FPEqual(operand_1, operand_2, control, ordered); - case FPCompareOp::LE: - case FPCompareOp::LEU: - return ir.FPLessThanEqual(operand_1, operand_2, control, ordered); - case FPCompareOp::GT: - case FPCompareOp::GTU: - return ir.FPGreaterThan(operand_1, operand_2, control, ordered); - case FPCompareOp::NE: - case FPCompareOp::NEU: - return ir.FPNotEqual(operand_1, operand_2, control, ordered); - case FPCompareOp::GE: - case FPCompareOp::GEU: - return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered); - case FPCompareOp::NUM: - return ir.FPOrdered(operand_1, operand_2); - case FPCompareOp::Nan: - return ir.FPUnordered(operand_1, operand_2); - case FPCompareOp::T: - return ir.Imm1(true); - default: - throw NotImplementedException("Invalid compare op {}", compare_op); - } -} - void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) { union { u64 insn; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp new file mode 100644 index 000000000..c5417775e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp @@ -0,0 +1,65 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<48, 4, FPCompareOp> compare_op; + BitField<52, 1, u64> bf; + BitField<53, 1, u64> negate_b; + BitField<54, 1, u64> abs_a; + BitField<55, 1, u64> ftz; + } const fset{insn}; + + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)}; + const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0); + const IR::FpControl control{ + .no_contraction{false}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + + IR::U1 pred{v.ir.GetPred(fset.pred)}; + if (fset.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)}; + const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)}; + + const IR::U32 one_mask{v.ir.Imm32(-1)}; + const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; + const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 pass_result{fset.bf == 0 ? one_mask : fp_one}; + + v.X(fset.dest_reg, IR::U32{v.ir.Select(bop_result, pass_result, fail_result)}); +} +} // Anonymous namespace + +void TranslatorVisitor::FSET_reg(u64 insn) { + FSET(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FSET_cbuf(u64 insn) { + FSET(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FSET_imm(u64 insn) { + FSET(*this, insn, GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp new file mode 100644 index 000000000..8ff9db843 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp @@ -0,0 +1,60 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 insn; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<6, 1, u64> negate_b; + BitField<7, 1, u64> abs_a; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> ftz; + BitField<48, 4, FPCompareOp> compare_op; + } const fsetp{insn}; + + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)}; + const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0); + const IR::FpControl control{ + .no_contraction{false}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + + const BooleanOp bop{fsetp.bop}; + const FPCompareOp compare_op{fsetp.compare_op}; + const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)}; + const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)}; + const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; + v.ir.SetPred(fsetp.dest_pred_a, result_a); + v.ir.SetPred(fsetp.dest_pred_b, result_b); +} +} // Anonymous namespace + +void TranslatorVisitor::FSETP_reg(u64 insn) { + FSETP(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FSETP_cbuf(u64 insn) { + FSETP(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FSETP_imm(u64 insn) { + FSETP(*this, insn, GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 45d6f5e06..761b64666 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -35,6 +35,25 @@ enum class PredicateOp : u64 { NonZero, }; +enum class FPCompareOp : u64 { + F, + LT, + EQ, + LE, + GT, + NE, + GE, + NUM, + Nan, + LTU, + EQU, + LEU, + GTU, + NEU, + GEU, + T, +}; + class TranslatorVisitor { public: explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 9aa7b836c..b31928370 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -201,30 +201,6 @@ void TranslatorVisitor::FCHK_imm(u64) { ThrowNotImplemented(Opcode::FCHK_imm); } -void TranslatorVisitor::FSET_reg(u64) { - ThrowNotImplemented(Opcode::FSET_reg); -} - -void TranslatorVisitor::FSET_cbuf(u64) { - ThrowNotImplemented(Opcode::FSET_cbuf); -} - -void TranslatorVisitor::FSET_imm(u64) { - ThrowNotImplemented(Opcode::FSET_imm); -} - -void TranslatorVisitor::FSETP_reg(u64) { - ThrowNotImplemented(Opcode::FSETP_reg); -} - -void TranslatorVisitor::FSETP_cbuf(u64) { - ThrowNotImplemented(Opcode::FSETP_cbuf); -} - -void TranslatorVisitor::FSETP_imm(u64) { - ThrowNotImplemented(Opcode::FSETP_imm); -} - void TranslatorVisitor::FSWZADD(u64) { ThrowNotImplemented(Opcode::FSWZADD); } -- cgit v1.2.3 From 8dd0acfaeba9396fb5c1e142a431a2a29f345855 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 17 Mar 2021 01:30:23 -0300 Subject: shader: Fix instruction transitions in and out of Phi --- .../frontend/ir/microinstruction.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 88e186f21..5946105d2 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -182,7 +182,7 @@ void Inst::AddPhiOperand(Block* predecessor, const Value& value) { void Inst::Invalidate() { ClearArgs(); - op = Opcode::Void; + ReplaceOpcode(Opcode::Void); } void Inst::ClearArgs() { @@ -206,20 +206,22 @@ void Inst::ClearArgs() { void Inst::ReplaceUsesWith(Value replacement) { Invalidate(); - - op = Opcode::Identity; - + ReplaceOpcode(Opcode::Identity); if (!replacement.IsImmediate()) { Use(replacement); } - if (op == Opcode::Phi) { - phi_args[0].second = replacement; - } else { - args[0] = replacement; - } + args[0] = replacement; } void Inst::ReplaceOpcode(IR::Opcode opcode) { + if (opcode == IR::Opcode::Phi) { + throw LogicError("Cannot transition into Phi"); + } + if (op == Opcode::Phi) { + // Transition out of phi arguments into non-phi + std::destroy_at(&phi_args); + std::construct_at(&args); + } op = opcode; } -- cgit v1.2.3 From 3b7fd3ad0fcb0419c455c16127f43d01b6dc7fc9 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 17 Mar 2021 00:53:53 -0400 Subject: shader: Implement CSET and CSETP --- src/shader_recompiler/frontend/ir/flow_test.h | 5 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 60 ++++++++++++++++++++-- src/shader_recompiler/frontend/ir/ir_emitter.h | 1 + .../maxwell/translate/impl/condition_code_set.cpp | 54 +++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 8 --- 5 files changed, 113 insertions(+), 15 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/flow_test.h b/src/shader_recompiler/frontend/ir/flow_test.h index ac883da13..09e113773 100644 --- a/src/shader_recompiler/frontend/ir/flow_test.h +++ b/src/shader_recompiler/frontend/ir/flow_test.h @@ -5,12 +5,13 @@ #pragma once #include - #include +#include "common/common_types.h" + namespace Shader::IR { -enum class FlowTest { +enum class FlowTest : u64 { F, LT, EQ, diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index d94596ee9..958282160 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -169,16 +169,62 @@ void IREmitter::SetOFlag(const U1& value) { static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { switch (flow_test) { - case FlowTest::T: - return ir.Imm1(true); case FlowTest::F: return ir.Imm1(false); + case FlowTest::LT: + return ir.LogicalXor(ir.LogicalAnd(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag())), + ir.GetOFlag()); case FlowTest::EQ: - // TODO: Test this - return ir.GetZFlag(); + return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag()); + case FlowTest::LE: + return ir.LogicalXor(ir.GetSFlag(), ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag())); + case FlowTest::GT: + return ir.LogicalAnd(ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()), ir.GetOFlag()), + ir.LogicalNot(ir.GetZFlag())); case FlowTest::NE: - // TODO: Test this return ir.LogicalNot(ir.GetZFlag()); + case FlowTest::GE: + return ir.LogicalNot(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag())); + case FlowTest::NUM: + return ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag())); + case FlowTest::NaN: + return ir.LogicalAnd(ir.GetSFlag(), ir.GetZFlag()); + case FlowTest::LTU: + return ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()); + case FlowTest::EQU: + return ir.GetZFlag(); + case FlowTest::LEU: + return ir.LogicalOr(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()), ir.GetZFlag()); + case FlowTest::GTU: + return ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()), + ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag())); + case FlowTest::NEU: + return ir.LogicalOr(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag())); + case FlowTest::GEU: + return ir.LogicalXor(ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag()), + ir.GetOFlag()); + case FlowTest::T: + return ir.Imm1(true); + case FlowTest::OFF: + return ir.LogicalNot(ir.GetOFlag()); + case FlowTest::LO: + return ir.LogicalNot(ir.GetCFlag()); + case FlowTest::SFF: + return ir.LogicalNot(ir.GetSFlag()); + case FlowTest::LS: + return ir.LogicalOr(ir.GetZFlag(), ir.LogicalNot(ir.GetCFlag())); + case FlowTest::HI: + return ir.LogicalAnd(ir.GetCFlag(), ir.LogicalNot(ir.GetZFlag())); + case FlowTest::SFT: + return ir.GetSFlag(); + case FlowTest::HS: + return ir.GetCFlag(); + case FlowTest::OFT: + return ir.GetOFlag(); + case FlowTest::RLE: + return ir.LogicalOr(ir.GetSFlag(), ir.GetZFlag()); + case FlowTest::RGT: + return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag())); default: throw NotImplementedException("Flow test {}", flow_test); } @@ -190,6 +236,10 @@ U1 IREmitter::Condition(IR::Condition cond) { return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test)); } +U1 IREmitter::GetFlowTestResult(FlowTest test) { + return GetFlowTest(*this, test); +} + F32 IREmitter::GetAttribute(IR::Attribute attribute) { return Inst(Opcode::GetAttribute, attribute); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 27ff5a29d..05263fe8b 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -62,6 +62,7 @@ public: void SetOFlag(const U1& value); [[nodiscard]] U1 Condition(IR::Condition cond); + [[nodiscard]] U1 GetFlowTestResult(FlowTest test); [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); void SetAttribute(IR::Attribute attribute, const F32& value); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp new file mode 100644 index 000000000..ea0c40a54 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp @@ -0,0 +1,54 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +void TranslatorVisitor::CSET(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 5, IR::FlowTest> cc_test; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<44, 1, u64> bf; + BitField<45, 2, BooleanOp> bop; + } const cset{insn}; + + const IR::U32 one_mask{ir.Imm32(-1)}; + const IR::U32 fp_one{ir.Imm32(0x3f800000)}; + const IR::U32 fail_result{ir.Imm32(0)}; + const IR::U32 pass_result{cset.bf == 0 ? one_mask : fp_one}; + const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)}; + const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)}; + const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)}; + const IR::U32 result{ir.Select(pred_result, pass_result, fail_result)}; + X(cset.dest_reg, result); +} + +void TranslatorVisitor::CSETP(u64 insn) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<8, 5, IR::FlowTest> cc_test; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<45, 2, BooleanOp> bop; + } const csetp{insn}; + + const BooleanOp bop{csetp.bop}; + const IR::U1 bop_pred{ir.GetPred(csetp.bop_pred, csetp.neg_bop_pred != 0)}; + const IR::U1 cc_test_result{ir.GetFlowTestResult(csetp.cc_test)}; + const IR::U1 result_a{PredicateCombine(ir, cc_test_result, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(cc_test_result), bop_pred, bop)}; + ir.SetPred(csetp.dest_pred_a, result_a); + ir.SetPred(csetp.dest_pred_b, result_b); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index b31928370..0325f14ea 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -85,14 +85,6 @@ void TranslatorVisitor::CS2R(u64) { ThrowNotImplemented(Opcode::CS2R); } -void TranslatorVisitor::CSET(u64) { - ThrowNotImplemented(Opcode::CSET); -} - -void TranslatorVisitor::CSETP(u64) { - ThrowNotImplemented(Opcode::CSETP); -} - void TranslatorVisitor::DADD_reg(u64) { ThrowNotImplemented(Opcode::DADD_reg); } -- cgit v1.2.3 From 72990df7bad1c81d6ebc51179d34e1bfc71e0caf Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 18 Mar 2021 02:53:57 -0400 Subject: shader: Implement DADD --- src/shader_recompiler/frontend/ir/value.cpp | 8 +++ src/shader_recompiler/frontend/ir/value.h | 1 + .../frontend/maxwell/translate/impl/double_add.cpp | 67 ++++++++++++++++++++++ .../frontend/maxwell/translate/impl/impl.cpp | 52 ++++++++++++++++- .../frontend/maxwell/translate/impl/impl.h | 3 + .../maxwell/translate/impl/not_implemented.cpp | 12 ---- 6 files changed, 129 insertions(+), 14 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 791ba2690..e8e4662e7 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -153,6 +153,14 @@ u64 Value::U64() const { return imm_u64; } +f64 Value::F64() const { + if (IsIdentity()) { + return inst->Arg(0).F64(); + } + ValidateAccess(Type::F64); + return imm_f64; +} + bool Value::operator==(const Value& other) const { if (type != other.type) { return false; diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 3602883d6..b27601e70 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -52,6 +52,7 @@ public: [[nodiscard]] u32 U32() const; [[nodiscard]] f32 F32() const; [[nodiscard]] u64 U64() const; + [[nodiscard]] f64 F64() const; [[nodiscard]] bool operator==(const Value& other) const; [[nodiscard]] bool operator!=(const Value& other) const; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp new file mode 100644 index 000000000..bece191d7 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp @@ -0,0 +1,67 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 2, FpRounding> fp_rounding; + BitField<45, 1, u64> neg_b; + BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_a; + BitField<49, 1, u64> abs_b; + } const dadd{insn}; + + if (!IR::IsAligned(dadd.dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register {}", dadd.dest_reg.Value()); + } + if (!IR::IsAligned(dadd.src_a_reg, 2)) { + throw NotImplementedException("Unaligned destination register {}", dadd.src_a_reg.Value()); + } + if (dadd.cc != 0) { + throw NotImplementedException("DADD CC"); + } + + const IR::Reg reg_a{dadd.src_a_reg}; + const IR::F64 src_a{v.ir.PackDouble2x32(v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)))}; + const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; + + IR::FpControl control{ + .no_contraction{true}, + .rounding{CastFpRounding(dadd.fp_rounding)}, + .fmz_mode{IR::FmzMode::None}, + }; + const IR::F64 value{v.ir.FPAdd(op_a, op_b, control)}; + const IR::Value result{v.ir.UnpackDouble2x32(value)}; + + for (int i = 0; i < 2; i++) { + v.X(dadd.dest_reg + i, IR::U32{v.ir.CompositeExtract(result, i)}); + } +} +} // Anonymous namespace + +void TranslatorVisitor::DADD_reg(u64 insn) { + DADD(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DADD_cbuf(u64 insn) { + DADD(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DADD_imm(u64 insn) { + DADD(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 7564aeeb2..e444dcd4f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -7,6 +7,15 @@ #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { +namespace { +[[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding, + u32 offset) { + if (unaligned) { + return ir.Imm32(0); + } + return ir.GetCbuf(binding, IR::U32{IR::Value{offset}}); +} +} // Anonymous namespace IR::U32 TranslatorVisitor::X(IR::Reg reg) { return ir.GetReg(reg); @@ -56,6 +65,18 @@ IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) { return ir.BitCast(GetReg39(insn)); } +IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) { + union { + u64 raw; + BitField<20, 8, IR::Reg> src; + } const index{insn}; + const IR::Reg reg{index.src}; + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + return ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1))); +} + static std::pair CbufAddr(u64 insn) { union { u64 raw; @@ -75,15 +96,31 @@ static std::pair CbufAddr(u64 insn) { } IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { - const auto[binding, byte_offset]{CbufAddr(insn)}; + const auto [binding, byte_offset]{CbufAddr(insn)}; return ir.GetCbuf(binding, byte_offset); } IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) { - const auto[binding, byte_offset]{CbufAddr(insn)}; + const auto [binding, byte_offset]{CbufAddr(insn)}; return ir.GetFloatCbuf(binding, byte_offset); } +IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { + union { + u64 raw; + BitField<20, 1, u64> unaligned; + } const cbuf{insn}; + + const auto [binding, offset_value]{CbufAddr(insn)}; + const bool unaligned{cbuf.unaligned != 0}; + const u32 offset{offset_value.U32()}; + const IR::Value addr{unaligned ? offset | 4 : (offset & ~7) | 4}; + + const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})}; + const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)}; + return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value)); +} + IR::U32 TranslatorVisitor::GetImm20(u64 insn) { union { u64 raw; @@ -110,6 +147,17 @@ IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { return ir.Imm32(Common::BitCast(value | sign_bit)); } +IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) { + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const imm{insn}; + const u64 sign_bit{imm.is_negative != 0 ? (1ULL << 63) : 0}; + const u64 value{imm.value << 44}; + return ir.Imm64(Common::BitCast(value | sign_bit)); +} + IR::U32 TranslatorVisitor::GetImm32(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 761b64666..e3e298c3b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -351,12 +351,15 @@ public: [[nodiscard]] IR::U32 GetReg39(u64 insn); [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); + [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn); [[nodiscard]] IR::U32 GetCbuf(u64 insn); [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); + [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn); [[nodiscard]] IR::U32 GetImm20(u64 insn); [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); + [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn); [[nodiscard]] IR::U32 GetImm32(u64 insn); [[nodiscard]] IR::F32 GetFloatImm32(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 0325f14ea..9675cef54 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -85,18 +85,6 @@ void TranslatorVisitor::CS2R(u64) { ThrowNotImplemented(Opcode::CS2R); } -void TranslatorVisitor::DADD_reg(u64) { - ThrowNotImplemented(Opcode::DADD_reg); -} - -void TranslatorVisitor::DADD_cbuf(u64) { - ThrowNotImplemented(Opcode::DADD_cbuf); -} - -void TranslatorVisitor::DADD_imm(u64) { - ThrowNotImplemented(Opcode::DADD_imm); -} - void TranslatorVisitor::DEPBAR() { // DEPBAR is a no-op } -- cgit v1.2.3 From 260743f371236f7c57b01334b1c3474b15a47c39 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Mar 2021 19:28:31 -0300 Subject: shader: Add partial rasterizer integration --- src/shader_recompiler/frontend/ir/attribute.cpp | 2 +- src/shader_recompiler/frontend/ir/attribute.h | 2 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 14 ++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 4 + .../frontend/ir/microinstruction.cpp | 3 + src/shader_recompiler/frontend/ir/opcodes.inc | 11 ++- src/shader_recompiler/frontend/ir/program.h | 2 + src/shader_recompiler/frontend/ir/reg.h | 4 +- .../frontend/maxwell/control_flow.cpp | 31 ++++++-- .../frontend/maxwell/control_flow.h | 3 +- src/shader_recompiler/frontend/maxwell/program.cpp | 1 + .../frontend/maxwell/structured_control_flow.cpp | 18 +++++ .../frontend/maxwell/translate/impl/exit.cpp | 15 ---- .../maxwell/translate/impl/exit_program.cpp | 43 +++++++++++ .../frontend/maxwell/translate/impl/impl.h | 4 +- .../translate/impl/load_store_attribute.cpp | 86 +++++++++++++++++++++- .../maxwell/translate/impl/not_implemented.cpp | 16 +--- .../maxwell/translate/impl/texture_fetch.cpp | 2 +- .../translate/impl/texture_fetch_swizzled.cpp | 2 +- 19 files changed, 212 insertions(+), 51 deletions(-) delete mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp index 2fb7d576f..4811242ea 100644 --- a/src/shader_recompiler/frontend/ir/attribute.cpp +++ b/src/shader_recompiler/frontend/ir/attribute.cpp @@ -13,7 +13,7 @@ bool IsGeneric(Attribute attribute) noexcept { return attribute >= Attribute::Generic0X && attribute <= Attribute::Generic31X; } -int GenericAttributeIndex(Attribute attribute) { +u32 GenericAttributeIndex(Attribute attribute) { if (!IsGeneric(attribute)) { throw InvalidArgument("Attribute is not generic {}", attribute); } diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h index bb2cad6af..34ec7e0cd 100644 --- a/src/shader_recompiler/frontend/ir/attribute.h +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -224,7 +224,7 @@ enum class Attribute : u64 { [[nodiscard]] bool IsGeneric(Attribute attribute) noexcept; -[[nodiscard]] int GenericAttributeIndex(Attribute attribute); +[[nodiscard]] u32 GenericAttributeIndex(Attribute attribute); [[nodiscard]] std::string NameOf(Attribute attribute); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 958282160..672836c0b 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -82,6 +82,12 @@ void IREmitter::Return() { Inst(Opcode::Return); } +void IREmitter::DemoteToHelperInvocation(Block* continue_label) { + block->SetBranch(continue_label); + continue_label->AddImmediatePredecessor(block); + Inst(Opcode::DemoteToHelperInvocation, continue_label); +} + U32 IREmitter::GetReg(IR::Reg reg) { return Inst(Opcode::GetRegister, reg); } @@ -248,6 +254,14 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) { Inst(Opcode::SetAttribute, attribute, value); } +void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { + Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value); +} + +void IREmitter::SetFragDepth(const F32& value) { + Inst(Opcode::SetFragDepth, value); +} + U32 IREmitter::WorkgroupIdX() { return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 0)}; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 05263fe8b..72af5db37 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -36,6 +36,7 @@ public: void LoopMerge(Block* merge_block, Block* continue_target); void SelectionMerge(Block* merge_block); void Return(); + void DemoteToHelperInvocation(Block* continue_label); [[nodiscard]] U32 GetReg(IR::Reg reg); void SetReg(IR::Reg reg, const U32& value); @@ -67,6 +68,9 @@ public: [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); void SetAttribute(IR::Attribute attribute, const F32& value); + void SetFragColor(u32 index, u32 component, const F32& value); + void SetFragDepth(const F32& value); + [[nodiscard]] U32 WorkgroupIdX(); [[nodiscard]] U32 WorkgroupIdY(); [[nodiscard]] U32 WorkgroupIdZ(); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 5946105d2..21b7d8a9f 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -55,8 +55,11 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::LoopMerge: case Opcode::SelectionMerge: case Opcode::Return: + case Opcode::DemoteToHelperInvocation: case Opcode::SetAttribute: case Opcode::SetAttributeIndexed: + case Opcode::SetFragColor: + case Opcode::SetFragDepth: case Opcode::WriteGlobalU8: case Opcode::WriteGlobalS8: case Opcode::WriteGlobalU16: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 9052a4903..593faca52 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -13,6 +13,7 @@ OPCODE(BranchConditional, Void, U1, OPCODE(LoopMerge, Void, Label, Label, ) OPCODE(SelectionMerge, Void, Label, ) OPCODE(Return, Void, ) +OPCODE(DemoteToHelperInvocation, Void, Label, ) // Context getters/setters OPCODE(GetRegister, U32, Reg, ) @@ -28,10 +29,12 @@ OPCODE(GetCbufS16, U32, U32, OPCODE(GetCbufU32, U32, U32, U32, ) OPCODE(GetCbufF32, F32, U32, U32, ) OPCODE(GetCbufU64, U64, U32, U32, ) -OPCODE(GetAttribute, U32, Attribute, ) -OPCODE(SetAttribute, Void, Attribute, U32, ) -OPCODE(GetAttributeIndexed, U32, U32, ) -OPCODE(SetAttributeIndexed, Void, U32, U32, ) +OPCODE(GetAttribute, F32, Attribute, ) +OPCODE(SetAttribute, Void, Attribute, F32, ) +OPCODE(GetAttributeIndexed, F32, U32, ) +OPCODE(SetAttributeIndexed, Void, U32, F32, ) +OPCODE(SetFragColor, Void, U32, U32, F32, ) +OPCODE(SetFragDepth, Void, F32, ) OPCODE(GetZFlag, U1, Void, ) OPCODE(GetSFlag, U1, Void, ) OPCODE(GetCFlag, U1, Void, ) diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index bce8b19b3..733513c8b 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -10,6 +10,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/shader_info.h" +#include "shader_recompiler/stage.h" namespace Shader::IR { @@ -17,6 +18,7 @@ struct Program { BlockList blocks; BlockList post_order_blocks; Info info; + Stage stage{}; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h index 8fea05f7b..3845ec5fb 100644 --- a/src/shader_recompiler/frontend/ir/reg.h +++ b/src/shader_recompiler/frontend/ir/reg.h @@ -293,12 +293,12 @@ constexpr size_t NUM_REGS = 256; return reg + (-num); } -[[nodiscard]] constexpr Reg operator++(Reg& reg) { +constexpr Reg operator++(Reg& reg) { reg = reg + 1; return reg; } -[[nodiscard]] constexpr Reg operator++(Reg& reg, int) { +constexpr Reg operator++(Reg& reg, int) { const Reg copy{reg}; reg = reg + 1; return copy; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 715c0e92d..4f6707fae 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -104,6 +104,7 @@ bool HasFlowTest(Opcode opcode) { case Opcode::EXIT: case Opcode::JMP: case Opcode::JMX: + case Opcode::KIL: case Opcode::BRK: case Opcode::CONT: case Opcode::LONGJMP: @@ -287,6 +288,13 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati block->end = pc; return AnalysisState::Branch; } + case Opcode::KIL: { + const Predicate pred{inst.Pred()}; + const auto ir_pred{static_cast(pred.index)}; + const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated}; + AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond); + return AnalysisState::Branch; + } case Opcode::PBK: case Opcode::PCNT: case Opcode::PEXIT: @@ -324,13 +332,12 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati return AnalysisState::Continue; } const IR::Condition cond{static_cast(pred.index), pred.negated}; - AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond, true); + AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond); return AnalysisState::Branch; } void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, - EndClass insn_end_class, IR::Condition cond, - bool visit_conditional_inst) { + EndClass insn_end_class, IR::Condition cond) { if (block->begin != pc) { // If the block doesn't start in the conditional instruction // mark it as a label to visit it later @@ -356,14 +363,16 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, // Impersonate the visited block with a virtual block *block = std::move(virtual_block); // Set the end properties of the conditional instruction - conditional_block->end = visit_conditional_inst ? (pc + 1) : pc; + conditional_block->end = pc + 1; conditional_block->end_class = insn_end_class; // Add a label to the instruction after the conditional instruction Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; // Branch to the next instruction from the virtual block block->branch_false = endif_block; - // And branch to it from the conditional instruction if it is a branch - if (insn_end_class == EndClass::Branch) { + // And branch to it from the conditional instruction if it is a branch or a kill instruction + // Kill instructions are considered a branch because they demote to a helper invocation and + // execution may continue. + if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) { conditional_block->cond = IR::Condition{true}; conditional_block->branch_true = endif_block; conditional_block->branch_false = nullptr; @@ -415,7 +424,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati throw NotImplementedException("Conditional EXIT with PEXIT token"); } const IR::Condition cond{flow_test, static_cast(pred.index), pred.negated}; - AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond, false); + AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); return AnalysisState::Branch; } if (const std::optional exit_pc{block->stack.Peek(Token::PEXIT)}) { @@ -425,7 +434,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati block->branch_false = nullptr; return AnalysisState::Branch; } - block->end = pc; + block->end = pc + 1; block->end_class = EndClass::Exit; return AnalysisState::Branch; } @@ -505,6 +514,12 @@ std::string CFG::Dot() const { node_uid); ++node_uid; break; + case EndClass::Kill: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n", + node_uid); + ++node_uid; + break; } } if (function.entrypoint == 8) { diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index fe74f210f..22f134194 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -29,6 +29,7 @@ enum class EndClass { Call, Exit, Return, + Kill, }; enum class Token { @@ -130,7 +131,7 @@ private: AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc); void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, - IR::Condition cond, bool visit_conditional_inst); + IR::Condition cond); /// Return true when the branch instruction is confirmed to be a branch bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8bfa64326..0074eb89b 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -32,6 +32,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPoolid, DumpExpr(stmt->op)); break; @@ -424,6 +430,9 @@ private: gotos.push_back(root.insert(ip, *goto_stmt)); break; } + case Flow::EndClass::Kill: + root.insert(ip, *pool.Create(Kill{})); + break; } } } @@ -729,6 +738,15 @@ private: current_block = nullptr; break; } + case StatementType::Kill: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IR::IREmitter{*current_block}.DemoteToHelperInvocation(continue_block); + current_block = nullptr; + break; + } default: throw NotImplementedException("Statement type {}", stmt.type); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp deleted file mode 100644 index e98bbd0d1..000000000 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/common_types.h" -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" - -namespace Shader::Maxwell { - -void TranslatorVisitor::EXIT(u64) { - ir.Exit(); -} - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp new file mode 100644 index 000000000..ea9b33da9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp @@ -0,0 +1,43 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ExitFragment(TranslatorVisitor& v) { + const ProgramHeader sph{v.env.SPH()}; + IR::Reg src_reg{IR::Reg::R0}; + for (u32 render_target = 0; render_target < 8; ++render_target) { + const std::array mask{sph.ps.EnabledOutputComponents(render_target)}; + for (u32 component = 0; component < 4; ++component) { + if (!mask[component]) { + continue; + } + v.ir.SetFragColor(render_target, component, v.F(src_reg)); + ++src_reg; + } + } + if (sph.ps.omap.sample_mask != 0) { + throw NotImplementedException("Sample mask"); + } + if (sph.ps.omap.depth != 0) { + throw NotImplementedException("Fragment depth"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::EXIT() { + switch (env.ShaderStage()) { + case Stage::Fragment: + ExitFragment(*this); + break; + default: + break; + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index e3e298c3b..ed81d9c36 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -108,7 +108,7 @@ public: void DSETP_reg(u64 insn); void DSETP_cbuf(u64 insn); void DSETP_imm(u64 insn); - void EXIT(u64 insn); + void EXIT(); void F2F_reg(u64 insn); void F2F_cbuf(u64 insn); void F2F_imm(u64 insn); @@ -220,7 +220,7 @@ public: void JCAL(u64 insn); void JMP(u64 insn); void JMX(u64 insn); - void KIL(u64 insn); + void KIL(); void LD(u64 insn); void LDC(u64 insn); void LDG(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index ad97786d4..2922145ee 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -11,6 +11,13 @@ namespace Shader::Maxwell { namespace { +enum class Size : u64 { + B32, + B64, + B96, + B128, +}; + enum class InterpolationMode : u64 { Pass, Multiply, @@ -23,8 +30,85 @@ enum class SampleMode : u64 { Centroid, Offset, }; + +int NumElements(Size size) { + switch (size) { + case Size::B32: + return 1; + case Size::B64: + return 2; + case Size::B96: + return 3; + case Size::B128: + return 4; + } + throw InvalidArgument("Invalid size {}", size); +} } // Anonymous namespace +void TranslatorVisitor::ALD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 10, u64> absolute_offset; + BitField<20, 11, s64> relative_offset; + BitField<39, 8, IR::Reg> stream_reg; + BitField<32, 1, u64> o; + BitField<31, 1, u64> patch; + BitField<47, 2, Size> size; + } const ald{insn}; + + if (ald.o != 0) { + throw NotImplementedException("O"); + } + if (ald.patch != 0) { + throw NotImplementedException("P"); + } + if (ald.index_reg != IR::Reg::RZ) { + throw NotImplementedException("Indexed"); + } + const u64 offset{ald.absolute_offset.Value()}; + if (offset % 4 != 0) { + throw NotImplementedException("Unaligned absolute offset {}", offset); + } + const int num_elements{NumElements(ald.size)}; + for (int element = 0; element < num_elements; ++element) { + F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element})); + } +} + +void TranslatorVisitor::AST(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 10, u64> absolute_offset; + BitField<20, 11, s64> relative_offset; + BitField<31, 1, u64> patch; + BitField<39, 8, IR::Reg> stream_reg; + BitField<47, 2, Size> size; + } const ast{insn}; + + if (ast.patch != 0) { + throw NotImplementedException("P"); + } + if (ast.stream_reg != IR::Reg::RZ) { + throw NotImplementedException("Stream store"); + } + if (ast.index_reg != IR::Reg::RZ) { + throw NotImplementedException("Indexed store"); + } + const u64 offset{ast.absolute_offset.Value()}; + if (offset % 4 != 0) { + throw NotImplementedException("Unaligned absolute offset {}", offset); + } + const int num_elements{NumElements(ast.size)}; + for (int element = 0; element < num_elements; ++element) { + ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element)); + } +} + void TranslatorVisitor::IPA(u64 insn) { // IPA is the instruction used to read varyings from a fragment shader. // gl_FragCoord is mapped to the gl_Position attribute. @@ -51,7 +135,7 @@ void TranslatorVisitor::IPA(u64 insn) { // } const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ}; if (is_indexed) { - throw NotImplementedException("IPA.IDX"); + throw NotImplementedException("IDX"); } const IR::Attribute attribute{ipa.attribute}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 9675cef54..59252bcc5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -17,14 +17,6 @@ void TranslatorVisitor::AL2P(u64) { ThrowNotImplemented(Opcode::AL2P); } -void TranslatorVisitor::ALD(u64) { - ThrowNotImplemented(Opcode::ALD); -} - -void TranslatorVisitor::AST(u64) { - ThrowNotImplemented(Opcode::AST); -} - void TranslatorVisitor::ATOM_cas(u64) { ThrowNotImplemented(Opcode::ATOM_cas); } @@ -153,10 +145,6 @@ void TranslatorVisitor::DSETP_imm(u64) { ThrowNotImplemented(Opcode::DSETP_imm); } -void TranslatorVisitor::EXIT(u64) { - throw LogicError("Visting EXIT instruction"); -} - void TranslatorVisitor::F2F_reg(u64) { ThrowNotImplemented(Opcode::F2F_reg); } @@ -345,8 +333,8 @@ void TranslatorVisitor::JMX(u64) { ThrowNotImplemented(Opcode::JMX); } -void TranslatorVisitor::KIL(u64) { - ThrowNotImplemented(Opcode::KIL); +void TranslatorVisitor::KIL() { + // KIL is a no-op } void TranslatorVisitor::LD(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 98d9f4c64..0fbb87ec4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -215,7 +215,7 @@ void TranslatorVisitor::TEX(u64 insn) { BitField<36, 13, u64> cbuf_offset; } const tex{insn}; - Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast(tex.cbuf_offset)); + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast(tex.cbuf_offset * 4)); } void TranslatorVisitor::TEX_b(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index ac1615b00..54f0df754 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -70,7 +70,7 @@ IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { IR::Value Sample(TranslatorVisitor& v, u64 insn) { const Encoding texs{insn}; - const IR::U32 handle{v.ir.Imm32(static_cast(texs.cbuf_offset))}; + const IR::U32 handle{v.ir.Imm32(static_cast(texs.cbuf_offset * 4))}; const IR::F32 zero{v.ir.Imm32(0.0f)}; const IR::Reg reg_a{texs.src_reg_a}; const IR::Reg reg_b{texs.src_reg_b}; -- cgit v1.2.3 From eeb1efa2d2947faed55340e8aec3a0187a3729a1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Mar 2021 20:28:26 -0300 Subject: shader: Implement LOP32I --- .../maxwell/translate/impl/logic_operation.cpp | 59 +++++++++++++++++----- .../maxwell/translate/impl/not_implemented.cpp | 4 -- 2 files changed, 45 insertions(+), 18 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp index e786a388e..89e5cd6de 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp @@ -32,34 +32,51 @@ enum class LogicalOp : u64 { } } -void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { +void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv_a, bool inv_b, + LogicalOp bit_op, std::optional pred_op = std::nullopt, + IR::Pred dest_pred = IR::Pred::PT) { union { u64 insn; BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> src_reg; - BitField<39, 1, u64> neg_a; - BitField<40, 1, u64> neg_b; - BitField<41, 2, LogicalOp> bit_op; - BitField<43, 1, u64> x; - BitField<44, 2, PredicateOp> pred_op; - BitField<48, 3, IR::Pred> pred; } const lop{insn}; - if (lop.x != 0) { - throw NotImplementedException("LOP X"); + if (x) { + throw NotImplementedException("X"); + } + if (cc) { + throw NotImplementedException("CC"); } IR::U32 op_a{v.X(lop.src_reg)}; - if (lop.neg_a != 0) { + if (inv_a != 0) { op_a = v.ir.BitwiseNot(op_a); } - if (lop.neg_b != 0) { + if (inv_b != 0) { op_b = v.ir.BitwiseNot(op_b); } - const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, lop.bit_op)}; - const IR::U1 pred_result{PredicateOperation(v.ir, result, lop.pred_op)}; + const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, bit_op)}; + if (pred_op) { + const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)}; + v.ir.SetPred(dest_pred, pred_result); + } v.X(lop.dest_reg, result); - v.ir.SetPred(lop.pred, pred_result); +} + +void LOP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { + union { + u64 insn; + BitField<39, 1, u64> inv_a; + BitField<40, 1, u64> inv_b; + BitField<41, 2, LogicalOp> bit_op; + BitField<43, 1, u64> x; + BitField<44, 2, PredicateOp> pred_op; + BitField<47, 1, u64> cc; + BitField<48, 3, IR::Pred> dest_pred; + } const lop{insn}; + + LOP(v, insn, op_b, lop.x != 0, lop.cc != 0, lop.inv_a != 0, lop.inv_b != 0, lop.bit_op, + lop.pred_op, lop.dest_pred); } } // Anonymous namespace @@ -74,4 +91,18 @@ void TranslatorVisitor::LOP_cbuf(u64 insn) { void TranslatorVisitor::LOP_imm(u64 insn) { LOP(*this, insn, GetImm20(insn)); } + +void TranslatorVisitor::LOP32I(u64 insn) { + union { + u64 raw; + BitField<53, 2, LogicalOp> bit_op; + BitField<57, 1, u64> x; + BitField<52, 1, u64> cc; + BitField<55, 1, u64> inv_a; + BitField<56, 1, u64> inv_b; + } const lop32i{insn}; + + LOP(*this, insn, GetImm32(insn), lop32i.x != 0, lop32i.cc != 0, lop32i.inv_a != 0, + lop32i.inv_b != 0, lop32i.bit_op); +} } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 59252bcc5..a4367fc5a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -357,10 +357,6 @@ void TranslatorVisitor::LONGJMP(u64) { ThrowNotImplemented(Opcode::LONGJMP); } -void TranslatorVisitor::LOP32I(u64) { - ThrowNotImplemented(Opcode::LOP32I); -} - void TranslatorVisitor::MEMBAR(u64) { ThrowNotImplemented(Opcode::MEMBAR); } -- cgit v1.2.3 From c97d03efb9e02f89cca6dfea4c8d5c37fc4a2adc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Mar 2021 21:14:58 -0300 Subject: shader: Implement ISCADD (imm) --- .../frontend/maxwell/translate/impl/integer_scaled_add.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp index f06046d4d..5469e445a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -57,8 +57,8 @@ void TranslatorVisitor::ISCADD_cbuf(u64) { throw NotImplementedException("ISCADD (cbuf)"); } -void TranslatorVisitor::ISCADD_imm(u64) { - throw NotImplementedException("ISCADD (imm)"); +void TranslatorVisitor::ISCADD_imm(u64 insn) { + ISCADD(*this, insn, GetImm20(insn)); } void TranslatorVisitor::ISCADD32I(u64) { -- cgit v1.2.3 From f91859efd259995806c2944f7941b105b58300d3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Mar 2021 05:04:12 -0300 Subject: shader: Implement I2F --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 145 +++++++++++------ src/shader_recompiler/frontend/ir/ir_emitter.h | 14 +- src/shader_recompiler/frontend/ir/opcodes.inc | 13 ++ .../frontend/maxwell/translate/impl/impl.cpp | 21 +++ .../frontend/maxwell/translate/impl/impl.h | 2 + .../impl/integer_floating_point_conversion.cpp | 173 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 -- .../maxwell/translate/impl/texture_fetch.cpp | 2 +- .../translate/impl/texture_fetch_swizzled.cpp | 2 +- 9 files changed, 315 insertions(+), 69 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 672836c0b..652f6949e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -53,6 +53,10 @@ U64 IREmitter::Imm64(u64 value) const { return U64{Value{value}}; } +U64 IREmitter::Imm64(s64 value) const { + return U64{Value{static_cast(value)}}; +} + F64 IREmitter::Imm64(f64 value) const { return F64{Value{value}}; } @@ -363,7 +367,7 @@ U1 IREmitter::GetSparseFromOp(const Value& op) { } F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { - if (a.Type() != a.Type()) { + if (a.Type() != b.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); } switch (a.Type()) { @@ -974,8 +978,15 @@ U32U64 IREmitter::INeg(const U32U64& value) { } } -U32 IREmitter::IAbs(const U32& value) { - return Inst(Opcode::IAbs32, value); +U32U64 IREmitter::IAbs(const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::IAbs32, value); + case Type::U64: + return Inst(Opcode::IAbs64, value); + default: + ThrowInvalidType(value.Type()); + } } U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) { @@ -1074,8 +1085,25 @@ U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { return Inst(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); } -U1 IREmitter::IEqual(const U32& lhs, const U32& rhs) { - return Inst(Opcode::IEqual, lhs, rhs); +U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::U32: + return Inst(Opcode::IEqual, lhs, rhs); + case Type::U64: { + // Manually compare the unpacked values + const Value lhs_vector{UnpackUint2x32(lhs)}; + const Value rhs_vector{UnpackUint2x32(rhs)}; + return LogicalAnd(IEqual(IR::U32{CompositeExtract(lhs_vector, 0)}, + IR::U32{CompositeExtract(rhs_vector, 0)}), + IEqual(IR::U32{CompositeExtract(lhs_vector, 1)}, + IR::U32{CompositeExtract(rhs_vector, 1)})); + } + default: + ThrowInvalidType(lhs.Type()); + } } U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { @@ -1198,79 +1226,96 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v } } -F16F32F64 IREmitter::ConvertSToF(size_t bitsize, const U32U64& value) { - switch (bitsize) { +F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value) { + switch (dest_bitsize) { case 16: - switch (value.Type()) { - case Type::U32: + switch (src_bitsize) { + case 8: + return Inst(Opcode::ConvertF16S8, value); + case 16: + return Inst(Opcode::ConvertF16S16, value); + case 32: return Inst(Opcode::ConvertF16S32, value); - case Type::U64: + case 64: return Inst(Opcode::ConvertF16S64, value); - default: - ThrowInvalidType(value.Type()); } + break; case 32: - switch (value.Type()) { - case Type::U32: + switch (src_bitsize) { + case 8: + return Inst(Opcode::ConvertF32S8, value); + case 16: + return Inst(Opcode::ConvertF32S16, value); + case 32: return Inst(Opcode::ConvertF32S32, value); - case Type::U64: + case 64: return Inst(Opcode::ConvertF32S64, value); - default: - ThrowInvalidType(value.Type()); } + break; case 64: - switch (value.Type()) { - case Type::U32: - return Inst(Opcode::ConvertF64S32, value); - case Type::U64: - return Inst(Opcode::ConvertF64S64, value); - default: - ThrowInvalidType(value.Type()); + switch (src_bitsize) { + case 8: + return Inst(Opcode::ConvertF64S8, value); + case 16: + return Inst(Opcode::ConvertF64S16, value); + case 32: + return Inst(Opcode::ConvertF64S32, value); + case 64: + return Inst(Opcode::ConvertF64S64, value); } - default: - throw InvalidArgument("Invalid destination bitsize {}", bitsize); + break; } + throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize); } -F16F32F64 IREmitter::ConvertUToF(size_t bitsize, const U32U64& value) { - switch (bitsize) { +F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value) { + switch (dest_bitsize) { case 16: - switch (value.Type()) { - case Type::U32: + switch (src_bitsize) { + case 8: + return Inst(Opcode::ConvertF16U8, value); + case 16: + return Inst(Opcode::ConvertF16U16, value); + case 32: return Inst(Opcode::ConvertF16U32, value); - case Type::U64: + case 64: return Inst(Opcode::ConvertF16U64, value); - default: - ThrowInvalidType(value.Type()); } + break; case 32: - switch (value.Type()) { - case Type::U32: + switch (src_bitsize) { + case 8: + return Inst(Opcode::ConvertF32U8, value); + case 16: + return Inst(Opcode::ConvertF32U16, value); + case 32: return Inst(Opcode::ConvertF32U32, value); - case Type::U64: + case 64: return Inst(Opcode::ConvertF32U64, value); - default: - ThrowInvalidType(value.Type()); } + break; case 64: - switch (value.Type()) { - case Type::U32: - return Inst(Opcode::ConvertF64U32, value); - case Type::U64: - return Inst(Opcode::ConvertF64U64, value); - default: - ThrowInvalidType(value.Type()); + switch (src_bitsize) { + case 8: + return Inst(Opcode::ConvertF64U8, value); + case 16: + return Inst(Opcode::ConvertF64U16, value); + case 32: + return Inst(Opcode::ConvertF64U32, value); + case 64: + return Inst(Opcode::ConvertF64U64, value); } - default: - throw InvalidArgument("Invalid destination bitsize {}", bitsize); + break; } + throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize); } -F16F32F64 IREmitter::ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value) { +F16F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, + const Value& value) { if (is_signed) { - return ConvertSToF(bitsize, value); + return ConvertSToF(dest_bitsize, src_bitsize, value); } else { - return ConvertUToF(bitsize, value); + return ConvertUToF(dest_bitsize, src_bitsize, value); } } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 72af5db37..8edb11154 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -29,6 +29,7 @@ public: [[nodiscard]] U32 Imm32(s32 value) const; [[nodiscard]] F32 Imm32(f32 value) const; [[nodiscard]] U64 Imm64(u64 value) const; + [[nodiscard]] U64 Imm64(s64 value) const; [[nodiscard]] F64 Imm64(f64 value) const; void Branch(Block* label); @@ -170,7 +171,7 @@ public: [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); [[nodiscard]] U32 IMul(const U32& a, const U32& b); [[nodiscard]] U32U64 INeg(const U32U64& value); - [[nodiscard]] U32 IAbs(const U32& value); + [[nodiscard]] U32U64 IAbs(const U32U64& value); [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift); [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift); [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift); @@ -193,7 +194,7 @@ public: [[nodiscard]] U32 UMax(const U32& a, const U32& b); [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); - [[nodiscard]] U1 IEqual(const U32& lhs, const U32& rhs); + [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs); [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); @@ -207,9 +208,12 @@ public: [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); - [[nodiscard]] F16F32F64 ConvertSToF(size_t bitsize, const U32U64& value); - [[nodiscard]] F16F32F64 ConvertUToF(size_t bitsize, const U32U64& value); - [[nodiscard]] F16F32F64 ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value); + [[nodiscard]] F16F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize, + const Value& value); + [[nodiscard]] F16F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize, + const Value& value); + [[nodiscard]] F16F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, + const Value& value); [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 593faca52..8471db7b9 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -247,6 +247,7 @@ OPCODE(IMul32, U32, U32, OPCODE(INeg32, U32, U32, ) OPCODE(INeg64, U64, U64, ) OPCODE(IAbs32, U32, U32, ) +OPCODE(IAbs64, U64, U64, ) OPCODE(ShiftLeftLogical32, U32, U32, U32, ) OPCODE(ShiftLeftLogical64, U64, U64, U32, ) OPCODE(ShiftRightLogical32, U32, U32, U32, ) @@ -311,16 +312,28 @@ OPCODE(ConvertF16F32, F16, F32, OPCODE(ConvertF32F16, F32, F16, ) OPCODE(ConvertF32F64, F32, F64, ) OPCODE(ConvertF64F32, F64, F32, ) +OPCODE(ConvertF16S8, F16, U32, ) +OPCODE(ConvertF16S16, F16, U32, ) OPCODE(ConvertF16S32, F16, U32, ) OPCODE(ConvertF16S64, F16, U64, ) +OPCODE(ConvertF16U8, F16, U32, ) +OPCODE(ConvertF16U16, F16, U32, ) OPCODE(ConvertF16U32, F16, U32, ) OPCODE(ConvertF16U64, F16, U64, ) +OPCODE(ConvertF32S8, F32, U32, ) +OPCODE(ConvertF32S16, F32, U32, ) OPCODE(ConvertF32S32, F32, U32, ) OPCODE(ConvertF32S64, F32, U64, ) +OPCODE(ConvertF32U8, F32, U32, ) +OPCODE(ConvertF32U16, F32, U32, ) OPCODE(ConvertF32U32, F32, U32, ) OPCODE(ConvertF32U64, F32, U64, ) +OPCODE(ConvertF64S8, F64, U32, ) +OPCODE(ConvertF64S16, F64, U32, ) OPCODE(ConvertF64S32, F64, U32, ) OPCODE(ConvertF64S64, F64, U64, ) +OPCODE(ConvertF64U8, F64, U32, ) +OPCODE(ConvertF64U16, F64, U32, ) OPCODE(ConvertF64U32, F64, U32, ) OPCODE(ConvertF64U64, F64, U64, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index e444dcd4f..c9af83010 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -121,6 +121,22 @@ IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value)); } +IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) { + union { + u64 raw; + BitField<20, 1, u64> unaligned; + } const cbuf{insn}; + + if (cbuf.unaligned != 0) { + throw NotImplementedException("Unaligned packed constant buffer read"); + } + const auto [binding, lower_offset]{CbufAddr(insn)}; + const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)}; + const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)}; + const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)}; + return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value)); +} + IR::U32 TranslatorVisitor::GetImm20(u64 insn) { union { u64 raw; @@ -158,6 +174,11 @@ IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) { return ir.Imm64(Common::BitCast(value | sign_bit)); } +IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) { + const s64 value{GetImm20(insn).U32()}; + return ir.Imm64(static_cast(static_cast(value) << 32)); +} + IR::U32 TranslatorVisitor::GetImm32(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index ed81d9c36..cb66cca25 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -356,10 +356,12 @@ public: [[nodiscard]] IR::U32 GetCbuf(u64 insn); [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn); + [[nodiscard]] IR::U64 GetPackedCbuf(u64 insn); [[nodiscard]] IR::U32 GetImm20(u64 insn); [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn); + [[nodiscard]] IR::U64 GetPackedImm20(u64 insn); [[nodiscard]] IR::U32 GetImm32(u64 insn); [[nodiscard]] IR::F32 GetFloatImm32(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp new file mode 100644 index 000000000..e8b5ae1d2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -0,0 +1,173 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class FloatFormat : u64 { + F16 = 1, + F32 = 2, + F64 = 3, +}; + +enum class IntFormat : u64 { + U8 = 0, + U16 = 1, + U32 = 2, + U64 = 3, +}; + +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 2, FloatFormat> float_format; + BitField<10, 2, IntFormat> int_format; + BitField<13, 1, u64> is_signed; + BitField<39, 2, FpRounding> fp_rounding; + BitField<41, 2, u64> selector; + BitField<47, 1, u64> cc; + BitField<45, 1, u64> neg; + BitField<49, 1, u64> abs; +}; + +bool Is64(u64 insn) { + return Encoding{insn}.int_format == IntFormat::U64; +} + +int BitSize(FloatFormat format) { + switch (format) { + case FloatFormat::F16: + return 16; + case FloatFormat::F32: + return 32; + case FloatFormat::F64: + return 64; + } + throw NotImplementedException("Invalid float format {}", format); +} + +IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) { + const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))}; + const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))}; + const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)}; + const IR::U1 is_least{v.ir.IEqual(value, least_value)}; + return IR::U32{v.ir.Select(is_least, value, absolute)}; +} + +void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { + const Encoding i2f{insn}; + if (i2f.cc != 0) { + throw NotImplementedException("CC"); + } + const bool is_signed{i2f.is_signed != 0}; + int src_bitsize{}; + switch (i2f.int_format) { + case IntFormat::U8: + src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast(i2f.selector) * 8), + v.ir.Imm32(8), is_signed); + if (i2f.abs != 0) { + src = SmallAbs(v, src, 8); + } + src_bitsize = 8; + break; + case IntFormat::U16: + if (i2f.selector == 1 || i2f.selector == 3) { + throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value()); + } + src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast(i2f.selector) * 8), + v.ir.Imm32(16), is_signed); + if (i2f.abs != 0) { + src = SmallAbs(v, src, 16); + } + src_bitsize = 16; + break; + case IntFormat::U32: + case IntFormat::U64: + if (i2f.selector != 0) { + throw NotImplementedException("Unexpected selector {}", i2f.selector.Value()); + } + if (i2f.abs != 0 && is_signed) { + src = v.ir.IAbs(src); + } + src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32; + break; + } + const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32}; + const int dst_bitsize{BitSize(i2f.float_format)}; + IR::F16F32F64 value{v.ir.ConvertIToF(dst_bitsize, conversion_src_bitsize, is_signed, src)}; + if (i2f.neg != 0) { + if (i2f.abs != 0 || !is_signed) { + // We know the value is positive + value = v.ir.FPNeg(value); + } else { + // Only negate if the input isn't the lowest value + IR::U1 is_least; + if (src_bitsize == 64) { + is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits::min())); + } else { + const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))}; + is_least = v.ir.IEqual(src, least_value); + } + value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))}; + } + } + switch (i2f.float_format) { + case FloatFormat::F16: { + const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; + v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero))); + break; + } + case FloatFormat::F32: + v.F(i2f.dest_reg, value); + break; + case FloatFormat::F64: { + if (!IR::IsAligned(i2f.dest_reg, 2)) { + throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value()); + } + const IR::Value vector{v.ir.UnpackDouble2x32(value)}; + for (int i = 0; i < 2; ++i) { + v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, i)}); + } + break; + } + default: + throw NotImplementedException("Invalid float format {}", i2f.float_format.Value()); + } +} +} // Anonymous namespace + +void TranslatorVisitor::I2F_reg(u64 insn) { + if (Is64(insn)) { + union { + u64 raw; + BitField<20, 8, IR::Reg> reg; + } const value{insn}; + const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))}; + I2F(*this, insn, ir.PackUint2x32(regs)); + } else { + I2F(*this, insn, GetReg20(insn)); + } +} + +void TranslatorVisitor::I2F_cbuf(u64 insn) { + if (Is64(insn)) { + I2F(*this, insn, GetPackedCbuf(insn)); + } else { + I2F(*this, insn, GetCbuf(insn)); + } +} + +void TranslatorVisitor::I2F_imm(u64 insn) { + if (Is64(insn)) { + I2F(*this, insn, GetPackedImm20(insn)); + } else { + I2F(*this, insn, GetImm20(insn)); + } +} + +} // namespace Shader::Maxwell \ No newline at end of file diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a4367fc5a..4078feafa 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -241,18 +241,6 @@ void TranslatorVisitor::HSETP2_imm(u64) { ThrowNotImplemented(Opcode::HSETP2_imm); } -void TranslatorVisitor::I2F_reg(u64) { - ThrowNotImplemented(Opcode::I2F_reg); -} - -void TranslatorVisitor::I2F_cbuf(u64) { - ThrowNotImplemented(Opcode::I2F_cbuf); -} - -void TranslatorVisitor::I2F_imm(u64) { - ThrowNotImplemented(Opcode::I2F_imm); -} - void TranslatorVisitor::IDE(u64) { ThrowNotImplemented(Opcode::IDE); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 0fbb87ec4..b691b4d1f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -56,7 +56,7 @@ Shader::TextureType GetType(TextureType type, bool dc) { } IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { - const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }}; + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; switch (type) { case TextureType::_1D: return v.F(reg); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index 54f0df754..d5fda20f4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -65,7 +65,7 @@ IR::Value Composite(TranslatorVisitor& v, Args... regs) { } IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { - return v.ir.ConvertUToF(32, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))); + return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))); } IR::Value Sample(TranslatorVisitor& v, u64 insn) { -- cgit v1.2.3 From e802512d8e49cc4a92c0c09fe023576c2a2ab3db Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 20 Mar 2021 21:22:21 +0100 Subject: shader: Refactor half floating instructions --- .../translate/impl/half_floating_point_add.cpp | 60 +--------------------- .../translate/impl/half_floating_point_helper.cpp | 49 ++++++++++++++++++ .../translate/impl/half_floating_point_helper.h | 31 +++++++++++ 3 files changed, 82 insertions(+), 58 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp index c292d5e87..19e3401ca 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -2,66 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "common/common_types.h" -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" namespace Shader::Maxwell { namespace { -enum class Merge : u64 { - H1_H0, - F32, - MRG_H0, - MRG_H1, -}; - -enum class Swizzle : u64 { - H1_H0, - F32, - H0_H0, - H1_H1, -}; - -std::pair Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { - switch (swizzle) { - case Swizzle::H1_H0: { - const IR::Value vector{ir.UnpackFloat2x16(value)}; - return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}}; - } - case Swizzle::H0_H0: { - const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)}; - return {scalar, scalar}; - } - case Swizzle::H1_H1: { - const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)}; - return {scalar, scalar}; - } - case Swizzle::F32: { - const IR::F32 scalar{ir.BitCast(value)}; - return {scalar, scalar}; - } - } - throw InvalidArgument("Invalid swizzle {}", swizzle); -} - -IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, - Merge merge) { - switch (merge) { - case Merge::H1_H0: - return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs)); - case Merge::F32: - return ir.BitCast(ir.FPConvert(32, lhs)); - case Merge::MRG_H0: - case Merge::MRG_H1: { - const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))}; - const bool h0{merge == Merge::MRG_H0}; - const IR::F16& insert{h0 ? lhs : rhs}; - return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, h0 ? 0 : 1)); - } - } - throw InvalidArgument("Invalid merge {}", merge); -} void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { @@ -122,7 +66,7 @@ void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swi HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0, hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b); } -} // Anonymous namespace +} // namespace void TranslatorVisitor::HADD2_reg(u64 insn) { union { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp new file mode 100644 index 000000000..930822092 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp @@ -0,0 +1,49 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { + +std::pair Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { + switch (swizzle) { + case Swizzle::H1_H0: { + const IR::Value vector{ir.UnpackFloat2x16(value)}; + return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}}; + } + case Swizzle::H0_H0: { + const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)}; + return {scalar, scalar}; + } + case Swizzle::H1_H1: { + const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)}; + return {scalar, scalar}; + } + case Swizzle::F32: { + const IR::F32 scalar{ir.BitCast(value)}; + return {scalar, scalar}; + } + } + throw InvalidArgument("Invalid swizzle {}", swizzle); +} + +IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, + Merge merge) { + switch (merge) { + case Merge::H1_H0: + return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs)); + case Merge::F32: + return ir.BitCast(ir.FPConvert(32, lhs)); + case Merge::MRG_H0: + case Merge::MRG_H1: { + const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))}; + const bool h0{merge == Merge::MRG_H0}; + const IR::F16& insert{h0 ? lhs : rhs}; + return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, h0 ? 0 : 1)); + } + } + throw InvalidArgument("Invalid merge {}", merge); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h new file mode 100644 index 000000000..0933b595e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h @@ -0,0 +1,31 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +enum class Merge : u64 { + H1_H0, + F32, + MRG_H0, + MRG_H1, +}; + +enum class Swizzle : u64 { + H1_H0, + F32, + H0_H0, + H1_H1, +}; + +std::pair Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle); + +IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, + Merge merge); + +} // namespace Shader::Maxwell -- cgit v1.2.3 From 28dff6a6298b714019aa10a47f5a9e3f3f689067 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 01:03:21 +0100 Subject: shader: Implement HFMA2 --- .../half_floating_point_fused_multiply_add.cpp | 170 +++++++++++++++++++++ .../translate/impl/half_floating_point_helper.cpp | 13 ++ .../translate/impl/half_floating_point_helper.h | 8 + .../maxwell/translate/impl/not_implemented.cpp | 20 --- 4 files changed, 191 insertions(+), 20 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..2f3996274 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp @@ -0,0 +1,170 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { + +void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c, + Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, + bool sat, HalfPrecision precision) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const hfma2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hfma2.src_a), swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + auto [lhs_c, rhs_c]{Extract(v.ir, src_c, swizzle_c)}; + const bool promotion{lhs_a.Type() != lhs_b.Type() || lhs_a.Type() != lhs_c.Type()}; + if (promotion) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + if (lhs_c.Type() == IR::Type::F16) { + lhs_c = v.ir.FPConvert(32, lhs_c); + rhs_c = v.ir.FPConvert(32, rhs_c); + } + } + + lhs_b = v.ir.FPAbsNeg(lhs_b, false, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, false, neg_b); + + lhs_c = v.ir.FPAbsNeg(lhs_c, false, neg_c); + rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c); + + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{HalfPrecision2FmzMode(precision)}, + }; + IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)}; + IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)}; + if (precision == HalfPrecision::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. + // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)}; + const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)}; + const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)}; + lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, lhs_c, lhs)}; + + const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)}; + const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)}; + const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)}; + rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, rhs_c, rhs)}; + } + if (sat) { + lhs = v.ir.FPSaturate(lhs); + rhs = v.ir.FPSaturate(rhs); + } + if (promotion) { + lhs = v.ir.FPConvert(16, lhs); + rhs = v.ir.FPConvert(16, rhs); + } + v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, lhs, rhs, merge)); +} + +void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b, + Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat, + HalfPrecision precision) { + union { + u64 raw; + BitField<47, 2, Swizzle> swizzle_a; + BitField<49, 2, Merge> merge; + } const hfma2{insn}; + + HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c, + sat, precision); +} + +} // namespace + +void TranslatorVisitor::HFMA2_reg(u64 insn) { + union { + u64 raw; + BitField<28, 2, Swizzle> swizzle_b; + BitField<32, 1, u64> saturate; + BitField<31, 1, u64> neg_b; + BitField<30, 1, u64> neg_c; + BitField<35, 2, Swizzle> swizzle_c; + BitField<37, 2, HalfPrecision> precision; + } const hfma2{insn}; + + HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, hfma2.swizzle_c, + GetReg20(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_rc(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> neg_c; + BitField<52, 1, u64> saturate; + BitField<53, 2, Swizzle> swizzle_b; + BitField<56, 1, u64> neg_b; + BitField<57, 2, HalfPrecision> precision; + } const hfma2{insn}; + + HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, Swizzle::F32, + GetReg39(insn), GetCbuf(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_cr(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> neg_c; + BitField<52, 1, u64> saturate; + BitField<53, 2, Swizzle> swizzle_c; + BitField<56, 1, u64> neg_b; + BitField<57, 2, HalfPrecision> precision; + } const hfma2{insn}; + + HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, Swizzle::F32, hfma2.swizzle_c, + GetCbuf(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_imm(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> neg_c; + BitField<52, 1, u64> saturate; + BitField<53, 2, Swizzle> swizzle_c; + + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + BitField<57, 2, HalfPrecision> precision; + } const hfma2{insn}; + + const u32 imm{static_cast(hfma2.low << 6) | ((hfma2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hfma2.high << 22) | ((hfma2.neg_high != 0 ? 1 : 0) << 31)}; + + HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm), + GetReg39(insn), hfma2.saturate != 0, hfma2.precision); +} + +void TranslatorVisitor::HFMA2_32I(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_c; + BitField<20, 32, u64> imm32; + BitField<52, 1, u64> neg_c; + BitField<53, 2, Swizzle> swizzle_a; + BitField<55, 2, HalfPrecision> precision; + } const hfma2{insn}; + + const u32 imm{static_cast(hfma2.imm32)}; + HFMA2(*this, insn, Merge::H1_H0, hfma2.swizzle_a, false, hfma2.neg_c != 0, Swizzle::H1_H0, + Swizzle::H1_H0, ir.Imm32(imm), X(hfma2.src_c), false, hfma2.precision); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp index 930822092..d0c6ba1aa 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp @@ -6,6 +6,19 @@ namespace Shader::Maxwell { +IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) { + switch (precision) { + case HalfPrecision::None: + return IR::FmzMode::None; + case HalfPrecision::FTZ: + return IR::FmzMode::FTZ; + case HalfPrecision::FMZ: + return IR::FmzMode::FMZ; + default: + return IR::FmzMode::DontCare; + } +} + std::pair Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { switch (swizzle) { case Swizzle::H1_H0: { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h index 0933b595e..f26ef0949 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h @@ -23,6 +23,14 @@ enum class Swizzle : u64 { H1_H1, }; +enum class HalfPrecision : u64 { + None = 0, + FTZ = 1, + FMZ = 2, +}; + +IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision); + std::pair Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle); IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 4078feafa..ddfca8d7a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -181,26 +181,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) { ThrowNotImplemented(Opcode::GETLMEMBASE); } -void TranslatorVisitor::HFMA2_reg(u64) { - ThrowNotImplemented(Opcode::HFMA2_reg); -} - -void TranslatorVisitor::HFMA2_rc(u64) { - ThrowNotImplemented(Opcode::HFMA2_rc); -} - -void TranslatorVisitor::HFMA2_cr(u64) { - ThrowNotImplemented(Opcode::HFMA2_cr); -} - -void TranslatorVisitor::HFMA2_imm(u64) { - ThrowNotImplemented(Opcode::HFMA2_imm); -} - -void TranslatorVisitor::HFMA2_32I(u64) { - ThrowNotImplemented(Opcode::HFMA2_32I); -} - void TranslatorVisitor::HMUL2_reg(u64) { ThrowNotImplemented(Opcode::HMUL2_reg); } -- cgit v1.2.3 From ed6cd3c94ac10b434a1240fc3cbed2050766be65 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 02:08:04 +0100 Subject: shader: Implement HMUL2 --- .../impl/half_floating_point_multiply.cpp | 143 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 16 --- 2 files changed, 143 insertions(+), 16 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp new file mode 100644 index 000000000..ff34a8c8f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp @@ -0,0 +1,143 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { + +void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a, + Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b, + HalfPrecision precision) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a; + } const hmul2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hmul2.src_a), swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + const bool promotion{lhs_a.Type() != lhs_b.Type()}; + if (promotion) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + } + lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); + rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{HalfPrecision2FmzMode(precision)}, + }; + IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)}; + IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)}; + if (precision == HalfPrecision::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. + // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)}; + const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)}; + const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)}; + lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, zero, lhs)}; + + const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)}; + const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)}; + const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)}; + rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, zero, rhs)}; + } + if (sat) { + lhs = v.ir.FPSaturate(lhs); + rhs = v.ir.FPSaturate(rhs); + } + if (promotion) { + lhs = v.ir.FPConvert(16, lhs); + rhs = v.ir.FPConvert(16, rhs); + } + v.X(hmul2.dest_reg, MergeResult(v.ir, hmul2.dest_reg, lhs, rhs, merge)); +} + +void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b, + Swizzle swizzle_b, const IR::U32& src_b) { + union { + u64 raw; + BitField<49, 2, Merge> merge; + BitField<47, 2, Swizzle> swizzle_a; + BitField<39, 2, HalfPrecision> precision; + } const hmul2{insn}; + + HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b, + hmul2.precision); +} +} // namespace + +void TranslatorVisitor::HMUL2_reg(u64 insn) { + union { + u64 raw; + BitField<32, 1, u64> sat; + BitField<31, 1, u64> neg_b; + BitField<30, 1, u64> abs_b; + BitField<44, 1, u64> abs_a; + BitField<28, 2, Swizzle> swizzle_b; + } const hmul2{insn}; + + HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, false, hmul2.abs_b != 0, hmul2.neg_b != 0, + hmul2.swizzle_b, GetReg20(insn)); +} + +void TranslatorVisitor::HMUL2_cbuf(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> sat; + BitField<54, 1, u64> abs_b; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + } const hmul2{insn}; + + HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, hmul2.abs_b != 0, false, + Swizzle::F32, GetCbuf(insn)); +} + +void TranslatorVisitor::HMUL2_imm(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> sat; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_a; + } const hmul2{insn}; + + const u32 imm{static_cast(hmul2.low << 6) | ((hmul2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hmul2.high << 22) | ((hmul2.neg_high != 0 ? 1 : 0) << 31)}; + HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false, + Swizzle::H1_H0, ir.Imm32(imm)); +} + +void TranslatorVisitor::HMUL2_32I(u64 insn) { + union { + u64 raw; + BitField<55, 2, HalfPrecision> precision; + BitField<52, 1, u64> sat; + BitField<53, 2, Swizzle> swizzle_a; + BitField<20, 32, u64> imm32; + } const hmul2{insn}; + + const u32 imm{static_cast(hmul2.imm32)}; + HMUL2(*this, insn, Merge::H1_H0, hmul2.sat != 0, false, false, hmul2.swizzle_a, false, false, + Swizzle::H1_H0, ir.Imm32(imm), hmul2.precision); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index ddfca8d7a..6c159301f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -181,22 +181,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) { ThrowNotImplemented(Opcode::GETLMEMBASE); } -void TranslatorVisitor::HMUL2_reg(u64) { - ThrowNotImplemented(Opcode::HMUL2_reg); -} - -void TranslatorVisitor::HMUL2_cbuf(u64) { - ThrowNotImplemented(Opcode::HMUL2_cbuf); -} - -void TranslatorVisitor::HMUL2_imm(u64) { - ThrowNotImplemented(Opcode::HMUL2_imm); -} - -void TranslatorVisitor::HMUL2_32I(u64) { - ThrowNotImplemented(Opcode::HMUL2_32I); -} - void TranslatorVisitor::HSET2_reg(u64) { ThrowNotImplemented(Opcode::HSET2_reg); } -- cgit v1.2.3 From 9e213fd861d264cf79d7a6ed0268a57c87306b9b Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 03:48:40 +0100 Subject: shader: Implement HSET2 --- src/shader_recompiler/frontend/maxwell/maxwell.inc | 4 +- .../translate/impl/half_floating_point_helper.h | 1 + .../translate/impl/half_floating_point_set.cpp | 115 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 --- 4 files changed, 118 insertions(+), 14 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index f2a2ff331..1b87d04fc 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -105,8 +105,8 @@ INST(HMUL2_cbuf, "HMUL2 (cbuf)", "0111 100- 1--- ----") INST(HMUL2_imm, "HMUL2 (imm)", "0111 100- 0--- ----") INST(HMUL2_32I, "HMUL2_32I", "0010 101- ---- ----") INST(HSET2_reg, "HSET2 (reg)", "0101 1101 0001 1---") -INST(HSET2_cbuf, "HSET2 (cbuf)", "0111 1100 1--- ----") -INST(HSET2_imm, "HSET2 (imm)", "0111 1100 0--- ----") +INST(HSET2_cbuf, "HSET2 (cbuf)", "0111 110- 1--- ----") +INST(HSET2_imm, "HSET2 (imm)", "0111 110- 0--- ----") INST(HSETP2_reg, "HSETP2 (reg)", "0101 1101 0010 0---") INST(HSETP2_cbuf, "HSETP2 (cbuf)", "0111 111- 1--- ----") INST(HSETP2_imm, "HSETP2 (imm)", "0111 111- 0--- ----") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h index f26ef0949..24063b2ab 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h @@ -5,6 +5,7 @@ #include "common/common_types.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp new file mode 100644 index 000000000..4825ca06a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp @@ -0,0 +1,115 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b, + bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> neg_a; + BitField<45, 2, BooleanOp> bop; + BitField<44, 1, u64> abs_a; + BitField<47, 2, Swizzle> swizzle_a; + } const hset2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + // TODO: Implement FP16 FloatingPointCompare + //if (lhs_a.Type() != lhs_b.Type()) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + //} + + lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0); + rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl control{ + .no_contraction{false}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + + IR::U1 pred{v.ir.GetPred(hset2.pred)}; + if (hset2.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)}; + const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)}; + const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hset2.bop)}; + const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hset2.bop)}; + + const u32 true_value = bf ? 0x3c00 : 0xffff; + const IR::U32 true_val_lhs{v.ir.Imm32(true_value)}; + const IR::U32 true_val_rhs{v.ir.Imm32(true_value << 16)}; + const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 result_lhs{v.ir.Select(bop_result_lhs, true_val_lhs, fail_result)}; + const IR::U32 result_rhs{v.ir.Select(bop_result_rhs, true_val_rhs, fail_result)}; + + v.X(hset2.dest_reg, IR::U32{v.ir.BitwiseOr(result_lhs, result_rhs)}); +} +} // Anonymous namespace + +void TranslatorVisitor::HSET2_reg(u64 insn) { + union { + u64 insn; + BitField<30, 1, u64> abs_b; + BitField<49, 1, u64> bf; + BitField<31, 1, u64> neg_b; + BitField<50, 1, u64> ftz; + BitField<35, 4, FPCompareOp> compare_op; + BitField<28, 2, Swizzle> swizzle_b; + } const hset2{insn}; + HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, + hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b); +} + +void TranslatorVisitor::HSET2_cbuf(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> bf; + BitField<56, 1, u64> neg_b; + BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op; + } const hset2{insn}; + + HSET2(*this, insn, GetCbuf(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, false, + hset2.compare_op, Swizzle::F32); +} + +void TranslatorVisitor::HSET2_imm(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> bf; + BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + } const hset2{insn}; + + const u32 imm{static_cast(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)}; + + HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, + hset2.compare_op, Swizzle::H1_H0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 6c159301f..d1aeceef1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -181,18 +181,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) { ThrowNotImplemented(Opcode::GETLMEMBASE); } -void TranslatorVisitor::HSET2_reg(u64) { - ThrowNotImplemented(Opcode::HSET2_reg); -} - -void TranslatorVisitor::HSET2_cbuf(u64) { - ThrowNotImplemented(Opcode::HSET2_cbuf); -} - -void TranslatorVisitor::HSET2_imm(u64) { - ThrowNotImplemented(Opcode::HSET2_imm); -} - void TranslatorVisitor::HSETP2_reg(u64) { ThrowNotImplemented(Opcode::HSETP2_reg); } -- cgit v1.2.3 From e10d9c1b8e21912d34c02a22b5812b94fc27502b Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 04:24:30 +0100 Subject: shader: Implement HSETP2 --- .../impl/half_floating_point_set_predicate.cpp | 116 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 --- 2 files changed, 116 insertions(+), 12 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp new file mode 100644 index 000000000..6b1ac21d5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp @@ -0,0 +1,116 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b, + Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) { + union { + u64 insn; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> neg_a; + BitField<45, 2, BooleanOp> bop; + BitField<44, 1, u64> abs_a; + BitField<6, 1, u64> ftz; + BitField<47, 2, Swizzle> swizzle_a; + } const hsetp2{insn}; + + auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)}; + auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; + // TODO: Implement FP16 FloatingPointCompare + // if (lhs_a.Type() != lhs_b.Type()) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } + //} + + lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); + rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); + + lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); + rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); + + const IR::FpControl control{ + .no_contraction{false}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + + IR::U1 pred{v.ir.GetPred(hsetp2.pred)}; + if (hsetp2.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)}; + const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)}; + const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hsetp2.bop)}; + const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hsetp2.bop)}; + + if (h_and) { + auto result = v.ir.LogicalAnd(bop_result_lhs, bop_result_rhs); + v.ir.SetPred(hsetp2.dest_pred_a, result); + v.ir.SetPred(hsetp2.dest_pred_b, v.ir.LogicalNot(result)); + } else { + v.ir.SetPred(hsetp2.dest_pred_a, bop_result_lhs); + v.ir.SetPred(hsetp2.dest_pred_b, bop_result_rhs); + } +} +} // Anonymous namespace + +void TranslatorVisitor::HSETP2_reg(u64 insn) { + union { + u64 insn; + BitField<30, 1, u64> abs_b; + BitField<49, 1, u64> h_and; + BitField<31, 1, u64> neg_b; + BitField<35, 4, FPCompareOp> compare_op; + BitField<28, 2, Swizzle> swizzle_b; + } const hsetp2{insn}; + HSETP2(*this, insn, GetReg20(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, hsetp2.swizzle_b, + hsetp2.compare_op, hsetp2.h_and != 0); +} + +void TranslatorVisitor::HSETP2_cbuf(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> h_and; + BitField<54, 1, u64> abs_b; + BitField<56, 1, u64> neg_b; + BitField<49, 4, FPCompareOp> compare_op; + } const hsetp2{insn}; + + HSETP2(*this, insn, GetCbuf(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, Swizzle::F32, + hsetp2.compare_op, hsetp2.h_and != 0); +} + +void TranslatorVisitor::HSETP2_imm(u64 insn) { + union { + u64 insn; + BitField<53, 1, u64> h_and; + BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + } const hsetp2{insn}; + + const u32 imm{static_cast(hsetp2.low << 6) | ((hsetp2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hsetp2.high << 22) | ((hsetp2.neg_high != 0 ? 1 : 0) << 31)}; + + HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op, + hsetp2.h_and != 0); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index d1aeceef1..bd3c1f9d6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -181,18 +181,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) { ThrowNotImplemented(Opcode::GETLMEMBASE); } -void TranslatorVisitor::HSETP2_reg(u64) { - ThrowNotImplemented(Opcode::HSETP2_reg); -} - -void TranslatorVisitor::HSETP2_cbuf(u64) { - ThrowNotImplemented(Opcode::HSETP2_cbuf); -} - -void TranslatorVisitor::HSETP2_imm(u64) { - ThrowNotImplemented(Opcode::HSETP2_imm); -} - void TranslatorVisitor::IDE(u64) { ThrowNotImplemented(Opcode::IDE); } -- cgit v1.2.3 From 27fb97377eeb40849260ea866a90519521c6f59b Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 04:33:19 +0100 Subject: shader: Fix floating point comparison for FP16 --- .../maxwell/translate/impl/common_funcs.cpp | 2 +- .../frontend/maxwell/translate/impl/common_funcs.h | 4 +-- .../translate/impl/half_floating_point_set.cpp | 38 +++++++++++----------- .../impl/half_floating_point_set_predicate.cpp | 20 ++++++------ 4 files changed, 32 insertions(+), 32 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index af9a8f82c..d30e82b10 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -72,7 +72,7 @@ bool IsCompareOpOrdered(FPCompareOp op) { } } -IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2, +IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, const IR::F16F32F64& operand_2, FPCompareOp compare_op, IR::FpControl control) { const bool ordered{IsCompareOpOrdered(compare_op)}; switch (compare_op) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h index f8add3c34..f584060b3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -18,7 +18,7 @@ namespace Shader::Maxwell { [[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op); -[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, - const IR::F32& operand_2, FPCompareOp compare_op, +[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, + const IR::F16F32F64& operand_2, FPCompareOp compare_op, IR::FpControl control = {}); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp index 4825ca06a..1d28c0531 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp @@ -22,8 +22,8 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)}; auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; - // TODO: Implement FP16 FloatingPointCompare - //if (lhs_a.Type() != lhs_b.Type()) { + + if (lhs_a.Type() != lhs_b.Type()) { if (lhs_a.Type() == IR::Type::F16) { lhs_a = v.ir.FPConvert(32, lhs_a); rhs_a = v.ir.FPConvert(32, rhs_a); @@ -32,7 +32,7 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f lhs_b = v.ir.FPConvert(32, lhs_b); rhs_b = v.ir.FPConvert(32, rhs_b); } - //} + } lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0); rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0); @@ -94,22 +94,22 @@ void TranslatorVisitor::HSET2_cbuf(u64 insn) { } void TranslatorVisitor::HSET2_imm(u64 insn) { - union { - u64 insn; - BitField<53, 1, u64> bf; - BitField<54, 1, u64> ftz; - BitField<49, 4, FPCompareOp> compare_op; - BitField<56, 1, u64> neg_high; - BitField<30, 9, u64> high; - BitField<29, 1, u64> neg_low; - BitField<20, 9, u64> low; - } const hset2{insn}; - - const u32 imm{static_cast(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)}; - - HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, - hset2.compare_op, Swizzle::H1_H0); + union { + u64 insn; + BitField<53, 1, u64> bf; + BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + } const hset2{insn}; + + const u32 imm{static_cast(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)}; + + HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op, + Swizzle::H1_H0); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp index 6b1ac21d5..3e2a23c92 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp @@ -24,17 +24,17 @@ void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bo auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)}; auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; - // TODO: Implement FP16 FloatingPointCompare - // if (lhs_a.Type() != lhs_b.Type()) { - if (lhs_a.Type() == IR::Type::F16) { - lhs_a = v.ir.FPConvert(32, lhs_a); - rhs_a = v.ir.FPConvert(32, rhs_a); - } - if (lhs_b.Type() == IR::Type::F16) { - lhs_b = v.ir.FPConvert(32, lhs_b); - rhs_b = v.ir.FPConvert(32, rhs_b); + + if (lhs_a.Type() != lhs_b.Type()) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } } - //} lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); -- cgit v1.2.3 From a77e764726938a26803fa90a9c69ccdd32ab09cd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Mar 2021 00:42:56 -0300 Subject: shader: Add support for fp16 comparisons and misc fixes --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 23 ++++++++++++++++++---- src/shader_recompiler/frontend/ir/ir_emitter.h | 6 +++--- src/shader_recompiler/frontend/ir/opcodes.inc | 2 ++ .../translate/impl/half_floating_point_add.cpp | 3 +-- .../half_floating_point_fused_multiply_add.cpp | 4 +--- .../impl/half_floating_point_multiply.cpp | 3 +-- .../translate/impl/half_floating_point_set.cpp | 1 + 7 files changed, 28 insertions(+), 14 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 652f6949e..1eda95071 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -895,15 +895,30 @@ U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpC } } -U1 IREmitter::FPIsNan(const F32& value) { - return Inst(Opcode::FPIsNan32, value); +U1 IREmitter::FPIsNan(const F16F32F64& value) { + switch (value.Type()) { + case Type::F16: + return Inst(Opcode::FPIsNan16, value); + case Type::F32: + return Inst(Opcode::FPIsNan32, value); + case Type::F64: + return Inst(Opcode::FPIsNan64, value); + default: + ThrowInvalidType(value.Type()); + } } -U1 IREmitter::FPOrdered(const F32& lhs, const F32& rhs) { +U1 IREmitter::FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs))); } -U1 IREmitter::FPUnordered(const F32& lhs, const F32& rhs) { +U1 IREmitter::FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } return LogicalOr(FPIsNan(lhs), FPIsNan(rhs)); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 8edb11154..ab4537d88 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -161,9 +161,9 @@ public: FpControl control = {}, bool ordered = true); [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, bool ordered = true); - [[nodiscard]] U1 FPIsNan(const F32& value); - [[nodiscard]] U1 FPOrdered(const F32& lhs, const F32& rhs); - [[nodiscard]] U1 FPUnordered(const F32& lhs, const F32& rhs); + [[nodiscard]] U1 FPIsNan(const F16F32F64& value); + [[nodiscard]] U1 FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs); + [[nodiscard]] U1 FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs); [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 8471db7b9..884eea7a8 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -236,7 +236,9 @@ OPCODE(FPOrdGreaterThanEqual64, U1, F64, OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPIsNan16, U1, F16, ) OPCODE(FPIsNan32, U1, F32, ) +OPCODE(FPIsNan64, U1, F64, ) // Integer operations OPCODE(IAdd32, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp index 19e3401ca..03e7bf047 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -6,7 +6,6 @@ namespace Shader::Maxwell { namespace { - void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { union { @@ -66,7 +65,7 @@ void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swi HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0, hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b); } -} // namespace +} // Anonymous namespace void TranslatorVisitor::HADD2_reg(u64 insn) { union { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp index 2f3996274..8b234bd6a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp @@ -6,7 +6,6 @@ namespace Shader::Maxwell { namespace { - void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c, Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat, HalfPrecision precision) { @@ -85,8 +84,7 @@ void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizz HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c, sat, precision); } - -} // namespace +} // Anonymous namespace void TranslatorVisitor::HFMA2_reg(u64 insn) { union { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp index ff34a8c8f..2451a6ef6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp @@ -6,7 +6,6 @@ namespace Shader::Maxwell { namespace { - void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a, Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b, HalfPrecision precision) { @@ -79,7 +78,7 @@ void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, boo HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b, hmul2.precision); } -} // namespace +} // Anonymous namespace void TranslatorVisitor::HMUL2_reg(u64 insn) { union { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp index 1d28c0531..7f1f4b88c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp @@ -76,6 +76,7 @@ void TranslatorVisitor::HSET2_reg(u64 insn) { BitField<35, 4, FPCompareOp> compare_op; BitField<28, 2, Swizzle> swizzle_b; } const hset2{insn}; + HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b); } -- cgit v1.2.3 From 112b8f00f0da0e031bb62a7a7a44469d3a5518a6 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 21 Mar 2021 01:32:02 -0400 Subject: shader: Add FP64 register load/store helpers --- .../frontend/maxwell/translate/impl/double_add.cpp | 16 ++----------- .../frontend/maxwell/translate/impl/impl.cpp | 27 ++++++++++++++++------ .../frontend/maxwell/translate/impl/impl.h | 2 ++ 3 files changed, 24 insertions(+), 21 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp index bece191d7..3db09d0c2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp @@ -22,19 +22,11 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { BitField<48, 1, u64> neg_a; BitField<49, 1, u64> abs_b; } const dadd{insn}; - - if (!IR::IsAligned(dadd.dest_reg, 2)) { - throw NotImplementedException("Unaligned destination register {}", dadd.dest_reg.Value()); - } - if (!IR::IsAligned(dadd.src_a_reg, 2)) { - throw NotImplementedException("Unaligned destination register {}", dadd.src_a_reg.Value()); - } if (dadd.cc != 0) { throw NotImplementedException("DADD CC"); } - const IR::Reg reg_a{dadd.src_a_reg}; - const IR::F64 src_a{v.ir.PackDouble2x32(v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)))}; + const IR::F64 src_a{v.D(dadd.src_a_reg)}; const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)}; const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; @@ -43,12 +35,8 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { .rounding{CastFpRounding(dadd.fp_rounding)}, .fmz_mode{IR::FmzMode::None}, }; - const IR::F64 value{v.ir.FPAdd(op_a, op_b, control)}; - const IR::Value result{v.ir.UnpackDouble2x32(value)}; - for (int i = 0; i < 2; i++) { - v.X(dadd.dest_reg + i, IR::U32{v.ir.CompositeExtract(result, i)}); - } + v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); } } // Anonymous namespace diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index c9af83010..2d2f6f9c6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -25,6 +25,13 @@ IR::F32 TranslatorVisitor::F(IR::Reg reg) { return ir.BitCast(X(reg)); } +IR::F64 TranslatorVisitor::D(IR::Reg reg) { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; +} + void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { ir.SetReg(dest_reg, value); } @@ -33,6 +40,16 @@ void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) { X(dest_reg, ir.BitCast(value)); } +void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) { + if (!IR::IsAligned(dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register {}", dest_reg); + } + const IR::Value result{ir.UnpackDouble2x32(value)}; + for (int i = 0; i < 2; i++) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + } +} + IR::U32 TranslatorVisitor::GetReg8(u64 insn) { union { u64 raw; @@ -68,13 +85,9 @@ IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) { IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) { union { u64 raw; - BitField<20, 8, IR::Reg> src; - } const index{insn}; - const IR::Reg reg{index.src}; - if (!IR::IsAligned(reg, 2)) { - throw NotImplementedException("Unaligned source register {}", reg); - } - return ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1))); + BitField<20, 8, IR::Reg> index; + } const reg{insn}; + return D(reg.index); } static std::pair CbufAddr(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index cb66cca25..1a1073fa7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -342,9 +342,11 @@ public: [[nodiscard]] IR::U32 X(IR::Reg reg); [[nodiscard]] IR::F32 F(IR::Reg reg); + [[nodiscard]] IR::F64 D(IR::Reg reg); void X(IR::Reg dest_reg, const IR::U32& value); void F(IR::Reg dest_reg, const IR::F32& value); + void D(IR::Reg dest_reg, const IR::F64& value); [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); -- cgit v1.2.3 From c858b8ba97d3ff79dcff0795c1184ee356f2cd1a Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 21 Mar 2021 02:09:14 -0400 Subject: shader: Implement DMUL and DFMA Also add a missing const on DADD --- src/shader_recompiler/frontend/maxwell/maxwell.inc | 2 +- .../frontend/maxwell/translate/impl/double_add.cpp | 2 +- .../translate/impl/double_fused_multiply_add.cpp | 53 ++++++++++++++++++++++ .../maxwell/translate/impl/double_multiply.cpp | 45 ++++++++++++++++++ .../frontend/maxwell/translate/impl/impl.cpp | 8 ++++ .../frontend/maxwell/translate/impl/impl.h | 1 + .../maxwell/translate/impl/not_implemented.cpp | 28 ------------ 7 files changed, 109 insertions(+), 30 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index 1b87d04fc..1dfaeb92f 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -35,7 +35,7 @@ INST(DADD_imm, "DADD (imm)", "0011 100- 0111 0---") INST(DEPBAR, "DEPBAR", "1111 0000 1111 0---") INST(DFMA_reg, "DFMA (reg)", "0101 1011 0111 ----") INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----") -INST(DFMA_cr, "DFMA (cr)", "0010 1011 0111 ----") +INST(DFMA_cr, "DFMA (cr)", "0100 1011 0111 ----") INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----") INST(DMNMX_reg, "DMNMX (reg)", "0100 1100 0101 0---") INST(DMNMX_cbuf, "DMNMX (cbuf)", "0101 1100 0101 0---") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp index 3db09d0c2..ac1433dea 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp @@ -30,7 +30,7 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)}; const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; - IR::FpControl control{ + const IR::FpControl control{ .no_contraction{true}, .rounding{CastFpRounding(dadd.fp_rounding)}, .fmz_mode{IR::FmzMode::None}, diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp new file mode 100644 index 000000000..ff7321862 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp @@ -0,0 +1,53 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<50, 2, FpRounding> fp_rounding; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_c; + } const dfma{insn}; + + const IR::F64 src_a{v.D(dfma.src_a_reg)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)}; + const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)}; + + const IR::FpControl control{ + .no_contraction{true}, + .rounding{CastFpRounding(dfma.fp_rounding)}, + .fmz_mode{IR::FmzMode::None}, + }; + + v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control)); +} +} // Anonymous namespace + +void TranslatorVisitor::DFMA_reg(u64 insn) { + DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn)); +} + +void TranslatorVisitor::DFMA_cr(u64 insn) { + DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn)); +} + +void TranslatorVisitor::DFMA_rc(u64 insn) { + DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DFMA_imm(u64 insn) { + DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp new file mode 100644 index 000000000..3e83d1c95 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp @@ -0,0 +1,45 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 2, FpRounding> fp_rounding; + BitField<48, 1, u64> neg; + } const dmul{insn}; + + const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)}; + const IR::FpControl control{ + .no_contraction{true}, + .rounding{CastFpRounding(dmul.fp_rounding)}, + .fmz_mode{IR::FmzMode::None}, + }; + + v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control)); +} +} // Anonymous namespace + +void TranslatorVisitor::DMUL_reg(u64 insn) { + DMUL(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DMUL_cbuf(u64 insn) { + DMUL(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DMUL_imm(u64 insn) { + DMUL(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 2d2f6f9c6..758a0230a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -90,6 +90,14 @@ IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) { return D(reg.index); } +IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) { + union { + u64 raw; + BitField<39, 8, IR::Reg> index; + } const reg{insn}; + return D(reg.index); +} + static std::pair CbufAddr(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 1a1073fa7..c994fe803 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -354,6 +354,7 @@ public: [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn); + [[nodiscard]] IR::F64 GetDoubleReg39(u64 insn); [[nodiscard]] IR::U32 GetCbuf(u64 insn); [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index bd3c1f9d6..4e069912a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -81,22 +81,6 @@ void TranslatorVisitor::DEPBAR() { // DEPBAR is a no-op } -void TranslatorVisitor::DFMA_reg(u64) { - ThrowNotImplemented(Opcode::DFMA_reg); -} - -void TranslatorVisitor::DFMA_rc(u64) { - ThrowNotImplemented(Opcode::DFMA_rc); -} - -void TranslatorVisitor::DFMA_cr(u64) { - ThrowNotImplemented(Opcode::DFMA_cr); -} - -void TranslatorVisitor::DFMA_imm(u64) { - ThrowNotImplemented(Opcode::DFMA_imm); -} - void TranslatorVisitor::DMNMX_reg(u64) { ThrowNotImplemented(Opcode::DMNMX_reg); } @@ -109,18 +93,6 @@ void TranslatorVisitor::DMNMX_imm(u64) { ThrowNotImplemented(Opcode::DMNMX_imm); } -void TranslatorVisitor::DMUL_reg(u64) { - ThrowNotImplemented(Opcode::DMUL_reg); -} - -void TranslatorVisitor::DMUL_cbuf(u64) { - ThrowNotImplemented(Opcode::DMUL_cbuf); -} - -void TranslatorVisitor::DMUL_imm(u64) { - ThrowNotImplemented(Opcode::DMUL_imm); -} - void TranslatorVisitor::DSET_reg(u64) { ThrowNotImplemented(Opcode::DSET_reg); } -- cgit v1.2.3 From a62f04efab4331eeabd4441962f86a5e87db3f2d Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 09:32:16 +0100 Subject: shader: Implement F2F --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 12 +- src/shader_recompiler/frontend/ir/ir_emitter.h | 3 +- .../floating_point_conversion_floating_point.cpp | 180 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 -- 4 files changed, 188 insertions(+), 19 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 1eda95071..00c909f3e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1361,7 +1361,7 @@ U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); } -F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { +F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value, FpControl control) { switch (result_bitsize) { case 16: switch (value.Type()) { @@ -1369,7 +1369,7 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { // Nothing to do return value; case Type::F32: - return Inst(Opcode::ConvertF16F32, value); + return Inst(Opcode::ConvertF16F32, Flags{control}, value); case Type::F64: throw LogicError("Illegal conversion from F64 to F16"); default: @@ -1379,12 +1379,12 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { case 32: switch (value.Type()) { case Type::F16: - return Inst(Opcode::ConvertF32F16, value); + return Inst(Opcode::ConvertF32F16, Flags{control}, value); case Type::F32: // Nothing to do return value; case Type::F64: - return Inst(Opcode::ConvertF32F64, value); + return Inst(Opcode::ConvertF32F64, Flags{control}, value); default: break; } @@ -1394,10 +1394,10 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { case Type::F16: throw LogicError("Illegal conversion from F16 to F64"); case Type::F32: + return Inst(Opcode::ConvertF64F32, Flags{control}, value); + case Type::F64: // Nothing to do return value; - case Type::F64: - return Inst(Opcode::ConvertF32F64, value); default: break; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index ab4537d88..346cef3ab 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -216,7 +216,8 @@ public: const Value& value); [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); - [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); + [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value, + FpControl control = {}); [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias, const Value& offset, diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp new file mode 100644 index 000000000..1e366fde0 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -0,0 +1,180 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" + +namespace Shader::Maxwell { +namespace { +enum class FloatFormat : u64 { + F16 = 1, + F32 = 2, + F64 = 3, +}; + +enum class RoundingOp : u64 { + None = 0, + Pass = 3, + Round = 8, + Floor = 9, + Ceil = 10, + Trunc = 11, +}; + +[[nodiscard]] u32 WidthSize(FloatFormat width) { + switch (width) { + case FloatFormat::F16: + return 16; + case FloatFormat::F32: + return 32; + case FloatFormat::F64: + return 64; + default: + throw NotImplementedException("Invalid width {}", width); + } +} + +void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> neg; + BitField<50, 1, u64> sat; + BitField<39, 4, u64> rounding_op; + BitField<39, 2, FpRounding> rounding; + BitField<10, 2, FloatFormat> src_size; + BitField<8, 2, FloatFormat> dst_size; + + [[nodiscard]] RoundingOp RoundingOperation() const { + constexpr u64 rounding_mask = 0x0B; + return static_cast(rounding_op.Value() & rounding_mask); + } + } const f2f{insn}; + + IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)}; + + const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64}; + IR::FpControl fp_control{ + .no_contraction{false}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + if (f2f.src_size != f2f.dst_size) { + fp_control.rounding = CastFpRounding(f2f.rounding); + input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control); + } else { + switch (f2f.RoundingOperation()) { + case RoundingOp::None: + case RoundingOp::Pass: + // Make sure NANs are handled properly + switch (f2f.src_size) { + case FloatFormat::F16: + input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control); + break; + case FloatFormat::F32: + input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control); + break; + case FloatFormat::F64: + input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control); + break; + } + break; + case RoundingOp::Round: + input = v.ir.FPRoundEven(input, fp_control); + break; + case RoundingOp::Floor: + input = v.ir.FPFloor(input, fp_control); + break; + case RoundingOp::Ceil: + input = v.ir.FPCeil(input, fp_control); + break; + case RoundingOp::Trunc: + input = v.ir.FPTrunc(input, fp_control); + break; + default: + throw NotImplementedException("Unimplemented rounding mode {}", f2f.rounding.Value()); + } + } + if (f2f.sat != 0 && !any_fp64) { + input = v.ir.FPSaturate(input); + } + + switch (f2f.dst_size) { + case FloatFormat::F16: { + const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; + v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm))); + break; + } + case FloatFormat::F32: + v.F(f2f.dest_reg, input); + break; + case FloatFormat::F64: + v.D(f2f.dest_reg, input); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value()); + } +} +} // Anonymous namespace + +void TranslatorVisitor::F2F_reg(u64 insn) { + union { + u64 insn; + BitField<49, 1, u64> abs; + BitField<10, 2, FloatFormat> src_size; + BitField<41, 1, u64> selector; + } const f2f{insn}; + + IR::F16F32F64 src_a; + switch (f2f.src_size) { + case FloatFormat::F16: { + auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)}; + src_a = f2f.selector != 0 ? rhs_a : lhs_a; + break; + } + case FloatFormat::F32: + src_a = GetFloatReg20(insn); + break; + case FloatFormat::F64: + src_a = GetDoubleReg20(insn); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); + } + F2F(*this, insn, src_a, f2f.abs != 0); +} + +void TranslatorVisitor::F2F_cbuf(u64 insn) { + union { + u64 insn; + BitField<49, 1, u64> abs; + BitField<10, 2, FloatFormat> src_size; + BitField<41, 1, u64> selector; + } const f2f{insn}; + + IR::F16F32F64 src_a; + switch (f2f.src_size) { + case FloatFormat::F16: { + auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)}; + src_a = f2f.selector != 0 ? rhs_a : lhs_a; + break; + } + case FloatFormat::F32: + src_a = GetFloatCbuf(insn); + break; + case FloatFormat::F64: + src_a = GetDoubleCbuf(insn); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); + } + F2F(*this, insn, src_a, f2f.abs != 0); +} + +void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) { + throw NotImplementedException("Instruction"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 4e069912a..08f6eb788 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -117,18 +117,6 @@ void TranslatorVisitor::DSETP_imm(u64) { ThrowNotImplemented(Opcode::DSETP_imm); } -void TranslatorVisitor::F2F_reg(u64) { - ThrowNotImplemented(Opcode::F2F_reg); -} - -void TranslatorVisitor::F2F_cbuf(u64) { - ThrowNotImplemented(Opcode::F2F_cbuf); -} - -void TranslatorVisitor::F2F_imm(u64) { - ThrowNotImplemented(Opcode::F2F_imm); -} - void TranslatorVisitor::FCHK_reg(u64) { ThrowNotImplemented(Opcode::FCHK_reg); } -- cgit v1.2.3 From 56be556eee65335cdc896bb1eb47999d04850b77 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 18:23:16 +0100 Subject: shader: Implement FADD32I --- .../maxwell/translate/impl/floating_point_add.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp index 76a807d4e..487198aa6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -64,8 +64,21 @@ void TranslatorVisitor::FADD_imm(u64 insn) { FADD(*this, insn, GetFloatImm20(insn)); } -void TranslatorVisitor::FADD32I(u64) { - throw NotImplementedException("FADD32I"); +void TranslatorVisitor::FADD32I(u64 insn) { + union { + u64 raw; + BitField<55, 1, u64> ftz; + BitField<53, 1, u64> neg_b; + BitField<54, 1, u64> abs_a; + BitField<52, 1, u64> cc; + BitField<56, 1, u64> neg_a; + BitField<57, 1, u64> abs_b; + BitField<50, 1, u64> sat; + } const fadd32i{insn}; + + FADD(*this, insn, fadd32i.sat != 0, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, + GetFloatImm32(insn), fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, + fadd32i.neg_b != 0); } } // namespace Shader::Maxwell -- cgit v1.2.3 From e4e1cc11b8f7649171fe922b2899e57120bfba53 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 21 Mar 2021 19:28:37 -0400 Subject: shader: Implement DMNMX, DSET, DSETP --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 2 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + src/shader_recompiler/frontend/maxwell/maxwell.inc | 4 +- .../translate/impl/double_compare_and_set.cpp | 59 ++++++++++++++++++++++ .../maxwell/translate/impl/double_min_max.cpp | 50 ++++++++++++++++++ .../translate/impl/double_set_predicate.cpp | 54 ++++++++++++++++++++ .../translate/impl/floating_point_min_max.cpp | 2 +- .../maxwell/translate/impl/not_implemented.cpp | 36 ------------- 8 files changed, 169 insertions(+), 39 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 00c909f3e..432dd29a5 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -529,6 +529,8 @@ Value IREmitter::Select(const U1& condition, const Value& true_value, const Valu return Inst(Opcode::SelectU64, condition, true_value, false_value); case Type::F32: return Inst(Opcode::SelectF32, condition, true_value, false_value); + case Type::F64: + return Inst(Opcode::SelectF64, condition, true_value, false_value); default: throw InvalidArgument("Invalid type {}", true_value.Type()); } diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 884eea7a8..bdc07b9a7 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -131,6 +131,7 @@ OPCODE(SelectU32, U32, U1, OPCODE(SelectU64, U64, U1, U64, U64, ) OPCODE(SelectF16, F16, U1, F16, F16, ) OPCODE(SelectF32, F32, U1, F32, F32, ) +OPCODE(SelectF64, F64, U1, F64, F64, ) // Bitwise conversions OPCODE(BitCastU16F16, U16, F16, ) diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index 1dfaeb92f..c6cd2a79b 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -37,8 +37,8 @@ INST(DFMA_reg, "DFMA (reg)", "0101 1011 0111 ----") INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----") INST(DFMA_cr, "DFMA (cr)", "0100 1011 0111 ----") INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----") -INST(DMNMX_reg, "DMNMX (reg)", "0100 1100 0101 0---") -INST(DMNMX_cbuf, "DMNMX (cbuf)", "0101 1100 0101 0---") +INST(DMNMX_reg, "DMNMX (reg)", "0101 1100 0101 0---") +INST(DMNMX_cbuf, "DMNMX (cbuf)", "0100 1100 0101 0---") INST(DMNMX_imm, "DMNMX (imm)", "0011 100- 0101 0---") INST(DMUL_reg, "DMUL (reg)", "0101 1100 1000 0---") INST(DMUL_cbuf, "DMUL (cbuf)", "0100 1100 1000 0---") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp new file mode 100644 index 000000000..e2ec852c9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp @@ -0,0 +1,59 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<48, 4, FPCompareOp> compare_op; + BitField<52, 1, u64> bf; + BitField<53, 1, u64> negate_b; + BitField<54, 1, u64> abs_a; + } const dset{insn}; + + const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)}; + + IR::U1 pred{v.ir.GetPred(dset.pred)}; + if (dset.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)}; + const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)}; + + const IR::U32 one_mask{v.ir.Imm32(-1)}; + const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; + const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 pass_result{dset.bf == 0 ? one_mask : fp_one}; + + v.X(dset.dest_reg, IR::U32{v.ir.Select(bop_result, pass_result, fail_result)}); +} +} // Anonymous namespace + +void TranslatorVisitor::DSET_reg(u64 insn) { + DSET(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DSET_cbuf(u64 insn) { + DSET(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DSET_imm(u64 insn) { + DSET(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp new file mode 100644 index 000000000..55a224db3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp @@ -0,0 +1,50 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<45, 1, u64> negate_b; + BitField<46, 1, u64> abs_a; + BitField<48, 1, u64> negate_a; + BitField<49, 1, u64> abs_b; + } const dmnmx{insn}; + + const IR::U1 pred{v.ir.GetPred(dmnmx.pred)}; + const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)}; + + IR::F64 max{v.ir.FPMax(op_a, op_b)}; + IR::F64 min{v.ir.FPMin(op_a, op_b)}; + + if (dmnmx.neg_pred != 0) { + std::swap(min, max); + } + v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(pred, min, max)}); +} +} // Anonymous namespace + +void TranslatorVisitor::DMNMX_reg(u64 insn) { + DMNMX(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DMNMX_cbuf(u64 insn) { + DMNMX(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DMNMX_imm(u64 insn) { + DMNMX(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp new file mode 100644 index 000000000..b8e74ee44 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp @@ -0,0 +1,54 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 insn; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<6, 1, u64> negate_b; + BitField<7, 1, u64> abs_a; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<48, 4, FPCompareOp> compare_op; + } const dsetp{insn}; + + const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dsetp.src_a_reg), dsetp.abs_a != 0, dsetp.negate_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dsetp.abs_b != 0, dsetp.negate_b != 0)}; + + const BooleanOp bop{dsetp.bop}; + const FPCompareOp compare_op{dsetp.compare_op}; + const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op)}; + const IR::U1 bop_pred{v.ir.GetPred(dsetp.bop_pred, dsetp.neg_bop_pred != 0)}; + const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; + v.ir.SetPred(dsetp.dest_pred_a, result_a); + v.ir.SetPred(dsetp.dest_pred_b, result_b); +} +} // Anonymous namespace + +void TranslatorVisitor::DSETP_reg(u64 insn) { + DSETP(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DSETP_cbuf(u64 insn) { + DSETP(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DSETP_imm(u64 insn) { + DSETP(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp index c3180a9bd..343d91032 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp @@ -24,7 +24,7 @@ void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::U1 pred{v.ir.GetPred(fmnmx.pred)}; const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)}; - const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0); + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)}; const IR::FpControl control{ .no_contraction{false}, diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 08f6eb788..27b12ff3c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -81,42 +81,6 @@ void TranslatorVisitor::DEPBAR() { // DEPBAR is a no-op } -void TranslatorVisitor::DMNMX_reg(u64) { - ThrowNotImplemented(Opcode::DMNMX_reg); -} - -void TranslatorVisitor::DMNMX_cbuf(u64) { - ThrowNotImplemented(Opcode::DMNMX_cbuf); -} - -void TranslatorVisitor::DMNMX_imm(u64) { - ThrowNotImplemented(Opcode::DMNMX_imm); -} - -void TranslatorVisitor::DSET_reg(u64) { - ThrowNotImplemented(Opcode::DSET_reg); -} - -void TranslatorVisitor::DSET_cbuf(u64) { - ThrowNotImplemented(Opcode::DSET_cbuf); -} - -void TranslatorVisitor::DSET_imm(u64) { - ThrowNotImplemented(Opcode::DSET_imm); -} - -void TranslatorVisitor::DSETP_reg(u64) { - ThrowNotImplemented(Opcode::DSETP_reg); -} - -void TranslatorVisitor::DSETP_cbuf(u64) { - ThrowNotImplemented(Opcode::DSETP_cbuf); -} - -void TranslatorVisitor::DSETP_imm(u64) { - ThrowNotImplemented(Opcode::DSETP_imm); -} - void TranslatorVisitor::FCHK_reg(u64) { ThrowNotImplemented(Opcode::FCHK_reg); } -- cgit v1.2.3 From 96b7ced6ec32ccd3da94ebfcfe74a7568cce509f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Mar 2021 21:01:47 -0300 Subject: shader: Better but still partial interpolation support --- .../frontend/maxwell/translate/impl/load_store_attribute.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index 2922145ee..516ffec2d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -141,14 +141,16 @@ void TranslatorVisitor::IPA(u64 insn) { const IR::Attribute attribute{ipa.attribute}; IR::F32 value{ir.GetAttribute(attribute)}; if (IR::IsGeneric(attribute)) { - // const bool is_perspective{UnimplementedReadHeader(GenericAttributeIndex(attribute))}; - const bool is_perspective{false}; + const ProgramHeader& sph{env.SPH()}; + const u32 attr_index{IR::GenericAttributeIndex(attribute)}; + const u32 element{static_cast(attribute) % 4}; + const std::array input_map{sph.ps.GenericInputMap(attr_index)}; + const bool is_perspective{input_map[element] == Shader::PixelImap::Perspective}; if (is_perspective) { - const IR::F32 rcp_position_w{ir.FPRecip(ir.GetAttribute(IR::Attribute::PositionW))}; - value = ir.FPMul(value, rcp_position_w); + const IR::F32 position_w{ir.GetAttribute(IR::Attribute::PositionW)}; + value = ir.FPMul(value, position_w); } } - switch (ipa.interpolation_mode) { case InterpolationMode::Pass: break; -- cgit v1.2.3 From a8d8fd40f7d7b249c542e4694953e2da5998fbaf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 23 Mar 2021 14:39:59 -0300 Subject: shader: Fix TEX mask --- .../frontend/maxwell/translate/impl/texture_fetch.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index b691b4d1f..d2626f3e7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -188,6 +188,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, } }()}; + IR::Reg dest_reg{tex.dest_reg}; for (int element = 0; element < 4; ++element) { if (((tex.mask >> element) & 1) == 0) { continue; @@ -198,7 +199,8 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, } else { value = IR::F32{v.ir.CompositeExtract(sample, element)}; } - v.F(tex.dest_reg + element, value); + v.F(dest_reg, value); + ++dest_reg; } if (tex.sparse_pred != IR::Pred::PT) { v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); -- cgit v1.2.3 From 3d07cef009cf9e287744c7771c67166ef5761ce8 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 23 Mar 2021 20:27:17 -0400 Subject: shader: Implement VOTE --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 16 +++++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 5 +++ src/shader_recompiler/frontend/ir/opcodes.inc | 6 +++ .../maxwell/translate/impl/not_implemented.cpp | 4 -- .../frontend/maxwell/translate/impl/vote.cpp | 52 ++++++++++++++++++++++ 5 files changed, 79 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 432dd29a5..ff2970125 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1444,4 +1444,20 @@ F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coor return Inst(op, Flags{info}, handle, coords, dref, lod_lc, offset); } +U1 IREmitter::VoteAll(const U1& value) { + return Inst(Opcode::VoteAll, value); +} + +U1 IREmitter::VoteAny(const U1& value) { + return Inst(Opcode::VoteAny, value); +} + +U1 IREmitter::VoteEqual(const U1& value) { + return Inst(Opcode::VoteEqual, value); +} + +U32 IREmitter::SubgroupBallot(const U1& value) { + return Inst(Opcode::SubgroupBallot, value); +} + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 346cef3ab..1708be3ef 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -234,6 +234,11 @@ public: const Value& offset, const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] U1 VoteAll(const U1& value); + [[nodiscard]] U1 VoteAny(const U1& value); + [[nodiscard]] U1 VoteEqual(const U1& value); + [[nodiscard]] U32 SubgroupBallot(const U1& value); + private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index bdc07b9a7..fe888b8b2 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -355,3 +355,9 @@ OPCODE(ImageSampleImplicitLod, F32x4, U32, OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) + +// Vote operations +OPCODE(VoteAll, U1, U1, ) +OPCODE(VoteAny, U1, U1, ) +OPCODE(VoteEqual, U1, U1, ) +OPCODE(SubgroupBallot, U32, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 27b12ff3c..c0e36a7e2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -417,10 +417,6 @@ void TranslatorVisitor::VMNMX(u64) { ThrowNotImplemented(Opcode::VMNMX); } -void TranslatorVisitor::VOTE(u64) { - ThrowNotImplemented(Opcode::VOTE); -} - void TranslatorVisitor::VOTE_vtg(u64) { ThrowNotImplemented(Opcode::VOTE_vtg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp new file mode 100644 index 000000000..a88894a7e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -0,0 +1,52 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class VoteOp : u64 { + ALL, + ANY, + EQ, +}; + +[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) { + switch (vote_op) { + case VoteOp::ALL: + return ir.VoteAll(pred); + case VoteOp::ANY: + return ir.VoteAny(pred); + case VoteOp::EQ: + return ir.VoteEqual(pred); + default: + throw NotImplementedException("Invalid VOTE op {}", vote_op); + } +} + +void Vote(TranslatorVisitor& v, u64 insn) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 3, IR::Pred> pred_a; + BitField<42, 1, u64> neg_pred_a; + BitField<45, 3, IR::Pred> pred_b; + BitField<48, 2, VoteOp> vote_op; + } const vote{insn}; + + const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)}; + v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op)); + v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred)); +} +} // Anonymous namespace + +void TranslatorVisitor::VOTE(u64 insn) { + Vote(*this, insn); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From 68a9505d8a1d00c6ba2739bc0af3069cf87b9b84 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Mar 2021 01:33:45 -0300 Subject: shader: Implement NDC [-1, 1], attribute types and default varying initialization --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 8 ++++++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 3 +++ src/shader_recompiler/frontend/ir/microinstruction.cpp | 2 ++ src/shader_recompiler/frontend/ir/opcodes.inc | 4 ++++ .../frontend/maxwell/structured_control_flow.cpp | 7 ++++++- 5 files changed, 23 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ff2970125..ce610799a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -92,6 +92,14 @@ void IREmitter::DemoteToHelperInvocation(Block* continue_label) { Inst(Opcode::DemoteToHelperInvocation, continue_label); } +void IREmitter::Prologue() { + Inst(Opcode::Prologue); +} + +void IREmitter::Epilogue() { + Inst(Opcode::Epilogue); +} + U32 IREmitter::GetReg(IR::Reg reg) { return Inst(Opcode::GetRegister, reg); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 1708be3ef..39109b0de 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -39,6 +39,9 @@ public: void Return(); void DemoteToHelperInvocation(Block* continue_label); + void Prologue(); + void Epilogue(); + [[nodiscard]] U32 GetReg(IR::Reg reg); void SetReg(IR::Reg reg, const U32& value); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 21b7d8a9f..ba3968056 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -56,6 +56,8 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::SelectionMerge: case Opcode::Return: case Opcode::DemoteToHelperInvocation: + case Opcode::Prologue: + case Opcode::Epilogue: case Opcode::SetAttribute: case Opcode::SetAttributeIndexed: case Opcode::SetFragColor: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index fe888b8b2..8945c7b04 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -15,6 +15,10 @@ OPCODE(SelectionMerge, Void, Labe OPCODE(Return, Void, ) OPCODE(DemoteToHelperInvocation, Void, Label, ) +// Special operations +OPCODE(Prologue, Void, ) +OPCODE(Epilogue, Void, ) + // Context getters/setters OPCODE(GetRegister, U32, Reg, ) OPCODE(SetRegister, Void, Reg, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index cec03e73e..fdac1c95a 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -634,6 +634,9 @@ public: : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, block_list{block_list_} { Visit(root_stmt, nullptr, nullptr); + + IR::IREmitter ir{*block_list.front()}; + ir.Prologue(); } private: @@ -734,7 +737,9 @@ private: current_block = block_pool.Create(inst_pool); block_list.push_back(current_block); } - IR::IREmitter{*current_block}.Return(); + IR::IREmitter ir{*current_block}; + ir.Epilogue(); + ir.Return(); current_block = nullptr; break; } -- cgit v1.2.3 From 8cb9443cb99c4510e6ef26a91d09a31a8fa6281f Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 24 Mar 2021 00:02:30 +0100 Subject: shader: Fix F2I --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 18 +++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 3 + .../impl/floating_point_conversion_integer.cpp | 88 ++++++++++++++++++++-- .../frontend/maxwell/translate/impl/impl.cpp | 17 +++++ .../frontend/maxwell/translate/impl/impl.h | 2 + 6 files changed, 124 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ce610799a..6280c08f6 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -731,6 +731,24 @@ F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { } } +F16F32F64 IREmitter::FPClamp(const F16F32F64& value, const F16F32F64& min_value, + const F16F32F64& max_value) { + if (value.Type() != min_value.Type() || value.Type() != max_value.Type()) { + throw InvalidArgument("Mismatching types {}, {}, and {}", value.Type(), min_value.Type(), + max_value.Type()); + } + switch (value.Type()) { + case Type::F16: + return Inst(Opcode::FPClamp16, value, min_value, max_value); + case Type::F32: + return Inst(Opcode::FPClamp32, value, min_value, max_value); + case Type::F64: + return Inst(Opcode::FPClamp64, value, min_value, max_value); + default: + ThrowInvalidType(value.Type()); + } +} + F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 39109b0de..ebbda78a9 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -147,6 +147,7 @@ public: [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); [[nodiscard]] F32 FPSqrt(const F32& value); [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, const F16F32F64& max_value); [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 8945c7b04..dd17212a1 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -192,6 +192,9 @@ OPCODE(FPLog2, F32, F32, OPCODE(FPSaturate16, F16, F16, ) OPCODE(FPSaturate32, F32, F32, ) OPCODE(FPSaturate64, F64, F64, ) +OPCODE(FPClamp16, F16, F16, F16, F16, ) +OPCODE(FPClamp32, F32, F32, F32, F32, ) +OPCODE(FPClamp64, F64, F64, F64, F64, ) OPCODE(FPRoundEven16, F16, F16, ) OPCODE(FPRoundEven32, F32, F32, ) OPCODE(FPRoundEven64, F64, F64, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 81175627f..7c5a72800 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include + #include "common/common_types.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/maxwell/opcodes.h" @@ -55,6 +57,37 @@ size_t BitSize(DestFormat dest_format) { } } +std::pair ClampBounds(DestFormat format, bool is_signed) { + if (is_signed) { + switch (format) { + case DestFormat::I16: + return {static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::min())}; + case DestFormat::I32: + return {static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::min())}; + case DestFormat::I64: + return {static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::min())}; + default: {} + } + } else { + switch (format) { + case DestFormat::I16: + return {static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::min())}; + case DestFormat::I32: + return {static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::min())}; + case DestFormat::I64: + return {static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::min())}; + default: {} + } + } + throw NotImplementedException("Invalid destination format {}", format); +} + IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) { union { u64 raw; @@ -112,13 +145,58 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { // For example converting F32 65537.0 to U16, the expected value is 0xffff, const bool is_signed{f2i.is_signed != 0}; - const size_t bitsize{BitSize(f2i.dest_format)}; - const IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, rounded_value)}; + const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed); + + IR::F16F32F64 intermediate; + switch (f2i.src_format) { + case SrcFormat::F16: { + const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast(max_bound)))}; + const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast(min_bound)))}; + intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); + break; + } + case SrcFormat::F32: { + const IR::F32 max_val{v.ir.Imm32(static_cast(max_bound))}; + const IR::F32 min_val{v.ir.Imm32(static_cast(min_bound))}; + intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); + break; + } + case SrcFormat::F64: { + const IR::F64 max_val{v.ir.Imm64(max_bound)}; + const IR::F64 min_val{v.ir.Imm64(min_bound)}; + intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); + break; + } + default: + throw NotImplementedException("Invalid destination format {}", f2i.dest_format.Value()); + } + + const size_t bitsize{std::max(32, BitSize(f2i.dest_format))}; + IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)}; + + bool handled_special_case = false; + const bool special_nan_cases = + (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64); + if (special_nan_cases) { + if (f2i.dest_format == DestFormat::I32) { + handled_special_case = true; + result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)}; + } else if (f2i.dest_format == DestFormat::I64) { + handled_special_case = true; + result = IR::U64{ + v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000ULL), result)}; + } + } + if (!handled_special_case && is_signed) { + if (bitsize != 64) { + result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)}; + } else { + result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0ULL), result)}; + } + } if (bitsize == 64) { - const IR::Value vector{v.ir.UnpackUint2x32(result)}; - v.X(f2i.dest_reg + 0, IR::U32{v.ir.CompositeExtract(vector, 0)}); - v.X(f2i.dest_reg + 1, IR::U32{v.ir.CompositeExtract(vector, 1)}); + v.L(f2i.dest_reg, result); } else { v.X(f2i.dest_reg, result); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 758a0230a..9bae89c10 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -21,6 +21,13 @@ IR::U32 TranslatorVisitor::X(IR::Reg reg) { return ir.GetReg(reg); } +IR::U64 TranslatorVisitor::L(IR::Reg reg) { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; +} + IR::F32 TranslatorVisitor::F(IR::Reg reg) { return ir.BitCast(X(reg)); } @@ -36,6 +43,16 @@ void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { ir.SetReg(dest_reg, value); } +void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) { + if (!IR::IsAligned(dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register {}", dest_reg); + } + const IR::Value result{ir.UnpackUint2x32(value)}; + for (int i = 0; i < 2; i++) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + } +} + void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) { X(dest_reg, ir.BitCast(value)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index c994fe803..54c31deb4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -341,10 +341,12 @@ public: void XMAD_imm(u64 insn); [[nodiscard]] IR::U32 X(IR::Reg reg); + [[nodiscard]] IR::U64 L(IR::Reg reg); [[nodiscard]] IR::F32 F(IR::Reg reg); [[nodiscard]] IR::F64 D(IR::Reg reg); void X(IR::Reg dest_reg, const IR::U32& value); + void L(IR::Reg dest_reg, const IR::U64& value); void F(IR::Reg dest_reg, const IR::F32& value); void D(IR::Reg dest_reg, const IR::F64& value); -- cgit v1.2.3 From 83a283fa867d0a09742faff11d9115acc95ea556 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Mar 2021 01:36:59 -0300 Subject: shader: Minor style nits --- .../maxwell/translate/impl/floating_point_conversion_integer.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 7c5a72800..ef55b9c75 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -69,7 +69,8 @@ std::pair ClampBounds(DestFormat format, bool is_signed) { case DestFormat::I64: return {static_cast(std::numeric_limits::max()), static_cast(std::numeric_limits::min())}; - default: {} + default: + break; } } else { switch (format) { @@ -82,7 +83,8 @@ std::pair ClampBounds(DestFormat format, bool is_signed) { case DestFormat::I64: return {static_cast(std::numeric_limits::max()), static_cast(std::numeric_limits::min())}; - default: {} + default: + break; } } throw NotImplementedException("Invalid destination format {}", format); -- cgit v1.2.3 From d3dad6b6320f680b4e85ab991941d15cbce0e616 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Mar 2021 19:37:44 -0300 Subject: shader: Properly insert Prologue instruction --- src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index fdac1c95a..79e344986 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -635,7 +635,8 @@ public: block_list{block_list_} { Visit(root_stmt, nullptr, nullptr); - IR::IREmitter ir{*block_list.front()}; + IR::Block& first_block{*block_list.front()}; + IR::IREmitter ir{first_block, first_block.begin()}; ir.Prologue(); } -- cgit v1.2.3 From 32c5483beb2f79f5d55eb2906f2bfdfa1698bca3 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 25 Mar 2021 11:31:37 -0400 Subject: shader: Implement SHFL --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 23 ++++++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 12 +++- .../frontend/ir/microinstruction.cpp | 12 ++++ .../frontend/ir/microinstruction.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 7 ++- .../maxwell/translate/impl/integer_scaled_add.cpp | 4 +- .../maxwell/translate/impl/not_implemented.cpp | 4 -- .../maxwell/translate/impl/warp_shuffle.cpp | 69 ++++++++++++++++++++++ 8 files changed, 124 insertions(+), 8 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 6280c08f6..418b7f5ac 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -374,6 +374,10 @@ U1 IREmitter::GetSparseFromOp(const Value& op) { return Inst(Opcode::GetSparseFromOp, op); } +U1 IREmitter::GetInBoundsFromOp(const Value& op) { + return Inst(Opcode::GetInBoundsFromOp, op); +} + F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { if (a.Type() != b.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); @@ -1486,4 +1490,23 @@ U32 IREmitter::SubgroupBallot(const U1& value) { return Inst(Opcode::SubgroupBallot, value); } +U32 IREmitter::ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask) { + return Inst(Opcode::ShuffleIndex, value, index, clamp, seg_mask); +} + +U32 IREmitter::ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask) { + return Inst(Opcode::ShuffleUp, value, index, clamp, seg_mask); +} + +U32 IREmitter::ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask) { + return Inst(Opcode::ShuffleDown, value, index, clamp, seg_mask); +} + +U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask) { + return Inst(Opcode::ShuffleButterfly, value, index, clamp, seg_mask); +} } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index ebbda78a9..64738735e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -104,6 +104,7 @@ public: [[nodiscard]] U1 GetCarryFromOp(const Value& op); [[nodiscard]] U1 GetOverflowFromOp(const Value& op); [[nodiscard]] U1 GetSparseFromOp(const Value& op); + [[nodiscard]] U1 GetInBoundsFromOp(const Value& op); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); @@ -147,7 +148,8 @@ public: [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); [[nodiscard]] F32 FPSqrt(const F32& value); [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, const F16F32F64& max_value); + [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, + const F16F32F64& max_value); [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); @@ -242,6 +244,14 @@ public: [[nodiscard]] U1 VoteAny(const U1& value); [[nodiscard]] U1 VoteEqual(const U1& value); [[nodiscard]] U32 SubgroupBallot(const U1& value); + [[nodiscard]] U32 ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask); + [[nodiscard]] U32 ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask); + [[nodiscard]] U32 ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, + const IR::U32& seg_mask); + [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index, + const IR::U32& clamp, const IR::U32& seg_mask); private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index ba3968056..be8eb4d4c 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -89,6 +89,7 @@ bool Inst::IsPseudoInstruction() const noexcept { case Opcode::GetCarryFromOp: case Opcode::GetOverflowFromOp: case Opcode::GetSparseFromOp: + case Opcode::GetInBoundsFromOp: return true; default: return false; @@ -123,6 +124,9 @@ Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { case Opcode::GetSparseFromOp: CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp); return associated_insts->sparse_inst; + case Opcode::GetInBoundsFromOp: + CheckPseudoInstruction(associated_insts->in_bounds_inst, Opcode::GetInBoundsFromOp); + return associated_insts->in_bounds_inst; default: throw InvalidArgument("{} is not a pseudo-instruction", opcode); } @@ -262,6 +266,10 @@ void Inst::Use(const Value& value) { AllocAssociatedInsts(assoc_inst); SetPseudoInstruction(assoc_inst->sparse_inst, this); break; + case Opcode::GetInBoundsFromOp: + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->in_bounds_inst, this); + break; default: break; } @@ -289,6 +297,10 @@ void Inst::UndoUse(const Value& value) { AllocAssociatedInsts(assoc_inst); RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp); break; + case Opcode::GetInBoundsFromOp: + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->in_bounds_inst, Opcode::GetInBoundsFromOp); + break; default: break; } diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index d5336c438..770bbd550 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -134,6 +134,7 @@ static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); struct AssociatedInsts { union { + Inst* in_bounds_inst; Inst* sparse_inst; Inst* zero_inst{}; }; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index dd17212a1..a2479c46a 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -159,6 +159,7 @@ OPCODE(GetSignFromOp, U1, Opaq OPCODE(GetCarryFromOp, U1, Opaque, ) OPCODE(GetOverflowFromOp, U1, Opaque, ) OPCODE(GetSparseFromOp, U1, Opaque, ) +OPCODE(GetInBoundsFromOp, U1, Opaque, ) // Floating-point operations OPCODE(FPAbs16, F16, F16, ) @@ -363,8 +364,12 @@ OPCODE(ImageSampleExplicitLod, F32x4, U32, OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) -// Vote operations +// Warp operations OPCODE(VoteAll, U1, U1, ) OPCODE(VoteAny, U1, U1, ) OPCODE(VoteEqual, U1, U1, ) OPCODE(SubgroupBallot, U32, U1, ) +OPCODE(ShuffleIndex, U32, U32, U32, U32, U32, ) +OPCODE(ShuffleUp, U32, U32, U32, U32, U32, ) +OPCODE(ShuffleDown, U32, U32, U32, U32, U32, ) +OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp index 5469e445a..42fd42bb1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -53,8 +53,8 @@ void TranslatorVisitor::ISCADD_reg(u64 insn) { ISCADD(*this, insn, GetReg20(insn)); } -void TranslatorVisitor::ISCADD_cbuf(u64) { - throw NotImplementedException("ISCADD (cbuf)"); +void TranslatorVisitor::ISCADD_cbuf(u64 insn) { + ISCADD(*this, insn, GetCbuf(insn)); } void TranslatorVisitor::ISCADD_imm(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index c0e36a7e2..3ccd7b925 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -301,10 +301,6 @@ void TranslatorVisitor::SETLMEMBASE(u64) { ThrowNotImplemented(Opcode::SETLMEMBASE); } -void TranslatorVisitor::SHFL(u64) { - ThrowNotImplemented(Opcode::SHFL); -} - void TranslatorVisitor::SSY() { // SSY is a no-op } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp new file mode 100644 index 000000000..550fed55c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp @@ -0,0 +1,69 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class ShuffleMode : u64 { + IDX, + UP, + DOWN, + BFLY, +}; + +[[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value, + const IR::U32& index, const IR::U32& mask, + ShuffleMode shfl_op) { + const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))}; + const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))}; + switch (shfl_op) { + case ShuffleMode::IDX: + return ir.ShuffleIndex(value, index, clamp, seg_mask); + case ShuffleMode::UP: + return ir.ShuffleUp(value, index, clamp, seg_mask); + case ShuffleMode::DOWN: + return ir.ShuffleDown(value, index, clamp, seg_mask); + case ShuffleMode::BFLY: + return ir.ShuffleButterfly(value, index, clamp, seg_mask); + default: + throw NotImplementedException("Invalid SHFL op {}", shfl_op); + } +} + +void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<30, 2, ShuffleMode> mode; + BitField<48, 3, IR::Pred> pred; + } const shfl{insn}; + + const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)}; + v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result)); + v.X(shfl.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::SHFL(u64 insn) { + union { + u64 insn; + BitField<20, 5, u64> src_a_imm; + BitField<28, 1, u64> src_a_flag; + BitField<29, 1, u64> src_b_flag; + BitField<34, 13, u64> src_b_imm; + } const flags{insn}; + const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast(flags.src_a_imm)) + : GetReg20(insn)}; + const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast(flags.src_b_imm)) + : GetReg39(insn)}; + Shuffle(*this, insn, src_a, src_b); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From c7c518e280d1ac04adb08d45145690fd06ac7b18 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 24 Mar 2021 23:41:55 +0100 Subject: shader: Implement TLD4 and TLD4_B --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 13 ++ src/shader_recompiler/frontend/ir/ir_emitter.h | 6 + src/shader_recompiler/frontend/ir/modifiers.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 6 + src/shader_recompiler/frontend/maxwell/maxwell.inc | 4 +- .../maxwell/translate/impl/not_implemented.cpp | 8 - .../maxwell/translate/impl/texture_gather.cpp | 209 +++++++++++++++++++++ 7 files changed, 237 insertions(+), 10 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 418b7f5ac..b365a8a6e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1474,6 +1474,19 @@ F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coor return Inst(op, Flags{info}, handle, coords, dref, lod_lc, offset); } +Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset, + const Value& offset2, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGather : Opcode::BindlessImageGather}; + return Inst(op, Flags{info}, handle, coords, offset, offset2); +} + +Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, + const Value& offset2, const F32& dref, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGatherDref + : Opcode::BindlessImageGatherDref}; + return Inst(op, Flags{info}, handle, coords, offset, offset2, dref); +} + U1 IREmitter::VoteAll(const U1& value) { return Inst(Opcode::VoteAll, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 64738735e..04b43197f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -240,6 +240,12 @@ public: const Value& offset, const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset, + const Value& offset2, TextureInstInfo info); + + [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, + const Value& offset2, const F32& dref, TextureInstInfo info); + [[nodiscard]] U1 VoteAll(const U1& value); [[nodiscard]] U1 VoteAny(const U1& value); [[nodiscard]] U1 VoteEqual(const U1& value); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 308c00153..4f09a4b39 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -38,6 +38,7 @@ union TextureInstInfo { BitField<8, 1, u32> has_bias; BitField<9, 1, u32> has_lod_clamp; BitField<10, 1, u32> relaxed_precision; + BitField<11, 2, u32> gather_component; }; static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index a2479c46a..60a0bc980 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -353,16 +353,22 @@ OPCODE(BindlessImageSampleImplicitLod, F32x4, U32, OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(ImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(ImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) // Warp operations OPCODE(VoteAll, U1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index c6cd2a79b..d668dc1aa 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -254,8 +254,8 @@ INST(TEX_b, "TEX (b)", "1101 1110 10-- ----") INST(TEXS, "TEXS", "1101 -00- ---- ----") INST(TLD, "TLD", "1101 1100 --11 1---") INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") -INST(TLD4, "TLD4", "1100 10-- --11 1---") -INST(TLD4_b, "TLD4 (b)", "1101 1110 1111 1---") +INST(TLD4, "TLD4", "1100 10-- ---- ----") +INST(TLD4_b, "TLD4 (b)", "1101 1110 11-- ----") INST(TLD4S, "TLD4S", "1101 1111 -0-- ----") INST(TLDS, "TLDS", "1101 -01- ---- ----") INST(TMML, "TMML", "1101 1111 0101 1---") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 3ccd7b925..e59c3326e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -349,14 +349,6 @@ void TranslatorVisitor::TLD_b(u64) { ThrowNotImplemented(Opcode::TLD_b); } -void TranslatorVisitor::TLD4(u64) { - ThrowNotImplemented(Opcode::TLD4); -} - -void TranslatorVisitor::TLD4_b(u64) { - ThrowNotImplemented(Opcode::TLD4_b); -} - void TranslatorVisitor::TLD4S(u64) { ThrowNotImplemented(Opcode::TLD4S); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp new file mode 100644 index 000000000..d64865876 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp @@ -0,0 +1,209 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +enum class OffsetType : u64 { + None = 0, + AOFFI, + PTP, + Invalid, +}; + +enum class ComponentType : u64 { + R = 0, + G = 1, + B = 2, + A = 3, +}; + +Shader::TextureType GetType(TextureType type, bool dc) { + switch (type) { + case TextureType::_1D: + return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true)); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +std::pair MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { + const IR::U32 value1{v.X(reg++)}; + const IR::U32 value2{v.X(reg++)}; + const auto getVector = ([&v](const IR::U32& value) { + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(24), v.ir.Imm32(6), true)); + }); + return {getVector(value1), getVector(value2)}; +} + +void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, + bool is_bindless) { + union { + u64 raw; + BitField<35, 1, u64> ndv; + BitField<49, 1, u64> nodep; + BitField<50, 1, u64> dc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const tld4{insn}; + + const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)}; + + IR::Reg meta_reg{tld4.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::Value offset2; + IR::F32 dref; + if (!is_bindless) { + handle = v.ir.Imm32(static_cast(tld4.cbuf_offset.Value() * 4)); + } else { + handle = v.X(meta_reg++); + } + switch (offset_type) { + case OffsetType::None: + break; + case OffsetType::AOFFI: { + offset = MakeOffset(v, meta_reg, tld4.type); + break; + } + case OffsetType::PTP: { + std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg); + break; + } + default: + throw NotImplementedException("Invalid offset type {}", offset_type); + } + if (tld4.dc != 0) { + dref = v.F(meta_reg++); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tld4.type, tld4.dc != 0)); + info.gather_component.Assign(static_cast(component_type)); + const IR::Value sample{[&]() -> IR::Value { + if (tld4.dc == 0) { + return v.ir.ImageGather(handle, coords, offset, offset2, info); + } + return v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info); + }()}; + + IR::Reg dest_reg{tld4.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((tld4.mask >> element) & 1) == 0) { + continue; + } + v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); + ++dest_reg; + } + if (tld4.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TLD4(u64 insn) { + union { + u64 raw; + BitField<56, 2, ComponentType> component; + BitField<54, 2, OffsetType> offset; + } const tld4{insn}; + Impl(*this, insn, tld4.component, tld4.offset, false); +} + +void TranslatorVisitor::TLD4_b(u64 insn) { + union { + u64 raw; + BitField<38, 2, ComponentType> component; + BitField<36, 2, OffsetType> offset; + } const tld4{insn}; + Impl(*this, insn, tld4.component, tld4.offset, true); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From fda0835300a7ef6112791ae503435c81ffe883f5 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 25 Mar 2021 19:59:35 +0100 Subject: shader: Implement TLD4S. --- .../maxwell/translate/impl/not_implemented.cpp | 4 - .../translate/impl/texture_gather_swizzled.cpp | 133 +++++++++++++++++++++ 2 files changed, 133 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index e59c3326e..788765c21 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -349,10 +349,6 @@ void TranslatorVisitor::TLD_b(u64) { ThrowNotImplemented(Opcode::TLD_b); } -void TranslatorVisitor::TLD4S(u64) { - ThrowNotImplemented(Opcode::TLD4S); -} - void TranslatorVisitor::TLDS(u64) { ThrowNotImplemented(Opcode::TLDS); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp new file mode 100644 index 000000000..beab515ad --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp @@ -0,0 +1,133 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Precision : u64 { + F32, + F16, +}; + +enum class ComponentType : u64 { + R = 0, + G = 1, + B = 2, + A = 3, +}; + +union Encoding { + u64 raw; + BitField<55, 1, Precision> precision; + BitField<52, 2, ComponentType> component_type; + BitField<51, 1, u64> aoffi; + BitField<50, 1, u64> dc; + BitField<49, 1, u64> nodep; + BitField<28, 8, IR::Reg> dest_reg_b; + BitField<0, 8, IR::Reg> dest_reg_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<36, 13, u64> cbuf_offset; +}; + +void CheckAlignment(IR::Reg reg, int alignment) { + if (!IR::IsAligned(reg, alignment)) { + throw NotImplementedException("Unaligned source register {}", reg); + } +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { + const IR::U32 value{v.X(reg)}; + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); +} + +IR::Value Sample(TranslatorVisitor& v, u64 insn) { + const Encoding tld4s{insn}; + const IR::U32 handle{v.ir.Imm32(static_cast(tld4s.cbuf_offset * 4))}; + const IR::Reg reg_a{tld4s.src_reg_a}; + const IR::Reg reg_b{tld4s.src_reg_b}; + IR::TextureInstInfo info{}; + if (tld4s.precision == Precision::F16) { + info.relaxed_precision.Assign(1); + } + info.gather_component.Assign(static_cast(tld4s.component_type.Value())); + info.type.Assign(tld4s.dc != 0 ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D); + IR::Value coords; + if (tld4s.aoffi != 0) { + CheckAlignment(reg_a, 2); + coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); + IR::Value offset = MakeOffset(v, reg_b); + if (tld4s.dc != 0) { + CheckAlignment(reg_b, 2); + IR::F32 dref = v.F(reg_b + 1); + return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info); + } + return v.ir.ImageGather(handle, coords, offset, {}, info); + } + if (tld4s.dc != 0) { + CheckAlignment(reg_a, 2); + coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); + IR::F32 dref = v.F(reg_b); + return v.ir.ImageGatherDref(handle, coords, {}, {}, dref, info); + } + coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b)); + return v.ir.ImageGather(handle, coords, {}, {}, info); +} + +IR::Reg RegStoreComponent32(u64 insn, size_t index) { + const Encoding tlds4{insn}; + switch (index) { + case 0: + return tlds4.dest_reg_a; + case 1: + CheckAlignment(tlds4.dest_reg_a, 2); + return tlds4.dest_reg_a + 1; + case 2: + return tlds4.dest_reg_b; + case 3: + CheckAlignment(tlds4.dest_reg_b, 2); + return tlds4.dest_reg_b + 1; + } + throw LogicError("Invalid store index {}", index); +} + +void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + for (size_t component = 0; component < 4; ++component) { + const IR::Reg dest{RegStoreComponent32(insn, component)}; + v.F(dest, IR::F32{v.ir.CompositeExtract(sample, component)}); + } +} + +IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { + return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); +} + +void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + std::array swizzled; + for (size_t component = 0; component < 4; ++component) { + swizzled[component] = IR::F32{v.ir.CompositeExtract(sample, component)}; + } + const Encoding tld4s{insn}; + v.X(tld4s.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); + v.X(tld4s.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); +} +} // Anonymous namespace + +void TranslatorVisitor::TLD4S(u64 insn) { + const IR::Value sample{Sample(*this, insn)}; + if (Encoding{insn}.precision == Precision::F32) { + Store32(*this, insn, sample); + } else { + Store16(*this, insn, sample); + } +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From f5672777c8af4700c9e0fc32af52cb2563f564f4 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 25 Mar 2021 20:27:09 +0100 Subject: shader: Implement FragDepth --- src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp index ea9b33da9..58a53c0ec 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp @@ -25,7 +25,7 @@ void ExitFragment(TranslatorVisitor& v) { throw NotImplementedException("Sample mask"); } if (sph.ps.omap.depth != 0) { - throw NotImplementedException("Fragment depth"); + v.ir.SetFragDepth(v.F(src_reg + 1)); } } } // Anonymous namespace -- cgit v1.2.3 From 981eb6f43bb88f1e57b4c657bf37cb7471a113e3 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 26 Mar 2021 01:54:03 +0100 Subject: shader: Fix Array Indices in TEX/TLD4 --- .../frontend/maxwell/translate/impl/texture_fetch.cpp | 6 +++--- .../frontend/maxwell/translate/impl/texture_gather.cpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index d2626f3e7..1f1689c43 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -61,11 +61,11 @@ IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { case TextureType::_1D: return v.F(reg); case TextureType::ARRAY_1D: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); + return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); case TextureType::_2D: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); case TextureType::ARRAY_2D: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); case TextureType::_3D: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); case TextureType::ARRAY_3D: @@ -73,7 +73,7 @@ IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { case TextureType::CUBE: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); case TextureType::ARRAY_CUBE: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); } throw NotImplementedException("Invalid texture type {}", type); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp index d64865876..8c6384040 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp @@ -65,11 +65,11 @@ IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { case TextureType::_1D: return v.F(reg); case TextureType::ARRAY_1D: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); + return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); case TextureType::_2D: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); case TextureType::ARRAY_2D: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); case TextureType::_3D: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); case TextureType::ARRAY_3D: @@ -77,7 +77,7 @@ IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { case TextureType::CUBE: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); case TextureType::ARRAY_CUBE: - return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); } throw NotImplementedException("Invalid texture type {}", type); } -- cgit v1.2.3 From 742d11c2ad948c8630be15901514ec9e5e5fcd20 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 26 Mar 2021 16:02:04 +0100 Subject: shader: Implement TLD4.PTP --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 14 +++++++++----- src/shader_recompiler/frontend/ir/microinstruction.h | 6 ++++++ src/shader_recompiler/frontend/ir/modifiers.h | 5 +++++ src/shader_recompiler/frontend/ir/opcodes.inc | 1 + src/shader_recompiler/frontend/ir/value.cpp | 14 ++++++++++++++ src/shader_recompiler/frontend/ir/value.h | 1 + .../frontend/maxwell/translate/impl/texture_fetch.cpp | 14 ++++++++------ .../frontend/maxwell/translate/impl/texture_gather.cpp | 16 ++++++++-------- 8 files changed, 52 insertions(+), 19 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index b365a8a6e..f49c30484 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -398,15 +398,16 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) { if (e1.Type() != e2.Type()) { throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); } + CompositeDecoration decor{}; switch (e1.Type()) { case Type::U32: - return Inst(Opcode::CompositeConstructU32x2, e1, e2); + return Inst(Opcode::CompositeConstructU32x2, Flags{decor}, e1, e2); case Type::F16: - return Inst(Opcode::CompositeConstructF16x2, e1, e2); + return Inst(Opcode::CompositeConstructF16x2, Flags{decor}, e1, e2); case Type::F32: - return Inst(Opcode::CompositeConstructF32x2, e1, e2); + return Inst(Opcode::CompositeConstructF32x2, Flags{decor}, e1, e2); case Type::F64: - return Inst(Opcode::CompositeConstructF64x2, e1, e2); + return Inst(Opcode::CompositeConstructF64x2, Flags{decor}, e1, e2); default: ThrowInvalidType(e1.Type()); } @@ -436,6 +437,7 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(), e3.Type(), e4.Type()); } + CompositeDecoration decor{}; switch (e1.Type()) { case Type::U32: return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4); @@ -445,6 +447,8 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4); case Type::F64: return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4); + case Type::U32x2: + return Inst(Opcode::CompositeConstructArrayU32x2, Flags{decor}, e1, e2, e3, e4); default: ThrowInvalidType(e1.Type()); } @@ -1481,7 +1485,7 @@ Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Val } Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, - const Value& offset2, const F32& dref, TextureInstInfo info) { + const Value& offset2, const F32& dref, TextureInstInfo info) { const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGatherDref : Opcode::BindlessImageGatherDref}; return Inst(op, Flags{info}, handle, coords, offset, offset2, dref); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 770bbd550..77296cfa4 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -99,6 +99,12 @@ public: return ret; } + template + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) + [[nodiscard]] void SetFlags(FlagsType& new_val) noexcept { + std::memcpy(&flags, &new_val, sizeof(new_val)); + } + /// Intrusively store the host definition of this instruction. template void SetDefinition(DefinitionType def) { diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 4f09a4b39..20fb14fea 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -32,6 +32,11 @@ struct FpControl { }; static_assert(sizeof(FpControl) <= sizeof(u32)); +struct CompositeDecoration { + bool is_constant{false}; +}; +static_assert(sizeof(CompositeDecoration) <= sizeof(u32)); + union TextureInstInfo { u32 raw; BitField<0, 8, TextureType> type; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 60a0bc980..0dc0aabdf 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -126,6 +126,7 @@ OPCODE(CompositeExtractF64x4, F64, F64x OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) +OPCODE(CompositeConstructArrayU32x2, Opaque, U32x2, U32x2, U32x2, U32x2, ) // Select operations OPCODE(SelectU1, U1, U1, U1, U1, ) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index e8e4662e7..7671fc3d8 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -44,6 +44,20 @@ bool Value::IsEmpty() const noexcept { return type == Type::Void; } +bool Value::IsConstantContainer() const { + if (IsImmediate()) { + return true; + } + ValidateAccess(Type::Opaque); + auto num_args = inst->NumArgs(); + for (size_t i = 0; i < num_args; i++) { + if (!inst->Arg(i).IsConstantContainer()) { + return false; + } + } + return true; +} + bool Value::IsImmediate() const noexcept { if (IsIdentity()) { return inst->Arg(0).IsImmediate(); diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index b27601e70..5d6e74c14 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -38,6 +38,7 @@ public: [[nodiscard]] bool IsImmediate() const noexcept; [[nodiscard]] bool IsLabel() const noexcept; [[nodiscard]] IR::Type Type() const noexcept; + [[nodiscard]] bool IsConstantContainer() const; [[nodiscard]] IR::Inst* Inst() const; [[nodiscard]] IR::Block* Label() const; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 1f1689c43..b2da079f9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -101,16 +101,18 @@ IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { switch (type) { case TextureType::_1D: case TextureType::ARRAY_1D: - return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)); + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); case TextureType::_2D: case TextureType::ARRAY_2D: - return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), - v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4))); + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); case TextureType::_3D: case TextureType::ARRAY_3D: - return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), - v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)), - v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4))); + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); case TextureType::CUBE: case TextureType::ARRAY_CUBE: throw NotImplementedException("Illegal offset on CUBE sample"); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp index 8c6384040..cdf5cb5c4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp @@ -106,17 +106,17 @@ IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { throw NotImplementedException("Invalid texture type {}", type); } -std::pair MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { +IR::Value MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { const IR::U32 value1{v.X(reg++)}; const IR::U32 value2{v.X(reg++)}; - const auto getVector = ([&v](const IR::U32& value) { + const IR::U32 bitsize = v.ir.Imm32(6); + const auto getVector = ([&v, &bitsize](const IR::U32& value, u32 base) { return v.ir.CompositeConstruct( - v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), - v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true), - v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true), - v.ir.BitFieldExtract(value, v.ir.Imm32(24), v.ir.Imm32(6), true)); + v.ir.BitFieldExtract(value, v.ir.Imm32(base + 0), bitsize, true), + v.ir.BitFieldExtract(value, v.ir.Imm32(base + 8), bitsize, true)); }); - return {getVector(value1), getVector(value2)}; + return v.ir.CompositeConstruct(getVector(value1, 0), getVector(value1, 16), + getVector(value2, 0), getVector(value2, 16)); } void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, @@ -155,7 +155,7 @@ void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetTy break; } case OffsetType::PTP: { - std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg); + offset2 = MakeOffsetPTP(v, meta_reg); break; } default: -- cgit v1.2.3 From b5db38f50e9f81964bf0cc946e4ed5b00fe564d0 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 26 Mar 2021 19:24:50 +0100 Subject: shader: Add IR opcode for ImageFetch --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 6 ++++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 8 ++++++-- src/shader_recompiler/frontend/ir/opcodes.inc | 3 +++ 3 files changed, 15 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index f49c30484..b8d36f362 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1491,6 +1491,12 @@ Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const return Inst(op, Flags{info}, handle, coords, offset, offset2, dref); } +Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset, + const U32& lod, const U32& multisampling, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageFetch : Opcode::BindlessImageFetch}; + return Inst(op, Flags{info}, handle, coords, offset, lod, multisampling); +} + U1 IREmitter::VoteAll(const U1& value) { return Inst(Opcode::VoteAll, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 04b43197f..446fd7785 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -243,8 +243,12 @@ public: [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset, const Value& offset2, TextureInstInfo info); - [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, - const Value& offset2, const F32& dref, TextureInstInfo info); + [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords, + const Value& offset, const Value& offset2, const F32& dref, + TextureInstInfo info); + + [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset, + const U32& lod, const U32& multisampling, TextureInstInfo info); [[nodiscard]] U1 VoteAll(const U1& value); [[nodiscard]] U1 VoteAny(const U1& value); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 0dc0aabdf..3dacd7b6b 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -356,6 +356,7 @@ OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) +OPCODE(BindlessImageFetch, F32x4, U32, Opaque, U32, U32, ) OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -363,6 +364,7 @@ OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) +OPCODE(BoundImageFetch, F32x4, U32, Opaque, U32, U32, ) OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -370,6 +372,7 @@ OPCODE(ImageSampleDrefImplicitLod, F32, U32, OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(ImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) +OPCODE(ImageFetch, F32x4, U32, Opaque, U32, U32, ) // Warp operations OPCODE(VoteAll, U1, U1, ) -- cgit v1.2.3 From d9c5bd9509e82fcde72c18663989931f97ed6518 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 26 Mar 2021 16:46:07 -0300 Subject: shader: Refactor PTP and other minor changes --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 12 ++++------ .../frontend/ir/microinstruction.h | 4 ++-- src/shader_recompiler/frontend/ir/modifiers.h | 5 ---- src/shader_recompiler/frontend/ir/opcodes.inc | 1 - src/shader_recompiler/frontend/ir/value.cpp | 14 ----------- src/shader_recompiler/frontend/ir/value.h | 1 - .../maxwell/translate/impl/texture_gather.cpp | 28 ++++++++++------------ 7 files changed, 19 insertions(+), 46 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index b8d36f362..0296f8773 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -398,16 +398,15 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) { if (e1.Type() != e2.Type()) { throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); } - CompositeDecoration decor{}; switch (e1.Type()) { case Type::U32: - return Inst(Opcode::CompositeConstructU32x2, Flags{decor}, e1, e2); + return Inst(Opcode::CompositeConstructU32x2, e1, e2); case Type::F16: - return Inst(Opcode::CompositeConstructF16x2, Flags{decor}, e1, e2); + return Inst(Opcode::CompositeConstructF16x2, e1, e2); case Type::F32: - return Inst(Opcode::CompositeConstructF32x2, Flags{decor}, e1, e2); + return Inst(Opcode::CompositeConstructF32x2, e1, e2); case Type::F64: - return Inst(Opcode::CompositeConstructF64x2, Flags{decor}, e1, e2); + return Inst(Opcode::CompositeConstructF64x2, e1, e2); default: ThrowInvalidType(e1.Type()); } @@ -437,7 +436,6 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(), e3.Type(), e4.Type()); } - CompositeDecoration decor{}; switch (e1.Type()) { case Type::U32: return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4); @@ -447,8 +445,6 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4); case Type::F64: return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4); - case Type::U32x2: - return Inst(Opcode::CompositeConstructArrayU32x2, Flags{decor}, e1, e2, e3, e4); default: ThrowInvalidType(e1.Type()); } diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 77296cfa4..6658dc674 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -101,8 +101,8 @@ public: template requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) - [[nodiscard]] void SetFlags(FlagsType& new_val) noexcept { - std::memcpy(&flags, &new_val, sizeof(new_val)); + [[nodiscard]] void SetFlags(FlagsType value) noexcept { + std::memcpy(&flags, &value, sizeof(value)); } /// Intrusively store the host definition of this instruction. diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 20fb14fea..4f09a4b39 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -32,11 +32,6 @@ struct FpControl { }; static_assert(sizeof(FpControl) <= sizeof(u32)); -struct CompositeDecoration { - bool is_constant{false}; -}; -static_assert(sizeof(CompositeDecoration) <= sizeof(u32)); - union TextureInstInfo { u32 raw; BitField<0, 8, TextureType> type; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 3dacd7b6b..e12b92c47 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -126,7 +126,6 @@ OPCODE(CompositeExtractF64x4, F64, F64x OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) -OPCODE(CompositeConstructArrayU32x2, Opaque, U32x2, U32x2, U32x2, U32x2, ) // Select operations OPCODE(SelectU1, U1, U1, U1, U1, ) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 7671fc3d8..e8e4662e7 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -44,20 +44,6 @@ bool Value::IsEmpty() const noexcept { return type == Type::Void; } -bool Value::IsConstantContainer() const { - if (IsImmediate()) { - return true; - } - ValidateAccess(Type::Opaque); - auto num_args = inst->NumArgs(); - for (size_t i = 0; i < num_args; i++) { - if (!inst->Arg(i).IsConstantContainer()) { - return false; - } - } - return true; -} - bool Value::IsImmediate() const noexcept { if (IsIdentity()) { return inst->Arg(0).IsImmediate(); diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 5d6e74c14..b27601e70 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -38,7 +38,6 @@ public: [[nodiscard]] bool IsImmediate() const noexcept; [[nodiscard]] bool IsLabel() const noexcept; [[nodiscard]] IR::Type Type() const noexcept; - [[nodiscard]] bool IsConstantContainer() const; [[nodiscard]] IR::Inst* Inst() const; [[nodiscard]] IR::Block* Label() const; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp index cdf5cb5c4..b2f9cda46 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp @@ -106,17 +106,17 @@ IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { throw NotImplementedException("Invalid texture type {}", type); } -IR::Value MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { +std::pair MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { const IR::U32 value1{v.X(reg++)}; const IR::U32 value2{v.X(reg++)}; - const IR::U32 bitsize = v.ir.Imm32(6); - const auto getVector = ([&v, &bitsize](const IR::U32& value, u32 base) { - return v.ir.CompositeConstruct( - v.ir.BitFieldExtract(value, v.ir.Imm32(base + 0), bitsize, true), - v.ir.BitFieldExtract(value, v.ir.Imm32(base + 8), bitsize, true)); - }); - return v.ir.CompositeConstruct(getVector(value1, 0), getVector(value1, 16), - getVector(value2, 0), getVector(value2, 16)); + const IR::U32 bitsize{v.ir.Imm32(6)}; + const auto make_vector{[&v, &bitsize](const IR::U32& value) { + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), bitsize, true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), bitsize, true), + v.ir.BitFieldExtract(value, v.ir.Imm32(16), bitsize, true), + v.ir.BitFieldExtract(value, v.ir.Imm32(24), bitsize, true)); + }}; + return {make_vector(value1), make_vector(value2)}; } void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, @@ -150,14 +150,12 @@ void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetTy switch (offset_type) { case OffsetType::None: break; - case OffsetType::AOFFI: { + case OffsetType::AOFFI: offset = MakeOffset(v, meta_reg, tld4.type); break; - } - case OffsetType::PTP: { - offset2 = MakeOffsetPTP(v, meta_reg); + case OffsetType::PTP: + std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg); break; - } default: throw NotImplementedException("Invalid offset type {}", offset_type); } @@ -167,7 +165,7 @@ void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetTy IR::TextureInstInfo info{}; info.type.Assign(GetType(tld4.type, tld4.dc != 0)); info.gather_component.Assign(static_cast(component_type)); - const IR::Value sample{[&]() -> IR::Value { + const IR::Value sample{[&] { if (tld4.dc == 0) { return v.ir.ImageGather(handle, coords, offset, offset2, info); } -- cgit v1.2.3 From 17063d16a3cfe6542e74265739191e1d018fc456 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 26 Mar 2021 18:45:38 -0300 Subject: shader: Implement TXQ and fix FragDepth --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 6 ++ src/shader_recompiler/frontend/ir/ir_emitter.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 3 + .../maxwell/translate/impl/not_implemented.cpp | 8 --- .../maxwell/translate/impl/texture_query.cpp | 76 ++++++++++++++++++++++ 5 files changed, 86 insertions(+), 8 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 0296f8773..f281c023f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1493,6 +1493,12 @@ Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Valu return Inst(op, Flags{info}, handle, coords, offset, lod, multisampling); } +Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryDimensions + : Opcode::BindlessImageQueryDimensions}; + return Inst(op, handle, lod); +} + U1 IREmitter::VoteAll(const U1& value) { return Inst(Opcode::VoteAll, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 446fd7785..771c186d4 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -239,6 +239,7 @@ public: const F32& dref, const F32& lod, const Value& offset, const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod); [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset, const Value& offset2, TextureInstInfo info); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index e12b92c47..5d7462d76 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -356,6 +356,7 @@ OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(BindlessImageFetch, F32x4, U32, Opaque, U32, U32, ) +OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -364,6 +365,7 @@ OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(BoundImageFetch, F32x4, U32, Opaque, U32, U32, ) +OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -372,6 +374,7 @@ OPCODE(ImageSampleDrefExplicitLod, F32, U32, OPCODE(ImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageFetch, F32x4, U32, Opaque, U32, U32, ) +OPCODE(ImageQueryDimensions, U32x4, U32, U32, ) // Warp operations OPCODE(VoteAll, U1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 788765c21..96ee2e741 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -373,14 +373,6 @@ void TranslatorVisitor::TXD_b(u64) { ThrowNotImplemented(Opcode::TXD_b); } -void TranslatorVisitor::TXQ(u64) { - ThrowNotImplemented(Opcode::TXQ); -} - -void TranslatorVisitor::TXQ_b(u64) { - ThrowNotImplemented(Opcode::TXQ_b); -} - void TranslatorVisitor::VABSDIFF(u64) { ThrowNotImplemented(Opcode::VABSDIFF); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp new file mode 100644 index 000000000..e8ea8faeb --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp @@ -0,0 +1,76 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Dimension = 1, + TextureType = 2, + SamplePos = 5, +}; + +IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) { + switch (mode) { + case Mode::Dimension: { + const IR::U32 lod{v.X(src_reg)}; + return v.ir.ImageQueryDimension(handle, lod); + } + case Mode::TextureType: + case Mode::SamplePos: + default: + throw NotImplementedException("Mode {}", mode); + } +} + +void Impl(TranslatorVisitor& v, u64 insn, std::optional cbuf_offset) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<22, 3, Mode> mode; + BitField<31, 4, u64> mask; + } const txq{insn}; + + IR::Reg src_reg{txq.src_reg}; + IR::U32 handle; + if (cbuf_offset) { + handle = v.ir.Imm32(*cbuf_offset); + } else { + handle = v.X(src_reg); + ++src_reg; + } + const IR::Value query{Query(v, handle, txq.mode, src_reg)}; + IR::Reg dest_reg{txq.dest_reg}; + for (int element = 0; element < 4; ++element) { + if (((txq.mask >> element) & 1) == 0) { + continue; + } + v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, element)}); + ++dest_reg; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TXQ(u64 insn) { + union { + u64 raw; + BitField<36, 13, u64> cbuf_offset; + } const txq{insn}; + + Impl(*this, insn, static_cast(txq.cbuf_offset)); +} + +void TranslatorVisitor::TXQ_b(u64 insn) { + Impl(*this, insn, std::nullopt); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From cdf0cc38698bf31773edd0016d5171bd11b966d0 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 27 Mar 2021 04:19:38 +0100 Subject: shader: Fix TXQ --- src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp index e8ea8faeb..8c7e04bca 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp @@ -66,7 +66,7 @@ void TranslatorVisitor::TXQ(u64 insn) { BitField<36, 13, u64> cbuf_offset; } const txq{insn}; - Impl(*this, insn, static_cast(txq.cbuf_offset)); + Impl(*this, insn, static_cast(txq.cbuf_offset * 4)); } void TranslatorVisitor::TXQ_b(u64 insn) { -- cgit v1.2.3 From a806b29cb9bb48c4a9628700946231c9150463b5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Mar 2021 02:54:32 -0300 Subject: shader: Fix structured control flow on KIL instructions This could potentially leave unvisited blocks, leading to illegal phi nodes. --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 1 + .../frontend/maxwell/structured_control_flow.cpp | 9 ++++++--- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index f281c023f..82613f607 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -83,6 +83,7 @@ void IREmitter::SelectionMerge(Block* merge_block) { } void IREmitter::Return() { + block->SetReturn(); Inst(Opcode::Return); } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 79e344986..9d4688390 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -152,7 +152,9 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) { for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { switch (stmt->type) { case StatementType::Code: - ret += fmt::format("{} Block {:04x};\n", indent, stmt->code->LocationBegin()); + ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent, + stmt->code->LocationBegin(), stmt->code->LocationEnd(), + reinterpret_cast(stmt->code)); break; case StatementType::Goto: ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond), @@ -749,8 +751,9 @@ private: current_block = block_pool.Create(inst_pool); block_list.push_back(current_block); } - IR::IREmitter{*current_block}.DemoteToHelperInvocation(continue_block); - current_block = nullptr; + IR::Block* demote_block{MergeBlock(parent, stmt)}; + IR::IREmitter{*current_block}.DemoteToHelperInvocation(demote_block); + current_block = demote_block; break; } default: -- cgit v1.2.3 From 675a82416d7775dc7a252a5d8f5b704e6b8f2326 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Mar 2021 03:08:31 -0300 Subject: spirv: Remove dependencies on Environment when generating SPIR-V --- src/shader_recompiler/frontend/ir/program.h | 2 ++ src/shader_recompiler/frontend/maxwell/program.cpp | 3 +++ 2 files changed, 5 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 733513c8b..0162e919c 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include @@ -19,6 +20,7 @@ struct Program { BlockList post_order_blocks; Info info; Stage stage{}; + std::array workgroup_size{}; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 0074eb89b..6efaf6ee0 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -33,6 +33,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Sat, 27 Mar 2021 04:59:58 -0300 Subject: shader: Better interpolation and disabled attributes support --- src/shader_recompiler/frontend/maxwell/program.cpp | 35 ++++++++++++++++++++++ .../translate/impl/load_store_attribute.cpp | 10 +------ 2 files changed, 36 insertions(+), 9 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 6efaf6ee0..a914a91f4 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -27,6 +27,40 @@ static void RemoveUnreachableBlocks(IR::Program& program) { }); } +static void CollectInterpolationInfo(Environment& env, IR::Program& program) { + if (program.stage != Stage::Fragment) { + return; + } + const ProgramHeader& sph{env.SPH()}; + for (size_t index = 0; index < program.info.input_generics.size(); ++index) { + std::optional imap; + for (const PixelImap value : sph.ps.GenericInputMap(static_cast(index))) { + if (value == PixelImap::Unused) { + continue; + } + if (imap && imap != value) { + throw NotImplementedException("Per component interpolation"); + } + imap = value; + } + if (!imap) { + continue; + } + program.info.input_generics[index].interpolation = [&] { + switch (*imap) { + case PixelImap::Unused: + case PixelImap::Perspective: + return Interpolation::Smooth; + case PixelImap::Constant: + return Interpolation::Flat; + case PixelImap::ScreenLinear: + return Interpolation::NoPerspective; + } + throw NotImplementedException("Unknown interpolation {}", *imap); + }(); + } +} + IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, Flow::CFG& cfg) { IR::Program program; @@ -51,6 +85,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Sat, 27 Mar 2021 05:26:29 -0300 Subject: shader: Add missing I2I exception when CC is used --- .../frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp index ca28c6dd9..e8f35552c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp @@ -52,10 +52,14 @@ void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { BitField<13, 1, u64> src_fmt_sign; BitField<41, 3, u64> selector; BitField<45, 1, u64> neg; + BitField<47, 1, u64> cc; BitField<49, 1, u64> abs; BitField<50, 1, u64> sat; } const i2i{insn}; + if (i2i.cc != 0) { + throw NotImplementedException("I2I CC"); + } if (i2i.sat != 0) { throw NotImplementedException("I2I SAT"); } -- cgit v1.2.3 From 51475e21ba5e9a17730a2b5a868dc73d53db9bc1 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 27 Mar 2021 19:47:00 -0400 Subject: shader: Implement VMAD, VMNMX, VSETP --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 21 ++--- src/shader_recompiler/frontend/ir/ir_emitter.h | 2 + .../maxwell/translate/impl/not_implemented.cpp | 13 --- .../maxwell/translate/impl/video_helper.cpp | 30 +++++++ .../frontend/maxwell/translate/impl/video_helper.h | 23 ++++++ .../translate/impl/video_minimum_maximum.cpp | 92 ++++++++++++++++++++++ .../maxwell/translate/impl/video_multiply_add.cpp | 64 +++++++++++++++ .../maxwell/translate/impl/video_set_predicate.cpp | 92 ++++++++++++++++++++++ 8 files changed, 314 insertions(+), 23 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 82613f607..6d41442ee 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1121,6 +1121,10 @@ U32 IREmitter::UMin(const U32& a, const U32& b) { return Inst(Opcode::UMin32, a, b); } +U32 IREmitter::IMin(const U32& a, const U32& b, bool is_signed) { + return is_signed ? SMin(a, b) : UMin(a, b); +} + U32 IREmitter::SMax(const U32& a, const U32& b) { return Inst(Opcode::SMax32, a, b); } @@ -1129,6 +1133,10 @@ U32 IREmitter::UMax(const U32& a, const U32& b) { return Inst(Opcode::UMax32, a, b); } +U32 IREmitter::IMax(const U32& a, const U32& b, bool is_signed) { + return is_signed ? SMax(a, b) : UMax(a, b); +} + U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { return Inst(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); } @@ -1267,11 +1275,7 @@ U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) { } U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value) { - if (is_signed) { - return ConvertFToS(bitsize, value); - } else { - return ConvertFToU(bitsize, value); - } + return is_signed ? ConvertFToS(bitsize, value) : ConvertFToU(bitsize, value); } F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value) { @@ -1360,11 +1364,8 @@ F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const F16F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, const Value& value) { - if (is_signed) { - return ConvertSToF(dest_bitsize, src_bitsize, value); - } else { - return ConvertUToF(dest_bitsize, src_bitsize, value); - } + return is_signed ? ConvertSToF(dest_bitsize, src_bitsize, value) + : ConvertUToF(dest_bitsize, src_bitsize, value); } U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 771c186d4..8d50aa607 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -196,8 +196,10 @@ public: [[nodiscard]] U32 FindUMsb(const U32& value); [[nodiscard]] U32 SMin(const U32& a, const U32& b); [[nodiscard]] U32 UMin(const U32& a, const U32& b); + [[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed); [[nodiscard]] U32 SMax(const U32& a, const U32& b); [[nodiscard]] U32 UMax(const U32& a, const U32& b); + [[nodiscard]] U32 IMax(const U32& a, const U32& b, bool is_signed); [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 96ee2e741..409216640 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -385,14 +385,6 @@ void TranslatorVisitor::VADD(u64) { ThrowNotImplemented(Opcode::VADD); } -void TranslatorVisitor::VMAD(u64) { - ThrowNotImplemented(Opcode::VMAD); -} - -void TranslatorVisitor::VMNMX(u64) { - ThrowNotImplemented(Opcode::VMNMX); -} - void TranslatorVisitor::VOTE_vtg(u64) { ThrowNotImplemented(Opcode::VOTE_vtg); } @@ -400,11 +392,6 @@ void TranslatorVisitor::VOTE_vtg(u64) { void TranslatorVisitor::VSET(u64) { ThrowNotImplemented(Opcode::VSET); } - -void TranslatorVisitor::VSETP(u64) { - ThrowNotImplemented(Opcode::VSETP); -} - void TranslatorVisitor::VSHL(u64) { ThrowNotImplemented(Opcode::VSHL); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp new file mode 100644 index 000000000..e1f4174cf --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" + +namespace Shader::Maxwell { + +IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width, + u32 selector, bool is_signed) { + switch (width) { + case VideoWidth::Byte: + case VideoWidth::Unknown: + return ir.BitFieldExtract(value, ir.Imm32(selector * 8), ir.Imm32(8), is_signed); + case VideoWidth::Short: + return ir.BitFieldExtract(value, ir.Imm32(selector * 16), ir.Imm32(16), is_signed); + case VideoWidth::Word: + return value; + default: + throw NotImplementedException("Unknown VideoWidth {}", width); + } +} + +VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) { + // immediates must be 16-bit format. + return is_immediate ? VideoWidth::Short : width; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h new file mode 100644 index 000000000..40c0b907c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h @@ -0,0 +1,23 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +enum class VideoWidth : u64 { + Byte, + Unknown, + Short, + Word, +}; + +[[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, + VideoWidth width, u32 selector, bool is_signed); + +[[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp new file mode 100644 index 000000000..78869601f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp @@ -0,0 +1,92 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" + +namespace Shader::Maxwell { +namespace { +enum class VideoMinMaxOps : u64 { + MRG_16H, + MRG_16L, + MRG_8B0, + MRG_8B2, + ACC, + MIN, + MAX, +}; + +[[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs, + VideoMinMaxOps op, bool is_signed) { + switch (op) { + case VideoMinMaxOps::MIN: + return ir.IMin(lhs, rhs, is_signed); + case VideoMinMaxOps::MAX: + return ir.IMax(lhs, rhs, is_signed); + default: + throw NotImplementedException("VMNMX op {}", op); + } +} +} // Anonymous namespace + +void TranslatorVisitor::VMNMX(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 16, u64> src_b_imm; + BitField<28, 2, u64> src_b_selector; + BitField<29, 2, VideoWidth> src_b_width; + BitField<36, 2, u64> src_a_selector; + BitField<37, 2, VideoWidth> src_a_width; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> src_a_sign; + BitField<49, 1, u64> src_b_sign; + BitField<50, 1, u64> is_src_b_reg; + BitField<51, 3, VideoMinMaxOps> op; + BitField<54, 1, u64> dest_sign; + BitField<55, 1, u64> sat; + BitField<56, 1, u64> mx; + } const vmnmx{insn}; + + if (vmnmx.cc != 0) { + throw NotImplementedException("VMNMX CC"); + } + if (vmnmx.sat != 0) { + throw NotImplementedException("VMNMX SAT"); + } + // Selectors were shown to default to 2 in unit tests + if (vmnmx.src_a_selector != 2) { + throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value()); + } + if (vmnmx.src_b_selector != 2) { + throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value()); + } + if (vmnmx.src_a_width != VideoWidth::Word) { + throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value()); + } + + const bool is_b_imm{vmnmx.is_src_b_reg == 0}; + const IR::U32 src_a{GetReg8(insn)}; + const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast(vmnmx.src_b_imm)) : GetReg20(insn)}; + const IR::U32 src_c{GetReg39(insn)}; + + const VideoWidth a_width{vmnmx.src_a_width}; + const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)}; + + const bool src_a_signed{vmnmx.src_a_sign != 0}; + const bool src_b_signed{vmnmx.src_b_sign != 0}; + const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)}; + const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)}; + + // First operation's sign is only dependent on operand b's sign + const bool op_1_signed{src_b_signed}; + + const IR::U32 lhs{vmnmx.mx != 0 ? ir.IMax(op_a, op_b, op_1_signed) + : ir.IMin(op_a, op_b, op_1_signed)}; + X(vmnmx.dest_reg, ApplyVideoMinMaxOp(ir, lhs, src_c, vmnmx.op, vmnmx.dest_sign != 0)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp new file mode 100644 index 000000000..cc2e6d6e6 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp @@ -0,0 +1,64 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::VMAD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<20, 16, u64> src_b_imm; + BitField<28, 2, u64> src_b_selector; + BitField<29, 2, VideoWidth> src_b_width; + BitField<36, 2, u64> src_a_selector; + BitField<37, 2, VideoWidth> src_a_width; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> src_a_sign; + BitField<49, 1, u64> src_b_sign; + BitField<50, 1, u64> is_src_b_reg; + BitField<51, 2, u64> scale; + BitField<53, 1, u64> src_c_neg; + BitField<54, 1, u64> src_a_neg; + BitField<55, 1, u64> sat; + } const vmad{insn}; + + if (vmad.cc != 0) { + throw NotImplementedException("VMAD CC"); + } + if (vmad.sat != 0) { + throw NotImplementedException("VMAD SAT"); + } + if (vmad.scale != 0) { + throw NotImplementedException("VMAD SCALE"); + } + if (vmad.src_a_neg != 0 && vmad.src_c_neg != 0) { + throw NotImplementedException("VMAD PO"); + } + if (vmad.src_a_neg != 0 || vmad.src_c_neg != 0) { + throw NotImplementedException("VMAD NEG"); + } + const bool is_b_imm{vmad.is_src_b_reg == 0}; + const IR::U32 src_a{GetReg8(insn)}; + const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast(vmad.src_b_imm)) : GetReg20(insn)}; + const IR::U32 src_c{GetReg39(insn)}; + + const u32 a_selector{static_cast(vmad.src_a_selector)}; + // Immediate values can't have a selector + const u32 b_selector{is_b_imm ? 0U : static_cast(vmad.src_b_selector)}; + const VideoWidth a_width{vmad.src_a_width}; + const VideoWidth b_width{GetVideoSourceWidth(vmad.src_b_width, is_b_imm)}; + + const bool src_a_signed{vmad.src_a_sign != 0}; + const bool src_b_signed{vmad.src_b_sign != 0}; + const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)}; + const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)}; + + X(vmad.dest_reg, ir.IAdd(ir.IMul(op_a, op_b), src_c)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp new file mode 100644 index 000000000..af13b3fcc --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp @@ -0,0 +1,92 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" + +namespace Shader::Maxwell { +namespace { +enum class VsetpCompareOp : u64 { + False = 0, + LessThan, + Equal, + LessThanEqual, + GreaterThan = 16, + NotEqual, + GreaterThanEqual, + True, +}; + +CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) { + switch (op) { + case VsetpCompareOp::False: + return CompareOp::False; + case VsetpCompareOp::LessThan: + return CompareOp::LessThan; + case VsetpCompareOp::Equal: + return CompareOp::Equal; + case VsetpCompareOp::LessThanEqual: + return CompareOp::LessThanEqual; + case VsetpCompareOp::GreaterThan: + return CompareOp::GreaterThan; + case VsetpCompareOp::NotEqual: + return CompareOp::NotEqual; + case VsetpCompareOp::GreaterThanEqual: + return CompareOp::GreaterThanEqual; + case VsetpCompareOp::True: + return CompareOp::True; + default: + throw NotImplementedException("Invalid compare op {}", op); + } +} +} // Anonymous namespace + +void TranslatorVisitor::VSETP(u64 insn) { + union { + u64 raw; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<20, 16, u64> src_b_imm; + BitField<28, 2, u64> src_b_selector; + BitField<29, 2, VideoWidth> src_b_width; + BitField<36, 2, u64> src_a_selector; + BitField<37, 2, VideoWidth> src_a_width; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<43, 5, VsetpCompareOp> compare_op; + BitField<45, 2, BooleanOp> bop; + BitField<48, 1, u64> src_a_sign; + BitField<49, 1, u64> src_b_sign; + BitField<50, 1, u64> is_src_b_reg; + } const vsetp{insn}; + + const bool is_b_imm{vsetp.is_src_b_reg == 0}; + const IR::U32 src_a{GetReg8(insn)}; + const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast(vsetp.src_b_imm)) : GetReg20(insn)}; + + const u32 a_selector{static_cast(vsetp.src_a_selector)}; + const u32 b_selector{is_b_imm ? 0U : static_cast(vsetp.src_b_selector)}; + const VideoWidth a_width{vsetp.src_a_width}; + const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)}; + + const bool src_a_signed{vsetp.src_a_sign != 0}; + const bool src_b_signed{vsetp.src_b_sign != 0}; + const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)}; + const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, a_selector, src_b_signed)}; + + // Compare operation's sign is only dependent on operand b's sign + const bool compare_signed{src_b_signed}; + const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)}; + const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)}; + const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)}; + const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)}; + const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)}; + ir.SetPred(vsetp.dest_pred_a, result_a); + ir.SetPred(vsetp.dest_pred_b, result_b); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From 84298ce1917da637e7f60ee6c95602a8e7512c8a Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 28 Mar 2021 14:08:17 -0400 Subject: shader: Implement ISCADD CC --- .../frontend/maxwell/translate/impl/integer_scaled_add.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp index 42fd42bb1..7aef37f54 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -43,7 +43,10 @@ void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { v.X(iscadd.dest_reg, result); if (iscadd.cc != 0) { - throw NotImplementedException("ISCADD CC"); + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + v.SetCFlag(v.ir.GetCarryFromOp(result)); + v.SetOFlag(v.ir.GetOverflowFromOp(result)); } } -- cgit v1.2.3 From e860870dd2244cd87645190c89244f1d2c4c775b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 28 Mar 2021 19:53:34 -0300 Subject: shader: Implement LDS, STS, LDL, and STS and use SPIR-V 1.4 when available --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 46 +++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 6 + .../frontend/ir/microinstruction.cpp | 6 + src/shader_recompiler/frontend/ir/opcodes.inc | 18 ++ src/shader_recompiler/frontend/ir/program.h | 2 + src/shader_recompiler/frontend/maxwell/program.cpp | 2 + .../translate/impl/load_store_local_shared.cpp | 197 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 16 -- 8 files changed, 277 insertions(+), 16 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 6d41442ee..d6a1d8ec2 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -355,6 +355,52 @@ void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) { Inst(Opcode::WriteGlobal128, address, vector); } +U32 IREmitter::LoadLocal(const IR::U32& word_offset) { + return Inst(Opcode::LoadLocal, word_offset); +} + +void IREmitter::WriteLocal(const IR::U32& word_offset, const IR::U32& value) { + Inst(Opcode::WriteLocal, word_offset, value); +} + +Value IREmitter::LoadShared(int bit_size, bool is_signed, const IR::U32& offset) { + switch (bit_size) { + case 8: + return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset); + case 16: + return Inst(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset); + case 32: + return Inst(Opcode::LoadSharedU32, offset); + case 64: + return Inst(Opcode::LoadSharedU64, offset); + case 128: + return Inst(Opcode::LoadSharedU128, offset); + } + throw InvalidArgument("Invalid bit size {}", bit_size); +} + +void IREmitter::WriteShared(int bit_size, const IR::U32& offset, const IR::Value& value) { + switch (bit_size) { + case 8: + Inst(Opcode::WriteSharedU8, offset, value); + break; + case 16: + Inst(Opcode::WriteSharedU16, offset, value); + break; + case 32: + Inst(Opcode::WriteSharedU32, offset, value); + break; + case 64: + Inst(Opcode::WriteSharedU64, offset, value); + break; + case 128: + Inst(Opcode::WriteSharedU128, offset, value); + break; + default: + throw InvalidArgument("Invalid bit size {}", bit_size); + } +} + U1 IREmitter::GetZeroFromOp(const Value& op) { return Inst(Opcode::GetZeroFromOp, op); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 8d50aa607..842c2bdaf 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -99,6 +99,12 @@ public: void WriteGlobal64(const U64& address, const IR::Value& vector); void WriteGlobal128(const U64& address, const IR::Value& vector); + [[nodiscard]] U32 LoadLocal(const U32& word_offset); + void WriteLocal(const U32& word_offset, const U32& value); + + [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset); + void WriteShared(int bit_size, const U32& offset, const Value& value); + [[nodiscard]] U1 GetZeroFromOp(const Value& op); [[nodiscard]] U1 GetSignFromOp(const Value& op); [[nodiscard]] U1 GetCarryFromOp(const Value& op); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index be8eb4d4c..52a5e5034 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -76,6 +76,12 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::WriteStorage32: case Opcode::WriteStorage64: case Opcode::WriteStorage128: + case Opcode::WriteLocal: + case Opcode::WriteSharedU8: + case Opcode::WriteSharedU16: + case Opcode::WriteSharedU32: + case Opcode::WriteSharedU64: + case Opcode::WriteSharedU128: return true; default: return false; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 5d7462d76..c75658328 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -89,6 +89,24 @@ OPCODE(WriteStorage32, Void, U32, OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) +// Local memory operations +OPCODE(LoadLocal, U32, U32, ) +OPCODE(WriteLocal, Void, U32, U32, ) + +// Shared memory operations +OPCODE(LoadSharedU8, U32, U32, ) +OPCODE(LoadSharedS8, U32, U32, ) +OPCODE(LoadSharedU16, U32, U32, ) +OPCODE(LoadSharedS16, U32, U32, ) +OPCODE(LoadSharedU32, U32, U32, ) +OPCODE(LoadSharedU64, U32x2, U32, ) +OPCODE(LoadSharedU128, U32x4, U32, ) +OPCODE(WriteSharedU8, Void, U32, U32, ) +OPCODE(WriteSharedU16, Void, U32, U32, ) +OPCODE(WriteSharedU32, Void, U32, U32, ) +OPCODE(WriteSharedU64, Void, U32, U32x2, ) +OPCODE(WriteSharedU128, Void, U32, U32x4, ) + // Vector utility OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 0162e919c..3a37b3ab9 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -21,6 +21,8 @@ struct Program { Info info; Stage stage{}; std::array workgroup_size{}; + u32 local_memory_size{}; + u32 shared_memory_size{}; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index a914a91f4..7b08f11b0 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -67,8 +67,10 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool offset_reg; + BitField<20, 24, u64> absolute_offset; + BitField<20, 24, s64> relative_offset; + } const encoding{insn}; + + if (encoding.offset_reg == IR::Reg::RZ) { + return v.ir.Imm32(static_cast(encoding.absolute_offset)); + } else { + const s32 relative{static_cast(encoding.relative_offset.Value())}; + return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); + } +} + +std::pair GetSize(u64 insn) { + union { + u64 raw; + BitField<48, 3, Size> size; + } const encoding{insn}; + + const Size nnn = encoding.size; + switch (encoding.size) { + case Size::U8: + return {8, false}; + case Size::S8: + return {8, true}; + case Size::U16: + return {16, false}; + case Size::S16: + return {16, true}; + case Size::B32: + return {32, false}; + case Size::B64: + return {64, false}; + case Size::B128: + return {128, false}; + default: + throw NotImplementedException("Invalid size {}", encoding.size.Value()); + } +} + +IR::Reg Reg(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> reg; + } const encoding{insn}; + + return encoding.reg; +} + +IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) { + return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24)); +} + +IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) { + return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16)); +} +} // Anonymous namespace + +void TranslatorVisitor::LDL(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))}; + + const IR::Reg dest{Reg(insn)}; + const auto [bit_size, is_signed]{GetSize(insn)}; + switch (bit_size) { + case 8: { + const IR::U32 bit{ByteOffset(ir, offset)}; + X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(8), is_signed)); + break; + } + case 16: { + const IR::U32 bit{ShortOffset(ir, offset)}; + X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(16), is_signed)); + break; + } + case 32: + case 64: + case 128: + if (!IR::IsAligned(dest, bit_size / 32)) { + throw NotImplementedException("Unaligned destination register {}", dest); + } + X(dest, ir.LoadLocal(word_offset)); + for (int i = 1; i < bit_size / 32; ++i) { + X(dest + i, ir.LoadLocal(ir.IAdd(word_offset, ir.Imm32(i)))); + } + break; + } +} + +void TranslatorVisitor::LDS(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::Reg dest{Reg(insn)}; + const auto [bit_size, is_signed]{GetSize(insn)}; + const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)}; + switch (bit_size) { + case 8: + case 16: + case 32: + X(dest, IR::U32{value}); + break; + case 64: + case 128: + if (!IR::IsAligned(dest, bit_size / 32)) { + throw NotImplementedException("Unaligned destination register {}", dest); + } + for (int element = 0; element < bit_size / 32; ++element) { + X(dest + element, IR::U32{ir.CompositeExtract(value, element)}); + } + break; + } +} + +void TranslatorVisitor::STL(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))}; + + const IR::Reg reg{Reg(insn)}; + const IR::U32 src{X(reg)}; + const int bit_size{GetSize(insn).first}; + switch (bit_size) { + case 8: { + const IR::U32 bit{ByteOffset(ir, offset)}; + const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))}; + ir.WriteLocal(word_offset, value); + break; + } + case 16: { + const IR::U32 bit{ShortOffset(ir, offset)}; + const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))}; + ir.WriteLocal(word_offset, value); + break; + } + case 32: + case 64: + case 128: + if (!IR::IsAligned(reg, bit_size / 32)) { + throw NotImplementedException("Unaligned source register"); + } + ir.WriteLocal(word_offset, src); + for (int i = 1; i < bit_size / 32; ++i) { + ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i)); + } + break; + } +} + +void TranslatorVisitor::STS(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::Reg reg{Reg(insn)}; + const int bit_size{GetSize(insn).first}; + switch (bit_size) { + case 8: + case 16: + case 32: + ir.WriteShared(bit_size, offset, X(reg)); + break; + case 64: + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1))); + break; + case 128: { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))}; + ir.WriteShared(128, offset, vector); + break; + } + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 409216640..b62d8ee2a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -193,14 +193,6 @@ void TranslatorVisitor::LD(u64) { ThrowNotImplemented(Opcode::LD); } -void TranslatorVisitor::LDL(u64) { - ThrowNotImplemented(Opcode::LDL); -} - -void TranslatorVisitor::LDS(u64) { - ThrowNotImplemented(Opcode::LDS); -} - void TranslatorVisitor::LEPC(u64) { ThrowNotImplemented(Opcode::LEPC); } @@ -309,18 +301,10 @@ void TranslatorVisitor::ST(u64) { ThrowNotImplemented(Opcode::ST); } -void TranslatorVisitor::STL(u64) { - ThrowNotImplemented(Opcode::STL); -} - void TranslatorVisitor::STP(u64) { ThrowNotImplemented(Opcode::STP); } -void TranslatorVisitor::STS(u64) { - ThrowNotImplemented(Opcode::STS); -} - void TranslatorVisitor::SUATOM_cas(u64) { ThrowNotImplemented(Opcode::SUATOM_cas); } -- cgit v1.2.3 From cd9f75e2239666a932861f6d54138febf8736a8c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 28 Mar 2021 20:16:26 -0400 Subject: shader: Fix ISCADD logic for PO/CC --- .../maxwell/translate/impl/integer_scaled_add.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp index 7aef37f54..93cc2c0b1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -30,23 +30,24 @@ void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { if (iscadd.neg_b != 0) { op_b = v.ir.INeg(op_b); } + } else { + // When PO is present, add one + op_b = v.ir.IAdd(op_b, v.ir.Imm32(1)); } // With the operands already processed, scale A const IR::U32 scale{v.ir.Imm32(static_cast(iscadd.scale))}; const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)}; - IR::U32 result{v.ir.IAdd(scaled_a, op_b)}; - if (po) { - // .PO adds one to the final result - result = v.ir.IAdd(result, v.ir.Imm32(1)); - } + const IR::U32 result{v.ir.IAdd(scaled_a, op_b)}; v.X(iscadd.dest_reg, result); if (iscadd.cc != 0) { v.SetZFlag(v.ir.GetZeroFromOp(result)); v.SetSFlag(v.ir.GetSignFromOp(result)); - v.SetCFlag(v.ir.GetCarryFromOp(result)); - v.SetOFlag(v.ir.GetOverflowFromOp(result)); + const IR::U1 carry{v.ir.GetCarryFromOp(result)}; + const IR::U1 overflow{v.ir.GetOverflowFromOp(result)}; + v.SetCFlag(po ? v.ir.LogicalOr(carry, v.ir.GetCarryFromOp(op_b)) : carry); + v.SetOFlag(po ? v.ir.LogicalOr(overflow, v.ir.GetOverflowFromOp(op_b)) : overflow); } } -- cgit v1.2.3 From dbc1e5cde79b9165605741e1ea7158513ef6499f Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 27 Mar 2021 23:01:28 -0400 Subject: shader: Implement I2I SAT --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 8 +++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 2 ++ src/shader_recompiler/frontend/ir/opcodes.inc | 2 ++ .../impl/integer_to_integer_conversion.cpp | 40 ++++++++++++++++------ 4 files changed, 42 insertions(+), 10 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index d6a1d8ec2..9b898e4e1 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1183,6 +1183,14 @@ U32 IREmitter::IMax(const U32& a, const U32& b, bool is_signed) { return is_signed ? SMax(a, b) : UMax(a, b); } +U32 IREmitter::SClamp(const U32& value, const U32& min, const U32& max) { + return Inst(Opcode::SClamp32, value, min, max); +} + +U32 IREmitter::UClamp(const U32& value, const U32& min, const U32& max) { + return Inst(Opcode::UClamp32, value, min, max); +} + U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { return Inst(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 842c2bdaf..269f367a4 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -206,6 +206,8 @@ public: [[nodiscard]] U32 SMax(const U32& a, const U32& b); [[nodiscard]] U32 UMax(const U32& a, const U32& b); [[nodiscard]] U32 IMax(const U32& a, const U32& b, bool is_signed); + [[nodiscard]] U32 SClamp(const U32& value, const U32& min, const U32& max); + [[nodiscard]] U32 UClamp(const U32& value, const U32& min, const U32& max); [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index c75658328..9b050995b 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -299,6 +299,8 @@ OPCODE(SMin32, U32, U32, OPCODE(UMin32, U32, U32, U32, ) OPCODE(SMax32, U32, U32, U32, ) OPCODE(UMax32, U32, U32, U32, ) +OPCODE(SClamp32, U32, U32, U32, U32, ) +OPCODE(UClamp32, U32, U32, U32, U32, ) OPCODE(SLessThan, U1, U32, U32, ) OPCODE(ULessThan, U1, U32, U32, ) OPCODE(IEqual, U1, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp index e8f35552c..98b7f59f7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp @@ -30,16 +30,33 @@ enum class IntegerWidth : u64 { [[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width) { const IR::U32 zero{ir.Imm32(0)}; + const IR::U32 count{WidthSize(ir, dst_width)}; + return ir.BitFieldExtract(src, zero, count, false); +} + +[[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width, + bool dst_signed, bool src_signed) { + IR::U32 min{}; + IR::U32 max{}; + const IR::U32 zero{ir.Imm32(0)}; switch (dst_width) { case IntegerWidth::Byte: - return ir.BitFieldExtract(src, zero, ir.Imm32(8), false); + min = dst_signed && src_signed ? ir.Imm32(0xffffff80) : zero; + max = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff); + break; case IntegerWidth::Short: - return ir.BitFieldExtract(src, zero, ir.Imm32(16), false); + min = dst_signed && src_signed ? ir.Imm32(0xffff8000) : zero; + max = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff); + break; case IntegerWidth::Word: - return ir.BitFieldExtract(src, zero, ir.Imm32(32), false); + min = dst_signed && src_signed ? ir.Imm32(0x80000000) : zero; + max = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff); + break; default: throw NotImplementedException("Invalid width {}", dst_width); } + const IR::U32 value{!dst_signed && src_signed ? ir.SMax(zero, src) : src}; + return dst_signed && src_signed ? ir.SClamp(value, min, max) : ir.UClamp(value, min, max); } void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { @@ -60,9 +77,6 @@ void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { if (i2i.cc != 0) { throw NotImplementedException("I2I CC"); } - if (i2i.sat != 0) { - throw NotImplementedException("I2I SAT"); - } if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) { throw NotImplementedException("16-bit source format incompatible with selector {}", i2i.selector); @@ -75,15 +89,21 @@ void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { const s32 selector{static_cast(i2i.selector)}; const IR::U32 offset{v.ir.Imm32(selector * 8)}; const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)}; - IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, i2i.src_fmt_sign != 0)}; - if (i2i.abs) { + const bool src_signed{i2i.src_fmt_sign != 0}; + const bool dst_signed{i2i.dst_fmt_sign != 0}; + const bool sat{i2i.sat != 0}; + + IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, src_signed)}; + if (i2i.abs != 0) { src_values = v.ir.IAbs(src_values); } - if (i2i.neg) { + if (i2i.neg != 0) { src_values = v.ir.INeg(src_values); } + const IR::U32 result{ + sat ? SaturateInteger(v.ir, src_values, i2i.dst_fmt, dst_signed, src_signed) + : ConvertInteger(v.ir, src_values, i2i.dst_fmt)}; - const IR::U32 result{ConvertInteger(v.ir, src_values, i2i.dst_fmt)}; v.X(i2i.dest_reg, result); } } // Anonymous namespace -- cgit v1.2.3 From 73af0d2e0d12d94b1d2dc8c0b448d0769cf111f4 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 28 Mar 2021 21:33:52 -0400 Subject: shader: Implement I2I CC --- .../maxwell/translate/impl/integer_to_integer_conversion.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp index 98b7f59f7..2f1a58805 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp @@ -74,9 +74,6 @@ void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { BitField<50, 1, u64> sat; } const i2i{insn}; - if (i2i.cc != 0) { - throw NotImplementedException("I2I CC"); - } if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) { throw NotImplementedException("16-bit source format incompatible with selector {}", i2i.selector); @@ -105,6 +102,10 @@ void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { : ConvertInteger(v.ir, src_values, i2i.dst_fmt)}; v.X(i2i.dest_reg, result); + if (i2i.cc != 0) { + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + } } } // Anonymous namespace -- cgit v1.2.3 From 39a379632ea9f5eec9877b53668ebf385d0520bf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 29 Mar 2021 01:16:16 -0300 Subject: shader: Fix alignment checks on RZ --- src/shader_recompiler/frontend/ir/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h index 3845ec5fb..a4b635792 100644 --- a/src/shader_recompiler/frontend/ir/reg.h +++ b/src/shader_recompiler/frontend/ir/reg.h @@ -309,7 +309,7 @@ constexpr Reg operator++(Reg& reg, int) { } [[nodiscard]] constexpr bool IsAligned(Reg reg, size_t align) { - return (RegIndex(reg) / align) * align == RegIndex(reg); + return RegIndex(reg) % align == 0 || reg == Reg::RZ; } } // namespace Shader::IR -- cgit v1.2.3 From 34aba9627a8fad20b3b173180e2f3d679dd32293 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 27 Mar 2021 22:30:24 +0100 Subject: shader: Implement BRX --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 12 +++ src/shader_recompiler/frontend/ir/ir_emitter.h | 4 + .../frontend/ir/microinstruction.cpp | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 3 + .../frontend/maxwell/control_flow.cpp | 58 +++++++++-- .../frontend/maxwell/control_flow.h | 7 +- .../maxwell/indirect_branch_table_track.cpp | 108 +++++++++++++++++++++ .../frontend/maxwell/indirect_branch_table_track.h | 28 ++++++ .../frontend/maxwell/instruction.h | 1 + .../frontend/maxwell/structured_control_flow.cpp | 57 +++++++++++ .../maxwell/translate/impl/branch_indirect.cpp | 36 +++++++ .../maxwell/translate/impl/load_constant.cpp | 29 +----- .../maxwell/translate/impl/load_constant.h | 39 ++++++++ .../maxwell/translate/impl/not_implemented.cpp | 8 -- 14 files changed, 347 insertions(+), 44 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 9b898e4e1..552472487 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -87,6 +87,10 @@ void IREmitter::Return() { Inst(Opcode::Return); } +void IREmitter::Unreachable() { + Inst(Opcode::Unreachable); +} + void IREmitter::DemoteToHelperInvocation(Block* continue_label) { block->SetBranch(continue_label); continue_label->AddImmediatePredecessor(block); @@ -126,6 +130,14 @@ void IREmitter::SetGotoVariable(u32 id, const U1& value) { Inst(Opcode::SetGotoVariable, id, value); } +U32 IREmitter::GetIndirectBranchVariable() { + return Inst(Opcode::GetIndirectBranchVariable); +} + +void IREmitter::SetIndirectBranchVariable(const U32& value) { + Inst(Opcode::SetIndirectBranchVariable, value); +} + void IREmitter::SetPred(IR::Pred pred, const U1& value) { Inst(Opcode::SetPred, pred, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 269f367a4..17bc32fc8 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -37,6 +37,7 @@ public: void LoopMerge(Block* merge_block, Block* continue_target); void SelectionMerge(Block* merge_block); void Return(); + void Unreachable(); void DemoteToHelperInvocation(Block* continue_label); void Prologue(); @@ -51,6 +52,9 @@ public: [[nodiscard]] U1 GetGotoVariable(u32 id); void SetGotoVariable(u32 id, const U1& value); + [[nodiscard]] U32 GetIndirectBranchVariable(); + void SetIndirectBranchVariable(const U32& value); + [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); [[nodiscard]] UAny GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, bool is_signed); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 52a5e5034..c3ba6b522 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -55,6 +55,7 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::LoopMerge: case Opcode::SelectionMerge: case Opcode::Return: + case Opcode::Unreachable: case Opcode::DemoteToHelperInvocation: case Opcode::Prologue: case Opcode::Epilogue: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 9b050995b..fb79e3d8d 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -13,6 +13,7 @@ OPCODE(BranchConditional, Void, U1, OPCODE(LoopMerge, Void, Label, Label, ) OPCODE(SelectionMerge, Void, Label, ) OPCODE(Return, Void, ) +OPCODE(Unreachable, Void, ) OPCODE(DemoteToHelperInvocation, Void, Label, ) // Special operations @@ -26,6 +27,8 @@ OPCODE(GetPred, U1, Pred OPCODE(SetPred, Void, Pred, U1, ) OPCODE(GetGotoVariable, U1, U32, ) OPCODE(SetGotoVariable, Void, U32, U1, ) +OPCODE(GetIndirectBranchVariable, U32, ) +OPCODE(SetIndirectBranchVariable, Void, U32, ) OPCODE(GetCbufU8, U32, U32, U32, ) OPCODE(GetCbufS8, U32, U32, U32, ) OPCODE(GetCbufU16, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 4f6707fae..1e9b8e426 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -14,6 +14,7 @@ #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h" #include "shader_recompiler/frontend/maxwell/location.h" namespace Shader::Maxwell::Flow { @@ -252,9 +253,7 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati const Opcode opcode{Decode(inst.raw)}; switch (opcode) { case Opcode::BRA: - case Opcode::BRX: case Opcode::JMP: - case Opcode::JMX: case Opcode::RET: if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { return AnalysisState::Continue; @@ -264,10 +263,6 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati case Opcode::JMP: AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode)); break; - case Opcode::BRX: - case Opcode::JMX: - AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode)); - break; case Opcode::RET: block->end_class = EndClass::Return; break; @@ -302,6 +297,9 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati case Opcode::SSY: block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); return AnalysisState::Continue; + case Opcode::BRX: + case Opcode::JMX: + return AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode), function_id); case Opcode::EXIT: return AnalyzeEXIT(block, function_id, pc, inst); case Opcode::PRET: @@ -407,8 +405,46 @@ void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruct block->branch_true = AddLabel(block, block->stack, bra_pc, function_id); } -void CFG::AnalyzeBRX(Block*, Location, Instruction, bool is_absolute) { - throw NotImplementedException("{}", is_absolute ? "JMX" : "BRX"); +CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, + FunctionId function_id) { + const std::optional brx_table{TrackIndirectBranchTable(env, pc, block->begin)}; + if (!brx_table) { + TrackIndirectBranchTable(env, pc, block->begin); + throw NotImplementedException("Failed to track indirect branch"); + } + const IR::FlowTest flow_test{inst.branch.flow_test}; + const Predicate pred{inst.Pred()}; + if (flow_test != IR::FlowTest::T || pred != Predicate{true}) { + throw NotImplementedException("Conditional indirect branch"); + } + std::vector targets; + targets.reserve(brx_table->num_entries); + for (u32 i = 0; i < brx_table->num_entries; ++i) { + u32 target{env.ReadCbufValue(brx_table->cbuf_index, brx_table->cbuf_offset + i * 4)}; + if (!is_absolute) { + target += pc.Offset(); + } + target += brx_table->branch_offset; + target += 8; + targets.push_back(target); + } + std::ranges::sort(targets); + targets.erase(std::unique(targets.begin(), targets.end()), targets.end()); + + block->indirect_branches.reserve(targets.size()); + for (const u32 target : targets) { + Block* const branch{AddLabel(block, block->stack, target, function_id)}; + block->indirect_branches.push_back(branch); + } + block->cond = IR::Condition{true}; + block->end = pc + 1; + block->end_class = EndClass::IndirectBranch; + block->branch_reg = brx_table->branch_reg; + block->branch_offset = brx_table->branch_offset + 8; + if (!is_absolute) { + block->branch_offset += pc.Offset(); + } + return AnalysisState::Branch; } CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, @@ -449,7 +485,6 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function // Block already exists and it has been visited return &*it; } - // TODO: FIX DANGLING BLOCKS Block* const new_block{block_pool.Create(Block{ .begin{pc}, .end{pc}, @@ -494,6 +529,11 @@ std::string CFG::Dot() const { add_branch(block.branch_false, false); } break; + case EndClass::IndirectBranch: + for (Block* const branch : block.indirect_branches) { + add_branch(branch, false); + } + break; case EndClass::Call: dot += fmt::format("\t\t{}->N{};\n", name, node_uid); dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block)); diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 22f134194..1e05fcb97 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -26,6 +26,7 @@ using FunctionId = size_t; enum class EndClass { Branch, + IndirectBranch, Call, Exit, Return, @@ -76,11 +77,14 @@ struct Block : boost::intrusive::set_base_hook< union { Block* branch_true; FunctionId function_call; + IR::Reg branch_reg; }; union { Block* branch_false; Block* return_block; + s32 branch_offset; }; + std::vector indirect_branches; }; struct Label { @@ -139,7 +143,8 @@ private: void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute); - void AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute); + AnalysisState AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, + FunctionId function_id); AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst); /// Return the branch target block id diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp new file mode 100644 index 000000000..96453509d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp @@ -0,0 +1,108 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h" + +namespace Shader::Maxwell { +namespace { +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 19, u64> immediate; + BitField<56, 1, u64> is_negative; + BitField<20, 24, s64> brx_offset; +}; + +template +std::optional Track(Environment& env, Location block_begin, Location& pos, Callable&& func) { + while (pos >= block_begin) { + const u64 insn{env.ReadInstruction(pos.Offset())}; + --pos; + if (func(insn, Decode(insn))) { + return insn; + } + } + return std::nullopt; +} + +std::optional TrackLDC(Environment& env, Location block_begin, Location& pos, + IR::Reg brx_reg) { + return Track(env, block_begin, pos, [brx_reg](u64 insn, Opcode opcode) { + const LDC::Encoding ldc{insn}; + return opcode == Opcode::LDC && ldc.dest_reg == brx_reg && ldc.size == LDC::Size::B32 && + ldc.mode == LDC::Mode::Default; + }); +} + +std::optional TrackSHL(Environment& env, Location block_begin, Location& pos, + IR::Reg ldc_reg) { + return Track(env, block_begin, pos, [ldc_reg](u64 insn, Opcode opcode) { + const Encoding shl{insn}; + return opcode == Opcode::SHL_imm && shl.dest_reg == ldc_reg; + }); +} + +std::optional TrackIMNMX(Environment& env, Location block_begin, Location& pos, + IR::Reg shl_reg) { + return Track(env, block_begin, pos, [shl_reg](u64 insn, Opcode opcode) { + const Encoding imnmx{insn}; + return opcode == Opcode::IMNMX_imm && imnmx.dest_reg == shl_reg; + }); +} +} // Anonymous namespace + +std::optional TrackIndirectBranchTable(Environment& env, Location brx_pos, + Location block_begin) { + const u64 brx_insn{env.ReadInstruction(brx_pos.Offset())}; + const Opcode brx_opcode{Decode(brx_insn)}; + if (brx_opcode != Opcode::BRX && brx_opcode != Opcode::JMX) { + throw LogicError("Tracked instruction is not BRX or JMX"); + } + const IR::Reg brx_reg{Encoding{brx_insn}.src_reg}; + const s32 brx_offset{static_cast(Encoding{brx_insn}.brx_offset)}; + + Location pos{brx_pos}; + const std::optional ldc_insn{TrackLDC(env, block_begin, pos, brx_reg)}; + if (!ldc_insn) { + return std::nullopt; + } + const LDC::Encoding ldc{*ldc_insn}; + const u32 cbuf_index{static_cast(ldc.index)}; + const u32 cbuf_offset{static_cast(static_cast(ldc.offset.Value()))}; + const IR::Reg ldc_reg{ldc.src_reg}; + + const std::optional shl_insn{TrackSHL(env, block_begin, pos, ldc_reg)}; + if (!shl_insn) { + return std::nullopt; + } + const Encoding shl{*shl_insn}; + const IR::Reg shl_reg{shl.src_reg}; + + const std::optional imnmx_insn{TrackIMNMX(env, block_begin, pos, shl_reg)}; + if (!imnmx_insn) { + return std::nullopt; + } + const Encoding imnmx{*imnmx_insn}; + if (imnmx.is_negative != 0) { + return std::nullopt; + } + const u32 imnmx_immediate{static_cast(imnmx.immediate.Value())}; + return IndirectBranchTableInfo{ + .cbuf_index{cbuf_index}, + .cbuf_offset{cbuf_offset}, + .num_entries{imnmx_immediate + 1}, + .branch_offset{brx_offset}, + .branch_reg{brx_reg}, + }; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h new file mode 100644 index 000000000..eee5102fa --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/frontend/maxwell/location.h" + +namespace Shader::Maxwell { + +struct IndirectBranchTableInfo { + u32 cbuf_index{}; + u32 cbuf_offset{}; + u32 num_entries{}; + s32 branch_offset{}; + IR::Reg branch_reg{}; +}; + +std::optional TrackIndirectBranchTable(Environment& env, Location brx_pos, + Location block_begin); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/instruction.h b/src/shader_recompiler/frontend/maxwell/instruction.h index 57fd531f2..743d68d61 100644 --- a/src/shader_recompiler/frontend/maxwell/instruction.h +++ b/src/shader_recompiler/frontend/maxwell/instruction.h @@ -7,6 +7,7 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/frontend/ir/flow_test.h" +#include "shader_recompiler/frontend/ir/reg.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 9d4688390..a6e55f61e 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -17,6 +17,7 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/decode.h" #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/object_pool.h" @@ -46,12 +47,15 @@ enum class StatementType { Break, Return, Kill, + Unreachable, Function, Identity, Not, Or, SetVariable, + SetIndirectBranchVariable, Variable, + IndirectBranchCond, }; bool HasChildren(StatementType type) { @@ -72,12 +76,15 @@ struct Loop {}; struct Break {}; struct Return {}; struct Kill {}; +struct Unreachable {}; struct FunctionTag {}; struct Identity {}; struct Not {}; struct Or {}; struct SetVariable {}; +struct SetIndirectBranchVariable {}; struct Variable {}; +struct IndirectBranchCond {}; #ifdef _MSC_VER #pragma warning(push) @@ -96,6 +103,7 @@ struct Statement : ListBaseHook { : cond{cond_}, up{up_}, type{StatementType::Break} {} Statement(Return) : type{StatementType::Return} {} Statement(Kill) : type{StatementType::Kill} {} + Statement(Unreachable) : type{StatementType::Unreachable} {} Statement(FunctionTag) : children{}, type{StatementType::Function} {} Statement(Identity, IR::Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {} Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {} @@ -103,7 +111,12 @@ struct Statement : ListBaseHook { : op_a{op_a_}, op_b{op_b_}, type{StatementType::Or} {} Statement(SetVariable, u32 id_, Statement* op_, Statement* up_) : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} + Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_) + : branch_offset{branch_offset_}, + branch_reg{branch_reg_}, type{StatementType::SetIndirectBranchVariable} {} Statement(Variable, u32 id_) : id{id_}, type{StatementType::Variable} {} + Statement(IndirectBranchCond, u32 location_) + : location{location_}, type{StatementType::IndirectBranchCond} {} ~Statement() { if (HasChildren(type)) { @@ -118,11 +131,14 @@ struct Statement : ListBaseHook { IR::Condition guest_cond; Statement* op; Statement* op_a; + u32 location; + s32 branch_offset; }; union { Statement* cond; Statement* op_b; u32 id; + IR::Reg branch_reg; }; Statement* up{}; StatementType type; @@ -141,6 +157,8 @@ std::string DumpExpr(const Statement* stmt) { return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b)); case StatementType::Variable: return fmt::format("goto_L{}", stmt->id); + case StatementType::IndirectBranchCond: + return fmt::format("(indirect_branch == {:x})", stmt->location); default: return ""; } @@ -182,14 +200,22 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) { case StatementType::Kill: ret += fmt::format("{} kill;\n", indent); break; + case StatementType::Unreachable: + ret += fmt::format("{} unreachable;\n", indent); + break; case StatementType::SetVariable: ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op)); break; + case StatementType::SetIndirectBranchVariable: + ret += fmt::format("{} indirect_branch = {} + {};\n", indent, stmt->branch_reg, + stmt->branch_offset); + break; case StatementType::Function: case StatementType::Identity: case StatementType::Not: case StatementType::Or: case StatementType::Variable: + case StatementType::IndirectBranchCond: throw LogicError("Statement can't be printed"); } } @@ -417,6 +443,17 @@ private: } break; } + case Flow::EndClass::IndirectBranch: + root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg, + block.branch_offset)); + for (Flow::Block* const branch : block.indirect_branches) { + const Node indirect_label{local_labels.at(branch)}; + Statement* cond{pool.Create(IndirectBranchCond{}, branch->begin.Offset())}; + Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)}; + gotos.push_back(root.insert(ip, *goto_stmt)); + } + root.insert(ip, *pool.Create(Unreachable{})); + break; case Flow::EndClass::Call: { Flow::Function& call{cfg.Functions()[block.function_call]}; const Node call_return_label{local_labels.at(block.return_block)}; @@ -623,6 +660,8 @@ IR::Block* TryFindForwardBlock(const Statement& stmt) { return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b)); case StatementType::Variable: return ir.GetGotoVariable(stmt.id); + case StatementType::IndirectBranchCond: + return ir.IEqual(ir.GetIndirectBranchVariable(), ir.Imm32(stmt.location)); default: throw NotImplementedException("Statement type {}", stmt.type); } @@ -670,6 +709,15 @@ private: ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); break; } + case StatementType::SetIndirectBranchVariable: { + if (!current_block) { + current_block = MergeBlock(parent, stmt); + } + IR::IREmitter ir{*current_block}; + IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))}; + ir.SetIndirectBranchVariable(address); + break; + } case StatementType::If: { if (!current_block) { current_block = block_pool.Create(inst_pool); @@ -756,6 +804,15 @@ private: current_block = demote_block; break; } + case StatementType::Unreachable: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IR::IREmitter{*current_block}.Unreachable(); + current_block = nullptr; + break; + } default: throw NotImplementedException("Statement type {}", stmt.type); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp new file mode 100644 index 000000000..371c0e0f7 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp @@ -0,0 +1,36 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void Check(u64 insn) { + union { + u64 raw; + BitField<5, 1, u64> cbuf_mode; + BitField<6, 1, u64> lmt; + } const encoding{insn}; + + if (encoding.cbuf_mode != 0) { + throw NotImplementedException("Constant buffer mode"); + } + if (encoding.lmt != 0) { + throw NotImplementedException("LMT"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::BRX(u64 insn) { + Check(insn); +} + +void TranslatorVisitor::JMX(u64 insn) { + Check(insn); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp index 39becf93c..49ccb7d62 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -5,25 +5,11 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h" namespace Shader::Maxwell { +using namespace LDC; namespace { -enum class Mode : u64 { - Default, - IL, - IS, - ISL, -}; - -enum class Size : u64 { - U8, - S8, - U16, - S16, - B32, - B64, -}; - std::pair Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index, const IR::U32& reg, const IR::U32& imm) { switch (mode) { @@ -37,16 +23,7 @@ std::pair Slot(IR::IREmitter& ir, Mode mode, const IR::U32& im } // Anonymous namespace void TranslatorVisitor::LDC(u64 insn) { - union { - u64 raw; - BitField<0, 8, IR::Reg> dest_reg; - BitField<8, 8, IR::Reg> src_reg; - BitField<20, 16, s64> offset; - BitField<36, 5, u64> index; - BitField<44, 2, Mode> mode; - BitField<48, 3, Size> size; - } const ldc{insn}; - + const Encoding ldc{insn}; const IR::U32 imm_index{ir.Imm32(static_cast(ldc.index))}; const IR::U32 reg{X(ldc.src_reg)}; const IR::U32 imm{ir.Imm32(static_cast(ldc.offset))}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h new file mode 100644 index 000000000..3074ea0e3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h @@ -0,0 +1,39 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/reg.h" + +namespace Shader::Maxwell::LDC { + +enum class Mode : u64 { + Default, + IL, + IS, + ISL, +}; + +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, +}; + +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 16, s64> offset; + BitField<36, 5, u64> index; + BitField<44, 2, Mode> mode; + BitField<48, 3, Size> size; +}; + +} // namespace Shader::Maxwell::LDC diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index b62d8ee2a..a0057a473 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -53,10 +53,6 @@ void TranslatorVisitor::BRK(u64) { ThrowNotImplemented(Opcode::BRK); } -void TranslatorVisitor::BRX(u64) { - ThrowNotImplemented(Opcode::BRX); -} - void TranslatorVisitor::CAL() { // CAL is a no-op } @@ -181,10 +177,6 @@ void TranslatorVisitor::JMP(u64) { ThrowNotImplemented(Opcode::JMP); } -void TranslatorVisitor::JMX(u64) { - ThrowNotImplemented(Opcode::JMX); -} - void TranslatorVisitor::KIL() { // KIL is a no-op } -- cgit v1.2.3 From 6c51f496320f698e123207c09ca61e55180a31b5 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 28 Mar 2021 22:23:45 -0400 Subject: shader: Implement FSWZADD --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 3 ++ src/shader_recompiler/frontend/ir/ir_emitter.h | 2 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../translate/impl/floating_point_swizzled_add.cpp | 44 ++++++++++++++++++++++ .../frontend/maxwell/translate/impl/impl.cpp | 4 ++ .../frontend/maxwell/translate/impl/impl.h | 1 + .../maxwell/translate/impl/not_implemented.cpp | 4 -- 7 files changed, 55 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 552472487..505fba46a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1602,4 +1602,7 @@ U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, cons const IR::U32& seg_mask) { return Inst(Opcode::ShuffleButterfly, value, index, clamp, seg_mask); } +F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpControl control) { + return Inst(Opcode::FSwizzleAdd, Flags{control}, a, b, swizzle); +} } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 17bc32fc8..8f3325738 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -277,6 +277,8 @@ public: const IR::U32& seg_mask); [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, const IR::U32& seg_mask); + [[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, + FpControl control = {}); private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index fb79e3d8d..717aa71ca 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -408,3 +408,4 @@ OPCODE(ShuffleIndex, U32, U32, OPCODE(ShuffleUp, U32, U32, U32, U32, U32, ) OPCODE(ShuffleDown, U32, U32, U32, U32, U32, ) OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, ) +OPCODE(FSwizzleAdd, F32, F32, F32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp new file mode 100644 index 000000000..e42921a21 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp @@ -0,0 +1,44 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::FSWZADD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<28, 8, u64> swizzle; + BitField<38, 1, u64> ndv; + BitField<39, 2, FpRounding> round; + BitField<44, 1, u64> ftz; + BitField<47, 1, u64> cc; + } const fswzadd{insn}; + + if (fswzadd.ndv != 0) { + throw NotImplementedException("FSWZADD NDV"); + } + + const IR::F32 src_a{GetFloatReg8(insn)}; + const IR::F32 src_b{GetFloatReg20(insn)}; + const IR::U32 swizzle{ir.Imm32(static_cast(fswzadd.swizzle))}; + + const IR::FpControl fp_control{ + .no_contraction{false}, + .rounding{CastFpRounding(fswzadd.round)}, + .fmz_mode{fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + + const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)}; + F(fswzadd.dest_reg, result); + + if (fswzadd.cc != 0) { + throw NotImplementedException("FSWZADD CC"); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 9bae89c10..30b570ce4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -91,6 +91,10 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) { return X(reg.index); } +IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) { + return ir.BitCast(GetReg8(insn)); +} + IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) { return ir.BitCast(GetReg20(insn)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 54c31deb4..bf7d1bae8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -353,6 +353,7 @@ public: [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); + [[nodiscard]] IR::F32 GetFloatReg8(u64 insn); [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a0057a473..6a580f831 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -89,10 +89,6 @@ void TranslatorVisitor::FCHK_imm(u64) { ThrowNotImplemented(Opcode::FCHK_imm); } -void TranslatorVisitor::FSWZADD(u64) { - ThrowNotImplemented(Opcode::FSWZADD); -} - void TranslatorVisitor::GETCRSPTR(u64) { ThrowNotImplemented(Opcode::GETCRSPTR); } -- cgit v1.2.3 From 12783f8105e06c8aebcfccf29921441552b991f7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 29 Mar 2021 16:58:42 -0300 Subject: shader: Add missing new lines --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 505fba46a..6e7dddead 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1602,7 +1602,9 @@ U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, cons const IR::U32& seg_mask) { return Inst(Opcode::ShuffleButterfly, value, index, clamp, seg_mask); } + F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpControl control) { return Inst(Opcode::FSwizzleAdd, Flags{control}, a, b, swizzle); } + } // namespace Shader::IR -- cgit v1.2.3 From b0d5572abfe1f14e02d8219f0a4d7dd09ff36fd1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 29 Mar 2021 22:13:37 -0300 Subject: shader: Fix indirect branches to scheduler instructions --- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 9 ++++++--- src/shader_recompiler/frontend/maxwell/control_flow.h | 9 ++++++++- .../frontend/maxwell/structured_control_flow.cpp | 6 +++--- 3 files changed, 17 insertions(+), 7 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 1e9b8e426..784f9df8a 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -434,7 +434,10 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, block->indirect_branches.reserve(targets.size()); for (const u32 target : targets) { Block* const branch{AddLabel(block, block->stack, target, function_id)}; - block->indirect_branches.push_back(branch); + block->indirect_branches.push_back({ + .block{branch}, + .address{target}, + }); } block->cond = IR::Condition{true}; block->end = pc + 1; @@ -530,8 +533,8 @@ std::string CFG::Dot() const { } break; case EndClass::IndirectBranch: - for (Block* const branch : block.indirect_branches) { - add_branch(branch, false); + for (const IndirectBranch& branch : block.indirect_branches) { + add_branch(branch.block, false); } break; case EndClass::Call: diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 1e05fcb97..a8c90d27a 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -22,6 +22,8 @@ namespace Shader::Maxwell::Flow { +struct Block; + using FunctionId = size_t; enum class EndClass { @@ -60,6 +62,11 @@ private: boost::container::small_vector entries; }; +struct IndirectBranch { + Block* block; + u32 address; +}; + struct Block : boost::intrusive::set_base_hook< // Normal link is ~2.5% faster compared to safe link boost::intrusive::link_mode> { @@ -84,7 +91,7 @@ struct Block : boost::intrusive::set_base_hook< Block* return_block; s32 branch_offset; }; - std::vector indirect_branches; + std::vector indirect_branches; }; struct Label { diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index a6e55f61e..c804c2a8e 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -446,9 +446,9 @@ private: case Flow::EndClass::IndirectBranch: root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg, block.branch_offset)); - for (Flow::Block* const branch : block.indirect_branches) { - const Node indirect_label{local_labels.at(branch)}; - Statement* cond{pool.Create(IndirectBranchCond{}, branch->begin.Offset())}; + for (const Flow::IndirectBranch& indirect : block.indirect_branches) { + const Node indirect_label{local_labels.at(indirect.block)}; + Statement* cond{pool.Create(IndirectBranchCond{}, indirect.address)}; Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)}; gotos.push_back(root.insert(ip, *goto_stmt)); } -- cgit v1.2.3 From 514a6b07eedace58b4a0c95282bdfc729623d1d9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 30 Mar 2021 03:19:50 -0300 Subject: shader: Store type of phi nodes in flags This is needed because pseudo-instructions where invalidated. --- src/shader_recompiler/frontend/ir/microinstruction.cpp | 4 ++++ src/shader_recompiler/frontend/ir/value.cpp | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index c3ba6b522..074c71d53 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -193,6 +193,10 @@ void Inst::AddPhiOperand(Block* predecessor, const Value& value) { if (!value.IsImmediate()) { Use(value); } + if (Flags() == IR::Type::Void) { + // Set the type of the phi node + SetFlags(value.Type()); + } phi_args.emplace_back(predecessor, value); } diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index e8e4662e7..837c1b487 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -56,7 +56,11 @@ bool Value::IsLabel() const noexcept { } IR::Type Value::Type() const noexcept { - if (IsIdentity() || IsPhi()) { + if (IsPhi()) { + // The type of a phi node is stored in its flags + return inst->Flags(); + } + if (IsIdentity()) { return inst->Arg(0).Type(); } if (type == Type::Opaque) { -- cgit v1.2.3 From dc1a9a3bed2aa9b0851f07976b0c687172aa3edc Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 26 Mar 2021 20:51:05 +0100 Subject: shader: Implement TLD --- src/shader_recompiler/frontend/ir/opcodes.inc | 6 +- src/shader_recompiler/frontend/maxwell/maxwell.inc | 4 +- .../maxwell/translate/impl/not_implemented.cpp | 8 - .../maxwell/translate/impl/texture_load.cpp | 165 +++++++++++++++++++++ 4 files changed, 170 insertions(+), 13 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 717aa71ca..302b8471d 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -378,7 +378,7 @@ OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) -OPCODE(BindlessImageFetch, F32x4, U32, Opaque, U32, U32, ) +OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -387,7 +387,7 @@ OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) -OPCODE(BoundImageFetch, F32x4, U32, Opaque, U32, U32, ) +OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -396,7 +396,7 @@ OPCODE(ImageSampleDrefImplicitLod, F32, U32, OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(ImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) -OPCODE(ImageFetch, F32x4, U32, Opaque, U32, U32, ) +OPCODE(ImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(ImageQueryDimensions, U32x4, U32, U32, ) // Warp operations diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index d668dc1aa..b47fb9c2e 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -252,8 +252,8 @@ INST(SYNC, "SYNC", "1111 0000 1111 1---") INST(TEX, "TEX", "1100 0--- ---- ----") INST(TEX_b, "TEX (b)", "1101 1110 10-- ----") INST(TEXS, "TEXS", "1101 -00- ---- ----") -INST(TLD, "TLD", "1101 1100 --11 1---") -INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") +INST(TLD, "TLD", "1101 1100 ---- ----") +INST(TLD_b, "TLD (b)", "1101 1101 ---- ----") INST(TLD4, "TLD4", "1100 10-- ---- ----") INST(TLD4_b, "TLD4 (b)", "1101 1110 11-- ----") INST(TLD4S, "TLD4S", "1101 1111 -0-- ----") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 6a580f831..60d61ec6e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -313,14 +313,6 @@ void TranslatorVisitor::SYNC(u64) { ThrowNotImplemented(Opcode::SYNC); } -void TranslatorVisitor::TLD(u64) { - ThrowNotImplemented(Opcode::TLD); -} - -void TranslatorVisitor::TLD_b(u64) { - ThrowNotImplemented(Opcode::TLD_b); -} - void TranslatorVisitor::TLDS(u64) { ThrowNotImplemented(Opcode::TLDS); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp new file mode 100644 index 000000000..b4063fa6e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp @@ -0,0 +1,165 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type, bool dc) { + switch (type) { + case TextureType::_1D: + return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{ + [&]() -> IR::U32 { return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16)); }}; + switch (type) { + case TextureType::_1D: + return v.X(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(v.X(reg + 1), read_array()); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), read_array()); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), v.X(reg + 3), read_array()); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<55, 1, u64> lod; + BitField<50, 1, u64> multisample; + BitField<35, 1, u64> aoffi; + BitField<54, 1, u64> clamp; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const tld{insn}; + + const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)}; + + IR::Reg meta_reg{tld.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::U32 lod; + IR::U32 multisample; + if (!is_bindless) { + handle = v.ir.Imm32(static_cast(tld.cbuf_offset.Value() * 4)); + } else { + handle = v.X(meta_reg++); + } + if (tld.lod != 0) { + lod = v.X(meta_reg++); + } + if (tld.aoffi != 0) { + offset = MakeOffset(v, meta_reg, tld.type); + } + if (tld.multisample != 0) { + multisample = v.X(meta_reg++); + } + if (tld.clamp != 0) { + throw NotImplementedException("TLD.CL - CLAMP is not implmented"); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tld.type, false)); + const IR::Value sample{[&]() -> IR::Value { + return v.ir.ImageFetch(handle, coords, offset, lod, multisample, info); + }()}; + + IR::Reg dest_reg{tld.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((tld.mask >> element) & 1) == 0) { + continue; + } + v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); + ++dest_reg; + } + if (tld.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TLD(u64 insn) { + Impl(*this, insn, false); +} + +void TranslatorVisitor::TLD_b(u64 insn) { + Impl(*this, insn, true); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From 2c276ec6ebff55fb97262ccb50d1ab6a04b3c06a Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 27 Mar 2021 01:45:20 +0100 Subject: shader: Implement TLDS --- .../maxwell/translate/impl/not_implemented.cpp | 4 - .../translate/impl/texture_load_swizzled.cpp | 252 +++++++++++++++++++++ 2 files changed, 252 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 60d61ec6e..7e1ad63e1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -313,10 +313,6 @@ void TranslatorVisitor::SYNC(u64) { ThrowNotImplemented(Opcode::SYNC); } -void TranslatorVisitor::TLDS(u64) { - ThrowNotImplemented(Opcode::TLDS); -} - void TranslatorVisitor::TMML(u64) { ThrowNotImplemented(Opcode::TMML); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp new file mode 100644 index 000000000..3e6ebd911 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp @@ -0,0 +1,252 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Precision : u64 { + F16, + F32, +}; + +constexpr unsigned R = 1; +constexpr unsigned G = 2; +constexpr unsigned B = 4; +constexpr unsigned A = 8; + +constexpr std::array RG_LUT{ + R, // + G, // + B, // + A, // + R | G, // + R | A, // + G | A, // + B | A, // +}; + +constexpr std::array RGBA_LUT{ + R | G | B, // + R | G | A, // + R | B | A, // + G | B | A, // + R | G | B | A, // +}; + +union Encoding { + u64 raw; + BitField<59, 1, Precision> precision; + BitField<54, 1, u64> aoffi; + BitField<53, 1, u64> lod; + BitField<55, 1, u64> ms; + BitField<49, 1, u64> nodep; + BitField<28, 8, IR::Reg> dest_reg_b; + BitField<0, 8, IR::Reg> dest_reg_a; + BitField<8, 8, IR::Reg> src_reg_a; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<36, 13, u64> cbuf_offset; + BitField<50, 3, u64> swizzle; + BitField<53, 4, u64> encoding; +}; + +void CheckAlignment(IR::Reg reg, int alignment) { + if (!IR::IsAligned(reg, alignment)) { + throw NotImplementedException("Unaligned source register {}", reg); + } +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { + const IR::U32 value{v.X(reg)}; + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); +} + +IR::Value Sample(TranslatorVisitor& v, u64 insn) { + const Encoding tlds{insn}; + const IR::U32 handle{v.ir.Imm32(static_cast(tlds.cbuf_offset * 4))}; + const IR::Reg reg_a{tlds.src_reg_a}; + const IR::Reg reg_b{tlds.src_reg_b}; + IR::Value coords; + IR::U32 lod; + IR::Value offsets; + IR::U32 multisample; + Shader::TextureType texture_type; + switch (tlds.encoding) { + case 0: { + texture_type = Shader::TextureType::Color1D; + coords = v.X(reg_a); + break; + } + case 1: { + texture_type = Shader::TextureType::Color1D; + coords = v.X(reg_a); + lod = v.X(reg_b); + break; + } + case 2: { + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b)); + break; + } + case 4: { + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + offsets = MakeOffset(v, reg_b); + break; + } + case 5: { + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + lod = v.X(reg_b); + break; + } + case 6: { + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + multisample = v.X(reg_b); + break; + } + case 7: { + CheckAlignment(reg_a, 2); + texture_type = Shader::TextureType::Color3D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b)); + break; + } + case 8: { + CheckAlignment(reg_b, 2); + texture_type = Shader::TextureType::ColorArray2D; + IR::U32 array = v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16)); + coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array); + break; + } + case 12: { + CheckAlignment(reg_a, 2); + CheckAlignment(reg_b, 2); + texture_type = Shader::TextureType::Color2D; + coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); + lod = v.X(reg_b); + offsets = MakeOffset(v, reg_b + 1); + break; + } + default: { + throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value()); + break; + } + } + IR::TextureInstInfo info{}; + if (tlds.precision == Precision::F16) { + info.relaxed_precision.Assign(1); + } + info.type.Assign(texture_type); + return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info); +} + +unsigned Swizzle(u64 insn) { + const Encoding tlds{insn}; + const size_t encoding{tlds.swizzle}; + if (tlds.dest_reg_b == IR::Reg::RZ) { + if (encoding >= RG_LUT.size()) { + throw NotImplementedException("Illegal RG encoding {}", encoding); + } + return RG_LUT[encoding]; + } else { + if (encoding >= RGBA_LUT.size()) { + throw NotImplementedException("Illegal RGBA encoding {}", encoding); + } + return RGBA_LUT[encoding]; + } +} + +IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { + return IR::F32{v.ir.CompositeExtract(sample, component)}; +} + +IR::Reg RegStoreComponent32(u64 insn, unsigned index) { + const Encoding tlds{insn}; + switch (index) { + case 0: + return tlds.dest_reg_a; + case 1: + CheckAlignment(tlds.dest_reg_a, 2); + return tlds.dest_reg_a + 1; + case 2: + return tlds.dest_reg_b; + case 3: + CheckAlignment(tlds.dest_reg_b, 2); + return tlds.dest_reg_b + 1; + } + throw LogicError("Invalid store index {}", index); +} + +void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + const IR::Reg dest{RegStoreComponent32(insn, store_index)}; + v.F(dest, Extract(v, sample, component)); + ++store_index; + } +} + +IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { + return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); +} + +void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { + const unsigned swizzle{Swizzle(insn)}; + unsigned store_index{0}; + std::array swizzled; + for (unsigned component = 0; component < 4; ++component) { + if (((swizzle >> component) & 1) == 0) { + continue; + } + swizzled[store_index] = Extract(v, sample, component); + ++store_index; + } + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const Encoding tlds{insn}; + switch (store_index) { + case 1: + v.X(tlds.dest_reg_a, Pack(v, swizzled[0], zero)); + break; + case 2: + case 3: + case 4: + v.X(tlds.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); + switch (store_index) { + case 2: + break; + case 3: + v.X(tlds.dest_reg_b, Pack(v, swizzled[2], zero)); + break; + case 4: + v.X(tlds.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); + break; + } + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TLDS(u64 insn) { + const IR::Value sample{Sample(*this, insn)}; + if (Encoding{insn}.precision == Precision::F32) { + Store32(*this, insn, sample); + } else { + Store16(*this, insn, sample); + } +} +} // namespace Shader::Maxwell -- cgit v1.2.3 From 613b48c4a2ce71a0d0eaba17fe164f4a2e4a3db5 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 28 Mar 2021 19:47:52 +0200 Subject: shader,spirv: Implement ImageQueryLod. --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 6 ++++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 2 ++ src/shader_recompiler/frontend/ir/opcodes.inc | 3 +++ 3 files changed, 11 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 6e7dddead..ba9591727 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1567,6 +1567,12 @@ Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) { return Inst(op, handle, lod); } +Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryLod + : Opcode::BindlessImageQueryLod}; + return Inst(op, handle, coords); +} + U1 IREmitter::VoteAll(const U1& value) { return Inst(Opcode::VoteAll, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 8f3325738..9e752b208 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -255,6 +255,8 @@ public: TextureInstInfo info); [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod); + [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords); + [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset, const Value& offset2, TextureInstInfo info); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 302b8471d..49cdcd57f 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -380,6 +380,7 @@ OPCODE(BindlessImageGather, F32x4, U32, OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) +OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, ) OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -389,6 +390,7 @@ OPCODE(BoundImageGather, F32x4, U32, OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) +OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, ) OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -398,6 +400,7 @@ OPCODE(ImageGather, F32x4, U32, OPCODE(ImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(ImageQueryDimensions, U32x4, U32, U32, ) +OPCODE(ImageQueryLod, F32x4, U32, Opaque, ) // Warp operations OPCODE(VoteAll, U1, U1, ) -- cgit v1.2.3 From be3e94ae55184933e0f1f5fb55698513f7936382 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 28 Mar 2021 21:25:08 +0200 Subject: shader: Implement TMML partially --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 +- src/shader_recompiler/frontend/ir/ir_emitter.h | 3 +- .../maxwell/translate/impl/not_implemented.cpp | 8 -- .../translate/impl/texture_mipmap_level.cpp | 130 +++++++++++++++++++++ 4 files changed, 134 insertions(+), 11 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ba9591727..f6818ec8a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1567,10 +1567,10 @@ Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) { return Inst(op, handle, lod); } -Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords) { +Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info) { const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryLod : Opcode::BindlessImageQueryLod}; - return Inst(op, handle, coords); + return Inst(op, Flags{info}, handle, coords); } U1 IREmitter::VoteAll(const U1& value) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 9e752b208..2beeace8f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -255,7 +255,8 @@ public: TextureInstInfo info); [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod); - [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords); + [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords, + TextureInstInfo info); [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset, const Value& offset2, TextureInstInfo info); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 7e1ad63e1..9f5ea7775 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -313,14 +313,6 @@ void TranslatorVisitor::SYNC(u64) { ThrowNotImplemented(Opcode::SYNC); } -void TranslatorVisitor::TMML(u64) { - ThrowNotImplemented(Opcode::TMML); -} - -void TranslatorVisitor::TMML_b(u64) { - ThrowNotImplemented(Opcode::TMML_b); -} - void TranslatorVisitor::TXA(u64) { ThrowNotImplemented(Opcode::TXA); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp new file mode 100644 index 000000000..ee13ede30 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -0,0 +1,130 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type, bool dc) { + switch (type) { + case TextureType::_1D: + return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<35, 1, u64> ndv; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const tmml{insn}; + + if ((tmml.mask & 0xC) != 0) { + throw NotImplementedException("TMML BA results are not implmented"); + } + + IR::F32 transform_constant = v.ir.Imm32(256.0f); + + const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)}; + + IR::U32 handle; + IR::Reg meta_reg{tmml.meta_reg}; + if (!is_bindless) { + handle = v.ir.Imm32(static_cast(tmml.cbuf_offset.Value() * 4)); + } else { + handle = v.X(meta_reg++); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tmml.type, false)); + const IR::Value sample{ + [&]() -> IR::Value { return v.ir.ImageQueryLod(handle, coords, info); }()}; + + const IR::FpControl fp_control{ + .no_contraction{false}, + .rounding{IR::FpRounding::RP}, + .fmz_mode{IR::FmzMode::FTZ}, + }; + IR::Reg dest_reg{tmml.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((tmml.mask >> element) & 1) == 0) { + continue; + } + IR::F32 value = IR::F32{v.ir.CompositeExtract(sample, element)}; + v.F(dest_reg, + element < 2 ? IR::F32{v.ir.FPMul(value, transform_constant, fp_control)} : value); + ++dest_reg; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TMML(u64 insn) { + Impl(*this, insn, false); +} + +void TranslatorVisitor::TMML_b(u64 insn) { + Impl(*this, insn, true); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From d5bfc630886d98ed77959a9771c67293244aff0e Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 29 Mar 2021 02:00:43 +0200 Subject: shader: Implement ImageGradient --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 7 +++++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 4 ++++ src/shader_recompiler/frontend/ir/modifiers.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 3 +++ 4 files changed, 15 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index f6818ec8a..edf8c05d4 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1573,6 +1573,13 @@ Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, Texture return Inst(op, Flags{info}, handle, coords); } +Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivates, + const Value& offset, const F32& lod_clamp, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGradient + : Opcode::BindlessImageGradient}; + return Inst(op, Flags{info}, handle, coords, derivates, offset, lod_clamp); +} + U1 IREmitter::VoteAll(const U1& value) { return Inst(Opcode::VoteAll, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 2beeace8f..a4616e247 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -268,6 +268,10 @@ public: [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset, const U32& lod, const U32& multisampling, TextureInstInfo info); + [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords, + const Value& derivates, const Value& offset, + const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] U1 VoteAll(const U1& value); [[nodiscard]] U1 VoteAny(const U1& value); [[nodiscard]] U1 VoteEqual(const U1& value); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 4f09a4b39..90078f535 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -39,6 +39,7 @@ union TextureInstInfo { BitField<9, 1, u32> has_lod_clamp; BitField<10, 1, u32> relaxed_precision; BitField<11, 2, u32> gather_component; + BitField<13, 2, u32> num_derivates; }; static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 49cdcd57f..79baacd08 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -381,6 +381,7 @@ OPCODE(BindlessImageGatherDref, F32x4, U32, OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, ) +OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -391,6 +392,7 @@ OPCODE(BoundImageGatherDref, F32x4, U32, OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, ) +OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -401,6 +403,7 @@ OPCODE(ImageGatherDref, F32x4, U32, OPCODE(ImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(ImageQueryDimensions, U32x4, U32, U32, ) OPCODE(ImageQueryLod, F32x4, U32, Opaque, ) +OPCODE(ImageGradient, F32x4, U32, Opaque, Opaque, Opaque, F32, ) // Warp operations OPCODE(VoteAll, U1, U1, ) -- cgit v1.2.3 From 630273b6295f524401abf1c131dba09fdd055911 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 29 Mar 2021 02:52:52 +0200 Subject: shader: Implement TXD --- src/shader_recompiler/frontend/maxwell/maxwell.inc | 4 +- .../maxwell/translate/impl/not_implemented.cpp | 8 - .../maxwell/translate/impl/texture_gradient.cpp | 180 +++++++++++++++++++++ 3 files changed, 182 insertions(+), 10 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index b47fb9c2e..c759bd4d4 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -261,8 +261,8 @@ INST(TLDS, "TLDS", "1101 -01- ---- ----") INST(TMML, "TMML", "1101 1111 0101 1---") INST(TMML_b, "TMML (b)", "1101 1111 0110 0---") INST(TXA, "TXA", "1101 1111 0100 0---") -INST(TXD, "TXD", "1101 1110 0011 10--") -INST(TXD_b, "TXD (b)", "1101 1110 0111 10--") +INST(TXD, "TXD", "1101 1110 00-- ----") +INST(TXD_b, "TXD (b)", "1101 1110 01-- ----") INST(TXQ, "TXQ", "1101 1111 0100 1---") INST(TXQ_b, "TXQ (b)", "1101 1111 0101 0---") INST(VABSDIFF, "VABSDIFF", "0101 0100 ---- ----") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 9f5ea7775..ba526817a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -317,14 +317,6 @@ void TranslatorVisitor::TXA(u64) { ThrowNotImplemented(Opcode::TXA); } -void TranslatorVisitor::TXD(u64) { - ThrowNotImplemented(Opcode::TXD); -} - -void TranslatorVisitor::TXD_b(u64) { - ThrowNotImplemented(Opcode::TXD_b); -} - void TranslatorVisitor::VABSDIFF(u64) { ThrowNotImplemented(Opcode::VABSDIFF); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp new file mode 100644 index 000000000..00768e167 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp @@ -0,0 +1,180 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type, bool dc) { + switch (type) { + case TextureType::_1D: + return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) { + const IR::U32 value{v.X(reg)}; + const u32 base = has_lod_clamp ? 12 : 16; + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true)); +} + +void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<35, 1, u64> aoffi; + BitField<50, 1, u64> lc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> derivate_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const txd{insn}; + + const bool has_lod_clamp = txd.lc != 0; + if (has_lod_clamp) { + throw NotImplementedException("TXD.LC - CLAMP is not implemented"); + } + + IR::Value coords; + u32 num_derivates; + IR::Reg base_reg = txd.coord_reg; + IR::Reg last_reg; + IR::Value handle; + if (!is_bindless) { + handle = v.ir.Imm32(static_cast(txd.cbuf_offset.Value() * 4)); + } else { + handle = v.X(base_reg++); + } + + const auto read_array{[&]() -> IR::F32 { + return v.ir.ConvertUToF(32, 16, + v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(0), + v.ir.Imm32(has_lod_clamp ? 12 : 16))); + }}; + switch (txd.type) { + case TextureType::_1D: { + coords = v.F(base_reg); + num_derivates = 1; + last_reg = base_reg + 1; + break; + } + case TextureType::ARRAY_1D: { + last_reg = base_reg + 1; + coords = v.ir.CompositeConstruct(v.F(base_reg), read_array()); + num_derivates = 1; + break; + } + case TextureType::_2D: { + last_reg = base_reg + 2; + coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1)); + num_derivates = 2; + break; + } + case TextureType::ARRAY_2D: { + last_reg = base_reg + 2; + coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array()); + num_derivates = 2; + break; + } + default: + throw NotImplementedException("Invalid texture type"); + } + + const IR::Reg derivate_reg{txd.derivate_reg}; + IR::Value derivates; + switch (num_derivates) { + case 1: { + derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1)); + break; + } + case 2: { + derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1), + v.F(derivate_reg + 2), v.F(derivate_reg + 3)); + break; + } + default: + throw NotImplementedException("Invalid texture type"); + } + + IR::Value offset; + if (txd.aoffi != 0) { + offset = MakeOffset(v, last_reg, has_lod_clamp); + } + + IR::F32 lod_clamp; + if (has_lod_clamp) { + const IR::F32 conv4_8fixp_f = v.ir.Imm32(Common::BitCast(0x3b800000U)); + const IR::F32 tmp = v.ir.ConvertUToF( + 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12))); + lod_clamp = v.ir.FPMul(tmp, conv4_8fixp_f); + } + + IR::TextureInstInfo info{}; + info.type.Assign(GetType(txd.type, false)); + info.num_derivates.Assign(num_derivates); + info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0); + const IR::Value sample{[&]() -> IR::Value { + return v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info); + }()}; + + IR::Reg dest_reg{txd.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((txd.mask >> element) & 1) == 0) { + continue; + } + v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); + ++dest_reg; + } + if (txd.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(txd.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TXD(u64 insn) { + Impl(*this, insn, false); +} + +void TranslatorVisitor::TXD_b(u64 insn) { + Impl(*this, insn, true); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From cb6fc03e55e9eff0826173a6bcacef3034322f7c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 29 Mar 2021 01:08:25 -0300 Subject: shader: Always pass a lod for TexelFetch --- src/shader_recompiler/frontend/ir/opcodes.inc | 6 ++-- .../maxwell/translate/impl/texture_load.cpp | 2 ++ .../translate/impl/texture_load_swizzled.cpp | 34 ++++++++-------------- 3 files changed, 17 insertions(+), 25 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 79baacd08..e82db0cd2 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -378,7 +378,7 @@ OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) -OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, ) OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) @@ -389,7 +389,7 @@ OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) -OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, ) OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) @@ -400,7 +400,7 @@ OPCODE(ImageSampleDrefImplicitLod, F32, U32, OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(ImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) -OPCODE(ImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) OPCODE(ImageQueryDimensions, U32x4, U32, U32, ) OPCODE(ImageQueryLod, F32x4, U32, Opaque, ) OPCODE(ImageGradient, F32x4, U32, Opaque, Opaque, Opaque, F32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp index b4063fa6e..df38f87a3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp @@ -124,6 +124,8 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { } if (tld.lod != 0) { lod = v.X(meta_reg++); + } else { + lod = v.ir.Imm32(0U); } if (tld.aoffi != 0) { offset = MakeOffset(v, meta_reg, tld.type); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp index 3e6ebd911..623b8fc23 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp @@ -74,62 +74,55 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { const IR::Reg reg_a{tlds.src_reg_a}; const IR::Reg reg_b{tlds.src_reg_b}; IR::Value coords; - IR::U32 lod; + IR::U32 lod{v.ir.Imm32(0U)}; IR::Value offsets; IR::U32 multisample; - Shader::TextureType texture_type; + Shader::TextureType texture_type{}; switch (tlds.encoding) { - case 0: { + case 0: texture_type = Shader::TextureType::Color1D; coords = v.X(reg_a); break; - } - case 1: { + case 1: texture_type = Shader::TextureType::Color1D; coords = v.X(reg_a); lod = v.X(reg_b); break; - } - case 2: { + case 2: texture_type = Shader::TextureType::Color2D; coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b)); break; - } - case 4: { + case 4: CheckAlignment(reg_a, 2); texture_type = Shader::TextureType::Color2D; coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); offsets = MakeOffset(v, reg_b); break; - } - case 5: { + case 5: CheckAlignment(reg_a, 2); texture_type = Shader::TextureType::Color2D; coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); lod = v.X(reg_b); break; - } - case 6: { + case 6: CheckAlignment(reg_a, 2); texture_type = Shader::TextureType::Color2D; coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); multisample = v.X(reg_b); break; - } - case 7: { + case 7: CheckAlignment(reg_a, 2); texture_type = Shader::TextureType::Color3D; coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b)); break; - } case 8: { CheckAlignment(reg_b, 2); + const IR::U32 array{v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16))}; texture_type = Shader::TextureType::ColorArray2D; - IR::U32 array = v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16)); coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array); break; } - case 12: { + case 12: CheckAlignment(reg_a, 2); CheckAlignment(reg_b, 2); texture_type = Shader::TextureType::Color2D; @@ -137,11 +130,8 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { lod = v.X(reg_b); offsets = MakeOffset(v, reg_b + 1); break; - } - default: { + default: throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value()); - break; - } } IR::TextureInstInfo info{}; if (tlds.precision == Precision::F16) { -- cgit v1.2.3 From 4d0d29fc2092bf02e102b8bac9cfa1b509274901 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Tue, 30 Mar 2021 08:41:21 +0200 Subject: shader: Address feedback --- .../maxwell/translate/impl/texture_gradient.cpp | 34 ++++++++++++---------- .../maxwell/translate/impl/texture_load.cpp | 10 +++---- .../translate/impl/texture_mipmap_level.cpp | 26 +++++++---------- 3 files changed, 33 insertions(+), 37 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp index 00768e167..c66468a48 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp @@ -47,7 +47,7 @@ Shader::TextureType GetType(TextureType type, bool dc) { IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) { const IR::U32 value{v.X(reg)}; - const u32 base = has_lod_clamp ? 12 : 16; + const u32 base{has_lod_clamp ? 12U : 16U}; return v.ir.CompositeConstruct( v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true), v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true)); @@ -74,20 +74,21 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { } IR::Value coords; - u32 num_derivates; - IR::Reg base_reg = txd.coord_reg; + u32 num_derivates{}; + IR::Reg base_reg{txd.coord_reg}; IR::Reg last_reg; IR::Value handle; - if (!is_bindless) { - handle = v.ir.Imm32(static_cast(txd.cbuf_offset.Value() * 4)); - } else { + if (is_bindless) { handle = v.X(base_reg++); + } else { + handle = v.ir.Imm32(static_cast(txd.cbuf_offset.Value() * 4)); } const auto read_array{[&]() -> IR::F32 { - return v.ir.ConvertUToF(32, 16, - v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(0), - v.ir.Imm32(has_lod_clamp ? 12 : 16))); + const IR::U32 base{v.ir.Imm32(0)}; + const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 12 : 16)}; + const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)}; + return v.ir.ConvertUToF(32, 16, array_index); }}; switch (txd.type) { case TextureType::_1D: { @@ -141,19 +142,20 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { IR::F32 lod_clamp; if (has_lod_clamp) { - const IR::F32 conv4_8fixp_f = v.ir.Imm32(Common::BitCast(0x3b800000U)); - const IR::F32 tmp = v.ir.ConvertUToF( - 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12))); - lod_clamp = v.ir.FPMul(tmp, conv4_8fixp_f); + // Lod Clamp is a Fixed Point 4.8, we need to transform it to float. + // to convert a fixed point, float(value) / float(1 << fixed_point) + // in this case the fixed_point is 8. + const IR::F32 conv4_8fixp_f{v.ir.Imm32(static_cast(1U << 8))}; + const IR::F32 fixp_lc{v.ir.ConvertUToF( + 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))}; + lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f); } IR::TextureInstInfo info{}; info.type.Assign(GetType(txd.type, false)); info.num_derivates.Assign(num_derivates); info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0); - const IR::Value sample{[&]() -> IR::Value { - return v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info); - }()}; + const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)}; IR::Reg dest_reg{txd.dest_reg}; for (size_t element = 0; element < 4; ++element) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp index df38f87a3..987b7ec34 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp @@ -117,10 +117,10 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { IR::Value offset; IR::U32 lod; IR::U32 multisample; - if (!is_bindless) { - handle = v.ir.Imm32(static_cast(tld.cbuf_offset.Value() * 4)); - } else { + if (is_bindless) { handle = v.X(meta_reg++); + } else { + handle = v.ir.Imm32(static_cast(tld.cbuf_offset.Value() * 4)); } if (tld.lod != 0) { lod = v.X(meta_reg++); @@ -138,9 +138,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { } IR::TextureInstInfo info{}; info.type.Assign(GetType(tld.type, false)); - const IR::Value sample{[&]() -> IR::Value { - return v.ir.ImageFetch(handle, coords, offset, lod, multisample, info); - }()}; + const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)}; IR::Reg dest_reg{tld.dest_reg}; for (size_t element = 0; element < 4; ++element) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp index ee13ede30..b6efc04f0 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -81,39 +81,35 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { BitField<36, 13, u64> cbuf_offset; } const tmml{insn}; - if ((tmml.mask & 0xC) != 0) { + if ((tmml.mask & 0b1100) != 0) { throw NotImplementedException("TMML BA results are not implmented"); } - IR::F32 transform_constant = v.ir.Imm32(256.0f); + IR::F32 transform_constant{v.ir.Imm32(256.0f)}; const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)}; IR::U32 handle; IR::Reg meta_reg{tmml.meta_reg}; - if (!is_bindless) { - handle = v.ir.Imm32(static_cast(tmml.cbuf_offset.Value() * 4)); - } else { + if (is_bindless) { handle = v.X(meta_reg++); + } else { + handle = v.ir.Imm32(static_cast(tmml.cbuf_offset.Value() * 4)); } IR::TextureInstInfo info{}; info.type.Assign(GetType(tmml.type, false)); - const IR::Value sample{ - [&]() -> IR::Value { return v.ir.ImageQueryLod(handle, coords, info); }()}; + const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)}; - const IR::FpControl fp_control{ - .no_contraction{false}, - .rounding{IR::FpRounding::RP}, - .fmz_mode{IR::FmzMode::FTZ}, - }; IR::Reg dest_reg{tmml.dest_reg}; for (size_t element = 0; element < 4; ++element) { if (((tmml.mask >> element) & 1) == 0) { continue; } - IR::F32 value = IR::F32{v.ir.CompositeExtract(sample, element)}; - v.F(dest_reg, - element < 2 ? IR::F32{v.ir.FPMul(value, transform_constant, fp_control)} : value); + IR::F32 value{v.ir.CompositeExtract(sample, element)}; + if (element < 2) { + value = v.ir.FPMul(value, transform_constant); + } + v.F(dest_reg, value); ++dest_reg; } } -- cgit v1.2.3 From 67afdaf56622d9e4129dfae42abfa743a4b025d4 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Tue, 30 Mar 2021 19:20:59 +0200 Subject: shader: Fix TXD --- src/shader_recompiler/frontend/ir/opcodes.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index e82db0cd2..ffd0cc690 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -403,7 +403,7 @@ OPCODE(ImageGatherDref, F32x4, U32, OPCODE(ImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) OPCODE(ImageQueryDimensions, U32x4, U32, U32, ) OPCODE(ImageQueryLod, F32x4, U32, Opaque, ) -OPCODE(ImageGradient, F32x4, U32, Opaque, Opaque, Opaque, F32, ) +OPCODE(ImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) // Warp operations OPCODE(VoteAll, U1, U1, ) -- cgit v1.2.3 From c826220733678198e9aef328a9808b062b06c5df Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 31 Mar 2021 01:06:17 -0300 Subject: shader: Unroll "using enum" for opcode declarations --- src/shader_recompiler/frontend/ir/opcodes.cpp | 28 ++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 8492a13d5..1cb9db6c9 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -17,7 +17,33 @@ struct OpcodeMeta { std::array arg_types; }; -using enum Type; +// using enum Type; +constexpr Type Void{Type::Void}; +constexpr Type Opaque{Type::Opaque}; +constexpr Type Label{Type::Label}; +constexpr Type Reg{Type::Reg}; +constexpr Type Pred{Type::Pred}; +constexpr Type Attribute{Type::Attribute}; +constexpr Type U1{Type::U1}; +constexpr Type U8{Type::U8}; +constexpr Type U16{Type::U16}; +constexpr Type U32{Type::U32}; +constexpr Type U64{Type::U64}; +constexpr Type F16{Type::F16}; +constexpr Type F32{Type::F32}; +constexpr Type F64{Type::F64}; +constexpr Type U32x2{Type::U32x2}; +constexpr Type U32x3{Type::U32x3}; +constexpr Type U32x4{Type::U32x4}; +constexpr Type F16x2{Type::F16x2}; +constexpr Type F16x3{Type::F16x3}; +constexpr Type F16x4{Type::F16x4}; +constexpr Type F32x2{Type::F32x2}; +constexpr Type F32x3{Type::F32x3}; +constexpr Type F32x4{Type::F32x4}; +constexpr Type F64x2{Type::F64x2}; +constexpr Type F64x3{Type::F64x3}; +constexpr Type F64x4{Type::F64x4}; constexpr std::array META_TABLE{ #define OPCODE(name_token, type_token, ...) \ -- cgit v1.2.3 From eaafd53cfedf0c7ae40a3f790af5f0aec63ebd13 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 31 Mar 2021 19:46:10 -0300 Subject: shader: Implement LDG .U.128 as .128 --- .../frontend/maxwell/translate/impl/load_store_memory.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index 748b856c9..71688b1d7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -118,7 +118,8 @@ void TranslatorVisitor::LDG(u64 insn) { } break; } - case LoadSize::B128: { + case LoadSize::B128: + case LoadSize::U128: { if (!IR::IsAligned(dest_reg, 4)) { throw NotImplementedException("Unaligned data registers"); } @@ -128,8 +129,6 @@ void TranslatorVisitor::LDG(u64 insn) { } break; } - case LoadSize::U128: - throw NotImplementedException("LDG U.128"); default: throw NotImplementedException("Invalid LDG size {}", ldg.size.Value()); } -- cgit v1.2.3 From b4a5e767d0a60d44c77460bd3a4062c5f69fb6c7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 2 Apr 2021 01:17:47 -0300 Subject: shader: Fix branches to visited virtual blocks --- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 784f9df8a..ac8707847 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -486,6 +486,16 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function } if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) { // Block already exists and it has been visited + if (function.blocks.begin() != it) { + // Check if the previous node is the virtual variant of the label + // This won't exist if a virtual node is not needed or it hasn't been visited + // If it hasn't been visited and a virtual node is needed, this will still behave as + // expected because the node impersonated with its virtual node. + const auto prev{std::prev(it)}; + if (it->begin.Virtual() == prev->begin) { + return &*prev; + } + } return &*it; } Block* const new_block{block_pool.Create(Block{ -- cgit v1.2.3 From 5ed8f2438498d3281c2ce8621869995de3908413 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 07:52:36 +0200 Subject: shader: Stub VOTE.VTG --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 7 +++++++ .../frontend/maxwell/translate/impl/move_special_register.cpp | 4 ++++ .../frontend/maxwell/translate/impl/not_implemented.cpp | 4 ---- src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp | 4 ++++ 4 files changed, 15 insertions(+), 4 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index edf8c05d4..5258ede09 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -256,6 +256,13 @@ static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { return ir.LogicalOr(ir.GetSFlag(), ir.GetZFlag()); case FlowTest::RGT: return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag())); + case FlowTest::CSM_TA: + case FlowTest::CSM_TR: + case FlowTest::CSM_MX: + case FlowTest::FCSM_TA: + case FlowTest::FCSM_TR: + case FlowTest::FCSM_MX: + return ir.Imm1(false); default: throw NotImplementedException("Flow test {}", flow_test); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index 93cea302a..a295f4c5e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -95,6 +95,10 @@ enum class SpecialRegister : u64 { return ir.WorkgroupIdY(); case SpecialRegister::SR_CTAID_Z: return ir.WorkgroupIdZ(); + case SpecialRegister::SR_WSCALEFACTOR_XY: + return ir.Imm32(Common::BitCast(1.0f)); + case SpecialRegister::SR_WSCALEFACTOR_Z: + return ir.Imm32(Common::BitCast(1.0f)); default: throw NotImplementedException("S2R special register {}", special_register); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index ba526817a..83ed0c0fd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -329,10 +329,6 @@ void TranslatorVisitor::VADD(u64) { ThrowNotImplemented(Opcode::VADD); } -void TranslatorVisitor::VOTE_vtg(u64) { - ThrowNotImplemented(Opcode::VOTE_vtg); -} - void TranslatorVisitor::VSET(u64) { ThrowNotImplemented(Opcode::VSET); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp index a88894a7e..391520a18 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -49,4 +49,8 @@ void TranslatorVisitor::VOTE(u64 insn) { Vote(*this, insn); } +void TranslatorVisitor::VOTE_vtg(u64) { + // Stub +} + } // namespace Shader::Maxwell -- cgit v1.2.3 From ecb30c907266921818d5b6b03e341028fa2ea082 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 22:20:57 +0200 Subject: shader: Improve VOTE.VTG stub --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 37 ++++++++++++++++++++-- src/shader_recompiler/frontend/ir/ir_emitter.h | 10 ++++++ src/shader_recompiler/frontend/ir/opcodes.inc | 8 +++++ .../frontend/maxwell/translate/impl/vote.cpp | 5 ++- 4 files changed, 57 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 5258ede09..ddaa873f2 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -198,6 +198,38 @@ void IREmitter::SetOFlag(const U1& value) { Inst(Opcode::SetOFlag, value); } +U1 IREmitter::GetFCSMFlag() { + return Inst(Opcode::GetFCSMFlag); +} + +U1 IREmitter::GetTAFlag() { + return Inst(Opcode::GetTAFlag); +} + +U1 IREmitter::GetTRFlag() { + return Inst(Opcode::GetTRFlag); +} + +U1 IREmitter::GetMXFlag() { + return Inst(Opcode::GetMXFlag); +} + +void IREmitter::SetFCSMFlag(const U1& value) { + Inst(Opcode::SetFCSMFlag, value); +} + +void IREmitter::SetTAFlag(const U1& value) { + Inst(Opcode::SetTAFlag, value); +} + +void IREmitter::SetTRFlag(const U1& value) { + Inst(Opcode::SetTRFlag, value); +} + +void IREmitter::SetMXFlag(const U1& value) { + Inst(Opcode::SetMXFlag, value); +} + static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { switch (flow_test) { case FlowTest::F: @@ -256,13 +288,14 @@ static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { return ir.LogicalOr(ir.GetSFlag(), ir.GetZFlag()); case FlowTest::RGT: return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag())); + + case FlowTest::FCSM_TR: + return ir.LogicalAnd(ir.GetFCSMFlag(), ir.GetTRFlag()); case FlowTest::CSM_TA: case FlowTest::CSM_TR: case FlowTest::CSM_MX: case FlowTest::FCSM_TA: - case FlowTest::FCSM_TR: case FlowTest::FCSM_MX: - return ir.Imm1(false); default: throw NotImplementedException("Flow test {}", flow_test); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index a4616e247..6e04eec7f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -70,6 +70,16 @@ public: void SetCFlag(const U1& value); void SetOFlag(const U1& value); + [[nodiscard]] U1 GetFCSMFlag(); + [[nodiscard]] U1 GetTAFlag(); + [[nodiscard]] U1 GetTRFlag(); + [[nodiscard]] U1 GetMXFlag(); + + void SetFCSMFlag(const U1& value); + void SetTAFlag(const U1& value); + void SetTRFlag(const U1& value); + void SetMXFlag(const U1& value); + [[nodiscard]] U1 Condition(IR::Condition cond); [[nodiscard]] U1 GetFlowTestResult(FlowTest test); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index ffd0cc690..702372775 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -46,10 +46,18 @@ OPCODE(GetZFlag, U1, Void OPCODE(GetSFlag, U1, Void, ) OPCODE(GetCFlag, U1, Void, ) OPCODE(GetOFlag, U1, Void, ) +OPCODE(GetFCSMFlag, U1, Void, ) +OPCODE(GetTAFlag, U1, Void, ) +OPCODE(GetTRFlag, U1, Void, ) +OPCODE(GetMXFlag, U1, Void, ) OPCODE(SetZFlag, Void, U1, ) OPCODE(SetSFlag, Void, U1, ) OPCODE(SetCFlag, Void, U1, ) OPCODE(SetOFlag, Void, U1, ) +OPCODE(SetFCSMFlag, Void, U1, ) +OPCODE(SetTAFlag, Void, U1, ) +OPCODE(SetTRFlag, Void, U1, ) +OPCODE(SetMXFlag, Void, U1, ) OPCODE(WorkgroupId, U32x3, ) OPCODE(LocalInvocationId, U32x3, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp index 391520a18..2acabb662 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -50,7 +50,10 @@ void TranslatorVisitor::VOTE(u64 insn) { } void TranslatorVisitor::VOTE_vtg(u64) { - // Stub + // LOG_WARNING("VOTE.VTG: Stubbed!"); + auto imm = ir.Imm1(false); + ir.SetFCSMFlag(imm); + ir.SetTRFlag(imm); } } // namespace Shader::Maxwell -- cgit v1.2.3 From 655f7a570a10218ffb2ed175bb7f0b84530ccae0 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 2 Apr 2021 19:27:30 +0200 Subject: shader: Implement MEMBAR --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 ++ src/shader_recompiler/frontend/ir/ir_emitter.h | 2 + src/shader_recompiler/frontend/ir/modifiers.h | 13 +++++ src/shader_recompiler/frontend/ir/opcodes.inc | 3 ++ .../maxwell/translate/impl/barrier_operations.cpp | 56 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 11 ----- 6 files changed, 78 insertions(+), 11 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ddaa873f2..2fd90303f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -82,6 +82,10 @@ void IREmitter::SelectionMerge(Block* merge_block) { Inst(Opcode::SelectionMerge, merge_block); } +void IREmitter::MemoryBarrier(BarrierInstInfo info) { + Inst(Opcode::MemoryBarrier, Flags{info}); +} + void IREmitter::Return() { block->SetReturn(); Inst(Opcode::Return); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 6e04eec7f..5bebf66e3 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -136,6 +136,8 @@ public: [[nodiscard]] Value Select(const U1& condition, const Value& true_value, const Value& false_value); + [[nodiscard]] void MemoryBarrier(BarrierInstInfo info); + template [[nodiscard]] Dest BitCast(const Source& value); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 90078f535..7730c25a9 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -25,6 +25,14 @@ enum class FpRounding : u8 { RZ, // Round towards zero }; +enum class MemoryScope : u32 { + DontCare, + Warp, + Workgroup, + Device, + System +}; + struct FpControl { bool no_contraction{false}; FpRounding rounding{FpRounding::DontCare}; @@ -32,6 +40,11 @@ struct FpControl { }; static_assert(sizeof(FpControl) <= sizeof(u32)); +union BarrierInstInfo { + u32 raw; + BitField<0, 3, MemoryScope> scope; +}; + union TextureInstInfo { u32 raw; BitField<0, 8, TextureType> type; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 702372775..d9e0d5471 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -16,6 +16,9 @@ OPCODE(Return, Void, OPCODE(Unreachable, Void, ) OPCODE(DemoteToHelperInvocation, Void, Label, ) +// Barriers +OPCODE(MemoryBarrier, Void, ) + // Special operations OPCODE(Prologue, Void, ) OPCODE(Epilogue, Void, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp new file mode 100644 index 000000000..933af572c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp @@ -0,0 +1,56 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" + +namespace Shader::Maxwell { +namespace { +// Seems to be in CUDA terminology. +enum class LocalScope : u64 { + CTG = 0, + GL = 1, + SYS = 2, + VC = 3, +}; + +IR::MemoryScope LocalScopeToMemoryScope(LocalScope scope) { + switch (scope) { + case LocalScope::CTG: + return IR::MemoryScope::Warp; + case LocalScope::GL: + return IR::MemoryScope::Device; + case LocalScope::SYS: + return IR::MemoryScope::System; + case LocalScope::VC: + return IR::MemoryScope::Workgroup; // or should be device? + default: + throw NotImplementedException("Unimplemented Local Scope {}", scope); + } +} + +} // namespace + +void TranslatorVisitor::MEMBAR(u64 inst) { + union { + u64 raw; + BitField<8, 2, LocalScope> scope; + } membar{inst}; + IR::BarrierInstInfo info{}; + info.scope.Assign(LocalScopeToMemoryScope(membar.scope)); + ir.MemoryBarrier(info); +} + +void TranslatorVisitor::DEPBAR() { + // DEPBAR is a no-op +} + +void TranslatorVisitor::BAR(u64) { + throw NotImplementedException("Instruction {} is not implemented", Opcode::BAR); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 83ed0c0fd..80a6ed578 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -37,10 +37,6 @@ void TranslatorVisitor::B2R(u64) { ThrowNotImplemented(Opcode::B2R); } -void TranslatorVisitor::BAR(u64) { - ThrowNotImplemented(Opcode::BAR); -} - void TranslatorVisitor::BPT(u64) { ThrowNotImplemented(Opcode::BPT); } @@ -73,9 +69,6 @@ void TranslatorVisitor::CS2R(u64) { ThrowNotImplemented(Opcode::CS2R); } -void TranslatorVisitor::DEPBAR() { - // DEPBAR is a no-op -} void TranslatorVisitor::FCHK_reg(u64) { ThrowNotImplemented(Opcode::FCHK_reg); @@ -189,10 +182,6 @@ void TranslatorVisitor::LONGJMP(u64) { ThrowNotImplemented(Opcode::LONGJMP); } -void TranslatorVisitor::MEMBAR(u64) { - ThrowNotImplemented(Opcode::MEMBAR); -} - void TranslatorVisitor::NOP(u64) { ThrowNotImplemented(Opcode::NOP); } -- cgit v1.2.3 From 45d547af11a18434ea17e4427db7286856a19537 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 2 Apr 2021 23:05:47 +0200 Subject: shader: Implement SR_LaneId --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 ++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 2 ++ src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../frontend/maxwell/translate/impl/move_special_register.cpp | 2 ++ 4 files changed, 9 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 2fd90303f..b5f61956a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -355,6 +355,10 @@ U32 IREmitter::LocalInvocationIdZ() { return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 2)}; } +U32 IREmitter::LaneId() { + return Inst(Opcode::LaneId); +} + U32 IREmitter::LoadGlobalU8(const U64& address) { return Inst(Opcode::LoadGlobalU8, address); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 5bebf66e3..e034d672f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -97,6 +97,8 @@ public: [[nodiscard]] U32 LocalInvocationIdY(); [[nodiscard]] U32 LocalInvocationIdZ(); + [[nodiscard]] U32 LaneId(); + [[nodiscard]] U32 LoadGlobalU8(const U64& address); [[nodiscard]] U32 LoadGlobalS8(const U64& address); [[nodiscard]] U32 LoadGlobalU16(const U64& address); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index d9e0d5471..74e956930 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -63,6 +63,7 @@ OPCODE(SetTRFlag, Void, U1, OPCODE(SetMXFlag, Void, U1, ) OPCODE(WorkgroupId, U32x3, ) OPCODE(LocalInvocationId, U32x3, ) +OPCODE(LaneId, U32, ) // Undefined OPCODE(UndefU1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index a295f4c5e..731ac643f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -99,6 +99,8 @@ enum class SpecialRegister : u64 { return ir.Imm32(Common::BitCast(1.0f)); case SpecialRegister::SR_WSCALEFACTOR_Z: return ir.Imm32(Common::BitCast(1.0f)); + case SpecialRegister::SR_LANEID: + return ir.LaneId(); default: throw NotImplementedException("S2R special register {}", special_register); } -- cgit v1.2.3 From baec84247fe815199595d9e8077b71f3b5c8317e Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 3 Apr 2021 01:48:39 +0200 Subject: shader: Address Feedback --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 49 ++++++---------------- src/shader_recompiler/frontend/ir/ir_emitter.h | 12 +----- src/shader_recompiler/frontend/ir/modifiers.h | 13 +----- src/shader_recompiler/frontend/ir/opcodes.inc | 12 ++---- .../maxwell/translate/impl/barrier_operations.cpp | 12 ++---- .../translate/impl/move_special_register.cpp | 2 + .../frontend/maxwell/translate/impl/vote.cpp | 5 +-- 7 files changed, 25 insertions(+), 80 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index b5f61956a..5e94edd74 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -82,8 +82,17 @@ void IREmitter::SelectionMerge(Block* merge_block) { Inst(Opcode::SelectionMerge, merge_block); } -void IREmitter::MemoryBarrier(BarrierInstInfo info) { - Inst(Opcode::MemoryBarrier, Flags{info}); +void IREmitter::MemoryBarrier(MemoryScope scope) { + switch (scope) { + case MemoryScope::Workgroup: + Inst(Opcode::MemoryBarrierWorkgroupLevel); + case MemoryScope::Device: + Inst(Opcode::MemoryBarrierDeviceLevel); + case MemoryScope::System: + Inst(Opcode::MemoryBarrierSystemLevel); + default: + throw InvalidArgument("Invalid memory scope {}", scope); + } } void IREmitter::Return() { @@ -202,38 +211,6 @@ void IREmitter::SetOFlag(const U1& value) { Inst(Opcode::SetOFlag, value); } -U1 IREmitter::GetFCSMFlag() { - return Inst(Opcode::GetFCSMFlag); -} - -U1 IREmitter::GetTAFlag() { - return Inst(Opcode::GetTAFlag); -} - -U1 IREmitter::GetTRFlag() { - return Inst(Opcode::GetTRFlag); -} - -U1 IREmitter::GetMXFlag() { - return Inst(Opcode::GetMXFlag); -} - -void IREmitter::SetFCSMFlag(const U1& value) { - Inst(Opcode::SetFCSMFlag, value); -} - -void IREmitter::SetTAFlag(const U1& value) { - Inst(Opcode::SetTAFlag, value); -} - -void IREmitter::SetTRFlag(const U1& value) { - Inst(Opcode::SetTRFlag, value); -} - -void IREmitter::SetMXFlag(const U1& value) { - Inst(Opcode::SetMXFlag, value); -} - static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { switch (flow_test) { case FlowTest::F: @@ -292,9 +269,9 @@ static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { return ir.LogicalOr(ir.GetSFlag(), ir.GetZFlag()); case FlowTest::RGT: return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag())); - case FlowTest::FCSM_TR: - return ir.LogicalAnd(ir.GetFCSMFlag(), ir.GetTRFlag()); + // LOG_WARNING(ShaderDecompiler, "FCSM_TR CC State (Stubbed)"); + return ir.Imm1(false); case FlowTest::CSM_TA: case FlowTest::CSM_TR: case FlowTest::CSM_MX: diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index e034d672f..14b743975 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -70,16 +70,6 @@ public: void SetCFlag(const U1& value); void SetOFlag(const U1& value); - [[nodiscard]] U1 GetFCSMFlag(); - [[nodiscard]] U1 GetTAFlag(); - [[nodiscard]] U1 GetTRFlag(); - [[nodiscard]] U1 GetMXFlag(); - - void SetFCSMFlag(const U1& value); - void SetTAFlag(const U1& value); - void SetTRFlag(const U1& value); - void SetMXFlag(const U1& value); - [[nodiscard]] U1 Condition(IR::Condition cond); [[nodiscard]] U1 GetFlowTestResult(FlowTest test); @@ -138,7 +128,7 @@ public: [[nodiscard]] Value Select(const U1& condition, const Value& true_value, const Value& false_value); - [[nodiscard]] void MemoryBarrier(BarrierInstInfo info); + [[nodiscard]] void MemoryBarrier(MemoryScope scope); template [[nodiscard]] Dest BitCast(const Source& value); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 7730c25a9..2aa4ac79b 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -25,13 +25,7 @@ enum class FpRounding : u8 { RZ, // Round towards zero }; -enum class MemoryScope : u32 { - DontCare, - Warp, - Workgroup, - Device, - System -}; +enum class MemoryScope : u32 { DontCare, Warp, Workgroup, Device, System }; struct FpControl { bool no_contraction{false}; @@ -40,11 +34,6 @@ struct FpControl { }; static_assert(sizeof(FpControl) <= sizeof(u32)); -union BarrierInstInfo { - u32 raw; - BitField<0, 3, MemoryScope> scope; -}; - union TextureInstInfo { u32 raw; BitField<0, 8, TextureType> type; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 74e956930..3640a5d24 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -17,7 +17,9 @@ OPCODE(Unreachable, Void, OPCODE(DemoteToHelperInvocation, Void, Label, ) // Barriers -OPCODE(MemoryBarrier, Void, ) +OPCODE(MemoryBarrierWorkgroupLevel, Void, ) +OPCODE(MemoryBarrierDeviceLevel, Void, ) +OPCODE(MemoryBarrierSystemLevel, Void, ) // Special operations OPCODE(Prologue, Void, ) @@ -49,18 +51,10 @@ OPCODE(GetZFlag, U1, Void OPCODE(GetSFlag, U1, Void, ) OPCODE(GetCFlag, U1, Void, ) OPCODE(GetOFlag, U1, Void, ) -OPCODE(GetFCSMFlag, U1, Void, ) -OPCODE(GetTAFlag, U1, Void, ) -OPCODE(GetTRFlag, U1, Void, ) -OPCODE(GetMXFlag, U1, Void, ) OPCODE(SetZFlag, Void, U1, ) OPCODE(SetSFlag, Void, U1, ) OPCODE(SetCFlag, Void, U1, ) OPCODE(SetOFlag, Void, U1, ) -OPCODE(SetFCSMFlag, Void, U1, ) -OPCODE(SetTAFlag, Void, U1, ) -OPCODE(SetTRFlag, Void, U1, ) -OPCODE(SetMXFlag, Void, U1, ) OPCODE(WorkgroupId, U32x3, ) OPCODE(LocalInvocationId, U32x3, ) OPCODE(LaneId, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp index 933af572c..26d5e276b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp @@ -5,8 +5,8 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/frontend/ir/modifiers.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" #include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { namespace { @@ -21,28 +21,24 @@ enum class LocalScope : u64 { IR::MemoryScope LocalScopeToMemoryScope(LocalScope scope) { switch (scope) { case LocalScope::CTG: - return IR::MemoryScope::Warp; + return IR::MemoryScope::Workgroup; case LocalScope::GL: return IR::MemoryScope::Device; case LocalScope::SYS: return IR::MemoryScope::System; - case LocalScope::VC: - return IR::MemoryScope::Workgroup; // or should be device? default: throw NotImplementedException("Unimplemented Local Scope {}", scope); } } -} // namespace +} // Anonymous namespace void TranslatorVisitor::MEMBAR(u64 inst) { union { u64 raw; BitField<8, 2, LocalScope> scope; } membar{inst}; - IR::BarrierInstInfo info{}; - info.scope.Assign(LocalScopeToMemoryScope(membar.scope)); - ir.MemoryBarrier(info); + ir.MemoryBarrier(LocalScopeToMemoryScope(membar.scope)); } void TranslatorVisitor::DEPBAR() { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index 731ac643f..7d9c42a83 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -96,8 +96,10 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_CTAID_Z: return ir.WorkgroupIdZ(); case SpecialRegister::SR_WSCALEFACTOR_XY: + // LOG_WARNING(ShaderDecompiler, "SR_WSCALEFACTOR_XY (Stubbed)"); return ir.Imm32(Common::BitCast(1.0f)); case SpecialRegister::SR_WSCALEFACTOR_Z: + // LOG_WARNING(ShaderDecompiler, "SR_WSCALEFACTOR_Z (Stubbed)"); return ir.Imm32(Common::BitCast(1.0f)); case SpecialRegister::SR_LANEID: return ir.LaneId(); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp index 2acabb662..d508e1e23 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -50,10 +50,7 @@ void TranslatorVisitor::VOTE(u64 insn) { } void TranslatorVisitor::VOTE_vtg(u64) { - // LOG_WARNING("VOTE.VTG: Stubbed!"); - auto imm = ir.Imm1(false); - ir.SetFCSMFlag(imm); - ir.SetTRFlag(imm); + // LOG_WARNING(ShaderDecompiler, "VOTE.VTG: Stubbed!"); } } // namespace Shader::Maxwell -- cgit v1.2.3 From 80df541a0860eecc599f60a7b2955e1e286bc48a Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 3 Apr 2021 02:28:44 +0200 Subject: shader: "Implement" NOP --- .../frontend/maxwell/translate/impl/not_implemented.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 80a6ed578..acabb0118 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -183,7 +183,7 @@ void TranslatorVisitor::LONGJMP(u64) { } void TranslatorVisitor::NOP(u64) { - ThrowNotImplemented(Opcode::NOP); + // NOP is No-Op. } void TranslatorVisitor::OUT_reg(u64) { -- cgit v1.2.3 From ed6a1b1a3def4b8ed8c8fd1a7774a0a14edefc70 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 3 Apr 2021 02:34:07 +0200 Subject: shader: Address feedback --- src/shader_recompiler/frontend/ir/modifiers.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 2aa4ac79b..461671326 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -25,7 +25,13 @@ enum class FpRounding : u8 { RZ, // Round towards zero }; -enum class MemoryScope : u32 { DontCare, Warp, Workgroup, Device, System }; +enum class MemoryScope : u32 { + DontCare, + Warp, + Workgroup, + Device, + System, +}; struct FpControl { bool no_contraction{false}; -- cgit v1.2.3 From e7700aad183047f0c6fa990e1f424448d00c8865 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 3 Apr 2021 03:01:12 +0200 Subject: shader: Fix undetected bug from review --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 5e94edd74..dbfc670b0 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -86,10 +86,13 @@ void IREmitter::MemoryBarrier(MemoryScope scope) { switch (scope) { case MemoryScope::Workgroup: Inst(Opcode::MemoryBarrierWorkgroupLevel); + break; case MemoryScope::Device: Inst(Opcode::MemoryBarrierDeviceLevel); + break; case MemoryScope::System: Inst(Opcode::MemoryBarrierSystemLevel); + break; default: throw InvalidArgument("Invalid memory scope {}", scope); } -- cgit v1.2.3 From ca7ebdc471cfd9549b15f8ae5523c6fdddca57e3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 05:16:09 -0300 Subject: shader: Fix FADD32I --- .../frontend/maxwell/translate/impl/floating_point_add.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp index 487198aa6..b39950c84 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -68,17 +68,15 @@ void TranslatorVisitor::FADD32I(u64 insn) { union { u64 raw; BitField<55, 1, u64> ftz; - BitField<53, 1, u64> neg_b; + BitField<56, 1, u64> neg_a; BitField<54, 1, u64> abs_a; BitField<52, 1, u64> cc; - BitField<56, 1, u64> neg_a; + BitField<53, 1, u64> neg_b; BitField<57, 1, u64> abs_b; - BitField<50, 1, u64> sat; } const fadd32i{insn}; - FADD(*this, insn, fadd32i.sat != 0, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, - GetFloatImm32(insn), fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, - fadd32i.neg_b != 0); + FADD(*this, insn, false, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, GetFloatImm32(insn), + fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, fadd32i.neg_b != 0); } } // namespace Shader::Maxwell -- cgit v1.2.3 From c4aab5c40ec1347da9811169bbc3dfb23632ab98 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 05:17:08 -0300 Subject: shader: Fix fp16 merge when using native fp16 --- .../frontend/maxwell/translate/impl/half_floating_point_helper.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp index d0c6ba1aa..0dbeb7f56 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp @@ -51,9 +51,9 @@ IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const I case Merge::MRG_H0: case Merge::MRG_H1: { const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))}; - const bool h0{merge == Merge::MRG_H0}; - const IR::F16& insert{h0 ? lhs : rhs}; - return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, h0 ? 0 : 1)); + const bool is_h0{merge == Merge::MRG_H0}; + const IR::F16 insert{ir.FPConvert(16, is_h0 ? lhs : rhs)}; + return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, is_h0 ? 0 : 1)); } } throw InvalidArgument("Invalid merge {}", merge); -- cgit v1.2.3 From 0b26f2b90ea4fe6097d982b72dfe38c0a3658ad0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 06:40:16 -0300 Subject: shader: Remove unused header in VOTE --- src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp index d508e1e23..0793611ff 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include - #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" -- cgit v1.2.3 From 3f594dd86bd1ee1b178109132482c7d6b43e66dd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 4 Apr 2021 02:31:09 -0300 Subject: shader: Reimplement GetCbufU64 as GetCbufU32x2 It may generate better code on some compilers and it's easier to handle. --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 6 +++--- src/shader_recompiler/frontend/ir/ir_emitter.h | 4 ++-- src/shader_recompiler/frontend/ir/opcodes.inc | 2 +- .../frontend/maxwell/translate/impl/load_constant.cpp | 12 ++++++------ 4 files changed, 12 insertions(+), 12 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index dbfc670b0..dbd38a28b 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -162,8 +162,8 @@ U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) { return Inst(Opcode::GetCbufU32, binding, byte_offset); } -UAny IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, - bool is_signed) { +Value IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, + bool is_signed) { switch (bitsize) { case 8: return Inst(is_signed ? Opcode::GetCbufS8 : Opcode::GetCbufU8, binding, byte_offset); @@ -172,7 +172,7 @@ UAny IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsi case 32: return Inst(Opcode::GetCbufU32, binding, byte_offset); case 64: - return Inst(Opcode::GetCbufU64, binding, byte_offset); + return Inst(Opcode::GetCbufU32x2, binding, byte_offset); default: throw InvalidArgument("Invalid bit size {}", bitsize); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 14b743975..81a57fefe 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -56,8 +56,8 @@ public: void SetIndirectBranchVariable(const U32& value); [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); - [[nodiscard]] UAny GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, - bool is_signed); + [[nodiscard]] Value GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, + bool is_signed); [[nodiscard]] F32 GetFloatCbuf(const U32& binding, const U32& byte_offset); [[nodiscard]] U1 GetZFlag(); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 3640a5d24..734f5328b 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -40,7 +40,7 @@ OPCODE(GetCbufU16, U32, U32, OPCODE(GetCbufS16, U32, U32, U32, ) OPCODE(GetCbufU32, U32, U32, U32, ) OPCODE(GetCbufF32, F32, U32, U32, ) -OPCODE(GetCbufU64, U64, U32, U32, ) +OPCODE(GetCbufU32x2, U32x2, U32, U32, ) OPCODE(GetAttribute, F32, Attribute, ) OPCODE(SetAttribute, Void, Attribute, F32, ) OPCODE(GetAttributeIndexed, F32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp index 49ccb7d62..ae3ecea32 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -30,25 +30,25 @@ void TranslatorVisitor::LDC(u64 insn) { const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)}; switch (ldc.size) { case Size::U8: - X(ldc.dest_reg, ir.GetCbuf(index, offset, 8, false)); + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, false)}); break; case Size::S8: - X(ldc.dest_reg, ir.GetCbuf(index, offset, 8, true)); + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, true)}); break; case Size::U16: - X(ldc.dest_reg, ir.GetCbuf(index, offset, 16, false)); + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, false)}); break; case Size::S16: - X(ldc.dest_reg, ir.GetCbuf(index, offset, 16, true)); + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, true)}); break; case Size::B32: - X(ldc.dest_reg, ir.GetCbuf(index, offset, 32, false)); + X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 32, false)}); break; case Size::B64: { if (!IR::IsAligned(ldc.dest_reg, 2)) { throw NotImplementedException("Unaligned destination register"); } - const IR::Value vector{ir.UnpackUint2x32(ir.GetCbuf(index, offset, 64, false))}; + const IR::Value vector{ir.GetCbuf(index, offset, 64, false)}; for (int i = 0; i < 2; ++i) { X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); } -- cgit v1.2.3 From 85795de99f27e57ddf97696e7915ddd4bdf02976 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 4 Apr 2021 03:00:41 -0300 Subject: shader: Abstract breadth searches and use the abstraction --- .../frontend/ir/breadth_first_search.h | 57 ++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 src/shader_recompiler/frontend/ir/breadth_first_search.h (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/breadth_first_search.h b/src/shader_recompiler/frontend/ir/breadth_first_search.h new file mode 100644 index 000000000..b35f062d4 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/breadth_first_search.h @@ -0,0 +1,57 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include + +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +template +auto BreadthFirstSearch(const Value& value, Pred&& pred) + -> std::invoke_result_t { + if (value.IsImmediate()) { + // Nothing to do with immediates + return std::nullopt; + } + // Breadth-first search visiting the right most arguments first + // Small vector has been determined from shaders in Super Smash Bros. Ultimate + boost::container::small_vector visited; + std::queue queue; + queue.push(value.InstRecursive()); + + while (!queue.empty()) { + // Pop one instruction from the queue + const Inst* const inst{queue.front()}; + queue.pop(); + if (const std::optional result = pred(inst)) { + // This is the instruction we were looking for + return result; + } + // Visit the right most arguments first + for (size_t arg = inst->NumArgs(); arg--;) { + const Value arg_value{inst->Arg(arg)}; + if (arg_value.IsImmediate()) { + continue; + } + // Queue instruction if it hasn't been visited + const Inst* const arg_inst{arg_value.InstRecursive()}; + if (std::ranges::find(visited, arg_inst) == visited.end()) { + visited.push_back(arg_inst); + queue.push(arg_inst); + } + } + } + // SSA tree has been traversed and the result hasn't been found + return std::nullopt; +} + +} // namespace Shader::IR -- cgit v1.2.3 From fc93bc2abde0b54a0a495f9b28a76fd34b47f320 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 4 Apr 2021 03:04:48 -0300 Subject: shader: Implement BAR and fix memory barriers --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 ++ src/shader_recompiler/frontend/ir/ir_emitter.h | 1 + .../frontend/ir/microinstruction.cpp | 4 ++ src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../maxwell/translate/impl/barrier_operations.cpp | 58 +++++++++++++++++++++- 5 files changed, 66 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index dbd38a28b..246c3b9ef 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -82,6 +82,10 @@ void IREmitter::SelectionMerge(Block* merge_block) { Inst(Opcode::SelectionMerge, merge_block); } +void IREmitter::Barrier() { + Inst(Opcode::Barrier); +} + void IREmitter::MemoryBarrier(MemoryScope scope) { switch (scope) { case MemoryScope::Workgroup: diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 81a57fefe..1b00c548d 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -128,6 +128,7 @@ public: [[nodiscard]] Value Select(const U1& condition, const Value& true_value, const Value& false_value); + [[nodiscard]] void Barrier(); [[nodiscard]] void MemoryBarrier(MemoryScope scope); template diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 074c71d53..481202d94 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -57,6 +57,10 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::Return: case Opcode::Unreachable: case Opcode::DemoteToHelperInvocation: + case Opcode::Barrier: + case Opcode::MemoryBarrierWorkgroupLevel: + case Opcode::MemoryBarrierDeviceLevel: + case Opcode::MemoryBarrierSystemLevel: case Opcode::Prologue: case Opcode::Epilogue: case Opcode::SetAttribute: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 734f5328b..dcd54bcf7 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -17,6 +17,7 @@ OPCODE(Unreachable, Void, OPCODE(DemoteToHelperInvocation, Void, Label, ) // Barriers +OPCODE(Barrier, Void, ) OPCODE(MemoryBarrierWorkgroupLevel, Void, ) OPCODE(MemoryBarrierDeviceLevel, Void, ) OPCODE(MemoryBarrierSystemLevel, Void, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp index 26d5e276b..2a2a294df 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp @@ -38,6 +38,7 @@ void TranslatorVisitor::MEMBAR(u64 inst) { u64 raw; BitField<8, 2, LocalScope> scope; } membar{inst}; + ir.MemoryBarrier(LocalScopeToMemoryScope(membar.scope)); } @@ -45,8 +46,61 @@ void TranslatorVisitor::DEPBAR() { // DEPBAR is a no-op } -void TranslatorVisitor::BAR(u64) { - throw NotImplementedException("Instruction {} is not implemented", Opcode::BAR); +void TranslatorVisitor::BAR(u64 insn) { + enum class Mode { + RedPopc, + Scan, + RedAnd, + RedOr, + Sync, + Arrive, + }; + union { + u64 raw; + BitField<43, 1, u64> is_a_imm; + BitField<44, 1, u64> is_b_imm; + BitField<8, 8, u64> imm_a; + BitField<20, 12, u64> imm_b; + BitField<42, 1, u64> neg_pred; + BitField<39, 3, IR::Pred> pred; + } const bar{insn}; + + const Mode mode{[insn] { + switch (insn & 0x0000009B00000000ULL) { + case 0x0000000200000000ULL: + return Mode::RedPopc; + case 0x0000000300000000ULL: + return Mode::Scan; + case 0x0000000A00000000ULL: + return Mode::RedAnd; + case 0x0000001200000000ULL: + return Mode::RedOr; + case 0x0000008000000000ULL: + return Mode::Sync; + case 0x0000008100000000ULL: + return Mode::Arrive; + } + throw NotImplementedException("Invalid encoding"); + }()}; + if (mode != Mode::Sync) { + throw NotImplementedException("BAR mode {}", mode); + } + if (bar.is_a_imm == 0) { + throw NotImplementedException("Non-immediate input A"); + } + if (bar.imm_a != 0) { + throw NotImplementedException("Non-zero input A"); + } + if (bar.is_b_imm == 0) { + throw NotImplementedException("Non-immediate input B"); + } + if (bar.imm_b != 0) { + throw NotImplementedException("Non-zero input B"); + } + if (bar.pred != IR::Pred::PT && bar.neg_pred != 0) { + throw NotImplementedException("Non-true input predicate"); + } + ir.Barrier(); } } // namespace Shader::Maxwell -- cgit v1.2.3 From da6cf2632cd4dc0d2b0278353fcaee0789b418c0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 4 Apr 2021 05:17:17 -0300 Subject: shader: Add subgroup masks --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 20 ++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 5 + src/shader_recompiler/frontend/ir/opcodes.inc | 5 + .../translate/impl/move_special_register.cpp | 110 ++++++++++++++------- 4 files changed, 105 insertions(+), 35 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 246c3b9ef..ed1e0dd3b 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1628,6 +1628,26 @@ U32 IREmitter::SubgroupBallot(const U1& value) { return Inst(Opcode::SubgroupBallot, value); } +U32 IREmitter::SubgroupEqMask() { + return Inst(Opcode::SubgroupEqMask); +} + +U32 IREmitter::SubgroupLtMask() { + return Inst(Opcode::SubgroupLtMask); +} + +U32 IREmitter::SubgroupLeMask() { + return Inst(Opcode::SubgroupLeMask); +} + +U32 IREmitter::SubgroupGtMask() { + return Inst(Opcode::SubgroupGtMask); +} + +U32 IREmitter::SubgroupGeMask() { + return Inst(Opcode::SubgroupGeMask); +} + U32 IREmitter::ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, const IR::U32& seg_mask) { return Inst(Opcode::ShuffleIndex, value, index, clamp, seg_mask); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 1b00c548d..42756af43 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -281,6 +281,11 @@ public: [[nodiscard]] U1 VoteAny(const U1& value); [[nodiscard]] U1 VoteEqual(const U1& value); [[nodiscard]] U32 SubgroupBallot(const U1& value); + [[nodiscard]] U32 SubgroupEqMask(); + [[nodiscard]] U32 SubgroupLtMask(); + [[nodiscard]] U32 SubgroupLeMask(); + [[nodiscard]] U32 SubgroupGtMask(); + [[nodiscard]] U32 SubgroupGeMask(); [[nodiscard]] U32 ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, const IR::U32& seg_mask); [[nodiscard]] U32 ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index dcd54bcf7..1697de965 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -417,6 +417,11 @@ OPCODE(VoteAll, U1, U1, OPCODE(VoteAny, U1, U1, ) OPCODE(VoteEqual, U1, U1, ) OPCODE(SubgroupBallot, U32, U1, ) +OPCODE(SubgroupEqMask, U32, ) +OPCODE(SubgroupLtMask, U32, ) +OPCODE(SubgroupLeMask, U32, ) +OPCODE(SubgroupGtMask, U32, ) +OPCODE(SubgroupGeMask, U32, ) OPCODE(ShuffleIndex, U32, U32, U32, U32, U32, ) OPCODE(ShuffleUp, U32, U32, U32, U32, U32, ) OPCODE(ShuffleDown, U32, U32, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index 7d9c42a83..be1f21e7b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -10,6 +10,7 @@ namespace Shader::Maxwell { namespace { enum class SpecialRegister : u64 { SR_LANEID = 0, + SR_CLOCK = 1, SR_VIRTCFG = 2, SR_VIRTID = 3, SR_PM0 = 4, @@ -20,6 +21,9 @@ enum class SpecialRegister : u64 { SR_PM5 = 9, SR_PM6 = 10, SR_PM7 = 11, + SR12 = 12, + SR13 = 13, + SR14 = 14, SR_ORDERING_TICKET = 15, SR_PRIM_TYPE = 16, SR_INVOCATION_ID = 17, @@ -41,44 +45,70 @@ enum class SpecialRegister : u64 { SR_TID_X = 33, SR_TID_Y = 34, SR_TID_Z = 35, + SR_CTA_PARAM = 36, SR_CTAID_X = 37, SR_CTAID_Y = 38, SR_CTAID_Z = 39, - SR_NTID = 49, - SR_CirQueueIncrMinusOne = 50, - SR_NLATC = 51, - SR_SWINLO = 57, - SR_SWINSZ = 58, - SR_SMEMSZ = 59, - SR_SMEMBANKS = 60, - SR_LWINLO = 61, - SR_LWINSZ = 62, - SR_LMEMLOSZ = 63, - SR_LMEMHIOFF = 64, - SR_EQMASK = 65, - SR_LTMASK = 66, - SR_LEMASK = 67, - SR_GTMASK = 68, - SR_GEMASK = 69, - SR_REGALLOC = 70, - SR_GLOBALERRORSTATUS = 73, - SR_WARPERRORSTATUS = 75, - SR_PM_HI0 = 81, - SR_PM_HI1 = 82, - SR_PM_HI2 = 83, - SR_PM_HI3 = 84, - SR_PM_HI4 = 85, - SR_PM_HI5 = 86, - SR_PM_HI6 = 87, - SR_PM_HI7 = 88, - SR_CLOCKLO = 89, - SR_CLOCKHI = 90, - SR_GLOBALTIMERLO = 91, - SR_GLOBALTIMERHI = 92, - SR_HWTASKID = 105, - SR_CIRCULARQUEUEENTRYINDEX = 106, - SR_CIRCULARQUEUEENTRYADDRESSLOW = 107, - SR_CIRCULARQUEUEENTRYADDRESSHIGH = 108, + SR_NTID = 40, + SR_CirQueueIncrMinusOne = 41, + SR_NLATC = 42, + SR43 = 43, + SR_SM_SPA_VERSION = 44, + SR_MULTIPASSSHADERINFO = 45, + SR_LWINHI = 46, + SR_SWINHI = 47, + SR_SWINLO = 48, + SR_SWINSZ = 49, + SR_SMEMSZ = 50, + SR_SMEMBANKS = 51, + SR_LWINLO = 52, + SR_LWINSZ = 53, + SR_LMEMLOSZ = 54, + SR_LMEMHIOFF = 55, + SR_EQMASK = 56, + SR_LTMASK = 57, + SR_LEMASK = 58, + SR_GTMASK = 59, + SR_GEMASK = 60, + SR_REGALLOC = 61, + SR_BARRIERALLOC = 62, + SR63 = 63, + SR_GLOBALERRORSTATUS = 64, + SR65 = 65, + SR_WARPERRORSTATUS = 66, + SR_WARPERRORSTATUSCLEAR = 67, + SR68 = 68, + SR69 = 69, + SR70 = 70, + SR71 = 71, + SR_PM_HI0 = 72, + SR_PM_HI1 = 73, + SR_PM_HI2 = 74, + SR_PM_HI3 = 75, + SR_PM_HI4 = 76, + SR_PM_HI5 = 77, + SR_PM_HI6 = 78, + SR_PM_HI7 = 79, + SR_CLOCKLO = 80, + SR_CLOCKHI = 81, + SR_GLOBALTIMERLO = 82, + SR_GLOBALTIMERHI = 83, + SR84 = 84, + SR85 = 85, + SR86 = 86, + SR87 = 87, + SR88 = 88, + SR89 = 89, + SR90 = 90, + SR91 = 91, + SR92 = 92, + SR93 = 93, + SR94 = 94, + SR95 = 95, + SR_HWTASKID = 96, + SR_CIRCULARQUEUEENTRYINDEX = 97, + SR_CIRCULARQUEUEENTRYADDRESSLOW = 98, + SR_CIRCULARQUEUEENTRYADDRESSHIGH = 99, }; [[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { @@ -103,6 +133,16 @@ enum class SpecialRegister : u64 { return ir.Imm32(Common::BitCast(1.0f)); case SpecialRegister::SR_LANEID: return ir.LaneId(); + case SpecialRegister::SR_EQMASK: + return ir.SubgroupEqMask(); + case SpecialRegister::SR_LTMASK: + return ir.SubgroupLtMask(); + case SpecialRegister::SR_LEMASK: + return ir.SubgroupLeMask(); + case SpecialRegister::SR_GTMASK: + return ir.SubgroupGtMask(); + case SpecialRegister::SR_GEMASK: + return ir.SubgroupGeMask(); default: throw NotImplementedException("S2R special register {}", special_register); } -- cgit v1.2.3 From ffca21487f9728015a2c036fa581ead7d3d074d9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 4 Apr 2021 05:18:09 -0300 Subject: shader: Eliminate orphan blocks more efficiently --- src/shader_recompiler/frontend/maxwell/program.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 7b08f11b0..05b7591bc 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -14,20 +14,20 @@ #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { - -static void RemoveUnreachableBlocks(IR::Program& program) { +namespace { +void RemoveUnreachableBlocks(IR::Program& program) { // Some blocks might be unreachable if a function call exists unconditionally // If this happens the number of blocks and post order blocks will mismatch if (program.blocks.size() == program.post_order_blocks.size()) { return; } - const IR::BlockList& post_order{program.post_order_blocks}; - std::erase_if(program.blocks, [&](IR::Block* block) { - return std::ranges::find(post_order, block) == post_order.end(); - }); + const auto begin{std::next(program.blocks.begin())}; + const auto end{program.blocks.end()}; + const auto pred{[](IR::Block* block) { return block->ImmediatePredecessors().empty(); }}; + program.blocks.erase(std::remove_if(begin, end, pred), end); } -static void CollectInterpolationInfo(Environment& env, IR::Program& program) { +void CollectInterpolationInfo(Environment& env, IR::Program& program) { if (program.stage != Stage::Fragment) { return; } @@ -60,6 +60,7 @@ static void CollectInterpolationInfo(Environment& env, IR::Program& program) { }(); } } +} // Anonymous namespace IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, Flow::CFG& cfg) { -- cgit v1.2.3 From 9e6fe430bdc615ae5f7cc4fbc32d7e2baccd7ceb Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 4 Apr 2021 20:00:34 -0300 Subject: shader: Fix splits on blocks using indirect branches --- .../frontend/maxwell/control_flow.cpp | 35 ++++++++++++++++++---- .../frontend/maxwell/control_flow.h | 18 +++++------ src/shader_recompiler/frontend/maxwell/program.cpp | 2 +- 3 files changed, 38 insertions(+), 17 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index ac8707847..eb0f7c8d1 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -45,19 +45,29 @@ void Split(Block* old_block, Block* new_block, Location pc) { .begin{pc}, .end{old_block->end}, .end_class{old_block->end_class}, - .stack{old_block->stack}, .cond{old_block->cond}, + .stack{old_block->stack}, .branch_true{old_block->branch_true}, .branch_false{old_block->branch_false}, + .function_call{old_block->function_call}, + .return_block{old_block->return_block}, + .branch_reg{old_block->branch_reg}, + .branch_offset{old_block->branch_offset}, + .indirect_branches{std::move(old_block->indirect_branches)}, }; *old_block = Block{ .begin{old_block->begin}, .end{pc}, .end_class{EndClass::Branch}, - .stack{std::move(old_block->stack)}, .cond{true}, + .stack{std::move(old_block->stack)}, .branch_true{new_block}, .branch_false{nullptr}, + .function_call{}, + .return_block{}, + .branch_reg{}, + .branch_offset{}, + .indirect_branches{}, }; } @@ -173,10 +183,15 @@ Function::Function(ObjectPool& block_pool, Location start_address) .begin{start_address}, .end{start_address}, .end_class{EndClass::Branch}, - .stack{}, .cond{true}, + .stack{}, .branch_true{nullptr}, .branch_false{nullptr}, + .function_call{}, + .return_block{}, + .branch_reg{}, + .branch_offset{}, + .indirect_branches{}, })}, .stack{}, }} {} @@ -351,10 +366,15 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, .begin{block->begin.Virtual()}, .end{block->begin.Virtual()}, .end_class{EndClass::Branch}, - .stack{block->stack}, .cond{cond}, + .stack{block->stack}, .branch_true{conditional_block}, .branch_false{nullptr}, + .function_call{}, + .return_block{}, + .branch_reg{}, + .branch_offset{}, + .indirect_branches{}, }; // Save the contents of the visited block in the conditional block *conditional_block = std::move(*block); @@ -502,10 +522,15 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function .begin{pc}, .end{pc}, .end_class{EndClass::Branch}, - .stack{stack}, .cond{true}, + .stack{stack}, .branch_true{nullptr}, .branch_false{nullptr}, + .function_call{}, + .return_block{}, + .branch_reg{}, + .branch_offset{}, + .indirect_branches{}, })}; function.labels.push_back(Label{ .address{pc}, diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index a8c90d27a..466b14198 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -79,18 +79,14 @@ struct Block : boost::intrusive::set_base_hook< Location begin; Location end; EndClass end_class; - Stack stack; IR::Condition cond; - union { - Block* branch_true; - FunctionId function_call; - IR::Reg branch_reg; - }; - union { - Block* branch_false; - Block* return_block; - s32 branch_offset; - }; + Stack stack; + Block* branch_true; + Block* branch_false; + FunctionId function_call; + Block* return_block; + IR::Reg branch_reg; + s32 branch_offset; std::vector indirect_branches; }; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 05b7591bc..58caa35a1 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -21,7 +21,7 @@ void RemoveUnreachableBlocks(IR::Program& program) { if (program.blocks.size() == program.post_order_blocks.size()) { return; } - const auto begin{std::next(program.blocks.begin())}; + const auto begin{program.blocks.begin() + 1}; const auto end{program.blocks.end()}; const auto pred{[](IR::Block* block) { return block->ImmediatePredecessors().empty(); }}; program.blocks.erase(std::remove_if(begin, end, pred), end); -- cgit v1.2.3 From 20ba0ea0a94fa915cad6392b3742d8e58e2fa0d9 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Tue, 6 Apr 2021 02:01:01 +0200 Subject: shader: Fix BRX tracking --- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 6 +++--- src/shader_recompiler/frontend/maxwell/control_flow.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index eb0f7c8d1..1a4ee4f6c 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -197,7 +197,7 @@ Function::Function(ObjectPool& block_pool, Location start_address) }} {} CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_address) - : env{env_}, block_pool{block_pool_} { + : env{env_}, block_pool{block_pool_}, program_start{start_address} { functions.emplace_back(block_pool, start_address); for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { while (!functions[function_id].labels.empty()) { @@ -427,9 +427,9 @@ void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruct CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, FunctionId function_id) { - const std::optional brx_table{TrackIndirectBranchTable(env, pc, block->begin)}; + const std::optional brx_table{TrackIndirectBranchTable(env, pc, program_start)}; if (!brx_table) { - TrackIndirectBranchTable(env, pc, block->begin); + TrackIndirectBranchTable(env, pc, program_start); throw NotImplementedException("Failed to track indirect branch"); } const IR::FlowTest flow_test{inst.branch.flow_test}; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 466b14198..9f570fbb5 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -157,6 +157,7 @@ private: ObjectPool& block_pool; boost::container::small_vector functions; FunctionId current_function_id{0}; + Location program_start; }; } // namespace Shader::Maxwell::Flow -- cgit v1.2.3 From 0df7e509db060693ee1f131bae44045db995c3bd Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 4 Apr 2021 02:42:58 +0200 Subject: shader: Implement AL2P --- .../impl/attribute_memory_to_physical.cpp | 35 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 4 --- 2 files changed, 35 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp new file mode 100644 index 000000000..fb3f00d3f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp @@ -0,0 +1,35 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { + +enum class BitSize : u64 { + B32, + B64, + B96, + B128, +}; + +void TranslatorVisitor::AL2P(u64 inst) { + union { + u64 raw; + BitField<0, 8, IR::Reg> result_register; + BitField<8, 8, IR::Reg> indexing_register; + BitField<20, 11, s64> offset; + BitField<47, 2, BitSize> bitsize; + } al2p{inst}; + if (al2p.bitsize != BitSize::B32) { + throw NotImplementedException("BitSize {}", al2p.bitsize.Value()); + } + const IR::U32 converted_offset{ir.Imm32(static_cast(al2p.offset.Value()))}; + const IR::U32 result{ir.IAdd(X(al2p.indexing_register), converted_offset)}; + X(al2p.result_register, result); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index acabb0118..ba0cfa673 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -13,10 +13,6 @@ namespace Shader::Maxwell { throw NotImplementedException("Instruction {} is not implemented", opcode); } -void TranslatorVisitor::AL2P(u64) { - ThrowNotImplemented(Opcode::AL2P); -} - void TranslatorVisitor::ATOM_cas(u64) { ThrowNotImplemented(Opcode::ATOM_cas); } -- cgit v1.2.3 From 1d51803169f72f79e19995072fb9e8a371dbdcbf Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 4 Apr 2021 06:47:14 +0200 Subject: shader: Implement indexed attributes --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 8 +++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 3 ++ src/shader_recompiler/frontend/maxwell/program.cpp | 2 +- .../translate/impl/load_store_attribute.cpp | 38 ++++++++++++++-------- 4 files changed, 37 insertions(+), 14 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ed1e0dd3b..e4e9b260c 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -307,6 +307,14 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) { Inst(Opcode::SetAttribute, attribute, value); } +F32 IREmitter::GetAttributeIndexed(IR::U32 phys_address) { + return Inst(Opcode::GetAttributeIndexed, phys_address); +} + +void IREmitter::SetAttributeIndexed(IR::U32 phys_address, const F32& value) { + Inst(Opcode::SetAttributeIndexed, phys_address, value); +} + void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 42756af43..afa8bd924 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -76,6 +76,9 @@ public: [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); void SetAttribute(IR::Attribute attribute, const F32& value); + [[nodiscard]] F32 GetAttributeIndexed(IR::U32 phys_address); + void SetAttributeIndexed(IR::U32 phys_address, const F32& value); + void SetFragColor(u32 index, u32 component, const F32& value); void SetFragDepth(const F32& value); diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 58caa35a1..aaf2a74a7 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -87,7 +87,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Mon, 5 Apr 2021 04:03:12 +0200 Subject: shader: Address feedback --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 +-- src/shader_recompiler/frontend/ir/ir_emitter.h | 4 +-- .../translate/impl/load_store_attribute.cpp | 37 ++++++++++++---------- 3 files changed, 25 insertions(+), 20 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index e4e9b260c..13eb2de4c 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -307,11 +307,11 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) { Inst(Opcode::SetAttribute, attribute, value); } -F32 IREmitter::GetAttributeIndexed(IR::U32 phys_address) { +F32 IREmitter::GetAttributeIndexed(const U32& phys_address) { return Inst(Opcode::GetAttributeIndexed, phys_address); } -void IREmitter::SetAttributeIndexed(IR::U32 phys_address, const F32& value) { +void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value) { Inst(Opcode::SetAttributeIndexed, phys_address, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index afa8bd924..2cab1dc5d 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -76,8 +76,8 @@ public: [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); void SetAttribute(IR::Attribute attribute, const F32& value); - [[nodiscard]] F32 GetAttributeIndexed(IR::U32 phys_address); - void SetAttributeIndexed(IR::U32 phys_address, const F32& value); + [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); + void SetAttributeIndexed(const U32& phys_address, const F32& value); void SetFragColor(u32 index, u32 component, const F32& value); void SetFragDepth(const F32& value); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index 0d248c020..f629e7167 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -44,6 +44,17 @@ u32 NumElements(Size size) { } throw InvalidArgument("Invalid size {}", size); } + +template +void HandleIndexed(TranslatorVisitor& v, IR::Reg index_reg, u32 num_elements, F&& f) { + const IR::U32 index_value{v.X(index_reg)}; + for (u32 element = 0; element < num_elements; ++element) { + const IR::U32 final_offset{ + element == 0 ? index_value : IR::U32{v.ir.IAdd(index_value, v.ir.Imm32(element * 4U))}}; + f(element, final_offset); + } +} + } // Anonymous namespace void TranslatorVisitor::ALD(u64 insn) { @@ -70,18 +81,15 @@ void TranslatorVisitor::ALD(u64 insn) { throw NotImplementedException("Unaligned absolute offset {}", offset); } const u32 num_elements{NumElements(ald.size)}; - if (ald.index_reg != IR::Reg::RZ) { - const IR::U32 index_value = X(ald.index_reg); + if (ald.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - const IR::U32 final_offset = - element == 0 ? index_value : IR::U32{ir.IAdd(index_value, ir.Imm32(element * 4U))}; - F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset)); + F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element})); } return; } - for (u32 element = 0; element < num_elements; ++element) { - F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element})); - } + HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { + F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset)); + }); } void TranslatorVisitor::AST(u64 insn) { @@ -110,18 +118,15 @@ void TranslatorVisitor::AST(u64 insn) { throw NotImplementedException("Unaligned absolute offset {}", offset); } const u32 num_elements{NumElements(ast.size)}; - if (ast.index_reg != IR::Reg::RZ) { - const IR::U32 index_value = X(ast.index_reg); + if (ast.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - const IR::U32 final_offset = - element == 0 ? index_value : IR::U32{ir.IAdd(index_value, ir.Imm32(element * 4U))}; - ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element)); + ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element)); } return; } - for (u32 element = 0; element < num_elements; ++element) { - ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element)); - } + HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { + ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element)); + }); } void TranslatorVisitor::IPA(u64 insn) { -- cgit v1.2.3 From 56b92bd89cdf28f51277d6fc68115b2cd4b18864 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 6 Apr 2021 05:53:38 -0300 Subject: shader: Fix F2I --- .../maxwell/translate/impl/floating_point_conversion_integer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index ef55b9c75..21ae92be1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -37,7 +37,7 @@ union F2I { BitField<10, 2, SrcFormat> src_format; BitField<12, 1, u64> is_signed; BitField<39, 2, Rounding> rounding; - BitField<49, 1, u64> half; + BitField<41, 1, u64> half; BitField<44, 1, u64> ftz; BitField<45, 1, u64> abs; BitField<47, 1, u64> cc; -- cgit v1.2.3 From 233e39bb7b9ca7660c7a63a386e285aa5524bd20 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 7 Apr 2021 16:48:39 -0300 Subject: shader: Fix dangling labels --- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 1a4ee4f6c..847bb1986 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -518,6 +518,11 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function } return &*it; } + // Make sure we don't insert the same layer twice + const auto label_it{std::ranges::find(function.labels, pc, &Label::address)}; + if (label_it != function.labels.end()) { + return label_it->block; + } Block* const new_block{block_pool.Create(Block{ .begin{pc}, .end{pc}, -- cgit v1.2.3 From 5cd3d00167b17c1fe36f97da978a7024e93c14e7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 9 Apr 2021 23:41:27 -0300 Subject: shader: Fix FCMP immediate variant --- .../frontend/maxwell/translate/impl/floating_point_compare.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp index e78e9c4e1..c02a40209 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -42,7 +42,15 @@ void TranslatorVisitor::FCMP_cr(u64 insn) { } void TranslatorVisitor::FCMP_imm(u64 insn) { - FCMP(*this, insn, GetReg39(insn), GetFloatImm20(insn)); + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const fcmp{insn}; + const u32 sign_bit{fcmp.is_negative != 0 ? (1U << 31) : 0}; + const u32 value{static_cast(fcmp.value) << 12}; + + FCMP(*this, insn, ir.Imm32(value), GetFloatReg39(insn)); } } // namespace Shader::Maxwell -- cgit v1.2.3 From 0bb85f6a753c769266c95c4ba146b25b9eaaaffd Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Mon, 5 Apr 2021 22:25:22 -0400 Subject: shader_recompiler,video_core: Cleanup some GCC and Clang errors Mostly fixing unused *, implicit conversion, braced scalar init, fpermissive, and some others. Some Clang errors likely remain in video_core, and std::ranges is still a pertinent issue in shader_recompiler shader_recompiler: cmake: Force bracket depth to 1024 on Clang Increases the maximum fold expression depth thread_worker: Include condition_variable Don't use list initializers in control flow Co-authored-by: ReinUsesLisp --- src/shader_recompiler/frontend/ir/attribute.cpp | 4 +- src/shader_recompiler/frontend/ir/basic_block.cpp | 2 +- src/shader_recompiler/frontend/ir/condition.cpp | 6 +- src/shader_recompiler/frontend/ir/condition.h | 4 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 +- .../frontend/ir/microinstruction.cpp | 16 +-- .../frontend/ir/microinstruction.h | 4 +- src/shader_recompiler/frontend/ir/opcodes.cpp | 2 +- src/shader_recompiler/frontend/ir/program.cpp | 2 - src/shader_recompiler/frontend/ir/value.cpp | 4 +- src/shader_recompiler/frontend/ir/value.h | 2 +- .../frontend/maxwell/control_flow.cpp | 140 +++++++++------------ src/shader_recompiler/frontend/maxwell/decode.cpp | 10 +- .../maxwell/indirect_branch_table_track.cpp | 10 +- .../frontend/maxwell/structured_control_flow.cpp | 3 +- .../frontend/maxwell/translate/impl/double_add.cpp | 6 +- .../translate/impl/double_fused_multiply_add.cpp | 6 +- .../maxwell/translate/impl/double_multiply.cpp | 6 +- .../maxwell/translate/impl/floating_point_add.cpp | 6 +- .../translate/impl/floating_point_compare.cpp | 3 +- .../impl/floating_point_compare_and_set.cpp | 6 +- .../floating_point_conversion_floating_point.cpp | 6 +- .../impl/floating_point_conversion_integer.cpp | 11 +- .../impl/floating_point_fused_multiply_add.cpp | 6 +- .../translate/impl/floating_point_min_max.cpp | 6 +- .../translate/impl/floating_point_multiply.cpp | 8 +- .../impl/floating_point_set_predicate.cpp | 6 +- .../translate/impl/floating_point_swizzled_add.cpp | 6 +- .../translate/impl/half_floating_point_add.cpp | 11 +- .../half_floating_point_fused_multiply_add.cpp | 11 +- .../impl/half_floating_point_multiply.cpp | 11 +- .../translate/impl/half_floating_point_set.cpp | 11 +- .../impl/half_floating_point_set_predicate.cpp | 12 +- .../frontend/maxwell/translate/impl/impl.cpp | 8 +- .../maxwell/translate/impl/integer_add.cpp | 1 - .../impl/integer_floating_point_conversion.cpp | 4 +- .../maxwell/translate/impl/load_constant.cpp | 2 +- .../translate/impl/load_store_local_shared.cpp | 9 +- .../maxwell/translate/impl/load_store_memory.cpp | 4 +- .../maxwell/translate/impl/texture_fetch.cpp | 2 +- .../translate/impl/texture_fetch_swizzled.cpp | 2 +- .../translate/impl/texture_gather_swizzled.cpp | 2 +- .../translate/impl/texture_load_swizzled.cpp | 2 +- .../maxwell/translate/impl/texture_query.cpp | 2 +- .../maxwell/translate/impl/video_set_predicate.cpp | 1 - 45 files changed, 184 insertions(+), 206 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp index 4811242ea..7993e5c43 100644 --- a/src/shader_recompiler/frontend/ir/attribute.cpp +++ b/src/shader_recompiler/frontend/ir/attribute.cpp @@ -17,7 +17,7 @@ u32 GenericAttributeIndex(Attribute attribute) { if (!IsGeneric(attribute)) { throw InvalidArgument("Attribute is not generic {}", attribute); } - return (static_cast(attribute) - static_cast(Attribute::Generic0X)) / 4; + return (static_cast(attribute) - static_cast(Attribute::Generic0X)) / 4u; } std::string NameOf(Attribute attribute) { @@ -444,4 +444,4 @@ std::string NameOf(Attribute attribute) { return fmt::format("", static_cast(attribute)); } -} // namespace Shader::IR \ No newline at end of file +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index ec029dfd6..e1f0191f4 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -155,7 +155,7 @@ std::string DumpBlock(const Block& block, const std::map& ret += fmt::format(": begin={:04x} end={:04x}\n", block.LocationBegin(), block.LocationEnd()); for (const Inst& inst : block) { - const Opcode op{inst.Opcode()}; + const Opcode op{inst.GetOpcode()}; ret += fmt::format("[{:016x}] ", reinterpret_cast(&inst)); if (TypeOf(op) != Type::Void) { ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op); diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp index ec1659e2b..fc18ea2a2 100644 --- a/src/shader_recompiler/frontend/ir/condition.cpp +++ b/src/shader_recompiler/frontend/ir/condition.cpp @@ -12,10 +12,10 @@ namespace Shader::IR { std::string NameOf(Condition condition) { std::string ret; - if (condition.FlowTest() != FlowTest::T) { - ret = fmt::to_string(condition.FlowTest()); + if (condition.GetFlowTest() != FlowTest::T) { + ret = fmt::to_string(condition.GetFlowTest()); } - const auto [pred, negated]{condition.Pred()}; + const auto [pred, negated]{condition.GetPred()}; if (!ret.empty()) { ret += '&'; } diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h index 51c2f15cf..aa8597c60 100644 --- a/src/shader_recompiler/frontend/ir/condition.h +++ b/src/shader_recompiler/frontend/ir/condition.h @@ -30,11 +30,11 @@ public: auto operator<=>(const Condition&) const noexcept = default; - [[nodiscard]] IR::FlowTest FlowTest() const noexcept { + [[nodiscard]] IR::FlowTest GetFlowTest() const noexcept { return static_cast(flow_test); } - [[nodiscard]] std::pair Pred() const noexcept { + [[nodiscard]] std::pair GetPred() const noexcept { return {static_cast(pred), pred_negated != 0}; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 13eb2de4c..a2104bdb3 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -290,8 +290,8 @@ static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { } U1 IREmitter::Condition(IR::Condition cond) { - const FlowTest flow_test{cond.FlowTest()}; - const auto [pred, is_negated]{cond.Pred()}; + const FlowTest flow_test{cond.GetFlowTest()}; + const auto [pred, is_negated]{cond.GetPred()}; return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test)); } diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 481202d94..ceb44e604 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -12,7 +12,7 @@ namespace Shader::IR { namespace { void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { - if (inst && inst->Opcode() != opcode) { + if (inst && inst->GetOpcode() != opcode) { throw LogicError("Invalid pseudo-instruction"); } } @@ -25,11 +25,17 @@ void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { } void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { - if (inst->Opcode() != expected_opcode) { + if (inst->GetOpcode() != expected_opcode) { throw LogicError("Undoing use of invalid pseudo-op"); } inst = nullptr; } + +void AllocAssociatedInsts(std::unique_ptr& associated_insts) { + if (!associated_insts) { + associated_insts = std::make_unique(); + } +} } // Anonymous namespace Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { @@ -249,12 +255,6 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) { op = opcode; } -void AllocAssociatedInsts(std::unique_ptr& associated_insts) { - if (!associated_insts) { - associated_insts = std::make_unique(); - } -} - void Inst::Use(const Value& value) { Inst* const inst{value.Inst()}; ++inst->use_count; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 6658dc674..97dc91d85 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -46,7 +46,7 @@ public: } /// Get the opcode this microinstruction represents. - [[nodiscard]] IR::Opcode Opcode() const noexcept { + [[nodiscard]] IR::Opcode GetOpcode() const noexcept { return op; } @@ -95,7 +95,7 @@ public: requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) [[nodiscard]] FlagsType Flags() const noexcept { FlagsType ret; - std::memcpy(&ret, &flags, sizeof(ret)); + std::memcpy(reinterpret_cast(&ret), &flags, sizeof(ret)); return ret; } diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 1cb9db6c9..002dbf94e 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -49,7 +49,7 @@ constexpr std::array META_TABLE{ #define OPCODE(name_token, type_token, ...) \ OpcodeMeta{ \ .name{#name_token}, \ - .type{type_token}, \ + .type = type_token, \ .arg_types{__VA_ARGS__}, \ }, #include "opcodes.inc" diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp index 5f51aeb5f..89a17fb1b 100644 --- a/src/shader_recompiler/frontend/ir/program.cpp +++ b/src/shader_recompiler/frontend/ir/program.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#pragma once - #include #include diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 837c1b487..1e7ffb86d 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -33,11 +33,11 @@ Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {} bool Value::IsIdentity() const noexcept { - return type == Type::Opaque && inst->Opcode() == Opcode::Identity; + return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity; } bool Value::IsPhi() const noexcept { - return type == Type::Opaque && inst->Opcode() == Opcode::Phi; + return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi; } bool Value::IsEmpty() const noexcept { diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index b27601e70..a0962863d 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -94,7 +94,7 @@ public: } } - explicit TypedValue(IR::Inst* inst) : TypedValue(Value(inst)) {} + explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {} }; using U1 = TypedValue; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 847bb1986..cb8ec7eaa 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -34,41 +34,37 @@ struct Compare { }; u32 BranchOffset(Location pc, Instruction inst) { - return pc.Offset() + inst.branch.Offset() + 8; + return pc.Offset() + static_cast(inst.branch.Offset()) + 8u; } void Split(Block* old_block, Block* new_block, Location pc) { if (pc <= old_block->begin || pc >= old_block->end) { throw InvalidArgument("Invalid address to split={}", pc); } - *new_block = Block{ - .begin{pc}, - .end{old_block->end}, - .end_class{old_block->end_class}, - .cond{old_block->cond}, - .stack{old_block->stack}, - .branch_true{old_block->branch_true}, - .branch_false{old_block->branch_false}, - .function_call{old_block->function_call}, - .return_block{old_block->return_block}, - .branch_reg{old_block->branch_reg}, - .branch_offset{old_block->branch_offset}, - .indirect_branches{std::move(old_block->indirect_branches)}, - }; - *old_block = Block{ - .begin{old_block->begin}, - .end{pc}, - .end_class{EndClass::Branch}, - .cond{true}, - .stack{std::move(old_block->stack)}, - .branch_true{new_block}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - }; + *new_block = Block{}; + new_block->begin = pc; + new_block->end = old_block->end; + new_block->end_class = old_block->end_class, + new_block->cond = old_block->cond; + new_block->stack = old_block->stack; + new_block->branch_true = old_block->branch_true; + new_block->branch_false = old_block->branch_false; + new_block->function_call = old_block->function_call; + new_block->return_block = old_block->return_block; + new_block->branch_reg = old_block->branch_reg; + new_block->branch_offset = old_block->branch_offset; + new_block->indirect_branches = std::move(old_block->indirect_branches); + + const Location old_begin{old_block->begin}; + Stack old_stack{std::move(old_block->stack)}; + *old_block = Block{}; + old_block->begin = old_begin; + old_block->end = pc; + old_block->end_class = EndClass::Branch; + old_block->cond = IR::Condition(true); + old_block->stack = old_stack; + old_block->branch_true = new_block; + old_block->branch_false = nullptr; } Token OpcodeToken(Opcode opcode) { @@ -141,7 +137,7 @@ std::string NameOf(const Block& block) { void Stack::Push(Token token, Location target) { entries.push_back({ - .token{token}, + .token = token, .target{target}, }); } @@ -177,24 +173,17 @@ bool Block::Contains(Location pc) const noexcept { } Function::Function(ObjectPool& block_pool, Location start_address) - : entrypoint{start_address}, labels{{ - .address{start_address}, - .block{block_pool.Create(Block{ - .begin{start_address}, - .end{start_address}, - .end_class{EndClass::Branch}, - .cond{true}, - .stack{}, - .branch_true{nullptr}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - })}, - .stack{}, - }} {} + : entrypoint{start_address} { + Label& label{labels.emplace_back()}; + label.address = start_address; + label.block = block_pool.Create(Block{}); + label.block->begin = start_address; + label.block->end = start_address; + label.block->end_class = EndClass::Branch; + label.block->cond = IR::Condition(true); + label.block->branch_true = nullptr; + label.block->branch_false = nullptr; +} CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_address) : env{env_}, block_pool{block_pool_}, program_start{start_address} { @@ -327,7 +316,8 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati // Insert the function into the list if it doesn't exist const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; const bool exists{it != functions.end()}; - const FunctionId call_id{exists ? std::distance(functions.begin(), it) : functions.size()}; + const FunctionId call_id{exists ? static_cast(std::distance(functions.begin(), it)) + : functions.size()}; if (!exists) { functions.emplace_back(block_pool, cal_pc); } @@ -362,20 +352,14 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, } // Create a virtual block and a conditional block Block* const conditional_block{block_pool.Create()}; - Block virtual_block{ - .begin{block->begin.Virtual()}, - .end{block->begin.Virtual()}, - .end_class{EndClass::Branch}, - .cond{cond}, - .stack{block->stack}, - .branch_true{conditional_block}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - }; + Block virtual_block{}; + virtual_block.begin = block->begin.Virtual(); + virtual_block.end = block->begin.Virtual(); + virtual_block.end_class = EndClass::Branch; + virtual_block.stack = block->stack; + virtual_block.cond = cond; + virtual_block.branch_true = conditional_block; + virtual_block.branch_false = nullptr; // Save the contents of the visited block in the conditional block *conditional_block = std::move(*block); // Impersonate the visited block with a virtual block @@ -444,7 +428,7 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, if (!is_absolute) { target += pc.Offset(); } - target += brx_table->branch_offset; + target += static_cast(brx_table->branch_offset); target += 8; targets.push_back(target); } @@ -455,8 +439,8 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, for (const u32 target : targets) { Block* const branch{AddLabel(block, block->stack, target, function_id)}; block->indirect_branches.push_back({ - .block{branch}, - .address{target}, + .block = branch, + .address = target, }); } block->cond = IR::Condition{true}; @@ -523,23 +507,17 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function if (label_it != function.labels.end()) { return label_it->block; } - Block* const new_block{block_pool.Create(Block{ - .begin{pc}, - .end{pc}, - .end_class{EndClass::Branch}, - .cond{true}, - .stack{stack}, - .branch_true{nullptr}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - })}; + Block* const new_block{block_pool.Create()}; + new_block->begin = pc; + new_block->end = pc; + new_block->end_class = EndClass::Branch; + new_block->cond = IR::Condition(true); + new_block->stack = stack; + new_block->branch_true = nullptr; + new_block->branch_false = nullptr; function.labels.push_back(Label{ .address{pc}, - .block{new_block}, + .block = new_block, .stack{std::move(stack)}, }); return new_block; diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp index bd85afa1e..932d19c1d 100644 --- a/src/shader_recompiler/frontend/maxwell/decode.cpp +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp @@ -45,7 +45,7 @@ constexpr MaskValue MaskValueFromEncoding(const char* encoding) { bit >>= 1; } } - return MaskValue{.mask{mask}, .value{value}}; + return MaskValue{.mask = mask, .value = value}; } struct InstEncoding { @@ -56,7 +56,7 @@ constexpr std::array UNORDERED_ENCODINGS{ #define INST(name, cute, encode) \ InstEncoding{ \ .mask_value{MaskValueFromEncoding(encode)}, \ - .opcode{Opcode::name}, \ + .opcode = Opcode::name, \ }, #include "maxwell.inc" #undef INST @@ -116,9 +116,9 @@ constexpr auto MakeFastLookupTableIndex(size_t index) { const size_t value{ToFastLookupIndex(encoding.mask_value.value)}; if ((index & mask) == value) { encodings.at(element) = InstInfo{ - .high_mask{static_cast(encoding.mask_value.mask >> MASK_SHIFT)}, - .high_value{static_cast(encoding.mask_value.value >> MASK_SHIFT)}, - .opcode{encoding.opcode}, + .high_mask = static_cast(encoding.mask_value.mask >> MASK_SHIFT), + .high_value = static_cast(encoding.mask_value.value >> MASK_SHIFT), + .opcode = encoding.opcode, }; ++element; } diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp index 96453509d..008625cb3 100644 --- a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp @@ -97,11 +97,11 @@ std::optional TrackIndirectBranchTable(Environment& env } const u32 imnmx_immediate{static_cast(imnmx.immediate.Value())}; return IndirectBranchTableInfo{ - .cbuf_index{cbuf_index}, - .cbuf_offset{cbuf_offset}, - .num_entries{imnmx_immediate + 1}, - .branch_offset{brx_offset}, - .branch_reg{brx_reg}, + .cbuf_index = cbuf_index, + .cbuf_offset = cbuf_offset, + .num_entries = imnmx_immediate + 1, + .branch_offset = brx_offset, + .branch_reg = brx_reg, }; } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index c804c2a8e..02cef2645 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -558,7 +558,6 @@ private: const Node label{goto_stmt->label}; const u32 label_id{label->id}; const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; - const auto type{label_nested_stmt->type}; Tree loop_body; loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); @@ -566,7 +565,7 @@ private: Statement* const variable{pool.Create(Variable{}, label_id)}; Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; UpdateTreeUp(loop_stmt); - const Node loop_node{body.insert(goto_stmt, *loop_stmt)}; + body.insert(goto_stmt, *loop_stmt); Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; loop_stmt->children.push_front(*new_goto); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp index ac1433dea..5a1b3a8fc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp @@ -31,9 +31,9 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; const IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(dadd.fp_rounding)}, - .fmz_mode{IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(dadd.fp_rounding), + .fmz_mode = IR::FmzMode::None, }; v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp index ff7321862..723841496 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp @@ -25,9 +25,9 @@ void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& s const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)}; const IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(dfma.fp_rounding)}, - .fmz_mode{IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(dfma.fp_rounding), + .fmz_mode = IR::FmzMode::None, }; v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp index 3e83d1c95..4a49299a0 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp @@ -21,9 +21,9 @@ void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)}; const IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(dmul.fp_rounding)}, - .fmz_mode{IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(dmul.fp_rounding), + .fmz_mode = IR::FmzMode::None, }; v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp index b39950c84..b8c89810c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -23,9 +23,9 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)}; const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(fp_rounding)}, - .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::F32 value{v.ir.FPAdd(op_a, op_b, control)}; if (sat) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp index c02a40209..80109ca0e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -19,8 +19,7 @@ void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& o } const fcmp{insn}; const IR::F32 zero{v.ir.Imm32(0.0f)}; - const IR::F32 neg_zero{v.ir.Imm32(-0.0f)}; - const IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}}; + const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)}; const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)}; const IR::U32 src_reg{v.X(fcmp.src_reg)}; const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp index c5417775e..b9f4ee0d9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp @@ -29,9 +29,9 @@ void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)}; const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::U1 pred{v.ir.GetPred(fset.pred)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp index 1e366fde0..035f8782a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -57,9 +57,9 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64}; IR::FpControl fp_control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; if (f2f.src_size != f2f.dst_size) { fp_control.rounding = CastFpRounding(f2f.rounding); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 21ae92be1..cf3cf1ba6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -123,9 +123,9 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None; } const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fmz_mode}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = fmz_mode, }; const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; const IR::F16F32F64 rounded_value{[&] { @@ -186,14 +186,14 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { } else if (f2i.dest_format == DestFormat::I64) { handled_special_case = true; result = IR::U64{ - v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000ULL), result)}; + v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)}; } } if (!handled_special_case && is_signed) { if (bitsize != 64) { result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)}; } else { - result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0ULL), result)}; + result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0UL), result)}; } } @@ -211,6 +211,7 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { void TranslatorVisitor::F2I_reg(u64 insn) { union { + u64 raw; F2I base; BitField<20, 8, IR::Reg> src_reg; } const f2i{insn}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index 18561bc9c..fa2a7807b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -24,9 +24,9 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{CastFpRounding(fp_rounding)}, - .fmz_mode{CastFmzMode(fmz_mode)}, + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = CastFmzMode(fmz_mode), }; IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)}; if (fmz_mode == FmzMode::FMZ && !sat) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp index 343d91032..8ae437528 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp @@ -27,9 +27,9 @@ void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)}; const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::F32 max{v.ir.FPMax(op_a, op_b, control)}; IR::F32 min{v.ir.FPMin(op_a, op_b, control)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index 72f0a18ae..06226b7ce 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -64,9 +64,9 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode } const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{CastFpRounding(fp_rounding)}, - .fmz_mode{CastFmzMode(fmz_mode)}, + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = CastFmzMode(fmz_mode), }; IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)}; if (fmz_mode == FmzMode::FMZ && !sat) { @@ -124,4 +124,4 @@ void TranslatorVisitor::FMUL32I(u64 insn) { fmul32i.sat != 0, fmul32i.cc != 0, false); } -} // namespace Shader::Maxwell \ No newline at end of file +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp index 8ff9db843..5f93a1513 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp @@ -29,9 +29,9 @@ void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)}; const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; const BooleanOp bop{fsetp.bop}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp index e42921a21..7550a8d4c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp @@ -28,9 +28,9 @@ void TranslatorVisitor::FSWZADD(u64 insn) { const IR::U32 swizzle{ir.Imm32(static_cast(fswzadd.swizzle))}; const IR::FpControl fp_control{ - .no_contraction{false}, - .rounding{CastFpRounding(fswzadd.round)}, - .fmz_mode{fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = CastFpRounding(fswzadd.round), + .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp index 03e7bf047..f2738a93b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -34,9 +34,9 @@ void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)}; IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)}; @@ -102,8 +102,9 @@ void TranslatorVisitor::HADD2_imm(u64 insn) { BitField<20, 9, u64> low; } const hadd2{insn}; - const u32 imm{static_cast(hadd2.low << 6) | ((hadd2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hadd2.high << 22) | ((hadd2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast(hadd2.low << 6) | static_cast((hadd2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hadd2.high << 22) | static_cast((hadd2.neg_high != 0 ? 1 : 0) << 31)}; HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp index 8b234bd6a..fd7986701 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp @@ -41,9 +41,9 @@ void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c); const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{HalfPrecision2FmzMode(precision)}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = HalfPrecision2FmzMode(precision), }; IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)}; IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)}; @@ -143,8 +143,9 @@ void TranslatorVisitor::HFMA2_imm(u64 insn) { BitField<57, 2, HalfPrecision> precision; } const hfma2{insn}; - const u32 imm{static_cast(hfma2.low << 6) | ((hfma2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hfma2.high << 22) | ((hfma2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast(hfma2.low << 6) | static_cast((hfma2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hfma2.high << 22) | static_cast((hfma2.neg_high != 0 ? 1 : 0) << 31)}; HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp index 2451a6ef6..3f548ce76 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp @@ -35,9 +35,9 @@ void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bo rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{HalfPrecision2FmzMode(precision)}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = HalfPrecision2FmzMode(precision), }; IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)}; IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)}; @@ -119,8 +119,9 @@ void TranslatorVisitor::HMUL2_imm(u64 insn) { BitField<44, 1, u64> abs_a; } const hmul2{insn}; - const u32 imm{static_cast(hmul2.low << 6) | ((hmul2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hmul2.high << 22) | ((hmul2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast(hmul2.low << 6) | static_cast((hmul2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hmul2.high << 22) | static_cast((hmul2.neg_high != 0 ? 1 : 0) << 31)}; HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp index 7f1f4b88c..cca5b831f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp @@ -41,9 +41,9 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::U1 pred{v.ir.GetPred(hset2.pred)}; @@ -106,8 +106,9 @@ void TranslatorVisitor::HSET2_imm(u64 insn) { BitField<20, 9, u64> low; } const hset2{insn}; - const u32 imm{static_cast(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast(hset2.low << 6) | static_cast((hset2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hset2.high << 22) | static_cast((hset2.neg_high != 0 ? 1 : 0) << 31)}; HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op, Swizzle::H1_H0); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp index 3e2a23c92..b3931dae3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp @@ -43,9 +43,9 @@ void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bo rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::U1 pred{v.ir.GetPred(hsetp2.pred)}; @@ -106,8 +106,10 @@ void TranslatorVisitor::HSETP2_imm(u64 insn) { BitField<20, 9, u64> low; } const hsetp2{insn}; - const u32 imm{static_cast(hsetp2.low << 6) | ((hsetp2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hsetp2.high << 22) | ((hsetp2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{static_cast(hsetp2.low << 6) | + static_cast((hsetp2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hsetp2.high << 22) | + static_cast((hsetp2.neg_high != 0 ? 1 : 0) << 31)}; HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op, hsetp2.h_and != 0); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 30b570ce4..88bbac0a5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -49,7 +49,7 @@ void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) { } const IR::Value result{ir.UnpackUint2x32(value)}; for (int i = 0; i < 2; i++) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast(i))}); } } @@ -63,7 +63,7 @@ void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) { } const IR::Value result{ir.UnpackDouble2x32(value)}; for (int i = 0; i < 2; i++) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast(i))}); } } @@ -156,7 +156,7 @@ IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { const auto [binding, offset_value]{CbufAddr(insn)}; const bool unaligned{cbuf.unaligned != 0}; const u32 offset{offset_value.U32()}; - const IR::Value addr{unaligned ? offset | 4 : (offset & ~7) | 4}; + const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u}; const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})}; const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)}; @@ -200,7 +200,7 @@ IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { BitField<20, 19, u64> value; BitField<56, 1, u64> is_negative; } const imm{insn}; - const u32 sign_bit{imm.is_negative != 0 ? (1ULL << 31) : 0}; + const u32 sign_bit{static_cast(imm.is_negative != 0 ? (1ULL << 31) : 0)}; const u32 value{static_cast(imm.value) << 12}; return ir.Imm32(Common::BitCast(value | sign_bit)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp index 1493e1815..8ffd84867 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -68,7 +68,6 @@ void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { } const iadd{insn}; const bool po{iadd.three_for_po == 3}; - const bool neg_a{!po && iadd.neg_a != 0}; if (!po && iadd.neg_b != 0) { op_b = v.ir.INeg(op_b); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp index e8b5ae1d2..5a0fc36a0 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -131,7 +131,7 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { } const IR::Value vector{v.ir.UnpackDouble2x32(value)}; for (int i = 0; i < 2; ++i) { - v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, i)}); + v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast(i))}); } break; } @@ -170,4 +170,4 @@ void TranslatorVisitor::I2F_imm(u64 insn) { } } -} // namespace Shader::Maxwell \ No newline at end of file +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp index ae3ecea32..2300088e3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -50,7 +50,7 @@ void TranslatorVisitor::LDC(u64 insn) { } const IR::Value vector{ir.GetCbuf(index, offset, 64, false)}; for (int i = 0; i < 2; ++i) { - X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast(i))}); } break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp index 68963c8ea..e24b49721 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp @@ -40,7 +40,6 @@ std::pair GetSize(u64 insn) { BitField<48, 3, Size> size; } const encoding{insn}; - const Size nnn = encoding.size; switch (encoding.size) { case Size::U8: return {8, false}; @@ -99,7 +98,7 @@ void TranslatorVisitor::LDL(u64 insn) { case 32: case 64: case 128: - if (!IR::IsAligned(dest, bit_size / 32)) { + if (!IR::IsAligned(dest, static_cast(bit_size / 32))) { throw NotImplementedException("Unaligned destination register {}", dest); } X(dest, ir.LoadLocal(word_offset)); @@ -123,11 +122,11 @@ void TranslatorVisitor::LDS(u64 insn) { break; case 64: case 128: - if (!IR::IsAligned(dest, bit_size / 32)) { + if (!IR::IsAligned(dest, static_cast(bit_size / 32))) { throw NotImplementedException("Unaligned destination register {}", dest); } for (int element = 0; element < bit_size / 32; ++element) { - X(dest + element, IR::U32{ir.CompositeExtract(value, element)}); + X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast(element))}); } break; } @@ -156,7 +155,7 @@ void TranslatorVisitor::STL(u64 insn) { case 32: case 64: case 128: - if (!IR::IsAligned(reg, bit_size / 32)) { + if (!IR::IsAligned(reg, static_cast(bit_size / 32))) { throw NotImplementedException("Unaligned source register"); } ir.WriteLocal(word_offset, src); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index 71688b1d7..36c5cff2f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -114,7 +114,7 @@ void TranslatorVisitor::LDG(u64 insn) { } const IR::Value vector{ir.LoadGlobal64(address)}; for (int i = 0; i < 2; ++i) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast(i))}); } break; } @@ -125,7 +125,7 @@ void TranslatorVisitor::LDG(u64 insn) { } const IR::Value vector{ir.LoadGlobal128(address)}; for (int i = 0; i < 4; ++i) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast(i))}); } break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index b2da079f9..95d416586 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -199,7 +199,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, if (tex.dc != 0) { value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f); } else { - value = IR::F32{v.ir.CompositeExtract(sample, element)}; + value = IR::F32{v.ir.CompositeExtract(sample, static_cast(element))}; } v.F(dest_reg, value); ++dest_reg; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index d5fda20f4..fe2c7db85 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -53,7 +53,7 @@ constexpr std::array RGBA_LUT{ R | G | B | A, // }; -void CheckAlignment(IR::Reg reg, int alignment) { +void CheckAlignment(IR::Reg reg, size_t alignment) { if (!IR::IsAligned(reg, alignment)) { throw NotImplementedException("Unaligned source register {}", reg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp index beab515ad..2ba9c1018 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp @@ -37,7 +37,7 @@ union Encoding { BitField<36, 13, u64> cbuf_offset; }; -void CheckAlignment(IR::Reg reg, int alignment) { +void CheckAlignment(IR::Reg reg, size_t alignment) { if (!IR::IsAligned(reg, alignment)) { throw NotImplementedException("Unaligned source register {}", reg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp index 623b8fc23..0863bdfcd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp @@ -56,7 +56,7 @@ union Encoding { BitField<53, 4, u64> encoding; }; -void CheckAlignment(IR::Reg reg, int alignment) { +void CheckAlignment(IR::Reg reg, size_t alignment) { if (!IR::IsAligned(reg, alignment)) { throw NotImplementedException("Unaligned source register {}", reg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp index 8c7e04bca..0459e5473 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp @@ -54,7 +54,7 @@ void Impl(TranslatorVisitor& v, u64 insn, std::optional cbuf_offset) { if (((txq.mask >> element) & 1) == 0) { continue; } - v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, element)}); + v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast(element))}); ++dest_reg; } } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp index af13b3fcc..ec5e74f6d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp @@ -69,7 +69,6 @@ void TranslatorVisitor::VSETP(u64 insn) { const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast(vsetp.src_b_imm)) : GetReg20(insn)}; const u32 a_selector{static_cast(vsetp.src_a_selector)}; - const u32 b_selector{is_b_imm ? 0U : static_cast(vsetp.src_b_selector)}; const VideoWidth a_width{vsetp.src_a_width}; const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)}; -- cgit v1.2.3 From 5bfcafa0a21619e8cd82c38ec51e260838f42042 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sat, 10 Apr 2021 02:32:55 -0400 Subject: shader: Address feedback + clang format --- src/shader_recompiler/frontend/ir/opcodes.cpp | 2 +- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 4 ++-- src/shader_recompiler/frontend/maxwell/control_flow.h | 16 ++++++++-------- src/shader_recompiler/frontend/maxwell/decode.cpp | 2 +- .../frontend/maxwell/translate/impl/common_funcs.cpp | 5 +++-- .../frontend/maxwell/translate/impl/not_implemented.cpp | 1 - 6 files changed, 15 insertions(+), 15 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 002dbf94e..7d3e0b2ab 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -49,7 +49,7 @@ constexpr std::array META_TABLE{ #define OPCODE(name_token, type_token, ...) \ OpcodeMeta{ \ .name{#name_token}, \ - .type = type_token, \ + .type = type_token, \ .arg_types{__VA_ARGS__}, \ }, #include "opcodes.inc" diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index cb8ec7eaa..9811183f1 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -44,7 +44,7 @@ void Split(Block* old_block, Block* new_block, Location pc) { *new_block = Block{}; new_block->begin = pc; new_block->end = old_block->end; - new_block->end_class = old_block->end_class, + new_block->end_class = old_block->end_class; new_block->cond = old_block->cond; new_block->stack = old_block->stack; new_block->branch_true = old_block->branch_true; @@ -428,7 +428,7 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, if (!is_absolute) { target += pc.Offset(); } - target += static_cast(brx_table->branch_offset); + target += static_cast(brx_table->branch_offset); target += 8; targets.push_back(target); } diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 9f570fbb5..89966b16a 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -78,15 +78,15 @@ struct Block : boost::intrusive::set_base_hook< Location begin; Location end; - EndClass end_class; - IR::Condition cond; + EndClass end_class{}; + IR::Condition cond{}; Stack stack; - Block* branch_true; - Block* branch_false; - FunctionId function_call; - Block* return_block; - IR::Reg branch_reg; - s32 branch_offset; + Block* branch_true{}; + Block* branch_false{}; + FunctionId function_call{}; + Block* return_block{}; + IR::Reg branch_reg{}; + s32 branch_offset{}; std::vector indirect_branches; }; diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp index 932d19c1d..972f677dc 100644 --- a/src/shader_recompiler/frontend/maxwell/decode.cpp +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp @@ -56,7 +56,7 @@ constexpr std::array UNORDERED_ENCODINGS{ #define INST(name, cute, encode) \ InstEncoding{ \ .mask_value{MaskValueFromEncoding(encode)}, \ - .opcode = Opcode::name, \ + .opcode = Opcode::name, \ }, #include "maxwell.inc" #undef INST diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index d30e82b10..10bb01d99 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -72,8 +72,9 @@ bool IsCompareOpOrdered(FPCompareOp op) { } } -IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, const IR::F16F32F64& operand_2, - FPCompareOp compare_op, IR::FpControl control) { +IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, + const IR::F16F32F64& operand_2, FPCompareOp compare_op, + IR::FpControl control) { const bool ordered{IsCompareOpOrdered(compare_op)}; switch (compare_op) { case FPCompareOp::F: diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index ba0cfa673..c23901052 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -65,7 +65,6 @@ void TranslatorVisitor::CS2R(u64) { ThrowNotImplemented(Opcode::CS2R); } - void TranslatorVisitor::FCHK_reg(u64) { ThrowNotImplemented(Opcode::FCHK_reg); } -- cgit v1.2.3 From 094da34456bbf56353211b47fcb227c09637aa15 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 10 Apr 2021 03:44:05 -0300 Subject: shader: Fix Windows build issues --- .../maxwell/translate/impl/floating_point_conversion_integer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index cf3cf1ba6..3cb896950 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -193,7 +193,7 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { if (bitsize != 64) { result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)}; } else { - result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0UL), result)}; + result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)}; } } -- cgit v1.2.3 From 7cb2ab358517d95ebcd35c94c72b9e91762906c3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 9 Apr 2021 01:45:39 -0300 Subject: shader: Implement SULD and SUST --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 11 + src/shader_recompiler/frontend/ir/ir_emitter.h | 7 +- src/shader_recompiler/frontend/ir/modifiers.h | 12 +- src/shader_recompiler/frontend/ir/opcodes.inc | 6 + .../maxwell/translate/impl/not_implemented.cpp | 8 - .../maxwell/translate/impl/surface_load_store.cpp | 280 +++++++++++++++++++++ .../maxwell/translate/impl/texture_fetch.cpp | 19 +- .../translate/impl/texture_fetch_swizzled.cpp | 12 +- .../maxwell/translate/impl/texture_gather.cpp | 19 +- .../translate/impl/texture_gather_swizzled.cpp | 3 +- .../maxwell/translate/impl/texture_gradient.cpp | 18 +- .../maxwell/translate/impl/texture_load.cpp | 18 +- .../translate/impl/texture_load_swizzled.cpp | 2 +- .../translate/impl/texture_mipmap_level.cpp | 18 +- 14 files changed, 365 insertions(+), 68 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index a2104bdb3..17be0c639 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1620,6 +1620,17 @@ Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const V return Inst(op, Flags{info}, handle, coords, derivates, offset, lod_clamp); } +Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageRead : Opcode::BindlessImageRead}; + return Inst(op, Flags{info}, handle, coords); +} + +void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite}; + Inst(op, Flags{info}, handle, coords, color); +} + U1 IREmitter::VoteAll(const U1& value) { return Inst(Opcode::VoteAll, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 2cab1dc5d..ec60070ef 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -265,20 +265,19 @@ public: [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info); - [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset, const Value& offset2, TextureInstInfo info); - [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, const Value& offset2, const F32& dref, TextureInstInfo info); - [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset, const U32& lod, const U32& multisampling, TextureInstInfo info); - [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords, const Value& derivates, const Value& offset, const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info); + [[nodiscard]] void ImageWrite(const Value& handle, const Value& coords, const Value& color, + TextureInstInfo info); [[nodiscard]] U1 VoteAll(const U1& value); [[nodiscard]] U1 VoteAny(const U1& value); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 461671326..447e9703c 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -43,11 +43,13 @@ static_assert(sizeof(FpControl) <= sizeof(u32)); union TextureInstInfo { u32 raw; BitField<0, 8, TextureType> type; - BitField<8, 1, u32> has_bias; - BitField<9, 1, u32> has_lod_clamp; - BitField<10, 1, u32> relaxed_precision; - BitField<11, 2, u32> gather_component; - BitField<13, 2, u32> num_derivates; + BitField<8, 1, u32> is_depth; + BitField<9, 1, u32> has_bias; + BitField<10, 1, u32> has_lod_clamp; + BitField<11, 1, u32> relaxed_precision; + BitField<12, 2, u32> gather_component; + BitField<14, 2, u32> num_derivates; + BitField<16, 3, ImageFormat> image_format; }; static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 1697de965..82c5b37ba 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -389,6 +389,8 @@ OPCODE(BindlessImageFetch, F32x4, U32, OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, ) OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageRead, U32x4, U32, Opaque, ) +OPCODE(BindlessImageWrite, Void, U32, Opaque, U32x4, ) OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -400,6 +402,8 @@ OPCODE(BoundImageFetch, F32x4, U32, OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, ) OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageRead, U32x4, U32, Opaque, ) +OPCODE(BoundImageWrite, Void, U32, Opaque, U32x4, ) OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -411,6 +415,8 @@ OPCODE(ImageFetch, F32x4, U32, OPCODE(ImageQueryDimensions, U32x4, U32, U32, ) OPCODE(ImageQueryLod, F32x4, U32, Opaque, ) OPCODE(ImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageRead, U32x4, U32, Opaque, ) +OPCODE(ImageWrite, Void, U32, Opaque, U32x4, ) // Warp operations OPCODE(VoteAll, U1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index c23901052..327941223 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -281,18 +281,10 @@ void TranslatorVisitor::SUATOM_cas(u64) { ThrowNotImplemented(Opcode::SUATOM_cas); } -void TranslatorVisitor::SULD(u64) { - ThrowNotImplemented(Opcode::SULD); -} - void TranslatorVisitor::SURED(u64) { ThrowNotImplemented(Opcode::SURED); } -void TranslatorVisitor::SUST(u64) { - ThrowNotImplemented(Opcode::SUST); -} - void TranslatorVisitor::SYNC(u64) { ThrowNotImplemented(Opcode::SYNC); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp new file mode 100644 index 000000000..9a2d16a6e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp @@ -0,0 +1,280 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Type : u64 { + _1D, + BUFFER_1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, +}; + +constexpr unsigned R = 1 << 0; +constexpr unsigned G = 1 << 1; +constexpr unsigned B = 1 << 2; +constexpr unsigned A = 1 << 3; + +constexpr std::array MASK{ + 0U, // + R, // + G, // + R | G, // + B, // + R | B, // + G | B, // + R | G | B, // + A, // + R | A, // + G | A, // + R | G | A, // + B | A, // + R | B | A, // + G | B | A, // + R | G | B | A, // +}; + +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, + B128, +}; + +enum class Clamp : u64 { + IGN, + Default, + TRAP, +}; + +enum class LoadCache : u64 { + Default, + CG, + CI, + CV, +}; + +enum class StoreCache : u64 { + Default, + CG, + CS, + WT, +}; + +ImageFormat Format(Size size) { + switch (size) { + case Size::U8: + return ImageFormat::R8_UINT; + case Size::S8: + return ImageFormat::R8_SINT; + case Size::U16: + return ImageFormat::R16_UINT; + case Size::S16: + return ImageFormat::R16_SINT; + case Size::B32: + return ImageFormat::R32_UINT; + case Size::B64: + return ImageFormat::R32G32_UINT; + case Size::B128: + return ImageFormat::R32G32B32A32_UINT; + } + throw NotImplementedException("Invalid size {}", size); +} + +int SizeInRegs(Size size) { + switch (size) { + case Size::U8: + case Size::S8: + case Size::U16: + case Size::S16: + case Size::B32: + return 1; + case Size::B64: + return 2; + case Size::B128: + return 4; + } + throw NotImplementedException("Invalid size {}", size); +} + +TextureType GetType(Type type) { + switch (type) { + case Type::_1D: + return TextureType::Color1D; + case Type::BUFFER_1D: + return TextureType::Buffer; + case Type::ARRAY_1D: + return TextureType::ColorArray1D; + case Type::_2D: + return TextureType::Color2D; + case Type::ARRAY_2D: + return TextureType::ColorArray2D; + case Type::_3D: + return TextureType::Color3D; + } + throw NotImplementedException("Invalid type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { + const auto array{[&](int index) { + return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16)); + }}; + switch (type) { + case Type::_1D: + case Type::BUFFER_1D: + return v.X(reg); + case Type::ARRAY_1D: + return v.ir.CompositeConstruct(v.X(reg), array(1)); + case Type::_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); + case Type::ARRAY_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2)); + case Type::_3D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 3)); + } + throw NotImplementedException("Invalid type {}", type); +} + +unsigned SwizzleMask(u64 swizzle) { + if (swizzle == 0 || swizzle >= MASK.size()) { + throw NotImplementedException("Invalid swizzle {}", swizzle); + } + return MASK[swizzle]; +} + +IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) { + std::array colors; + for (int i = 0; i < num_regs; ++i) { + colors[i] = ir.GetReg(reg + i); + } + for (int i = num_regs; i < 4; ++i) { + colors[i] = ir.Imm32(0); + } + return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]); +} +} // Anonymous namespace + +void TranslatorVisitor::SULD(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> is_bound; + BitField<52, 1, u64> d; + BitField<23, 1, u64> ba; + BitField<33, 3, Type> type; + BitField<24, 2, LoadCache> cache; + BitField<20, 3, Size> size; // .D + BitField<20, 4, u64> swizzle; // .P + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<36, 13, u64> bound_offset; // is_bound + BitField<39, 8, IR::Reg> bindless_reg; // !is_bound + } const suld{insn}; + + if (suld.clamp != Clamp::IGN) { + throw NotImplementedException("Clamp {}", suld.clamp.Value()); + } + if (suld.cache != LoadCache::Default) { + throw NotImplementedException("Cache {}", suld.cache.Value()); + } + const bool is_typed{suld.d != 0}; + if (is_typed && suld.ba != 0) { + throw NotImplementedException("BA"); + } + + const ImageFormat format{is_typed ? Format(suld.size) : ImageFormat::Typeless}; + const TextureType type{GetType(suld.type)}; + const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)}; + const IR::U32 handle{suld.is_bound != 0 ? ir.Imm32(static_cast(suld.bound_offset * 4)) + : X(suld.bindless_reg)}; + IR::TextureInstInfo info{}; + info.type.Assign(type); + info.image_format.Assign(format); + + const IR::Value result{ir.ImageRead(handle, coords, info)}; + IR::Reg dest_reg{suld.dest_reg}; + if (is_typed) { + const int num_regs{SizeInRegs(suld.size)}; + for (int i = 0; i < num_regs; ++i) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + } + } else { + const unsigned mask{SwizzleMask(suld.swizzle)}; + const int bits{std::popcount(mask)}; + if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : bits)) { + throw NotImplementedException("Unaligned destination register"); + } + for (unsigned component = 0; component < 4; ++component) { + if (((mask >> component) & 1) == 0) { + continue; + } + X(dest_reg, IR::U32{ir.CompositeExtract(result, component)}); + ++dest_reg; + } + } +} + +void TranslatorVisitor::SUST(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> is_bound; + BitField<52, 1, u64> d; + BitField<23, 1, u64> ba; + BitField<33, 3, Type> type; + BitField<24, 2, StoreCache> cache; + BitField<20, 3, Size> size; // .D + BitField<20, 4, u64> swizzle; // .P + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> data_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<36, 13, u64> bound_offset; // is_bound + BitField<39, 8, IR::Reg> bindless_reg; // !is_bound + } const sust{insn}; + + if (sust.clamp != Clamp::IGN) { + throw NotImplementedException("Clamp {}", sust.clamp.Value()); + } + if (sust.cache != StoreCache::Default) { + throw NotImplementedException("Cache {}", sust.cache.Value()); + } + const bool is_typed{sust.d != 0}; + if (is_typed && sust.ba != 0) { + throw NotImplementedException("BA"); + } + const ImageFormat format{is_typed ? Format(sust.size) : ImageFormat::Typeless}; + const TextureType type{GetType(sust.type)}; + const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)}; + const IR::U32 handle{sust.is_bound != 0 ? ir.Imm32(static_cast(sust.bound_offset * 4)) + : X(sust.bindless_reg)}; + IR::TextureInstInfo info{}; + info.type.Assign(type); + info.image_format.Assign(format); + + IR::Value color; + if (is_typed) { + color = MakeColor(ir, sust.data_reg, SizeInRegs(sust.size)); + } else { + const unsigned mask{SwizzleMask(sust.swizzle)}; + if (mask != 0xf) { + throw NotImplementedException("Non-full mask"); + } + color = MakeColor(ir, sust.data_reg, 4); + } + ir.ImageWrite(handle, coords, color, info); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 95d416586..9671d115e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -33,24 +33,24 @@ enum class TextureType : u64 { ARRAY_CUBE, }; -Shader::TextureType GetType(TextureType type, bool dc) { +Shader::TextureType GetType(TextureType type) { switch (type) { case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + return Shader::TextureType::Color1D; case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + return Shader::TextureType::ColorArray1D; case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + return Shader::TextureType::Color2D; case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + return Shader::TextureType::ColorArray2D; case TextureType::_3D: - return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + return Shader::TextureType::Color3D; case TextureType::ARRAY_3D: throw NotImplementedException("3D array texture type"); case TextureType::CUBE: - return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + return Shader::TextureType::ColorCube; case TextureType::ARRAY_CUBE: - return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + return Shader::TextureType::ColorArrayCube; } throw NotImplementedException("Invalid texture type {}", type); } @@ -169,7 +169,8 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, dref = v.F(meta_reg++); } IR::TextureInstInfo info{}; - info.type.Assign(GetType(tex.type, tex.dc != 0)); + info.type.Assign(GetType(tex.type)); + info.is_depth.Assign(tex.dc != 0 ? 1 : 0); info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); info.has_lod_clamp.Assign(lc ? 1 : 0); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index fe2c7db85..3500a4559 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -95,18 +95,21 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { {}, info); case 4: // 2D.DC CheckAlignment(reg_a, 2); - info.type.Assign(TextureType::Shadow2D); + info.type.Assign(TextureType::Color2D); + info.is_depth.Assign(1); return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {}, {}, {}, info); case 5: // 2D.LL.DC CheckAlignment(reg_a, 2); CheckAlignment(reg_b, 2); - info.type.Assign(TextureType::Shadow2D); + info.type.Assign(TextureType::Color2D); + info.is_depth.Assign(1); return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b + 1), v.F(reg_b), {}, {}, info); case 6: // 2D.LZ.DC CheckAlignment(reg_a, 2); - info.type.Assign(TextureType::Shadow2D); + info.type.Assign(TextureType::Color2D); + info.is_depth.Assign(1); return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), zero, {}, {}, info); case 7: // ARRAY_2D @@ -124,7 +127,8 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { case 9: // ARRAY_2D.LZ.DC CheckAlignment(reg_a, 2); CheckAlignment(reg_b, 2); - info.type.Assign(TextureType::ShadowArray2D); + info.type.Assign(TextureType::ColorArray2D); + info.is_depth.Assign(1); return v.ir.ImageSampleDrefExplicitLod( handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), v.F(reg_b + 1), zero, {}, {}, info); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp index b2f9cda46..218cbc1a8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp @@ -37,24 +37,24 @@ enum class ComponentType : u64 { A = 3, }; -Shader::TextureType GetType(TextureType type, bool dc) { +Shader::TextureType GetType(TextureType type) { switch (type) { case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + return Shader::TextureType::Color1D; case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + return Shader::TextureType::ColorArray1D; case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + return Shader::TextureType::Color2D; case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + return Shader::TextureType::ColorArray2D; case TextureType::_3D: - return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + return Shader::TextureType::Color3D; case TextureType::ARRAY_3D: throw NotImplementedException("3D array texture type"); case TextureType::CUBE: - return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + return Shader::TextureType::ColorCube; case TextureType::ARRAY_CUBE: - return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + return Shader::TextureType::ColorArrayCube; } throw NotImplementedException("Invalid texture type {}", type); } @@ -163,7 +163,8 @@ void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetTy dref = v.F(meta_reg++); } IR::TextureInstInfo info{}; - info.type.Assign(GetType(tld4.type, tld4.dc != 0)); + info.type.Assign(GetType(tld4.type)); + info.is_depth.Assign(tld4.dc != 0 ? 1 : 0); info.gather_component.Assign(static_cast(component_type)); const IR::Value sample{[&] { if (tld4.dc == 0) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp index 2ba9c1018..34efa2d50 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp @@ -59,7 +59,8 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { info.relaxed_precision.Assign(1); } info.gather_component.Assign(static_cast(tld4s.component_type.Value())); - info.type.Assign(tld4s.dc != 0 ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D); + info.type.Assign(Shader::TextureType::Color2D); + info.is_depth.Assign(tld4s.dc != 0 ? 1 : 0); IR::Value coords; if (tld4s.aoffi != 0) { CheckAlignment(reg_a, 2); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp index c66468a48..c3fe3ffda 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp @@ -23,24 +23,24 @@ enum class TextureType : u64 { ARRAY_CUBE, }; -Shader::TextureType GetType(TextureType type, bool dc) { +Shader::TextureType GetType(TextureType type) { switch (type) { case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + return Shader::TextureType::Color1D; case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + return Shader::TextureType::ColorArray1D; case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + return Shader::TextureType::Color2D; case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + return Shader::TextureType::ColorArray2D; case TextureType::_3D: - return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + return Shader::TextureType::Color3D; case TextureType::ARRAY_3D: throw NotImplementedException("3D array texture type"); case TextureType::CUBE: - return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + return Shader::TextureType::ColorCube; case TextureType::ARRAY_CUBE: - return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + return Shader::TextureType::ColorArrayCube; } throw NotImplementedException("Invalid texture type {}", type); } @@ -152,7 +152,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { } IR::TextureInstInfo info{}; - info.type.Assign(GetType(txd.type, false)); + info.type.Assign(GetType(txd.type)); info.num_derivates.Assign(num_derivates); info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0); const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp index 987b7ec34..983058303 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp @@ -23,24 +23,24 @@ enum class TextureType : u64 { ARRAY_CUBE, }; -Shader::TextureType GetType(TextureType type, bool dc) { +Shader::TextureType GetType(TextureType type) { switch (type) { case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + return Shader::TextureType::Color1D; case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + return Shader::TextureType::ColorArray1D; case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + return Shader::TextureType::Color2D; case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + return Shader::TextureType::ColorArray2D; case TextureType::_3D: - return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + return Shader::TextureType::Color3D; case TextureType::ARRAY_3D: throw NotImplementedException("3D array texture type"); case TextureType::CUBE: - return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + return Shader::TextureType::ColorCube; case TextureType::ARRAY_CUBE: - return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + return Shader::TextureType::ColorArrayCube; } throw NotImplementedException("Invalid texture type {}", type); } @@ -137,7 +137,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { throw NotImplementedException("TLD.CL - CLAMP is not implmented"); } IR::TextureInstInfo info{}; - info.type.Assign(GetType(tld.type, false)); + info.type.Assign(GetType(tld.type)); const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)}; IR::Reg dest_reg{tld.dest_reg}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp index 0863bdfcd..5dd7e31b2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp @@ -2,7 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include +#include #include "common/bit_field.h" #include "common/common_types.h" diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp index b6efc04f0..2277d24ff 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -23,24 +23,24 @@ enum class TextureType : u64 { ARRAY_CUBE, }; -Shader::TextureType GetType(TextureType type, bool dc) { +Shader::TextureType GetType(TextureType type) { switch (type) { case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + return Shader::TextureType::Color1D; case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + return Shader::TextureType::ColorArray1D; case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + return Shader::TextureType::Color2D; case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + return Shader::TextureType::ColorArray2D; case TextureType::_3D: - return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + return Shader::TextureType::Color3D; case TextureType::ARRAY_3D: throw NotImplementedException("3D array texture type"); case TextureType::CUBE: - return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + return Shader::TextureType::ColorCube; case TextureType::ARRAY_CUBE: - return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + return Shader::TextureType::ColorArrayCube; } throw NotImplementedException("Invalid texture type {}", type); } @@ -97,7 +97,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { handle = v.ir.Imm32(static_cast(tmml.cbuf_offset.Value() * 4)); } IR::TextureInstInfo info{}; - info.type.Assign(GetType(tmml.type, false)); + info.type.Assign(GetType(tmml.type)); const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)}; IR::Reg dest_reg{tmml.dest_reg}; -- cgit v1.2.3 From 8cea39b5a63b350f7c6334b91b9d7e2b30bd61bf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 10 Apr 2021 03:52:49 -0300 Subject: shader: Remove outdated comment in F2I --- .../maxwell/translate/impl/floating_point_conversion_integer.cpp | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 3cb896950..92b1ce015 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -142,10 +142,6 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); } }()}; - - // TODO: Handle out of bounds conversions. - // For example converting F32 65537.0 to U16, the expected value is 0xffff, - const bool is_signed{f2i.is_signed != 0}; const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed); -- cgit v1.2.3 From 1be6705408d1a3454146c705fae3dc55031e966e Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 10 Apr 2021 00:29:12 +0200 Subject: shader: Implement CC for ISET, FSET, PSET, CSET, and DSET Throw when other instructions are missing CC. --- .../maxwell/translate/impl/bitfield_extract.cpp | 5 +++++ .../frontend/maxwell/translate/impl/bitfield_insert.cpp | 5 +++++ .../maxwell/translate/impl/condition_code_set.cpp | 16 ++++++++++++++-- .../maxwell/translate/impl/double_compare_and_set.cpp | 17 +++++++++++++++-- .../translate/impl/double_fused_multiply_add.cpp | 5 +++++ .../frontend/maxwell/translate/impl/double_min_max.cpp | 5 +++++ .../frontend/maxwell/translate/impl/double_multiply.cpp | 5 +++++ .../translate/impl/floating_point_compare_and_set.cpp | 17 +++++++++++++++-- .../impl/floating_point_conversion_floating_point.cpp | 5 +++++ .../maxwell/translate/impl/floating_point_min_max.cpp | 5 +++++ .../maxwell/translate/impl/integer_compare_and_set.cpp | 17 ++++++++++++++--- .../impl/integer_floating_point_conversion.cpp | 2 +- .../maxwell/translate/impl/integer_funnel_shift.cpp | 5 +++++ .../maxwell/translate/impl/integer_minimum_maximum.cpp | 5 +++++ .../maxwell/translate/impl/integer_shift_right.cpp | 4 ++++ .../maxwell/translate/impl/load_effective_address.cpp | 10 +++++++++- .../translate/impl/logic_operation_three_input.cpp | 5 +++++ .../maxwell/translate/impl/predicate_set_register.cpp | 16 ++++++++++++++-- 18 files changed, 136 insertions(+), 13 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp index 4a03e6939..0738ae7a6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp @@ -14,9 +14,14 @@ void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) { BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> offset_reg; BitField<40, 1, u64> brev; + BitField<47, 1, u64> cc; BitField<48, 1, u64> is_signed; } const bfe{insn}; + if (bfe.cc != 0) { + throw NotImplementedException("BFE CC"); + } + const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)}; const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp index ee312c30d..fb7f821e6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp @@ -13,8 +13,13 @@ void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& ba u64 insn; BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> insert_reg; + BitField<47, 1, u64> cc; } const bfi{insn}; + if (bfi.cc != 0) { + throw NotImplementedException("BFI CC"); + } + const IR::U32 offset{v.ir.BitFieldExtract(src_a, v.ir.Imm32(0), v.ir.Imm32(8), false)}; const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)}; const IR::U32 max_size{v.ir.Imm32(32)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp index ea0c40a54..420f2fb94 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp @@ -18,17 +18,29 @@ void TranslatorVisitor::CSET(u64 insn) { BitField<42, 1, u64> neg_bop_pred; BitField<44, 1, u64> bf; BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; } const cset{insn}; const IR::U32 one_mask{ir.Imm32(-1)}; const IR::U32 fp_one{ir.Imm32(0x3f800000)}; - const IR::U32 fail_result{ir.Imm32(0)}; + const IR::U32 zero{ir.Imm32(0)}; const IR::U32 pass_result{cset.bf == 0 ? one_mask : fp_one}; const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)}; const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)}; const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)}; - const IR::U32 result{ir.Select(pred_result, pass_result, fail_result)}; + const IR::U32 result{ir.Select(pred_result, pass_result, zero)}; X(cset.dest_reg, result); + if (cset.cc != 0) { + const IR::U1 is_zero{ir.IEqual(result, zero)}; + SetZFlag(is_zero); + if (cset.bf != 0) { + ResetSFlag(); + } else { + SetSFlag(ir.LogicalNot(is_zero)); + } + ResetOFlag(); + ResetCFlag(); + } } void TranslatorVisitor::CSETP(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp index e2ec852c9..1173192e4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp @@ -19,6 +19,7 @@ void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { BitField<43, 1, u64> negate_a; BitField<44, 1, u64> abs_b; BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; BitField<48, 4, FPCompareOp> compare_op; BitField<52, 1, u64> bf; BitField<53, 1, u64> negate_b; @@ -37,10 +38,22 @@ void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { const IR::U32 one_mask{v.ir.Imm32(-1)}; const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; - const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 zero{v.ir.Imm32(0)}; const IR::U32 pass_result{dset.bf == 0 ? one_mask : fp_one}; + const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; - v.X(dset.dest_reg, IR::U32{v.ir.Select(bop_result, pass_result, fail_result)}); + v.X(dset.dest_reg, result); + if (dset.cc != 0) { + const IR::U1 is_zero{v.ir.IEqual(result, zero)}; + v.SetZFlag(is_zero); + if (dset.bf != 0) { + v.ResetSFlag(); + } else { + v.SetSFlag(v.ir.LogicalNot(is_zero)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } } } // Anonymous namespace diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp index 723841496..f66097014 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp @@ -16,10 +16,15 @@ void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& s BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> src_a_reg; BitField<50, 2, FpRounding> fp_rounding; + BitField<47, 1, u64> cc; BitField<48, 1, u64> neg_b; BitField<49, 1, u64> neg_c; } const dfma{insn}; + if (dfma.cc != 0) { + throw NotImplementedException("DFMA CC"); + } + const IR::F64 src_a{v.D(dfma.src_a_reg)}; const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)}; const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp index 55a224db3..6b551847c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp @@ -17,10 +17,15 @@ void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { BitField<42, 1, u64> neg_pred; BitField<45, 1, u64> negate_b; BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; BitField<48, 1, u64> negate_a; BitField<49, 1, u64> abs_b; } const dmnmx{insn}; + if (dmnmx.cc != 0) { + throw NotImplementedException("DMNMX CC"); + } + const IR::U1 pred{v.ir.GetPred(dmnmx.pred)}; const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)}; const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp index 4a49299a0..c0159fb65 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp @@ -16,9 +16,14 @@ void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> src_a_reg; BitField<39, 2, FpRounding> fp_rounding; + BitField<47, 1, u64> cc; BitField<48, 1, u64> neg; } const dmul{insn}; + if (dmul.cc != 0) { + throw NotImplementedException("DMUL CC"); + } + const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)}; const IR::FpControl control{ .no_contraction = true, diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp index b9f4ee0d9..eece4f28f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp @@ -19,6 +19,7 @@ void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { BitField<43, 1, u64> negate_a; BitField<44, 1, u64> abs_b; BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; BitField<48, 4, FPCompareOp> compare_op; BitField<52, 1, u64> bf; BitField<53, 1, u64> negate_b; @@ -43,10 +44,22 @@ void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::U32 one_mask{v.ir.Imm32(-1)}; const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; - const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 zero{v.ir.Imm32(0)}; const IR::U32 pass_result{fset.bf == 0 ? one_mask : fp_one}; + const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; - v.X(fset.dest_reg, IR::U32{v.ir.Select(bop_result, pass_result, fail_result)}); + v.X(fset.dest_reg, result); + if (fset.cc != 0) { + const IR::U1 is_zero{v.ir.IEqual(result, zero)}; + v.SetZFlag(is_zero); + if (fset.bf != 0) { + v.ResetSFlag(); + } else { + v.SetSFlag(v.ir.LogicalNot(is_zero)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } } } // Anonymous namespace diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp index 035f8782a..ce2cf470d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -41,6 +41,7 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { BitField<0, 8, IR::Reg> dest_reg; BitField<44, 1, u64> ftz; BitField<45, 1, u64> neg; + BitField<47, 1, u64> cc; BitField<50, 1, u64> sat; BitField<39, 4, u64> rounding_op; BitField<39, 2, FpRounding> rounding; @@ -53,6 +54,10 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { } } const f2f{insn}; + if (f2f.cc != 0) { + throw NotImplementedException("F2F CC"); + } + IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)}; const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp index 8ae437528..c0d6ee5af 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp @@ -18,10 +18,15 @@ void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { BitField<44, 1, u64> ftz; BitField<45, 1, u64> negate_b; BitField<46, 1, u64> abs_a; + BitField<47, 1, u64> cc; BitField<48, 1, u64> negate_a; BitField<49, 1, u64> abs_b; } const fmnmx{insn}; + if (fmnmx.cc) { + throw NotImplementedException("FMNMX CC"); + } + const IR::U1 pred{v.ir.GetPred(fmnmx.pred)}; const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)}; const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp index 914af010f..a2cd8d7c6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp @@ -19,6 +19,7 @@ void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { BitField<43, 1, u64> x; BitField<44, 1, u64> bf; BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> cc; BitField<48, 1, u64> is_signed; BitField<49, 3, CompareOp> compare_op; } const iset{insn}; @@ -38,12 +39,22 @@ void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { const IR::U32 one_mask{v.ir.Imm32(-1)}; const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; - const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 zero{v.ir.Imm32(0)}; const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one}; - - const IR::U32 result{v.ir.Select(bop_result, pass_result, fail_result)}; + const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; v.X(iset.dest_reg, result); + if (iset.cc != 0) { + const IR::U1 is_zero{v.ir.IEqual(result, zero)}; + v.SetZFlag(is_zero); + if (iset.bf != 0) { + v.ResetSFlag(); + } else { + v.SetSFlag(v.ir.LogicalNot(is_zero)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } } } // Anonymous namespace diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp index 5a0fc36a0..3c233597f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -62,7 +62,7 @@ IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) { void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { const Encoding i2f{insn}; if (i2f.cc != 0) { - throw NotImplementedException("CC"); + throw NotImplementedException("I2F CC"); } const bool is_signed{i2f.is_signed != 0}; int src_bitsize{}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp index d8d6c939e..5feefc0ce 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp @@ -33,9 +33,14 @@ void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& hi BitField<0, 8, IR::Reg> dest_reg; BitField<0, 8, IR::Reg> lo_bits_reg; BitField<37, 2, MaxShift> max_shift; + BitField<47, 1, u64> cc; BitField<48, 2, u64> x_mode; BitField<50, 1, u64> wrap; } const shf{insn}; + + if (shf.cc != 0) { + throw NotImplementedException("SHF CC"); + } if (shf.x_mode != 0) { throw NotImplementedException("SHF X Mode"); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp index 40f14ab8a..1badbacc4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp @@ -16,9 +16,14 @@ void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { BitField<39, 3, IR::Pred> pred; BitField<42, 1, u64> neg_pred; BitField<43, 2, u64> mode; + BitField<47, 1, u64> cc; BitField<48, 1, u64> is_signed; } const imnmx{insn}; + if (imnmx.cc != 0) { + throw NotImplementedException("IMNMX CC"); + } + if (imnmx.mode != 0) { throw NotImplementedException("IMNMX.MODE"); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp index 4025b1358..be00bb605 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp @@ -16,12 +16,16 @@ void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) { BitField<39, 1, u64> is_wrapped; BitField<40, 1, u64> brev; BitField<43, 1, u64> xmode; + BitField<47, 1, u64> cc; BitField<48, 1, u64> is_signed; } const shr{insn}; if (shr.xmode != 0) { throw NotImplementedException("SHR.XMODE"); } + if (shr.cc != 0) { + throw NotImplementedException("SHR.CC"); + } IR::U32 base{v.X(shr.src_reg_a)}; if (shr.brev == 1) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp index 784588e83..4a0f04e47 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp @@ -14,6 +14,7 @@ void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_ u64 insn; BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> offset_lo_reg; + BitField<47, 1, u64> cc; BitField<48, 3, IR::Pred> pred; } const lea{insn}; @@ -21,7 +22,10 @@ void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_ throw NotImplementedException("LEA.HI X"); } if (lea.pred != IR::Pred::PT) { - throw NotImplementedException("LEA.LO Pred"); + throw NotImplementedException("LEA.HI Pred"); + } + if (lea.cc != 0) { + throw NotImplementedException("LEA.HI CC"); } const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; @@ -44,6 +48,7 @@ void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) { BitField<39, 5, u64> scale; BitField<45, 1, u64> neg; BitField<46, 1, u64> x; + BitField<47, 1, u64> cc; BitField<48, 3, IR::Pred> pred; } const lea{insn}; if (lea.x != 0) { @@ -52,6 +57,9 @@ void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) { if (lea.pred != IR::Pred::PT) { throw NotImplementedException("LEA.LO Pred"); } + if (lea.cc != 0) { + throw NotImplementedException("LEA.LO CC"); + } const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; const s32 scale{static_cast(lea.scale)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp index 256c47504..e0fe47912 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp @@ -73,8 +73,13 @@ IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& u64 insn; BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> src_reg; + BitField<47, 1, u64> cc; } const lop3{insn}; + if (lop3.cc != 0) { + throw NotImplementedException("LOP3 CC"); + } + const IR::U32 op_a{v.X(lop3.src_reg)}; const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)}; v.X(lop3.dest_reg, result); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp index 6c15963fa..b02789874 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp @@ -21,6 +21,7 @@ void TranslatorVisitor::PSET(u64 insn) { BitField<42, 1, u64> neg_pred_c; BitField<44, 1, u64> bf; BitField<45, 2, BooleanOp> bop_2; + BitField<47, 1, u64> cc; } const pset{insn}; const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; @@ -31,11 +32,22 @@ void TranslatorVisitor::PSET(u64 insn) { const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)}; const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)}; - const IR::U32 false_result{ir.Imm32(0)}; + const IR::U32 zero{ir.Imm32(0)}; - const IR::U32 result{ir.Select(res_2, true_result, false_result)}; + const IR::U32 result{ir.Select(res_2, true_result, zero)}; X(pset.dest_reg, result); + if (pset.cc != 0) { + const IR::U1 is_zero{ir.IEqual(result, zero)}; + SetZFlag(is_zero); + if (pset.bf != 0) { + ResetSFlag(); + } else { + SetSFlag(ir.LogicalNot(is_zero)); + } + ResetOFlag(); + ResetCFlag(); + } } } // namespace Shader::Maxwell -- cgit v1.2.3 From d404b871d595794184b8d80fc05682eb6e2792fe Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 10 Apr 2021 16:46:26 -0300 Subject: shader: Mark ImageWrite with side effects --- src/shader_recompiler/frontend/ir/microinstruction.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index ceb44e604..2df631791 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -93,6 +93,9 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::WriteSharedU32: case Opcode::WriteSharedU64: case Opcode::WriteSharedU128: + case Opcode::BindlessImageWrite: + case Opcode::BoundImageWrite: + case Opcode::ImageWrite: return true; default: return false; -- cgit v1.2.3 From 9280cd649a9c4cd53b929643377547db598bf5f0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 02:22:20 -0300 Subject: shader: Move LaneId to the warp emission file and fix AMD --- src/shader_recompiler/frontend/ir/opcodes.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 82c5b37ba..86ea02560 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -58,7 +58,6 @@ OPCODE(SetCFlag, Void, U1, OPCODE(SetOFlag, Void, U1, ) OPCODE(WorkgroupId, U32x3, ) OPCODE(LocalInvocationId, U32x3, ) -OPCODE(LaneId, U32, ) // Undefined OPCODE(UndefU1, U1, ) @@ -419,6 +418,7 @@ OPCODE(ImageRead, U32x4, U32, OPCODE(ImageWrite, Void, U32, Opaque, U32x4, ) // Warp operations +OPCODE(LaneId, U32, ) OPCODE(VoteAll, U1, U1, ) OPCODE(VoteAny, U1, U1, ) OPCODE(VoteEqual, U1, U1, ) -- cgit v1.2.3 From 3db2b3effa953ae66457b7a19b419fc4db2c4801 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 11 Apr 2021 02:07:02 -0400 Subject: shader: Implement ATOM/S and RED --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 200 ++++++++++++++++++- src/shader_recompiler/frontend/ir/ir_emitter.h | 39 ++++ .../frontend/ir/microinstruction.cpp | 66 ++++++ src/shader_recompiler/frontend/ir/opcodes.inc | 70 +++++++ .../impl/atomic_operations_global_memory.cpp | 222 +++++++++++++++++++++ .../impl/atomic_operations_shared_memory.cpp | 110 ++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 -- 7 files changed, 706 insertions(+), 13 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 17be0c639..a3339f624 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1284,6 +1284,204 @@ U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) return Inst(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs); } +U32 IREmitter::SharedAtomicIAdd(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicIAdd32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicSMin(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicSMin32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicUMin(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicUMin32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed) { + return is_signed ? SharedAtomicSMin(pointer_offset, value) + : SharedAtomicUMin(pointer_offset, value); +} + +U32 IREmitter::SharedAtomicSMax(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicSMax32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicUMax(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicUMax32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed) { + return is_signed ? SharedAtomicSMax(pointer_offset, value) + : SharedAtomicUMax(pointer_offset, value); +} + +U32 IREmitter::SharedAtomicInc(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicInc32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicDec(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicDec32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicAnd(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicAnd32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicOr(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicOr32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicXor(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicXor32, pointer_offset, value); +} + +U32U64 IREmitter::SharedAtomicExchange(const U32& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::SharedAtomicExchange32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::SharedAtomicExchange64, pointer_offset, value); + default: + ThrowInvalidType(pointer_offset.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicIAdd32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicIAdd64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicSMin32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicSMin64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicUMin32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicUMin64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, bool is_signed) { + return is_signed ? GlobalAtomicSMin(pointer_offset, value) + : GlobalAtomicUMin(pointer_offset, value); +} + +U32U64 IREmitter::GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicSMax32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicSMax64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicUMax32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicUMax64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, bool is_signed) { + return is_signed ? GlobalAtomicSMax(pointer_offset, value) + : GlobalAtomicUMax(pointer_offset, value); +} + +U32 IREmitter::GlobalAtomicInc(const U64& pointer_offset, const U32& value) { + return Inst(Opcode::GlobalAtomicInc32, pointer_offset, value); +} + +U32 IREmitter::GlobalAtomicDec(const U64& pointer_offset, const U32& value) { + return Inst(Opcode::GlobalAtomicDec32, pointer_offset, value); +} + +U32U64 IREmitter::GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicAnd32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicAnd64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicOr(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicOr32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicOr64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicXor(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicXor32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicXor64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicExchange32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicExchange64, pointer_offset, value); + default: + ThrowInvalidType(pointer_offset.Type()); + } +} + +F32 IREmitter::GlobalAtomicF32Add(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst(Opcode::GlobalAtomicAddF32, Flags{control}, pointer_offset, value); +} + +Value IREmitter::GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst(Opcode::GlobalAtomicAddF16x2, Flags{control}, pointer_offset, value); +} + +Value IREmitter::GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst(Opcode::GlobalAtomicMinF16x2, Flags{control}, pointer_offset, value); +} + +Value IREmitter::GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst(Opcode::GlobalAtomicMaxF16x2, Flags{control}, pointer_offset, value); +} + U1 IREmitter::LogicalOr(const U1& a, const U1& b) { return Inst(Opcode::LogicalOr, a, b); } @@ -1626,7 +1824,7 @@ Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInst } void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color, - TextureInstInfo info) { + TextureInstInfo info) { const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite}; Inst(op, Flags{info}, handle, coords, color); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index ec60070ef..f9cbf1304 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -228,6 +228,45 @@ public: [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U32 SharedAtomicIAdd(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicSMin(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicUMin(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed); + [[nodiscard]] U32 SharedAtomicSMax(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicUMax(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed); + [[nodiscard]] U32 SharedAtomicInc(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicDec(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicAnd(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicOr(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicXor(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32U64 SharedAtomicExchange(const U32& pointer_offset, const U32U64& value); + + [[nodiscard]] U32U64 GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, + bool is_signed); + [[nodiscard]] U32U64 GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, + bool is_signed); + [[nodiscard]] U32 GlobalAtomicInc(const U64& pointer_offset, const U32& value); + [[nodiscard]] U32 GlobalAtomicDec(const U64& pointer_offset, const U32& value); + [[nodiscard]] U32U64 GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicOr(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicXor(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value); + + [[nodiscard]] F32 GlobalAtomicF32Add(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] Value GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] Value GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] Value GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 2df631791..0f66c5627 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -93,6 +93,72 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::WriteSharedU32: case Opcode::WriteSharedU64: case Opcode::WriteSharedU128: + case Opcode::SharedAtomicIAdd32: + case Opcode::SharedAtomicSMin32: + case Opcode::SharedAtomicUMin32: + case Opcode::SharedAtomicSMax32: + case Opcode::SharedAtomicUMax32: + case Opcode::SharedAtomicInc32: + case Opcode::SharedAtomicDec32: + case Opcode::SharedAtomicAnd32: + case Opcode::SharedAtomicOr32: + case Opcode::SharedAtomicXor32: + case Opcode::SharedAtomicExchange32: + case Opcode::SharedAtomicExchange64: + case Opcode::GlobalAtomicIAdd32: + case Opcode::GlobalAtomicSMin32: + case Opcode::GlobalAtomicUMin32: + case Opcode::GlobalAtomicSMax32: + case Opcode::GlobalAtomicUMax32: + case Opcode::GlobalAtomicInc32: + case Opcode::GlobalAtomicDec32: + case Opcode::GlobalAtomicAnd32: + case Opcode::GlobalAtomicOr32: + case Opcode::GlobalAtomicXor32: + case Opcode::GlobalAtomicExchange32: + case Opcode::GlobalAtomicIAdd64: + case Opcode::GlobalAtomicSMin64: + case Opcode::GlobalAtomicUMin64: + case Opcode::GlobalAtomicSMax64: + case Opcode::GlobalAtomicUMax64: + case Opcode::GlobalAtomicAnd64: + case Opcode::GlobalAtomicOr64: + case Opcode::GlobalAtomicXor64: + case Opcode::GlobalAtomicExchange64: + case Opcode::GlobalAtomicAddF32: + case Opcode::GlobalAtomicAddF16x2: + case Opcode::GlobalAtomicAddF32x2: + case Opcode::GlobalAtomicMinF16x2: + case Opcode::GlobalAtomicMinF32x2: + case Opcode::GlobalAtomicMaxF16x2: + case Opcode::GlobalAtomicMaxF32x2: + case Opcode::StorageAtomicIAdd32: + case Opcode::StorageAtomicSMin32: + case Opcode::StorageAtomicUMin32: + case Opcode::StorageAtomicSMax32: + case Opcode::StorageAtomicUMax32: + case Opcode::StorageAtomicInc32: + case Opcode::StorageAtomicDec32: + case Opcode::StorageAtomicAnd32: + case Opcode::StorageAtomicOr32: + case Opcode::StorageAtomicXor32: + case Opcode::StorageAtomicExchange32: + case Opcode::StorageAtomicIAdd64: + case Opcode::StorageAtomicSMin64: + case Opcode::StorageAtomicUMin64: + case Opcode::StorageAtomicSMax64: + case Opcode::StorageAtomicUMax64: + case Opcode::StorageAtomicAnd64: + case Opcode::StorageAtomicOr64: + case Opcode::StorageAtomicXor64: + case Opcode::StorageAtomicExchange64: + case Opcode::StorageAtomicAddF32: + case Opcode::StorageAtomicAddF16x2: + case Opcode::StorageAtomicAddF32x2: + case Opcode::StorageAtomicMinF16x2: + case Opcode::StorageAtomicMinF32x2: + case Opcode::StorageAtomicMaxF16x2: + case Opcode::StorageAtomicMaxF32x2: case Opcode::BindlessImageWrite: case Opcode::BoundImageWrite: case Opcode::ImageWrite: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 86ea02560..dc776a73e 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -321,6 +321,76 @@ OPCODE(INotEqual, U1, U32, OPCODE(SGreaterThanEqual, U1, U32, U32, ) OPCODE(UGreaterThanEqual, U1, U32, U32, ) +// Atomic operations +OPCODE(SharedAtomicIAdd32, U32, U32, U32, ) +OPCODE(SharedAtomicSMin32, U32, U32, U32, ) +OPCODE(SharedAtomicUMin32, U32, U32, U32, ) +OPCODE(SharedAtomicSMax32, U32, U32, U32, ) +OPCODE(SharedAtomicUMax32, U32, U32, U32, ) +OPCODE(SharedAtomicInc32, U32, U32, U32, ) +OPCODE(SharedAtomicDec32, U32, U32, U32, ) +OPCODE(SharedAtomicAnd32, U32, U32, U32, ) +OPCODE(SharedAtomicOr32, U32, U32, U32, ) +OPCODE(SharedAtomicXor32, U32, U32, U32, ) +OPCODE(SharedAtomicExchange32, U32, U32, U32, ) +OPCODE(SharedAtomicExchange64, U64, U32, U64, ) + +OPCODE(GlobalAtomicIAdd32, U32, U64, U32, ) +OPCODE(GlobalAtomicSMin32, U32, U64, U32, ) +OPCODE(GlobalAtomicUMin32, U32, U64, U32, ) +OPCODE(GlobalAtomicSMax32, U32, U64, U32, ) +OPCODE(GlobalAtomicUMax32, U32, U64, U32, ) +OPCODE(GlobalAtomicInc32, U32, U64, U32, ) +OPCODE(GlobalAtomicDec32, U32, U64, U32, ) +OPCODE(GlobalAtomicAnd32, U32, U64, U32, ) +OPCODE(GlobalAtomicOr32, U32, U64, U32, ) +OPCODE(GlobalAtomicXor32, U32, U64, U32, ) +OPCODE(GlobalAtomicExchange32, U32, U64, U32, ) +OPCODE(GlobalAtomicIAdd64, U64, U64, U64, ) +OPCODE(GlobalAtomicSMin64, U64, U64, U64, ) +OPCODE(GlobalAtomicUMin64, U64, U64, U64, ) +OPCODE(GlobalAtomicSMax64, U64, U64, U64, ) +OPCODE(GlobalAtomicUMax64, U64, U64, U64, ) +OPCODE(GlobalAtomicAnd64, U64, U64, U64, ) +OPCODE(GlobalAtomicOr64, U64, U64, U64, ) +OPCODE(GlobalAtomicXor64, U64, U64, U64, ) +OPCODE(GlobalAtomicExchange64, U64, U64, U64, ) +OPCODE(GlobalAtomicAddF32, F32, U64, F32, ) +OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, ) +OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, ) +OPCODE(GlobalAtomicMinF16x2, U32, U64, F16x2, ) +OPCODE(GlobalAtomicMinF32x2, U32, U64, F32x2, ) +OPCODE(GlobalAtomicMaxF16x2, U32, U64, F16x2, ) +OPCODE(GlobalAtomicMaxF32x2, U32, U64, F32x2, ) + +OPCODE(StorageAtomicIAdd32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicSMin32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicUMin32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicSMax32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicUMax32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicInc32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicDec32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicAnd32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicOr32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicXor32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicExchange32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicIAdd64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicSMin64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicUMin64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicSMax64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicUMax64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicAnd64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicOr64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicXor64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, ) +OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, ) +OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, ) +OPCODE(StorageAtomicMinF16x2, U32, U32, U32, F16x2, ) +OPCODE(StorageAtomicMinF32x2, U32, U32, U32, F32x2, ) +OPCODE(StorageAtomicMaxF16x2, U32, U32, U32, F16x2, ) +OPCODE(StorageAtomicMaxF32x2, U32, U32, U32, F32x2, ) + // Logical operations OPCODE(LogicalOr, U1, U1, U1, ) OPCODE(LogicalAnd, U1, U1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp new file mode 100644 index 000000000..7a32c5eb3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp @@ -0,0 +1,222 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class AtomOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, + SAFEADD, +}; + +enum class AtomSize : u64 { + U32, + S32, + U64, + F32, + F16x2, + S64, +}; + +IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b, + AtomOp op, bool is_signed) { + switch (op) { + case AtomOp::ADD: + return ir.GlobalAtomicIAdd(offset, op_b); + case AtomOp::MIN: + return ir.GlobalAtomicIMin(offset, op_b, is_signed); + case AtomOp::MAX: + return ir.GlobalAtomicIMax(offset, op_b, is_signed); + case AtomOp::INC: + return ir.GlobalAtomicInc(offset, op_b); + case AtomOp::DEC: + return ir.GlobalAtomicDec(offset, op_b); + case AtomOp::AND: + return ir.GlobalAtomicAnd(offset, op_b); + case AtomOp::OR: + return ir.GlobalAtomicOr(offset, op_b); + case AtomOp::XOR: + return ir.GlobalAtomicXor(offset, op_b); + case AtomOp::EXCH: + return ir.GlobalAtomicExchange(offset, op_b); + default: + throw NotImplementedException("Integer Atom Operation {}", op); + } +} + +IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op, + AtomSize size) { + static constexpr IR::FpControl f16_control{ + .no_contraction{false}, + .rounding{IR::FpRounding::RN}, + .fmz_mode{IR::FmzMode::DontCare}, + }; + static constexpr IR::FpControl f32_control{ + .no_contraction{false}, + .rounding{IR::FpRounding::RN}, + .fmz_mode{IR::FmzMode::FTZ}, + }; + switch (op) { + case AtomOp::ADD: + return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control) + : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control); + case AtomOp::MIN: + return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control); + case AtomOp::MAX: + return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control); + default: + throw NotImplementedException("FP Atom Operation {}", op); + } +} + +IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> addr_reg; + BitField<28, 20, s64> addr_offset; + BitField<28, 20, u64> rz_addr_offset; + BitField<48, 1, u64> e; + } const mem{insn}; + + const IR::U64 address{[&]() -> IR::U64 { + if (mem.e == 0) { + return v.ir.UConvert(64, v.X(mem.addr_reg)); + } + return v.L(mem.addr_reg); + }()}; + const u64 addr_offset{[&]() -> u64 { + if (mem.addr_reg == IR::Reg::RZ) { + // When RZ is used, the address is an absolute address + return static_cast(mem.rz_addr_offset.Value()); + } else { + return static_cast(mem.addr_offset.Value()); + } + }()}; + return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); +} + +bool AtomOpNotApplicable(AtomSize size, AtomOp op) { + // TODO: SAFEADD + switch (size) { + case AtomSize::S32: + case AtomSize::U64: + return (op == AtomOp::INC || op == AtomOp::DEC); + case AtomSize::S64: + return !(op == AtomOp::MIN || op == AtomOp::MAX); + case AtomSize::F32: + return op != AtomOp::ADD; + case AtomSize::F16x2: + return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX); + default: + return false; + } +} + +IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + case AtomSize::F32: + case AtomSize::F16x2: + return ir.LoadGlobal32(offset); + case AtomSize::U64: + case AtomSize::S64: + return ir.PackUint2x32(ir.LoadGlobal64(offset)); + default: + throw NotImplementedException("Atom Size {}", size); + } +} + +void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + case AtomSize::F16x2: + return v.X(dest_reg, IR::U32{result}); + case AtomSize::U64: + case AtomSize::S64: + return v.L(dest_reg, IR::U64{result}); + case AtomSize::F32: + return v.F(dest_reg, IR::F32{result}); + default: + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::ATOM(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<49, 3, AtomSize> size; + BitField<52, 4, AtomOp> op; + } const atom{insn}; + + const bool size_64{atom.size == AtomSize::U64 || atom.size == AtomSize::S64}; + const bool is_signed{atom.size == AtomSize::S32 || atom.size == AtomSize::S64}; + const bool is_integer{atom.size != AtomSize::F32 && atom.size != AtomSize::F16x2}; + const IR::U64 offset{AtomOffset(*this, insn)}; + IR::Value result; + + if (AtomOpNotApplicable(atom.size, atom.op)) { + result = LoadGlobal(ir, offset, atom.size); + } else if (!is_integer) { + if (atom.size == AtomSize::F32) { + result = ApplyFpAtomOp(ir, offset, F(atom.src_reg_b), atom.op, atom.size); + } else { + const IR::Value src_b{ir.UnpackFloat2x16(X(atom.src_reg_b))}; + result = ApplyFpAtomOp(ir, offset, src_b, atom.op, atom.size); + } + } else if (size_64) { + result = ApplyIntegerAtomOp(ir, offset, L(atom.src_reg_b), atom.op, is_signed); + } else { + result = ApplyIntegerAtomOp(ir, offset, X(atom.src_reg_b), atom.op, is_signed); + } + StoreResult(*this, atom.dest_reg, result, atom.size); +} + +void TranslatorVisitor::RED(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_reg_b; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 3, AtomSize> size; + BitField<23, 3, AtomOp> op; + } const red{insn}; + + if (AtomOpNotApplicable(red.size, red.op)) { + return; + } + const bool size_64{red.size == AtomSize::U64 || red.size == AtomSize::S64}; + const bool is_signed{red.size == AtomSize::S32 || red.size == AtomSize::S64}; + const bool is_integer{red.size != AtomSize::F32 && red.size != AtomSize::F16x2}; + const IR::U64 offset{AtomOffset(*this, insn)}; + if (!is_integer) { + if (red.size == AtomSize::F32) { + ApplyFpAtomOp(ir, offset, F(red.src_reg_b), red.op, red.size); + } else { + const IR::Value src_b{ir.UnpackFloat2x16(X(red.src_reg_b))}; + ApplyFpAtomOp(ir, offset, src_b, red.op, red.size); + } + } else if (size_64) { + ApplyIntegerAtomOp(ir, offset, L(red.src_reg_b), red.op, is_signed); + } else { + ApplyIntegerAtomOp(ir, offset, X(red.src_reg_b), red.op, is_signed); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp new file mode 100644 index 000000000..8b974621e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp @@ -0,0 +1,110 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class AtomOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, +}; + +enum class AtomsSize : u64 { + U32, + S32, + U64, +}; + +IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op, + bool is_signed) { + switch (op) { + case AtomOp::ADD: + return ir.SharedAtomicIAdd(offset, op_b); + case AtomOp::MIN: + return ir.SharedAtomicIMin(offset, op_b, is_signed); + case AtomOp::MAX: + return ir.SharedAtomicIMax(offset, op_b, is_signed); + case AtomOp::INC: + return ir.SharedAtomicInc(offset, op_b); + case AtomOp::DEC: + return ir.SharedAtomicDec(offset, op_b); + case AtomOp::AND: + return ir.SharedAtomicAnd(offset, op_b); + case AtomOp::OR: + return ir.SharedAtomicOr(offset, op_b); + case AtomOp::XOR: + return ir.SharedAtomicXor(offset, op_b); + case AtomOp::EXCH: + return ir.SharedAtomicExchange(offset, op_b); + default: + throw NotImplementedException("Integer Atoms Operation {}", op); + } +} + +IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> offset_reg; + BitField<30, 22, u64> absolute_offset; + BitField<30, 22, s64> relative_offset; + } const encoding{insn}; + + if (encoding.offset_reg == IR::Reg::RZ) { + return v.ir.Imm32(static_cast(encoding.absolute_offset << 2)); + } else { + const s32 relative{static_cast(encoding.relative_offset << 2)}; + return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); + } +} + +void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) { + switch (size) { + case AtomsSize::U32: + case AtomsSize::S32: + return v.X(dest_reg, IR::U32{result}); + case AtomsSize::U64: + return v.L(dest_reg, IR::U64{result}); + default: + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::ATOMS(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<28, 2, AtomsSize> size; + BitField<52, 4, AtomOp> op; + } const atoms{insn}; + + const bool size_64{atoms.size == AtomsSize::U64}; + if (size_64 && atoms.op != AtomOp::EXCH) { + throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value()); + } + const bool is_signed{atoms.size == AtomsSize::S32}; + const IR::U32 offset{AtomsOffset(*this, insn)}; + + IR::Value result; + if (size_64) { + result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed); + } else { + result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed); + } + StoreResult(*this, atoms.dest_reg, result, atoms.size); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 327941223..aebe3072a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -17,18 +17,10 @@ void TranslatorVisitor::ATOM_cas(u64) { ThrowNotImplemented(Opcode::ATOM_cas); } -void TranslatorVisitor::ATOM(u64) { - ThrowNotImplemented(Opcode::ATOM); -} - void TranslatorVisitor::ATOMS_cas(u64) { ThrowNotImplemented(Opcode::ATOMS_cas); } -void TranslatorVisitor::ATOMS(u64) { - ThrowNotImplemented(Opcode::ATOMS); -} - void TranslatorVisitor::B2R(u64) { ThrowNotImplemented(Opcode::B2R); } @@ -241,10 +233,6 @@ void TranslatorVisitor::RAM(u64) { ThrowNotImplemented(Opcode::RAM); } -void TranslatorVisitor::RED(u64) { - ThrowNotImplemented(Opcode::RED); -} - void TranslatorVisitor::RET(u64) { ThrowNotImplemented(Opcode::RET); } -- cgit v1.2.3 From c9337a4ae45639c0d5b6c83c30d098878f3c344a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 04:14:55 -0300 Subject: shader: Apply sign bit in FCMP (imm) --- .../frontend/maxwell/translate/impl/floating_point_compare.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp index 80109ca0e..7127ebf54 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -49,7 +49,7 @@ void TranslatorVisitor::FCMP_imm(u64 insn) { const u32 sign_bit{fcmp.is_negative != 0 ? (1U << 31) : 0}; const u32 value{static_cast(fcmp.value) << 12}; - FCMP(*this, insn, ir.Imm32(value), GetFloatReg39(insn)); + FCMP(*this, insn, ir.Imm32(value | sign_bit), GetFloatReg39(insn)); } } // namespace Shader::Maxwell -- cgit v1.2.3 From 5c61e860e4f83524ffce10ca447398e83de81640 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 19:16:12 -0300 Subject: shader: Implement SR_THREAD_KILL --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 ++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 2 ++ src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../frontend/maxwell/translate/impl/move_special_register.cpp | 2 ++ 4 files changed, 9 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index a3339f624..54a273a92 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -347,6 +347,10 @@ U32 IREmitter::LocalInvocationIdZ() { return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 2)}; } +U1 IREmitter::IsHelperInvocation() { + return Inst(Opcode::IsHelperInvocation); +} + U32 IREmitter::LaneId() { return Inst(Opcode::LaneId); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index f9cbf1304..d04224707 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -90,6 +90,8 @@ public: [[nodiscard]] U32 LocalInvocationIdY(); [[nodiscard]] U32 LocalInvocationIdZ(); + [[nodiscard]] U1 IsHelperInvocation(); + [[nodiscard]] U32 LaneId(); [[nodiscard]] U32 LoadGlobalU8(const U64& address); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index dc776a73e..f70008682 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -58,6 +58,7 @@ OPCODE(SetCFlag, Void, U1, OPCODE(SetOFlag, Void, U1, ) OPCODE(WorkgroupId, U32x3, ) OPCODE(LocalInvocationId, U32x3, ) +OPCODE(IsHelperInvocation, U1, ) // Undefined OPCODE(UndefU1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index be1f21e7b..50650cc56 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -113,6 +113,8 @@ enum class SpecialRegister : u64 { [[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { switch (special_register) { + case SpecialRegister::SR_THREAD_KILL: + return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))}; case SpecialRegister::SR_TID_X: return ir.LocalInvocationIdX(); case SpecialRegister::SR_TID_Y: -- cgit v1.2.3 From 2ed80f6b1e85823d7a13dfbb119545a0a0ec7427 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 19:16:47 -0300 Subject: shader: Implement LOP CC --- .../frontend/maxwell/translate/impl/logic_operation.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp index 89e5cd6de..92cd27ed4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp @@ -44,9 +44,6 @@ void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv if (x) { throw NotImplementedException("X"); } - if (cc) { - throw NotImplementedException("CC"); - } IR::U32 op_a{v.X(lop.src_reg)}; if (inv_a != 0) { op_a = v.ir.BitwiseNot(op_a); @@ -60,6 +57,17 @@ void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)}; v.ir.SetPred(dest_pred, pred_result); } + if (cc) { + if (bit_op == LogicalOp::PASS_B) { + v.SetZFlag(v.ir.IEqual(result, v.ir.Imm32(0))); + v.SetSFlag(v.ir.ILessThan(result, v.ir.Imm32(0), true)); + } else { + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + } + v.ResetCFlag(); + v.ResetOFlag(); + } v.X(lop.dest_reg, result); } -- cgit v1.2.3 From dfd5341d7117e4299b6c34e8b1feb0e66c230478 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 20:40:00 -0300 Subject: shader: Mark blocks with no end branch as unreachable --- .../frontend/maxwell/structured_control_flow.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 02cef2645..e63e25aa6 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -816,8 +816,13 @@ private: throw NotImplementedException("Statement type {}", stmt.type); } } - if (current_block && continue_block) { - IR::IREmitter{*current_block}.Branch(continue_block); + if (current_block) { + IR::IREmitter ir{*current_block}; + if (continue_block) { + ir.Branch(continue_block); + } else { + ir.Unreachable(); + } } } -- cgit v1.2.3 From 415c7e46ed2f00bb4611cf2913eac1b92ca130bc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 20:54:51 -0300 Subject: shader: Simplify FLO and throw on CC --- .../maxwell/translate/impl/find_leading_one.cpp | 25 +++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp index d5361bec5..f0cb25d61 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp @@ -8,26 +8,27 @@ namespace Shader::Maxwell { namespace { -void FLO(TranslatorVisitor& v, u64 insn, const IR::U32& src) { +void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) { union { u64 insn; BitField<0, 8, IR::Reg> dest_reg; BitField<40, 1, u64> tilde; BitField<41, 1, u64> shift; + BitField<47, 1, u64> cc; BitField<48, 1, u64> is_signed; } const flo{insn}; - const bool invert{flo.tilde != 0}; - const bool is_signed{flo.is_signed != 0}; - const bool shift_op{flo.shift != 0}; - - const IR::U32 operand{invert ? v.ir.BitwiseNot(src) : src}; - const IR::U32 find_result{is_signed ? v.ir.FindSMsb(operand) : v.ir.FindUMsb(operand)}; - const IR::U1 find_fail{v.ir.IEqual(find_result, v.ir.Imm32(-1))}; - const IR::U32 offset{v.ir.Imm32(31)}; - const IR::U32 success_result{shift_op ? IR::U32{v.ir.ISub(offset, find_result)} : find_result}; - - const IR::U32 result{v.ir.Select(find_fail, find_result, success_result)}; + if (flo.cc != 0) { + throw NotImplementedException("CC"); + } + if (flo.tilde != 0) { + src = v.ir.BitwiseNot(src); + } + IR::U32 result{flo.is_signed != 0 ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)}; + if (flo.shift != 0) { + const IR::U1 not_found{v.ir.IEqual(result, v.ir.Imm32(-1))}; + result = IR::U32{v.ir.Select(not_found, result, v.ir.BitwiseXor(result, v.ir.Imm32(31)))}; + } v.X(flo.dest_reg, result); } } // Anonymous namespace -- cgit v1.2.3 From 2516829e4cfa30378ce049a8c66dee9b3482d673 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 21:15:44 -0300 Subject: shader: Fix CC in I2I --- .../frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp index 2f1a58805..53e8d8923 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp @@ -105,6 +105,8 @@ void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { if (i2i.cc != 0) { v.SetZFlag(v.ir.GetZeroFromOp(result)); v.SetSFlag(v.ir.GetSignFromOp(result)); + v.ResetCFlag(); + v.ResetOFlag(); } } } // Anonymous namespace -- cgit v1.2.3 From f71208414775a6fca87130d2defcdeba75314084 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 21:33:18 -0300 Subject: shader: Stub ISBERD --- .../impl/internal_stage_buffer_entry_read.cpp | 55 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 4 -- 2 files changed, 55 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp new file mode 100644 index 000000000..8c01b7d30 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp @@ -0,0 +1,55 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +#pragma optimize("", off) + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Default, + Patch, + Prim, + Attr, +}; + +enum class Shift : u64 { + Default, + U16, + B32, +}; + +} // Anonymous namespace + +void TranslatorVisitor::ISBERD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<31, 1, u64> skew; + BitField<32, 1, u64> o; + BitField<33, 2, Mode> mode; + BitField<47, 2, Shift> shift; + } const isberd{insn}; + + if (isberd.skew != 0) { + throw NotImplementedException("SKEW"); + } + if (isberd.o != 0) { + throw NotImplementedException("O"); + } + if (isberd.mode != Mode::Default) { + throw NotImplementedException("Mode {}", isberd.mode.Value()); + } + if (isberd.shift != Shift::Default) { + throw NotImplementedException("Shift {}", isberd.shift.Value()); + } + // LOG_WARNING(..., "ISBERD is stubbed"); + X(isberd.dest_reg, X(isberd.src_reg)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index aebe3072a..694bdfccb 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -141,10 +141,6 @@ void TranslatorVisitor::IMUL32I(u64) { ThrowNotImplemented(Opcode::IMUL32I); } -void TranslatorVisitor::ISBERD(u64) { - ThrowNotImplemented(Opcode::ISBERD); -} - void TranslatorVisitor::JCAL(u64) { ThrowNotImplemented(Opcode::JCAL); } -- cgit v1.2.3 From 4b0172f6debf9ba595d5fd2d3e2329328513f5db Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 21:33:41 -0300 Subject: shader: Stub SR_INVOCATION_INFO --- .../frontend/maxwell/translate/impl/move_special_register.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index 50650cc56..bc822d585 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -115,6 +115,9 @@ enum class SpecialRegister : u64 { switch (special_register) { case SpecialRegister::SR_THREAD_KILL: return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))}; + case SpecialRegister::SR_INVOCATION_INFO: + // LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed"); + return ir.Imm32(0x00ff'0000); case SpecialRegister::SR_TID_X: return ir.LocalInvocationIdX(); case SpecialRegister::SR_TID_Y: @@ -128,10 +131,10 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_CTAID_Z: return ir.WorkgroupIdZ(); case SpecialRegister::SR_WSCALEFACTOR_XY: - // LOG_WARNING(ShaderDecompiler, "SR_WSCALEFACTOR_XY (Stubbed)"); + // LOG_WARNING(..., "SR_WSCALEFACTOR_XY is stubbed"); return ir.Imm32(Common::BitCast(1.0f)); case SpecialRegister::SR_WSCALEFACTOR_Z: - // LOG_WARNING(ShaderDecompiler, "SR_WSCALEFACTOR_Z (Stubbed)"); + // LOG_WARNING(..., "SR_WSCALEFACTOR_Z is stubbed"); return ir.Imm32(Common::BitCast(1.0f)); case SpecialRegister::SR_LANEID: return ir.LaneId(); -- cgit v1.2.3 From dd3432d357ce0bdf8bb295094c89bf659c939259 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sun, 11 Apr 2021 23:11:24 -0400 Subject: internal_stage_buffer_entry_read: Remove pragma optimize off --- .../maxwell/translate/impl/internal_stage_buffer_entry_read.cpp | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp index 8c01b7d30..edd6220a8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp @@ -6,8 +6,6 @@ #include "common/common_types.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" -#pragma optimize("", off) - namespace Shader::Maxwell { namespace { enum class Mode : u64 { -- cgit v1.2.3 From a6cef71cc0b03f929f1bc97152b302562f46bc53 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 12 Apr 2021 03:48:15 -0300 Subject: shader: Implement OUT --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 8 ++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 3 ++ .../frontend/ir/microinstruction.cpp | 2 + src/shader_recompiler/frontend/ir/opcodes.inc | 2 + .../translate/impl/load_store_attribute.cpp | 7 +--- .../maxwell/translate/impl/not_implemented.cpp | 12 ------ .../maxwell/translate/impl/output_geometry.cpp | 45 ++++++++++++++++++++++ 7 files changed, 62 insertions(+), 17 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 54a273a92..7d48fa1ba 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -125,6 +125,14 @@ void IREmitter::Epilogue() { Inst(Opcode::Epilogue); } +void IREmitter::EmitVertex(const U32& stream) { + Inst(Opcode::EmitVertex, stream); +} + +void IREmitter::EndPrimitive(const U32& stream) { + Inst(Opcode::EndPrimitive, stream); +} + U32 IREmitter::GetReg(IR::Reg reg) { return Inst(Opcode::GetRegister, reg); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index d04224707..033c4332e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -43,6 +43,9 @@ public: void Prologue(); void Epilogue(); + void EmitVertex(const U32& stream); + void EndPrimitive(const U32& stream); + [[nodiscard]] U32 GetReg(IR::Reg reg); void SetReg(IR::Reg reg, const U32& value); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 0f66c5627..204c55fa8 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -69,6 +69,8 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::MemoryBarrierSystemLevel: case Opcode::Prologue: case Opcode::Epilogue: + case Opcode::EmitVertex: + case Opcode::EndPrimitive: case Opcode::SetAttribute: case Opcode::SetAttributeIndexed: case Opcode::SetFragColor: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index f70008682..0e487f1a7 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -25,6 +25,8 @@ OPCODE(MemoryBarrierSystemLevel, Void, // Special operations OPCODE(Prologue, Void, ) OPCODE(Epilogue, Void, ) +OPCODE(EmitVertex, Void, U32, ) +OPCODE(EndPrimitive, Void, U32, ) // Context getters/setters OPCODE(GetRegister, U32, Reg, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index f629e7167..79293bd6b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -64,7 +64,7 @@ void TranslatorVisitor::ALD(u64 insn) { BitField<8, 8, IR::Reg> index_reg; BitField<20, 10, u64> absolute_offset; BitField<20, 11, s64> relative_offset; - BitField<39, 8, IR::Reg> stream_reg; + BitField<39, 8, IR::Reg> array_reg; BitField<32, 1, u64> o; BitField<31, 1, u64> patch; BitField<47, 2, Size> size; @@ -100,16 +100,13 @@ void TranslatorVisitor::AST(u64 insn) { BitField<20, 10, u64> absolute_offset; BitField<20, 11, s64> relative_offset; BitField<31, 1, u64> patch; - BitField<39, 8, IR::Reg> stream_reg; + BitField<39, 8, IR::Reg> array_reg; BitField<47, 2, Size> size; } const ast{insn}; if (ast.patch != 0) { throw NotImplementedException("P"); } - if (ast.stream_reg != IR::Reg::RZ) { - throw NotImplementedException("Stream store"); - } if (ast.index_reg != IR::Reg::RZ) { throw NotImplementedException("Indexed store"); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 694bdfccb..a45d1e4be 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -169,18 +169,6 @@ void TranslatorVisitor::NOP(u64) { // NOP is No-Op. } -void TranslatorVisitor::OUT_reg(u64) { - ThrowNotImplemented(Opcode::OUT_reg); -} - -void TranslatorVisitor::OUT_cbuf(u64) { - ThrowNotImplemented(Opcode::OUT_cbuf); -} - -void TranslatorVisitor::OUT_imm(u64) { - ThrowNotImplemented(Opcode::OUT_imm); -} - void TranslatorVisitor::PBK() { // PBK is a no-op } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp new file mode 100644 index 000000000..01cfad88d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp @@ -0,0 +1,45 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> output_reg; // Not needed on host + BitField<39, 1, u64> emit; + BitField<40, 1, u64> cut; + } const out{insn}; + + stream_index = v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11)); + + if (out.emit != 0) { + v.ir.EmitVertex(stream_index); + } + if (out.cut != 0) { + v.ir.EndPrimitive(stream_index); + } + // Host doesn't need the output register, but we can write to it to avoid undefined reads + v.X(out.dest_reg, v.ir.Imm32(0)); +} +} // Anonymous namespace + +void TranslatorVisitor::OUT_reg(u64 insn) { + OUT(*this, insn, GetReg20(insn)); +} + +void TranslatorVisitor::OUT_cbuf(u64 insn) { + OUT(*this, insn, GetCbuf(insn)); +} + +void TranslatorVisitor::OUT_imm(u64 insn) { + OUT(*this, insn, GetImm20(insn)); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From f263760c5a3aff771123b32b15677e1f7a089640 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 12 Apr 2021 19:41:22 -0300 Subject: shader: Implement geometry shaders --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 20 ++++++++++++++------ src/shader_recompiler/frontend/ir/ir_emitter.h | 6 ++++-- src/shader_recompiler/frontend/ir/opcodes.inc | 8 ++++---- src/shader_recompiler/frontend/ir/program.h | 4 ++++ src/shader_recompiler/frontend/maxwell/program.cpp | 13 ++++++++++++- .../maxwell/translate/impl/load_store_attribute.cpp | 16 ++++++++++------ 6 files changed, 48 insertions(+), 19 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 7d48fa1ba..d66eb17a6 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -308,19 +308,27 @@ U1 IREmitter::GetFlowTestResult(FlowTest test) { } F32 IREmitter::GetAttribute(IR::Attribute attribute) { - return Inst(Opcode::GetAttribute, attribute); + return GetAttribute(attribute, Imm32(0)); } -void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) { - Inst(Opcode::SetAttribute, attribute, value); +F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) { + return Inst(Opcode::GetAttribute, attribute, vertex); +} + +void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) { + Inst(Opcode::SetAttribute, attribute, value, vertex); } F32 IREmitter::GetAttributeIndexed(const U32& phys_address) { - return Inst(Opcode::GetAttributeIndexed, phys_address); + return GetAttributeIndexed(phys_address, Imm32(0)); +} + +F32 IREmitter::GetAttributeIndexed(const U32& phys_address, const U32& vertex) { + return Inst(Opcode::GetAttributeIndexed, phys_address, vertex); } -void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value) { - Inst(Opcode::SetAttributeIndexed, phys_address, value); +void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex) { + Inst(Opcode::SetAttributeIndexed, phys_address, value, vertex); } void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 033c4332e..e70359eb1 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -77,10 +77,12 @@ public: [[nodiscard]] U1 GetFlowTestResult(FlowTest test); [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); - void SetAttribute(IR::Attribute attribute, const F32& value); + [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex); + void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex); [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); - void SetAttributeIndexed(const U32& phys_address, const F32& value); + [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address, const U32& vertex); + void SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex); void SetFragColor(u32 index, u32 component, const F32& value); void SetFragDepth(const F32& value); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 0e487f1a7..7a21fe746 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -44,10 +44,10 @@ OPCODE(GetCbufS16, U32, U32, OPCODE(GetCbufU32, U32, U32, U32, ) OPCODE(GetCbufF32, F32, U32, U32, ) OPCODE(GetCbufU32x2, U32x2, U32, U32, ) -OPCODE(GetAttribute, F32, Attribute, ) -OPCODE(SetAttribute, Void, Attribute, F32, ) -OPCODE(GetAttributeIndexed, F32, U32, ) -OPCODE(SetAttributeIndexed, Void, U32, F32, ) +OPCODE(GetAttribute, F32, Attribute, U32, ) +OPCODE(SetAttribute, Void, Attribute, F32, U32, ) +OPCODE(GetAttributeIndexed, F32, U32, U32, ) +OPCODE(SetAttributeIndexed, Void, U32, F32, U32, ) OPCODE(SetFragColor, Void, U32, U32, F32, ) OPCODE(SetFragDepth, Void, F32, ) OPCODE(GetZFlag, U1, Void, ) diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 3a37b3ab9..51e1a8c77 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -10,6 +10,7 @@ #include #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/program_header.h" #include "shader_recompiler/shader_info.h" #include "shader_recompiler/stage.h" @@ -21,6 +22,9 @@ struct Program { Info info; Stage stage{}; std::array workgroup_size{}; + OutputTopology output_topology{}; + u32 output_vertices{}; + u32 invocations{}; u32 local_memory_size{}; u32 shared_memory_size{}; }; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index aaf2a74a7..ab67446c8 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -69,9 +69,20 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool index_reg; BitField<20, 10, u64> absolute_offset; BitField<20, 11, s64> relative_offset; - BitField<39, 8, IR::Reg> array_reg; + BitField<39, 8, IR::Reg> vertex_reg; BitField<32, 1, u64> o; BitField<31, 1, u64> patch; BitField<47, 2, Size> size; @@ -80,15 +80,17 @@ void TranslatorVisitor::ALD(u64 insn) { if (offset % 4 != 0) { throw NotImplementedException("Unaligned absolute offset {}", offset); } + const IR::U32 vertex{X(ald.vertex_reg)}; const u32 num_elements{NumElements(ald.size)}; if (ald.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element})); + const IR::Attribute attr{offset / 4 + element}; + F(ald.dest_reg + element, ir.GetAttribute(attr, vertex)); } return; } HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { - F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset)); + F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset, vertex)); }); } @@ -100,7 +102,7 @@ void TranslatorVisitor::AST(u64 insn) { BitField<20, 10, u64> absolute_offset; BitField<20, 11, s64> relative_offset; BitField<31, 1, u64> patch; - BitField<39, 8, IR::Reg> array_reg; + BitField<39, 8, IR::Reg> vertex_reg; BitField<47, 2, Size> size; } const ast{insn}; @@ -114,15 +116,17 @@ void TranslatorVisitor::AST(u64 insn) { if (offset % 4 != 0) { throw NotImplementedException("Unaligned absolute offset {}", offset); } + const IR::U32 vertex{X(ast.vertex_reg)}; const u32 num_elements{NumElements(ast.size)}; if (ast.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element)); + const IR::Attribute attr{offset / 4 + element}; + ir.SetAttribute(attr, F(ast.src_reg + element), vertex); } return; } HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { - ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element)); + ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element), vertex); }); } -- cgit v1.2.3 From 09165ae18989c17661faf188e6825a9eb4e03a27 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 13 Apr 2021 06:11:18 -0300 Subject: shader: Document and relax cache control on surface instructions --- .../maxwell/translate/impl/surface_load_store.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp index 9a2d16a6e..e1b8aa8ad 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp @@ -61,18 +61,19 @@ enum class Clamp : u64 { TRAP, }; +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators enum class LoadCache : u64 { - Default, - CG, - CI, - CV, + CA, // Cache at all levels, likely to be accessed again + CG, // Cache at global level (L2 and below, not L1) + CI, // ??? + CV, // Don't cache and fetch again (volatile) }; enum class StoreCache : u64 { - Default, - CG, - CS, - WT, + WB, // Cache write-back all coherent levels + CG, // Cache at global level (L2 and below, not L1) + CS, // Cache streaming, likely to be accessed once + WT, // Cache write-through (to system memory, volatile?) }; ImageFormat Format(Size size) { @@ -188,7 +189,7 @@ void TranslatorVisitor::SULD(u64 insn) { if (suld.clamp != Clamp::IGN) { throw NotImplementedException("Clamp {}", suld.clamp.Value()); } - if (suld.cache != LoadCache::Default) { + if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) { throw NotImplementedException("Cache {}", suld.cache.Value()); } const bool is_typed{suld.d != 0}; @@ -248,7 +249,7 @@ void TranslatorVisitor::SUST(u64 insn) { if (sust.clamp != Clamp::IGN) { throw NotImplementedException("Clamp {}", sust.clamp.Value()); } - if (sust.cache != StoreCache::Default) { + if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) { throw NotImplementedException("Cache {}", sust.cache.Value()); } const bool is_typed{sust.d != 0}; -- cgit v1.2.3 From b126987c59964d81ae3705ad7ad6c0ace8714e19 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 14 Apr 2021 01:04:59 -0300 Subject: shader: Implement transform feedbacks and define file format --- src/shader_recompiler/frontend/ir/attribute.cpp | 7 +++++++ src/shader_recompiler/frontend/ir/attribute.h | 2 ++ 2 files changed, 9 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp index 7993e5c43..4d0b8b8e5 100644 --- a/src/shader_recompiler/frontend/ir/attribute.cpp +++ b/src/shader_recompiler/frontend/ir/attribute.cpp @@ -20,6 +20,13 @@ u32 GenericAttributeIndex(Attribute attribute) { return (static_cast(attribute) - static_cast(Attribute::Generic0X)) / 4u; } +u32 GenericAttributeElement(Attribute attribute) { + if (!IsGeneric(attribute)) { + throw InvalidArgument("Attribute is not generic {}", attribute); + } + return static_cast(attribute) % 4; +} + std::string NameOf(Attribute attribute) { switch (attribute) { case Attribute::PrimitiveId: diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h index 34ec7e0cd..8bf2ddf30 100644 --- a/src/shader_recompiler/frontend/ir/attribute.h +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -226,6 +226,8 @@ enum class Attribute : u64 { [[nodiscard]] u32 GenericAttributeIndex(Attribute attribute); +[[nodiscard]] u32 GenericAttributeElement(Attribute attribute); + [[nodiscard]] std::string NameOf(Attribute attribute); } // namespace Shader::IR -- cgit v1.2.3 From 183855e396cc6918d36fbf3e38ea426e934b4e3e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 15 Apr 2021 22:46:11 -0300 Subject: shader: Implement tessellation shaders, polygon mode and invocation id --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 12 ++ src/shader_recompiler/frontend/ir/ir_emitter.h | 4 + .../frontend/ir/microinstruction.cpp | 1 + src/shader_recompiler/frontend/ir/opcodes.cpp | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 3 + src/shader_recompiler/frontend/ir/patch.cpp | 28 ++++ src/shader_recompiler/frontend/ir/patch.h | 149 +++++++++++++++++++++ src/shader_recompiler/frontend/ir/type.h | 41 +++--- src/shader_recompiler/frontend/ir/value.cpp | 9 ++ src/shader_recompiler/frontend/ir/value.h | 4 + src/shader_recompiler/frontend/maxwell/program.cpp | 5 + .../translate/impl/load_store_attribute.cpp | 33 +++-- .../translate/impl/move_special_register.cpp | 2 + 13 files changed, 259 insertions(+), 33 deletions(-) create mode 100644 src/shader_recompiler/frontend/ir/patch.cpp create mode 100644 src/shader_recompiler/frontend/ir/patch.h (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index d66eb17a6..b821d9f47 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -331,6 +331,14 @@ void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value, c Inst(Opcode::SetAttributeIndexed, phys_address, value, vertex); } +F32 IREmitter::GetPatch(Patch patch) { + return Inst(Opcode::GetPatch, patch); +} + +void IREmitter::SetPatch(Patch patch, const F32& value) { + Inst(Opcode::SetPatch, patch, value); +} + void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value); } @@ -363,6 +371,10 @@ U32 IREmitter::LocalInvocationIdZ() { return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 2)}; } +U32 IREmitter::InvocationId() { + return Inst(Opcode::InvocationId); +} + U1 IREmitter::IsHelperInvocation() { return Inst(Opcode::IsHelperInvocation); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index e70359eb1..7f8f1ad42 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -84,6 +84,9 @@ public: [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address, const U32& vertex); void SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex); + [[nodiscard]] F32 GetPatch(Patch patch); + void SetPatch(Patch patch, const F32& value); + void SetFragColor(u32 index, u32 component, const F32& value); void SetFragDepth(const F32& value); @@ -95,6 +98,7 @@ public: [[nodiscard]] U32 LocalInvocationIdY(); [[nodiscard]] U32 LocalInvocationIdZ(); + [[nodiscard]] U32 InvocationId(); [[nodiscard]] U1 IsHelperInvocation(); [[nodiscard]] U32 LaneId(); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 204c55fa8..b2d7573d9 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -73,6 +73,7 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::EndPrimitive: case Opcode::SetAttribute: case Opcode::SetAttributeIndexed: + case Opcode::SetPatch: case Opcode::SetFragColor: case Opcode::SetFragDepth: case Opcode::WriteGlobalU8: diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 7d3e0b2ab..7f04b647b 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -24,6 +24,7 @@ constexpr Type Label{Type::Label}; constexpr Type Reg{Type::Reg}; constexpr Type Pred{Type::Pred}; constexpr Type Attribute{Type::Attribute}; +constexpr Type Patch{Type::Patch}; constexpr Type U1{Type::U1}; constexpr Type U8{Type::U8}; constexpr Type U16{Type::U16}; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 7a21fe746..a86542cd8 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -48,6 +48,8 @@ OPCODE(GetAttribute, F32, Attr OPCODE(SetAttribute, Void, Attribute, F32, U32, ) OPCODE(GetAttributeIndexed, F32, U32, U32, ) OPCODE(SetAttributeIndexed, Void, U32, F32, U32, ) +OPCODE(GetPatch, F32, Patch, ) +OPCODE(SetPatch, Void, Patch, F32, ) OPCODE(SetFragColor, Void, U32, U32, F32, ) OPCODE(SetFragDepth, Void, F32, ) OPCODE(GetZFlag, U1, Void, ) @@ -60,6 +62,7 @@ OPCODE(SetCFlag, Void, U1, OPCODE(SetOFlag, Void, U1, ) OPCODE(WorkgroupId, U32x3, ) OPCODE(LocalInvocationId, U32x3, ) +OPCODE(InvocationId, U32, ) OPCODE(IsHelperInvocation, U1, ) // Undefined diff --git a/src/shader_recompiler/frontend/ir/patch.cpp b/src/shader_recompiler/frontend/ir/patch.cpp new file mode 100644 index 000000000..1f770bc48 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/patch.cpp @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/ir/patch.h" +#include "shader_recompiler/exception.h" + +namespace Shader::IR { + +bool IsGeneric(Patch patch) noexcept { + return patch >= Patch::Component0 && patch <= Patch::Component119; +} + +u32 GenericPatchIndex(Patch patch) { + if (!IsGeneric(patch)) { + throw InvalidArgument("Patch {} is not generic", patch); + } + return (static_cast(patch) - static_cast(Patch::Component0)) / 4; +} + +u32 GenericPatchElement(Patch patch) { + if (!IsGeneric(patch)) { + throw InvalidArgument("Patch {} is not generic", patch); + } + return (static_cast(patch) - static_cast(Patch::Component0)) % 4; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/patch.h b/src/shader_recompiler/frontend/ir/patch.h new file mode 100644 index 000000000..6d66ff0d6 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/patch.h @@ -0,0 +1,149 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Shader::IR { + +enum class Patch : u64 { + TessellationLodLeft, + TessellationLodTop, + TessellationLodRight, + TessellationLodBottom, + TessellationLodInteriorU, + TessellationLodInteriorV, + ComponentPadding0, + ComponentPadding1, + Component0, + Component1, + Component2, + Component3, + Component4, + Component5, + Component6, + Component7, + Component8, + Component9, + Component10, + Component11, + Component12, + Component13, + Component14, + Component15, + Component16, + Component17, + Component18, + Component19, + Component20, + Component21, + Component22, + Component23, + Component24, + Component25, + Component26, + Component27, + Component28, + Component29, + Component30, + Component31, + Component32, + Component33, + Component34, + Component35, + Component36, + Component37, + Component38, + Component39, + Component40, + Component41, + Component42, + Component43, + Component44, + Component45, + Component46, + Component47, + Component48, + Component49, + Component50, + Component51, + Component52, + Component53, + Component54, + Component55, + Component56, + Component57, + Component58, + Component59, + Component60, + Component61, + Component62, + Component63, + Component64, + Component65, + Component66, + Component67, + Component68, + Component69, + Component70, + Component71, + Component72, + Component73, + Component74, + Component75, + Component76, + Component77, + Component78, + Component79, + Component80, + Component81, + Component82, + Component83, + Component84, + Component85, + Component86, + Component87, + Component88, + Component89, + Component90, + Component91, + Component92, + Component93, + Component94, + Component95, + Component96, + Component97, + Component98, + Component99, + Component100, + Component101, + Component102, + Component103, + Component104, + Component105, + Component106, + Component107, + Component108, + Component109, + Component110, + Component111, + Component112, + Component113, + Component114, + Component115, + Component116, + Component117, + Component118, + Component119, +}; +static_assert(static_cast(Patch::Component119) == 127); + +[[nodiscard]] bool IsGeneric(Patch patch) noexcept; + +[[nodiscard]] u32 GenericPatchIndex(Patch patch); + +[[nodiscard]] u32 GenericPatchElement(Patch patch); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h index 9a32ca1e8..8b3b33852 100644 --- a/src/shader_recompiler/frontend/ir/type.h +++ b/src/shader_recompiler/frontend/ir/type.h @@ -20,26 +20,27 @@ enum class Type { Reg = 1 << 2, Pred = 1 << 3, Attribute = 1 << 4, - U1 = 1 << 5, - U8 = 1 << 6, - U16 = 1 << 7, - U32 = 1 << 8, - U64 = 1 << 9, - F16 = 1 << 10, - F32 = 1 << 11, - F64 = 1 << 12, - U32x2 = 1 << 13, - U32x3 = 1 << 14, - U32x4 = 1 << 15, - F16x2 = 1 << 16, - F16x3 = 1 << 17, - F16x4 = 1 << 18, - F32x2 = 1 << 19, - F32x3 = 1 << 20, - F32x4 = 1 << 21, - F64x2 = 1 << 22, - F64x3 = 1 << 23, - F64x4 = 1 << 24, + Patch = 1 << 5, + U1 = 1 << 6, + U8 = 1 << 7, + U16 = 1 << 8, + U32 = 1 << 9, + U64 = 1 << 10, + F16 = 1 << 11, + F32 = 1 << 12, + F64 = 1 << 13, + U32x2 = 1 << 14, + U32x3 = 1 << 15, + U32x4 = 1 << 16, + F16x2 = 1 << 17, + F16x3 = 1 << 18, + F16x4 = 1 << 19, + F32x2 = 1 << 20, + F32x3 = 1 << 21, + F32x4 = 1 << 22, + F64x2 = 1 << 23, + F64x3 = 1 << 24, + F64x4 = 1 << 25, }; DECLARE_ENUM_FLAG_OPERATORS(Type) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 1e7ffb86d..bf5f8c0c2 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -18,6 +18,8 @@ Value::Value(IR::Pred value) noexcept : type{Type::Pred}, pred{value} {} Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {} +Value::Value(IR::Patch value) noexcept : type{Type::Patch}, patch{value} {} + Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {} Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {} @@ -109,6 +111,11 @@ IR::Attribute Value::Attribute() const { return attribute; } +IR::Patch Value::Patch() const { + ValidateAccess(Type::Patch); + return patch; +} + bool Value::U1() const { if (IsIdentity()) { return inst->Arg(0).U1(); @@ -182,6 +189,8 @@ bool Value::operator==(const Value& other) const { return pred == other.pred; case Type::Attribute: return attribute == other.attribute; + case Type::Patch: + return patch == other.patch; case Type::U1: return imm_u1 == other.imm_u1; case Type::U8: diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index a0962863d..303745563 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -9,6 +9,7 @@ #include "shader_recompiler/frontend/ir/attribute.h" #include "shader_recompiler/frontend/ir/pred.h" #include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/frontend/ir/patch.h" #include "shader_recompiler/frontend/ir/type.h" namespace Shader::IR { @@ -24,6 +25,7 @@ public: explicit Value(IR::Reg value) noexcept; explicit Value(IR::Pred value) noexcept; explicit Value(IR::Attribute value) noexcept; + explicit Value(IR::Patch value) noexcept; explicit Value(bool value) noexcept; explicit Value(u8 value) noexcept; explicit Value(u16 value) noexcept; @@ -46,6 +48,7 @@ public: [[nodiscard]] IR::Reg Reg() const; [[nodiscard]] IR::Pred Pred() const; [[nodiscard]] IR::Attribute Attribute() const; + [[nodiscard]] IR::Patch Patch() const; [[nodiscard]] bool U1() const; [[nodiscard]] u8 U8() const; [[nodiscard]] u16 U16() const; @@ -67,6 +70,7 @@ private: IR::Reg reg; IR::Pred pred; IR::Attribute attribute; + IR::Patch patch; bool imm_u1; u8 imm_u8; u16 imm_u16; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index ab67446c8..20a1d61cc 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -70,6 +70,11 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool size; } const ald{insn}; - if (ald.o != 0) { - throw NotImplementedException("O"); - } - if (ald.patch != 0) { - throw NotImplementedException("P"); - } const u64 offset{ald.absolute_offset.Value()}; if (offset % 4 != 0) { throw NotImplementedException("Unaligned absolute offset {}", offset); @@ -84,11 +78,19 @@ void TranslatorVisitor::ALD(u64 insn) { const u32 num_elements{NumElements(ald.size)}; if (ald.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - const IR::Attribute attr{offset / 4 + element}; - F(ald.dest_reg + element, ir.GetAttribute(attr, vertex)); + if (ald.patch != 0) { + const IR::Patch patch{offset / 4 + element}; + F(ald.dest_reg + element, ir.GetPatch(patch)); + } else { + const IR::Attribute attr{offset / 4 + element}; + F(ald.dest_reg + element, ir.GetAttribute(attr, vertex)); + } } return; } + if (ald.patch != 0) { + throw NotImplementedException("Indirect patch read"); + } HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset, vertex)); }); @@ -106,9 +108,6 @@ void TranslatorVisitor::AST(u64 insn) { BitField<47, 2, Size> size; } const ast{insn}; - if (ast.patch != 0) { - throw NotImplementedException("P"); - } if (ast.index_reg != IR::Reg::RZ) { throw NotImplementedException("Indexed store"); } @@ -120,11 +119,19 @@ void TranslatorVisitor::AST(u64 insn) { const u32 num_elements{NumElements(ast.size)}; if (ast.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - const IR::Attribute attr{offset / 4 + element}; - ir.SetAttribute(attr, F(ast.src_reg + element), vertex); + if (ast.patch != 0) { + const IR::Patch patch{offset / 4 + element}; + ir.SetPatch(patch, F(ast.src_reg + element)); + } else { + const IR::Attribute attr{offset / 4 + element}; + ir.SetAttribute(attr, F(ast.src_reg + element), vertex); + } } return; } + if (ast.patch != 0) { + throw NotImplementedException("Indexed tessellation patch store"); + } HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element), vertex); }); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index bc822d585..660b84c20 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -113,6 +113,8 @@ enum class SpecialRegister : u64 { [[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { switch (special_register) { + case SpecialRegister::SR_INVOCATION_ID: + return ir.InvocationId(); case SpecialRegister::SR_THREAD_KILL: return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))}; case SpecialRegister::SR_INVOCATION_INFO: -- cgit v1.2.3 From 95815a3883d708f71db5119f42243e183f32f9a2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 16 Apr 2021 17:22:59 -0300 Subject: shader: Implement PIXLD.MY_INDEX --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 ++ src/shader_recompiler/frontend/ir/ir_emitter.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../maxwell/translate/impl/not_implemented.cpp | 4 -- .../frontend/maxwell/translate/impl/pixel_load.cpp | 46 ++++++++++++++++++++++ 5 files changed, 52 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index b821d9f47..141efd86c 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -375,6 +375,10 @@ U32 IREmitter::InvocationId() { return Inst(Opcode::InvocationId); } +U32 IREmitter::SampleId() { + return Inst(Opcode::SampleId); +} + U1 IREmitter::IsHelperInvocation() { return Inst(Opcode::IsHelperInvocation); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 7f8f1ad42..81833d928 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -99,6 +99,7 @@ public: [[nodiscard]] U32 LocalInvocationIdZ(); [[nodiscard]] U32 InvocationId(); + [[nodiscard]] U32 SampleId(); [[nodiscard]] U1 IsHelperInvocation(); [[nodiscard]] U32 LaneId(); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index a86542cd8..d5e443673 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -63,6 +63,7 @@ OPCODE(SetOFlag, Void, U1, OPCODE(WorkgroupId, U32x3, ) OPCODE(LocalInvocationId, U32x3, ) OPCODE(InvocationId, U32, ) +OPCODE(SampleId, U32, ) OPCODE(IsHelperInvocation, U1, ) // Undefined diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a45d1e4be..a4f99bbbe 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -181,10 +181,6 @@ void TranslatorVisitor::PEXIT(u64) { ThrowNotImplemented(Opcode::PEXIT); } -void TranslatorVisitor::PIXLD(u64) { - ThrowNotImplemented(Opcode::PIXLD); -} - void TranslatorVisitor::PLONGJMP(u64) { ThrowNotImplemented(Opcode::PLONGJMP); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp new file mode 100644 index 000000000..b4767afb5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Default, + CovMask, + Covered, + Offset, + CentroidOffset, + MyIndex, +}; +} // Anonymous namespace + +void TranslatorVisitor::PIXLD(u64 insn) { + union { + u64 raw; + BitField<31, 3, Mode> mode; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 8, s64> addr_offset; + BitField<45, 3, IR::Pred> dest_pred; + } const pixld{insn}; + + if (pixld.dest_pred != IR::Pred::PT) { + throw NotImplementedException("Destination predicate"); + } + if (pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0) { + throw NotImplementedException("Non-zero source register"); + } + switch (pixld.mode) { + case Mode::MyIndex: + X(pixld.dest_reg, ir.SampleId()); + break; + default: + throw NotImplementedException("Mode {}", pixld.mode.Value()); + } +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From 80940b17069f6baa733a9b572445b27bc7509137 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 16 Apr 2021 18:47:26 -0300 Subject: shader: Implement SampleMask --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 ++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 1 + src/shader_recompiler/frontend/ir/microinstruction.cpp | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../frontend/maxwell/translate/impl/exit_program.cpp | 2 +- 5 files changed, 8 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 141efd86c..ef3b00bc2 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -343,6 +343,10 @@ void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value); } +void IREmitter::SetSampleMask(const U32& value) { + Inst(Opcode::SetSampleMask, value); +} + void IREmitter::SetFragDepth(const F32& value) { Inst(Opcode::SetFragDepth, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 81833d928..1a585df15 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -88,6 +88,7 @@ public: void SetPatch(Patch patch, const F32& value); void SetFragColor(u32 index, u32 component, const F32& value); + void SetSampleMask(const U32& value); void SetFragDepth(const F32& value); [[nodiscard]] U32 WorkgroupIdX(); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index b2d7573d9..b53fe2e2a 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -75,6 +75,7 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::SetAttributeIndexed: case Opcode::SetPatch: case Opcode::SetFragColor: + case Opcode::SetSampleMask: case Opcode::SetFragDepth: case Opcode::WriteGlobalU8: case Opcode::WriteGlobalS8: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index d5e443673..0748efa8d 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -51,6 +51,7 @@ OPCODE(SetAttributeIndexed, Void, U32, OPCODE(GetPatch, F32, Patch, ) OPCODE(SetPatch, Void, Patch, F32, ) OPCODE(SetFragColor, Void, U32, U32, F32, ) +OPCODE(SetSampleMask, Void, U32, ) OPCODE(SetFragDepth, Void, F32, ) OPCODE(GetZFlag, U1, Void, ) OPCODE(GetSFlag, U1, Void, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp index 58a53c0ec..c2443c886 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp @@ -22,7 +22,7 @@ void ExitFragment(TranslatorVisitor& v) { } } if (sph.ps.omap.sample_mask != 0) { - throw NotImplementedException("Sample mask"); + v.ir.SetSampleMask(v.X(src_reg)); } if (sph.ps.omap.depth != 0) { v.ir.SetFragDepth(v.F(src_reg + 1)); -- cgit v1.2.3 From be431f5ed080955cce358e9750347229b2bc9a04 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 17 Apr 2021 00:48:35 -0400 Subject: shader: Implement BFE and BFI CC Fix two bugs in BFI. --- .../maxwell/translate/impl/bitfield_extract.cpp | 11 +++++++---- .../frontend/maxwell/translate/impl/bitfield_insert.cpp | 17 +++++++++-------- 2 files changed, 16 insertions(+), 12 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp index 0738ae7a6..9d5a87e52 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp @@ -18,10 +18,6 @@ void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) { BitField<48, 1, u64> is_signed; } const bfe{insn}; - if (bfe.cc != 0) { - throw NotImplementedException("BFE CC"); - } - const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)}; const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)}; @@ -53,6 +49,13 @@ void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) { result = IR::U32{v.ir.Select(zero_count, zero, result)}; v.X(bfe.dest_reg, result); + + if (bfe.cc != 0) { + v.SetZFlag(v.ir.IEqual(result, zero)); + v.SetSFlag(v.ir.ILessThan(result, zero, true)); + v.ResetCFlag(); + v.ResetOFlag(); + } } } // Anonymous namespace diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp index fb7f821e6..1e1ec2119 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp @@ -16,18 +16,14 @@ void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& ba BitField<47, 1, u64> cc; } const bfi{insn}; - if (bfi.cc != 0) { - throw NotImplementedException("BFI CC"); - } - - const IR::U32 offset{v.ir.BitFieldExtract(src_a, v.ir.Imm32(0), v.ir.Imm32(8), false)}; + const IR::U32 zero{v.ir.Imm32(0)}; + const IR::U32 offset{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)}; const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)}; const IR::U32 max_size{v.ir.Imm32(32)}; // Edge case conditions - const IR::U1 zero_offset{v.ir.IEqual(offset, v.ir.Imm32(0))}; const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)}; - const IR::U1 exceed_count{v.ir.IGreaterThanEqual(unsafe_count, max_size, false)}; + const IR::U1 exceed_count{v.ir.IGreaterThan(unsafe_count, max_size, false)}; const IR::U32 remaining_size{v.ir.ISub(max_size, offset)}; const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)}; @@ -36,9 +32,14 @@ void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& ba IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)}; result = IR::U32{v.ir.Select(exceed_offset, base, result)}; - result = IR::U32{v.ir.Select(zero_offset, base, result)}; v.X(bfi.dest_reg, result); + if (bfi.cc != 0) { + v.SetZFlag(v.ir.IEqual(result, zero)); + v.SetSFlag(v.ir.ILessThan(result, zero, true)); + v.ResetCFlag(); + v.ResetOFlag(); + } } } // Anonymous namespace -- cgit v1.2.3 From 0a0818c0259b4f90f1f7bb37fcffbc1f194ca4d0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 17 Apr 2021 03:21:03 -0300 Subject: shader: Fix memory barriers --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 20 +++++---------- src/shader_recompiler/frontend/ir/ir_emitter.h | 5 ++-- .../frontend/ir/microinstruction.cpp | 5 ++-- src/shader_recompiler/frontend/ir/modifiers.h | 8 ------ src/shader_recompiler/frontend/ir/opcodes.inc | 5 ++-- .../maxwell/translate/impl/barrier_operations.cpp | 30 ++++++++-------------- 6 files changed, 23 insertions(+), 50 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ef3b00bc2..aebe7200f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -86,20 +86,12 @@ void IREmitter::Barrier() { Inst(Opcode::Barrier); } -void IREmitter::MemoryBarrier(MemoryScope scope) { - switch (scope) { - case MemoryScope::Workgroup: - Inst(Opcode::MemoryBarrierWorkgroupLevel); - break; - case MemoryScope::Device: - Inst(Opcode::MemoryBarrierDeviceLevel); - break; - case MemoryScope::System: - Inst(Opcode::MemoryBarrierSystemLevel); - break; - default: - throw InvalidArgument("Invalid memory scope {}", scope); - } +void IREmitter::WorkgroupMemoryBarrier() { + Inst(Opcode::WorkgroupMemoryBarrier); +} + +void IREmitter::DeviceMemoryBarrier() { + Inst(Opcode::DeviceMemoryBarrier); } void IREmitter::Return() { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 1a585df15..b9d051b43 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -144,8 +144,9 @@ public: [[nodiscard]] Value Select(const U1& condition, const Value& true_value, const Value& false_value); - [[nodiscard]] void Barrier(); - [[nodiscard]] void MemoryBarrier(MemoryScope scope); + void Barrier(); + void WorkgroupMemoryBarrier(); + void DeviceMemoryBarrier(); template [[nodiscard]] Dest BitCast(const Source& value); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index b53fe2e2a..efa426808 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -64,9 +64,8 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::Unreachable: case Opcode::DemoteToHelperInvocation: case Opcode::Barrier: - case Opcode::MemoryBarrierWorkgroupLevel: - case Opcode::MemoryBarrierDeviceLevel: - case Opcode::MemoryBarrierSystemLevel: + case Opcode::WorkgroupMemoryBarrier: + case Opcode::DeviceMemoryBarrier: case Opcode::Prologue: case Opcode::Epilogue: case Opcode::EmitVertex: diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 447e9703c..5d7efa14c 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -25,14 +25,6 @@ enum class FpRounding : u8 { RZ, // Round towards zero }; -enum class MemoryScope : u32 { - DontCare, - Warp, - Workgroup, - Device, - System, -}; - struct FpControl { bool no_contraction{false}; FpRounding rounding{FpRounding::DontCare}; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 0748efa8d..1cfc2a943 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -18,9 +18,8 @@ OPCODE(DemoteToHelperInvocation, Void, Labe // Barriers OPCODE(Barrier, Void, ) -OPCODE(MemoryBarrierWorkgroupLevel, Void, ) -OPCODE(MemoryBarrierDeviceLevel, Void, ) -OPCODE(MemoryBarrierSystemLevel, Void, ) +OPCODE(WorkgroupMemoryBarrier, Void, ) +OPCODE(DeviceMemoryBarrier, Void, ) // Special operations OPCODE(Prologue, Void, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp index 2a2a294df..86e433e41 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp @@ -12,34 +12,24 @@ namespace Shader::Maxwell { namespace { // Seems to be in CUDA terminology. enum class LocalScope : u64 { - CTG = 0, - GL = 1, - SYS = 2, - VC = 3, + CTA, + GL, + SYS, + VC, }; - -IR::MemoryScope LocalScopeToMemoryScope(LocalScope scope) { - switch (scope) { - case LocalScope::CTG: - return IR::MemoryScope::Workgroup; - case LocalScope::GL: - return IR::MemoryScope::Device; - case LocalScope::SYS: - return IR::MemoryScope::System; - default: - throw NotImplementedException("Unimplemented Local Scope {}", scope); - } -} - } // Anonymous namespace void TranslatorVisitor::MEMBAR(u64 inst) { union { u64 raw; BitField<8, 2, LocalScope> scope; - } membar{inst}; + } const membar{inst}; - ir.MemoryBarrier(LocalScopeToMemoryScope(membar.scope)); + if (membar.scope == LocalScope::CTA) { + ir.WorkgroupMemoryBarrier(); + } else { + ir.DeviceMemoryBarrier(); + } } void TranslatorVisitor::DEPBAR() { -- cgit v1.2.3 From 50f8007172ce143a632270510f96093c82018952 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 17 Apr 2021 16:40:35 -0300 Subject: shader: Fix Phi node types --- src/shader_recompiler/frontend/ir/microinstruction.cpp | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index efa426808..7555ac00a 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -275,10 +275,6 @@ void Inst::AddPhiOperand(Block* predecessor, const Value& value) { if (!value.IsImmediate()) { Use(value); } - if (Flags() == IR::Type::Void) { - // Set the type of the phi node - SetFlags(value.Type()); - } phi_args.emplace_back(predecessor, value); } -- cgit v1.2.3 From f18a6dd1bdaffda4c3e771af3cf7cf41919ebd67 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 16 Apr 2021 23:52:58 +0200 Subject: shader: Implement SR_Y_DIRECTION --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 ++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../frontend/maxwell/translate/impl/move_special_register.cpp | 2 ++ 4 files changed, 8 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index aebe7200f..c3e8d0681 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -379,6 +379,10 @@ U1 IREmitter::IsHelperInvocation() { return Inst(Opcode::IsHelperInvocation); } +F32 IREmitter::YDirection() { + return Inst(Opcode::YDirection); +} + U32 IREmitter::LaneId() { return Inst(Opcode::LaneId); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index b9d051b43..7e67f5e30 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -102,6 +102,7 @@ public: [[nodiscard]] U32 InvocationId(); [[nodiscard]] U32 SampleId(); [[nodiscard]] U1 IsHelperInvocation(); + [[nodiscard]] F32 YDirection(); [[nodiscard]] U32 LaneId(); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 1cfc2a943..269de8ca5 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -65,6 +65,7 @@ OPCODE(LocalInvocationId, U32x3, OPCODE(InvocationId, U32, ) OPCODE(SampleId, U32, ) OPCODE(IsHelperInvocation, U1, ) +OPCODE(YDirection, F32, ) // Undefined OPCODE(UndefU1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index 660b84c20..b0baff74b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -150,6 +150,8 @@ enum class SpecialRegister : u64 { return ir.SubgroupGtMask(); case SpecialRegister::SR_GEMASK: return ir.SubgroupGeMask(); + case SpecialRegister::SR_Y_DIRECTION: + return ir.BitCast(ir.YDirection()); default: throw NotImplementedException("S2R special register {}", special_register); } -- cgit v1.2.3 From 04c459fc8d99b41fa8a03c49523599e9bf797f9d Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 17 Apr 2021 11:56:45 +0200 Subject: shader: Implement fine derivates constant propagation --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 8 ++++++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 4 ++++ src/shader_recompiler/frontend/ir/opcodes.inc | 2 ++ 3 files changed, 14 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index c3e8d0681..845a57b1e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1925,4 +1925,12 @@ F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpCon return Inst(Opcode::FSwizzleAdd, Flags{control}, a, b, swizzle); } +F32 IREmitter::DPdxFine(const F32& a) { + return Inst(Opcode::DPdxFine, a); +} + +F32 IREmitter::DPdyFine(const F32& a) { + return Inst(Opcode::DPdyFine, a); +} + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 7e67f5e30..c7101d668 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -353,6 +353,10 @@ public: [[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpControl control = {}); + [[nodiscard]] F32 DPdxFine(const F32& a); + + [[nodiscard]] F32 DPdyFine(const F32& a); + private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 269de8ca5..e4cb8964a 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -511,3 +511,5 @@ OPCODE(ShuffleUp, U32, U32, OPCODE(ShuffleDown, U32, U32, U32, U32, U32, ) OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, ) OPCODE(FSwizzleAdd, F32, F32, F32, U32, ) +OPCODE(DPdxFine, F32, F32, ) +OPCODE(DPdyFine, F32, F32, ) -- cgit v1.2.3 From 080857b60e78836901cf6e9601f48613812fcd04 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 17 Apr 2021 12:51:43 +0200 Subject: shader: Add coarse derivatives --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 8 ++++++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 4 ++++ src/shader_recompiler/frontend/ir/opcodes.inc | 2 ++ 3 files changed, 14 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 845a57b1e..b3c9fe72a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1933,4 +1933,12 @@ F32 IREmitter::DPdyFine(const F32& a) { return Inst(Opcode::DPdyFine, a); } +F32 IREmitter::DPdxCoarse(const F32& a) { + return Inst(Opcode::DPdxCoarse, a); +} + +F32 IREmitter::DPdyCoarse(const F32& a) { + return Inst(Opcode::DPdyCoarse, a); +} + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index c7101d668..4441c495d 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -357,6 +357,10 @@ public: [[nodiscard]] F32 DPdyFine(const F32& a); + [[nodiscard]] F32 DPdxCoarse(const F32& a); + + [[nodiscard]] F32 DPdyCoarse(const F32& a); + private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index e4cb8964a..b6869d4e4 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -513,3 +513,5 @@ OPCODE(ShuffleButterfly, U32, U32, OPCODE(FSwizzleAdd, F32, F32, F32, U32, ) OPCODE(DPdxFine, F32, F32, ) OPCODE(DPdyFine, F32, F32, ) +OPCODE(DPdxCoarse, F32, F32, ) +OPCODE(DPdyCoarse, F32, F32, ) -- cgit v1.2.3 From 21a878237bcc6f19f41a4bce156714fd76be9d58 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 14 Apr 2021 03:41:37 +0200 Subject: shader: Implement IADD3.CC/.X --- .../translate/impl/integer_add_three_input.cpp | 29 ++++++++++++++++------ 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp index c2dbd7998..e88c0ffb6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -58,13 +58,6 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { BitField<51, 1, u64> neg_a; } iadd3{insn}; - if (iadd3.x != 0) { - throw NotImplementedException("IADD3 X"); - } - if (iadd3.cc != 0) { - throw NotImplementedException("IADD3 CC"); - } - IR::U32 op_a{v.X(iadd3.src_a)}; op_a = IntegerHalf(v.ir, op_a, iadd3.half_a); op_b = IntegerHalf(v.ir, op_b, iadd3.half_b); @@ -81,10 +74,32 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { } IR::U32 lhs{v.ir.IAdd(op_a, op_b)}; + IR::U1 of_1; + if (iadd3.cc != 0) { + of_1 = v.ir.GetOverflowFromOp(lhs); + } + if (iadd3.x != 0) { + const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; + lhs = v.ir.IAdd(lhs, carry); + } + if (iadd3.cc != 0 && iadd3.shift == Shift::Left) { + IR::U32 high_bits{v.ir.ShiftRightLogical(lhs, v.ir.Imm32(16))}; + of_1 = v.ir.LogicalOr(of_1, v.ir.INotEqual(v.ir.Imm32(0), high_bits)); + } lhs = IntegerShift(v.ir, lhs, iadd3.shift); const IR::U32 result{v.ir.IAdd(lhs, op_c)}; v.X(iadd3.dest_reg, result); + if (iadd3.cc != 0) { + // TODO: How does CC behave when X is set? + if (iadd3.x != 0) { + throw NotImplementedException("IADD3 X+CC"); + } + v.SetZFlag(v.ir.GetZeroFromOp(result)); + v.SetSFlag(v.ir.GetSignFromOp(result)); + v.SetCFlag(v.ir.GetCarryFromOp(result)); + v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1)); + } } } // Anonymous namespace -- cgit v1.2.3 From 881b33da3ba16fc105c6ccd20f6fbc9c4552ead9 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 14 Apr 2021 03:42:40 +0200 Subject: shader: Implement F2F (Imm) --- .../floating_point_conversion_floating_point.cpp | 30 ++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp index ce2cf470d..61484df57 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -179,7 +179,33 @@ void TranslatorVisitor::F2F_cbuf(u64 insn) { } void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) { - throw NotImplementedException("Instruction"); -} + union { + u64 insn; + BitField<49, 1, u64> abs; + BitField<10, 2, FloatFormat> src_size; + BitField<41, 1, u64> selector; + BitField<20, 20, u64> imm; + + } const f2f{insn}; + + IR::F16F32F64 src_a; + switch (f2f.src_size) { + case FloatFormat::F16: { + const u32 imm{static_cast(f2f.imm & 0x00ffff)}; + IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))}; + src_a = IR::F16{ir.CompositeExtract(vector, 0)}; + break; + } + case FloatFormat::F32: + src_a = GetFloatImm20(insn); + break; + case FloatFormat::F64: + src_a = GetDoubleImm20(insn); + break; + default: + throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); + } + F2F(*this, insn, src_a, f2f.abs != 0); +} // namespace Shader::Maxwell } // namespace Shader::Maxwell -- cgit v1.2.3 From 29990289767c41c162473c9775ad3ba08e7ee9ea Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 18 Apr 2021 10:08:22 +0200 Subject: shader: Address feedback --- .../impl/floating_point_conversion_floating_point.cpp | 15 +++++++++------ .../maxwell/translate/impl/integer_add_three_input.cpp | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp index 61484df57..02ab023c1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -184,16 +184,19 @@ void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) { BitField<49, 1, u64> abs; BitField<10, 2, FloatFormat> src_size; BitField<41, 1, u64> selector; - BitField<20, 20, u64> imm; - + BitField<20, 19, u64> imm; + BitField<56, 1, u64> imm_neg; } const f2f{insn}; IR::F16F32F64 src_a; switch (f2f.src_size) { case FloatFormat::F16: { - const u32 imm{static_cast(f2f.imm & 0x00ffff)}; - IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))}; - src_a = IR::F16{ir.CompositeExtract(vector, 0)}; + const u32 imm{static_cast(f2f.imm & 0x0000ffff)}; + const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))}; + src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)}; + if (f2f.imm_neg != 0) { + throw NotImplementedException("Neg bit on F16"); + } break; } case FloatFormat::F32: @@ -206,6 +209,6 @@ void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) { throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); } F2F(*this, insn, src_a, f2f.abs != 0); -} // namespace Shader::Maxwell +} } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp index e88c0ffb6..15da90365 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -83,7 +83,7 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { lhs = v.ir.IAdd(lhs, carry); } if (iadd3.cc != 0 && iadd3.shift == Shift::Left) { - IR::U32 high_bits{v.ir.ShiftRightLogical(lhs, v.ir.Imm32(16))}; + const IR::U32 high_bits{v.ir.ShiftRightLogical(lhs, v.ir.Imm32(16))}; of_1 = v.ir.LogicalOr(of_1, v.ir.INotEqual(v.ir.Imm32(0), high_bits)); } lhs = IntegerShift(v.ir, lhs, iadd3.shift); -- cgit v1.2.3 From 7018e524f5e6217b3259333acc4ea09ad036d331 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 19 Apr 2021 16:33:23 -0300 Subject: shader: Add NVN storage buffer fallbacks When we can't track the SSBO origin of a global memory instruction, leave it as a global memory operation and assume these pointers are in the NVN storage buffer slots, then apply a linear search in the shader's runtime. --- src/shader_recompiler/frontend/maxwell/program.cpp | 43 ++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 20a1d61cc..14180dcd9 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -60,6 +60,48 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) { }(); } } + +void AddNVNStorageBuffers(IR::Program& program) { + if (!program.info.uses_global_memory) { + return; + } + const u32 driver_cbuf{0}; + const u32 descriptor_size{0x10}; + const u32 num_buffers{16}; + const u32 base{[&] { + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + return 0x110u; + case Stage::TessellationControl: + return 0x210u; + case Stage::TessellationEval: + return 0x310u; + case Stage::Geometry: + return 0x410u; + case Stage::Fragment: + return 0x510u; + case Stage::Compute: + return 0x310u; + } + throw InvalidArgument("Invalid stage {}", program.stage); + }()}; + auto& descs{program.info.storage_buffers_descriptors}; + for (u32 index = 0; index < num_buffers; ++index) { + const u32 offset{base + index * descriptor_size}; + const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; + if (it != descs.end()) { + continue; + } + // Assume these are written for now + descs.push_back({ + .cbuf_index = driver_cbuf, + .cbuf_offset = offset, + .count = 1, + .is_written = true, + }); + } +} } // Anonymous namespace IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, @@ -105,6 +147,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Mon, 19 Apr 2021 16:36:17 -0300 Subject: shader: Simplify code for local memory --- .../maxwell/translate/impl/load_store_local_shared.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp index e24b49721..20df163f2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp @@ -34,6 +34,15 @@ IR::U32 Offset(TranslatorVisitor& v, u64 insn) { } } +std::pair WordOffset(TranslatorVisitor& v, u64 insn) { + const IR::U32 offset{Offset(v, insn)}; + if (offset.IsImmediate()) { + return {v.ir.Imm32(offset.U32() / 4), offset}; + } else { + return {v.ir.ShiftRightArithmetic(offset, v.ir.Imm32(2)), offset}; + } +} + std::pair GetSize(u64 insn) { union { u64 raw; @@ -79,9 +88,7 @@ IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) { } // Anonymous namespace void TranslatorVisitor::LDL(u64 insn) { - const IR::U32 offset{Offset(*this, insn)}; - const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))}; - + const auto [word_offset, offset]{WordOffset(*this, insn)}; const IR::Reg dest{Reg(insn)}; const auto [bit_size, is_signed]{GetSize(insn)}; switch (bit_size) { @@ -133,9 +140,7 @@ void TranslatorVisitor::LDS(u64 insn) { } void TranslatorVisitor::STL(u64 insn) { - const IR::U32 offset{Offset(*this, insn)}; - const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))}; - + const auto [word_offset, offset]{WordOffset(*this, insn)}; const IR::Reg reg{Reg(insn)}; const IR::U32 src{X(reg)}; const int bit_size{GetSize(insn).first}; -- cgit v1.2.3 From 79c2e43fcd5a254121d48e6957ac159041c4fac0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Apr 2021 22:20:46 -0300 Subject: shader: Calculate number of arguments in an opcode at compile time --- src/shader_recompiler/frontend/ir/opcodes.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 7f04b647b..4207d548c 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -57,6 +57,17 @@ constexpr std::array META_TABLE{ #undef OPCODE }; +constexpr size_t CalculateNumArgsOf(Opcode op) { + const auto& arg_types{META_TABLE[static_cast(op)].arg_types}; + return std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void)); +} + +constexpr std::array NUM_ARGS{ +#define OPCODE(name_token, type_token, ...) CalculateNumArgsOf(Opcode::name_token), +#include "opcodes.inc" +#undef OPCODE +}; + void ValidateOpcode(Opcode op) { const size_t raw{static_cast(op)}; if (raw >= META_TABLE.size()) { @@ -72,9 +83,7 @@ Type TypeOf(Opcode op) { size_t NumArgsOf(Opcode op) { ValidateOpcode(op); - const auto& arg_types{META_TABLE[static_cast(op)].arg_types}; - const auto distance{std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void))}; - return static_cast(distance); + return NUM_ARGS[static_cast(op)]; } Type ArgTypeOf(Opcode op, size_t arg_index) { -- cgit v1.2.3 From 24cc29866036350e538100a64ea8bcd1bfba1468 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Apr 2021 22:21:16 -0300 Subject: shader: Use a small_vector for phi blocks --- src/shader_recompiler/frontend/ir/microinstruction.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 97dc91d85..dc9f683fe 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -10,6 +10,7 @@ #include #include +#include #include #include "common/bit_cast.h" @@ -131,7 +132,7 @@ private: u32 definition{}; union { NonTriviallyDummy dummy{}; - std::vector> phi_args; + boost::container::small_vector, 2> phi_args; std::array args; }; std::unique_ptr associated_insts; -- cgit v1.2.3 From 4bbe5303376e693d15d7de80b25f5fda783281ce Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Apr 2021 22:28:06 -0300 Subject: shader: Inline common IR::Block methods --- src/shader_recompiler/frontend/ir/basic_block.cpp | 12 ------------ src/shader_recompiler/frontend/ir/basic_block.h | 17 ++++++++++++----- 2 files changed, 12 insertions(+), 17 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index e1f0191f4..f92fc2571 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -69,24 +69,12 @@ u32 Block::LocationEnd() const noexcept { return location_end; } -Block::InstructionList& Block::Instructions() noexcept { - return instructions; -} - -const Block::InstructionList& Block::Instructions() const noexcept { - return instructions; -} - void Block::AddImmediatePredecessor(Block* block) { if (std::ranges::find(imm_predecessors, block) == imm_predecessors.end()) { imm_predecessors.push_back(block); } } -std::span Block::ImmediatePredecessors() const noexcept { - return imm_predecessors; -} - static std::string BlockToIndex(const std::map& block_to_index, Block* block) { if (const auto it{block_to_index.find(block)}; it != block_to_index.end()) { diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index b14a35ec5..6a1d615d9 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -59,15 +59,22 @@ public: /// Gets the end location for this basic block. [[nodiscard]] u32 LocationEnd() const noexcept; + /// Adds a new immediate predecessor to this basic block. + void AddImmediatePredecessor(Block* block); + /// Gets a mutable reference to the instruction list for this basic block. - [[nodiscard]] InstructionList& Instructions() noexcept; + [[nodiscard]] InstructionList& Instructions() noexcept { + return instructions; + } /// Gets an immutable reference to the instruction list for this basic block. - [[nodiscard]] const InstructionList& Instructions() const noexcept; + [[nodiscard]] const InstructionList& Instructions() const noexcept { + return instructions; + } - /// Adds a new immediate predecessor to this basic block. - void AddImmediatePredecessor(Block* block); /// Gets an immutable span to the immediate predecessors. - [[nodiscard]] std::span ImmediatePredecessors() const noexcept; + [[nodiscard]] std::span ImmediatePredecessors() const noexcept { + return imm_predecessors; + } /// Intrusively store the host definition of this instruction. template -- cgit v1.2.3 From 6944cabb899c4367a63cde97ae2bc2eb1a0fb790 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 00:25:46 -0300 Subject: shader: Inline common Opcode and Inst functions --- .../frontend/ir/microinstruction.cpp | 18 ----- .../frontend/ir/microinstruction.h | 13 +++- src/shader_recompiler/frontend/ir/opcodes.cpp | 90 +--------------------- src/shader_recompiler/frontend/ir/opcodes.h | 74 +++++++++++++++++- 4 files changed, 83 insertions(+), 112 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 7555ac00a..41f9fa0cd 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -221,28 +221,10 @@ Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { } } -size_t Inst::NumArgs() const { - return op == Opcode::Phi ? phi_args.size() : NumArgsOf(op); -} - IR::Type Inst::Type() const { return TypeOf(op); } -Value Inst::Arg(size_t index) const { - if (op == Opcode::Phi) { - if (index >= phi_args.size()) { - throw InvalidArgument("Out of bounds argument index {} in phi instruction", index); - } - return phi_args[index].second; - } else { - if (index >= NumArgsOf(op)) { - throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); - } - return args[index]; - } -} - void Inst::SetArg(size_t index, Value value) { if (index >= NumArgs()) { throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index dc9f683fe..ea55fc29c 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -73,10 +73,19 @@ public: [[nodiscard]] IR::Type Type() const; /// Get the number of arguments this instruction has. - [[nodiscard]] size_t NumArgs() const; + [[nodiscard]] size_t NumArgs() const { + return op == Opcode::Phi ? phi_args.size() : NumArgsOf(op); + } /// Get the value of a given argument index. - [[nodiscard]] Value Arg(size_t index) const; + [[nodiscard]] Value Arg(size_t index) const noexcept { + if (op == Opcode::Phi) { + return phi_args[index].second; + } else { + return args[index]; + } + } + /// Set the value of a given argument index. void SetArg(size_t index, Value value); diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 4207d548c..24d024ad7 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -2,102 +2,14 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include -#include #include -#include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/opcodes.h" namespace Shader::IR { -namespace { -struct OpcodeMeta { - std::string_view name; - Type type; - std::array arg_types; -}; - -// using enum Type; -constexpr Type Void{Type::Void}; -constexpr Type Opaque{Type::Opaque}; -constexpr Type Label{Type::Label}; -constexpr Type Reg{Type::Reg}; -constexpr Type Pred{Type::Pred}; -constexpr Type Attribute{Type::Attribute}; -constexpr Type Patch{Type::Patch}; -constexpr Type U1{Type::U1}; -constexpr Type U8{Type::U8}; -constexpr Type U16{Type::U16}; -constexpr Type U32{Type::U32}; -constexpr Type U64{Type::U64}; -constexpr Type F16{Type::F16}; -constexpr Type F32{Type::F32}; -constexpr Type F64{Type::F64}; -constexpr Type U32x2{Type::U32x2}; -constexpr Type U32x3{Type::U32x3}; -constexpr Type U32x4{Type::U32x4}; -constexpr Type F16x2{Type::F16x2}; -constexpr Type F16x3{Type::F16x3}; -constexpr Type F16x4{Type::F16x4}; -constexpr Type F32x2{Type::F32x2}; -constexpr Type F32x3{Type::F32x3}; -constexpr Type F32x4{Type::F32x4}; -constexpr Type F64x2{Type::F64x2}; -constexpr Type F64x3{Type::F64x3}; -constexpr Type F64x4{Type::F64x4}; - -constexpr std::array META_TABLE{ -#define OPCODE(name_token, type_token, ...) \ - OpcodeMeta{ \ - .name{#name_token}, \ - .type = type_token, \ - .arg_types{__VA_ARGS__}, \ - }, -#include "opcodes.inc" -#undef OPCODE -}; - -constexpr size_t CalculateNumArgsOf(Opcode op) { - const auto& arg_types{META_TABLE[static_cast(op)].arg_types}; - return std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void)); -} - -constexpr std::array NUM_ARGS{ -#define OPCODE(name_token, type_token, ...) CalculateNumArgsOf(Opcode::name_token), -#include "opcodes.inc" -#undef OPCODE -}; - -void ValidateOpcode(Opcode op) { - const size_t raw{static_cast(op)}; - if (raw >= META_TABLE.size()) { - throw InvalidArgument("Invalid opcode with raw value {}", raw); - } -} -} // Anonymous namespace - -Type TypeOf(Opcode op) { - ValidateOpcode(op); - return META_TABLE[static_cast(op)].type; -} - -size_t NumArgsOf(Opcode op) { - ValidateOpcode(op); - return NUM_ARGS[static_cast(op)]; -} - -Type ArgTypeOf(Opcode op, size_t arg_index) { - ValidateOpcode(op); - const auto& arg_types{META_TABLE[static_cast(op)].arg_types}; - if (arg_index >= arg_types.size() || arg_types[arg_index] == Type::Void) { - throw InvalidArgument("Out of bounds argument"); - } - return arg_types[arg_index]; -} std::string_view NameOf(Opcode op) { - ValidateOpcode(op); - return META_TABLE[static_cast(op)].name; + return Detail::META_TABLE[static_cast(op)].name; } } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h index 999fb2e77..b5697c7f9 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.h +++ b/src/shader_recompiler/frontend/ir/opcodes.h @@ -4,6 +4,8 @@ #pragma once +#include +#include #include #include @@ -18,14 +20,80 @@ enum class Opcode { #undef OPCODE }; +namespace Detail { + +struct OpcodeMeta { + std::string_view name; + Type type; + std::array arg_types; +}; + +// using enum Type; +constexpr Type Void{Type::Void}; +constexpr Type Opaque{Type::Opaque}; +constexpr Type Label{Type::Label}; +constexpr Type Reg{Type::Reg}; +constexpr Type Pred{Type::Pred}; +constexpr Type Attribute{Type::Attribute}; +constexpr Type Patch{Type::Patch}; +constexpr Type U1{Type::U1}; +constexpr Type U8{Type::U8}; +constexpr Type U16{Type::U16}; +constexpr Type U32{Type::U32}; +constexpr Type U64{Type::U64}; +constexpr Type F16{Type::F16}; +constexpr Type F32{Type::F32}; +constexpr Type F64{Type::F64}; +constexpr Type U32x2{Type::U32x2}; +constexpr Type U32x3{Type::U32x3}; +constexpr Type U32x4{Type::U32x4}; +constexpr Type F16x2{Type::F16x2}; +constexpr Type F16x3{Type::F16x3}; +constexpr Type F16x4{Type::F16x4}; +constexpr Type F32x2{Type::F32x2}; +constexpr Type F32x3{Type::F32x3}; +constexpr Type F32x4{Type::F32x4}; +constexpr Type F64x2{Type::F64x2}; +constexpr Type F64x3{Type::F64x3}; +constexpr Type F64x4{Type::F64x4}; + +constexpr std::array META_TABLE{ +#define OPCODE(name_token, type_token, ...) \ + OpcodeMeta{ \ + .name{#name_token}, \ + .type = type_token, \ + .arg_types{__VA_ARGS__}, \ + }, +#include "opcodes.inc" +#undef OPCODE +}; + +constexpr size_t CalculateNumArgsOf(Opcode op) { + const auto& arg_types{META_TABLE[static_cast(op)].arg_types}; + return std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void)); +} + +constexpr std::array NUM_ARGS{ +#define OPCODE(name_token, type_token, ...) CalculateNumArgsOf(Opcode::name_token), +#include "opcodes.inc" +#undef OPCODE +}; +} // namespace Detail + /// Get return type of an opcode -[[nodiscard]] Type TypeOf(Opcode op); +[[nodiscard]] inline Type TypeOf(Opcode op) noexcept { + return Detail::META_TABLE[static_cast(op)].type; +} /// Get the number of arguments an opcode accepts -[[nodiscard]] size_t NumArgsOf(Opcode op); +[[nodiscard]] inline size_t NumArgsOf(Opcode op) noexcept { + return Detail::NUM_ARGS[static_cast(op)]; +} /// Get the required type of an argument of an opcode -[[nodiscard]] Type ArgTypeOf(Opcode op, size_t arg_index); +[[nodiscard]] inline Type ArgTypeOf(Opcode op, size_t arg_index) noexcept { + return Detail::META_TABLE[static_cast(op)].arg_types[arg_index]; +} /// Get the name of an opcode [[nodiscard]] std::string_view NameOf(Opcode op); -- cgit v1.2.3 From 420982864634a5e52cea42c43f8623f75483fbcc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 00:27:55 -0300 Subject: shader: Intrusively store register values in block for SSA pass --- src/shader_recompiler/frontend/ir/basic_block.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 6a1d615d9..3a4230755 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -101,6 +101,13 @@ public: return branch_false; } + void SetSsaRegValue(IR::Reg reg, const Value& value) noexcept { + ssa_reg_values[RegIndex(reg)] = value; + } + const Value& SsaRegValue(IR::Reg reg) const noexcept { + return ssa_reg_values[RegIndex(reg)]; + } + [[nodiscard]] bool empty() const { return instructions.empty(); } @@ -182,6 +189,9 @@ private: /// Block immediate predecessors std::vector imm_predecessors; + /// Intrusively store the value of a register in the block. + std::array ssa_reg_values; + /// Intrusively stored host definition of this block. u32 definition{}; }; -- cgit v1.2.3 From e4d1122082e74410baac6677c850fea1a0be5c52 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 00:35:08 -0300 Subject: shader: Move siblings check to a separate function and comment them out --- .../frontend/maxwell/structured_control_flow.cpp | 37 ++++++++++++---------- 1 file changed, 21 insertions(+), 16 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index e63e25aa6..6021ac891 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -304,6 +304,23 @@ bool SearchNode(const Tree& tree, ConstNode stmt, size_t& offset) { return false; } +bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept { + Node it{goto_stmt}; + do { + if (it == label_stmt) { + return true; + } + --it; + } while (it != goto_stmt->up->children.begin()); + while (it != goto_stmt->up->children.end()) { + if (it == label_stmt) { + return true; + } + ++it; + } + return false; +} + class GotoPass { public: explicit GotoPass(Flow::CFG& cfg, ObjectPool& inst_pool_, @@ -353,22 +370,10 @@ private: } } } - // TODO: Remove this - { - Node it{goto_stmt}; - bool sibling{false}; - do { - sibling |= it == label_stmt; - --it; - } while (it != goto_stmt->up->children.begin()); - while (it != goto_stmt->up->children.end()) { - sibling |= it == label_stmt; - ++it; - } - if (!sibling) { - throw LogicError("Not siblings"); - } - } + // Expensive operation: + // if (!AreSiblings(goto_stmt, label_stmt)) { + // throw LogicError("Goto is not a sibling with the label"); + // } // goto_stmt and label_stmt are guaranteed to be siblings, eliminate if (std::next(goto_stmt) == label_stmt) { // Simply eliminate the goto if the label is next to it -- cgit v1.2.3 From 050e81500c002f304d581f28700de549b828a2bc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 00:35:47 -0300 Subject: shader: Move microinstruction header to the value header --- src/shader_recompiler/frontend/ir/basic_block.h | 1 - .../frontend/ir/breadth_first_search.h | 1 - .../frontend/ir/microinstruction.cpp | 2 +- .../frontend/ir/microinstruction.h | 162 --------------------- src/shader_recompiler/frontend/ir/program.cpp | 2 +- src/shader_recompiler/frontend/ir/value.cpp | 1 - src/shader_recompiler/frontend/ir/value.h | 151 ++++++++++++++++++- .../frontend/maxwell/structured_control_flow.h | 2 +- 8 files changed, 153 insertions(+), 169 deletions(-) delete mode 100644 src/shader_recompiler/frontend/ir/microinstruction.h (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 3a4230755..ab7ddb3d5 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -13,7 +13,6 @@ #include "common/bit_cast.h" #include "shader_recompiler/frontend/ir/condition.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/object_pool.h" diff --git a/src/shader_recompiler/frontend/ir/breadth_first_search.h b/src/shader_recompiler/frontend/ir/breadth_first_search.h index b35f062d4..a52ccbd58 100644 --- a/src/shader_recompiler/frontend/ir/breadth_first_search.h +++ b/src/shader_recompiler/frontend/ir/breadth_first_search.h @@ -10,7 +10,6 @@ #include -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::IR { diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 41f9fa0cd..701746a0c 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -6,8 +6,8 @@ #include #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/type.h" +#include "shader_recompiler/frontend/ir/value.h" namespace Shader::IR { namespace { diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h deleted file mode 100644 index ea55fc29c..000000000 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ /dev/null @@ -1,162 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include - -#include -#include - -#include "common/bit_cast.h" -#include "common/common_types.h" -#include "shader_recompiler/frontend/ir/opcodes.h" -#include "shader_recompiler/frontend/ir/type.h" -#include "shader_recompiler/frontend/ir/value.h" - -namespace Shader::IR { - -class Block; - -struct AssociatedInsts; - -class Inst : public boost::intrusive::list_base_hook<> { -public: - explicit Inst(Opcode op_, u32 flags_) noexcept; - ~Inst(); - - Inst& operator=(const Inst&) = delete; - Inst(const Inst&) = delete; - - Inst& operator=(Inst&&) = delete; - Inst(Inst&&) = delete; - - /// Get the number of uses this instruction has. - [[nodiscard]] int UseCount() const noexcept { - return use_count; - } - - /// Determines whether this instruction has uses or not. - [[nodiscard]] bool HasUses() const noexcept { - return use_count > 0; - } - - /// Get the opcode this microinstruction represents. - [[nodiscard]] IR::Opcode GetOpcode() const noexcept { - return op; - } - - /// Determines if there is a pseudo-operation associated with this instruction. - [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept { - return associated_insts != nullptr; - } - - /// Determines whether or not this instruction may have side effects. - [[nodiscard]] bool MayHaveSideEffects() const noexcept; - - /// Determines whether or not this instruction is a pseudo-instruction. - /// Pseudo-instructions depend on their parent instructions for their semantics. - [[nodiscard]] bool IsPseudoInstruction() const noexcept; - - /// Determines if all arguments of this instruction are immediates. - [[nodiscard]] bool AreAllArgsImmediates() const; - - /// Gets a pseudo-operation associated with this instruction - [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); - - /// Get the type this instruction returns. - [[nodiscard]] IR::Type Type() const; - - /// Get the number of arguments this instruction has. - [[nodiscard]] size_t NumArgs() const { - return op == Opcode::Phi ? phi_args.size() : NumArgsOf(op); - } - - /// Get the value of a given argument index. - [[nodiscard]] Value Arg(size_t index) const noexcept { - if (op == Opcode::Phi) { - return phi_args[index].second; - } else { - return args[index]; - } - } - - /// Set the value of a given argument index. - void SetArg(size_t index, Value value); - - /// Get a pointer to the block of a phi argument. - [[nodiscard]] Block* PhiBlock(size_t index) const; - /// Add phi operand to a phi instruction. - void AddPhiOperand(Block* predecessor, const Value& value); - - void Invalidate(); - void ClearArgs(); - - void ReplaceUsesWith(Value replacement); - - void ReplaceOpcode(IR::Opcode opcode); - - template - requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) - [[nodiscard]] FlagsType Flags() const noexcept { - FlagsType ret; - std::memcpy(reinterpret_cast(&ret), &flags, sizeof(ret)); - return ret; - } - - template - requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) - [[nodiscard]] void SetFlags(FlagsType value) noexcept { - std::memcpy(&flags, &value, sizeof(value)); - } - - /// Intrusively store the host definition of this instruction. - template - void SetDefinition(DefinitionType def) { - definition = Common::BitCast(def); - } - - /// Return the intrusively stored host definition of this instruction. - template - [[nodiscard]] DefinitionType Definition() const noexcept { - return Common::BitCast(definition); - } - -private: - struct NonTriviallyDummy { - NonTriviallyDummy() noexcept {} - }; - - void Use(const Value& value); - void UndoUse(const Value& value); - - IR::Opcode op{}; - int use_count{}; - u32 flags{}; - u32 definition{}; - union { - NonTriviallyDummy dummy{}; - boost::container::small_vector, 2> phi_args; - std::array args; - }; - std::unique_ptr associated_insts; -}; -static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); - -struct AssociatedInsts { - union { - Inst* in_bounds_inst; - Inst* sparse_inst; - Inst* zero_inst{}; - }; - Inst* sign_inst{}; - Inst* carry_inst{}; - Inst* overflow_inst{}; -}; - -} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp index 89a17fb1b..3fc06f855 100644 --- a/src/shader_recompiler/frontend/ir/program.cpp +++ b/src/shader_recompiler/frontend/ir/program.cpp @@ -8,8 +8,8 @@ #include #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" namespace Shader::IR { diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index bf5f8c0c2..a8a919e0e 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/opcodes.h" #include "shader_recompiler/frontend/ir/value.h" diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 303745563..d90a68b37 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -4,19 +4,34 @@ #pragma once +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "common/bit_cast.h" #include "common/common_types.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/attribute.h" +#include "shader_recompiler/frontend/ir/opcodes.h" +#include "shader_recompiler/frontend/ir/patch.h" #include "shader_recompiler/frontend/ir/pred.h" #include "shader_recompiler/frontend/ir/reg.h" -#include "shader_recompiler/frontend/ir/patch.h" #include "shader_recompiler/frontend/ir/type.h" +#include "shader_recompiler/frontend/ir/value.h" namespace Shader::IR { class Block; class Inst; +struct AssociatedInsts; + class Value { public: Value() noexcept : type{IR::Type::Void}, inst{nullptr} {} @@ -101,6 +116,140 @@ public: explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {} }; +class Inst : public boost::intrusive::list_base_hook<> { +public: + explicit Inst(IR::Opcode op_, u32 flags_) noexcept; + ~Inst(); + + Inst& operator=(const Inst&) = delete; + Inst(const Inst&) = delete; + + Inst& operator=(Inst&&) = delete; + Inst(Inst&&) = delete; + + /// Get the number of uses this instruction has. + [[nodiscard]] int UseCount() const noexcept { + return use_count; + } + + /// Determines whether this instruction has uses or not. + [[nodiscard]] bool HasUses() const noexcept { + return use_count > 0; + } + + /// Get the opcode this microinstruction represents. + [[nodiscard]] IR::Opcode GetOpcode() const noexcept { + return op; + } + + /// Determines if there is a pseudo-operation associated with this instruction. + [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept { + return associated_insts != nullptr; + } + + /// Determines whether or not this instruction may have side effects. + [[nodiscard]] bool MayHaveSideEffects() const noexcept; + + /// Determines whether or not this instruction is a pseudo-instruction. + /// Pseudo-instructions depend on their parent instructions for their semantics. + [[nodiscard]] bool IsPseudoInstruction() const noexcept; + + /// Determines if all arguments of this instruction are immediates. + [[nodiscard]] bool AreAllArgsImmediates() const; + + /// Gets a pseudo-operation associated with this instruction + [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); + + /// Get the type this instruction returns. + [[nodiscard]] IR::Type Type() const; + + /// Get the number of arguments this instruction has. + [[nodiscard]] size_t NumArgs() const { + return op == IR::Opcode::Phi ? phi_args.size() : NumArgsOf(op); + } + + /// Get the value of a given argument index. + [[nodiscard]] Value Arg(size_t index) const noexcept { + if (op == IR::Opcode::Phi) { + return phi_args[index].second; + } else { + return args[index]; + } + } + + /// Set the value of a given argument index. + void SetArg(size_t index, Value value); + + /// Get a pointer to the block of a phi argument. + [[nodiscard]] Block* PhiBlock(size_t index) const; + /// Add phi operand to a phi instruction. + void AddPhiOperand(Block* predecessor, const Value& value); + + void Invalidate(); + void ClearArgs(); + + void ReplaceUsesWith(Value replacement); + + void ReplaceOpcode(IR::Opcode opcode); + + template + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) + [[nodiscard]] FlagsType Flags() const noexcept { + FlagsType ret; + std::memcpy(reinterpret_cast(&ret), &flags, sizeof(ret)); + return ret; + } + + template + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) + [[nodiscard]] void SetFlags(FlagsType value) noexcept { + std::memcpy(&flags, &value, sizeof(value)); + } + + /// Intrusively store the host definition of this instruction. + template + void SetDefinition(DefinitionType def) { + definition = Common::BitCast(def); + } + + /// Return the intrusively stored host definition of this instruction. + template + [[nodiscard]] DefinitionType Definition() const noexcept { + return Common::BitCast(definition); + } + +private: + struct NonTriviallyDummy { + NonTriviallyDummy() noexcept {} + }; + + void Use(const Value& value); + void UndoUse(const Value& value); + + IR::Opcode op{}; + int use_count{}; + u32 flags{}; + u32 definition{}; + union { + NonTriviallyDummy dummy{}; + boost::container::small_vector, 2> phi_args; + std::array args; + }; + std::unique_ptr associated_insts; +}; +static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); + +struct AssociatedInsts { + union { + Inst* in_bounds_inst; + Inst* sparse_inst; + Inst* zero_inst{}; + }; + Inst* sign_inst{}; + Inst* carry_inst{}; + Inst* overflow_inst{}; +}; + using U1 = TypedValue; using U8 = TypedValue; using U16 = TypedValue; diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h index e4797291e..a6be12ba2 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h @@ -11,7 +11,7 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" -- cgit v1.2.3 From c84bbd9e44e34bba0e602c1a6736535aa531445c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 02:42:36 -0300 Subject: shader: Inline common Value functions into the header --- src/shader_recompiler/frontend/ir/value.cpp | 19 ------------------- src/shader_recompiler/frontend/ir/value.h | 23 +++++++++++++++++++++++ 2 files changed, 23 insertions(+), 19 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index a8a919e0e..c021d3fa9 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -33,25 +33,6 @@ Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {} -bool Value::IsIdentity() const noexcept { - return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity; -} - -bool Value::IsPhi() const noexcept { - return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi; -} - -bool Value::IsEmpty() const noexcept { - return type == Type::Void; -} - -bool Value::IsImmediate() const noexcept { - if (IsIdentity()) { - return inst->Arg(0).IsImmediate(); - } - return type != Type::Opaque; -} - bool Value::IsLabel() const noexcept { return type == Type::Label; } diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index d90a68b37..5425e42a1 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -264,4 +264,27 @@ using U16U32U64 = TypedValue; using F16F32F64 = TypedValue; using UAny = TypedValue; +inline bool Value::IsIdentity() const noexcept { + return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity; +} + +inline bool Value::IsPhi() const noexcept { + return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi; +} + +inline bool Value::IsEmpty() const noexcept { + return type == Type::Void; +} + +inline bool Value::IsImmediate() const noexcept { + IR::Type current_type{type}; + const IR::Inst* current_inst{inst}; + while (current_type == Type::Opaque && current_inst->GetOpcode() == Opcode::Identity) { + const Value& arg{current_inst->Arg(0)}; + current_type = arg.type; + current_inst = arg.inst; + } + return current_type != Type::Opaque; +} + } // namespace Shader::IR -- cgit v1.2.3 From f66851e37682ce538172b0945908227ada8d21ac Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 02:43:44 -0300 Subject: shader: Use memset to reset instruction arguments --- src/shader_recompiler/frontend/ir/microinstruction.cpp | 4 +++- src/shader_recompiler/frontend/ir/value.h | 7 ++++--- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 701746a0c..e3350931b 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -279,8 +279,10 @@ void Inst::ClearArgs() { if (!value.IsImmediate()) { UndoUse(value); } - value = {}; } + // Reset arguments to null + // std::memset was measured to be faster on MSVC than std::ranges:fill + std::memset(&args, 0, sizeof(args)); } } diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 5425e42a1..7b20824ed 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -34,7 +34,7 @@ struct AssociatedInsts; class Value { public: - Value() noexcept : type{IR::Type::Void}, inst{nullptr} {} + Value() noexcept = default; explicit Value(IR::Inst* value) noexcept; explicit Value(IR::Block* value) noexcept; explicit Value(IR::Reg value) noexcept; @@ -78,9 +78,9 @@ public: private: void ValidateAccess(IR::Type expected) const; - IR::Type type; + IR::Type type{}; union { - IR::Inst* inst; + IR::Inst* inst{}; IR::Block* label; IR::Reg reg; IR::Pred pred; @@ -95,6 +95,7 @@ private: f64 imm_f64; }; }; +static_assert(static_cast(IR::Type::Void) == 0, "memset relies on IR::Type being zero"); static_assert(std::is_trivially_copyable_v); template -- cgit v1.2.3 From cc0fcd1b8d3080ae83709874e1d66f9e4cf3f1be Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 03:39:35 -0300 Subject: shader: Improve goto removal algorithm complexity Find sibling node containing a nephew searching from the nephew itself instead of the uncle. --- .../frontend/maxwell/structured_control_flow.cpp | 77 ++++++++-------------- 1 file changed, 28 insertions(+), 49 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 6021ac891..b85b613f3 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -222,27 +222,6 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) { return ret; } -bool HasNode(const Tree& tree, ConstNode stmt) { - const auto end{tree.end()}; - for (auto it = tree.begin(); it != end; ++it) { - if (it == stmt || (HasChildren(it->type) && HasNode(it->children, stmt))) { - return true; - } - } - return false; -} - -Node FindStatementWithLabel(Tree& tree, ConstNode goto_stmt) { - const ConstNode label_stmt{goto_stmt->label}; - const ConstNode end{tree.end()}; - for (auto it = tree.begin(); it != end; ++it) { - if (it == label_stmt || (HasChildren(it->type) && HasNode(it->children, label_stmt))) { - return it; - } - } - throw LogicError("Lift label not in tree"); -} - void SanitizeNoBreaks(const Tree& tree) { if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) { throw NotImplementedException("Capturing statement with break nodes"); @@ -288,22 +267,6 @@ bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) { return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt); } -bool SearchNode(const Tree& tree, ConstNode stmt, size_t& offset) { - ++offset; - - const auto end = tree.end(); - for (ConstNode it = tree.begin(); it != end; ++it) { - ++offset; - if (stmt == it) { - return true; - } - if (HasChildren(it->type) && SearchNode(it->children, stmt, offset)) { - return true; - } - } - return false; -} - bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept { Node it{goto_stmt}; do { @@ -321,6 +284,30 @@ bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept { return false; } +Node SiblingFromNephew(Node uncle, Node nephew) noexcept { + Statement* const parent{uncle->up}; + Statement* it{&*nephew}; + while (it->up != parent) { + it = it->up; + } + return Tree::s_iterator_to(*it); +} + +bool AreOrdered(Node left_sibling, Node right_sibling) noexcept { + const Node end{right_sibling->up->children.end()}; + for (auto it = right_sibling; it != end; ++it) { + if (it == left_sibling) { + return false; + } + } + return true; +} + +bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept { + const Node sibling{SiblingFromNephew(goto_stmt, label_stmt)}; + return AreOrdered(sibling, goto_stmt); +} + class GotoPass { public: explicit GotoPass(Flow::CFG& cfg, ObjectPool& inst_pool_, @@ -358,7 +345,7 @@ private: --goto_level; } } else { // Level(goto_stmt) < Level(label_stmt) - if (Offset(goto_stmt) > Offset(label_stmt)) { + if (NeedsLift(goto_stmt, label_stmt)) { // Lift goto_stmt to above stmt containing label_stmt using goto-lifting // transformations goto_stmt = Lift(goto_stmt); @@ -378,7 +365,7 @@ private: if (std::next(goto_stmt) == label_stmt) { // Simply eliminate the goto if the label is next to it goto_stmt->up->children.erase(goto_stmt); - } else if (Offset(goto_stmt) < Offset(label_stmt)) { + } else if (AreOrdered(goto_stmt, label_stmt)) { // Eliminate goto_stmt with a conditional EliminateAsConditional(goto_stmt, label_stmt); } else { @@ -523,8 +510,8 @@ private: [[nodiscard]] Node MoveInward(Node goto_stmt) { Statement* const parent{goto_stmt->up}; Tree& body{parent->children}; - const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; const Node label{goto_stmt->label}; + const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)}; const u32 label_id{label->id}; Statement* const goto_cond{goto_stmt->cond}; @@ -562,7 +549,7 @@ private: Tree& body{parent->children}; const Node label{goto_stmt->label}; const u32 label_id{label->id}; - const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; + const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)}; Tree loop_body; loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); @@ -627,14 +614,6 @@ private: return parent_tree.insert(std::next(loop), *new_goto); } - size_t Offset(ConstNode stmt) const { - size_t offset{0}; - if (!SearchNode(root_stmt.children, stmt, offset)) { - throw LogicError("Node not found in tree"); - } - return offset; - } - ObjectPool& inst_pool; ObjectPool& block_pool; ObjectPool& pool; -- cgit v1.2.3 From 23182fa59c45a88b706022c1373e307ba4636cca Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 04:58:23 -0300 Subject: shader: Intrusively store in a block if it's sealed or not --- src/shader_recompiler/frontend/ir/basic_block.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index ab7ddb3d5..0b0c97af6 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -107,6 +107,13 @@ public: return ssa_reg_values[RegIndex(reg)]; } + void SsaSeal() noexcept { + is_ssa_sealed = true; + } + [[nodiscard]] bool IsSsaSealed() const noexcept { + return is_ssa_sealed; + } + [[nodiscard]] bool empty() const { return instructions.empty(); } @@ -190,6 +197,8 @@ private: /// Intrusively store the value of a register in the block. std::array ssa_reg_values; + /// Intrusively store if the block is sealed in the SSA pass. + bool is_ssa_sealed{false}; /// Intrusively stored host definition of this block. u32 definition{}; -- cgit v1.2.3 From 75dee55486cac936f3a2d68533772d23aa733b72 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 04:59:16 -0300 Subject: shader: Inline common Value getters --- src/shader_recompiler/frontend/ir/value.cpp | 107 ---------------------------- src/shader_recompiler/frontend/ir/value.h | 104 ++++++++++++++++++++++++++- 2 files changed, 102 insertions(+), 109 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index c021d3fa9..b962f170d 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -51,107 +51,6 @@ IR::Type Value::Type() const noexcept { return type; } -IR::Inst* Value::Inst() const { - ValidateAccess(Type::Opaque); - return inst; -} - -IR::Block* Value::Label() const { - ValidateAccess(Type::Label); - return label; -} - -IR::Inst* Value::InstRecursive() const { - ValidateAccess(Type::Opaque); - if (IsIdentity()) { - return inst->Arg(0).InstRecursive(); - } - return inst; -} - -IR::Value Value::Resolve() const { - if (IsIdentity()) { - return inst->Arg(0).Resolve(); - } - return *this; -} - -IR::Reg Value::Reg() const { - ValidateAccess(Type::Reg); - return reg; -} - -IR::Pred Value::Pred() const { - ValidateAccess(Type::Pred); - return pred; -} - -IR::Attribute Value::Attribute() const { - ValidateAccess(Type::Attribute); - return attribute; -} - -IR::Patch Value::Patch() const { - ValidateAccess(Type::Patch); - return patch; -} - -bool Value::U1() const { - if (IsIdentity()) { - return inst->Arg(0).U1(); - } - ValidateAccess(Type::U1); - return imm_u1; -} - -u8 Value::U8() const { - if (IsIdentity()) { - return inst->Arg(0).U8(); - } - ValidateAccess(Type::U8); - return imm_u8; -} - -u16 Value::U16() const { - if (IsIdentity()) { - return inst->Arg(0).U16(); - } - ValidateAccess(Type::U16); - return imm_u16; -} - -u32 Value::U32() const { - if (IsIdentity()) { - return inst->Arg(0).U32(); - } - ValidateAccess(Type::U32); - return imm_u32; -} - -f32 Value::F32() const { - if (IsIdentity()) { - return inst->Arg(0).F32(); - } - ValidateAccess(Type::F32); - return imm_f32; -} - -u64 Value::U64() const { - if (IsIdentity()) { - return inst->Arg(0).U64(); - } - ValidateAccess(Type::U64); - return imm_u64; -} - -f64 Value::F64() const { - if (IsIdentity()) { - return inst->Arg(0).F64(); - } - ValidateAccess(Type::F64); - return imm_f64; -} - bool Value::operator==(const Value& other) const { if (type != other.type) { return false; @@ -205,10 +104,4 @@ bool Value::operator!=(const Value& other) const { return !operator==(other); } -void Value::ValidateAccess(IR::Type expected) const { - if (type != expected) { - throw LogicError("Reading {} out of {}", expected, type); - } -} - } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 7b20824ed..bb7d19001 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -14,6 +14,7 @@ #include #include +#include "common/assert.h" #include "common/bit_cast.h" #include "common/common_types.h" #include "shader_recompiler/exception.h" @@ -76,8 +77,6 @@ public: [[nodiscard]] bool operator!=(const Value& other) const; private: - void ValidateAccess(IR::Type expected) const; - IR::Type type{}; union { IR::Inst* inst{}; @@ -288,4 +287,105 @@ inline bool Value::IsImmediate() const noexcept { return current_type != Type::Opaque; } +inline IR::Inst* Value::Inst() const { + DEBUG_ASSERT(type == Type::Opaque); + return inst; +} + +inline IR::Block* Value::Label() const { + DEBUG_ASSERT(type == Type::Label); + return label; +} + +inline IR::Inst* Value::InstRecursive() const { + DEBUG_ASSERT(type == Type::Opaque); + if (IsIdentity()) { + return inst->Arg(0).InstRecursive(); + } + return inst; +} + +inline IR::Value Value::Resolve() const { + if (IsIdentity()) { + return inst->Arg(0).Resolve(); + } + return *this; +} + +inline IR::Reg Value::Reg() const { + DEBUG_ASSERT(type == Type::Reg); + return reg; +} + +inline IR::Pred Value::Pred() const { + DEBUG_ASSERT(type == Type::Pred); + return pred; +} + +inline IR::Attribute Value::Attribute() const { + DEBUG_ASSERT(type == Type::Attribute); + return attribute; +} + +inline IR::Patch Value::Patch() const { + DEBUG_ASSERT(type == Type::Patch); + return patch; +} + +inline bool Value::U1() const { + if (IsIdentity()) { + return inst->Arg(0).U1(); + } + DEBUG_ASSERT(type == Type::U1); + return imm_u1; +} + +inline u8 Value::U8() const { + if (IsIdentity()) { + return inst->Arg(0).U8(); + } + DEBUG_ASSERT(type == Type::U8); + return imm_u8; +} + +inline u16 Value::U16() const { + if (IsIdentity()) { + return inst->Arg(0).U16(); + } + DEBUG_ASSERT(type == Type::U16); + return imm_u16; +} + +inline u32 Value::U32() const { + if (IsIdentity()) { + return inst->Arg(0).U32(); + } + DEBUG_ASSERT(type == Type::U32); + return imm_u32; +} + +inline f32 Value::F32() const { + if (IsIdentity()) { + return inst->Arg(0).F32(); + } + DEBUG_ASSERT(type == Type::F32); + return imm_f32; +} + +inline u64 Value::U64() const { + if (IsIdentity()) { + return inst->Arg(0).U64(); + } + DEBUG_ASSERT(type == Type::U64); + return imm_u64; +} + +inline f64 Value::F64() const { + if (IsIdentity()) { + return inst->Arg(0).F64(); + } + DEBUG_ASSERT(type == Type::F64); + return imm_f64; +} + } // namespace Shader::IR -- cgit v1.2.3 From c8f9772d6590a018665d47a165951864ff783017 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 06:10:04 -0300 Subject: shader: Fix gcc warnings --- src/shader_recompiler/frontend/ir/microinstruction.cpp | 2 +- src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index e3350931b..b424d038e 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -282,7 +282,7 @@ void Inst::ClearArgs() { } // Reset arguments to null // std::memset was measured to be faster on MSVC than std::ranges:fill - std::memset(&args, 0, sizeof(args)); + std::memset(reinterpret_cast(&args), 0, sizeof(args)); } } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index b85b613f3..cc5410c6d 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -267,7 +267,7 @@ bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) { return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt); } -bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept { +[[maybe_unused]] bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept { Node it{goto_stmt}; do { if (it == label_stmt) { -- cgit v1.2.3 From 427951d6fe8a0914434b0fcf897eef67749cba9d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 21 Apr 2021 20:59:51 -0400 Subject: shader: add missing include guard in half_floating_point_helper.h --- .../frontend/maxwell/translate/impl/half_floating_point_helper.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h index 24063b2ab..59da56a7e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#pragma once + #include "common/common_types.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" -- cgit v1.2.3 From 7a9dc7839876fe5b24d1c841f182e01108ba676e Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 21 Apr 2021 21:34:51 -0400 Subject: shader: Refactor atomic_operations_global_memory --- .../impl/atomic_operations_global_memory.cpp | 80 ++++++++++------------ 1 file changed, 36 insertions(+), 44 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp index 7a32c5eb3..66f39e44e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp @@ -154,69 +154,61 @@ void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result break; } } + +IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset, + AtomSize size, AtomOp op) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, size == AtomSize::S32); + case AtomSize::U64: + case AtomSize::S64: + return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, size == AtomSize::S64); + case AtomSize::F32: + return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size); + case AtomSize::F16x2: { + return ApplyFpAtomOp(v.ir, offset, v.ir.UnpackFloat2x16(v.X(operand_reg)), op, size); + } + default: + throw NotImplementedException("Atom Size {}", size); + } +} + +void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, + const IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) { + IR::Value result; + if (AtomOpNotApplicable(size, op)) { + result = LoadGlobal(v.ir, offset, size); + } else { + result = ApplyAtomOp(v, operand_reg, offset, size, op); + } + if (write_dest) { + StoreResult(v, dest_reg, result, size); + } +} } // Anonymous namespace void TranslatorVisitor::ATOM(u64 insn) { union { u64 raw; BitField<0, 8, IR::Reg> dest_reg; - BitField<8, 8, IR::Reg> addr_reg; - BitField<20, 8, IR::Reg> src_reg_b; + BitField<20, 8, IR::Reg> operand_reg; BitField<49, 3, AtomSize> size; BitField<52, 4, AtomOp> op; } const atom{insn}; - - const bool size_64{atom.size == AtomSize::U64 || atom.size == AtomSize::S64}; - const bool is_signed{atom.size == AtomSize::S32 || atom.size == AtomSize::S64}; - const bool is_integer{atom.size != AtomSize::F32 && atom.size != AtomSize::F16x2}; const IR::U64 offset{AtomOffset(*this, insn)}; - IR::Value result; - - if (AtomOpNotApplicable(atom.size, atom.op)) { - result = LoadGlobal(ir, offset, atom.size); - } else if (!is_integer) { - if (atom.size == AtomSize::F32) { - result = ApplyFpAtomOp(ir, offset, F(atom.src_reg_b), atom.op, atom.size); - } else { - const IR::Value src_b{ir.UnpackFloat2x16(X(atom.src_reg_b))}; - result = ApplyFpAtomOp(ir, offset, src_b, atom.op, atom.size); - } - } else if (size_64) { - result = ApplyIntegerAtomOp(ir, offset, L(atom.src_reg_b), atom.op, is_signed); - } else { - result = ApplyIntegerAtomOp(ir, offset, X(atom.src_reg_b), atom.op, is_signed); - } - StoreResult(*this, atom.dest_reg, result, atom.size); + GlobalAtomic(*this, atom.dest_reg, atom.operand_reg, offset, atom.size, atom.op, true); } void TranslatorVisitor::RED(u64 insn) { union { u64 raw; - BitField<0, 8, IR::Reg> src_reg_b; - BitField<8, 8, IR::Reg> addr_reg; + BitField<0, 8, IR::Reg> operand_reg; BitField<20, 3, AtomSize> size; BitField<23, 3, AtomOp> op; } const red{insn}; - - if (AtomOpNotApplicable(red.size, red.op)) { - return; - } - const bool size_64{red.size == AtomSize::U64 || red.size == AtomSize::S64}; - const bool is_signed{red.size == AtomSize::S32 || red.size == AtomSize::S64}; - const bool is_integer{red.size != AtomSize::F32 && red.size != AtomSize::F16x2}; const IR::U64 offset{AtomOffset(*this, insn)}; - if (!is_integer) { - if (red.size == AtomSize::F32) { - ApplyFpAtomOp(ir, offset, F(red.src_reg_b), red.op, red.size); - } else { - const IR::Value src_b{ir.UnpackFloat2x16(X(red.src_reg_b))}; - ApplyFpAtomOp(ir, offset, src_b, red.op, red.size); - } - } else if (size_64) { - ApplyIntegerAtomOp(ir, offset, L(red.src_reg_b), red.op, is_signed); - } else { - ApplyIntegerAtomOp(ir, offset, X(red.src_reg_b), red.op, is_signed); - } + GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, true); } } // namespace Shader::Maxwell -- cgit v1.2.3 From d10cf55353175b13bed4cf18791e080ecb7fd95b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 16:17:59 -0300 Subject: shader: Implement indexed textures --- src/shader_recompiler/frontend/ir/modifiers.h | 17 +++++++++-------- src/shader_recompiler/frontend/ir/opcodes.inc | 24 ++++++++++++------------ 2 files changed, 21 insertions(+), 20 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 5d7efa14c..77cda1f8a 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -34,14 +34,15 @@ static_assert(sizeof(FpControl) <= sizeof(u32)); union TextureInstInfo { u32 raw; - BitField<0, 8, TextureType> type; - BitField<8, 1, u32> is_depth; - BitField<9, 1, u32> has_bias; - BitField<10, 1, u32> has_lod_clamp; - BitField<11, 1, u32> relaxed_precision; - BitField<12, 2, u32> gather_component; - BitField<14, 2, u32> num_derivates; - BitField<16, 3, ImageFormat> image_format; + BitField<0, 16, u32> descriptor_index; + BitField<16, 3, TextureType> type; + BitField<19, 1, u32> is_depth; + BitField<20, 1, u32> has_bias; + BitField<21, 1, u32> has_lod_clamp; + BitField<22, 1, u32> relaxed_precision; + BitField<23, 2, u32> gather_component; + BitField<25, 2, u32> num_derivates; + BitField<27, 3, ImageFormat> image_format; }; static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index b6869d4e4..8f32c9e74 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -482,18 +482,18 @@ OPCODE(BoundImageGradient, F32x4, U32, OPCODE(BoundImageRead, U32x4, U32, Opaque, ) OPCODE(BoundImageWrite, Void, U32, Opaque, U32x4, ) -OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) -OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) -OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) -OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) -OPCODE(ImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) -OPCODE(ImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) -OPCODE(ImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) -OPCODE(ImageQueryDimensions, U32x4, U32, U32, ) -OPCODE(ImageQueryLod, F32x4, U32, Opaque, ) -OPCODE(ImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) -OPCODE(ImageRead, U32x4, U32, Opaque, ) -OPCODE(ImageWrite, Void, U32, Opaque, U32x4, ) +OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) +OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) +OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, ) +OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) +OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, ) +OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) +OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageRead, U32x4, Opaque, Opaque, ) +OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) // Warp operations OPCODE(LaneId, U32, ) -- cgit v1.2.3 From 21e3382830e4a2b294ea8b84ffad531e752fc512 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 16:18:54 -0300 Subject: shader: Simplify code in opcodes.h to fix Intellisense Avoid using std::array to fix Intellisense not properly compiling this code and disabling itself on all files that include it. While we are at it, change the code to use u8 instead of size_t for the number of instructions in an opcode. --- src/shader_recompiler/frontend/ir/opcodes.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h index b5697c7f9..2b9c0ed8c 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.h +++ b/src/shader_recompiler/frontend/ir/opcodes.h @@ -4,8 +4,8 @@ #pragma once -#include #include +#include #include #include @@ -21,7 +21,6 @@ enum class Opcode { }; namespace Detail { - struct OpcodeMeta { std::string_view name; Type type; @@ -57,9 +56,9 @@ constexpr Type F64x2{Type::F64x2}; constexpr Type F64x3{Type::F64x3}; constexpr Type F64x4{Type::F64x4}; -constexpr std::array META_TABLE{ +constexpr OpcodeMeta META_TABLE[]{ #define OPCODE(name_token, type_token, ...) \ - OpcodeMeta{ \ + { \ .name{#name_token}, \ .type = type_token, \ .arg_types{__VA_ARGS__}, \ @@ -67,14 +66,13 @@ constexpr std::array META_TABLE{ #include "opcodes.inc" #undef OPCODE }; - constexpr size_t CalculateNumArgsOf(Opcode op) { const auto& arg_types{META_TABLE[static_cast(op)].arg_types}; return std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void)); } -constexpr std::array NUM_ARGS{ -#define OPCODE(name_token, type_token, ...) CalculateNumArgsOf(Opcode::name_token), +constexpr u8 NUM_ARGS[]{ +#define OPCODE(name_token, type_token, ...) static_cast(CalculateNumArgsOf(Opcode::name_token)), #include "opcodes.inc" #undef OPCODE }; @@ -87,7 +85,7 @@ constexpr std::array NUM_ARGS{ /// Get the number of arguments an opcode accepts [[nodiscard]] inline size_t NumArgsOf(Opcode op) noexcept { - return Detail::NUM_ARGS[static_cast(op)]; + return static_cast(Detail::NUM_ARGS[static_cast(op)]); } /// Get the required type of an argument of an opcode -- cgit v1.2.3 From aece958c2ba5d4fe37246a6a7502d182931a7483 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 16:50:13 -0300 Subject: shader: Add missing UndoUse case for GetSparseFromOp --- src/shader_recompiler/frontend/ir/microinstruction.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index b424d038e..5c1b02d53 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -364,6 +364,10 @@ void Inst::UndoUse(const Value& value) { AllocAssociatedInsts(assoc_inst); RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp); break; + case Opcode::GetSparseFromOp: + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->sparse_inst, Opcode::GetSparseFromOp); + break; case Opcode::GetInBoundsFromOp: AllocAssociatedInsts(assoc_inst); RemovePseudoInstruction(assoc_inst->in_bounds_inst, Opcode::GetInBoundsFromOp); -- cgit v1.2.3 From fe25f42403493dc4b5e801f78d7f4ec5372aa538 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 18:35:15 -0300 Subject: shader: Remove identity removal pass for better build times --- src/shader_recompiler/frontend/maxwell/program.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 14180dcd9..aee96eae3 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -143,7 +143,6 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Fri, 23 Apr 2021 07:09:02 -0300 Subject: shader: Fix VMNMX selector B --- .../frontend/maxwell/translate/impl/video_set_predicate.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp index ec5e74f6d..1b66abc33 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp @@ -69,13 +69,14 @@ void TranslatorVisitor::VSETP(u64 insn) { const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast(vsetp.src_b_imm)) : GetReg20(insn)}; const u32 a_selector{static_cast(vsetp.src_a_selector)}; + const u32 b_selector{static_cast(vsetp.src_b_selector)}; const VideoWidth a_width{vsetp.src_a_width}; const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)}; const bool src_a_signed{vsetp.src_a_sign != 0}; const bool src_b_signed{vsetp.src_b_sign != 0}; const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)}; - const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, a_selector, src_b_signed)}; + const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)}; // Compare operation's sign is only dependent on operand b's sign const bool compare_signed{src_b_signed}; -- cgit v1.2.3 From fb14820c86f082f970183c2722c5c38bcbb5a2ab Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 27 Apr 2021 21:05:41 -0400 Subject: shader: Fix IADD3.CC --- .../maxwell/translate/impl/integer_add_three_input.cpp | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp index 15da90365..259a6e6ac 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -73,21 +73,13 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { op_c = v.ir.INeg(op_c); } - IR::U32 lhs{v.ir.IAdd(op_a, op_b)}; - IR::U1 of_1; - if (iadd3.cc != 0) { - of_1 = v.ir.GetOverflowFromOp(lhs); - } + IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)}; if (iadd3.x != 0) { const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; - lhs = v.ir.IAdd(lhs, carry); - } - if (iadd3.cc != 0 && iadd3.shift == Shift::Left) { - const IR::U32 high_bits{v.ir.ShiftRightLogical(lhs, v.ir.Imm32(16))}; - of_1 = v.ir.LogicalOr(of_1, v.ir.INotEqual(v.ir.Imm32(0), high_bits)); + lhs_1 = v.ir.IAdd(lhs_1, carry); } - lhs = IntegerShift(v.ir, lhs, iadd3.shift); - const IR::U32 result{v.ir.IAdd(lhs, op_c)}; + const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, iadd3.shift)}; + const IR::U32 result{v.ir.IAdd(lhs_2, op_c)}; v.X(iadd3.dest_reg, result); if (iadd3.cc != 0) { @@ -98,6 +90,7 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { v.SetZFlag(v.ir.GetZeroFromOp(result)); v.SetSFlag(v.ir.GetSignFromOp(result)); v.SetCFlag(v.ir.GetCarryFromOp(result)); + const IR::U1 of_1{v.ir.ILessThan(lhs_1, op_a, false)}; v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1)); } } -- cgit v1.2.3 From da936d6ad8cef5418b7644754ee4bcbf7f6125f8 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 18 Apr 2021 19:10:55 +0200 Subject: shader: Implement delegation of Exit to dispatcher on CFG --- .../frontend/maxwell/control_flow.cpp | 41 ++++++++++++++++++++-- .../frontend/maxwell/control_flow.h | 9 ++++- 2 files changed, 47 insertions(+), 3 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 9811183f1..298faa03e 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -185,8 +185,20 @@ Function::Function(ObjectPool& block_pool, Location start_address) label.block->branch_false = nullptr; } -CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_address) - : env{env_}, block_pool{block_pool_}, program_start{start_address} { +CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_address, + bool exits_to_dispatcher_) + : env{env_}, block_pool{block_pool_}, program_start{start_address}, exits_to_dispatcher{ + exits_to_dispatcher_} { + if (exits_to_dispatcher) { + dispatch_block = block_pool.Create(Block{}); + dispatch_block->begin = {}; + dispatch_block->end = {}; + dispatch_block->end_class = EndClass::Exit; + dispatch_block->cond = IR::Condition(true); + dispatch_block->stack = {}; + dispatch_block->branch_true = nullptr; + dispatch_block->branch_false = nullptr; + } functions.emplace_back(block_pool, start_address); for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { while (!functions[function_id].labels.empty()) { @@ -196,6 +208,12 @@ CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_addre AnalyzeLabel(function_id, label); } } + if (exits_to_dispatcher) { + const auto it = functions[0].blocks.rbegin(); + dispatch_block->begin = it->end + 1; + dispatch_block->end = it->end + 1; + functions[0].blocks.insert(*dispatch_block); + } } void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { @@ -462,11 +480,22 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati // EXIT will never be taken return AnalysisState::Continue; } + if (exits_to_dispatcher && function_id != 0) { + throw NotImplementedException("Dispatch EXIT on external function."); + } if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { if (block->stack.Peek(Token::PEXIT).has_value()) { throw NotImplementedException("Conditional EXIT with PEXIT token"); } const IR::Condition cond{flow_test, static_cast(pred.index), pred.negated}; + if (exits_to_dispatcher) { + block->end = pc; + block->branch_true = dispatch_block; + block->end_class = EndClass::Branch; + block->cond = cond; + block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); + return AnalysisState::Branch; + } AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); return AnalysisState::Branch; } @@ -477,6 +506,14 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati block->branch_false = nullptr; return AnalysisState::Branch; } + if (exits_to_dispatcher) { + block->cond = IR::Condition{true}; + block->end = pc; + block->end_class = EndClass::Branch; + block->branch_true = dispatch_block; + block->branch_false = nullptr; + return AnalysisState::Branch; + } block->end = pc + 1; block->end_class = EndClass::Exit; return AnalysisState::Branch; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 89966b16a..0e515c3b6 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -111,7 +111,8 @@ class CFG { }; public: - explicit CFG(Environment& env, ObjectPool& block_pool, Location start_address); + explicit CFG(Environment& env, ObjectPool& block_pool, Location start_address, + bool exits_to_dispatcher = false); CFG& operator=(const CFG&) = delete; CFG(const CFG&) = delete; @@ -128,6 +129,10 @@ public: return std::span(functions.data(), functions.size()); } + [[nodiscard]] bool ExitsToDispatcher() const { + return exits_to_dispatcher; + } + private: void AnalyzeLabel(FunctionId function_id, Label& label); @@ -158,6 +163,8 @@ private: boost::container::small_vector functions; FunctionId current_function_id{0}; Location program_start; + bool exits_to_dispatcher{}; + Block* dispatch_block{}; }; } // namespace Shader::Maxwell::Flow -- cgit v1.2.3 From b541f5e5e333a8ec8c3569e02d67e59ad14217c2 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 19 Apr 2021 01:03:38 +0200 Subject: shader: Implement VertexA stage --- .../frontend/ir/microinstruction.cpp | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + src/shader_recompiler/frontend/maxwell/program.cpp | 28 ++++++++++++++++++++++ src/shader_recompiler/frontend/maxwell/program.h | 2 ++ 4 files changed, 32 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 5c1b02d53..dba902186 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -61,6 +61,7 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::LoopMerge: case Opcode::SelectionMerge: case Opcode::Return: + case Opcode::Join: case Opcode::Unreachable: case Opcode::DemoteToHelperInvocation: case Opcode::Barrier: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 8f32c9e74..b14719c51 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -13,6 +13,7 @@ OPCODE(BranchConditional, Void, U1, OPCODE(LoopMerge, Void, Label, Label, ) OPCODE(SelectionMerge, Void, Label, ) OPCODE(Return, Void, ) +OPCODE(Join, Void, ) OPCODE(Unreachable, Void, ) OPCODE(DemoteToHelperInvocation, Void, Label, ) diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index aee96eae3..59897cb3e 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -150,4 +150,32 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, Flow::CFG& cfg); +[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, + Environment& env_vertex_b); } // namespace Shader::Maxwell -- cgit v1.2.3 From c49d56c931471f21d475a31272164fbfae5ea64a Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 1 May 2021 14:56:25 +0200 Subject: shader: Address feedback --- .../frontend/maxwell/control_flow.cpp | 10 +++--- src/shader_recompiler/frontend/maxwell/program.cpp | 37 +++++++++++----------- src/shader_recompiler/frontend/maxwell/program.h | 1 + 3 files changed, 24 insertions(+), 24 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 298faa03e..e7abea82f 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -209,9 +209,9 @@ CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_addre } } if (exits_to_dispatcher) { - const auto it = functions[0].blocks.rbegin(); - dispatch_block->begin = it->end + 1; - dispatch_block->end = it->end + 1; + const auto last_block{functions[0].blocks.rbegin()}; + dispatch_block->begin = last_block->end + 1; + dispatch_block->end = last_block->end + 1; functions[0].blocks.insert(*dispatch_block); } } @@ -481,7 +481,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati return AnalysisState::Continue; } if (exits_to_dispatcher && function_id != 0) { - throw NotImplementedException("Dispatch EXIT on external function."); + throw NotImplementedException("Dispatch EXIT on external function"); } if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { if (block->stack.Peek(Token::PEXIT).has_value()) { @@ -490,9 +490,9 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati const IR::Condition cond{flow_test, static_cast(pred.index), pred.negated}; if (exits_to_dispatcher) { block->end = pc; - block->branch_true = dispatch_block; block->end_class = EndClass::Branch; block->cond = cond; + block->branch_true = dispatch_block; block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); return AnalysisState::Branch; } diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 59897cb3e..a4fa4319d 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -151,31 +151,30 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Sun, 23 May 2021 04:28:34 -0300 Subject: shader: Initial OpenGL implementation --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 ++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 1 + .../frontend/maxwell/translate/impl/move_special_register.cpp | 7 +++++++ 3 files changed, 12 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index b3c9fe72a..5913fdeff 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -355,6 +355,10 @@ U32 IREmitter::WorkgroupIdZ() { return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)}; } +Value IREmitter::LocalInvocationId() { + return Inst(Opcode::LocalInvocationId); +} + U32 IREmitter::LocalInvocationIdX() { return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)}; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 4441c495d..a12919283 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -95,6 +95,7 @@ public: [[nodiscard]] U32 WorkgroupIdY(); [[nodiscard]] U32 WorkgroupIdZ(); + [[nodiscard]] Value LocalInvocationId(); [[nodiscard]] U32 LocalInvocationIdX(); [[nodiscard]] U32 LocalInvocationIdY(); [[nodiscard]] U32 LocalInvocationIdZ(); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index b0baff74b..01fb6f5e5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -120,6 +120,13 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_INVOCATION_INFO: // LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed"); return ir.Imm32(0x00ff'0000); + case SpecialRegister::SR_TID: { + const IR::Value tid{ir.LocalInvocationId()}; + return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)}, + IR::U32{ir.CompositeExtract(tid, 1)}, + ir.Imm32(16), ir.Imm32(8)), + IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6)); + } case SpecialRegister::SR_TID_X: return ir.LocalInvocationIdX(); case SpecialRegister::SR_TID_Y: -- cgit v1.2.3 From 7ecc6de56ae01602b25408db8b6658d7a41a419a Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 23 Apr 2021 17:47:54 -0400 Subject: shader: Implement Int32 SUATOM/SURED --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 89 +++++++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 26 +++ .../frontend/ir/microinstruction.cpp | 33 ++++ src/shader_recompiler/frontend/ir/opcodes.inc | 38 ++++ src/shader_recompiler/frontend/maxwell/maxwell.inc | 3 +- .../frontend/maxwell/translate/impl/impl.h | 1 + .../maxwell/translate/impl/not_implemented.cpp | 4 - .../translate/impl/surface_atomic_operations.cpp | 204 +++++++++++++++++++++ 8 files changed, 393 insertions(+), 5 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 5913fdeff..354d72c9b 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1869,6 +1869,95 @@ void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value Inst(op, Flags{info}, handle, coords, color); } +Value IREmitter::ImageAtomicIAdd(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicIAdd32 + : Opcode::BindlessImageAtomicIAdd32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicSMin(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMin32 + : Opcode::BindlessImageAtomicSMin32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicUMin(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMin32 + : Opcode::BindlessImageAtomicUMin32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicIMin(const Value& handle, const Value& coords, const Value& value, + bool is_signed, TextureInstInfo info) { + return is_signed ? ImageAtomicSMin(handle, coords, value, info) + : ImageAtomicUMin(handle, coords, value, info); +} + +Value IREmitter::ImageAtomicSMax(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMax32 + : Opcode::BindlessImageAtomicSMax32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMax32 + : Opcode::BindlessImageAtomicUMax32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value, + bool is_signed, TextureInstInfo info) { + return is_signed ? ImageAtomicSMax(handle, coords, value, info) + : ImageAtomicUMax(handle, coords, value, info); +} + +Value IREmitter::ImageAtomicInc(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicInc32 + : Opcode::BindlessImageAtomicInc32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicDec(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicDec32 + : Opcode::BindlessImageAtomicDec32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicAnd32 + : Opcode::BindlessImageAtomicAnd32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicOr(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicOr32 + : Opcode::BindlessImageAtomicOr32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicXor(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicXor32 + : Opcode::BindlessImageAtomicXor32}; + return Inst(op, Flags{info}, handle, coords, value); +} + +Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicExchange32 + : Opcode::BindlessImageAtomicExchange32}; + return Inst(op, Flags{info}, handle, coords, value); +} + U1 IREmitter::VoteAll(const U1& value) { return Inst(Opcode::VoteAll, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index a12919283..4e614d424 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -334,6 +334,32 @@ public: [[nodiscard]] void ImageWrite(const Value& handle, const Value& coords, const Value& color, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicIAdd(const Value& handle, const Value& coords, + const Value& value, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicSMin(const Value& handle, const Value& coords, + const Value& value, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicUMin(const Value& handle, const Value& coords, + const Value& value, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicIMin(const Value& handle, const Value& coords, + const Value& value, bool is_signed, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicSMax(const Value& handle, const Value& coords, + const Value& value, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords, + const Value& value, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords, + const Value& value, bool is_signed, TextureInstInfo info); + [[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info); + [[nodiscard]] Value ImageAtomicDec(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info); + [[nodiscard]] Value ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info); + [[nodiscard]] Value ImageAtomicOr(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info); + [[nodiscard]] Value ImageAtomicXor(const Value& handle, const Value& coords, const Value& value, + TextureInstInfo info); + [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords, + const Value& value, TextureInstInfo info); [[nodiscard]] U1 VoteAll(const U1& value); [[nodiscard]] U1 VoteAny(const U1& value); [[nodiscard]] U1 VoteEqual(const U1& value); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index dba902186..616ef17d4 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -166,6 +166,39 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::BindlessImageWrite: case Opcode::BoundImageWrite: case Opcode::ImageWrite: + case IR::Opcode::BindlessImageAtomicIAdd32: + case IR::Opcode::BindlessImageAtomicSMin32: + case IR::Opcode::BindlessImageAtomicUMin32: + case IR::Opcode::BindlessImageAtomicSMax32: + case IR::Opcode::BindlessImageAtomicUMax32: + case IR::Opcode::BindlessImageAtomicInc32: + case IR::Opcode::BindlessImageAtomicDec32: + case IR::Opcode::BindlessImageAtomicAnd32: + case IR::Opcode::BindlessImageAtomicOr32: + case IR::Opcode::BindlessImageAtomicXor32: + case IR::Opcode::BindlessImageAtomicExchange32: + case IR::Opcode::BoundImageAtomicIAdd32: + case IR::Opcode::BoundImageAtomicSMin32: + case IR::Opcode::BoundImageAtomicUMin32: + case IR::Opcode::BoundImageAtomicSMax32: + case IR::Opcode::BoundImageAtomicUMax32: + case IR::Opcode::BoundImageAtomicInc32: + case IR::Opcode::BoundImageAtomicDec32: + case IR::Opcode::BoundImageAtomicAnd32: + case IR::Opcode::BoundImageAtomicOr32: + case IR::Opcode::BoundImageAtomicXor32: + case IR::Opcode::BoundImageAtomicExchange32: + case IR::Opcode::ImageAtomicIAdd32: + case IR::Opcode::ImageAtomicSMin32: + case IR::Opcode::ImageAtomicUMin32: + case IR::Opcode::ImageAtomicSMax32: + case IR::Opcode::ImageAtomicUMax32: + case IR::Opcode::ImageAtomicInc32: + case IR::Opcode::ImageAtomicDec32: + case IR::Opcode::ImageAtomicAnd32: + case IR::Opcode::ImageAtomicOr32: + case IR::Opcode::ImageAtomicXor32: + case IR::Opcode::ImageAtomicExchange32: return true; default: return false; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index b14719c51..9165421f8 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -496,6 +496,44 @@ OPCODE(ImageGradient, F32x4, Opaq OPCODE(ImageRead, U32x4, Opaque, Opaque, ) OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) +// Atomic Image operations + +OPCODE(BindlessImageAtomicIAdd32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicSMin32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicUMin32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicSMax32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicUMax32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicInc32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicDec32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicAnd32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicOr32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicXor32, U32, U32, Opaque, U32, ) +OPCODE(BindlessImageAtomicExchange32, U32, U32, Opaque, U32, ) + +OPCODE(BoundImageAtomicIAdd32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicSMin32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicUMin32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicSMax32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicUMax32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicInc32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicDec32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicAnd32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicOr32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicXor32, U32, U32, Opaque, U32, ) +OPCODE(BoundImageAtomicExchange32, U32, U32, Opaque, U32, ) + +OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, ) +OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, ) + // Warp operations OPCODE(LaneId, U32, ) OPCODE(VoteAll, U1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index c759bd4d4..2fee591bb 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -244,7 +244,8 @@ INST(STG, "STG", "1110 1110 1101 1---") INST(STL, "STL", "1110 1111 0101 0---") INST(STP, "STP", "1110 1110 1010 0---") INST(STS, "STS", "1110 1111 0101 1---") -INST(SUATOM_cas, "SUATOM", "1110 1010 ---- ----") +INST(SUATOM, "SUATOM", "1110 1010 0--- ----") +INST(SUATOM_cas, "SUATOM_cas", "1110 1010 1--- ----") INST(SULD, "SULD", "1110 1011 000- ----") INST(SURED, "SURED", "1110 1011 010- ----") INST(SUST, "SUST", "1110 1011 001- ----") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index bf7d1bae8..335e4f24f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -303,6 +303,7 @@ public: void STL(u64 insn); void STP(u64 insn); void STS(u64 insn); + void SUATOM(u64 insn); void SUATOM_cas(u64 insn); void SULD(u64 insn); void SURED(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a4f99bbbe..7e26ab359 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -249,10 +249,6 @@ void TranslatorVisitor::SUATOM_cas(u64) { ThrowNotImplemented(Opcode::SUATOM_cas); } -void TranslatorVisitor::SURED(u64) { - ThrowNotImplemented(Opcode::SURED); -} - void TranslatorVisitor::SYNC(u64) { ThrowNotImplemented(Opcode::SYNC); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp new file mode 100644 index 000000000..994bdc3eb --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp @@ -0,0 +1,204 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Type : u64 { + _1D, + BUFFER_1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, +}; + +enum class Size : u64 { + U32, + S32, + U64, + S64, + F32FTZRN, + F16x2FTZRN, + SD32, + SD64, +}; + +enum class AtomicOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, +}; + +enum class Clamp : u64 { + IGN, + Default, + TRAP, +}; + +TextureType GetType(Type type) { + switch (type) { + case Type::_1D: + return TextureType::Color1D; + case Type::BUFFER_1D: + return TextureType::Buffer; + case Type::ARRAY_1D: + return TextureType::ColorArray1D; + case Type::_2D: + return TextureType::Color2D; + case Type::ARRAY_2D: + return TextureType::ColorArray2D; + case Type::_3D: + return TextureType::Color3D; + } + throw NotImplementedException("Invalid type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { + const auto array{[&](int index) { + return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16)); + }}; + switch (type) { + case Type::_1D: + case Type::BUFFER_1D: + return v.X(reg); + default: + break; + } + throw NotImplementedException("Invalid type {}", type); +} + +IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords, + const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op, + bool is_signed) { + switch (op) { + case AtomicOp::ADD: + return ir.ImageAtomicIAdd(handle, coords, op_b, info); + case AtomicOp::MIN: + return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info); + case AtomicOp::MAX: + return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info); + case AtomicOp::INC: + return ir.ImageAtomicInc(handle, coords, op_b, info); + case AtomicOp::DEC: + return ir.ImageAtomicDec(handle, coords, op_b, info); + case AtomicOp::AND: + return ir.ImageAtomicAnd(handle, coords, op_b, info); + case AtomicOp::OR: + return ir.ImageAtomicOr(handle, coords, op_b, info); + case AtomicOp::XOR: + return ir.ImageAtomicXor(handle, coords, op_b, info); + case AtomicOp::EXCH: + return ir.ImageAtomicExchange(handle, coords, op_b, info); + default: + throw NotImplementedException("Atomic Operation {}", op); + } +} + +ImageFormat Format(Size size) { + switch (size) { + case Size::U32: + case Size::S32: + case Size::SD32: + return ImageFormat::R32_UINT; + default: + break; + } + throw NotImplementedException("Invalid size {}", size); +} + +bool IsSizeInt32(Size size) { + switch (size) { + case Size::U32: + case Size::S32: + case Size::SD32: + return true; + default: + return false; + } +} + +void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg, + IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type, + u64 bound_offset, bool is_bindless, bool write_result) { + if (clamp != Clamp::IGN) { + throw NotImplementedException("Clamp {}", clamp); + } + if (!IsSizeInt32(size)) { + throw NotImplementedException("Size {}", size); + } + const bool is_signed{size == Size::S32}; + const ImageFormat format{Format(size)}; + const TextureType tex_type{GetType(type)}; + const IR::Value coords{MakeCoords(v, coord_reg, type)}; + + const IR::U32 handle{is_bindless != 0 ? v.X(bindless_reg) + : v.ir.Imm32(static_cast(bound_offset * 4))}; + IR::TextureInstInfo info{}; + info.type.Assign(tex_type); + info.image_format.Assign(format); + + // TODO: float/64-bit operand + const IR::Value op_b{v.X(operand_reg)}; + const IR::Value color{ApplyAtomicOp(v.ir, handle, coords, op_b, info, op, is_signed)}; + + if (write_result) { + v.X(dest_reg, IR::U32{color}); + } +} +} // Anonymous namespace + +void TranslatorVisitor::SUATOM(u64 insn) { + union { + u64 raw; + BitField<54, 1, u64> is_bindless; + BitField<29, 4, AtomicOp> op; + BitField<33, 3, Type> type; + BitField<51, 3, Size> size; + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> operand_reg; + BitField<36, 13, u64> bound_offset; // !is_bindless + BitField<39, 8, IR::Reg> bindless_reg; // is_bindless + } const suatom{insn}; + + ImageAtomOp(*this, suatom.dest_reg, suatom.operand_reg, suatom.coord_reg, suatom.bindless_reg, + suatom.op, suatom.clamp, suatom.size, suatom.type, suatom.bound_offset, + suatom.is_bindless != 0, true); +} + +void TranslatorVisitor::SURED(u64 insn) { + // TODO: confirm offsets + union { + u64 raw; + BitField<51, 1, u64> is_bound; + BitField<21, 3, AtomicOp> op; + BitField<33, 3, Type> type; + BitField<20, 3, Size> size; + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> operand_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<36, 13, u64> bound_offset; // is_bound + BitField<39, 8, IR::Reg> bindless_reg; // !is_bound + } const sured{insn}; + ImageAtomOp(*this, IR::Reg::RZ, sured.operand_reg, sured.coord_reg, sured.bindless_reg, + sured.op, sured.clamp, sured.size, sured.type, sured.bound_offset, + sured.is_bound == 0, false); +} + +} // namespace Shader::Maxwell -- cgit v1.2.3 From 153a77efee629ccdc342e3f3f2fd49488e884233 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 1 May 2021 17:16:54 +0200 Subject: shader: Stub SR_AFFINITY --- .../frontend/maxwell/translate/impl/move_special_register.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index 01fb6f5e5..fe3cdfa96 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -159,6 +159,9 @@ enum class SpecialRegister : u64 { return ir.SubgroupGeMask(); case SpecialRegister::SR_Y_DIRECTION: return ir.BitCast(ir.YDirection()); + case SpecialRegister::SR_AFFINITY: + // LOG_WARNING(..., "SR_AFFINITY is stubbed"); + return ir.Imm32(0); // This is the default value hardware returns. default: throw NotImplementedException("S2R special register {}", special_register); } -- cgit v1.2.3 From ee61ec2c39e6db53c56e7ac761a2223d99f06908 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 2 May 2021 01:50:27 +0200 Subject: shader: Optimize NVN Fallthrough --- src/shader_recompiler/frontend/maxwell/program.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index a4fa4319d..0d3f00699 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -88,17 +88,20 @@ void AddNVNStorageBuffers(IR::Program& program) { }()}; auto& descs{program.info.storage_buffers_descriptors}; for (u32 index = 0; index < num_buffers; ++index) { + if (!program.info.nvn_buffer_used[index]) { + continue; + } const u32 offset{base + index * descriptor_size}; const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; if (it != descs.end()) { + it->is_written |= program.info.stores_global_memory; continue; } - // Assume these are written for now descs.push_back({ .cbuf_index = driver_cbuf, .cbuf_offset = offset, .count = 1, - .is_written = true, + .is_written = program.info.stores_global_memory, }); } } -- cgit v1.2.3 From 09dc23f97188a4fa5ea03556a9187bfbefef1d78 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 5 May 2021 00:37:05 -0400 Subject: shader: ISET.X implementation --- .../translate/impl/integer_compare_and_set.cpp | 66 +++++++++++++++++++--- 1 file changed, 58 insertions(+), 8 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp index a2cd8d7c6..34fa7345c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp @@ -9,7 +9,56 @@ namespace Shader::Maxwell { namespace { -void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { +IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed) { + const IR::U32 zero{ir.Imm32(0)}; + const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)}; + const IR::U1 z_flag{ir.GetZFlag()}; + const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)}; + const IR::U1 flip_logic{is_signed ? ir.Imm1(false) + : ir.LogicalXor(ir.ILessThan(operand_1, zero, true), + ir.ILessThan(operand_2, zero, true))}; + switch (compare_op) { + case CompareOp::False: + return ir.Imm1(false); + case CompareOp::LessThan: + return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), + ir.ILessThan(intermediate, zero, true))}; + case CompareOp::Equal: + return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag); + case CompareOp::LessThanEqual: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), + ir.ILessThan(intermediate, zero, true))}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); + } + case CompareOp::GreaterThan: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true), + ir.IGreaterThan(intermediate, zero, true))}; + const IR::U1 not_z{ir.LogicalNot(z_flag)}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z)); + } + case CompareOp::NotEqual: + return ir.LogicalOr(ir.INotEqual(intermediate, zero), + ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag))); + case CompareOp::GreaterThanEqual: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true), + ir.IGreaterThanEqual(intermediate, zero, true))}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); + } + case CompareOp::True: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid compare op {}", compare_op); + } +} + +IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed, bool x) { + return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) + : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed); +} + +void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { union { u64 insn; BitField<0, 8, IR::Reg> dest_reg; @@ -24,27 +73,28 @@ void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { BitField<49, 3, CompareOp> compare_op; } const iset{insn}; - if (iset.x != 0) { - throw NotImplementedException("ISET.X"); - } - - const IR::U32 src_reg{v.X(iset.src_reg)}; + const IR::U32 src_a{v.X(iset.src_reg)}; const bool is_signed{iset.is_signed != 0}; + const IR::U32 zero{v.ir.Imm32(0)}; + const bool x{iset.x != 0}; + const IR::U1 cmp_result{IsetCompare(v.ir, src_a, src_b, iset.compare_op, is_signed, x)}; + IR::U1 pred{v.ir.GetPred(iset.pred)}; if (iset.neg_pred != 0) { pred = v.ir.LogicalNot(pred); } - const IR::U1 cmp_result{IntegerCompare(v.ir, src_reg, src_a, iset.compare_op, is_signed)}; const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)}; const IR::U32 one_mask{v.ir.Imm32(-1)}; const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; - const IR::U32 zero{v.ir.Imm32(0)}; const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one}; const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; v.X(iset.dest_reg, result); if (iset.cc != 0) { + if (x) { + throw NotImplementedException("ISET.CC + X"); + } const IR::U1 is_zero{v.ir.IEqual(result, zero)}; v.SetZFlag(is_zero); if (iset.bf != 0) { -- cgit v1.2.3 From b10cf64c486d8730fcfeb53a333814915b3b5fbe Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 5 May 2021 02:19:08 -0300 Subject: glasm: Add GLASM backend infrastructure --- src/shader_recompiler/frontend/ir/value.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index bb7d19001..c73851d11 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -218,6 +218,12 @@ public: return Common::BitCast(definition); } + /// Destructively remove one reference count from the instruction + /// Useful for register allocation + void DestructiveRemoveUsage() { + --use_count; + } + private: struct NonTriviallyDummy { NonTriviallyDummy() noexcept {} -- cgit v1.2.3 From 6fd190d1ae4275a06ed2e488401e1d63912954be Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 8 May 2021 16:28:52 -0300 Subject: glasm: Implement basic GLASM instructions --- src/shader_recompiler/frontend/ir/value.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index c73851d11..beaf149f3 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -224,6 +224,12 @@ public: --use_count; } + /// Destructively add usages to the instruction + /// Useful for register allocation + void DestructiveAddUsage(int count) { + use_count += count; + } + private: struct NonTriviallyDummy { NonTriviallyDummy() noexcept {} -- cgit v1.2.3 From 3b6a632237e2f8388f2591d54fb31bebdc2a0ade Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 10 May 2021 03:55:09 -0300 Subject: shader: Add floating-point rounding to I2F --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 60 +++++++++++----------- src/shader_recompiler/frontend/ir/ir_emitter.h | 10 ++-- .../impl/integer_floating_point_conversion.cpp | 7 ++- 3 files changed, 42 insertions(+), 35 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 354d72c9b..ce6c9af07 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1622,84 +1622,86 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v return is_signed ? ConvertFToS(bitsize, value) : ConvertFToU(bitsize, value); } -F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value) { +F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, + FpControl control) { switch (dest_bitsize) { case 16: switch (src_bitsize) { case 8: - return Inst(Opcode::ConvertF16S8, value); + return Inst(Opcode::ConvertF16S8, Flags{control}, value); case 16: - return Inst(Opcode::ConvertF16S16, value); + return Inst(Opcode::ConvertF16S16, Flags{control}, value); case 32: - return Inst(Opcode::ConvertF16S32, value); + return Inst(Opcode::ConvertF16S32, Flags{control}, value); case 64: - return Inst(Opcode::ConvertF16S64, value); + return Inst(Opcode::ConvertF16S64, Flags{control}, value); } break; case 32: switch (src_bitsize) { case 8: - return Inst(Opcode::ConvertF32S8, value); + return Inst(Opcode::ConvertF32S8, Flags{control}, value); case 16: - return Inst(Opcode::ConvertF32S16, value); + return Inst(Opcode::ConvertF32S16, Flags{control}, value); case 32: - return Inst(Opcode::ConvertF32S32, value); + return Inst(Opcode::ConvertF32S32, Flags{control}, value); case 64: - return Inst(Opcode::ConvertF32S64, value); + return Inst(Opcode::ConvertF32S64, Flags{control}, value); } break; case 64: switch (src_bitsize) { case 8: - return Inst(Opcode::ConvertF64S8, value); + return Inst(Opcode::ConvertF64S8, Flags{control}, value); case 16: - return Inst(Opcode::ConvertF64S16, value); + return Inst(Opcode::ConvertF64S16, Flags{control}, value); case 32: - return Inst(Opcode::ConvertF64S32, value); + return Inst(Opcode::ConvertF64S32, Flags{control}, value); case 64: - return Inst(Opcode::ConvertF64S64, value); + return Inst(Opcode::ConvertF64S64, Flags{control}, value); } break; } throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize); } -F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value) { +F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, + FpControl control) { switch (dest_bitsize) { case 16: switch (src_bitsize) { case 8: - return Inst(Opcode::ConvertF16U8, value); + return Inst(Opcode::ConvertF16U8, Flags{control}, value); case 16: - return Inst(Opcode::ConvertF16U16, value); + return Inst(Opcode::ConvertF16U16, Flags{control}, value); case 32: - return Inst(Opcode::ConvertF16U32, value); + return Inst(Opcode::ConvertF16U32, Flags{control}, value); case 64: - return Inst(Opcode::ConvertF16U64, value); + return Inst(Opcode::ConvertF16U64, Flags{control}, value); } break; case 32: switch (src_bitsize) { case 8: - return Inst(Opcode::ConvertF32U8, value); + return Inst(Opcode::ConvertF32U8, Flags{control}, value); case 16: - return Inst(Opcode::ConvertF32U16, value); + return Inst(Opcode::ConvertF32U16, Flags{control}, value); case 32: - return Inst(Opcode::ConvertF32U32, value); + return Inst(Opcode::ConvertF32U32, Flags{control}, value); case 64: - return Inst(Opcode::ConvertF32U64, value); + return Inst(Opcode::ConvertF32U64, Flags{control}, value); } break; case 64: switch (src_bitsize) { case 8: - return Inst(Opcode::ConvertF64U8, value); + return Inst(Opcode::ConvertF64U8, Flags{control}, value); case 16: - return Inst(Opcode::ConvertF64U16, value); + return Inst(Opcode::ConvertF64U16, Flags{control}, value); case 32: - return Inst(Opcode::ConvertF64U32, value); + return Inst(Opcode::ConvertF64U32, Flags{control}, value); case 64: - return Inst(Opcode::ConvertF64U64, value); + return Inst(Opcode::ConvertF64U64, Flags{control}, value); } break; } @@ -1707,9 +1709,9 @@ F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const } F16F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, - const Value& value) { - return is_signed ? ConvertSToF(dest_bitsize, src_bitsize, value) - : ConvertUToF(dest_bitsize, src_bitsize, value); + const Value& value, FpControl control) { + return is_signed ? ConvertSToF(dest_bitsize, src_bitsize, value, control) + : ConvertUToF(dest_bitsize, src_bitsize, value, control); } U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 4e614d424..fd41b7e89 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -291,12 +291,12 @@ public: [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); - [[nodiscard]] F16F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize, - const Value& value); - [[nodiscard]] F16F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize, - const Value& value); + [[nodiscard]] F16F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, + FpControl control = {}); + [[nodiscard]] F16F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, + FpControl control = {}); [[nodiscard]] F16F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, - const Value& value); + const Value& value, FpControl control = {}); [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value, diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp index 3c233597f..d6224d5cc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -99,7 +99,12 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { } const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32}; const int dst_bitsize{BitSize(i2f.float_format)}; - IR::F16F32F64 value{v.ir.ConvertIToF(dst_bitsize, conversion_src_bitsize, is_signed, src)}; + const IR::FpControl fp_control{ + .no_contraction = false, + .rounding = CastFpRounding(i2f.fp_rounding), + .fmz_mode = IR::FmzMode::DontCare, + }; + auto value{v.ir.ConvertIToF(dst_bitsize, conversion_src_bitsize, is_signed, src, fp_control)}; if (i2f.neg != 0) { if (i2f.abs != 0 || !is_signed) { // We know the value is positive -- cgit v1.2.3 From d54d7de40e7295827b0e4e4026441b53d3fc9569 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 14 May 2021 00:40:54 -0300 Subject: glasm: Rework control flow introducing a syntax list This commit regresses VertexA shaders, their transformation pass has to be adapted to the new control flow. --- .../frontend/ir/abstract_syntax_list.h | 56 +++++ src/shader_recompiler/frontend/ir/basic_block.cpp | 56 ++--- src/shader_recompiler/frontend/ir/basic_block.h | 51 +---- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 60 ++---- src/shader_recompiler/frontend/ir/ir_emitter.h | 11 +- .../frontend/ir/microinstruction.cpp | 11 +- src/shader_recompiler/frontend/ir/opcodes.h | 1 - src/shader_recompiler/frontend/ir/opcodes.inc | 21 +- src/shader_recompiler/frontend/ir/post_order.cpp | 36 ++-- src/shader_recompiler/frontend/ir/post_order.h | 3 +- src/shader_recompiler/frontend/ir/program.h | 4 +- src/shader_recompiler/frontend/ir/type.h | 49 +++-- src/shader_recompiler/frontend/ir/value.cpp | 8 - src/shader_recompiler/frontend/ir/value.h | 9 - src/shader_recompiler/frontend/maxwell/program.cpp | 24 ++- .../frontend/maxwell/structured_control_flow.cpp | 235 ++++++++++++--------- .../frontend/maxwell/structured_control_flow.h | 12 +- .../frontend/maxwell/translate/translate.cpp | 7 +- .../frontend/maxwell/translate/translate.h | 2 +- 19 files changed, 312 insertions(+), 344 deletions(-) create mode 100644 src/shader_recompiler/frontend/ir/abstract_syntax_list.h (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/abstract_syntax_list.h b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h new file mode 100644 index 000000000..1366414c2 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h @@ -0,0 +1,56 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::IR { + +class Block; + +struct AbstractSyntaxNode { + enum class Type { + Block, + If, + EndIf, + Loop, + Repeat, + Break, + Return, + Unreachable, + }; + Type type{}; + union { + Block* block{}; + struct { + U1 cond; + Block* body; + Block* merge; + } if_node; + struct { + Block* merge; + } end_if; + struct { + Block* body; + Block* continue_block; + Block* merge; + } loop; + struct { + U1 cond; + Block* loop_header; + Block* merge; + } repeat; + struct { + U1 cond; + Block* merge; + Block* skip; + } break_node; + }; +}; +using AbstractSyntaxList = std::vector; + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index f92fc2571..7c08b25ce 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -14,10 +14,7 @@ namespace Shader::IR { -Block::Block(ObjectPool& inst_pool_, u32 begin, u32 end) - : inst_pool{&inst_pool_}, location_begin{begin}, location_end{end} {} - -Block::Block(ObjectPool& inst_pool_) : Block{inst_pool_, 0, 0} {} +Block::Block(ObjectPool& inst_pool_) : inst_pool{&inst_pool_} {} Block::~Block() = default; @@ -40,39 +37,15 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, return result_it; } -void Block::SetBranches(Condition cond, Block* branch_true_, Block* branch_false_) { - branch_cond = cond; - branch_true = branch_true_; - branch_false = branch_false_; -} - -void Block::SetBranch(Block* branch) { - branch_cond = Condition{true}; - branch_true = branch; -} - -void Block::SetReturn() { - branch_cond = Condition{true}; - branch_true = nullptr; - branch_false = nullptr; -} - -bool Block::IsVirtual() const noexcept { - return location_begin == location_end; -} - -u32 Block::LocationBegin() const noexcept { - return location_begin; -} - -u32 Block::LocationEnd() const noexcept { - return location_end; -} - -void Block::AddImmediatePredecessor(Block* block) { - if (std::ranges::find(imm_predecessors, block) == imm_predecessors.end()) { - imm_predecessors.push_back(block); +void Block::AddBranch(Block* block) { + if (std::ranges::find(imm_successors, block) != imm_successors.end()) { + throw LogicError("Successor already inserted"); + } + if (std::ranges::find(block->imm_predecessors, this) != block->imm_predecessors.end()) { + throw LogicError("Predecessor already inserted"); } + imm_successors.push_back(block); + block->imm_predecessors.push_back(this); } static std::string BlockToIndex(const std::map& block_to_index, @@ -92,15 +65,11 @@ static size_t InstIndex(std::map& inst_to_index, size_t& in return it->second; } -static std::string ArgToIndex(const std::map& block_to_index, - std::map& inst_to_index, size_t& inst_index, +static std::string ArgToIndex(std::map& inst_to_index, size_t& inst_index, const Value& arg) { if (arg.IsEmpty()) { return ""; } - if (arg.IsLabel()) { - return BlockToIndex(block_to_index, arg.Label()); - } if (!arg.IsImmediate() || arg.IsIdentity()) { return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst())); } @@ -140,8 +109,7 @@ std::string DumpBlock(const Block& block, const std::map& if (const auto it{block_to_index.find(&block)}; it != block_to_index.end()) { ret += fmt::format(" ${}", it->second); } - ret += fmt::format(": begin={:04x} end={:04x}\n", block.LocationBegin(), block.LocationEnd()); - + ret += '\n'; for (const Inst& inst : block) { const Opcode op{inst.GetOpcode()}; ret += fmt::format("[{:016x}] ", reinterpret_cast(&inst)); @@ -153,7 +121,7 @@ std::string DumpBlock(const Block& block, const std::map& const size_t arg_count{inst.NumArgs()}; for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { const Value arg{inst.Arg(arg_index)}; - const std::string arg_str{ArgToIndex(block_to_index, inst_to_index, inst_index, arg)}; + const std::string arg_str{ArgToIndex(inst_to_index, inst_index, arg)}; ret += arg_index != 0 ? ", " : " "; if (op == Opcode::Phi) { ret += fmt::format("[ {}, {} ]", arg_str, diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 0b0c97af6..7e134b4c7 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -12,6 +12,7 @@ #include #include "common/bit_cast.h" +#include "common/common_types.h" #include "shader_recompiler/frontend/ir/condition.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/object_pool.h" @@ -27,7 +28,6 @@ public: using reverse_iterator = InstructionList::reverse_iterator; using const_reverse_iterator = InstructionList::const_reverse_iterator; - explicit Block(ObjectPool& inst_pool_, u32 begin, u32 end); explicit Block(ObjectPool& inst_pool_); ~Block(); @@ -44,22 +44,8 @@ public: iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list args = {}, u32 flags = 0); - /// Set the branches to jump to when all instructions have executed. - void SetBranches(Condition cond, Block* branch_true, Block* branch_false); - /// Set the branch to unconditionally jump to when all instructions have executed. - void SetBranch(Block* branch); - /// Mark the block as a return block. - void SetReturn(); - - /// Returns true when the block does not implement any guest instructions directly. - [[nodiscard]] bool IsVirtual() const noexcept; - /// Gets the starting location of this basic block. - [[nodiscard]] u32 LocationBegin() const noexcept; - /// Gets the end location for this basic block. - [[nodiscard]] u32 LocationEnd() const noexcept; - - /// Adds a new immediate predecessor to this basic block. - void AddImmediatePredecessor(Block* block); + /// Adds a new branch to this basic block. + void AddBranch(Block* block); /// Gets a mutable reference to the instruction list for this basic block. [[nodiscard]] InstructionList& Instructions() noexcept { @@ -71,9 +57,13 @@ public: } /// Gets an immutable span to the immediate predecessors. - [[nodiscard]] std::span ImmediatePredecessors() const noexcept { + [[nodiscard]] std::span ImmPredecessors() const noexcept { return imm_predecessors; } + /// Gets an immutable span to the immediate successors. + [[nodiscard]] std::span ImmSuccessors() const noexcept { + return imm_successors; + } /// Intrusively store the host definition of this instruction. template @@ -87,19 +77,6 @@ public: return Common::BitCast(definition); } - [[nodiscard]] Condition BranchCondition() const noexcept { - return branch_cond; - } - [[nodiscard]] bool IsTerminationBlock() const noexcept { - return !branch_true && !branch_false; - } - [[nodiscard]] Block* TrueBranch() const noexcept { - return branch_true; - } - [[nodiscard]] Block* FalseBranch() const noexcept { - return branch_false; - } - void SetSsaRegValue(IR::Reg reg, const Value& value) noexcept { ssa_reg_values[RegIndex(reg)] = value; } @@ -178,22 +155,14 @@ public: private: /// Memory pool for instruction list ObjectPool* inst_pool; - /// Starting location of this block - u32 location_begin; - /// End location of this block - u32 location_end; /// List of instructions in this block InstructionList instructions; - /// Condition to choose the branch to take - Condition branch_cond{true}; - /// Block to jump into when the branch condition evaluates as true - Block* branch_true{nullptr}; - /// Block to jump into when the branch condition evaluates as false - Block* branch_false{nullptr}; /// Block immediate predecessors std::vector imm_predecessors; + /// Block immediate successors + std::vector imm_successors; /// Intrusively store the value of a register in the block. std::array ssa_reg_values; diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ce6c9af07..eb45aa477 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -61,25 +61,28 @@ F64 IREmitter::Imm64(f64 value) const { return F64{Value{value}}; } -void IREmitter::Branch(Block* label) { - label->AddImmediatePredecessor(block); - block->SetBranch(label); - Inst(Opcode::Branch, label); +void IREmitter::Prologue() { + Inst(Opcode::Prologue); } -void IREmitter::BranchConditional(const U1& condition, Block* true_label, Block* false_label) { - block->SetBranches(IR::Condition{true}, true_label, false_label); - true_label->AddImmediatePredecessor(block); - false_label->AddImmediatePredecessor(block); - Inst(Opcode::BranchConditional, condition, true_label, false_label); +void IREmitter::Epilogue() { + Inst(Opcode::Epilogue); } -void IREmitter::LoopMerge(Block* merge_block, Block* continue_target) { - Inst(Opcode::LoopMerge, merge_block, continue_target); +void IREmitter::BranchConditionRef(const U1& cond) { + Inst(Opcode::BranchConditionRef, cond); } -void IREmitter::SelectionMerge(Block* merge_block) { - Inst(Opcode::SelectionMerge, merge_block); +void IREmitter::DemoteToHelperInvocation() { + Inst(Opcode::DemoteToHelperInvocation); +} + +void IREmitter::EmitVertex(const U32& stream) { + Inst(Opcode::EmitVertex, stream); +} + +void IREmitter::EndPrimitive(const U32& stream) { + Inst(Opcode::EndPrimitive, stream); } void IREmitter::Barrier() { @@ -94,37 +97,6 @@ void IREmitter::DeviceMemoryBarrier() { Inst(Opcode::DeviceMemoryBarrier); } -void IREmitter::Return() { - block->SetReturn(); - Inst(Opcode::Return); -} - -void IREmitter::Unreachable() { - Inst(Opcode::Unreachable); -} - -void IREmitter::DemoteToHelperInvocation(Block* continue_label) { - block->SetBranch(continue_label); - continue_label->AddImmediatePredecessor(block); - Inst(Opcode::DemoteToHelperInvocation, continue_label); -} - -void IREmitter::Prologue() { - Inst(Opcode::Prologue); -} - -void IREmitter::Epilogue() { - Inst(Opcode::Epilogue); -} - -void IREmitter::EmitVertex(const U32& stream) { - Inst(Opcode::EmitVertex, stream); -} - -void IREmitter::EndPrimitive(const U32& stream) { - Inst(Opcode::EndPrimitive, stream); -} - U32 IREmitter::GetReg(IR::Reg reg) { return Inst(Opcode::GetRegister, reg); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index fd41b7e89..7a83c33d3 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -32,17 +32,10 @@ public: [[nodiscard]] U64 Imm64(s64 value) const; [[nodiscard]] F64 Imm64(f64 value) const; - void Branch(Block* label); - void BranchConditional(const U1& condition, Block* true_label, Block* false_label); - void LoopMerge(Block* merge_block, Block* continue_target); - void SelectionMerge(Block* merge_block); - void Return(); - void Unreachable(); - void DemoteToHelperInvocation(Block* continue_label); - void Prologue(); void Epilogue(); - + void BranchConditionRef(const U1& cond); + void DemoteToHelperInvocation(); void EmitVertex(const U32& stream); void EndPrimitive(const U32& stream); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 616ef17d4..364574240 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -56,19 +56,14 @@ Inst::~Inst() { bool Inst::MayHaveSideEffects() const noexcept { switch (op) { - case Opcode::Branch: - case Opcode::BranchConditional: - case Opcode::LoopMerge: - case Opcode::SelectionMerge: - case Opcode::Return: + case Opcode::Prologue: + case Opcode::Epilogue: + case Opcode::BranchConditionRef: case Opcode::Join: - case Opcode::Unreachable: case Opcode::DemoteToHelperInvocation: case Opcode::Barrier: case Opcode::WorkgroupMemoryBarrier: case Opcode::DeviceMemoryBarrier: - case Opcode::Prologue: - case Opcode::Epilogue: case Opcode::EmitVertex: case Opcode::EndPrimitive: case Opcode::SetAttribute: diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h index 2b9c0ed8c..56b001902 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.h +++ b/src/shader_recompiler/frontend/ir/opcodes.h @@ -30,7 +30,6 @@ struct OpcodeMeta { // using enum Type; constexpr Type Void{Type::Void}; constexpr Type Opaque{Type::Opaque}; -constexpr Type Label{Type::Label}; constexpr Type Reg{Type::Reg}; constexpr Type Pred{Type::Pred}; constexpr Type Attribute{Type::Attribute}; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 9165421f8..75ddb6b6f 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -7,27 +7,20 @@ OPCODE(Phi, Opaque, OPCODE(Identity, Opaque, Opaque, ) OPCODE(Void, Void, ) -// Control flow -OPCODE(Branch, Void, Label, ) -OPCODE(BranchConditional, Void, U1, Label, Label, ) -OPCODE(LoopMerge, Void, Label, Label, ) -OPCODE(SelectionMerge, Void, Label, ) -OPCODE(Return, Void, ) +// Special operations +OPCODE(Prologue, Void, ) +OPCODE(Epilogue, Void, ) +OPCODE(BranchConditionRef, Void, U1, ) OPCODE(Join, Void, ) -OPCODE(Unreachable, Void, ) -OPCODE(DemoteToHelperInvocation, Void, Label, ) +OPCODE(DemoteToHelperInvocation, Void, ) +OPCODE(EmitVertex, Void, U32, ) +OPCODE(EndPrimitive, Void, U32, ) // Barriers OPCODE(Barrier, Void, ) OPCODE(WorkgroupMemoryBarrier, Void, ) OPCODE(DeviceMemoryBarrier, Void, ) -// Special operations -OPCODE(Prologue, Void, ) -OPCODE(Epilogue, Void, ) -OPCODE(EmitVertex, Void, U32, ) -OPCODE(EndPrimitive, Void, U32, ) - // Context getters/setters OPCODE(GetRegister, U32, Reg, ) OPCODE(SetRegister, Void, Reg, U32, ) diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp index 8709a2ea1..1a28df7fb 100644 --- a/src/shader_recompiler/frontend/ir/post_order.cpp +++ b/src/shader_recompiler/frontend/ir/post_order.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include + #include #include @@ -10,35 +12,31 @@ namespace Shader::IR { -BlockList PostOrder(const BlockList& blocks) { +BlockList PostOrder(const AbstractSyntaxNode& root) { boost::container::small_vector block_stack; boost::container::flat_set visited; - BlockList post_order_blocks; - post_order_blocks.reserve(blocks.size()); - Block* const first_block{blocks.front()}; + if (root.type != AbstractSyntaxNode::Type::Block) { + throw LogicError("First node in abstract syntax list root is not a block"); + } + Block* const first_block{root.block}; visited.insert(first_block); block_stack.push_back(first_block); - const auto visit_branch = [&](Block* block, Block* branch) { - if (!branch) { - return false; - } - if (!visited.insert(branch).second) { - return false; - } - // Calling push_back twice is faster than insert on MSVC - block_stack.push_back(block); - block_stack.push_back(branch); - return true; - }; while (!block_stack.empty()) { Block* const block{block_stack.back()}; + const auto visit{[&](Block* branch) { + if (!visited.insert(branch).second) { + return false; + } + // Calling push_back twice is faster than insert on MSVC + block_stack.push_back(block); + block_stack.push_back(branch); + return true; + }}; block_stack.pop_back(); - - if (!visit_branch(block, block->TrueBranch()) && - !visit_branch(block, block->FalseBranch())) { + if (std::ranges::none_of(block->ImmSuccessors(), visit)) { post_order_blocks.push_back(block); } } diff --git a/src/shader_recompiler/frontend/ir/post_order.h b/src/shader_recompiler/frontend/ir/post_order.h index 30137ff57..58a0467a0 100644 --- a/src/shader_recompiler/frontend/ir/post_order.h +++ b/src/shader_recompiler/frontend/ir/post_order.h @@ -5,9 +5,10 @@ #pragma once #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/abstract_syntax_list.h" namespace Shader::IR { -BlockList PostOrder(const BlockList& blocks); +BlockList PostOrder(const AbstractSyntaxNode& root); } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 51e1a8c77..9ede5b48d 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -7,8 +7,7 @@ #include #include -#include - +#include "shader_recompiler/frontend/ir/abstract_syntax_list.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/program_header.h" #include "shader_recompiler/shader_info.h" @@ -17,6 +16,7 @@ namespace Shader::IR { struct Program { + AbstractSyntaxList syntax_list; BlockList blocks; BlockList post_order_blocks; Info info; diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h index 8b3b33852..294b230c4 100644 --- a/src/shader_recompiler/frontend/ir/type.h +++ b/src/shader_recompiler/frontend/ir/type.h @@ -16,31 +16,30 @@ namespace Shader::IR { enum class Type { Void = 0, Opaque = 1 << 0, - Label = 1 << 1, - Reg = 1 << 2, - Pred = 1 << 3, - Attribute = 1 << 4, - Patch = 1 << 5, - U1 = 1 << 6, - U8 = 1 << 7, - U16 = 1 << 8, - U32 = 1 << 9, - U64 = 1 << 10, - F16 = 1 << 11, - F32 = 1 << 12, - F64 = 1 << 13, - U32x2 = 1 << 14, - U32x3 = 1 << 15, - U32x4 = 1 << 16, - F16x2 = 1 << 17, - F16x3 = 1 << 18, - F16x4 = 1 << 19, - F32x2 = 1 << 20, - F32x3 = 1 << 21, - F32x4 = 1 << 22, - F64x2 = 1 << 23, - F64x3 = 1 << 24, - F64x4 = 1 << 25, + Reg = 1 << 1, + Pred = 1 << 2, + Attribute = 1 << 3, + Patch = 1 << 4, + U1 = 1 << 5, + U8 = 1 << 6, + U16 = 1 << 7, + U32 = 1 << 8, + U64 = 1 << 9, + F16 = 1 << 10, + F32 = 1 << 11, + F64 = 1 << 12, + U32x2 = 1 << 13, + U32x3 = 1 << 14, + U32x4 = 1 << 15, + F16x2 = 1 << 16, + F16x3 = 1 << 17, + F16x4 = 1 << 18, + F32x2 = 1 << 19, + F32x3 = 1 << 20, + F32x4 = 1 << 21, + F64x2 = 1 << 22, + F64x3 = 1 << 23, + F64x4 = 1 << 24, }; DECLARE_ENUM_FLAG_OPERATORS(Type) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index b962f170d..d365ea1bc 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -9,8 +9,6 @@ namespace Shader::IR { Value::Value(IR::Inst* value) noexcept : type{Type::Opaque}, inst{value} {} -Value::Value(IR::Block* value) noexcept : type{Type::Label}, label{value} {} - Value::Value(IR::Reg value) noexcept : type{Type::Reg}, reg{value} {} Value::Value(IR::Pred value) noexcept : type{Type::Pred}, pred{value} {} @@ -33,10 +31,6 @@ Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {} -bool Value::IsLabel() const noexcept { - return type == Type::Label; -} - IR::Type Value::Type() const noexcept { if (IsPhi()) { // The type of a phi node is stored in its flags @@ -60,8 +54,6 @@ bool Value::operator==(const Value& other) const { return true; case Type::Opaque: return inst == other.inst; - case Type::Label: - return label == other.label; case Type::Reg: return reg == other.reg; case Type::Pred: diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index beaf149f3..2ce49f953 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -37,7 +37,6 @@ class Value { public: Value() noexcept = default; explicit Value(IR::Inst* value) noexcept; - explicit Value(IR::Block* value) noexcept; explicit Value(IR::Reg value) noexcept; explicit Value(IR::Pred value) noexcept; explicit Value(IR::Attribute value) noexcept; @@ -54,11 +53,9 @@ public: [[nodiscard]] bool IsPhi() const noexcept; [[nodiscard]] bool IsEmpty() const noexcept; [[nodiscard]] bool IsImmediate() const noexcept; - [[nodiscard]] bool IsLabel() const noexcept; [[nodiscard]] IR::Type Type() const noexcept; [[nodiscard]] IR::Inst* Inst() const; - [[nodiscard]] IR::Block* Label() const; [[nodiscard]] IR::Inst* InstRecursive() const; [[nodiscard]] IR::Value Resolve() const; [[nodiscard]] IR::Reg Reg() const; @@ -80,7 +77,6 @@ private: IR::Type type{}; union { IR::Inst* inst{}; - IR::Block* label; IR::Reg reg; IR::Pred pred; IR::Attribute attribute; @@ -304,11 +300,6 @@ inline IR::Inst* Value::Inst() const { return inst; } -inline IR::Block* Value::Label() const { - DEBUG_ASSERT(type == Type::Label); - return label; -} - inline IR::Inst* Value::InstRecursive() const { DEBUG_ASSERT(type == Type::Opaque); if (IsIdentity()) { diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 0d3f00699..017c4b8fd 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include "shader_recompiler/frontend/ir/basic_block.h" @@ -15,6 +16,16 @@ namespace Shader::Maxwell { namespace { +IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { + auto syntax_blocks{syntax_list | std::views::filter([](const auto& node) { + return node.type == IR::AbstractSyntaxNode::Type::Block; + })}; + IR::BlockList blocks(std::ranges::distance(syntax_blocks)); + std::ranges::transform(syntax_blocks, blocks.begin(), + [](const IR::AbstractSyntaxNode& node) { return node.block; }); + return blocks; +} + void RemoveUnreachableBlocks(IR::Program& program) { // Some blocks might be unreachable if a function call exists unconditionally // If this happens the number of blocks and post order blocks will mismatch @@ -23,7 +34,7 @@ void RemoveUnreachableBlocks(IR::Program& program) { } const auto begin{program.blocks.begin() + 1}; const auto end{program.blocks.end()}; - const auto pred{[](IR::Block* block) { return block->ImmediatePredecessors().empty(); }}; + const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }}; program.blocks.erase(std::remove_if(begin, end, pred), end); } @@ -110,8 +121,9 @@ void AddNVNStorageBuffers(IR::Program& program) { IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, Flow::CFG& cfg) { IR::Program program; - program.blocks = VisitAST(inst_pool, block_pool, env, cfg); - program.post_order_blocks = PostOrder(program.blocks); + program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); + program.blocks = GenerateBlocks(program.syntax_list); + program.post_order_blocks = PostOrder(program.syntax_list.front()); program.stage = env.ShaderStage(); program.local_memory_size = env.LocalMemorySize(); switch (program.stage) { @@ -159,9 +171,7 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b Optimization::VertexATransformPass(vertex_a); Optimization::VertexBTransformPass(vertex_b); std::swap(result.blocks, vertex_a.blocks); - for (IR::Block* block : vertex_b.blocks) { - result.blocks.push_back(block); - } + result.blocks.insert(result.blocks.end(), vertex_b.blocks.begin(), vertex_b.blocks.end()); result.stage = Stage::VertexB; result.info = vertex_a.info; result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); @@ -173,7 +183,7 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); Optimization::DualVertexJoinPass(result); - result.post_order_blocks = PostOrder(result.blocks); + result.post_order_blocks = PostOrder(result.syntax_list.front()); Optimization::DeadCodeEliminationPass(result); Optimization::VerificationPass(result); Optimization::CollectShaderInfoPass(env_vertex_b, result); diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index cc5410c6d..e7e2e9c82 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -36,7 +36,6 @@ using Tree = boost::intrusive::list>; using Node = Tree::iterator; -using ConstNode = Tree::const_iterator; enum class StatementType { Code, @@ -91,7 +90,8 @@ struct IndirectBranchCond {}; #pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement #endif struct Statement : ListBaseHook { - Statement(IR::Block* code_, Statement* up_) : code{code_}, up{up_}, type{StatementType::Code} {} + Statement(const Flow::Block* block_, Statement* up_) + : block{block_}, up{up_}, type{StatementType::Code} {} Statement(Goto, Statement* cond_, Node label_, Statement* up_) : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {} Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {} @@ -125,7 +125,7 @@ struct Statement : ListBaseHook { } union { - IR::Block* code; + const Flow::Block* block; Node label; Tree children; IR::Condition guest_cond; @@ -171,8 +171,8 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) { switch (stmt->type) { case StatementType::Code: ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent, - stmt->code->LocationBegin(), stmt->code->LocationEnd(), - reinterpret_cast(stmt->code)); + stmt->block->begin, stmt->block->end, + reinterpret_cast(stmt->block)); break; case StatementType::Goto: ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond), @@ -407,11 +407,7 @@ private: }}; root.push_front(make_reset_variable()); root.insert(ip, make_reset_variable()); - - const u32 begin_offset{block.begin.Offset()}; - const u32 end_offset{block.end.Offset()}; - IR::Block* const ir_block{block_pool.Create(inst_pool, begin_offset, end_offset)}; - root.insert(ip, *pool.Create(ir_block, &root_stmt)); + root.insert(ip, *pool.Create(&block, &root_stmt)); switch (block.end_class) { case Flow::EndClass::Branch: { @@ -620,13 +616,13 @@ private: Statement root_stmt{FunctionTag{}}; }; -IR::Block* TryFindForwardBlock(const Statement& stmt) { - const Tree& tree{stmt.up->children}; - const ConstNode end{tree.cend()}; - ConstNode forward_node{std::next(Tree::s_iterator_to(stmt))}; +[[nodiscard]] Statement* TryFindForwardBlock(Statement& stmt) { + Tree& tree{stmt.up->children}; + const Node end{tree.end()}; + Node forward_node{std::next(Tree::s_iterator_to(stmt))}; while (forward_node != end && !HasChildren(forward_node->type)) { if (forward_node->type == StatementType::Code) { - return forward_node->code; + return &*forward_node; } ++forward_node; } @@ -654,21 +650,29 @@ class TranslatePass { public: TranslatePass(ObjectPool& inst_pool_, ObjectPool& block_pool_, ObjectPool& stmt_pool_, Environment& env_, Statement& root_stmt, - IR::BlockList& block_list_) + IR::AbstractSyntaxList& syntax_list_) : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, - block_list{block_list_} { + syntax_list{syntax_list_} { Visit(root_stmt, nullptr, nullptr); - IR::Block& first_block{*block_list.front()}; + IR::Block& first_block{*syntax_list.front().block}; IR::IREmitter ir{first_block, first_block.begin()}; ir.Prologue(); } private: - void Visit(Statement& parent, IR::Block* continue_block, IR::Block* break_block) { + void Visit(Statement& parent, IR::Block* break_block, IR::Block* fallthrough_block) { + IR::Block* current_block{}; + const auto ensure_block{[&] { + if (current_block) { + return; + } + current_block = block_pool.Create(inst_pool); + auto& node{syntax_list.emplace_back()}; + node.type = IR::AbstractSyntaxNode::Type::Block; + node.block = current_block; + }}; Tree& tree{parent.children}; - IR::Block* current_block{nullptr}; - for (auto it = tree.begin(); it != tree.end(); ++it) { Statement& stmt{*it}; switch (stmt.type) { @@ -676,124 +680,157 @@ private: // Labels can be ignored break; case StatementType::Code: { - if (current_block && current_block != stmt.code) { - IR::IREmitter{*current_block}.Branch(stmt.code); - } - current_block = stmt.code; - Translate(env, stmt.code); - block_list.push_back(stmt.code); + ensure_block(); + Translate(env, current_block, stmt.block->begin.Offset(), stmt.block->end.Offset()); break; } case StatementType::SetVariable: { - if (!current_block) { - current_block = MergeBlock(parent, stmt); - } + ensure_block(); IR::IREmitter ir{*current_block}; ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); break; } case StatementType::SetIndirectBranchVariable: { - if (!current_block) { - current_block = MergeBlock(parent, stmt); - } + ensure_block(); IR::IREmitter ir{*current_block}; IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))}; ir.SetIndirectBranchVariable(address); break; } case StatementType::If: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } + ensure_block(); IR::Block* const merge_block{MergeBlock(parent, stmt)}; - // Visit children - const size_t first_block_index{block_list.size()}; - Visit(stmt, merge_block, break_block); - // Implement if header block - IR::Block* const first_if_block{block_list.at(first_block_index)}; IR::IREmitter ir{*current_block}; const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.SelectionMerge(merge_block); - ir.BranchConditional(cond, first_if_block, merge_block); + ir.BranchConditionRef(cond); + const size_t if_node_index{syntax_list.size()}; + syntax_list.emplace_back(); + + // Visit children + const size_t then_block_index{syntax_list.size()}; + Visit(stmt, break_block, merge_block); + + IR::Block* const then_block{syntax_list.at(then_block_index).block}; + current_block->AddBranch(then_block); + current_block->AddBranch(merge_block); current_block = merge_block; + + auto& if_node{syntax_list[if_node_index]}; + if_node.type = IR::AbstractSyntaxNode::Type::If; + if_node.if_node.cond = cond; + if_node.if_node.body = then_block; + if_node.if_node.merge = merge_block; + + auto& endif_node{syntax_list.emplace_back()}; + endif_node.type = IR::AbstractSyntaxNode::Type::EndIf; + endif_node.end_if.merge = merge_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.block = merge_block; break; } case StatementType::Loop: { IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; if (current_block) { - IR::IREmitter{*current_block}.Branch(loop_header_block); + current_block->AddBranch(loop_header_block); } - block_list.push_back(loop_header_block); + auto& header_node{syntax_list.emplace_back()}; + header_node.type = IR::AbstractSyntaxNode::Type::Block; + header_node.block = loop_header_block; - IR::Block* const new_continue_block{block_pool.Create(inst_pool)}; + IR::Block* const continue_block{block_pool.Create(inst_pool)}; IR::Block* const merge_block{MergeBlock(parent, stmt)}; + const size_t loop_node_index{syntax_list.size()}; + syntax_list.emplace_back(); + // Visit children - const size_t first_block_index{block_list.size()}; - Visit(stmt, new_continue_block, merge_block); + const size_t body_block_index{syntax_list.size()}; + Visit(stmt, merge_block, continue_block); // The continue block is located at the end of the loop - block_list.push_back(new_continue_block); + IR::IREmitter ir{*continue_block}; + const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; + ir.BranchConditionRef(cond); - // Implement loop header block - IR::Block* const first_loop_block{block_list.at(first_block_index)}; - IR::IREmitter ir{*loop_header_block}; - ir.LoopMerge(merge_block, new_continue_block); - ir.Branch(first_loop_block); + IR::Block* const body_block{syntax_list.at(body_block_index).block}; + loop_header_block->AddBranch(body_block); - // Implement continue block - IR::IREmitter continue_ir{*new_continue_block}; - const IR::U1 continue_cond{VisitExpr(continue_ir, *stmt.cond)}; - continue_ir.BranchConditional(continue_cond, ir.block, merge_block); + continue_block->AddBranch(loop_header_block); + continue_block->AddBranch(merge_block); current_block = merge_block; + + auto& loop{syntax_list[loop_node_index]}; + loop.type = IR::AbstractSyntaxNode::Type::Loop; + loop.loop.body = body_block; + loop.loop.continue_block = continue_block; + loop.loop.merge = merge_block; + + auto& continue_block_node{syntax_list.emplace_back()}; + continue_block_node.type = IR::AbstractSyntaxNode::Type::Block; + continue_block_node.block = continue_block; + + auto& repeat{syntax_list.emplace_back()}; + repeat.type = IR::AbstractSyntaxNode::Type::Repeat; + repeat.repeat.cond = cond; + repeat.repeat.loop_header = loop_header_block; + repeat.repeat.merge = merge_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.block = merge_block; break; } case StatementType::Break: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } + ensure_block(); IR::Block* const skip_block{MergeBlock(parent, stmt)}; IR::IREmitter ir{*current_block}; - ir.BranchConditional(VisitExpr(ir, *stmt.cond), break_block, skip_block); - + const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; + ir.BranchConditionRef(cond); + current_block->AddBranch(break_block); + current_block->AddBranch(skip_block); current_block = skip_block; + + auto& break_node{syntax_list.emplace_back()}; + break_node.type = IR::AbstractSyntaxNode::Type::Break; + break_node.break_node.cond = cond; + break_node.break_node.merge = break_block; + break_node.break_node.skip = skip_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.block = skip_block; break; } case StatementType::Return: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } - IR::IREmitter ir{*current_block}; - ir.Epilogue(); - ir.Return(); + ensure_block(); + IR::IREmitter{*current_block}.Epilogue(); current_block = nullptr; + syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; break; } case StatementType::Kill: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } + ensure_block(); IR::Block* demote_block{MergeBlock(parent, stmt)}; - IR::IREmitter{*current_block}.DemoteToHelperInvocation(demote_block); + IR::IREmitter{*current_block}.DemoteToHelperInvocation(); + current_block->AddBranch(demote_block); current_block = demote_block; + + auto& merge{syntax_list.emplace_back()}; + merge.type = IR::AbstractSyntaxNode::Type::Block; + merge.block = demote_block; break; } case StatementType::Unreachable: { - if (!current_block) { - current_block = block_pool.Create(inst_pool); - block_list.push_back(current_block); - } - IR::IREmitter{*current_block}.Unreachable(); + ensure_block(); current_block = nullptr; + syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable; break; } default: @@ -801,42 +838,42 @@ private: } } if (current_block) { - IR::IREmitter ir{*current_block}; - if (continue_block) { - ir.Branch(continue_block); + if (fallthrough_block) { + current_block->AddBranch(fallthrough_block); } else { - ir.Unreachable(); + syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable; } } } IR::Block* MergeBlock(Statement& parent, Statement& stmt) { - if (IR::Block* const block{TryFindForwardBlock(stmt)}) { - return block; + Statement* merge_stmt{TryFindForwardBlock(stmt)}; + if (!merge_stmt) { + // Create a merge block we can visit later + merge_stmt = stmt_pool.Create(&dummy_flow_block, &parent); + parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); } - // Create a merge block we can visit later - IR::Block* const block{block_pool.Create(inst_pool)}; - Statement* const merge_stmt{stmt_pool.Create(block, &parent)}; - parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); - return block; + return block_pool.Create(inst_pool); } ObjectPool& stmt_pool; ObjectPool& inst_pool; ObjectPool& block_pool; Environment& env; - IR::BlockList& block_list; + IR::AbstractSyntaxList& syntax_list; + // TODO: Make this constexpr when std::vector is constexpr + const Flow::Block dummy_flow_block; }; } // Anonymous namespace -IR::BlockList VisitAST(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, Flow::CFG& cfg) { +IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, ObjectPool& block_pool, + Environment& env, Flow::CFG& cfg) { ObjectPool stmt_pool{64}; GotoPass goto_pass{cfg, inst_pool, block_pool, stmt_pool}; Statement& root{goto_pass.RootStatement()}; - IR::BlockList block_list; - TranslatePass{inst_pool, block_pool, stmt_pool, env, root, block_list}; - return block_list; + IR::AbstractSyntaxList syntax_list; + TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; + return syntax_list; } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h index a6be12ba2..88b083649 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h @@ -4,12 +4,8 @@ #pragma once -#include -#include - -#include - #include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/abstract_syntax_list.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" @@ -17,8 +13,8 @@ namespace Shader::Maxwell { -[[nodiscard]] IR::BlockList VisitAST(ObjectPool& inst_pool, - ObjectPool& block_pool, Environment& env, - Flow::CFG& cfg); +[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, + ObjectPool& block_pool, Environment& env, + Flow::CFG& cfg); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp index f1230f58f..0f4e7a251 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -23,13 +23,12 @@ static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { } } -void Translate(Environment& env, IR::Block* block) { - if (block->IsVirtual()) { +void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end) { + if (location_begin == location_end) { return; } TranslatorVisitor visitor{env, *block}; - const Location pc_end{block->LocationEnd()}; - for (Location pc = block->LocationBegin(); pc != pc_end; ++pc) { + for (Location pc = location_begin; pc != location_end; ++pc) { const u64 insn{env.ReadInstruction(pc.Offset())}; const Opcode opcode{Decode(insn)}; switch (opcode) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h index e1aa2e0f4..a3edd2e46 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.h +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h @@ -9,6 +9,6 @@ namespace Shader::Maxwell { -void Translate(Environment& env, IR::Block* block); +void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end); } // namespace Shader::Maxwell -- cgit v1.2.3 From bf5e48ffe4bd48ea681f2a01c8919c97125e88df Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 14 May 2021 04:48:46 -0300 Subject: glasm: Initial implementation of phi nodes on GLASM --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 18 ++++++++++++++---- src/shader_recompiler/frontend/ir/ir_emitter.h | 4 +++- src/shader_recompiler/frontend/ir/microinstruction.cpp | 3 ++- src/shader_recompiler/frontend/ir/opcodes.inc | 3 ++- src/shader_recompiler/frontend/ir/value.h | 4 ++++ .../frontend/maxwell/structured_control_flow.cpp | 6 +++--- 6 files changed, 28 insertions(+), 10 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index eb45aa477..def29143e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -61,6 +61,14 @@ F64 IREmitter::Imm64(f64 value) const { return F64{Value{value}}; } +void IREmitter::DummyReference(const Value& value) { + Inst(Opcode::DummyReference, value); +} + +void IREmitter::PhiMove(IR::Inst& phi, const Value& value) { + Inst(Opcode::PhiMove, Value{&phi}, value); +} + void IREmitter::Prologue() { Inst(Opcode::Prologue); } @@ -69,10 +77,6 @@ void IREmitter::Epilogue() { Inst(Opcode::Epilogue); } -void IREmitter::BranchConditionRef(const U1& cond) { - Inst(Opcode::BranchConditionRef, cond); -} - void IREmitter::DemoteToHelperInvocation() { Inst(Opcode::DemoteToHelperInvocation); } @@ -106,6 +110,9 @@ void IREmitter::SetReg(IR::Reg reg, const U32& value) { } U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) { + if (pred == Pred::PT) { + return Imm1(!is_negated); + } const U1 value{Inst(Opcode::GetPred, pred)}; if (is_negated) { return Inst(Opcode::LogicalNot, value); @@ -264,6 +271,9 @@ static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { U1 IREmitter::Condition(IR::Condition cond) { const FlowTest flow_test{cond.GetFlowTest()}; const auto [pred, is_negated]{cond.GetPred()}; + if (flow_test == FlowTest::T) { + return GetPred(pred, is_negated); + } return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test)); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 7a83c33d3..4f7c820fe 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -32,9 +32,11 @@ public: [[nodiscard]] U64 Imm64(s64 value) const; [[nodiscard]] F64 Imm64(f64 value) const; + void DummyReference(const Value& value); + void PhiMove(IR::Inst& phi, const Value& value); + void Prologue(); void Epilogue(); - void BranchConditionRef(const U1& cond); void DemoteToHelperInvocation(); void EmitVertex(const U32& stream); void EndPrimitive(const U32& stream); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 364574240..267aebc61 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -56,9 +56,10 @@ Inst::~Inst() { bool Inst::MayHaveSideEffects() const noexcept { switch (op) { + case Opcode::DummyReference: + case Opcode::PhiMove: case Opcode::Prologue: case Opcode::Epilogue: - case Opcode::BranchConditionRef: case Opcode::Join: case Opcode::DemoteToHelperInvocation: case Opcode::Barrier: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 75ddb6b6f..6196b867d 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -6,11 +6,12 @@ OPCODE(Phi, Opaque, ) OPCODE(Identity, Opaque, Opaque, ) OPCODE(Void, Void, ) +OPCODE(DummyReference, Void, Opaque, ) +OPCODE(PhiMove, Void, Opaque, Opaque, ) // Special operations OPCODE(Prologue, Void, ) OPCODE(Epilogue, Void, ) -OPCODE(BranchConditionRef, Void, U1, ) OPCODE(Join, Void, ) OPCODE(DemoteToHelperInvocation, Void, ) OPCODE(EmitVertex, Void, U32, ) diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 2ce49f953..0c6bf684d 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -391,4 +391,8 @@ inline f64 Value::F64() const { return imm_f64; } +[[nodiscard]] inline bool IsPhi(const Inst& inst) { + return inst.GetOpcode() == Opcode::Phi; +} + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index e7e2e9c82..836d4b8aa 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -704,7 +704,7 @@ private: // Implement if header block IR::IREmitter ir{*current_block}; const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.BranchConditionRef(cond); + ir.DummyReference(cond); const size_t if_node_index{syntax_list.size()}; syntax_list.emplace_back(); @@ -755,7 +755,7 @@ private: // The continue block is located at the end of the loop IR::IREmitter ir{*continue_block}; const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.BranchConditionRef(cond); + ir.DummyReference(cond); IR::Block* const body_block{syntax_list.at(body_block_index).block}; loop_header_block->AddBranch(body_block); @@ -792,7 +792,7 @@ private: IR::IREmitter ir{*current_block}; const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.BranchConditionRef(cond); + ir.DummyReference(cond); current_block->AddBranch(break_block); current_block->AddBranch(skip_block); current_block = skip_block; -- cgit v1.2.3 From 776ab3ea12f07e2d434a26857d412cff018b1b50 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 15 May 2021 18:18:31 -0300 Subject: shader: Use a non-trivial dummy to construct ASL node union --- src/shader_recompiler/frontend/ir/abstract_syntax_list.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/abstract_syntax_list.h b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h index 1366414c2..e9afb4d92 100644 --- a/src/shader_recompiler/frontend/ir/abstract_syntax_list.h +++ b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h @@ -13,6 +13,10 @@ namespace Shader::IR { class Block; struct AbstractSyntaxNode { + struct NonTrivialDummy { + NonTrivialDummy() {} + }; + enum class Type { Block, If, @@ -25,7 +29,8 @@ struct AbstractSyntaxNode { }; Type type{}; union { - Block* block{}; + NonTrivialDummy dummy{}; + Block* block; struct { U1 cond; Block* body; -- cgit v1.2.3 From f7a2340205b4fa2db32403f20d7b7afe32b15f33 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sun, 16 May 2021 17:06:13 -0400 Subject: shader_recompiler: GCC fixes Fixes members of unnamed union not being accessible, and one function without a declaration. --- .../frontend/ir/abstract_syntax_list.h | 11 ++--- src/shader_recompiler/frontend/ir/post_order.cpp | 2 +- src/shader_recompiler/frontend/maxwell/program.cpp | 2 +- .../frontend/maxwell/structured_control_flow.cpp | 48 +++++++++++----------- 4 files changed, 30 insertions(+), 33 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/abstract_syntax_list.h b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h index e9afb4d92..b61773487 100644 --- a/src/shader_recompiler/frontend/ir/abstract_syntax_list.h +++ b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h @@ -13,10 +13,6 @@ namespace Shader::IR { class Block; struct AbstractSyntaxNode { - struct NonTrivialDummy { - NonTrivialDummy() {} - }; - enum class Type { Block, If, @@ -27,9 +23,7 @@ struct AbstractSyntaxNode { Return, Unreachable, }; - Type type{}; - union { - NonTrivialDummy dummy{}; + union Data { Block* block; struct { U1 cond; @@ -55,6 +49,9 @@ struct AbstractSyntaxNode { Block* skip; } break_node; }; + + Data data{}; + Type type{}; }; using AbstractSyntaxList = std::vector; diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp index 1a28df7fb..16bc44101 100644 --- a/src/shader_recompiler/frontend/ir/post_order.cpp +++ b/src/shader_recompiler/frontend/ir/post_order.cpp @@ -20,7 +20,7 @@ BlockList PostOrder(const AbstractSyntaxNode& root) { if (root.type != AbstractSyntaxNode::Type::Block) { throw LogicError("First node in abstract syntax list root is not a block"); } - Block* const first_block{root.block}; + Block* const first_block{root.data.block}; visited.insert(first_block); block_stack.push_back(first_block); diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 017c4b8fd..ccdab1dad 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -22,7 +22,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { })}; IR::BlockList blocks(std::ranges::distance(syntax_blocks)); std::ranges::transform(syntax_blocks, blocks.begin(), - [](const IR::AbstractSyntaxNode& node) { return node.block; }); + [](const IR::AbstractSyntaxNode& node) { return node.data.block; }); return blocks; } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 836d4b8aa..83554a953 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -655,8 +655,8 @@ public: syntax_list{syntax_list_} { Visit(root_stmt, nullptr, nullptr); - IR::Block& first_block{*syntax_list.front().block}; - IR::IREmitter ir{first_block, first_block.begin()}; + IR::Block& first_block{*syntax_list.front().data.block}; + IR::IREmitter ir = IR::IREmitter(first_block, first_block.begin()); ir.Prologue(); } @@ -670,7 +670,7 @@ private: current_block = block_pool.Create(inst_pool); auto& node{syntax_list.emplace_back()}; node.type = IR::AbstractSyntaxNode::Type::Block; - node.block = current_block; + node.data.block = current_block; }}; Tree& tree{parent.children}; for (auto it = tree.begin(); it != tree.end(); ++it) { @@ -713,24 +713,24 @@ private: const size_t then_block_index{syntax_list.size()}; Visit(stmt, break_block, merge_block); - IR::Block* const then_block{syntax_list.at(then_block_index).block}; + IR::Block* const then_block{syntax_list.at(then_block_index).data.block}; current_block->AddBranch(then_block); current_block->AddBranch(merge_block); current_block = merge_block; auto& if_node{syntax_list[if_node_index]}; if_node.type = IR::AbstractSyntaxNode::Type::If; - if_node.if_node.cond = cond; - if_node.if_node.body = then_block; - if_node.if_node.merge = merge_block; + if_node.data.if_node.cond = cond; + if_node.data.if_node.body = then_block; + if_node.data.if_node.merge = merge_block; auto& endif_node{syntax_list.emplace_back()}; endif_node.type = IR::AbstractSyntaxNode::Type::EndIf; - endif_node.end_if.merge = merge_block; + endif_node.data.end_if.merge = merge_block; auto& merge{syntax_list.emplace_back()}; merge.type = IR::AbstractSyntaxNode::Type::Block; - merge.block = merge_block; + merge.data.block = merge_block; break; } case StatementType::Loop: { @@ -740,7 +740,7 @@ private: } auto& header_node{syntax_list.emplace_back()}; header_node.type = IR::AbstractSyntaxNode::Type::Block; - header_node.block = loop_header_block; + header_node.data.block = loop_header_block; IR::Block* const continue_block{block_pool.Create(inst_pool)}; IR::Block* const merge_block{MergeBlock(parent, stmt)}; @@ -757,7 +757,7 @@ private: const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; ir.DummyReference(cond); - IR::Block* const body_block{syntax_list.at(body_block_index).block}; + IR::Block* const body_block{syntax_list.at(body_block_index).data.block}; loop_header_block->AddBranch(body_block); continue_block->AddBranch(loop_header_block); @@ -767,23 +767,23 @@ private: auto& loop{syntax_list[loop_node_index]}; loop.type = IR::AbstractSyntaxNode::Type::Loop; - loop.loop.body = body_block; - loop.loop.continue_block = continue_block; - loop.loop.merge = merge_block; + loop.data.loop.body = body_block; + loop.data.loop.continue_block = continue_block; + loop.data.loop.merge = merge_block; auto& continue_block_node{syntax_list.emplace_back()}; continue_block_node.type = IR::AbstractSyntaxNode::Type::Block; - continue_block_node.block = continue_block; + continue_block_node.data.block = continue_block; auto& repeat{syntax_list.emplace_back()}; repeat.type = IR::AbstractSyntaxNode::Type::Repeat; - repeat.repeat.cond = cond; - repeat.repeat.loop_header = loop_header_block; - repeat.repeat.merge = merge_block; + repeat.data.repeat.cond = cond; + repeat.data.repeat.loop_header = loop_header_block; + repeat.data.repeat.merge = merge_block; auto& merge{syntax_list.emplace_back()}; merge.type = IR::AbstractSyntaxNode::Type::Block; - merge.block = merge_block; + merge.data.block = merge_block; break; } case StatementType::Break: { @@ -799,13 +799,13 @@ private: auto& break_node{syntax_list.emplace_back()}; break_node.type = IR::AbstractSyntaxNode::Type::Break; - break_node.break_node.cond = cond; - break_node.break_node.merge = break_block; - break_node.break_node.skip = skip_block; + break_node.data.break_node.cond = cond; + break_node.data.break_node.merge = break_block; + break_node.data.break_node.skip = skip_block; auto& merge{syntax_list.emplace_back()}; merge.type = IR::AbstractSyntaxNode::Type::Block; - merge.block = skip_block; + merge.data.block = skip_block; break; } case StatementType::Return: { @@ -824,7 +824,7 @@ private: auto& merge{syntax_list.emplace_back()}; merge.type = IR::AbstractSyntaxNode::Type::Block; - merge.block = demote_block; + merge.data.block = demote_block; break; } case StatementType::Unreachable: { -- cgit v1.2.3 From ec6fc5fe78c9038fc9ad7259b7b3a7be751ecef6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 17 May 2021 02:52:01 -0300 Subject: glasm: Implement TEX and TEXS instructions Remove lod clamp from texture instructions with lod, as this is not needed (nor supported). --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 11 ++++------- src/shader_recompiler/frontend/ir/ir_emitter.h | 5 ++--- .../frontend/maxwell/translate/impl/texture_fetch.cpp | 5 ++--- .../maxwell/translate/impl/texture_fetch_swizzled.cpp | 18 +++++++++--------- 4 files changed, 17 insertions(+), 22 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index def29143e..94bdbe39c 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1778,12 +1778,10 @@ Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords } Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod, - const Value& offset, const F32& lod_clamp, - TextureInstInfo info) { - const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)}; + const Value& offset, TextureInstInfo info) { const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleExplicitLod : Opcode::BindlessImageSampleExplicitLod}; - return Inst(op, Flags{info}, handle, coords, lod_lc, offset); + return Inst(op, Flags{info}, handle, coords, lod, offset); } F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref, @@ -1796,12 +1794,11 @@ F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coor } F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, - const F32& lod, const Value& offset, const F32& lod_clamp, + const F32& lod, const Value& offset, TextureInstInfo info) { - const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)}; const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefExplicitLod : Opcode::BindlessImageSampleDrefExplicitLod}; - return Inst(op, Flags{info}, handle, coords, dref, lod_lc, offset); + return Inst(op, Flags{info}, handle, coords, dref, lod, offset); } Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset, diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 4f7c820fe..4ae69b788 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -302,15 +302,14 @@ public: const F32& lod_clamp, TextureInstInfo info); [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod, const Value& offset, - const F32& lod_clamp, TextureInstInfo info); + TextureInstInfo info); [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref, const F32& bias, const Value& offset, const F32& lod_clamp, TextureInstInfo info); [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, const F32& lod, - const Value& offset, const F32& lod_clamp, - TextureInstInfo info); + const Value& offset, TextureInstInfo info); [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod); [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords, diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 9671d115e..0046b5edd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -177,14 +177,13 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, const IR::Value sample{[&]() -> IR::Value { if (tex.dc == 0) { if (HasExplicitLod(blod)) { - return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, lod_clamp, info); + return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, info); } else { return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); } } if (HasExplicitLod(blod)) { - return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, lod_clamp, - info); + return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, info); } else { return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, info); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index 3500a4559..154e7f1a1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -81,18 +81,18 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { switch (texs.encoding) { case 0: // 1D.LZ info.type.Assign(TextureType::Color1D); - return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, {}, info); + return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, info); case 1: // 2D info.type.Assign(TextureType::Color2D); return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info); case 2: // 2D.LZ info.type.Assign(TextureType::Color2D); - return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, {}, info); + return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, info); case 3: // 2D.LL CheckAlignment(reg_a, 2); info.type.Assign(TextureType::Color2D); return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {}, - {}, info); + info); case 4: // 2D.DC CheckAlignment(reg_a, 2); info.type.Assign(TextureType::Color2D); @@ -105,13 +105,13 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { info.type.Assign(TextureType::Color2D); info.is_depth.Assign(1); return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), - v.F(reg_b + 1), v.F(reg_b), {}, {}, info); + v.F(reg_b + 1), v.F(reg_b), {}, info); case 6: // 2D.LZ.DC CheckAlignment(reg_a, 2); info.type.Assign(TextureType::Color2D); info.is_depth.Assign(1); return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), - zero, {}, {}, info); + zero, {}, info); case 7: // ARRAY_2D CheckAlignment(reg_a, 2); info.type.Assign(TextureType::ColorArray2D); @@ -123,7 +123,7 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { info.type.Assign(TextureType::ColorArray2D); return v.ir.ImageSampleExplicitLod( handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), - zero, {}, {}, info); + zero, {}, info); case 9: // ARRAY_2D.LZ.DC CheckAlignment(reg_a, 2); CheckAlignment(reg_b, 2); @@ -131,7 +131,7 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { info.is_depth.Assign(1); return v.ir.ImageSampleDrefExplicitLod( handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), - v.F(reg_b + 1), zero, {}, {}, info); + v.F(reg_b + 1), zero, {}, info); case 10: // 3D CheckAlignment(reg_a, 2); info.type.Assign(TextureType::Color3D); @@ -141,7 +141,7 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { CheckAlignment(reg_a, 2); info.type.Assign(TextureType::Color3D); return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {}, - {}, info); + info); case 12: // CUBE CheckAlignment(reg_a, 2); info.type.Assign(TextureType::ColorCube); @@ -152,7 +152,7 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { CheckAlignment(reg_b, 2); info.type.Assign(TextureType::ColorCube); return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), - v.F(reg_b + 1), {}, {}, info); + v.F(reg_b + 1), {}, info); default: throw NotImplementedException("Illegal encoding {}", texs.encoding.Value()); } -- cgit v1.2.3 From 9bb3e008c9f4bbdd35c095b506c3a3312d17e383 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 May 2021 02:04:22 -0300 Subject: shader: Read branch conditions from an instruction Fixes the identity removal pass. --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 8 ++++++-- src/shader_recompiler/frontend/ir/ir_emitter.h | 4 +++- src/shader_recompiler/frontend/ir/microinstruction.cpp | 3 ++- src/shader_recompiler/frontend/ir/opcodes.inc | 3 ++- .../frontend/maxwell/structured_control_flow.cpp | 9 +++------ 5 files changed, 16 insertions(+), 11 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 94bdbe39c..e9fd41237 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -61,8 +61,12 @@ F64 IREmitter::Imm64(f64 value) const { return F64{Value{value}}; } -void IREmitter::DummyReference(const Value& value) { - Inst(Opcode::DummyReference, value); +U1 IREmitter::ConditionRef(const U1& value) { + return Inst(Opcode::ConditionRef, value); +} + +void IREmitter::Reference(const Value& value) { + Inst(Opcode::Reference, value); } void IREmitter::PhiMove(IR::Inst& phi, const Value& value) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 4ae69b788..bb3500c54 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -32,7 +32,9 @@ public: [[nodiscard]] U64 Imm64(s64 value) const; [[nodiscard]] F64 Imm64(f64 value) const; - void DummyReference(const Value& value); + U1 ConditionRef(const U1& value); + void Reference(const Value& value); + void PhiMove(IR::Inst& phi, const Value& value); void Prologue(); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 267aebc61..3dfa5a880 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -56,7 +56,8 @@ Inst::~Inst() { bool Inst::MayHaveSideEffects() const noexcept { switch (op) { - case Opcode::DummyReference: + case Opcode::ConditionRef: + case Opcode::Reference: case Opcode::PhiMove: case Opcode::Prologue: case Opcode::Epilogue: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 6196b867d..8a8d0d759 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -6,7 +6,8 @@ OPCODE(Phi, Opaque, ) OPCODE(Identity, Opaque, Opaque, ) OPCODE(Void, Void, ) -OPCODE(DummyReference, Void, Opaque, ) +OPCODE(ConditionRef, U1, U1, ) +OPCODE(Reference, Void, Opaque, ) OPCODE(PhiMove, Void, Opaque, Opaque, ) // Special operations diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 83554a953..ebe5c2654 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -703,8 +703,7 @@ private: // Implement if header block IR::IREmitter ir{*current_block}; - const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.DummyReference(cond); + const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; const size_t if_node_index{syntax_list.size()}; syntax_list.emplace_back(); @@ -754,8 +753,7 @@ private: // The continue block is located at the end of the loop IR::IREmitter ir{*continue_block}; - const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.DummyReference(cond); + const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; IR::Block* const body_block{syntax_list.at(body_block_index).data.block}; loop_header_block->AddBranch(body_block); @@ -791,8 +789,7 @@ private: IR::Block* const skip_block{MergeBlock(parent, stmt)}; IR::IREmitter ir{*current_block}; - const IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - ir.DummyReference(cond); + const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; current_block->AddBranch(break_block); current_block->AddBranch(skip_block); current_block = skip_block; -- cgit v1.2.3 From a49532c8eb29807814214ab326ff970f5a964a03 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 25 May 2021 18:58:52 -0300 Subject: video_core,shader: Clang-format fixes --- src/shader_recompiler/frontend/ir/patch.cpp | 2 +- src/shader_recompiler/frontend/ir/post_order.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/patch.cpp b/src/shader_recompiler/frontend/ir/patch.cpp index 1f770bc48..4c956a970 100644 --- a/src/shader_recompiler/frontend/ir/patch.cpp +++ b/src/shader_recompiler/frontend/ir/patch.cpp @@ -2,8 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "shader_recompiler/frontend/ir/patch.h" #include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/patch.h" namespace Shader::IR { diff --git a/src/shader_recompiler/frontend/ir/post_order.h b/src/shader_recompiler/frontend/ir/post_order.h index 58a0467a0..07bfbadc3 100644 --- a/src/shader_recompiler/frontend/ir/post_order.h +++ b/src/shader_recompiler/frontend/ir/post_order.h @@ -4,8 +4,8 @@ #pragma once -#include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/abstract_syntax_list.h" +#include "shader_recompiler/frontend/ir/basic_block.h" namespace Shader::IR { -- cgit v1.2.3 From 586c785366307cb3c648bd33345b431b8312612d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 May 2021 15:41:53 -0300 Subject: glasm: Skip phi moves on undefined instructions --- src/shader_recompiler/frontend/ir/value.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 0c6bf684d..090cc1739 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -395,4 +395,17 @@ inline f64 Value::F64() const { return inst.GetOpcode() == Opcode::Phi; } +[[nodiscard]] inline bool IsUndef(const Inst& inst) { + switch (inst.GetOpcode()) { + case Opcode::UndefU1: + case Opcode::UndefU8: + case Opcode::UndefU16: + case Opcode::UndefU32: + case Opcode::UndefU64: + return true; + default: + return false; + } +} + } // namespace Shader::IR -- cgit v1.2.3 From 8f3043c3cf6f6baa1d235e6789533fbf567d1c2d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 May 2021 18:42:37 -0300 Subject: Revert "glasm: Skip phi moves on undefined instructions" Causes regressions on Bowser's Fury. --- src/shader_recompiler/frontend/ir/value.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 090cc1739..0c6bf684d 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -395,17 +395,4 @@ inline f64 Value::F64() const { return inst.GetOpcode() == Opcode::Phi; } -[[nodiscard]] inline bool IsUndef(const Inst& inst) { - switch (inst.GetOpcode()) { - case Opcode::UndefU1: - case Opcode::UndefU8: - case Opcode::UndefU16: - case Opcode::UndefU32: - case Opcode::UndefU64: - return true; - default: - return false; - } -} - } // namespace Shader::IR -- cgit v1.2.3 From b7764c3a796e53ac74009bc7d7cd153c64b6d743 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 May 2021 17:51:00 -0300 Subject: shader: Handle host exceptions --- src/shader_recompiler/frontend/maxwell/opcodes.cpp | 2 +- src/shader_recompiler/frontend/maxwell/program.cpp | 1 + .../frontend/maxwell/translate/translate.cpp | 13 +++++++++---- 3 files changed, 11 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.cpp b/src/shader_recompiler/frontend/maxwell/opcodes.cpp index 12ddf2ac9..ccc40c20c 100644 --- a/src/shader_recompiler/frontend/maxwell/opcodes.cpp +++ b/src/shader_recompiler/frontend/maxwell/opcodes.cpp @@ -10,7 +10,7 @@ namespace Shader::Maxwell { namespace { constexpr std::array NAME_TABLE{ -#define INST(name, cute, encode) #cute, +#define INST(name, cute, encode) cute, #include "maxwell.inc" #undef INST }; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index ccdab1dad..900fc7ab1 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -7,6 +7,7 @@ #include #include +#include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/post_order.h" #include "shader_recompiler/frontend/maxwell/program.h" diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp index 0f4e7a251..8e3c4c5d5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -30,16 +30,21 @@ void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 locat TranslatorVisitor visitor{env, *block}; for (Location pc = location_begin; pc != location_end; ++pc) { const u64 insn{env.ReadInstruction(pc.Offset())}; - const Opcode opcode{Decode(insn)}; - switch (opcode) { + try { + const Opcode opcode{Decode(insn)}; + switch (opcode) { #define INST(name, cute, mask) \ case Opcode::name: \ Invoke<&TranslatorVisitor::name>(visitor, pc, insn); \ break; #include "shader_recompiler/frontend/maxwell/maxwell.inc" #undef OPCODE - default: - throw LogicError("Invalid opcode {}", opcode); + default: + throw LogicError("Invalid opcode {}", opcode); + } + } catch (Exception& exception) { + exception.Prepend(fmt::format("Translate {}: ", Decode(insn))); + throw; } } } -- cgit v1.2.3 From b659212dbdcac6e4f54a4306fd716b7fb74505ad Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 May 2021 19:59:22 -0300 Subject: shader: Fix TMML queries --- .../maxwell/translate/impl/texture_mipmap_level.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp index 2277d24ff..abf87a0df 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -84,9 +84,6 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { if ((tmml.mask & 0b1100) != 0) { throw NotImplementedException("TMML BA results are not implmented"); } - - IR::F32 transform_constant{v.ir.Imm32(256.0f)}; - const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)}; IR::U32 handle; @@ -107,9 +104,16 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { } IR::F32 value{v.ir.CompositeExtract(sample, element)}; if (element < 2) { - value = v.ir.FPMul(value, transform_constant); + IR::U32 casted_value; + if (element == 0) { + casted_value = v.ir.ConvertFToU(32, value); + } else { + casted_value = v.ir.ConvertFToS(16, value); + } + v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8))); + } else { + v.F(dest_reg, value); } - v.F(dest_reg, value); ++dest_reg; } } -- cgit v1.2.3 From d093522fac5f3f4c2c27d30c9ad93421460792a0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 May 2021 02:51:32 -0300 Subject: shader: Fix ImageWrite indexing --- .../frontend/maxwell/translate/impl/surface_load_store.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp index e1b8aa8ad..7dc793ad7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp @@ -145,7 +145,7 @@ IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { case Type::ARRAY_2D: return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2)); case Type::_3D: - return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 3)); + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); } throw NotImplementedException("Invalid type {}", type); } -- cgit v1.2.3 From 329dea217d05a47ee00bb005eba1f0fc6b3dd0f6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 May 2021 19:58:36 -0300 Subject: shader: Always initialize up reference in structure control flow Fixes ubsan issue. --- .../frontend/maxwell/structured_control_flow.cpp | 67 ++++++++++++---------- 1 file changed, 36 insertions(+), 31 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index ebe5c2654..c1e0646e6 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -101,22 +101,24 @@ struct Statement : ListBaseHook { : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {} Statement(Break, Statement* cond_, Statement* up_) : cond{cond_}, up{up_}, type{StatementType::Break} {} - Statement(Return) : type{StatementType::Return} {} - Statement(Kill) : type{StatementType::Kill} {} - Statement(Unreachable) : type{StatementType::Unreachable} {} + Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {} + Statement(Kill, Statement* up_) : up{up_}, type{StatementType::Kill} {} + Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {} Statement(FunctionTag) : children{}, type{StatementType::Function} {} - Statement(Identity, IR::Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {} - Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {} - Statement(Or, Statement* op_a_, Statement* op_b_) - : op_a{op_a_}, op_b{op_b_}, type{StatementType::Or} {} + Statement(Identity, IR::Condition cond_, Statement* up_) + : guest_cond{cond_}, up{up_}, type{StatementType::Identity} {} + Statement(Not, Statement* op_, Statement* up_) : op{op_}, up{up_}, type{StatementType::Not} {} + Statement(Or, Statement* op_a_, Statement* op_b_, Statement* up_) + : op_a{op_a_}, op_b{op_b_}, up{up_}, type{StatementType::Or} {} Statement(SetVariable, u32 id_, Statement* op_, Statement* up_) : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} - Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_) + Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_, Statement* up_) : branch_offset{branch_offset_}, - branch_reg{branch_reg_}, type{StatementType::SetIndirectBranchVariable} {} - Statement(Variable, u32 id_) : id{id_}, type{StatementType::Variable} {} - Statement(IndirectBranchCond, u32 location_) - : location{location_}, type{StatementType::IndirectBranchCond} {} + branch_reg{branch_reg_}, up{up_}, type{StatementType::SetIndirectBranchVariable} {} + Statement(Variable, u32 id_, Statement* up_) + : id{id_}, up{up_}, type{StatementType::Variable} {} + Statement(IndirectBranchCond, u32 location_, Statement* up_) + : location{location_}, up{up_}, type{StatementType::IndirectBranchCond} {} ~Statement() { if (HasChildren(type)) { @@ -385,7 +387,7 @@ private: void BuildTree(Flow::CFG& cfg, Flow::Function& function, u32& label_id, std::vector& gotos, Node function_insert_point, std::optional return_label) { - Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false})}; + Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false}, &root_stmt)}; Tree& root{root_stmt.children}; std::unordered_map local_labels; local_labels.reserve(function.blocks.size()); @@ -411,7 +413,8 @@ private: switch (block.end_class) { case Flow::EndClass::Branch: { - Statement* const always_cond{pool.Create(Identity{}, IR::Condition{true})}; + Statement* const always_cond{ + pool.Create(Identity{}, IR::Condition{true}, &root_stmt)}; if (block.cond == IR::Condition{true}) { const Node true_label{local_labels.at(block.branch_true)}; gotos.push_back( @@ -423,7 +426,7 @@ private: } else { const Node true_label{local_labels.at(block.branch_true)}; const Node false_label{local_labels.at(block.branch_false)}; - Statement* const true_cond{pool.Create(Identity{}, block.cond)}; + Statement* const true_cond{pool.Create(Identity{}, block.cond, &root_stmt)}; gotos.push_back( root.insert(ip, *pool.Create(Goto{}, true_cond, true_label, &root_stmt))); gotos.push_back(root.insert( @@ -433,14 +436,15 @@ private: } case Flow::EndClass::IndirectBranch: root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg, - block.branch_offset)); + block.branch_offset, &root_stmt)); for (const Flow::IndirectBranch& indirect : block.indirect_branches) { const Node indirect_label{local_labels.at(indirect.block)}; - Statement* cond{pool.Create(IndirectBranchCond{}, indirect.address)}; + Statement* cond{ + pool.Create(IndirectBranchCond{}, indirect.address, &root_stmt)}; Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)}; gotos.push_back(root.insert(ip, *goto_stmt)); } - root.insert(ip, *pool.Create(Unreachable{})); + root.insert(ip, *pool.Create(Unreachable{}, &root_stmt)); break; case Flow::EndClass::Call: { Flow::Function& call{cfg.Functions()[block.function_call]}; @@ -449,16 +453,16 @@ private: break; } case Flow::EndClass::Exit: - root.insert(ip, *pool.Create(Return{})); + root.insert(ip, *pool.Create(Return{}, &root_stmt)); break; case Flow::EndClass::Return: { - Statement* const always_cond{pool.Create(Identity{}, block.cond)}; + Statement* const always_cond{pool.Create(Identity{}, block.cond, &root_stmt)}; auto goto_stmt{pool.Create(Goto{}, always_cond, return_label.value(), &root_stmt)}; gotos.push_back(root.insert(ip, *goto_stmt)); break; } case Flow::EndClass::Kill: - root.insert(ip, *pool.Create(Kill{})); + root.insert(ip, *pool.Create(Kill{}, &root_stmt)); break; } } @@ -474,7 +478,7 @@ private: Tree& body{goto_stmt->up->children}; Tree if_body; if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt); - Statement* const cond{pool.Create(Not{}, goto_stmt->cond)}; + Statement* const cond{pool.Create(Not{}, goto_stmt->cond, &root_stmt)}; Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)}; UpdateTreeUp(if_stmt); body.insert(goto_stmt, *if_stmt); @@ -516,8 +520,8 @@ private: Tree if_body; if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt); - Statement* const variable{pool.Create(Variable{}, label_id)}; - Statement* const neg_var{pool.Create(Not{}, variable)}; + Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const neg_var{pool.Create(Not{}, variable, &root_stmt)}; if (!if_body.empty()) { Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)}; UpdateTreeUp(if_stmt); @@ -528,7 +532,8 @@ private: switch (label_nested_stmt->type) { case StatementType::If: // Update nested if condition - label_nested_stmt->cond = pool.Create(Or{}, variable, label_nested_stmt->cond); + label_nested_stmt->cond = + pool.Create(Or{}, variable, label_nested_stmt->cond, &root_stmt); break; case StatementType::Loop: break; @@ -550,7 +555,7 @@ private: Tree loop_body; loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); SanitizeNoBreaks(loop_body); - Statement* const variable{pool.Create(Variable{}, label_id)}; + Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)}; Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; UpdateTreeUp(loop_stmt); body.insert(goto_stmt, *loop_stmt); @@ -577,15 +582,15 @@ private: Tree if_body; if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end()); if_body.pop_front(); - Statement* const cond{pool.Create(Variable{}, label_id)}; - Statement* const neg_cond{pool.Create(Not{}, cond)}; + Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)}; + Statement* const neg_cond{pool.Create(Not{}, cond, &root_stmt)}; Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)}; UpdateTreeUp(if_stmt); body.insert(goto_stmt, *if_stmt); body.erase(goto_stmt); - Statement* const new_cond{pool.Create(Variable{}, label_id)}; + Statement* const new_cond{pool.Create(Variable{}, label_id, &root_stmt)}; Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)}; Tree& parent_tree{parent->up->children}; return parent_tree.insert(std::next(parent), *new_goto); @@ -597,14 +602,14 @@ private: const u32 label_id{goto_stmt->label->id}; Statement* const goto_cond{goto_stmt->cond}; Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; - Statement* const cond{pool.Create(Variable{}, label_id)}; + Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)}; Statement* const break_stmt{pool.Create(Break{}, cond, parent)}; body.insert(goto_stmt, *set_goto_var); body.insert(goto_stmt, *break_stmt); body.erase(goto_stmt); const Node loop{Tree::s_iterator_to(*goto_stmt->up)}; - Statement* const new_goto_cond{pool.Create(Variable{}, label_id)}; + Statement* const new_goto_cond{pool.Create(Variable{}, label_id, &root_stmt)}; Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)}; Tree& parent_tree{loop->up->children}; return parent_tree.insert(std::next(loop), *new_goto); -- cgit v1.2.3 From 5d170de0b5c57afdfc7c633c0b3b36d7ea9299c2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 20:46:07 -0300 Subject: shader: Implement ISCADD32I --- .../maxwell/translate/impl/integer_scaled_add.cpp | 48 ++++++++++++++-------- 1 file changed, 31 insertions(+), 17 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp index 93cc2c0b1..044671943 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp @@ -8,40 +8,36 @@ namespace Shader::Maxwell { namespace { -void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { +void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool neg_b, + u64 scale_imm) { union { u64 raw; BitField<0, 8, IR::Reg> dest_reg; BitField<8, 8, IR::Reg> op_a; - BitField<47, 1, u64> cc; - BitField<48, 2, u64> three_for_po; - BitField<48, 1, u64> neg_b; - BitField<49, 1, u64> neg_a; - BitField<39, 5, u64> scale; } const iscadd{insn}; - const bool po{iscadd.three_for_po == 3}; + const bool po{neg_a && neg_b}; IR::U32 op_a{v.X(iscadd.op_a)}; - if (!po) { + if (po) { + // When PO is present, add one + op_b = v.ir.IAdd(op_b, v.ir.Imm32(1)); + } else { // When PO is not present, the bits are interpreted as negation - if (iscadd.neg_a != 0) { + if (neg_a) { op_a = v.ir.INeg(op_a); } - if (iscadd.neg_b != 0) { + if (neg_b) { op_b = v.ir.INeg(op_b); } - } else { - // When PO is present, add one - op_b = v.ir.IAdd(op_b, v.ir.Imm32(1)); } // With the operands already processed, scale A - const IR::U32 scale{v.ir.Imm32(static_cast(iscadd.scale))}; + const IR::U32 scale{v.ir.Imm32(static_cast(scale_imm))}; const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)}; const IR::U32 result{v.ir.IAdd(scaled_a, op_b)}; v.X(iscadd.dest_reg, result); - if (iscadd.cc != 0) { + if (cc) { v.SetZFlag(v.ir.GetZeroFromOp(result)); v.SetSFlag(v.ir.GetSignFromOp(result)); const IR::U1 carry{v.ir.GetCarryFromOp(result)}; @@ -51,6 +47,18 @@ void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { } } +void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { + union { + u64 raw; + BitField<47, 1, u64> cc; + BitField<48, 1, u64> neg_b; + BitField<49, 1, u64> neg_a; + BitField<39, 5, u64> scale; + } const iscadd{insn}; + + ISCADD(v, insn, op_b, iscadd.cc != 0, iscadd.neg_a != 0, iscadd.neg_b != 0, iscadd.scale); +} + } // Anonymous namespace void TranslatorVisitor::ISCADD_reg(u64 insn) { @@ -65,8 +73,14 @@ void TranslatorVisitor::ISCADD_imm(u64 insn) { ISCADD(*this, insn, GetImm20(insn)); } -void TranslatorVisitor::ISCADD32I(u64) { - throw NotImplementedException("ISCADD32I"); +void TranslatorVisitor::ISCADD32I(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> cc; + BitField<53, 5, u64> scale; + } const iscadd{insn}; + + return ISCADD(*this, insn, GetImm32(insn), iscadd.cc != 0, false, false, iscadd.scale); } } // namespace Shader::Maxwell -- cgit v1.2.3 From 05d41fa9b70af6d469f2f6f1474436c9255e9bc3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 23:08:17 -0300 Subject: shader: Add support for "negative" and unaligned offsets "Negative" offsets don't exist. They are shown as such due to a bug in nvdisasm. Unaligned offsets have been proved to read the aligned offset. For example, when reading an U32, if the offset is 6, the offset read will be 4. --- src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 88bbac0a5..b446aae0e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -122,14 +122,14 @@ IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) { static std::pair CbufAddr(u64 insn) { union { u64 raw; - BitField<20, 14, s64> offset; + BitField<20, 14, u64> offset; BitField<34, 5, u64> binding; } const cbuf{insn}; if (cbuf.binding >= 18) { throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); } - if (cbuf.offset >= 0x10'000 || cbuf.offset < 0) { + if (cbuf.offset >= 0x10'000) { throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset); } const IR::Value binding{static_cast(cbuf.binding)}; -- cgit v1.2.3 From 4f8b68fb0424ccd273107e45709acb6a5c35cecb Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 03:10:15 -0300 Subject: shader: Avoid CPU side undefined behavior on I2F --- .../maxwell/translate/impl/integer_floating_point_conversion.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp index d6224d5cc..e0e157275 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -114,6 +114,8 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { IR::U1 is_least; if (src_bitsize == 64) { is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits::min())); + } else if (src_bitsize == 32) { + is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits::min())); } else { const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))}; is_least = v.ir.IEqual(src, least_value); -- cgit v1.2.3 From ec9a78885e6a07b5259c9fbec19d9756443651b1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 18:50:34 -0300 Subject: shader: Add 2D and 3D variants to SUATOM and SURED Used by Claybook. --- .../frontend/maxwell/translate/impl/surface_atomic_operations.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp index 994bdc3eb..44144f154 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp @@ -76,6 +76,10 @@ IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { case Type::_1D: case Type::BUFFER_1D: return v.X(reg); + case Type::_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); + case Type::_3D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); default: break; } -- cgit v1.2.3 From 562af301819227d65a251a2c29c997bf798da7ba Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 4 Jun 2021 00:11:16 +0200 Subject: shader: Fix VertexA Shaders. --- src/shader_recompiler/frontend/maxwell/program.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 900fc7ab1..8489f9a5f 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -171,20 +171,29 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b IR::Program result{}; Optimization::VertexATransformPass(vertex_a); Optimization::VertexBTransformPass(vertex_b); - std::swap(result.blocks, vertex_a.blocks); - result.blocks.insert(result.blocks.end(), vertex_b.blocks.begin(), vertex_b.blocks.end()); + for (const auto& term : vertex_a.syntax_list) { + if (term.type == IR::AbstractSyntaxNode::Type::Return) { + continue; + } + result.syntax_list.push_back(term); + } + for (const auto& term : vertex_b.syntax_list) { + result.syntax_list.push_back(term); + } + result.blocks = GenerateBlocks(result.syntax_list); + result.post_order_blocks = vertex_b.post_order_blocks; + for (const auto& block : vertex_a.post_order_blocks) { + result.post_order_blocks.push_back(block); + } result.stage = Stage::VertexB; result.info = vertex_a.info; result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); - for (size_t index = 0; index < 32; ++index) { result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; result.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; } Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); - Optimization::DualVertexJoinPass(result); - result.post_order_blocks = PostOrder(result.syntax_list.front()); Optimization::DeadCodeEliminationPass(result); Optimization::VerificationPass(result); Optimization::CollectShaderInfoPass(env_vertex_b, result); -- cgit v1.2.3 From 3c125d41348b08a467333cf0e40bed7ce26cd7cc Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 12 Jun 2021 16:23:04 -0400 Subject: tmml: Remove index component from coords vec The lod query functions exposed by the rendering API's do not make use of the texturearray layer indexing. --- .../frontend/maxwell/translate/impl/texture_mipmap_level.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp index abf87a0df..667c69a0d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -46,16 +46,15 @@ Shader::TextureType GetType(TextureType type) { } IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { - const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; switch (type) { case TextureType::_1D: return v.F(reg); case TextureType::ARRAY_1D: - return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); + return v.F(reg + 1); case TextureType::_2D: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); case TextureType::ARRAY_2D: - return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2)); case TextureType::_3D: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); case TextureType::ARRAY_3D: @@ -63,7 +62,7 @@ IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { case TextureType::CUBE: return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); case TextureType::ARRAY_CUBE: - return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); + return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); } throw NotImplementedException("Invalid texture type {}", type); } -- cgit v1.2.3 From 487057b8d2bd79892423ad3a1b5a96d0407b307a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 12 Jun 2021 23:28:06 -0300 Subject: shader: Comment why the array component is not read in TMML --- .../frontend/maxwell/translate/impl/texture_mipmap_level.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp index 667c69a0d..aea3c0e62 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -46,6 +46,8 @@ Shader::TextureType GetType(TextureType type) { } IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + // The ISA reads an array component here, but this is not needed on high level shading languages + // We are dropping this information. switch (type) { case TextureType::_1D: return v.F(reg); -- cgit v1.2.3 From 373f75d944473731408d7a72c967d5c4b37af5bb Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Thu, 8 Jul 2021 17:22:31 -0400 Subject: shader: Add shader loop safety check settings Also add a setting for enable Nsight Aftermath. --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 14 ++++++-- src/shader_recompiler/frontend/ir/ir_emitter.h | 3 ++ src/shader_recompiler/frontend/ir/opcodes.inc | 2 ++ .../frontend/maxwell/structured_control_flow.cpp | 42 +++++++++++++++++++--- 4 files changed, 55 insertions(+), 6 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index e9fd41237..6c37af5e7 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -125,6 +125,12 @@ U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) { } } +void IREmitter::SetPred(IR::Pred pred, const U1& value) { + if (pred != IR::Pred::PT) { + Inst(Opcode::SetPred, pred, value); + } +} + U1 IREmitter::GetGotoVariable(u32 id) { return Inst(Opcode::GetGotoVariable, id); } @@ -141,8 +147,12 @@ void IREmitter::SetIndirectBranchVariable(const U32& value) { Inst(Opcode::SetIndirectBranchVariable, value); } -void IREmitter::SetPred(IR::Pred pred, const U1& value) { - Inst(Opcode::SetPred, pred, value); +U32 IREmitter::GetLoopSafetyVariable(u32 id) { + return Inst(Opcode::GetLoopSafetyVariable, id); +} + +void IREmitter::SetLoopSafetyVariable(u32 id, const U32& counter) { + Inst(Opcode::SetLoopSafetyVariable, id, counter); } U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index bb3500c54..7caab1f61 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -55,6 +55,9 @@ public: [[nodiscard]] U32 GetIndirectBranchVariable(); void SetIndirectBranchVariable(const U32& value); + [[nodiscard]] U32 GetLoopSafetyVariable(u32 id); + void SetLoopSafetyVariable(u32 id, const U32& counter); + [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); [[nodiscard]] Value GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, bool is_signed); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 8a8d0d759..e87aeddd5 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -32,6 +32,8 @@ OPCODE(GetGotoVariable, U1, U32, OPCODE(SetGotoVariable, Void, U32, U1, ) OPCODE(GetIndirectBranchVariable, U32, ) OPCODE(SetIndirectBranchVariable, Void, U32, ) +OPCODE(GetLoopSafetyVariable, U32, U32, ) +OPCODE(SetLoopSafetyVariable, Void, U32, U32, ) OPCODE(GetCbufU8, U32, U32, U32, ) OPCODE(GetCbufS8, U32, U32, U32, ) OPCODE(GetCbufU16, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index c1e0646e6..b2b8c492a 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -9,11 +9,13 @@ #include #include #include +#include #include #include +#include "common/settings.h" #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" @@ -739,8 +741,25 @@ private: } case StatementType::Loop: { IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; - if (current_block) { - current_block->AddBranch(loop_header_block); + const u32 this_loop_id{loop_id++}; + + if (Settings::values.disable_shader_loop_safety_checks) { + if (current_block) { + current_block->AddBranch(loop_header_block); + } + } else { + IR::Block* const init_block{block_pool.Create(inst_pool)}; + IR::IREmitter ir{*init_block}; + ir.SetLoopSafetyVariable(this_loop_id, ir.Imm32(0x2000)); + + if (current_block) { + current_block->AddBranch(init_block); + } + init_block->AddBranch(loop_header_block); + + auto& init_node{syntax_list.emplace_back()}; + init_node.type = IR::AbstractSyntaxNode::Type::Block; + init_node.data.block = init_block; } auto& header_node{syntax_list.emplace_back()}; header_node.type = IR::AbstractSyntaxNode::Type::Block; @@ -758,7 +777,16 @@ private: // The continue block is located at the end of the loop IR::IREmitter ir{*continue_block}; - const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; + IR::U1 cond{VisitExpr(ir, *stmt.cond)}; + if (!Settings::values.disable_shader_loop_safety_checks) { + const IR::U32 old_counter{ir.GetLoopSafetyVariable(this_loop_id)}; + const IR::U32 new_counter{ir.ISub(old_counter, ir.Imm32(1))}; + ir.SetLoopSafetyVariable(this_loop_id, new_counter); + + const IR::U1 safety_cond{ir.INotEqual(new_counter, ir.Imm32(0))}; + cond = ir.LogicalAnd(cond, safety_cond); + } + cond = ir.ConditionRef(cond); IR::Block* const body_block{syntax_list.at(body_block_index).data.block}; loop_header_block->AddBranch(body_block); @@ -863,8 +891,14 @@ private: ObjectPool& block_pool; Environment& env; IR::AbstractSyntaxList& syntax_list; - // TODO: Make this constexpr when std::vector is constexpr + u32 loop_id{}; + +// TODO: C++20 Remove this when all compilers support constexpr std::vector +#if __cpp_lib_constexpr_vector >= 201907 + static constexpr Flow::Block dummy_flow_block; +#else const Flow::Block dummy_flow_block; +#endif }; } // Anonymous namespace -- cgit v1.2.3 From 61cd7dd30128633b656ce3264da74bef1ba00bb5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 14 Jun 2021 02:27:49 -0300 Subject: shader: Add logging --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 2 +- .../maxwell/translate/impl/internal_stage_buffer_entry_read.cpp | 2 +- .../frontend/maxwell/translate/impl/move_special_register.cpp | 8 ++++---- src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 6c37af5e7..d2ac2acac 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -270,7 +270,7 @@ static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { case FlowTest::RGT: return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag())); case FlowTest::FCSM_TR: - // LOG_WARNING(ShaderDecompiler, "FCSM_TR CC State (Stubbed)"); + LOG_WARNING(Shader, "(STUBBED) FCSM_TR"); return ir.Imm1(false); case FlowTest::CSM_TA: case FlowTest::CSM_TR: diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp index edd6220a8..9b85f8059 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp @@ -46,7 +46,7 @@ void TranslatorVisitor::ISBERD(u64 insn) { if (isberd.shift != Shift::Default) { throw NotImplementedException("Shift {}", isberd.shift.Value()); } - // LOG_WARNING(..., "ISBERD is stubbed"); + LOG_WARNING(Shader, "(STUBBED) called"); X(isberd.dest_reg, X(isberd.src_reg)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index fe3cdfa96..20cb2674e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -118,7 +118,7 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_THREAD_KILL: return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))}; case SpecialRegister::SR_INVOCATION_INFO: - // LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed"); + LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO"); return ir.Imm32(0x00ff'0000); case SpecialRegister::SR_TID: { const IR::Value tid{ir.LocalInvocationId()}; @@ -140,10 +140,10 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_CTAID_Z: return ir.WorkgroupIdZ(); case SpecialRegister::SR_WSCALEFACTOR_XY: - // LOG_WARNING(..., "SR_WSCALEFACTOR_XY is stubbed"); + LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY"); return ir.Imm32(Common::BitCast(1.0f)); case SpecialRegister::SR_WSCALEFACTOR_Z: - // LOG_WARNING(..., "SR_WSCALEFACTOR_Z is stubbed"); + LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_Z"); return ir.Imm32(Common::BitCast(1.0f)); case SpecialRegister::SR_LANEID: return ir.LaneId(); @@ -160,7 +160,7 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_Y_DIRECTION: return ir.BitCast(ir.YDirection()); case SpecialRegister::SR_AFFINITY: - // LOG_WARNING(..., "SR_AFFINITY is stubbed"); + LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY"); return ir.Imm32(0); // This is the default value hardware returns. default: throw NotImplementedException("S2R special register {}", special_register); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp index 0793611ff..7ce370f09 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -48,7 +48,7 @@ void TranslatorVisitor::VOTE(u64 insn) { } void TranslatorVisitor::VOTE_vtg(u64) { - // LOG_WARNING(ShaderDecompiler, "VOTE.VTG: Stubbed!"); + LOG_WARNING(Shader, "(STUBBED) called"); } } // namespace Shader::Maxwell -- cgit v1.2.3 From 7ac55c2a750f00b41582a86eba5a44dcd781ae98 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 15 Jun 2021 17:00:07 -0300 Subject: shader: Fix loop safety to SSA pass --- src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index b2b8c492a..605ec38e1 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -750,7 +750,9 @@ private: } else { IR::Block* const init_block{block_pool.Create(inst_pool)}; IR::IREmitter ir{*init_block}; - ir.SetLoopSafetyVariable(this_loop_id, ir.Imm32(0x2000)); + + static constexpr u32 SAFETY_THRESHOLD = 0x2000; + ir.SetLoopSafetyVariable(this_loop_id, ir.Imm32(SAFETY_THRESHOLD)); if (current_block) { current_block->AddBranch(init_block); -- cgit v1.2.3 From 376aa94819b7da976adb120136d83980a757d044 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 01:49:19 -0300 Subject: shader: Rename maxwell/program.h to translate_program.h --- src/shader_recompiler/frontend/maxwell/program.cpp | 203 --------------------- src/shader_recompiler/frontend/maxwell/program.h | 27 --- .../frontend/maxwell/translate_program.cpp | 203 +++++++++++++++++++++ .../frontend/maxwell/translate_program.h | 22 +++ 4 files changed, 225 insertions(+), 230 deletions(-) delete mode 100644 src/shader_recompiler/frontend/maxwell/program.cpp delete mode 100644 src/shader_recompiler/frontend/maxwell/program.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate_program.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate_program.h (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp deleted file mode 100644 index 8489f9a5f..000000000 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ /dev/null @@ -1,203 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include - -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/post_order.h" -#include "shader_recompiler/frontend/maxwell/program.h" -#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" -#include "shader_recompiler/frontend/maxwell/translate/translate.h" -#include "shader_recompiler/ir_opt/passes.h" - -namespace Shader::Maxwell { -namespace { -IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { - auto syntax_blocks{syntax_list | std::views::filter([](const auto& node) { - return node.type == IR::AbstractSyntaxNode::Type::Block; - })}; - IR::BlockList blocks(std::ranges::distance(syntax_blocks)); - std::ranges::transform(syntax_blocks, blocks.begin(), - [](const IR::AbstractSyntaxNode& node) { return node.data.block; }); - return blocks; -} - -void RemoveUnreachableBlocks(IR::Program& program) { - // Some blocks might be unreachable if a function call exists unconditionally - // If this happens the number of blocks and post order blocks will mismatch - if (program.blocks.size() == program.post_order_blocks.size()) { - return; - } - const auto begin{program.blocks.begin() + 1}; - const auto end{program.blocks.end()}; - const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }}; - program.blocks.erase(std::remove_if(begin, end, pred), end); -} - -void CollectInterpolationInfo(Environment& env, IR::Program& program) { - if (program.stage != Stage::Fragment) { - return; - } - const ProgramHeader& sph{env.SPH()}; - for (size_t index = 0; index < program.info.input_generics.size(); ++index) { - std::optional imap; - for (const PixelImap value : sph.ps.GenericInputMap(static_cast(index))) { - if (value == PixelImap::Unused) { - continue; - } - if (imap && imap != value) { - throw NotImplementedException("Per component interpolation"); - } - imap = value; - } - if (!imap) { - continue; - } - program.info.input_generics[index].interpolation = [&] { - switch (*imap) { - case PixelImap::Unused: - case PixelImap::Perspective: - return Interpolation::Smooth; - case PixelImap::Constant: - return Interpolation::Flat; - case PixelImap::ScreenLinear: - return Interpolation::NoPerspective; - } - throw NotImplementedException("Unknown interpolation {}", *imap); - }(); - } -} - -void AddNVNStorageBuffers(IR::Program& program) { - if (!program.info.uses_global_memory) { - return; - } - const u32 driver_cbuf{0}; - const u32 descriptor_size{0x10}; - const u32 num_buffers{16}; - const u32 base{[&] { - switch (program.stage) { - case Stage::VertexA: - case Stage::VertexB: - return 0x110u; - case Stage::TessellationControl: - return 0x210u; - case Stage::TessellationEval: - return 0x310u; - case Stage::Geometry: - return 0x410u; - case Stage::Fragment: - return 0x510u; - case Stage::Compute: - return 0x310u; - } - throw InvalidArgument("Invalid stage {}", program.stage); - }()}; - auto& descs{program.info.storage_buffers_descriptors}; - for (u32 index = 0; index < num_buffers; ++index) { - if (!program.info.nvn_buffer_used[index]) { - continue; - } - const u32 offset{base + index * descriptor_size}; - const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; - if (it != descs.end()) { - it->is_written |= program.info.stores_global_memory; - continue; - } - descs.push_back({ - .cbuf_index = driver_cbuf, - .cbuf_offset = offset, - .count = 1, - .is_written = program.info.stores_global_memory, - }); - } -} -} // Anonymous namespace - -IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, Flow::CFG& cfg) { - IR::Program program; - program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); - program.blocks = GenerateBlocks(program.syntax_list); - program.post_order_blocks = PostOrder(program.syntax_list.front()); - program.stage = env.ShaderStage(); - program.local_memory_size = env.LocalMemorySize(); - switch (program.stage) { - case Stage::TessellationControl: { - const ProgramHeader& sph{env.SPH()}; - program.invocations = sph.common2.threads_per_input_primitive; - break; - } - case Stage::Geometry: { - const ProgramHeader& sph{env.SPH()}; - program.output_topology = sph.common3.output_topology; - program.output_vertices = sph.common4.max_output_vertices; - program.invocations = sph.common2.threads_per_input_primitive; - break; - } - case Stage::Compute: - program.workgroup_size = env.WorkgroupSize(); - program.shared_memory_size = env.SharedMemorySize(); - break; - default: - break; - } - RemoveUnreachableBlocks(program); - - // Replace instructions before the SSA rewrite - Optimization::LowerFp16ToFp32(program); - - Optimization::SsaRewritePass(program); - - Optimization::GlobalMemoryToStorageBufferPass(program); - Optimization::TexturePass(env, program); - - Optimization::ConstantPropagationPass(program); - Optimization::DeadCodeEliminationPass(program); - Optimization::VerificationPass(program); - Optimization::CollectShaderInfoPass(env, program); - CollectInterpolationInfo(env, program); - AddNVNStorageBuffers(program); - return program; -} - -IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, - Environment& env_vertex_b) { - IR::Program result{}; - Optimization::VertexATransformPass(vertex_a); - Optimization::VertexBTransformPass(vertex_b); - for (const auto& term : vertex_a.syntax_list) { - if (term.type == IR::AbstractSyntaxNode::Type::Return) { - continue; - } - result.syntax_list.push_back(term); - } - for (const auto& term : vertex_b.syntax_list) { - result.syntax_list.push_back(term); - } - result.blocks = GenerateBlocks(result.syntax_list); - result.post_order_blocks = vertex_b.post_order_blocks; - for (const auto& block : vertex_a.post_order_blocks) { - result.post_order_blocks.push_back(block); - } - result.stage = Stage::VertexB; - result.info = vertex_a.info; - result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); - for (size_t index = 0; index < 32; ++index) { - result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; - result.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; - } - Optimization::JoinTextureInfo(result.info, vertex_b.info); - Optimization::JoinStorageInfo(result.info, vertex_b.info); - Optimization::DeadCodeEliminationPass(result); - Optimization::VerificationPass(result); - Optimization::CollectShaderInfoPass(env_vertex_b, result); - return result; -} - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h deleted file mode 100644 index f7f5930e4..000000000 --- a/src/shader_recompiler/frontend/maxwell/program.h +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include - -#include - -#include "shader_recompiler/environment.h" -#include "shader_recompiler/frontend/ir/program.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/object_pool.h" - -namespace Shader::Maxwell { - -[[nodiscard]] IR::Program TranslateProgram(ObjectPool& inst_pool, - ObjectPool& block_pool, Environment& env, - Flow::CFG& cfg); - -[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, - Environment& env_vertex_b); - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp new file mode 100644 index 000000000..e52170e3e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -0,0 +1,203 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/post_order.h" +#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" +#include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Maxwell { +namespace { +IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { + auto syntax_blocks{syntax_list | std::views::filter([](const auto& node) { + return node.type == IR::AbstractSyntaxNode::Type::Block; + })}; + IR::BlockList blocks(std::ranges::distance(syntax_blocks)); + std::ranges::transform(syntax_blocks, blocks.begin(), + [](const IR::AbstractSyntaxNode& node) { return node.data.block; }); + return blocks; +} + +void RemoveUnreachableBlocks(IR::Program& program) { + // Some blocks might be unreachable if a function call exists unconditionally + // If this happens the number of blocks and post order blocks will mismatch + if (program.blocks.size() == program.post_order_blocks.size()) { + return; + } + const auto begin{program.blocks.begin() + 1}; + const auto end{program.blocks.end()}; + const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }}; + program.blocks.erase(std::remove_if(begin, end, pred), end); +} + +void CollectInterpolationInfo(Environment& env, IR::Program& program) { + if (program.stage != Stage::Fragment) { + return; + } + const ProgramHeader& sph{env.SPH()}; + for (size_t index = 0; index < program.info.input_generics.size(); ++index) { + std::optional imap; + for (const PixelImap value : sph.ps.GenericInputMap(static_cast(index))) { + if (value == PixelImap::Unused) { + continue; + } + if (imap && imap != value) { + throw NotImplementedException("Per component interpolation"); + } + imap = value; + } + if (!imap) { + continue; + } + program.info.input_generics[index].interpolation = [&] { + switch (*imap) { + case PixelImap::Unused: + case PixelImap::Perspective: + return Interpolation::Smooth; + case PixelImap::Constant: + return Interpolation::Flat; + case PixelImap::ScreenLinear: + return Interpolation::NoPerspective; + } + throw NotImplementedException("Unknown interpolation {}", *imap); + }(); + } +} + +void AddNVNStorageBuffers(IR::Program& program) { + if (!program.info.uses_global_memory) { + return; + } + const u32 driver_cbuf{0}; + const u32 descriptor_size{0x10}; + const u32 num_buffers{16}; + const u32 base{[&] { + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + return 0x110u; + case Stage::TessellationControl: + return 0x210u; + case Stage::TessellationEval: + return 0x310u; + case Stage::Geometry: + return 0x410u; + case Stage::Fragment: + return 0x510u; + case Stage::Compute: + return 0x310u; + } + throw InvalidArgument("Invalid stage {}", program.stage); + }()}; + auto& descs{program.info.storage_buffers_descriptors}; + for (u32 index = 0; index < num_buffers; ++index) { + if (!program.info.nvn_buffer_used[index]) { + continue; + } + const u32 offset{base + index * descriptor_size}; + const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; + if (it != descs.end()) { + it->is_written |= program.info.stores_global_memory; + continue; + } + descs.push_back({ + .cbuf_index = driver_cbuf, + .cbuf_offset = offset, + .count = 1, + .is_written = program.info.stores_global_memory, + }); + } +} +} // Anonymous namespace + +IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, + Environment& env, Flow::CFG& cfg) { + IR::Program program; + program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); + program.blocks = GenerateBlocks(program.syntax_list); + program.post_order_blocks = PostOrder(program.syntax_list.front()); + program.stage = env.ShaderStage(); + program.local_memory_size = env.LocalMemorySize(); + switch (program.stage) { + case Stage::TessellationControl: { + const ProgramHeader& sph{env.SPH()}; + program.invocations = sph.common2.threads_per_input_primitive; + break; + } + case Stage::Geometry: { + const ProgramHeader& sph{env.SPH()}; + program.output_topology = sph.common3.output_topology; + program.output_vertices = sph.common4.max_output_vertices; + program.invocations = sph.common2.threads_per_input_primitive; + break; + } + case Stage::Compute: + program.workgroup_size = env.WorkgroupSize(); + program.shared_memory_size = env.SharedMemorySize(); + break; + default: + break; + } + RemoveUnreachableBlocks(program); + + // Replace instructions before the SSA rewrite + Optimization::LowerFp16ToFp32(program); + + Optimization::SsaRewritePass(program); + + Optimization::GlobalMemoryToStorageBufferPass(program); + Optimization::TexturePass(env, program); + + Optimization::ConstantPropagationPass(program); + Optimization::DeadCodeEliminationPass(program); + Optimization::VerificationPass(program); + Optimization::CollectShaderInfoPass(env, program); + CollectInterpolationInfo(env, program); + AddNVNStorageBuffers(program); + return program; +} + +IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, + Environment& env_vertex_b) { + IR::Program result{}; + Optimization::VertexATransformPass(vertex_a); + Optimization::VertexBTransformPass(vertex_b); + for (const auto& term : vertex_a.syntax_list) { + if (term.type == IR::AbstractSyntaxNode::Type::Return) { + continue; + } + result.syntax_list.push_back(term); + } + for (const auto& term : vertex_b.syntax_list) { + result.syntax_list.push_back(term); + } + result.blocks = GenerateBlocks(result.syntax_list); + result.post_order_blocks = vertex_b.post_order_blocks; + for (const auto& block : vertex_a.post_order_blocks) { + result.post_order_blocks.push_back(block); + } + result.stage = Stage::VertexB; + result.info = vertex_a.info; + result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); + for (size_t index = 0; index < 32; ++index) { + result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; + result.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; + } + Optimization::JoinTextureInfo(result.info, vertex_b.info); + Optimization::JoinStorageInfo(result.info, vertex_b.info); + Optimization::DeadCodeEliminationPass(result); + Optimization::VerificationPass(result); + Optimization::CollectShaderInfoPass(env_vertex_b, result); + return result; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h new file mode 100644 index 000000000..1e5536443 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate_program.h @@ -0,0 +1,22 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::Maxwell { + +[[nodiscard]] IR::Program TranslateProgram(ObjectPool& inst_pool, + ObjectPool& block_pool, Environment& env, + Flow::CFG& cfg); + +[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, + Environment& env_vertex_b); + +} // namespace Shader::Maxwell -- cgit v1.2.3 From cbbca26d182991abf68d9b2e1b1e5935bf4eb476 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 03:03:08 -0300 Subject: shader: Add support for native 16-bit floats --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 8 +++++--- src/shader_recompiler/frontend/maxwell/translate_program.h | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index e52170e3e..5250509c1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -13,6 +13,7 @@ #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/frontend/maxwell/translate_program.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { @@ -120,7 +121,7 @@ void AddNVNStorageBuffers(IR::Program& program) { } // Anonymous namespace IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, Flow::CFG& cfg) { + Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { IR::Program program; program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); program.blocks = GenerateBlocks(program.syntax_list); @@ -150,8 +151,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, - Flow::CFG& cfg); + Flow::CFG& cfg, const HostTranslateInfo& host_info); [[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, Environment& env_vertex_b); -- cgit v1.2.3 From 374eeda1a35f6a1dc81cf22122c701be68e89c0f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 04:59:30 -0300 Subject: shader: Properly manage attributes not written from previous stages --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 5250509c1..ed8729fca 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -192,7 +192,9 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); for (size_t index = 0; index < 32; ++index) { result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; - result.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; + if (vertex_b.info.stores_generics[index]) { + result.info.stores_generics[index] = true; + } } Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); -- cgit v1.2.3 From 1091995f8e5ba79d659ab39fe4dbbca26ad01488 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 05:02:19 -0300 Subject: shader: Simplify MergeDualVertexPrograms --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index ed8729fca..e728b43cc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -174,14 +174,12 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b Optimization::VertexATransformPass(vertex_a); Optimization::VertexBTransformPass(vertex_b); for (const auto& term : vertex_a.syntax_list) { - if (term.type == IR::AbstractSyntaxNode::Type::Return) { - continue; + if (term.type != IR::AbstractSyntaxNode::Type::Return) { + result.syntax_list.push_back(term); } - result.syntax_list.push_back(term); - } - for (const auto& term : vertex_b.syntax_list) { - result.syntax_list.push_back(term); } + result.syntax_list.insert(result.syntax_list.end(), vertex_b.syntax_list.begin(), + vertex_b.syntax_list.end()); result.blocks = GenerateBlocks(result.syntax_list); result.post_order_blocks = vertex_b.post_order_blocks; for (const auto& block : vertex_a.post_order_blocks) { -- cgit v1.2.3 From 1d182fc0f5f8a6facf6e4aebcf79d6d9a092a48c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 19 Jun 2021 21:30:27 -0300 Subject: shader: Calibrate loop safety threshold --- src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 605ec38e1..0fb870a69 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -751,7 +751,7 @@ private: IR::Block* const init_block{block_pool.Create(inst_pool)}; IR::IREmitter ir{*init_block}; - static constexpr u32 SAFETY_THRESHOLD = 0x2000; + static constexpr u32 SAFETY_THRESHOLD = 0x1000; ir.SetLoopSafetyVariable(this_loop_id, ir.Imm32(SAFETY_THRESHOLD)); if (current_block) { -- cgit v1.2.3 From 808ef97a086e7cc58a3ceded1de516ad6a6be5d3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 21 Jun 2021 01:07:10 -0300 Subject: shader: Move loop safety tests to code emission --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 8 ----- src/shader_recompiler/frontend/ir/ir_emitter.h | 3 -- src/shader_recompiler/frontend/ir/opcodes.inc | 2 -- .../frontend/maxwell/structured_control_flow.cpp | 37 +++------------------- 4 files changed, 4 insertions(+), 46 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index d2ac2acac..2e75208e6 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -147,14 +147,6 @@ void IREmitter::SetIndirectBranchVariable(const U32& value) { Inst(Opcode::SetIndirectBranchVariable, value); } -U32 IREmitter::GetLoopSafetyVariable(u32 id) { - return Inst(Opcode::GetLoopSafetyVariable, id); -} - -void IREmitter::SetLoopSafetyVariable(u32 id, const U32& counter) { - Inst(Opcode::SetLoopSafetyVariable, id, counter); -} - U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) { return Inst(Opcode::GetCbufU32, binding, byte_offset); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 7caab1f61..bb3500c54 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -55,9 +55,6 @@ public: [[nodiscard]] U32 GetIndirectBranchVariable(); void SetIndirectBranchVariable(const U32& value); - [[nodiscard]] U32 GetLoopSafetyVariable(u32 id); - void SetLoopSafetyVariable(u32 id, const U32& counter); - [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); [[nodiscard]] Value GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, bool is_signed); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index e87aeddd5..8a8d0d759 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -32,8 +32,6 @@ OPCODE(GetGotoVariable, U1, U32, OPCODE(SetGotoVariable, Void, U32, U1, ) OPCODE(GetIndirectBranchVariable, U32, ) OPCODE(SetIndirectBranchVariable, Void, U32, ) -OPCODE(GetLoopSafetyVariable, U32, U32, ) -OPCODE(SetLoopSafetyVariable, Void, U32, U32, ) OPCODE(GetCbufU8, U32, U32, U32, ) OPCODE(GetCbufS8, U32, U32, U32, ) OPCODE(GetCbufU16, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 0fb870a69..10d05dc4c 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -15,7 +15,6 @@ #include -#include "common/settings.h" #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" @@ -663,7 +662,7 @@ public: Visit(root_stmt, nullptr, nullptr); IR::Block& first_block{*syntax_list.front().data.block}; - IR::IREmitter ir = IR::IREmitter(first_block, first_block.begin()); + IR::IREmitter ir(first_block, first_block.begin()); ir.Prologue(); } @@ -741,27 +740,8 @@ private: } case StatementType::Loop: { IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; - const u32 this_loop_id{loop_id++}; - - if (Settings::values.disable_shader_loop_safety_checks) { - if (current_block) { - current_block->AddBranch(loop_header_block); - } - } else { - IR::Block* const init_block{block_pool.Create(inst_pool)}; - IR::IREmitter ir{*init_block}; - - static constexpr u32 SAFETY_THRESHOLD = 0x1000; - ir.SetLoopSafetyVariable(this_loop_id, ir.Imm32(SAFETY_THRESHOLD)); - - if (current_block) { - current_block->AddBranch(init_block); - } - init_block->AddBranch(loop_header_block); - - auto& init_node{syntax_list.emplace_back()}; - init_node.type = IR::AbstractSyntaxNode::Type::Block; - init_node.data.block = init_block; + if (current_block) { + current_block->AddBranch(loop_header_block); } auto& header_node{syntax_list.emplace_back()}; header_node.type = IR::AbstractSyntaxNode::Type::Block; @@ -779,16 +759,7 @@ private: // The continue block is located at the end of the loop IR::IREmitter ir{*continue_block}; - IR::U1 cond{VisitExpr(ir, *stmt.cond)}; - if (!Settings::values.disable_shader_loop_safety_checks) { - const IR::U32 old_counter{ir.GetLoopSafetyVariable(this_loop_id)}; - const IR::U32 new_counter{ir.ISub(old_counter, ir.Imm32(1))}; - ir.SetLoopSafetyVariable(this_loop_id, new_counter); - - const IR::U1 safety_cond{ir.INotEqual(new_counter, ir.Imm32(0))}; - cond = ir.LogicalAnd(cond, safety_cond); - } - cond = ir.ConditionRef(cond); + const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; IR::Block* const body_block{syntax_list.at(body_block_index).data.block}; loop_header_block->AddBranch(body_block); -- cgit v1.2.3 From a7536825dfd3a424ff709995653da4da0ce6dea6 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 21 Jun 2021 21:07:52 -0400 Subject: shader_recompiler: Fix IADD3 input partitioning --- .../translate/impl/integer_add_three_input.cpp | 27 +++++++++++----------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp index 259a6e6ac..33e2a51ae 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -42,14 +42,10 @@ enum class Half : u64 { } } -void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { +void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c) { union { u64 insn; BitField<0, 8, IR::Reg> dest_reg; - BitField<8, 8, IR::Reg> src_a; - BitField<31, 2, Half> half_c; - BitField<33, 2, Half> half_b; - BitField<35, 2, Half> half_a; BitField<37, 2, Shift> shift; BitField<47, 1, u64> cc; BitField<48, 1, u64> x; @@ -58,11 +54,6 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { BitField<51, 1, u64> neg_a; } iadd3{insn}; - IR::U32 op_a{v.X(iadd3.src_a)}; - op_a = IntegerHalf(v.ir, op_a, iadd3.half_a); - op_b = IntegerHalf(v.ir, op_b, iadd3.half_b); - op_c = IntegerHalf(v.ir, op_c, iadd3.half_c); - if (iadd3.neg_a != 0) { op_a = v.ir.INeg(op_a); } @@ -72,7 +63,6 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { if (iadd3.neg_c != 0) { op_c = v.ir.INeg(op_c); } - IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)}; if (iadd3.x != 0) { const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; @@ -97,15 +87,24 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_b, IR::U32 op_c) { } // Anonymous namespace void TranslatorVisitor::IADD3_reg(u64 insn) { - IADD3(*this, insn, GetReg20(insn), GetReg39(insn)); + union { + u64 insn; + BitField<35, 2, Half> half_a; + BitField<31, 2, Half> half_c; + BitField<33, 2, Half> half_b; + } iadd3{insn}; + const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)}; + const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)}; + const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)}; + IADD3(*this, insn, op_a, op_b, op_c); } void TranslatorVisitor::IADD3_cbuf(u64 insn) { - IADD3(*this, insn, GetCbuf(insn), GetReg39(insn)); + IADD3(*this, insn, GetReg8(insn), GetCbuf(insn), GetReg39(insn)); } void TranslatorVisitor::IADD3_imm(u64 insn) { - IADD3(*this, insn, GetImm20(insn), GetReg39(insn)); + IADD3(*this, insn, GetReg8(insn), GetImm20(insn), GetReg39(insn)); } } // namespace Shader::Maxwell -- cgit v1.2.3 From 4397053d5c848deae00d6599f91b1e5c137a9639 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 22 Jun 2021 18:28:21 -0300 Subject: shader: Remove IAbs64 --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 11 ++--------- src/shader_recompiler/frontend/ir/ir_emitter.h | 2 +- src/shader_recompiler/frontend/ir/opcodes.inc | 1 - 3 files changed, 3 insertions(+), 11 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 2e75208e6..13159a68d 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1152,15 +1152,8 @@ U32U64 IREmitter::INeg(const U32U64& value) { } } -U32U64 IREmitter::IAbs(const U32U64& value) { - switch (value.Type()) { - case Type::U32: - return Inst(Opcode::IAbs32, value); - case Type::U64: - return Inst(Opcode::IAbs64, value); - default: - ThrowInvalidType(value.Type()); - } +U32 IREmitter::IAbs(const U32& value) { + return Inst(Opcode::IAbs32, value); } U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index bb3500c54..53f7b3b06 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -208,7 +208,7 @@ public: [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); [[nodiscard]] U32 IMul(const U32& a, const U32& b); [[nodiscard]] U32U64 INeg(const U32U64& value); - [[nodiscard]] U32U64 IAbs(const U32U64& value); + [[nodiscard]] U32 IAbs(const U32& value); [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift); [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift); [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 8a8d0d759..9af750283 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -289,7 +289,6 @@ OPCODE(IMul32, U32, U32, OPCODE(INeg32, U32, U32, ) OPCODE(INeg64, U64, U64, ) OPCODE(IAbs32, U32, U32, ) -OPCODE(IAbs64, U64, U64, ) OPCODE(ShiftLeftLogical32, U32, U32, U32, ) OPCODE(ShiftLeftLogical64, U64, U64, U32, ) OPCODE(ShiftRightLogical32, U32, U32, U32, ) -- cgit v1.2.3 From fb166b5ff4b42279b2c63c69f5b5a35feafa259e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 01:39:21 -0300 Subject: shader: Emulate 64-bit integers when not supported Useful for mobile and Intel Xe devices. --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index e728b43cc..c084f3400 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -154,6 +154,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Wed, 23 Jun 2021 03:31:49 -0300 Subject: shader: Only verify shader when graphics debugging is enabled --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index c084f3400..a8b727f1a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -7,6 +7,7 @@ #include #include +#include "common/settings.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/post_order.h" @@ -164,7 +165,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Thu, 24 Jun 2021 02:41:09 -0300 Subject: shader: Rework varyings and implement passthrough geometry shaders Put all varyings into a single std::bitset with helpers to access it. Implement passthrough geometry shaders using host's. --- src/shader_recompiler/frontend/ir/attribute.h | 6 ++++++ src/shader_recompiler/frontend/ir/program.h | 1 + .../frontend/maxwell/translate_program.cpp | 18 ++++++++++-------- 3 files changed, 17 insertions(+), 8 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h index 8bf2ddf30..ca1199494 100644 --- a/src/shader_recompiler/frontend/ir/attribute.h +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -222,6 +222,8 @@ enum class Attribute : u64 { FrontFace = 255, }; +constexpr size_t NUM_GENERICS = 32; + [[nodiscard]] bool IsGeneric(Attribute attribute) noexcept; [[nodiscard]] u32 GenericAttributeIndex(Attribute attribute); @@ -230,6 +232,10 @@ enum class Attribute : u64 { [[nodiscard]] std::string NameOf(Attribute attribute); +[[nodiscard]] constexpr IR::Attribute operator+(IR::Attribute attribute, size_t value) noexcept { + return static_cast(static_cast(attribute) + value); +} + } // namespace Shader::IR template <> diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 9ede5b48d..ebcaa8bc2 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -27,6 +27,7 @@ struct Program { u32 invocations{}; u32 local_memory_size{}; u32 shared_memory_size{}; + bool is_geometry_passthrough{}; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index a8b727f1a..6b4b0ce5b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -46,7 +46,7 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) { return; } const ProgramHeader& sph{env.SPH()}; - for (size_t index = 0; index < program.info.input_generics.size(); ++index) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { std::optional imap; for (const PixelImap value : sph.ps.GenericInputMap(static_cast(index))) { if (value == PixelImap::Unused) { @@ -60,7 +60,7 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) { if (!imap) { continue; } - program.info.input_generics[index].interpolation = [&] { + program.info.interpolation[index] = [&] { switch (*imap) { case PixelImap::Unused: case PixelImap::Perspective: @@ -140,6 +140,11 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool>(mask); + } break; } case Stage::Compute: @@ -194,12 +199,9 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b result.stage = Stage::VertexB; result.info = vertex_a.info; result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); - for (size_t index = 0; index < 32; ++index) { - result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; - if (vertex_b.info.stores_generics[index]) { - result.info.stores_generics[index] = true; - } - } + result.info.loads.mask |= vertex_b.info.loads.mask; + result.info.stores.mask |= vertex_b.info.stores.mask; + Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); Optimization::DeadCodeEliminationPass(result); -- cgit v1.2.3 From 8612b5fec5d39b904f9fddbbee3e06437d49429c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 24 Jun 2021 17:42:07 -0300 Subject: shader: Use std::bit_cast instead of Common::BitCast for passthrough --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 6b4b0ce5b..2bb1d24a4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include +#include #include #include #include @@ -142,8 +143,8 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool>(mask); + const auto& mask{env.GpPassthroughMask()}; + program.info.passthrough.mask |= ~std::bit_cast>(mask); } break; } -- cgit v1.2.3 From b21bf79bd2627797d87c17f30c776b4e2476f019 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 28 Jun 2021 22:35:31 -0300 Subject: shader: Only apply shift on register mode for IADD3 --- .../translate/impl/integer_add_three_input.cpp | 24 +++++++++++++--------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp index 33e2a51ae..b50017536 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -22,31 +22,33 @@ enum class Half : u64 { [[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) { constexpr bool is_signed{false}; switch (half) { + case Half::All: + return value; case Half::Lower: return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed); case Half::Upper: return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed); - default: - return value; } + throw NotImplementedException("Invalid half"); } [[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) { switch (shift) { + case Shift::None: + return value; case Shift::Right: return ir.ShiftRightLogical(value, ir.Imm32(16)); case Shift::Left: return ir.ShiftLeftLogical(value, ir.Imm32(16)); - default: - return value; } + throw NotImplementedException("Invalid shift"); } -void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c) { +void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c, + Shift shift = Shift::None) { union { u64 insn; BitField<0, 8, IR::Reg> dest_reg; - BitField<37, 2, Shift> shift; BitField<47, 1, u64> cc; BitField<48, 1, u64> x; BitField<49, 1, u64> neg_c; @@ -68,7 +70,7 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 o const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; lhs_1 = v.ir.IAdd(lhs_1, carry); } - const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, iadd3.shift)}; + const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, shift)}; const IR::U32 result{v.ir.IAdd(lhs_2, op_c)}; v.X(iadd3.dest_reg, result); @@ -89,14 +91,16 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 o void TranslatorVisitor::IADD3_reg(u64 insn) { union { u64 insn; + BitField<37, 2, Shift> shift; BitField<35, 2, Half> half_a; - BitField<31, 2, Half> half_c; BitField<33, 2, Half> half_b; - } iadd3{insn}; + BitField<31, 2, Half> half_c; + } const iadd3{insn}; + const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)}; const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)}; const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)}; - IADD3(*this, insn, op_a, op_b, op_c); + IADD3(*this, insn, op_a, op_b, op_c, iadd3.shift); } void TranslatorVisitor::IADD3_cbuf(u64 insn) { -- cgit v1.2.3 From b9069c7891f2516ea037e9355daea284a1d540f1 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 28 Jun 2021 22:38:35 -0400 Subject: shader: Account for 33-bit IADD3 scenario --- .../maxwell/translate/impl/integer_add_three_input.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp index b50017536..040cfc10f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp @@ -36,8 +36,12 @@ enum class Half : u64 { switch (shift) { case Shift::None: return value; - case Shift::Right: - return ir.ShiftRightLogical(value, ir.Imm32(16)); + case Shift::Right: { + // 33-bit RS IADD3 edge case + const IR::U1 edge_case{ir.GetCarryFromOp(value)}; + const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))}; + return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)}; + } case Shift::Left: return ir.ShiftLeftLogical(value, ir.Imm32(16)); } @@ -67,6 +71,10 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 o } IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)}; if (iadd3.x != 0) { + // TODO: How does RS behave when X is set? + if (shift == Shift::Right) { + throw NotImplementedException("IADD3 X+RS"); + } const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; lhs_1 = v.ir.IAdd(lhs_1, carry); } -- cgit v1.2.3 From 2e5af95541adf581364ee3864be57f9b2b9a230f Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Mon, 28 Jun 2021 23:44:03 -0400 Subject: shader: GCC fmt 8.0.0 fixes --- src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 10d05dc4c..06fde0017 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -174,7 +174,7 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) { switch (stmt->type) { case StatementType::Code: ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent, - stmt->block->begin, stmt->block->end, + stmt->block->begin.Offset(), stmt->block->end.Offset(), reinterpret_cast(stmt->block)); break; case StatementType::Goto: -- cgit v1.2.3 From 11f04f1022d0820a1fdba38221ecd38f19d86d9e Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 4 Jul 2021 00:34:53 -0400 Subject: shader: Ignore global memory ops on devices lacking int64 support --- src/shader_recompiler/frontend/ir/opcodes.inc | 28 +++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 9af750283..d91098c80 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -71,20 +71,20 @@ OPCODE(UndefU32, U32, OPCODE(UndefU64, U64, ) // Memory operations -OPCODE(LoadGlobalU8, U32, U64, ) -OPCODE(LoadGlobalS8, U32, U64, ) -OPCODE(LoadGlobalU16, U32, U64, ) -OPCODE(LoadGlobalS16, U32, U64, ) -OPCODE(LoadGlobal32, U32, U64, ) -OPCODE(LoadGlobal64, U32x2, U64, ) -OPCODE(LoadGlobal128, U32x4, U64, ) -OPCODE(WriteGlobalU8, Void, U64, U32, ) -OPCODE(WriteGlobalS8, Void, U64, U32, ) -OPCODE(WriteGlobalU16, Void, U64, U32, ) -OPCODE(WriteGlobalS16, Void, U64, U32, ) -OPCODE(WriteGlobal32, Void, U64, U32, ) -OPCODE(WriteGlobal64, Void, U64, U32x2, ) -OPCODE(WriteGlobal128, Void, U64, U32x4, ) +OPCODE(LoadGlobalU8, U32, Opaque, ) +OPCODE(LoadGlobalS8, U32, Opaque, ) +OPCODE(LoadGlobalU16, U32, Opaque, ) +OPCODE(LoadGlobalS16, U32, Opaque, ) +OPCODE(LoadGlobal32, U32, Opaque, ) +OPCODE(LoadGlobal64, U32x2, Opaque, ) +OPCODE(LoadGlobal128, U32x4, Opaque, ) +OPCODE(WriteGlobalU8, Void, Opaque, U32, ) +OPCODE(WriteGlobalS8, Void, Opaque, U32, ) +OPCODE(WriteGlobalU16, Void, Opaque, U32, ) +OPCODE(WriteGlobalS16, Void, Opaque, U32, ) +OPCODE(WriteGlobal32, Void, Opaque, U32, ) +OPCODE(WriteGlobal64, Void, Opaque, U32x2, ) +OPCODE(WriteGlobal128, Void, Opaque, U32x4, ) // Storage buffer operations OPCODE(LoadStorageU8, U32, U32, U32, ) -- cgit v1.2.3 From 2235a51b5d987cf8297211bb1778d75e6b794324 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Jul 2021 01:10:38 -0300 Subject: shader: Manually convert from array to bitset instead of using bit_cast --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 2bb1d24a4..83c77967d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. #include -#include #include #include #include @@ -144,7 +143,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool>(mask); + for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) { + program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0; + } } break; } -- cgit v1.2.3 From 49946cf780c317b4c5ccabb52ec433eba01c1970 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sun, 11 Jul 2021 22:10:38 -0400 Subject: shader_recompiler, video_core: Resolve clang errors Silences the following warnings-turned-errors: -Wsign-conversion -Wunused-private-field -Wbraced-scalar-init -Wunused-variable And some other errors --- src/shader_recompiler/frontend/ir/opcodes.h | 3 ++- src/shader_recompiler/frontend/maxwell/control_flow.h | 1 - .../frontend/maxwell/structured_control_flow.cpp | 9 ++------- .../translate/impl/atomic_operations_global_memory.cpp | 12 ++++++------ .../translate/impl/integer_floating_point_conversion.cpp | 4 +++- .../frontend/maxwell/translate/impl/load_store_attribute.cpp | 12 ++++++------ .../maxwell/translate/impl/surface_atomic_operations.cpp | 3 --- .../frontend/maxwell/translate/impl/surface_load_store.cpp | 8 ++++---- 8 files changed, 23 insertions(+), 29 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h index 56b001902..9ab108292 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.h +++ b/src/shader_recompiler/frontend/ir/opcodes.h @@ -67,7 +67,8 @@ constexpr OpcodeMeta META_TABLE[]{ }; constexpr size_t CalculateNumArgsOf(Opcode op) { const auto& arg_types{META_TABLE[static_cast(op)].arg_types}; - return std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void)); + return static_cast( + std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void))); } constexpr u8 NUM_ARGS[]{ diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 0e515c3b6..a6bd3e196 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -161,7 +161,6 @@ private: Environment& env; ObjectPool& block_pool; boost::container::small_vector functions; - FunctionId current_function_id{0}; Location program_start; bool exits_to_dispatcher{}; Block* dispatch_block{}; diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 06fde0017..221454b99 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -313,9 +313,7 @@ bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept { class GotoPass { public: - explicit GotoPass(Flow::CFG& cfg, ObjectPool& inst_pool_, - ObjectPool& block_pool_, ObjectPool& stmt_pool) - : inst_pool{inst_pool_}, block_pool{block_pool_}, pool{stmt_pool} { + explicit GotoPass(Flow::CFG& cfg, ObjectPool& stmt_pool) : pool{stmt_pool} { std::vector gotos{BuildTree(cfg)}; for (const Node& goto_stmt : gotos | std::views::reverse) { RemoveGoto(goto_stmt); @@ -616,8 +614,6 @@ private: return parent_tree.insert(std::next(loop), *new_goto); } - ObjectPool& inst_pool; - ObjectPool& block_pool; ObjectPool& pool; Statement root_stmt{FunctionTag{}}; }; @@ -864,7 +860,6 @@ private: ObjectPool& block_pool; Environment& env; IR::AbstractSyntaxList& syntax_list; - u32 loop_id{}; // TODO: C++20 Remove this when all compilers support constexpr std::vector #if __cpp_lib_constexpr_vector >= 201907 @@ -878,7 +873,7 @@ private: IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, Flow::CFG& cfg) { ObjectPool stmt_pool{64}; - GotoPass goto_pass{cfg, inst_pool, block_pool, stmt_pool}; + GotoPass goto_pass{cfg, stmt_pool}; Statement& root{goto_pass.RootStatement()}; IR::AbstractSyntaxList syntax_list; TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp index 66f39e44e..d9f999e05 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp @@ -59,14 +59,14 @@ IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op, AtomSize size) { static constexpr IR::FpControl f16_control{ - .no_contraction{false}, - .rounding{IR::FpRounding::RN}, - .fmz_mode{IR::FmzMode::DontCare}, + .no_contraction = false, + .rounding = IR::FpRounding::RN, + .fmz_mode = IR::FmzMode::DontCare, }; static constexpr IR::FpControl f32_control{ - .no_contraction{false}, - .rounding{IR::FpRounding::RN}, - .fmz_mode{IR::FmzMode::FTZ}, + .no_contraction = false, + .rounding = IR::FpRounding::RN, + .fmz_mode = IR::FmzMode::FTZ, }; switch (op) { case AtomOp::ADD: diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp index e0e157275..0b8119ddd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -104,7 +104,9 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { .rounding = CastFpRounding(i2f.fp_rounding), .fmz_mode = IR::FmzMode::DontCare, }; - auto value{v.ir.ConvertIToF(dst_bitsize, conversion_src_bitsize, is_signed, src, fp_control)}; + auto value{v.ir.ConvertIToF(static_cast(dst_bitsize), + static_cast(conversion_src_bitsize), is_signed, src, + fp_control)}; if (i2f.neg != 0) { if (i2f.abs != 0 || !is_signed) { // We know the value is positive diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index 7d7dcc3cb..924fb7a40 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -80,10 +80,10 @@ void TranslatorVisitor::ALD(u64 insn) { for (u32 element = 0; element < num_elements; ++element) { if (ald.patch != 0) { const IR::Patch patch{offset / 4 + element}; - F(ald.dest_reg + element, ir.GetPatch(patch)); + F(ald.dest_reg + static_cast(element), ir.GetPatch(patch)); } else { const IR::Attribute attr{offset / 4 + element}; - F(ald.dest_reg + element, ir.GetAttribute(attr, vertex)); + F(ald.dest_reg + static_cast(element), ir.GetAttribute(attr, vertex)); } } return; @@ -92,7 +92,7 @@ void TranslatorVisitor::ALD(u64 insn) { throw NotImplementedException("Indirect patch read"); } HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { - F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset, vertex)); + F(ald.dest_reg + static_cast(element), ir.GetAttributeIndexed(final_offset, vertex)); }); } @@ -121,10 +121,10 @@ void TranslatorVisitor::AST(u64 insn) { for (u32 element = 0; element < num_elements; ++element) { if (ast.patch != 0) { const IR::Patch patch{offset / 4 + element}; - ir.SetPatch(patch, F(ast.src_reg + element)); + ir.SetPatch(patch, F(ast.src_reg + static_cast(element))); } else { const IR::Attribute attr{offset / 4 + element}; - ir.SetAttribute(attr, F(ast.src_reg + element), vertex); + ir.SetAttribute(attr, F(ast.src_reg + static_cast(element)), vertex); } } return; @@ -133,7 +133,7 @@ void TranslatorVisitor::AST(u64 insn) { throw NotImplementedException("Indexed tessellation patch store"); } HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { - ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element), vertex); + ir.SetAttributeIndexed(final_offset, F(ast.src_reg + static_cast(element)), vertex); }); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp index 44144f154..63b588ad4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp @@ -69,9 +69,6 @@ TextureType GetType(Type type) { } IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { - const auto array{[&](int index) { - return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16)); - }}; switch (type) { case Type::_1D: case Type::BUFFER_1D: diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp index 7dc793ad7..681220a8d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp @@ -160,10 +160,10 @@ unsigned SwizzleMask(u64 swizzle) { IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) { std::array colors; for (int i = 0; i < num_regs; ++i) { - colors[i] = ir.GetReg(reg + i); + colors[static_cast(i)] = ir.GetReg(reg + i); } for (int i = num_regs; i < 4; ++i) { - colors[i] = ir.Imm32(0); + colors[static_cast(i)] = ir.Imm32(0); } return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]); } @@ -211,12 +211,12 @@ void TranslatorVisitor::SULD(u64 insn) { if (is_typed) { const int num_regs{SizeInRegs(suld.size)}; for (int i = 0; i < num_regs; ++i) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast(i))}); } } else { const unsigned mask{SwizzleMask(suld.swizzle)}; const int bits{std::popcount(mask)}; - if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : bits)) { + if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : static_cast(bits))) { throw NotImplementedException("Unaligned destination register"); } for (unsigned component = 0; component < 4; ++component) { -- cgit v1.2.3 From bf2956d77ab0ad06c4b5505cc9906e51e5878274 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 12 Jul 2021 05:22:01 -0300 Subject: shader: Avoid usage of C++20 ranges to build in clang --- .../frontend/maxwell/control_flow.cpp | 13 ++++++------- .../frontend/maxwell/structured_control_flow.cpp | 8 ++++---- .../frontend/maxwell/translate_program.cpp | 20 +++++++++++++------- 3 files changed, 23 insertions(+), 18 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index e7abea82f..1a954a509 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -151,18 +150,18 @@ std::pair Stack::Pop(Token token) const { } std::optional Stack::Peek(Token token) const { - const auto reverse_entries{entries | std::views::reverse}; - const auto it{std::ranges::find(reverse_entries, token, &StackEntry::token)}; - if (it == reverse_entries.end()) { + const auto it{std::find_if(entries.rbegin(), entries.rend(), + [token](const auto& entry) { return entry.token == token; })}; + if (it == entries.rend()) { return std::nullopt; } return it->target; } Stack Stack::Remove(Token token) const { - const auto reverse_entries{entries | std::views::reverse}; - const auto it{std::ranges::find(reverse_entries, token, &StackEntry::token)}; - const auto pos{std::distance(reverse_entries.begin(), it)}; + const auto it{std::find_if(entries.rbegin(), entries.rend(), + [token](const auto& entry) { return entry.token == token; })}; + const auto pos{std::distance(entries.rbegin(), it)}; Stack result; result.entries.insert(result.entries.end(), entries.begin(), entries.end() - pos - 1); return result; diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 221454b99..8b3e0a15c 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -4,7 +4,6 @@ #include #include -#include #include #include #include @@ -167,7 +166,7 @@ std::string DumpExpr(const Statement* stmt) { } } -std::string DumpTree(const Tree& tree, u32 indentation = 0) { +[[maybe_unused]] std::string DumpTree(const Tree& tree, u32 indentation = 0) { std::string ret; std::string indent(indentation, ' '); for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { @@ -315,8 +314,9 @@ class GotoPass { public: explicit GotoPass(Flow::CFG& cfg, ObjectPool& stmt_pool) : pool{stmt_pool} { std::vector gotos{BuildTree(cfg)}; - for (const Node& goto_stmt : gotos | std::views::reverse) { - RemoveGoto(goto_stmt); + const auto end{gotos.rend()}; + for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) { + RemoveGoto(*goto_stmt); } } diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 83c77967d..c067d459c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -4,7 +4,6 @@ #include #include -#include #include #include "common/settings.h" @@ -20,12 +19,19 @@ namespace Shader::Maxwell { namespace { IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { - auto syntax_blocks{syntax_list | std::views::filter([](const auto& node) { - return node.type == IR::AbstractSyntaxNode::Type::Block; - })}; - IR::BlockList blocks(std::ranges::distance(syntax_blocks)); - std::ranges::transform(syntax_blocks, blocks.begin(), - [](const IR::AbstractSyntaxNode& node) { return node.data.block; }); + size_t num_syntax_blocks{}; + for (const auto& node : syntax_list) { + if (node.type == IR::AbstractSyntaxNode::Type::Block) { + ++num_syntax_blocks; + } + } + IR::BlockList blocks; + blocks.reserve(num_syntax_blocks); + for (const auto& node : syntax_list) { + if (node.type == IR::AbstractSyntaxNode::Type::Block) { + blocks.push_back(node.data.block); + } + } return blocks; } -- cgit v1.2.3 From fc7bed21b539aac4fdde74a41217066eaf8ed3f9 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 12 Jul 2021 19:56:14 -0400 Subject: shader: Implement ISETP.X --- .../maxwell/translate/impl/common_funcs.cpp | 43 ++++++++++++++++++++++ .../frontend/maxwell/translate/impl/common_funcs.h | 4 ++ .../translate/impl/integer_compare_and_set.cpp | 43 ---------------------- .../translate/impl/integer_set_predicate.cpp | 11 +++++- 4 files changed, 57 insertions(+), 44 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index 10bb01d99..20458d2ad 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -29,6 +29,49 @@ IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32 } } +IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed) { + const IR::U32 zero{ir.Imm32(0)}; + const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)}; + const IR::U1 z_flag{ir.GetZFlag()}; + const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)}; + const IR::U1 flip_logic{is_signed ? ir.Imm1(false) + : ir.LogicalXor(ir.ILessThan(operand_1, zero, true), + ir.ILessThan(operand_2, zero, true))}; + switch (compare_op) { + case CompareOp::False: + return ir.Imm1(false); + case CompareOp::LessThan: + return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), + ir.ILessThan(intermediate, zero, true))}; + case CompareOp::Equal: + return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag); + case CompareOp::LessThanEqual: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), + ir.ILessThan(intermediate, zero, true))}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); + } + case CompareOp::GreaterThan: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true), + ir.IGreaterThan(intermediate, zero, true))}; + const IR::U1 not_z{ir.LogicalNot(z_flag)}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z)); + } + case CompareOp::NotEqual: + return ir.LogicalOr(ir.INotEqual(intermediate, zero), + ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag))); + case CompareOp::GreaterThanEqual: { + const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true), + ir.IGreaterThanEqual(intermediate, zero, true))}; + return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); + } + case CompareOp::True: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid compare op {}", compare_op); + } +} + IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, BooleanOp bop) { switch (bop) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h index f584060b3..214d0af3c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -11,6 +11,10 @@ namespace Shader::Maxwell { [[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, CompareOp compare_op, bool is_signed); +[[nodiscard]] IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, + const IR::U32& operand_2, CompareOp compare_op, + bool is_signed); + [[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, BooleanOp bop); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp index 34fa7345c..8ce1aee04 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp @@ -9,49 +9,6 @@ namespace Shader::Maxwell { namespace { -IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, - CompareOp compare_op, bool is_signed) { - const IR::U32 zero{ir.Imm32(0)}; - const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)}; - const IR::U1 z_flag{ir.GetZFlag()}; - const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)}; - const IR::U1 flip_logic{is_signed ? ir.Imm1(false) - : ir.LogicalXor(ir.ILessThan(operand_1, zero, true), - ir.ILessThan(operand_2, zero, true))}; - switch (compare_op) { - case CompareOp::False: - return ir.Imm1(false); - case CompareOp::LessThan: - return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), - ir.ILessThan(intermediate, zero, true))}; - case CompareOp::Equal: - return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag); - case CompareOp::LessThanEqual: { - const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), - ir.ILessThan(intermediate, zero, true))}; - return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); - } - case CompareOp::GreaterThan: { - const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true), - ir.IGreaterThan(intermediate, zero, true))}; - const IR::U1 not_z{ir.LogicalNot(z_flag)}; - return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z)); - } - case CompareOp::NotEqual: - return ir.LogicalOr(ir.INotEqual(intermediate, zero), - ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag))); - case CompareOp::GreaterThanEqual: { - const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true), - ir.IGreaterThanEqual(intermediate, zero, true))}; - return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); - } - case CompareOp::True: - return ir.Imm1(true); - default: - throw NotImplementedException("Invalid compare op {}", compare_op); - } -} - IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, CompareOp compare_op, bool is_signed, bool x) { return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp index 7743701d0..bee10e5b9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp @@ -9,6 +9,12 @@ namespace Shader::Maxwell { namespace { +IR::U1 IsetpCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, + CompareOp compare_op, bool is_signed, bool x) { + return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) + : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed); +} + void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { union { u64 raw; @@ -17,15 +23,18 @@ void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { BitField<8, 8, IR::Reg> src_reg_a; BitField<39, 3, IR::Pred> bop_pred; BitField<42, 1, u64> neg_bop_pred; + BitField<43, 1, u64> x; BitField<45, 2, BooleanOp> bop; BitField<48, 1, u64> is_signed; BitField<49, 3, CompareOp> compare_op; } const isetp{insn}; + const bool is_signed{isetp.is_signed != 0}; + const bool x{isetp.x != 0}; const BooleanOp bop{isetp.bop}; const CompareOp compare_op{isetp.compare_op}; const IR::U32 op_a{v.X(isetp.src_reg_a)}; - const IR::U1 comparison{IntegerCompare(v.ir, op_a, op_b, compare_op, isetp.is_signed != 0)}; + const IR::U1 comparison{IsetpCompare(v.ir, op_a, op_b, compare_op, is_signed, x)}; const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)}; const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; -- cgit v1.2.3 From 7f13104c1778cfdfd54350e92603164070781124 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Jul 2021 18:28:36 -0300 Subject: shader: Support out of bound local memory reads and immediate writes Support ignoring immediate out of bound writes. Writing dynamically out of bounds is not yet supported (e.g. R0+0x4). Reading out of bounds yields zero. This is supported checking for the size from the IR; if the input is immediate, the optimization passes will drop it. --- .../translate/impl/load_store_local_shared.cpp | 25 ++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'src/shader_recompiler/frontend') diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp index 20df163f2..d2a1dbf61 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp @@ -85,21 +85,28 @@ IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) { IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) { return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16)); } + +IR::U32 LoadLocal(TranslatorVisitor& v, const IR::U32& word_offset, const IR::U32& offset) { + const IR::U32 local_memory_size{v.ir.Imm32(v.env.LocalMemorySize())}; + const IR::U1 in_bounds{v.ir.ILessThan(offset, local_memory_size, false)}; + return IR::U32{v.ir.Select(in_bounds, v.ir.LoadLocal(word_offset), v.ir.Imm32(0))}; +} } // Anonymous namespace void TranslatorVisitor::LDL(u64 insn) { const auto [word_offset, offset]{WordOffset(*this, insn)}; + const IR::U32 word{LoadLocal(*this, word_offset, offset)}; const IR::Reg dest{Reg(insn)}; const auto [bit_size, is_signed]{GetSize(insn)}; switch (bit_size) { case 8: { const IR::U32 bit{ByteOffset(ir, offset)}; - X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(8), is_signed)); + X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(8), is_signed)); break; } case 16: { const IR::U32 bit{ShortOffset(ir, offset)}; - X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(16), is_signed)); + X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(16), is_signed)); break; } case 32: @@ -108,9 +115,11 @@ void TranslatorVisitor::LDL(u64 insn) { if (!IR::IsAligned(dest, static_cast(bit_size / 32))) { throw NotImplementedException("Unaligned destination register {}", dest); } - X(dest, ir.LoadLocal(word_offset)); + X(dest, word); for (int i = 1; i < bit_size / 32; ++i) { - X(dest + i, ir.LoadLocal(ir.IAdd(word_offset, ir.Imm32(i)))); + const IR::U32 sub_word_offset{ir.IAdd(word_offset, ir.Imm32(i))}; + const IR::U32 sub_offset{ir.IAdd(offset, ir.Imm32(i * 4))}; + X(dest + i, LoadLocal(*this, sub_word_offset, sub_offset)); } break; } @@ -141,6 +150,14 @@ void TranslatorVisitor::LDS(u64 insn) { void TranslatorVisitor::STL(u64 insn) { const auto [word_offset, offset]{WordOffset(*this, insn)}; + if (offset.IsImmediate()) { + // TODO: Support storing out of bounds at runtime + if (offset.U32() >= env.LocalMemorySize()) { + LOG_WARNING(Shader, "Storing local memory at 0x{:x} with a size of 0x{:x}, dropping", + offset.U32(), env.LocalMemorySize()); + return; + } + } const IR::Reg reg{Reg(insn)}; const IR::U32 src{X(reg)}; const int bit_size{GetSize(insn).first}; -- cgit v1.2.3