diff options
| author | bunnei <bunneidev@gmail.com> | 2015-05-09 22:45:05 -0400 | 
|---|---|---|
| committer | bunnei <bunneidev@gmail.com> | 2015-05-09 22:45:05 -0400 | 
| commit | ba0bfe7d82a241f1dbe449a1bdcc2a76c594c667 (patch) | |
| tree | e4274244ccd93c0b4e15e84f551c99382e8169d9 /src/video_core | |
| parent | 088f6ae2c65824152aae5a76559ce35d75f0e000 (diff) | |
| parent | 23e8be573ef047d8a0bee191f4065dbcd60a7f65 (diff) | |
Merge pull request #726 from bunnei/gpu-improvements
GPU improvements
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/pica.h | 67 | ||||
| -rw-r--r-- | src/video_core/rasterizer.cpp | 49 | ||||
| -rw-r--r-- | src/video_core/vertex_shader.cpp | 20 | 
3 files changed, 117 insertions, 19 deletions
| diff --git a/src/video_core/pica.h b/src/video_core/pica.h index e4a91058c..5e169ff69 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -226,7 +226,8 @@ struct Regs {              Texture1               = 0x4,              Texture2               = 0x5,              Texture3               = 0x6, -            // 0x7-0xc = primary color?? + +            PreviousBuffer         = 0xd,              Constant               = 0xe,              Previous               = 0xf,          }; @@ -299,7 +300,18 @@ struct Regs {              BitField<24, 8, u32> const_a;          }; -        INSERT_PADDING_WORDS(0x1); +        union { +            BitField< 0, 2, u32> color_scale; +            BitField<16, 2, u32> alpha_scale; +        }; + +        inline unsigned GetColorMultiplier() const { +            return (color_scale < 3) ? (1 << color_scale) : 1; +        } + +        inline unsigned GetAlphaMultiplier() const { +            return (alpha_scale < 3) ? (1 << alpha_scale) : 1; +        }      };      TevStageConfig tev_stage0; @@ -309,11 +321,36 @@ struct Regs {      TevStageConfig tev_stage2;      INSERT_PADDING_WORDS(0x3);      TevStageConfig tev_stage3; -    INSERT_PADDING_WORDS(0x13); +    INSERT_PADDING_WORDS(0x3); + +    union { +        // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in +        // these masks are set +        BitField< 8, 4, u32> update_mask_rgb; +        BitField<12, 4, u32> update_mask_a; + +        bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { +            return (stage_index < 4) && (update_mask_rgb & (1 << stage_index)); +        } + +        bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { +            return (stage_index < 4) && (update_mask_a & (1 << stage_index)); +        } +    } tev_combiner_buffer_input; +     +    INSERT_PADDING_WORDS(0xf);      TevStageConfig tev_stage4;      INSERT_PADDING_WORDS(0x3);      TevStageConfig tev_stage5; -    INSERT_PADDING_WORDS(0x3); + +    union { +        BitField< 0, 8, u32> r; +        BitField< 8, 8, u32> g; +        BitField<16, 8, u32> b; +        BitField<24, 8, u32> a; +    } tev_combiner_buffer_color; + +    INSERT_PADDING_WORDS(0x2);      const std::array<Regs::TevStageConfig,6> GetTevStages() const {          return { tev_stage0, tev_stage1, @@ -426,9 +463,7 @@ struct Regs {          D24S8  = 3      }; -    /* -     * Returns the number of bytes in the specified depth format -     */ +    // Returns the number of bytes in the specified depth format      static u32 BytesPerDepthPixel(DepthFormat format) {          switch (format) {          case DepthFormat::D16: @@ -443,6 +478,20 @@ struct Regs {          }      } +    // Returns the number of bits per depth component of the specified depth format +    static u32 DepthBitsPerPixel(DepthFormat format) { +        switch (format) { +        case DepthFormat::D16: +            return 16; +        case DepthFormat::D24: +        case DepthFormat::D24S8: +            return 24; +        default: +            LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format); +            UNIMPLEMENTED(); +        } +    } +      struct {          // Components are laid out in reverse byte order, most significant bits first.          enum ColorFormat : u32 { @@ -784,8 +833,10 @@ struct Regs {          ADD_FIELD(tev_stage1);          ADD_FIELD(tev_stage2);          ADD_FIELD(tev_stage3); +        ADD_FIELD(tev_combiner_buffer_input);          ADD_FIELD(tev_stage4);          ADD_FIELD(tev_stage5); +        ADD_FIELD(tev_combiner_buffer_color);          ADD_FIELD(output_merger);          ADD_FIELD(framebuffer);          ADD_FIELD(vertex_attributes); @@ -859,8 +910,10 @@ ASSERT_REG_POSITION(tev_stage0, 0xc0);  ASSERT_REG_POSITION(tev_stage1, 0xc8);  ASSERT_REG_POSITION(tev_stage2, 0xd0);  ASSERT_REG_POSITION(tev_stage3, 0xd8); +ASSERT_REG_POSITION(tev_combiner_buffer_input, 0xe0);  ASSERT_REG_POSITION(tev_stage4, 0xf0);  ASSERT_REG_POSITION(tev_stage5, 0xf8); +ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd);  ASSERT_REG_POSITION(output_merger, 0x100);  ASSERT_REG_POSITION(framebuffer, 0x110);  ASSERT_REG_POSITION(vertex_attributes, 0x200); diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 3b3fef484..46a326bb4 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -90,7 +90,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {          UNIMPLEMENTED();      } -    return {}; +    return {0, 0, 0, 0};  }  static u32 GetDepth(int x, int y) { @@ -376,7 +376,13 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,              // with some basic arithmetic. Alpha combiners can be configured separately but work              // analogously.              Math::Vec4<u8> combiner_output; -            for (const auto& tev_stage : tev_stages) { +            Math::Vec4<u8> combiner_buffer = { +                registers.tev_combiner_buffer_color.r, registers.tev_combiner_buffer_color.g, +                registers.tev_combiner_buffer_color.b, registers.tev_combiner_buffer_color.a +            }; + +            for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { +                const auto& tev_stage = tev_stages[tev_stage_index];                  using Source = Regs::TevStageConfig::Source;                  using ColorModifier = Regs::TevStageConfig::ColorModifier;                  using AlphaModifier = Regs::TevStageConfig::AlphaModifier; @@ -398,6 +404,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,                      case Source::Texture2:                          return texture_color[2]; +                    case Source::PreviousBuffer: +                        return combiner_buffer; +                      case Source::Constant:                          return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a}; @@ -407,7 +416,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,                      default:                          LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source);                          UNIMPLEMENTED(); -                        return {}; +                        return {0, 0, 0, 0};                      }                  }; @@ -490,6 +499,16 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,                          return result.Cast<u8>();                      } +                    case Operation::AddSigned: +                    { +                        // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct +                        auto result = input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128); +                        result.r() = MathUtil::Clamp<int>(result.r(), 0, 255); +                        result.g() = MathUtil::Clamp<int>(result.g(), 0, 255); +                        result.b() = MathUtil::Clamp<int>(result.b(), 0, 255); +                        return result.Cast<u8>(); +                    } +                      case Operation::Lerp:                          return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>(); @@ -524,7 +543,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,                      default:                          LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);                          UNIMPLEMENTED(); -                        return {}; +                        return {0, 0, 0};                      }                  }; @@ -578,7 +597,20 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,                  };                  auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); -                combiner_output = Math::MakeVec(color_output, alpha_output); +                combiner_output[0] = std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier()); +                combiner_output[1] = std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier()); +                combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); +                combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); + +                if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) { +                    combiner_buffer.r() = combiner_output.r(); +                    combiner_buffer.g() = combiner_output.g(); +                    combiner_buffer.b() = combiner_output.b(); +                } + +                if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) { +                    combiner_buffer.a() = combiner_output.a(); +                }              }              if (registers.output_merger.alpha_test.enable) { @@ -624,9 +656,10 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,              // TODO: Does depth indeed only get written even if depth testing is enabled?              if (registers.output_merger.depth_test_enable) { -                u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 + -                            v1.screenpos[2].ToFloat32() * w1 + -                            v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); +                unsigned num_bits = Pica::Regs::DepthBitsPerPixel(registers.framebuffer.depth_format); +                u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 + +                               v1.screenpos[2].ToFloat32() * w1 + +                               v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum);                  u32 ref_z = GetDepth(x >> 4, y >> 4);                  bool pass = false; diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 51f4e58bf..885b7de59 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -235,6 +235,15 @@ static void ProcessShaderCode(VertexShaderState& state) {                  break;              } +            case OpCode::Id::FLR: +                for (int i = 0; i < 4; ++i) { +                    if (!swizzle.DestComponentEnabled(i)) +                        continue; + +                    dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32())); +                } +                break; +              case OpCode::Id::MAX:                  for (int i = 0; i < 4; ++i) {                      if (!swizzle.DestComponentEnabled(i)) @@ -366,12 +375,15 @@ static void ProcessShaderCode(VertexShaderState& state) {          case OpCode::Type::MultiplyAdd:          { -            if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) { +            if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) ||  +                (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) {                  const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id]; -                const float24* src1_ = LookupSourceRegister(instr.mad.src1); -                const float24* src2_ = LookupSourceRegister(instr.mad.src2); -                const float24* src3_ = LookupSourceRegister(instr.mad.src3); +                bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI); + +                const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted)); +                const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted)); +                const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted));                  const bool negate_src1 = ((bool)swizzle.negate_src1 != false);                  const bool negate_src2 = ((bool)swizzle.negate_src2 != false); | 
