diff options
| author | Yuri Kunde Schlesner <yuriks@yuriks.net> | 2017-06-29 09:29:40 -0700 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2017-06-29 09:29:40 -0700 | 
| commit | 686fde7e526e024716baa3aa3ba887d1a2479d41 (patch) | |
| tree | 52fd95ec3fc77f42122783663a36ed6734f773db | |
| parent | a613d45de992efd21afc73049d3811d5e337f56b (diff) | |
| parent | 85a448d40560a40d5fe2424b4c50b7bebe2a6064 (diff) | |
Merge pull request #2809 from wwylele/texture-copy-fix
gpu: fix edge cases for TextureCopy
| -rw-r--r-- | src/core/hw/gpu.cpp | 41 | ||||
| -rw-r--r-- | src/core/hw/gpu.h | 2 | 
2 files changed, 24 insertions, 19 deletions
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 42809c731..6838e449c 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -5,6 +5,7 @@
 #include <cstring>
 #include <numeric>
 #include <type_traits>
+#include "common/alignment.h"
 #include "common/color.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
@@ -313,7 +314,7 @@ static void TextureCopy(const Regs::DisplayTransferConfig& config) {
     const PAddr src_addr = config.GetPhysicalInputAddress();
     const PAddr dst_addr = config.GetPhysicalOutputAddress();
 
-    // TODO: do hwtest with these cases
+    // TODO: do hwtest with invalid addresses
     if (!Memory::IsValidPhysicalAddress(src_addr)) {
         LOG_CRITICAL(HW_GPU, "invalid input address 0x%08X", src_addr);
         return;
@@ -324,31 +325,36 @@ static void TextureCopy(const Regs::DisplayTransferConfig& config) {
         return;
     }
 
-    if (config.texture_copy.input_width == 0) {
-        LOG_CRITICAL(HW_GPU, "zero input width");
+    if (VideoCore::g_renderer->Rasterizer()->AccelerateTextureCopy(config))
         return;
-    }
 
-    if (config.texture_copy.output_width == 0) {
-        LOG_CRITICAL(HW_GPU, "zero output width");
+    u8* src_pointer = Memory::GetPhysicalPointer(src_addr);
+    u8* dst_pointer = Memory::GetPhysicalPointer(dst_addr);
+
+    u32 remaining_size = Common::AlignDown(config.texture_copy.size, 16);
+
+    if (remaining_size == 0) {
+        LOG_CRITICAL(HW_GPU, "zero size. Real hardware freezes on this.");
         return;
     }
 
-    if (config.texture_copy.size == 0) {
-        LOG_CRITICAL(HW_GPU, "zero size");
+    u32 input_gap = config.texture_copy.input_gap * 16;
+    u32 output_gap = config.texture_copy.output_gap * 16;
+
+    // Zero gap means contiguous input/output even if width = 0. To avoid infinite loop below, width
+    // is assigned with the total size if gap = 0.
+    u32 input_width = input_gap == 0 ? remaining_size : config.texture_copy.input_width * 16;
+    u32 output_width = output_gap == 0 ? remaining_size : config.texture_copy.output_width * 16;
+
+    if (input_width == 0) {
+        LOG_CRITICAL(HW_GPU, "zero input width. Real hardware freezes on this.");
         return;
     }
 
-    if (VideoCore::g_renderer->Rasterizer()->AccelerateTextureCopy(config))
+    if (output_width == 0) {
+        LOG_CRITICAL(HW_GPU, "zero output width. Real hardware freezes on this.");
         return;
-
-    u8* src_pointer = Memory::GetPhysicalPointer(src_addr);
-    u8* dst_pointer = Memory::GetPhysicalPointer(dst_addr);
-
-    u32 input_width = config.texture_copy.input_width * 16;
-    u32 input_gap = config.texture_copy.input_gap * 16;
-    u32 output_width = config.texture_copy.output_width * 16;
-    u32 output_gap = config.texture_copy.output_gap * 16;
+    }
 
     size_t contiguous_input_size =
         config.texture_copy.size / input_width * (input_width + input_gap);
@@ -360,7 +366,6 @@ static void TextureCopy(const Regs::DisplayTransferConfig& config) {
     Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(),
                                                static_cast<u32>(contiguous_output_size));
 
-    u32 remaining_size = config.texture_copy.size;
     u32 remaining_input = input_width;
     u32 remaining_output = output_width;
     while (remaining_size > 0) {
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index bdd997b2a..21b127fee 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -225,7 +225,7 @@ struct Regs {
         INSERT_PADDING_WORDS(0x1);
 
         struct {
-            u32 size;
+            u32 size; // The lower 4 bits are ignored
 
             union {
                 u32 input_size;
