diff options
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 134 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 13 | 
2 files changed, 119 insertions, 28 deletions
| diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 801d45144..1bb842fe7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -336,20 +336,22 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 d      constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;      constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); +    // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual +    // pixel values. +    const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; +      if (morton_to_gl) { -        // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual -        // pixel values. -        const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};          const std::vector<u8> data = Tegra::Texture::UnswizzleTexture(              addr, tile_size, bytes_per_pixel, stride, height, depth, block_height, block_depth);          const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())};          memcpy(gl_buffer, data.data(), size_to_copy);      } else { -        // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should -        // check the configuration for this and perform more generic un/swizzle -        LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); -        VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel, -                                       Memory::GetPointer(addr), gl_buffer, morton_to_gl); +        std::vector<u8> data(height * stride * bytes_per_pixel); +        Tegra::Texture::CopySwizzledData(stride / tile_size, height / tile_size, depth, +                                         bytes_per_pixel, bytes_per_pixel, data.data(), gl_buffer, +                                         false, block_height, block_depth); +        const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())}; +        memcpy(Memory::GetPointer(addr), data.data(), size_to_copy);      }  } @@ -430,17 +432,16 @@ static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t,          MortonCopy<false, PixelFormat::RGBA16UI>,          MortonCopy<false, PixelFormat::R11FG11FB10F>,          MortonCopy<false, PixelFormat::RGBA32UI>, -        // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/BC6H_UF16/BC6H_SF16/ASTC_2D_4X4 -        // formats are not supported -        nullptr, -        nullptr, -        nullptr, -        nullptr, -        nullptr, -        nullptr, -        nullptr, -        nullptr, -        nullptr, +        MortonCopy<false, PixelFormat::DXT1>, +        MortonCopy<false, PixelFormat::DXT23>, +        MortonCopy<false, PixelFormat::DXT45>, +        MortonCopy<false, PixelFormat::DXN1>, +        MortonCopy<false, PixelFormat::DXN2UNORM>, +        MortonCopy<false, PixelFormat::DXN2SNORM>, +        MortonCopy<false, PixelFormat::BC7U>, +        MortonCopy<false, PixelFormat::BC6H_UF16>, +        MortonCopy<false, PixelFormat::BC6H_SF16>, +        // TODO(Subv): Swizzling ASTC formats are not supported          nullptr,          MortonCopy<false, PixelFormat::G8R8U>,          MortonCopy<false, PixelFormat::G8R8S>, @@ -754,7 +755,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params)                               SurfaceParams::SurfaceTargetName(params.target));  } -static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) { +static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) {      union S8Z24 {          BitField<0, 24, u32> z24;          BitField<24, 8, u32> s8; @@ -767,16 +768,23 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {      };      static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size"); -    S8Z24 input_pixel{}; -    Z24S8 output_pixel{}; +    S8Z24 s8z24_pixel{}; +    Z24S8 z24s8_pixel{};      constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)};      for (std::size_t y = 0; y < height; ++y) {          for (std::size_t x = 0; x < width; ++x) {              const std::size_t offset{bpp * (y * width + x)}; -            std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24)); -            output_pixel.s8.Assign(input_pixel.s8); -            output_pixel.z24.Assign(input_pixel.z24); -            std::memcpy(&data[offset], &output_pixel, sizeof(Z24S8)); +            if (reverse) { +                std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8)); +                s8z24_pixel.s8.Assign(z24s8_pixel.s8); +                s8z24_pixel.z24.Assign(z24s8_pixel.z24); +                std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24)); +            } else { +                std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24)); +                z24s8_pixel.s8.Assign(s8z24_pixel.s8); +                z24s8_pixel.z24.Assign(s8z24_pixel.z24); +                std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8)); +            }          }      }  } @@ -814,7 +822,7 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma      }      case PixelFormat::S8Z24:          // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24. -        ConvertS8Z24ToZ24S8(data, width, height); +        ConvertS8Z24ToZ24S8(data, width, height, false);          break;      case PixelFormat::G8R8U: @@ -825,6 +833,30 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma      }  } +/** + * Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to + * Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or + * with typical desktop GPUs. + */ +static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format, +                                                u32 width, u32 height) { +    switch (pixel_format) { +    case PixelFormat::G8R8U: +    case PixelFormat::G8R8S: +    case PixelFormat::ASTC_2D_4X4: +    case PixelFormat::ASTC_2D_8X8: { +        LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", +                     static_cast<u32>(pixel_format)); +        UNREACHABLE(); +        break; +    } +    case PixelFormat::S8Z24: +        // Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24. +        ConvertS8Z24ToZ24S8(data, width, height, true); +        break; +    } +} +  MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192));  void CachedSurface::LoadGLBuffer() {      ASSERT(params.type != SurfaceType::Fill); @@ -864,11 +896,57 @@ void CachedSurface::LoadGLBuffer() {      }      ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height); + +    dirty = false;  }  MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));  void CachedSurface::FlushGLBuffer() { -    ASSERT_MSG(false, "Unimplemented"); +    MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); +    const auto& rect{params.GetRect()}; +    // Load data from memory to the surface +    const GLint x0 = static_cast<GLint>(rect.left); +    const GLint y0 = static_cast<GLint>(rect.bottom); +    const size_t buffer_offset = +        static_cast<size_t>(static_cast<size_t>(y0) * params.width + static_cast<size_t>(x0)) * +        GetGLBytesPerPixel(params.pixel_format); +    const u32 bytes_per_pixel = GetGLBytesPerPixel(params.pixel_format); +    const u32 copy_size = params.width * params.height * bytes_per_pixel; +    gl_buffer.resize(static_cast<size_t>(params.depth) * copy_size); +    const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); +    // Ensure no bad interactions with GL_UNPACK_ALIGNMENT +    ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0); +    glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width)); +    ASSERT(!tuple.compressed); +    ASSERT(x0 == 0 && y0 == 0); +    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); +    glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, gl_buffer.size(), +                      gl_buffer.data()); +    glPixelStorei(GL_PACK_ROW_LENGTH, 0); +    ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer, params.pixel_format, params.width, +                                        params.height); +    ASSERT(params.type != SurfaceType::Fill); +    const u8* const texture_src_data = Memory::GetPointer(params.addr); +    ASSERT(texture_src_data); +    if (params.is_tiled) { +        u32 depth = params.depth; +        u32 block_depth = params.block_depth; + +        ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", +                   params.block_width, static_cast<u32>(params.target)); + +        if (params.target == SurfaceParams::SurfaceTarget::Texture2D) { +            // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented. +            depth = 1U; +            block_depth = 1U; +        } +        gl_to_morton_fns[static_cast<size_t>(params.pixel_format)]( +            params.width, params.block_height, params.height, block_depth, depth, +            &gl_buffer[buffer_offset], copy_size, params.addr + buffer_offset); +    } else { +        Memory::WriteBlock(params.addr + buffer_offset, &gl_buffer[buffer_offset], +                           gl_buffer.size() - buffer_offset); +    }  }  MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 0b8ae3eb4..a15fb7b07 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -802,6 +802,18 @@ public:          return params.size_in_bytes_total;      } +    void Flush() { +        // There is no need to flush the surface if it hasn't been modified by us. +        if (!dirty) +            return; +        FlushGLBuffer(); +        dirty = false; +    } + +    void MarkAsDirty() { +        dirty = true; +    } +      const OGLTexture& Texture() const {          return texture;      } @@ -833,6 +845,7 @@ private:      std::vector<u8> gl_buffer;      SurfaceParams params;      GLenum gl_target; +    bool dirty = false;  };  class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { | 
