diff options
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 429 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 43 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/textures/texture.h | 2 | 
5 files changed, 355 insertions, 129 deletions
| diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 1fcd13f04..14d82a7bc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -738,7 +738,7 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,          }          texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); -        Surface surface = res_cache.GetTextureSurface(texture); +        Surface surface = res_cache.GetTextureSurface(texture, entry);          if (surface != nullptr) {              state.texture_units[current_bindpoint].texture = surface->Texture().handle;              state.texture_units[current_bindpoint].target = surface->Target(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 24a540258..ce967c4d6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -41,7 +41,7 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {  }  /*static*/ SurfaceParams SurfaceParams::CreateForTexture( -    const Tegra::Texture::FullTextureInfo& config) { +    const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry) {      SurfaceParams params{};      params.addr = TryGetCpuAddr(config.tic.Address());      params.is_tiled = config.tic.IsTiled(); @@ -60,9 +60,23 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {      case SurfaceTarget::Texture2D:          params.depth = 1;          break; +    case SurfaceTarget::TextureCubemap: +        params.depth = config.tic.Depth() * 6; +        break;      case SurfaceTarget::Texture3D: +        params.depth = config.tic.Depth(); +        break;      case SurfaceTarget::Texture2DArray:          params.depth = config.tic.Depth(); +        if (!entry.IsArray()) { +            // TODO(bunnei): We have seen games re-use a Texture2D as Texture2DArray with depth of +            // one, but sample the texture in the shader as if it were not an array texture. This +            // probably is valid on hardware, but we still need to write a test to confirm this. In +            // emulation, the workaround here is to continue to treat this as a Texture2D. An +            // example game that does this is Super Mario Odyssey (in Cloud Kingdom). +            ASSERT(params.depth == 1); +            params.target = SurfaceTarget::Texture2D; +        }          break;      default:          LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast<u32>(params.target)); @@ -71,7 +85,11 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {          break;      } -    params.size_in_bytes = params.SizeInBytes(); +    params.size_in_bytes_total = params.SizeInBytesTotal(); +    params.size_in_bytes_2d = params.SizeInBytes2D(); +    params.max_mip_level = config.tic.max_mip_level + 1; +    params.rt = {}; +      return params;  } @@ -89,7 +107,16 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {      params.unaligned_height = config.height;      params.target = SurfaceTarget::Texture2D;      params.depth = 1; -    params.size_in_bytes = params.SizeInBytes(); +    params.size_in_bytes_total = params.SizeInBytesTotal(); +    params.size_in_bytes_2d = params.SizeInBytes2D(); +    params.max_mip_level = 0; + +    // Render target specific parameters, not used for caching +    params.rt.index = static_cast<u32>(index); +    params.rt.array_mode = config.array_mode; +    params.rt.layer_stride = config.layer_stride; +    params.rt.base_layer = config.base_layer; +      return params;  } @@ -108,7 +135,11 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {      params.unaligned_height = zeta_height;      params.target = SurfaceTarget::Texture2D;      params.depth = 1; -    params.size_in_bytes = params.SizeInBytes(); +    params.size_in_bytes_total = params.SizeInBytesTotal(); +    params.size_in_bytes_2d = params.SizeInBytes2D(); +    params.max_mip_level = 0; +    params.rt = {}; +      return params;  } @@ -400,9 +431,13 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr),          // clang-format on  }; -static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex, -                         const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type, -                         GLuint read_fb_handle, GLuint draw_fb_handle) { +static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, +                        GLuint read_fb_handle, GLuint draw_fb_handle, GLenum src_attachment = 0, +                        GLenum dst_attachment = 0, std::size_t cubemap_face = 0) { + +    const auto& src_params{src_surface->GetSurfaceParams()}; +    const auto& dst_params{dst_surface->GetSurfaceParams()}; +      OpenGLState prev_state{OpenGLState::GetCurState()};      SCOPE_EXIT({ prev_state.Apply(); }); @@ -413,47 +448,203 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec      u32 buffers{}; -    if (type == SurfaceType::ColorTexture) { -        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, -                               0); -        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, -                               0); +    if (src_params.type == SurfaceType::ColorTexture) { +        switch (src_params.target) { +        case SurfaceParams::SurfaceTarget::Texture2D: +            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, +                                   GL_TEXTURE_2D, src_surface->Texture().handle, 0); +            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, +                                   0, 0); +            break; +        case SurfaceParams::SurfaceTarget::TextureCubemap: +            glFramebufferTexture2D( +                GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, +                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), +                src_surface->Texture().handle, 0); +            glFramebufferTexture2D( +                GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, +                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); +            break; +        case SurfaceParams::SurfaceTarget::Texture2DArray: +            glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, +                                      src_surface->Texture().handle, 0, 0); +            glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); +            break; +        case SurfaceParams::SurfaceTarget::Texture3D: +            glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, +                                   SurfaceTargetToGL(src_params.target), +                                   src_surface->Texture().handle, 0, 0); +            glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, +                                   SurfaceTargetToGL(src_params.target), 0, 0, 0); +            break; +        default: +            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, +                                   GL_TEXTURE_2D, src_surface->Texture().handle, 0); +            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, +                                   0, 0); +            break; +        } + +        switch (dst_params.target) { +        case SurfaceParams::SurfaceTarget::Texture2D: +            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, +                                   GL_TEXTURE_2D, dst_surface->Texture().handle, 0); +            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, +                                   0, 0); +            break; +        case SurfaceParams::SurfaceTarget::TextureCubemap: +            glFramebufferTexture2D( +                GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, +                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), +                dst_surface->Texture().handle, 0); +            glFramebufferTexture2D( +                GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, +                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); +            break; +        case SurfaceParams::SurfaceTarget::Texture2DArray: +            glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, +                                      dst_surface->Texture().handle, 0, 0); +            glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); +            break; -        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, -                               0); -        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, -                               0); +        case SurfaceParams::SurfaceTarget::Texture3D: +            glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, +                                   SurfaceTargetToGL(dst_params.target), +                                   dst_surface->Texture().handle, 0, 0); +            glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, +                                   SurfaceTargetToGL(dst_params.target), 0, 0, 0); +            break; +        default: +            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, +                                   GL_TEXTURE_2D, dst_surface->Texture().handle, 0); +            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, +                                   0, 0); +            break; +        }          buffers = GL_COLOR_BUFFER_BIT; -    } else if (type == SurfaceType::Depth) { -        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); -        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0); +    } else if (src_params.type == SurfaceType::Depth) { +        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, +                               GL_TEXTURE_2D, 0, 0); +        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, +                               src_surface->Texture().handle, 0);          glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); -        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); -        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0); +        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, +                               GL_TEXTURE_2D, 0, 0); +        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, +                               dst_surface->Texture().handle, 0);          glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);          buffers = GL_DEPTH_BUFFER_BIT; -    } else if (type == SurfaceType::DepthStencil) { -        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); +    } else if (src_params.type == SurfaceType::DepthStencil) { +        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, +                               GL_TEXTURE_2D, 0, 0);          glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, -                               src_tex, 0); +                               src_surface->Texture().handle, 0); -        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); +        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, +                               GL_TEXTURE_2D, 0, 0);          glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, -                               dst_tex, 0); +                               dst_surface->Texture().handle, 0);          buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;      } -    glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left, -                      dst_rect.bottom, dst_rect.right, dst_rect.top, buffers, +    const auto& rect{src_params.GetRect()}; +    glBlitFramebuffer(rect.left, rect.bottom, rect.right, rect.top, rect.left, rect.bottom, +                      rect.right, rect.top, buffers,                        buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);      return true;  } +static void CopySurface(const Surface& src_surface, const Surface& dst_surface, +                        GLuint copy_pbo_handle, GLenum src_attachment = 0, +                        GLenum dst_attachment = 0, std::size_t cubemap_face = 0) { +    ASSERT_MSG(dst_attachment == 0, "Unimplemented"); + +    const auto& src_params{src_surface->GetSurfaceParams()}; +    const auto& dst_params{dst_surface->GetSurfaceParams()}; + +    auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type); +    auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type); + +    std::size_t buffer_size = +        std::max(src_params.size_in_bytes_total, dst_params.size_in_bytes_total); + +    glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); +    glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB); +    if (source_format.compressed) { +        glGetCompressedTextureImage(src_surface->Texture().handle, src_attachment, +                                    static_cast<GLsizei>(src_params.size_in_bytes_total), nullptr); +    } else { +        glGetTextureImage(src_surface->Texture().handle, src_attachment, source_format.format, +                          source_format.type, static_cast<GLsizei>(src_params.size_in_bytes_total), +                          nullptr); +    } +    // If the new texture is bigger than the previous one, we need to fill in the rest with data +    // from the CPU. +    if (src_params.size_in_bytes_total < dst_params.size_in_bytes_total) { +        // Upload the rest of the memory. +        if (dst_params.is_tiled) { +            // TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest +            // of the data in this case. Games like Super Mario Odyssey seem to hit this case +            // when drawing, it re-uses the memory of a previous texture as a bigger framebuffer +            // but it doesn't clear it beforehand, the texture is already full of zeros. +            LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during " +                              "reinterpretation but the texture is tiled."); +        } +        std::size_t remaining_size = +            dst_params.size_in_bytes_total - src_params.size_in_bytes_total; +        std::vector<u8> data(remaining_size); +        Memory::ReadBlock(dst_params.addr + src_params.size_in_bytes_total, data.data(), +                          data.size()); +        glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes_total, remaining_size, +                        data.data()); +    } + +    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + +    const GLsizei width{static_cast<GLsizei>( +        std::min(src_params.GetRect().GetWidth(), dst_params.GetRect().GetWidth()))}; +    const GLsizei height{static_cast<GLsizei>( +        std::min(src_params.GetRect().GetHeight(), dst_params.GetRect().GetHeight()))}; + +    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle); +    if (dest_format.compressed) { +        LOG_CRITICAL(HW_GPU, "Compressed copy is unimplemented!"); +        UNREACHABLE(); +    } else { +        switch (dst_params.target) { +        case SurfaceParams::SurfaceTarget::Texture1D: +            glTextureSubImage1D(dst_surface->Texture().handle, 0, 0, width, dest_format.format, +                                dest_format.type, nullptr); +            break; +        case SurfaceParams::SurfaceTarget::Texture2D: +            glTextureSubImage2D(dst_surface->Texture().handle, 0, 0, 0, width, height, +                                dest_format.format, dest_format.type, nullptr); +            break; +        case SurfaceParams::SurfaceTarget::Texture3D: +        case SurfaceParams::SurfaceTarget::Texture2DArray: +            glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0, 0, width, height, +                                static_cast<GLsizei>(dst_params.depth), dest_format.format, +                                dest_format.type, nullptr); +            break; +        case SurfaceParams::SurfaceTarget::TextureCubemap: +            glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0, +                                static_cast<GLint>(cubemap_face), width, height, 1, +                                dest_format.format, dest_format.type, nullptr); +            break; +        default: +            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", +                         static_cast<u32>(dst_params.target)); +            UNREACHABLE(); +        } +        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); +    } +} +  CachedSurface::CachedSurface(const SurfaceParams& params)      : params(params), gl_target(SurfaceTargetToGL(params.target)) {      texture.Create(); @@ -481,6 +672,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params)                             rect.GetWidth());              break;          case SurfaceParams::SurfaceTarget::Texture2D: +        case SurfaceParams::SurfaceTarget::TextureCubemap:              glTexStorage2D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,                             rect.GetWidth(), rect.GetHeight());              break; @@ -585,29 +777,39 @@ void CachedSurface::LoadGLBuffer() {      const u32 bytes_per_pixel = GetGLBytesPerPixel(params.pixel_format);      const u32 copy_size = params.width * params.height * bytes_per_pixel; +    const std::size_t total_size = copy_size * params.depth;      MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);      if (params.is_tiled) { +        gl_buffer.resize(total_size); +          // TODO(bunnei): This only unswizzles and copies a 2D texture - we do not yet know how to do          // this for 3D textures, etc.          switch (params.target) {          case SurfaceParams::SurfaceTarget::Texture2D:              // Pass impl. to the fallback code below              break; +        case SurfaceParams::SurfaceTarget::Texture2DArray: +        case SurfaceParams::SurfaceTarget::TextureCubemap: +            for (std::size_t index = 0; index < params.depth; ++index) { +                const std::size_t offset{index * copy_size}; +                morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)]( +                    params.width, params.block_height, params.height, gl_buffer.data() + offset, +                    copy_size, params.addr + offset); +            } +            break;          default:              LOG_CRITICAL(HW_GPU, "Unimplemented tiled load for target={}",                           static_cast<u32>(params.target));              UNREACHABLE();          } -        gl_buffer.resize(static_cast<std::size_t>(params.depth) * copy_size);          morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)](              params.width, params.block_height, params.height, gl_buffer.data(), copy_size,              params.addr);      } else { -        const u8* const texture_src_data_end{texture_src_data + -                                             (static_cast<std::size_t>(params.depth) * copy_size)}; +        const u8* const texture_src_data_end{texture_src_data + total_size};          gl_buffer.assign(texture_src_data, texture_src_data_end);      } @@ -634,7 +836,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle      // Load data from memory to the surface      const GLint x0 = static_cast<GLint>(rect.left);      const GLint y0 = static_cast<GLint>(rect.bottom); -    const std::size_t buffer_offset = +    std::size_t buffer_offset =          static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.width +                                   static_cast<std::size_t>(x0)) *          GetGLBytesPerPixel(params.pixel_format); @@ -663,15 +865,25 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle              glCompressedTexImage2D(                  SurfaceTargetToGL(params.target), 0, tuple.internal_format,                  static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height), 0, -                static_cast<GLsizei>(params.size_in_bytes), &gl_buffer[buffer_offset]); +                static_cast<GLsizei>(params.size_in_bytes_2d), &gl_buffer[buffer_offset]);              break;          case SurfaceParams::SurfaceTarget::Texture3D:          case SurfaceParams::SurfaceTarget::Texture2DArray:              glCompressedTexImage3D(                  SurfaceTargetToGL(params.target), 0, tuple.internal_format,                  static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height), -                static_cast<GLsizei>(params.depth), 0, static_cast<GLsizei>(params.size_in_bytes), -                &gl_buffer[buffer_offset]); +                static_cast<GLsizei>(params.depth), 0, +                static_cast<GLsizei>(params.size_in_bytes_total), &gl_buffer[buffer_offset]); +            break; +        case SurfaceParams::SurfaceTarget::TextureCubemap: +            for (std::size_t face = 0; face < params.depth; ++face) { +                glCompressedTexImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), +                                       0, tuple.internal_format, static_cast<GLsizei>(params.width), +                                       static_cast<GLsizei>(params.height), 0, +                                       static_cast<GLsizei>(params.size_in_bytes_2d), +                                       &gl_buffer[buffer_offset]); +                buffer_offset += params.size_in_bytes_2d; +            }              break;          default:              LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", @@ -679,8 +891,8 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle              UNREACHABLE();              glCompressedTexImage2D(                  GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width), -                static_cast<GLsizei>(params.height), 0, static_cast<GLsizei>(params.size_in_bytes), -                &gl_buffer[buffer_offset]); +                static_cast<GLsizei>(params.height), 0, +                static_cast<GLsizei>(params.size_in_bytes_2d), &gl_buffer[buffer_offset]);          }      } else { @@ -703,6 +915,15 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle                              static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,                              tuple.type, &gl_buffer[buffer_offset]);              break; +        case SurfaceParams::SurfaceTarget::TextureCubemap: +            for (std::size_t face = 0; face < params.depth; ++face) { +                glTexSubImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), 0, x0, +                                y0, static_cast<GLsizei>(rect.GetWidth()), +                                static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, +                                &gl_buffer[buffer_offset]); +                buffer_offset += params.size_in_bytes_2d; +            } +            break;          default:              LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",                           static_cast<u32>(params.target)); @@ -722,8 +943,9 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() {      copy_pbo.Create();  } -Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { -    return GetSurface(SurfaceParams::CreateForTexture(config)); +Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, +                                                 const GLShader::SamplerEntry& entry) { +    return GetSurface(SurfaceParams::CreateForTexture(config, entry));  }  Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) { @@ -811,98 +1033,69 @@ Surface RasterizerCacheOpenGL::GetUncachedSurface(const SurfaceParams& params) {      return surface;  } -Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, +Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,                                                 const SurfaceParams& new_params) {      // Verify surface is compatible for blitting -    const auto& params{surface->GetSurfaceParams()}; +    auto old_params{old_surface->GetSurfaceParams()};      // Get a new surface with the new parameters, and blit the previous surface to it      Surface new_surface{GetUncachedSurface(new_params)}; -    if (params.pixel_format == new_params.pixel_format || -        !Settings::values.use_accurate_framebuffers) { -        // If the format is the same, just do a framebuffer blit. This is significantly faster than -        // using PBOs. The is also likely less accurate, as textures will be converted rather than -        // reinterpreted. - -        BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle, -                     params.GetRect(), params.type, read_framebuffer.handle, -                     draw_framebuffer.handle); -    } else { -        // When use_accurate_framebuffers setting is enabled, perform a more accurate surface copy, -        // where pixels are reinterpreted as a new format (without conversion). This code path uses -        // OpenGL PBOs and is quite slow. - -        auto source_format = GetFormatTuple(params.pixel_format, params.component_type); -        auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type); - -        std::size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes()); +    // If the format is the same, just do a framebuffer blit. This is significantly faster than +    // using PBOs. The is also likely less accurate, as textures will be converted rather than +    // reinterpreted. When use_accurate_framebuffers setting is enabled, perform a more accurate +    // surface copy, where pixels are reinterpreted as a new format (without conversion). This +    // code path uses OpenGL PBOs and is quite slow. +    const bool is_blit{old_params.pixel_format == new_params.pixel_format || +                       !Settings::values.use_accurate_framebuffers}; -        glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo.handle); -        glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB); -        if (source_format.compressed) { -            glGetCompressedTextureImage(surface->Texture().handle, 0, -                                        static_cast<GLsizei>(params.SizeInBytes()), nullptr); +    switch (new_params.target) { +    case SurfaceParams::SurfaceTarget::Texture2D: +        if (is_blit) { +            BlitSurface(old_surface, new_surface, read_framebuffer.handle, draw_framebuffer.handle);          } else { -            glGetTextureImage(surface->Texture().handle, 0, source_format.format, -                              source_format.type, static_cast<GLsizei>(params.SizeInBytes()), -                              nullptr); +            CopySurface(old_surface, new_surface, copy_pbo.handle);          } -        // If the new texture is bigger than the previous one, we need to fill in the rest with data -        // from the CPU. -        if (params.SizeInBytes() < new_params.SizeInBytes()) { -            // Upload the rest of the memory. -            if (new_params.is_tiled) { -                // TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest -                // of the data in this case. Games like Super Mario Odyssey seem to hit this case -                // when drawing, it re-uses the memory of a previous texture as a bigger framebuffer -                // but it doesn't clear it beforehand, the texture is already full of zeros. -                LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during " -                                  "reinterpretation but the texture is tiled."); -            } -            std::size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes(); -            std::vector<u8> data(remaining_size); -            Memory::ReadBlock(new_params.addr + params.SizeInBytes(), data.data(), data.size()); -            glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size, -                            data.data()); +        break; +    case SurfaceParams::SurfaceTarget::TextureCubemap: { +        if (old_params.rt.array_mode != 1) { +            // TODO(bunnei): This is used by Breath of the Wild, I'm not sure how to implement this +            // yet (array rendering used as a cubemap texture). +            LOG_CRITICAL(HW_GPU, "Unhandled rendertarget array_mode {}", old_params.rt.array_mode); +            UNREACHABLE(); +            return new_surface;          } -        glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - -        const auto& dest_rect{new_params.GetRect()}; - -        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo.handle); -        if (dest_format.compressed) { -            LOG_CRITICAL(HW_GPU, "Compressed copy is unimplemented!"); -            UNREACHABLE(); -        } else { -            switch (new_params.target) { -            case SurfaceParams::SurfaceTarget::Texture1D: -                glTextureSubImage1D(new_surface->Texture().handle, 0, 0, -                                    static_cast<GLsizei>(dest_rect.GetWidth()), dest_format.format, -                                    dest_format.type, nullptr); -                break; -            case SurfaceParams::SurfaceTarget::Texture2D: -                glTextureSubImage2D(new_surface->Texture().handle, 0, 0, 0, -                                    static_cast<GLsizei>(dest_rect.GetWidth()), -                                    static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format, -                                    dest_format.type, nullptr); -                break; -            case SurfaceParams::SurfaceTarget::Texture3D: -            case SurfaceParams::SurfaceTarget::Texture2DArray: -                glTextureSubImage3D(new_surface->Texture().handle, 0, 0, 0, 0, -                                    static_cast<GLsizei>(dest_rect.GetWidth()), -                                    static_cast<GLsizei>(dest_rect.GetHeight()), -                                    static_cast<GLsizei>(new_params.depth), dest_format.format, -                                    dest_format.type, nullptr); -                break; -            default: -                LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", -                             static_cast<u32>(params.target)); -                UNREACHABLE(); +        // This seems to be used for render-to-cubemap texture +        ASSERT_MSG(old_params.target == SurfaceParams::SurfaceTarget::Texture2D, "Unexpected"); +        ASSERT_MSG(old_params.pixel_format == new_params.pixel_format, "Unexpected"); +        ASSERT_MSG(old_params.rt.base_layer == 0, "Unimplemented"); + +        // TODO(bunnei): Verify the below - this stride seems to be in 32-bit words, not pixels. +        // Tested with Splatoon 2, Super Mario Odyssey, and Breath of the Wild. +        const std::size_t byte_stride{old_params.rt.layer_stride * sizeof(u32)}; + +        for (std::size_t index = 0; index < new_params.depth; ++index) { +            Surface face_surface{TryGetReservedSurface(old_params)}; +            ASSERT_MSG(face_surface, "Unexpected"); + +            if (is_blit) { +                BlitSurface(face_surface, new_surface, read_framebuffer.handle, +                            draw_framebuffer.handle, face_surface->GetSurfaceParams().rt.index, +                            new_params.rt.index, index); +            } else { +                CopySurface(face_surface, new_surface, copy_pbo.handle, +                            face_surface->GetSurfaceParams().rt.index, new_params.rt.index, index);              } + +            old_params.addr += byte_stride;          } -        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); +        break; +    } +    default: +        LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", +                     static_cast<u32>(new_params.target)); +        UNREACHABLE();      }      return new_surface; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 80c5f324b..49025a3fe 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -9,12 +9,14 @@  #include <memory>  #include <vector> +#include "common/alignment.h"  #include "common/common_types.h"  #include "common/hash.h"  #include "common/math_util.h"  #include "video_core/engines/maxwell_3d.h"  #include "video_core/rasterizer_cache.h"  #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_gen.h"  #include "video_core/textures/texture.h"  namespace OpenGL { @@ -126,6 +128,8 @@ struct SurfaceParams {          case Tegra::Texture::TextureType::Texture2D:          case Tegra::Texture::TextureType::Texture2DNoMipmap:              return SurfaceTarget::Texture2D; +        case Tegra::Texture::TextureType::TextureCubemap: +            return SurfaceTarget::TextureCubemap;          case Tegra::Texture::TextureType::Texture1DArray:              return SurfaceTarget::Texture1DArray;          case Tegra::Texture::TextureType::Texture2DArray: @@ -689,17 +693,23 @@ struct SurfaceParams {      /// Returns the rectangle corresponding to this surface      MathUtil::Rectangle<u32> GetRect() const; -    /// Returns the size of this surface in bytes, adjusted for compression -    std::size_t SizeInBytes() const { +    /// Returns the size of this surface as a 2D texture in bytes, adjusted for compression +    std::size_t SizeInBytes2D() const {          const u32 compression_factor{GetCompressionFactor(pixel_format)};          ASSERT(width % compression_factor == 0);          ASSERT(height % compression_factor == 0);          return (width / compression_factor) * (height / compression_factor) * -               GetFormatBpp(pixel_format) * depth / CHAR_BIT; +               GetFormatBpp(pixel_format) / CHAR_BIT; +    } + +    /// Returns the total size of this surface in bytes, adjusted for compression +    std::size_t SizeInBytesTotal() const { +        return SizeInBytes2D() * depth;      }      /// Creates SurfaceParams from a texture configuration -    static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); +    static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config, +                                          const GLShader::SamplerEntry& entry);      /// Creates SurfaceParams from a framebuffer configuration      static SurfaceParams CreateForFramebuffer(std::size_t index); @@ -711,8 +721,9 @@ struct SurfaceParams {      /// Checks if surfaces are compatible for caching      bool IsCompatibleSurface(const SurfaceParams& other) const { -        return std::tie(pixel_format, type, width, height) == -               std::tie(other.pixel_format, other.type, other.width, other.height); +        return std::tie(pixel_format, type, width, height, target, depth) == +               std::tie(other.pixel_format, other.type, other.width, other.height, other.target, +                        other.depth);      }      VAddr addr; @@ -725,8 +736,18 @@ struct SurfaceParams {      u32 height;      u32 depth;      u32 unaligned_height; -    std::size_t size_in_bytes; +    std::size_t size_in_bytes_total; +    std::size_t size_in_bytes_2d;      SurfaceTarget target; +    u32 max_mip_level; + +    // Render target specific parameters, not used in caching +    struct { +        u32 index; +        u32 array_mode; +        u32 layer_stride; +        u32 base_layer; +    } rt;  };  }; // namespace OpenGL @@ -736,6 +757,7 @@ struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {      static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) {          SurfaceReserveKey res;          res.state = params; +        res.state.rt = {}; // Ignore rt config in caching          return res;      }  }; @@ -759,7 +781,7 @@ public:      }      std::size_t GetSizeInBytes() const { -        return params.size_in_bytes; +        return params.size_in_bytes_total;      }      const OGLTexture& Texture() const { @@ -800,7 +822,8 @@ public:      RasterizerCacheOpenGL();      /// Get a surface based on the texture configuration -    Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); +    Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, +                              const GLShader::SamplerEntry& entry);      /// Get the depth surface based on the framebuffer configuration      Surface GetDepthBufferSurface(bool preserve_contents); @@ -822,7 +845,7 @@ private:      Surface GetUncachedSurface(const SurfaceParams& params);      /// Recreates a surface with new parameters -    Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params); +    Surface RecreateSurface(const Surface& old_surface, const SurfaceParams& new_params);      /// Reserves a unique surface that can be reused later      void ReserveSurface(const Surface& surface); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index b3e95187e..320babdb1 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -2000,6 +2000,14 @@ private:                      }                      break;                  } +                case Tegra::Shader::TextureType::TextureCube: { +                    ASSERT_MSG(!is_array, "Unimplemented"); +                    std::string x = regs.GetRegisterAsFloat(instr.gpr8); +                    std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); +                    std::string z = regs.GetRegisterAsFloat(instr.gpr20); +                    coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; +                    break; +                }                  default:                      LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",                                   static_cast<u32>(texture_type)); diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index c2fb824b2..14aea4838 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -165,6 +165,8 @@ struct TICEntry {          // High 16 bits of the pitch value          BitField<0, 16, u32> pitch_high; + +        BitField<28, 4, u32> max_mip_level;      };      union {          BitField<0, 16, u32> width_minus_1; | 
