diff options
22 files changed, 166 insertions, 164 deletions
diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp index f53a8d193..200c6489a 100644 --- a/src/common/telemetry.cpp +++ b/src/common/telemetry.cpp @@ -44,20 +44,6 @@ template class Field<std::string>;  template class Field<const char*>;  template class Field<std::chrono::microseconds>; -#ifdef ARCHITECTURE_x86_64 -static const char* CpuVendorToStr(Common::CPUVendor vendor) { -    switch (vendor) { -    case Common::CPUVendor::INTEL: -        return "Intel"; -    case Common::CPUVendor::AMD: -        return "Amd"; -    case Common::CPUVendor::OTHER: -        return "Other"; -    } -    UNREACHABLE(); -} -#endif -  void AppendBuildInfo(FieldCollection& fc) {      const bool is_git_dirty{std::strstr(Common::g_scm_desc, "dirty") != nullptr};      fc.AddField(FieldType::App, "Git_IsDirty", is_git_dirty); @@ -71,7 +57,6 @@ void AppendCPUInfo(FieldCollection& fc) {  #ifdef ARCHITECTURE_x86_64      fc.AddField(FieldType::UserSystem, "CPU_Model", Common::GetCPUCaps().cpu_string);      fc.AddField(FieldType::UserSystem, "CPU_BrandString", Common::GetCPUCaps().brand_string); -    fc.AddField(FieldType::UserSystem, "CPU_Vendor", CpuVendorToStr(Common::GetCPUCaps().vendor));      fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);      fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);      fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2); diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index 2dfcd39c8..c9349a6b4 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -3,8 +3,6 @@  // Refer to the license.txt file included.  
#include <cstring> -#include <string> -#include <thread>  #include "common/common_types.h"  #include "common/x64/cpu_detect.h" @@ -51,8 +49,6 @@ namespace Common {  static CPUCaps Detect() {      CPUCaps caps = {}; -    caps.num_cores = std::thread::hardware_concurrency(); -      // Assumes the CPU supports the CPUID instruction. Those that don't would likely not support      // yuzu at all anyway @@ -70,12 +66,6 @@ static CPUCaps Detect() {      __cpuid(cpu_id, 0x80000000);      u32 max_ex_fn = cpu_id[0]; -    if (!strcmp(caps.brand_string, "GenuineIntel")) -        caps.vendor = CPUVendor::INTEL; -    else if (!strcmp(caps.brand_string, "AuthenticAMD")) -        caps.vendor = CPUVendor::AMD; -    else -        caps.vendor = CPUVendor::OTHER;      // Set reasonable default brand string even if brand string not available      strcpy(caps.cpu_string, caps.brand_string); @@ -96,15 +86,9 @@ static CPUCaps Detect() {              caps.sse4_1 = true;          if ((cpu_id[2] >> 20) & 1)              caps.sse4_2 = true; -        if ((cpu_id[2] >> 22) & 1) -            caps.movbe = true;          if ((cpu_id[2] >> 25) & 1)              caps.aes = true; -        if ((cpu_id[3] >> 24) & 1) { -            caps.fxsave_fxrstor = true; -        } -          // AVX support requires 3 separate checks:          //  - Is the AVX bit set in CPUID?          //  - Is the XSAVE bit set in CPUID? 
@@ -129,8 +113,6 @@ static CPUCaps Detect() {          }      } -    caps.flush_to_zero = caps.sse; -      if (max_ex_fn >= 0x80000004) {          // Extract CPU model string          __cpuid(cpu_id, 0x80000002); @@ -144,14 +126,8 @@ static CPUCaps Detect() {      if (max_ex_fn >= 0x80000001) {          // Check for more features          __cpuid(cpu_id, 0x80000001); -        if (cpu_id[2] & 1) -            caps.lahf_sahf_64 = true; -        if ((cpu_id[2] >> 5) & 1) -            caps.lzcnt = true;          if ((cpu_id[2] >> 16) & 1)              caps.fma4 = true; -        if ((cpu_id[3] >> 29) & 1) -            caps.long_mode = true;      }      return caps; @@ -162,48 +138,4 @@ const CPUCaps& GetCPUCaps() {      return caps;  } -std::string GetCPUCapsString() { -    auto caps = GetCPUCaps(); - -    std::string sum(caps.cpu_string); -    sum += " ("; -    sum += caps.brand_string; -    sum += ")"; - -    if (caps.sse) -        sum += ", SSE"; -    if (caps.sse2) { -        sum += ", SSE2"; -        if (!caps.flush_to_zero) -            sum += " (without DAZ)"; -    } - -    if (caps.sse3) -        sum += ", SSE3"; -    if (caps.ssse3) -        sum += ", SSSE3"; -    if (caps.sse4_1) -        sum += ", SSE4.1"; -    if (caps.sse4_2) -        sum += ", SSE4.2"; -    if (caps.avx) -        sum += ", AVX"; -    if (caps.avx2) -        sum += ", AVX2"; -    if (caps.bmi1) -        sum += ", BMI1"; -    if (caps.bmi2) -        sum += ", BMI2"; -    if (caps.fma) -        sum += ", FMA"; -    if (caps.aes) -        sum += ", AES"; -    if (caps.movbe) -        sum += ", MOVBE"; -    if (caps.long_mode) -        sum += ", 64-bit support"; - -    return sum; -} -  } // namespace Common diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index 0af3a8adb..20f2ba234 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h @@ -4,23 +4,12 @@  #pragma once -#include <string> -  namespace Common { -/// x86/x64 CPU vendors that may be detected 
by this module -enum class CPUVendor { -    INTEL, -    AMD, -    OTHER, -}; -  /// x86/x64 CPU capabilities that may be detected by this module  struct CPUCaps { -    CPUVendor vendor;      char cpu_string[0x21];      char brand_string[0x41]; -    int num_cores;      bool sse;      bool sse2;      bool sse3; @@ -35,20 +24,6 @@ struct CPUCaps {      bool fma;      bool fma4;      bool aes; - -    // Support for the FXSAVE and FXRSTOR instructions -    bool fxsave_fxrstor; - -    bool movbe; - -    // This flag indicates that the hardware supports some mode in which denormal inputs and outputs -    // are automatically set to (signed) zero. -    bool flush_to_zero; - -    // Support for LAHF and SAHF instructions in 64-bit mode -    bool lahf_sahf_64; - -    bool long_mode;  };  /** @@ -57,10 +32,4 @@ struct CPUCaps {   */  const CPUCaps& GetCPUCaps(); -/** - * Gets a string summary of the name and supported capabilities of the host CPU - * @return String summary - */ -std::string GetCPUCapsString(); -  } // namespace Common diff --git a/src/core/hle/kernel/physical_memory.h b/src/core/hle/kernel/physical_memory.h index 090565310..b689e8e8b 100644 --- a/src/core/hle/kernel/physical_memory.h +++ b/src/core/hle/kernel/physical_memory.h @@ -14,6 +14,9 @@ namespace Kernel {  // - Second to ensure all host backing memory used is aligned to 256 bytes due  // to strict alignment restrictions on GPU memory. -using PhysicalMemory = std::vector<u8, Common::AlignmentAllocator<u8, 256>>; +using PhysicalMemoryVector = std::vector<u8, Common::AlignmentAllocator<u8, 256>>; +class PhysicalMemory final : public PhysicalMemoryVector { +    using PhysicalMemoryVector::PhysicalMemoryVector; +};  } // namespace Kernel diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index a9a20ef76..0b3500fce 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp @@ -3,6 +3,7 @@  // Refer to the license.txt file included.  
#include <algorithm> +#include <cstring>  #include <iterator>  #include <utility>  #include "common/alignment.h" @@ -269,18 +270,9 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {      // If necessary, expand backing vector to cover new heap extents in      // the case of allocating. Otherwise, shrink the backing memory,      // if a smaller heap has been requested. -    const u64 old_heap_size = GetCurrentHeapSize(); -    if (size > old_heap_size) { -        const u64 alloc_size = size - old_heap_size; - -        heap_memory->insert(heap_memory->end(), alloc_size, 0); -        RefreshMemoryBlockMappings(heap_memory.get()); -    } else if (size < old_heap_size) { -        heap_memory->resize(size); -        heap_memory->shrink_to_fit(); - -        RefreshMemoryBlockMappings(heap_memory.get()); -    } +    heap_memory->resize(size); +    heap_memory->shrink_to_fit(); +    RefreshMemoryBlockMappings(heap_memory.get());      heap_end = heap_region_base + size;      ASSERT(GetCurrentHeapSize() == heap_memory->size()); @@ -752,24 +744,20 @@ void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryAre      // Always merge allocated memory blocks, even when they don't share the same backing block.      if (left.type == VMAType::AllocatedMemoryBlock &&          (left.backing_block != right.backing_block || left.offset + left.size != right.offset)) { -        const auto right_begin = right.backing_block->begin() + right.offset; -        const auto right_end = right_begin + right.size;          // Check if we can save work.          if (left.offset == 0 && left.size == left.backing_block->size()) {              // Fast case: left is an entire backing block. 
-            left.backing_block->insert(left.backing_block->end(), right_begin, right_end); +            left.backing_block->resize(left.size + right.size); +            std::memcpy(left.backing_block->data() + left.size, +                        right.backing_block->data() + right.offset, right.size);          } else {              // Slow case: make a new memory block for left and right. -            const auto left_begin = left.backing_block->begin() + left.offset; -            const auto left_end = left_begin + left.size; -            const auto left_size = static_cast<std::size_t>(std::distance(left_begin, left_end)); -            const auto right_size = static_cast<std::size_t>(std::distance(right_begin, right_end)); -              auto new_memory = std::make_shared<PhysicalMemory>(); -            new_memory->reserve(left_size + right_size); -            new_memory->insert(new_memory->end(), left_begin, left_end); -            new_memory->insert(new_memory->end(), right_begin, right_end); +            new_memory->resize(left.size + right.size); +            std::memcpy(new_memory->data(), left.backing_block->data() + left.offset, left.size); +            std::memcpy(new_memory->data() + left.size, right.backing_block->data() + right.offset, +                        right.size);              left.backing_block = std::move(new_memory);              left.offset = 0; @@ -792,8 +780,7 @@ void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {          memory.UnmapRegion(page_table, vma.base, vma.size);          break;      case VMAType::AllocatedMemoryBlock: -        memory.MapMemoryRegion(page_table, vma.base, vma.size, -                               vma.backing_block->data() + vma.offset); +        memory.MapMemoryRegion(page_table, vma.base, vma.size, *vma.backing_block, vma.offset);          break;      case VMAType::BackingMemory:          memory.MapMemoryRegion(page_table, vma.base, vma.size, vma.backing_memory); diff --git 
a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp index f1795fdd6..8908e5328 100644 --- a/src/core/loader/elf.cpp +++ b/src/core/loader/elf.cpp @@ -335,7 +335,8 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) {              codeset_segment->addr = segment_addr;              codeset_segment->size = aligned_size; -            memcpy(&program_image[current_image_position], GetSegmentPtr(i), p->p_filesz); +            std::memcpy(program_image.data() + current_image_position, GetSegmentPtr(i), +                        p->p_filesz);              current_image_position += aligned_size;          }      } diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp index 474b55cb1..092103abe 100644 --- a/src/core/loader/kip.cpp +++ b/src/core/loader/kip.cpp @@ -2,6 +2,7 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. +#include <cstring>  #include "core/file_sys/kernel_executable.h"  #include "core/file_sys/program_metadata.h"  #include "core/gdbstub/gdbstub.h" @@ -76,8 +77,8 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::Process& process) {          segment.addr = offset;          segment.offset = offset;          segment.size = PageAlignSize(static_cast<u32>(data.size())); -        program_image.resize(offset); -        program_image.insert(program_image.end(), data.begin(), data.end()); +        program_image.resize(offset + data.size()); +        std::memcpy(program_image.data() + offset, data.data(), data.size());      };      load_segment(codeset.CodeSegment(), kip->GetTextSection(), kip->GetTextOffset()); diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index f629892ae..515c5accb 100644 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp @@ -3,6 +3,7 @@  // Refer to the license.txt file included.  
#include <cinttypes> +#include <cstring>  #include <vector>  #include "common/common_funcs.h" @@ -96,8 +97,9 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,          if (nso_header.IsSegmentCompressed(i)) {              data = DecompressSegment(data, nso_header.segments[i]);          } -        program_image.resize(nso_header.segments[i].location); -        program_image.insert(program_image.end(), data.begin(), data.end()); +        program_image.resize(nso_header.segments[i].location + data.size()); +        std::memcpy(program_image.data() + nso_header.segments[i].location, data.data(), +                    data.size());          codeset.segments[i].addr = nso_header.segments[i].location;          codeset.segments[i].offset = nso_header.segments[i].location;          codeset.segments[i].size = PageAlignSize(static_cast<u32>(data.size())); @@ -139,12 +141,12 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,          std::vector<u8> pi_header;          pi_header.insert(pi_header.begin(), reinterpret_cast<u8*>(&nso_header),                           reinterpret_cast<u8*>(&nso_header) + sizeof(NSOHeader)); -        pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.begin(), -                         program_image.end()); +        pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.data(), +                         program_image.data() + program_image.size());          pi_header = pm->PatchNSO(pi_header, file.GetName()); -        std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.begin()); +        std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.data());      }      // Apply cheats if they exist and the program has a valid title ID diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 3c2a29d9b..f0888327f 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -14,6 +14,7 @@  #include "common/swap.h"  
#include "core/arm/arm_interface.h"  #include "core/core.h" +#include "core/hle/kernel/physical_memory.h"  #include "core/hle/kernel/process.h"  #include "core/hle/kernel/vm_manager.h"  #include "core/memory.h" @@ -38,6 +39,11 @@ struct Memory::Impl {          system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width);      } +    void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, +                         Kernel::PhysicalMemory& memory, VAddr offset) { +        MapMemoryRegion(page_table, base, size, memory.data() + offset); +    } +      void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {          ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);          ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); @@ -601,6 +607,11 @@ void Memory::SetCurrentPageTable(Kernel::Process& process) {      impl->SetCurrentPageTable(process);  } +void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, +                             Kernel::PhysicalMemory& memory, VAddr offset) { +    impl->MapMemoryRegion(page_table, base, size, memory, offset); +} +  void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {      impl->MapMemoryRegion(page_table, base, size, target);  } diff --git a/src/core/memory.h b/src/core/memory.h index 1428a6d60..8913a9da4 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -19,8 +19,9 @@ class System;  }  namespace Kernel { +class PhysicalMemory;  class Process; -} +} // namespace Kernel  namespace Memory { @@ -66,6 +67,19 @@ public:      void SetCurrentPageTable(Kernel::Process& process);      /** +     * Maps a physical buffer onto a region of the emulated process address space. +     * +     * @param page_table The page table of the emulated process. +     * @param base       The address to start mapping at. Must be page-aligned. 
+     * @param size       The amount of bytes to map. Must be page-aligned. +     * @param memory     Physical buffer with the memory backing the mapping. Must be of length +     *                   at least `size + offset`. +     * @param offset     The offset within the physical memory. Must be page-aligned. +     */ +    void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, +                         Kernel::PhysicalMemory& memory, VAddr offset); + +    /**       * Maps an allocated buffer onto a region of the emulated process address space.       *       * @param page_table The page table of the emulated process. diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 16f95b77d..ee79260fc 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1018,7 +1018,14 @@ public:                      }                  } instanced_arrays; -                INSERT_UNION_PADDING_WORDS(0x6); +                INSERT_UNION_PADDING_WORDS(0x4); + +                union { +                    BitField<0, 1, u32> enable; +                    BitField<4, 8, u32> unk4; +                } vp_point_size; + +                INSERT_UNION_PADDING_WORDS(1);                  Cull cull; @@ -1503,6 +1510,7 @@ ASSERT_REG_POSITION(primitive_restart, 0x591);  ASSERT_REG_POSITION(index_array, 0x5F2);  ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);  ASSERT_REG_POSITION(instanced_arrays, 0x620); +ASSERT_REG_POSITION(vp_point_size, 0x644);  ASSERT_REG_POSITION(cull, 0x646);  ASSERT_REG_POSITION(pixel_center_integer, 0x649);  ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 57b57c647..6f98bd827 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -215,6 +215,18 @@ enum class F2fRoundingOp : u64 {      Trunc = 11,  }; +enum class AtomicOp : u64 { 
+    Add = 0, +    Min = 1, +    Max = 2, +    Inc = 3, +    Dec = 4, +    And = 5, +    Or = 6, +    Xor = 7, +    Exch = 8, +}; +  enum class UniformType : u64 {      UnsignedByte = 0,      SignedByte = 1, @@ -236,6 +248,13 @@ enum class StoreType : u64 {      Bits128 = 6,  }; +enum class AtomicType : u64 { +    U32 = 0, +    S32 = 1, +    U64 = 2, +    S64 = 3, +}; +  enum class IMinMaxExchange : u64 {      None = 0,      XLo = 1, @@ -939,6 +958,16 @@ union Instruction {      } stg;      union { +        BitField<52, 4, AtomicOp> operation; +        BitField<28, 2, AtomicType> type; +        BitField<30, 22, s64> offset; + +        s32 GetImmediateOffset() const { +            return static_cast<s32>(offset << 2); +        } +    } atoms; + +    union {          BitField<32, 1, PhysicalAttributeDirection> direction;          BitField<47, 3, AttributeSize> size;          BitField<20, 11, u64> address; @@ -1659,9 +1688,10 @@ public:          ST_A,          ST_L,          ST_S, -        ST,   // Store in generic memory -        STG,  // Store in global memory -        AL2P, // Transforms attribute memory into physical memory +        ST,    // Store in generic memory +        STG,   // Store in global memory +        ATOMS, // Atomic operation on shared memory +        AL2P,  // Transforms attribute memory into physical memory          TEX,          TEX_B,  // Texture Load Bindless          TXQ,    // Texture Query @@ -1964,6 +1994,7 @@ private:              INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),              INST("101-------------", Id::ST, Type::Memory, "ST"),              INST("1110111011011---", Id::STG, Type::Memory, "STG"), +            INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),              INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),              INST("110000----111---", Id::TEX, Type::Texture, "TEX"),              INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), diff --git 
a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 672051102..c428f06e4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -1272,6 +1272,7 @@ void RasterizerOpenGL::SyncPointState() {      const auto& regs = system.GPU().Maxwell3D().regs;      // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid      // in OpenGL). +    state.point.program_control = regs.vp_point_size.enable != 0;      state.point.size = std::max(1.0f, regs.point_size);  } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index e9ceca768..2996aaf08 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1856,6 +1856,16 @@ private:                  Type::Uint};      } +    template <const std::string_view& opname, Type type> +    Expression Atomic(Operation operation) { +        ASSERT(stage == ShaderType::Compute); +        auto& smem = std::get<SmemNode>(*operation[0]); + +        return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(), +                            Visit(operation[1]).As(type)), +                type}; +    } +      Expression Branch(Operation operation) {          const auto target = std::get_if<ImmediateNode>(&*operation[0]);          UNIMPLEMENTED_IF(!target); @@ -2194,6 +2204,8 @@ private:          &GLSLDecompiler::AtomicImage<Func::Xor>,          &GLSLDecompiler::AtomicImage<Func::Exchange>, +        &GLSLDecompiler::Atomic<Func::Add, Type::Uint>, +          &GLSLDecompiler::Branch,          &GLSLDecompiler::BranchIndirect,          &GLSLDecompiler::PushFlowStack, diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index df2e2395a..cc185e9e1 100644 --- 
a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -127,6 +127,7 @@ void OpenGLState::ApplyClipDistances() {  }  void OpenGLState::ApplyPointSize() { +    Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control);      if (UpdateValue(cur_state.point.size, point.size)) {          glPointSize(point.size);      } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index fb180f302..678e5cd89 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -131,7 +131,8 @@ public:      std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports;      struct { -        float size = 1.0f; // GL_POINT_SIZE +        bool program_control = false; // GL_PROGRAM_POINT_SIZE +        GLfloat size = 1.0f;          // GL_POINT_SIZE      } point;      struct { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index b790b0ef4..e95eb069e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -44,7 +44,7 @@ struct FormatTuple {  constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{      {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false},                        // ABGR8U -    {GL_RGBA8, GL_RGBA, GL_BYTE, false},                                            // ABGR8S +    {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false},                                      // ABGR8S      {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false},                         // ABGR8UI      {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false},                        // B5G6R5U      {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false},                  // A2B10G10R10U @@ -83,9 +83,9 @@ constexpr std::array<FormatTuple, 
VideoCore::Surface::MaxPixelFormat> tex_format      {GL_RGB32F, GL_RGB, GL_FLOAT, false},                                           // RGB32F      {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false},                 // RGBA8_SRGB      {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false},                                       // RG8U -    {GL_RG8, GL_RG, GL_BYTE, false},                                                // RG8S +    {GL_RG8_SNORM, GL_RG, GL_BYTE, false},                                          // RG8S      {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false},                             // RG32UI -    {GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, false},                                   // RGBX16F +    {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false},                                     // RGBX16F      {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false},                             // R32UI      {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                                   // ASTC_2D_8X8      {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                                   // ASTC_2D_8X5 @@ -253,14 +253,12 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {          glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));          glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));          const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); +        u8* const mip_data = staging_buffer.data() + mip_offset; +        const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level));          if (is_compressed) { -            glGetCompressedTextureImage(texture.handle, level, -                                        static_cast<GLsizei>(params.GetHostMipmapSize(level)), -                                        staging_buffer.data() + mip_offset); +            glGetCompressedTextureImage(texture.handle, level, size, mip_data);          } else { -            
glGetTextureImage(texture.handle, level, format, type, -                              static_cast<GLsizei>(params.GetHostMipmapSize(level)), -                              staging_buffer.data() + mip_offset); +            glGetTextureImage(texture.handle, level, format, type, size, mip_data);          }      }  } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 8fe852ce8..0cf97cafa 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1796,6 +1796,11 @@ private:          return {};      } +    Expression UAtomicAdd(Operation) { +        UNIMPLEMENTED(); +        return {}; +    } +      Expression Branch(Operation operation) {          const auto& target = std::get<ImmediateNode>(*operation[0]);          OpStore(jmp_to, Constant(t_uint, target.GetValue())); @@ -2373,6 +2378,8 @@ private:          &SPIRVDecompiler::AtomicImageXor,          &SPIRVDecompiler::AtomicImageExchange, +        &SPIRVDecompiler::UAtomicAdd, +          &SPIRVDecompiler::Branch,          &SPIRVDecompiler::BranchIndirect,          &SPIRVDecompiler::PushFlowStack, diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 8cc84e935..7591a715f 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -16,6 +16,8 @@  namespace VideoCommon::Shader { +using Tegra::Shader::AtomicOp; +using Tegra::Shader::AtomicType;  using Tegra::Shader::Attribute;  using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode; @@ -333,6 +335,23 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {          }          break;      } +    case OpCode::Id::ATOMS: { +        UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", +                             static_cast<int>(instr.atoms.operation.Value())); +        
UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}", +                             static_cast<int>(instr.atoms.type.Value())); + +        const s32 offset = instr.atoms.GetImmediateOffset(); +        Node address = GetRegister(instr.gpr8); +        address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset)); + +        Node memory = GetSharedMemory(std::move(address)); +        Node data = GetRegister(instr.gpr20); + +        Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data)); +        SetRegister(bb, instr.gpr0, std::move(value)); +        break; +    }      case OpCode::Id::AL2P: {          // Ignore al2p.direction since we don't care about it. diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 4e155542a..075c7d07c 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -162,6 +162,8 @@ enum class OperationCode {      AtomicImageXor,      /// (MetaImage, int[N] coords) -> void      AtomicImageExchange, /// (MetaImage, int[N] coords) -> void +    UAtomicAdd, /// (smem, uint) -> uint +      Branch,         /// (uint branch_target) -> void      BranchIndirect, /// (uint branch_target) -> void      PushFlowStack,  /// (uint branch_target) -> void diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index b21fbf826..b5dd3e0d6 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -526,19 +526,30 @@ void GMainWindow::InitializeHotkeys() {      const QString main_window = QStringLiteral("Main Window");      const QString load_file = QStringLiteral("Load File"); +    const QString load_amiibo = QStringLiteral("Load Amiibo");      const QString exit_yuzu = QStringLiteral("Exit yuzu"); +    const QString restart_emulation = QStringLiteral("Restart Emulation");      const QString stop_emulation = QStringLiteral("Stop Emulation");      const QString toggle_filter_bar = QStringLiteral("Toggle Filter Bar");      const QString toggle_status_bar = 
QStringLiteral("Toggle Status Bar");      const QString fullscreen = QStringLiteral("Fullscreen"); +    const QString capture_screenshot = QStringLiteral("Capture Screenshot");      ui.action_Load_File->setShortcut(hotkey_registry.GetKeySequence(main_window, load_file));      ui.action_Load_File->setShortcutContext(          hotkey_registry.GetShortcutContext(main_window, load_file)); +    ui.action_Load_Amiibo->setShortcut(hotkey_registry.GetKeySequence(main_window, load_amiibo)); +    ui.action_Load_Amiibo->setShortcutContext( +        hotkey_registry.GetShortcutContext(main_window, load_amiibo)); +      ui.action_Exit->setShortcut(hotkey_registry.GetKeySequence(main_window, exit_yuzu));      ui.action_Exit->setShortcutContext(hotkey_registry.GetShortcutContext(main_window, exit_yuzu)); +    ui.action_Restart->setShortcut(hotkey_registry.GetKeySequence(main_window, restart_emulation)); +    ui.action_Restart->setShortcutContext( +        hotkey_registry.GetShortcutContext(main_window, restart_emulation)); +      ui.action_Stop->setShortcut(hotkey_registry.GetKeySequence(main_window, stop_emulation));      ui.action_Stop->setShortcutContext(          hotkey_registry.GetShortcutContext(main_window, stop_emulation)); @@ -553,6 +564,11 @@ void GMainWindow::InitializeHotkeys() {      ui.action_Show_Status_Bar->setShortcutContext(          hotkey_registry.GetShortcutContext(main_window, toggle_status_bar)); +    ui.action_Capture_Screenshot->setShortcut( +        hotkey_registry.GetKeySequence(main_window, capture_screenshot)); +    ui.action_Capture_Screenshot->setShortcutContext( +        hotkey_registry.GetShortcutContext(main_window, capture_screenshot)); +      connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Load File"), this),              &QShortcut::activated, this, &GMainWindow::OnMenuLoadFile);      connect( diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui index 21f422500..a2c9e4547 100644 --- a/src/yuzu/main.ui +++ b/src/yuzu/main.ui @@ -15,7 
+15,7 @@    </property>    <property name="windowIcon">     <iconset> -    <normaloff>src/pcafe/res/icon3_64x64.ico</normaloff>src/pcafe/res/icon3_64x64.ico</iconset> +    <normaloff>../dist/yuzu.ico</normaloff>../dist/yuzu.ico</iconset>    </property>    <property name="tabShape">     <enum>QTabWidget::Rounded</enum> @@ -98,6 +98,7 @@      <addaction name="action_Display_Dock_Widget_Headers"/>      <addaction name="action_Show_Filter_Bar"/>      <addaction name="action_Show_Status_Bar"/> +    <addaction name="separator"/>      <addaction name="menu_View_Debugging"/>     </widget>     <widget class="QMenu" name="menu_Tools">  | 
