diff options
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 91 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 16 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 2 | 
4 files changed, 73 insertions, 52 deletions
| diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 7cea146f0..0b3e8749b 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -9,6 +9,7 @@  #include "core/core_timing.h"  #include "video_core/engines/maxwell_3d.h"  #include "video_core/engines/shader_type.h" +#include "video_core/gpu.h"  #include "video_core/memory_manager.h"  #include "video_core/rasterizer_interface.h"  #include "video_core/textures/texture.h" @@ -519,61 +520,63 @@ void Maxwell3D::ProcessFirmwareCall4() {      regs.reg_array[0xd00] = 1;  } -void Maxwell3D::ProcessQueryGet() { +void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { +    struct LongQueryResult { +        u64_le value; +        u64_le timestamp; +    }; +    static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");      const GPUVAddr sequence_address{regs.query.QueryAddress()}; -    // Since the sequence address is given as a GPU VAddr, we have to convert it to an application -    // VAddr before writing. +    if (long_query) { +        // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast +        // GPU, this command may actually take a while to complete in real hardware due to GPU +        // wait queues. +        LongQueryResult query_result{payload, system.GPU().GetTicks()}; +        memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); +    } else { +        memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload)); +    } +} +void Maxwell3D::ProcessQueryGet() {      // TODO(Subv): Support the other query units.      
ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,                 "Units other than CROP are unimplemented"); -    u64 result = 0; - -    // TODO(Subv): Support the other query variables -    switch (regs.query.query_get.select) { -    case Regs::QuerySelect::Zero: -        // This seems to actually write the query sequence to the query address. -        result = regs.query.query_sequence; +    switch (regs.query.query_get.operation) { +    case Regs::QueryOperation::Release: { +        const u64 result = regs.query.query_sequence; +        StampQueryResult(result, regs.query.query_get.short_query == 0);          break; -    default: -        result = 1; -        UNIMPLEMENTED_MSG("Unimplemented query select type {}", -                          static_cast<u32>(regs.query.query_get.select.Value()));      } - -    // TODO(Subv): Research and implement how query sync conditions work. - -    struct LongQueryResult { -        u64_le value; -        u64_le timestamp; -    }; -    static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); - -    switch (regs.query.query_get.mode) { -    case Regs::QueryMode::Write: -    case Regs::QueryMode::Write2: { -        u32 sequence = regs.query.query_sequence; -        if (regs.query.query_get.short_query) { -            // Write the current query sequence to the sequence address. -            // TODO(Subv): Find out what happens if you use a long query type but mark it as a short -            // query. -            memory_manager.Write<u32>(sequence_address, sequence); -        } else { -            // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast -            // GPU, this command may actually take a while to complete in real hardware due to GPU -            // wait queues. 
-            LongQueryResult query_result{}; -            query_result.value = result; -            // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming -            query_result.timestamp = system.CoreTiming().GetTicks(); -            memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); +    case Regs::QueryOperation::Acquire: { +        // Todo(Blinkhawk): Under this operation, the GPU waits for the CPU +        // to write a value that matches the current payload. +        UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); +        break; +    } +    case Regs::QueryOperation::Counter: { +        u64 result{}; +        switch (regs.query.query_get.select) { +        case Regs::QuerySelect::Zero: +            result = 0; +            break; +        default: +            result = 1; +            UNIMPLEMENTED_MSG("Unimplemented query select type {}", +                              static_cast<u32>(regs.query.query_get.select.Value()));          } +        StampQueryResult(result, regs.query.query_get.short_query == 0); +        break; +    } +    case Regs::QueryOperation::Trap: { +        UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); +        break; +    } +    default: { +        UNIMPLEMENTED_MSG("Unknown query operation");          break;      } -    default: -        UNIMPLEMENTED_MSG("Query mode {} not implemented", -                          static_cast<u32>(regs.query.query_get.mode.Value()));      }  } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 7b1912a66..0a2af54e5 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -71,12 +71,11 @@ public:          static constexpr std::size_t MaxConstBuffers = 18;          static constexpr std::size_t MaxConstBufferSize = 0x10000; -        enum class QueryMode : u32 { -            Write = 0, -            Sync = 1, -            // TODO(Subv): It is 
currently unknown what the difference between method 2 and method 0 -            // is. -            Write2 = 2, +        enum class QueryOperation : u32 { +            Release = 0, +            Acquire = 1, +            Counter = 2, +            Trap = 3,          };          enum class QueryUnit : u32 { @@ -1081,7 +1080,7 @@ public:                      u32 query_sequence;                      union {                          u32 raw; -                        BitField<0, 2, QueryMode> mode; +                        BitField<0, 2, QueryOperation> operation;                          BitField<4, 1, u32> fence;                          BitField<12, 4, QueryUnit> unit;                          BitField<16, 1, QuerySyncCondition> sync_cond; @@ -1413,6 +1412,9 @@ private:      /// Handles a write to the QUERY_GET register.      void ProcessQueryGet(); +    // Writes the query result accordingly +    void StampQueryResult(u64 payload, bool long_query); +      // Handles Conditional Rendering      void ProcessQueryCondition(); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 062ca83b8..4419ab735 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -6,6 +6,7 @@  #include "common/microprofile.h"  #include "core/core.h"  #include "core/core_timing.h" +#include "core/core_timing_util.h"  #include "core/memory.h"  #include "video_core/engines/fermi_2d.h"  #include "video_core/engines/kepler_compute.h" @@ -122,6 +123,19 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {      return true;  } +u64 GPU::GetTicks() const { +    // These values were reverse engineered by fincs from NVN +    // The gpu clock advances 384 ticks every 625 nanoseconds (ticks = ns * 384 / 625) +    constexpr u64 gpu_ticks_num = 384; +    constexpr u64 gpu_ticks_den = 625; + +    const u64 cpu_ticks = system.CoreTiming().GetTicks(); +    const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count(); +    const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; + 
   const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; +    return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; +} +  void GPU::FlushCommands() {      renderer.Rasterizer().FlushCommands();  } @@ -340,7 +354,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {          block.sequence = regs.semaphore_sequence;          // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of          // CoreTiming -        block.timestamp = system.CoreTiming().GetTicks(); +        block.timestamp = GetTicks();          memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,                                     sizeof(block));      } else { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b648317bb..07727210c 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -192,6 +192,8 @@ public:      bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); +    u64 GetTicks() const; +      std::unique_lock<std::mutex> LockSync() {          return std::unique_lock{sync_mutex};      } | 
