diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/vertex_shader.cpp | 106 | 
1 file changed, 56 insertions, 50 deletions
| diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index dd406f9ca..af9332975 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -2,6 +2,8 @@  // Licensed under GPLv2  // Refer to the license.txt file included. +#include <stack> +  #include <boost/range/algorithm.hpp>  #include <common/file_util.h> @@ -65,9 +67,6 @@ const std::array<u32, 1024>& GetSwizzlePatterns()      return swizzle_data;  } -// TODO: Is there actually a limit on hardware? -const int if_stack_size = 8; -  struct VertexShaderState {      u32* program_counter; @@ -84,14 +83,14 @@ struct VertexShaderState {      enum {          INVALID_ADDRESS = 0xFFFFFFFF      }; -    u32 call_stack[8]; // TODO: What is the maximal call stack depth? -    u32* call_stack_pointer; -    struct IfStackElement { -        u32 else_addr; -        u32 else_instructions; -    } if_stack[if_stack_size]; -    IfStackElement* if_stack_pointer; +    struct CallStackElement { +        u32 final_address; +        u32 return_address; +    }; + +    // TODO: Is there a maximal size for this? +    std::stack<CallStackElement> call_stack;      struct {          u32 max_offset; // maximum program counter ever reached @@ -101,12 +100,27 @@ struct VertexShaderState {  static void ProcessShaderCode(VertexShaderState& state) {      while (true) { -        bool increment_pc = true; +        if (!state.call_stack.empty()) { +            if (state.program_counter - shader_memory.data() == state.call_stack.top().final_address) { +                state.program_counter = &shader_memory[state.call_stack.top().return_address]; +                state.call_stack.pop(); + +                // TODO: Is "trying again" accurate to hardware? 
+                continue; +            } +        } +          bool exit_loop = false;          const Instruction& instr = *(const Instruction*)state.program_counter;          const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; -        state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + (state.program_counter - shader_memory.data())); +        auto call = [&](std::stack<VertexShaderState::CallStackElement>& stack, u32 offset, u32 num_instructions, u32 return_offset) { +            state.program_counter = &shader_memory[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset +            stack.push({ offset + num_instructions, return_offset }); +        }; +        u32 binary_offset = state.program_counter - shader_memory.data(); + +        state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + binary_offset);          auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* {              switch (source_reg.GetRegisterType()) { @@ -328,30 +342,33 @@ static void ProcessShaderCode(VertexShaderState& state) {          default:              // Handle each instruction on its own              switch (instr.opcode) { -            // NOP is currently used as a heuristic for leaving from a function. -            // TODO: This is completely incorrect. 
-            case Instruction::OpCode::NOP: -                if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { -                    exit_loop = true; -                } else { -                    // Jump back to call stack position, invalidate call stack entry, move up call stack pointer -                    state.program_counter = &shader_memory[*state.call_stack_pointer]; -                    *state.call_stack_pointer-- = VertexShaderState::INVALID_ADDRESS; -                } - +            case Instruction::OpCode::END: +                exit_loop = true;                  break;              case Instruction::OpCode::CALL: -                increment_pc = false; - -                _dbg_assert_(HW_GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack)); +                call(state.call_stack, +                     instr.flow_control.dest_offset, +                     instr.flow_control.num_instructions, +                     binary_offset + 1); +                break; -                *++state.call_stack_pointer = state.program_counter - shader_memory.data(); -                state.program_counter = &shader_memory[instr.flow_control.dest_offset]; +            case Instruction::OpCode::NOP:                  break; -            case Instruction::OpCode::END: -                // TODO +            case Instruction::OpCode::IFU: +                if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { +                    call(state.call_stack, +                         binary_offset + 1, +                         instr.flow_control.dest_offset - binary_offset - 1, +                         instr.flow_control.dest_offset + instr.flow_control.num_instructions); +                } else { +                    call(state.call_stack, +                         instr.flow_control.dest_offset, +                         instr.flow_control.num_instructions, +                         instr.flow_control.dest_offset + 
instr.flow_control.num_instructions); +                } +                  break;              case Instruction::OpCode::IFC: @@ -381,12 +398,15 @@ static void ProcessShaderCode(VertexShaderState& state) {                  }                  if (results[2]) { -                    ++state.if_stack_pointer; - -                    state.if_stack_pointer->else_addr = instr.flow_control.dest_offset; -                    state.if_stack_pointer->else_instructions = instr.flow_control.num_instructions; +                    call(state.call_stack, +                         binary_offset + 1, +                         instr.flow_control.dest_offset - binary_offset - 1, +                         instr.flow_control.dest_offset + instr.flow_control.num_instructions);                  } else { -                    state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1; +                    call(state.call_stack, +                         instr.flow_control.dest_offset, +                         instr.flow_control.num_instructions, +                         instr.flow_control.dest_offset + instr.flow_control.num_instructions);                  }                  break; @@ -401,15 +421,7 @@ static void ProcessShaderCode(VertexShaderState& state) {              break;          } -        if (increment_pc) -            ++state.program_counter; - -        if (state.if_stack_pointer >= &state.if_stack[0]) { -            if (state.program_counter - shader_memory.data() == state.if_stack_pointer->else_addr) { -                state.program_counter += state.if_stack_pointer->else_instructions; -                state.if_stack_pointer--; -            } -        } +        ++state.program_counter;          if (exit_loop)              break; @@ -462,12 +474,6 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes)      state.conditional_code[0] = false;      state.conditional_code[1] = false; -    boost::fill(state.call_stack, 
VertexShaderState::INVALID_ADDRESS); -    state.call_stack_pointer = &state.call_stack[0]; - -    std::fill(state.if_stack, state.if_stack + sizeof(state.if_stack) / sizeof(state.if_stack[0]), -              VertexShaderState::IfStackElement{VertexShaderState::INVALID_ADDRESS, VertexShaderState::INVALID_ADDRESS}); -    state.if_stack_pointer = state.if_stack - 1; // Meh. TODO: Make this less ugly      ProcessShaderCode(state);      DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(), | 
