From c4015cd93a314878b6fc24e9a7f711e4b485dc53 Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 12 Jul 2018 22:25:03 -0400 Subject: [PATCH 1/2] gl_shader_gen: Implement dual vertex shader mode. - When VertexA shader stage is enabled, we combine with VertexB program to make a single Vertex Shader stage. --- .../renderer_opengl/gl_rasterizer.cpp | 43 ++++++++--- .../renderer_opengl/gl_shader_decompiler.cpp | 71 ++++++++++--------- .../renderer_opengl/gl_shader_decompiler.h | 3 +- .../renderer_opengl/gl_shader_gen.cpp | 44 +++++++++--- .../renderer_opengl/gl_shader_gen.h | 33 ++++++++- 5 files changed, 139 insertions(+), 55 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ea138d4029..02ffd9bde3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -181,6 +181,19 @@ std::pair RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, return {array_ptr, buffer_offset}; } +static GLShader::ProgramCode GetShaderProgramCode(Maxwell::ShaderProgram program) { + auto& gpu = Core::System().GetInstance().GPU().Maxwell3D(); + + // Fetch program code from memory + GLShader::ProgramCode program_code; + auto& shader_config = gpu.regs.shader_config[static_cast(program)]; + const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; + const boost::optional cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)}; + Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64)); + + return program_code; +} + void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { // Helper function for uploading uniform data const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { @@ -193,18 +206,17 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { }; auto& gpu = Core::System().GetInstance().GPU().Maxwell3D(); - 
ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!"); // Next available bindpoints to use when uploading the const buffers and textures to the GLSL // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. u32 current_constbuffer_bindpoint = uniform_buffers.size(); u32 current_texture_bindpoint = 0; - for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { auto& shader_config = gpu.regs.shader_config[index]; const Maxwell::ShaderProgram program{static_cast(index)}; - const auto& stage = index - 1; // Stage indices are 0 - 5 + const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 const bool is_enabled = gpu.IsShaderStageEnabled(static_cast(stage)); @@ -228,16 +240,21 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { buffer_ptr += sizeof(GLShader::MaxwellUniformData); buffer_offset += sizeof(GLShader::MaxwellUniformData); - // Fetch program code from memory - GLShader::ProgramCode program_code; - const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; - const boost::optional cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)}; - Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64)); - GLShader::ShaderSetup setup{std::move(program_code)}; - + GLShader::ShaderSetup setup{GetShaderProgramCode(program)}; GLShader::ShaderEntries shader_resources; switch (program) { + case Maxwell::ShaderProgram::VertexA: { + // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. + // Conventional HW does not support this, so we combine VertexA and VertexB into one + // stage here. 
+ setup.SetProgramB(GetShaderProgramCode(Maxwell::ShaderProgram::VertexB)); + GLShader::MaxwellVSConfig vs_config{setup}; + shader_resources = + shader_program_manager->UseProgrammableVertexShader(vs_config, setup); + break; + } + case Maxwell::ShaderProgram::VertexB: { GLShader::MaxwellVSConfig vs_config{setup}; shader_resources = @@ -268,6 +285,12 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { current_texture_bindpoint = SetupTextures(static_cast(stage), gl_stage_program, current_texture_bindpoint, shader_resources.texture_samplers); + + // When VertexA is enabled, we have dual vertex shaders + if (program == Maxwell::ShaderProgram::VertexA) { + // VertexB was combined with VertexA, so we skip the VertexB iteration + index++; + } } shader_program_manager->UseTrivialGeometryShader(); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 96a4ca6fe6..5fae95788e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -42,13 +42,14 @@ enum class ExitMethod { struct Subroutine { /// Generates a name suitable for GLSL source code. std::string GetName() const { - return "sub_" + std::to_string(begin) + '_' + std::to_string(end); + return "sub_" + std::to_string(begin) + '_' + std::to_string(end) + '_' + suffix; } - u32 begin; ///< Entry point of the subroutine. - u32 end; ///< Return point of the subroutine. - ExitMethod exit_method; ///< Exit method of the subroutine. - std::set labels; ///< Addresses refereced by JMP instructions. + u32 begin; ///< Entry point of the subroutine. + u32 end; ///< Return point of the subroutine. + const std::string& suffix; ///< Suffix of the shader, used to make a unique subroutine name + ExitMethod exit_method; ///< Exit method of the subroutine. + std::set labels; ///< Addresses refereced by JMP instructions. 
bool operator<(const Subroutine& rhs) const { return std::tie(begin, end) < std::tie(rhs.begin, rhs.end); @@ -58,11 +59,11 @@ struct Subroutine { /// Analyzes shader code and produces a set of subroutines. class ControlFlowAnalyzer { public: - ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset) + ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset, const std::string& suffix) : program_code(program_code) { // Recursively finds all subroutines. - const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END); + const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END, suffix); if (program_main.exit_method != ExitMethod::AlwaysEnd) throw DecompileFail("Program does not always end"); } @@ -77,12 +78,12 @@ private: std::map, ExitMethod> exit_method_map; /// Adds and analyzes a new subroutine if it is not added yet. - const Subroutine& AddSubroutine(u32 begin, u32 end) { - auto iter = subroutines.find(Subroutine{begin, end}); + const Subroutine& AddSubroutine(u32 begin, u32 end, const std::string& suffix) { + auto iter = subroutines.find(Subroutine{begin, end, suffix}); if (iter != subroutines.end()) return *iter; - Subroutine subroutine{begin, end}; + Subroutine subroutine{begin, end, suffix}; subroutine.exit_method = Scan(begin, end, subroutine.labels); if (subroutine.exit_method == ExitMethod::Undetermined) throw DecompileFail("Recursive function detected"); @@ -191,7 +192,8 @@ public: UnsignedInteger, }; - GLSLRegister(size_t index, ShaderWriter& shader) : index{index}, shader{shader} {} + GLSLRegister(size_t index, ShaderWriter& shader, const std::string& suffix) + : index{index}, shader{shader}, suffix{suffix} {} /// Gets the GLSL type string for a register static std::string GetTypeString(Type type) { @@ -216,7 +218,7 @@ public: /// Returns a GLSL string representing the current state of the register const std::string GetActiveString() { declr_type.insert(active_type); - return 
GetPrefixString(active_type) + std::to_string(index); + return GetPrefixString(active_type) + std::to_string(index) + '_' + suffix; } /// Returns true if the active type is a float @@ -251,6 +253,7 @@ private: ShaderWriter& shader; Type active_type{Type::Float}; std::set declr_type; + const std::string& suffix; }; /** @@ -262,8 +265,8 @@ private: class GLSLRegisterManager { public: GLSLRegisterManager(ShaderWriter& shader, ShaderWriter& declarations, - const Maxwell3D::Regs::ShaderStage& stage) - : shader{shader}, declarations{declarations}, stage{stage} { + const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix) + : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix} { BuildRegisterList(); } @@ -430,12 +433,12 @@ public: } /// Add declarations for registers - void GenerateDeclarations() { + void GenerateDeclarations(const std::string& suffix) { for (const auto& reg : regs) { for (const auto& type : reg.DeclaredTypes()) { declarations.AddLine(GLSLRegister::GetTypeString(type) + ' ' + - GLSLRegister::GetPrefixString(type) + - std::to_string(reg.GetIndex()) + " = 0;"); + reg.GetPrefixString(type) + std::to_string(reg.GetIndex()) + + '_' + suffix + " = 0;"); } } declarations.AddNewLine(); @@ -558,7 +561,7 @@ private: /// Build the GLSL register list. 
void BuildRegisterList() { for (size_t index = 0; index < Register::NumRegisters; ++index) { - regs.emplace_back(index, shader); + regs.emplace_back(index, shader, suffix); } } @@ -620,16 +623,17 @@ private: std::array declr_const_buffers; std::vector used_samplers; const Maxwell3D::Regs::ShaderStage& stage; + const std::string& suffix; }; class GLSLGenerator { public: GLSLGenerator(const std::set& subroutines, const ProgramCode& program_code, - u32 main_offset, Maxwell3D::Regs::ShaderStage stage) + u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix) : subroutines(subroutines), program_code(program_code), main_offset(main_offset), - stage(stage) { + stage(stage), suffix(suffix) { - Generate(); + Generate(suffix); } std::string GetShaderCode() { @@ -644,7 +648,7 @@ public: private: /// Gets the Subroutine object corresponding to the specified address. const Subroutine& GetSubroutine(u32 begin, u32 end) const { - auto iter = subroutines.find(Subroutine{begin, end}); + auto iter = subroutines.find(Subroutine{begin, end, suffix}); ASSERT(iter != subroutines.end()); return *iter; } @@ -689,7 +693,7 @@ private: // Can't assign to the constant predicate. 
ASSERT(pred != static_cast(Pred::UnusedIndex)); - std::string variable = 'p' + std::to_string(pred); + std::string variable = 'p' + std::to_string(pred) + '_' + suffix; shader.AddLine(variable + " = " + value + ';'); declr_predicates.insert(std::move(variable)); } @@ -707,7 +711,7 @@ private: if (index == static_cast(Pred::UnusedIndex)) variable = "true"; else - variable = 'p' + std::to_string(index); + variable = 'p' + std::to_string(index) + '_' + suffix; if (negate) { return "!(" + variable + ')'; @@ -1728,7 +1732,7 @@ private: return program_counter; } - void Generate() { + void Generate(const std::string& suffix) { // Add declarations for all subroutines for (const auto& subroutine : subroutines) { shader.AddLine("bool " + subroutine.GetName() + "();"); @@ -1736,7 +1740,7 @@ private: shader.AddNewLine(); // Add the main entry point - shader.AddLine("bool exec_shader() {"); + shader.AddLine("bool exec_" + suffix + "() {"); ++shader.scope; CallSubroutine(GetSubroutine(main_offset, PROGRAM_END)); --shader.scope; @@ -1799,7 +1803,7 @@ private: /// Add declarations for registers void GenerateDeclarations() { - regs.GenerateDeclarations(); + regs.GenerateDeclarations(suffix); for (const auto& pred : declr_predicates) { declarations.AddLine("bool " + pred + " = false;"); @@ -1812,27 +1816,30 @@ private: const ProgramCode& program_code; const u32 main_offset; Maxwell3D::Regs::ShaderStage stage; + const std::string& suffix; ShaderWriter shader; ShaderWriter declarations; - GLSLRegisterManager regs{shader, declarations, stage}; + GLSLRegisterManager regs{shader, declarations, stage, suffix}; // Declarations std::set declr_predicates; }; // namespace Decompiler std::string GetCommonDeclarations() { - std::string declarations = "bool exec_shader();\n"; + std::string declarations; declarations += "#define MAX_CONSTBUFFER_ELEMENTS " + std::to_string(RasterizerOpenGL::MaxConstbufferSize / (sizeof(GLvec4))); + declarations += '\n'; return declarations; } boost::optional 
DecompileProgram(const ProgramCode& program_code, u32 main_offset, - Maxwell3D::Regs::ShaderStage stage) { + Maxwell3D::Regs::ShaderStage stage, + const std::string& suffix) { try { - auto subroutines = ControlFlowAnalyzer(program_code, main_offset).GetSubroutines(); - GLSLGenerator generator(subroutines, program_code, main_offset, stage); + auto subroutines = ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines(); + GLSLGenerator generator(subroutines, program_code, main_offset, stage, suffix); return ProgramResult{generator.GetShaderCode(), generator.GetEntries()}; } catch (const DecompileFail& exception) { LOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what()); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 382c76b7a3..7610dad3a0 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -20,7 +20,8 @@ using Tegra::Engines::Maxwell3D; std::string GetCommonDeclarations(); boost::optional DecompileProgram(const ProgramCode& program_code, u32 main_offset, - Maxwell3D::Regs::ShaderStage stage); + Maxwell3D::Regs::ShaderStage stage, + const std::string& suffix); } // namespace Decompiler } // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index c1e6fac9f2..129c777d18 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -17,10 +17,17 @@ ProgramResult GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConf std::string out = "#version 430 core\n"; out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; out += Decompiler::GetCommonDeclarations(); + out += "bool exec_vertex();\n"; + + if (setup.IsDualProgram()) { + out += "bool exec_vertex_b();\n"; + } + + ProgramResult program = + 
Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET, + Maxwell3D::Regs::ShaderStage::Vertex, "vertex") + .get_value_or({}); - ProgramResult program = Decompiler::DecompileProgram(setup.program_code, PROGRAM_OFFSET, - Maxwell3D::Regs::ShaderStage::Vertex) - .get_value_or({}); out += R"( out gl_PerVertex { @@ -34,7 +41,14 @@ layout (std140) uniform vs_config { }; void main() { - exec_shader(); + exec_vertex(); +)"; + + if (setup.IsDualProgram()) { + out += " exec_vertex_b();"; + } + + out += R"( // Viewport can be flipped, which is unsupported by glViewport position.xy *= viewport_flip.xy; @@ -44,8 +58,19 @@ void main() { // For now, this is here to bring order in lieu of proper emulation position.w = 1.0; } + )"; + out += program.first; + + if (setup.IsDualProgram()) { + ProgramResult program_b = + Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET, + Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b") + .get_value_or({}); + out += program_b.first; + } + return {out, program.second}; } @@ -53,12 +78,13 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSCo std::string out = "#version 430 core\n"; out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; out += Decompiler::GetCommonDeclarations(); + out += "bool exec_fragment();\n"; - ProgramResult program = Decompiler::DecompileProgram(setup.program_code, PROGRAM_OFFSET, - Maxwell3D::Regs::ShaderStage::Fragment) - .get_value_or({}); + ProgramResult program = + Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET, + Maxwell3D::Regs::ShaderStage::Fragment, "fragment") + .get_value_or({}); out += R"( - in vec4 position; out vec4 color; @@ -67,7 +93,7 @@ layout (std140) uniform fs_config { }; void main() { - exec_shader(); + exec_fragment(); } )"; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index ed890e0f9a..4729ce0fc8 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ 
b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -115,21 +115,48 @@ struct ShaderEntries { using ProgramResult = std::pair; struct ShaderSetup { - ShaderSetup(ProgramCode&& program_code) : program_code(std::move(program_code)) {} + ShaderSetup(const ProgramCode& program_code) { + program.code = program_code; + } + + struct { + ProgramCode code; + ProgramCode code_b; // Used for dual vertex shaders + } program; - ProgramCode program_code; bool program_code_hash_dirty = true; u64 GetProgramCodeHash() { if (program_code_hash_dirty) { - program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code)); + program_code_hash = GetNewHash(); program_code_hash_dirty = false; } return program_code_hash; } + /// Sets the second (VertexB) program, used when we have dual vertex shaders + void SetProgramB(const ProgramCode& program_b) { + program.code_b = program_b; + has_program_b = true; + } + + bool IsDualProgram() const { + return has_program_b; + } + private: + u64 GetNewHash() const { + if (has_program_b) { + // Compute hash over dual shader programs + return Common::ComputeHash64(&program, sizeof(program)); + } else { + // Compute hash over a single shader program; size() counts u64 words, so scale to bytes + return Common::ComputeHash64(&program.code, program.code.size() * sizeof(u64)); + } + } + u64 program_code_hash{}; + bool has_program_b{}; }; struct MaxwellShaderConfigCommon { From 8aeff9cf8e84c27ab83cea8df1a94ce8082efc78 Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 12 Jul 2018 22:57:57 -0400 Subject: [PATCH 2/2] gl_rasterizer: Fix check for if a shader stage is enabled.
--- src/video_core/engines/maxwell_3d.cpp | 21 ------------------- src/video_core/engines/maxwell_3d.h | 11 +++++++--- .../renderer_opengl/gl_rasterizer.cpp | 14 +++---------- 3 files changed, 11 insertions(+), 35 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 3bca16364e..dfbf80abdb 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -398,27 +398,6 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const { return regs.reg_array[method]; } -bool Maxwell3D::IsShaderStageEnabled(Regs::ShaderStage stage) const { - // The Vertex stage is always enabled. - if (stage == Regs::ShaderStage::Vertex) - return true; - - switch (stage) { - case Regs::ShaderStage::TesselationControl: - return regs.shader_config[static_cast(Regs::ShaderProgram::TesselationControl)] - .enable != 0; - case Regs::ShaderStage::TesselationEval: - return regs.shader_config[static_cast(Regs::ShaderProgram::TesselationEval)] - .enable != 0; - case Regs::ShaderStage::Geometry: - return regs.shader_config[static_cast(Regs::ShaderProgram::Geometry)].enable != 0; - case Regs::ShaderStage::Fragment: - return regs.shader_config[static_cast(Regs::ShaderProgram::Fragment)].enable != 0; - } - - UNREACHABLE(); -} - void Maxwell3D::ProcessClearBuffers() { ASSERT(regs.clear_buffers.R == regs.clear_buffers.G && regs.clear_buffers.R == regs.clear_buffers.B && diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 5a7cf01072..6f0170ff7c 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -379,6 +379,14 @@ public: } }; + bool IsShaderConfigEnabled(size_t index) const { + // The VertexB is always enabled. 
+ if (index == static_cast(Regs::ShaderProgram::VertexB)) { + return true; + } + return shader_config[index].enable != 0; + } + union { struct { INSERT_PADDING_WORDS(0x45); @@ -780,9 +788,6 @@ public: /// Returns the texture information for a specific texture in a specific shader stage. Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, size_t offset) const; - /// Returns whether the specified shader stage is enabled or not. - bool IsShaderStageEnabled(Regs::ShaderStage stage) const; - private: std::unordered_map> uploaded_macros; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 02ffd9bde3..4072a12b44 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -216,15 +216,13 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { auto& shader_config = gpu.regs.shader_config[index]; const Maxwell::ShaderProgram program{static_cast(index)}; - const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 - - const bool is_enabled = gpu.IsShaderStageEnabled(static_cast(stage)); - // Skip stages that are not enabled - if (!is_enabled) { + if (!gpu.regs.IsShaderConfigEnabled(index)) { continue; } + const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 + GLShader::MaxwellUniformData ubo{}; ubo.SetFromRegs(gpu.state.shader_stages[stage]); std::memcpy(buffer_ptr, &ubo, sizeof(ubo)); @@ -628,9 +626,6 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr auto& gpu = Core::System::GetInstance().GPU(); auto& maxwell3d = gpu.Get3DEngine(); - ASSERT_MSG(maxwell3d.IsShaderStageEnabled(stage), - "Attempted to upload constbuffer of disabled shader stage"); - // Reset all buffer draw state for this stage. 
for (auto& buffer : state.draw.const_buffers[static_cast(stage)]) { buffer.bindpoint = 0; @@ -697,9 +692,6 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, auto& gpu = Core::System::GetInstance().GPU(); auto& maxwell3d = gpu.Get3DEngine(); - ASSERT_MSG(maxwell3d.IsShaderStageEnabled(stage), - "Attempted to upload textures of disabled shader stage"); - ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units), "Exceeded the number of active textures.");