From ea51a3af261254e5455f63a0ef41e55ef1dfc471 Mon Sep 17 00:00:00 2001 From: wwylele Date: Tue, 22 Aug 2017 09:49:26 +0300 Subject: [PATCH 1/3] SwRasterizer: implement custom clip plane --- src/video_core/regs_rasterizer.h | 14 ++++++++++++-- src/video_core/swrasterizer/clipper.cpp | 15 +++++++++++++-- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/video_core/regs_rasterizer.h b/src/video_core/regs_rasterizer.h index 2874fd1272..4fef00d763 100644 --- a/src/video_core/regs_rasterizer.h +++ b/src/video_core/regs_rasterizer.h @@ -5,10 +5,10 @@ #pragma once #include - #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "video_core/pica_types.h" namespace Pica { @@ -31,7 +31,17 @@ struct RasterizerRegs { BitField<0, 24, u32> viewport_size_y; - INSERT_PADDING_WORDS(0x9); + INSERT_PADDING_WORDS(0x3); + + BitField<0, 1, u32> clip_enable; + BitField<0, 24, u32> clip_coef[4]; // float24 + + Math::Vec4 GetClipCoef() const { + return {float24::FromRaw(clip_coef[0]), float24::FromRaw(clip_coef[1]), + float24::FromRaw(clip_coef[2]), float24::FromRaw(clip_coef[3])}; + } + + INSERT_PADDING_WORDS(0x1); BitField<0, 24, u32> viewport_depth_range; // float24 BitField<0, 24, u32> viewport_depth_near_plane; // float24 diff --git a/src/video_core/swrasterizer/clipper.cpp b/src/video_core/swrasterizer/clipper.cpp index cdbc715026..cc76ba5555 100644 --- a/src/video_core/swrasterizer/clipper.cpp +++ b/src/video_core/swrasterizer/clipper.cpp @@ -127,8 +127,7 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu // Simple implementation of the Sutherland-Hodgman clipping algorithm. // TODO: Make this less inefficient (currently lots of useless buffering overhead happens here) - for (auto edge : clipping_edges) { - + auto Clip = [&](const ClippingEdge& edge) { std::swap(input_list, output_list); output_list->clear(); @@ -147,12 +146,24 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu } reference_vertex = &vertex; } + }; + + for (auto edge : clipping_edges) { + Clip(edge); // Need to have at least a full triangle to continue... if (output_list->size() < 3) return; } + if (g_state.regs.rasterizer.clip_enable) { + ClippingEdge custom_edge{-g_state.regs.rasterizer.GetClipCoef()}; + Clip(custom_edge); + + if (output_list->size() < 3) + return; + } + InitScreenCoordinates((*output_list)[0]); InitScreenCoordinates((*output_list)[1]); From addbcd5784c8195f49cecc20834537c80d1c8c72 Mon Sep 17 00:00:00 2001 From: wwylele Date: Tue, 22 Aug 2017 09:49:53 +0300 Subject: [PATCH 2/3] gl_rasterizer: implement custom clip plane --- .../renderer_opengl/gl_rasterizer.cpp | 28 +++++++ .../renderer_opengl/gl_rasterizer.h | 9 ++- .../renderer_opengl/gl_shader_gen.cpp | 80 +++++++++++-------- 3 files changed, 83 insertions(+), 34 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index aa95ef21da..7b0cd1b667 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -169,6 +169,8 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_diff_lut_buffer.handle); // Sync fixed function OpenGL state + SyncClipEnabled(); + SyncClipCoef(); SyncCullMode(); SyncBlendEnabled(); SyncBlendFuncs(); @@ -401,6 +403,18 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { SyncCullMode(); break; + // Clipping plane + case PICA_REG_INDEX(rasterizer.clip_enable): + SyncClipEnabled(); + break; + + case PICA_REG_INDEX_WORKAROUND(rasterizer.clip_coef[0], 0x48): + case PICA_REG_INDEX_WORKAROUND(rasterizer.clip_coef[1], 0x49): + case PICA_REG_INDEX_WORKAROUND(rasterizer.clip_coef[2], 0x4a): + case PICA_REG_INDEX_WORKAROUND(rasterizer.clip_coef[3], 0x4b): + SyncClipCoef(); + break; + // Depth modifiers case PICA_REG_INDEX(rasterizer.viewport_depth_range): SyncDepthScale(); @@ -1280,6 +1294,20 @@ void RasterizerOpenGL::SetShader() { } } +void RasterizerOpenGL::SyncClipEnabled() { + state.clip_distance[1] = Pica::g_state.regs.rasterizer.clip_enable != 0; +} + +void RasterizerOpenGL::SyncClipCoef() { + const auto raw_clip_coef = Pica::g_state.regs.rasterizer.GetClipCoef(); + const GLvec4 new_clip_coef = {raw_clip_coef.x.ToFloat32(), raw_clip_coef.y.ToFloat32(), + raw_clip_coef.z.ToFloat32(), raw_clip_coef.w.ToFloat32()}; + if (new_clip_coef != uniform_block_data.data.clip_coef) { + uniform_block_data.data.clip_coef = new_clip_coef; + uniform_block_data.dirty = true; + } +} + void RasterizerOpenGL::SyncCullMode() { const auto& regs = Pica::g_state.regs; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 78e218efe1..46c62961c3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -151,14 +151,21 @@ private: LightSrc light_src[8]; alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages alignas(16) GLvec4 tev_combiner_buffer_color; + alignas(16) GLvec4 clip_coef; }; static_assert( - sizeof(UniformData) == 0x460, + sizeof(UniformData) == 0x470, "The size of the UniformData structure has changed, update the structure in the shader"); static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); + /// Syncs the clip enabled status to match the PICA register + void SyncClipEnabled(); + + /// Syncs the clip coefficients to match the PICA register + void SyncClipCoef(); + /// Sets the OpenGL shader in accordance with the current PICA register state void SetShader(); diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 015e69da97..aa60b2e7f2 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -24,6 +24,42 @@ using TevStageConfig = TexturingRegs::TevStageConfig; namespace GLShader { +static const std::string UniformBlockDef = R"( +#define NUM_TEV_STAGES 6 +#define NUM_LIGHTS 8 + +struct LightSrc { + vec3 specular_0; + vec3 specular_1; + vec3 diffuse; + vec3 ambient; + vec3 position; + vec3 spot_direction; + float dist_atten_bias; + float dist_atten_scale; +}; + +layout (std140) uniform shader_data { + vec2 framebuffer_scale; + int alphatest_ref; + float depth_scale; + float depth_offset; + int scissor_x1; + int scissor_y1; + int scissor_x2; + int scissor_y2; + vec3 fog_color; + vec2 proctex_noise_f; + vec2 proctex_noise_a; + vec2 proctex_noise_p; + vec3 lighting_global_ambient; + LightSrc light_src[NUM_LIGHTS]; + vec4 const_color[NUM_TEV_STAGES]; + vec4 tev_combiner_buffer_color; + vec4 clip_coef; +}; +)"; + PicaShaderConfig PicaShaderConfig::BuildFromRegs(const Pica::Regs& regs) { PicaShaderConfig res; @@ -1008,8 +1044,6 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) { std::string out = R"( #version 330 core -#define NUM_TEV_STAGES 6 -#define NUM_LIGHTS 8 in vec4 primary_color; in vec2 texcoord[3]; @@ -1021,36 +1055,6 @@ in vec4 gl_FragCoord; out vec4 color; -struct LightSrc { - vec3 specular_0; - vec3 specular_1; - vec3 diffuse; - vec3 ambient; - vec3 position; - vec3 spot_direction; - float dist_atten_bias; - float dist_atten_scale; -}; - -layout (std140) uniform shader_data { - vec2 framebuffer_scale; - int alphatest_ref; - float depth_scale; - float depth_offset; - int scissor_x1; - int scissor_y1; - int scissor_x2; - int scissor_y2; - vec3 fog_color; - vec2 proctex_noise_f; - vec2 proctex_noise_a; - vec2 proctex_noise_p; - vec3 lighting_global_ambient; - LightSrc light_src[NUM_LIGHTS]; - vec4 const_color[NUM_TEV_STAGES]; - vec4 tev_combiner_buffer_color; -}; - uniform sampler2D tex[3]; uniform samplerBuffer lighting_lut; uniform samplerBuffer fog_lut; @@ -1059,7 +1063,11 @@ uniform samplerBuffer proctex_color_map; uniform samplerBuffer proctex_alpha_map; uniform samplerBuffer proctex_lut; uniform samplerBuffer proctex_diff_lut; +)"; + out += UniformBlockDef; + + out += R"( // Rotate the vector v by the quaternion q vec3 quaternion_rotate(vec4 q, vec3 v) { return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v); @@ -1190,6 +1198,12 @@ out float texcoord0_w; out vec4 normquat; out vec3 view; +)"; + + out += UniformBlockDef; + + out += R"( + void main() { primary_color = vert_color; texcoord[0] = vert_texcoord0; @@ -1200,7 +1214,7 @@ void main() { view = vert_view; gl_Position = vert_position; gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0 - // TODO (wwylele): calculate gl_ClipDistance[1] from user-defined clipping plane + gl_ClipDistance[1] = dot(clip_coef, vert_position); } )"; From 417cb45e3fc20a7529ce5d548ba0fbc36ea0a621 Mon Sep 17 00:00:00 2001 From: wwylele Date: Tue, 22 Aug 2017 09:47:15 +0300 Subject: [PATCH 3/3] SwRasterizer/Clipper: flip the sign convention to match PICA and OpenGL --- src/video_core/swrasterizer/clipper.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/video_core/swrasterizer/clipper.cpp b/src/video_core/swrasterizer/clipper.cpp index cc76ba5555..a52129eb76 100644 --- a/src/video_core/swrasterizer/clipper.cpp +++ b/src/video_core/swrasterizer/clipper.cpp @@ -31,7 +31,7 @@ public: : coeffs(coeffs), bias(bias) {} bool IsInside(const Vertex& vertex) const { - return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0); + return Math::Dot(vertex.pos + bias, coeffs) >= float24::FromFloat32(0); } bool IsOutSide(const Vertex& vertex) const { @@ -116,13 +116,13 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu static const float24 f0 = float24::FromFloat32(0.0); static const float24 f1 = float24::FromFloat32(1.0); static const std::array clipping_edges = {{ - {Math::MakeVec(f1, f0, f0, -f1)}, // x = +w - {Math::MakeVec(-f1, f0, f0, -f1)}, // x = -w - {Math::MakeVec(f0, f1, f0, -f1)}, // y = +w - {Math::MakeVec(f0, -f1, f0, -f1)}, // y = -w - {Math::MakeVec(f0, f0, f1, f0)}, // z = 0 - {Math::MakeVec(f0, f0, -f1, -f1)}, // z = -w - {Math::MakeVec(f0, f0, f0, -f1), Math::Vec4(f0, f0, f0, EPSILON)}, // w = EPSILON + {Math::MakeVec(-f1, f0, f0, f1)}, // x = +w + {Math::MakeVec(f1, f0, f0, f1)}, // x = -w + {Math::MakeVec(f0, -f1, f0, f1)}, // y = +w + {Math::MakeVec(f0, f1, f0, f1)}, // y = -w + {Math::MakeVec(f0, f0, -f1, f0)}, // z = 0 + {Math::MakeVec(f0, f0, f1, f1)}, // z = -w + {Math::MakeVec(f0, f0, f0, f1), Math::Vec4(f0, f0, f0, EPSILON)}, // w = EPSILON }}; // Simple implementation of the Sutherland-Hodgman clipping algorithm. @@ -157,7 +157,7 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu } if (g_state.regs.rasterizer.clip_enable) { - ClippingEdge custom_edge{-g_state.regs.rasterizer.GetClipCoef()}; + ClippingEdge custom_edge{g_state.regs.rasterizer.GetClipCoef()}; Clip(custom_edge); if (output_list->size() < 3)