gl_rasterizer: Skip VB upload if the state is clean.

This commit is contained in:
Markus Wick 2018-11-06 21:26:27 +01:00
parent 0072275d25
commit 97f5c4ffd3
9 changed files with 60 additions and 6 deletions

View file

@ -34,6 +34,9 @@ MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB
void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) { void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
MICROPROFILE_SCOPE(ProcessCommandLists); MICROPROFILE_SCOPE(ProcessCommandLists);
// On entering GPU code, assume all memory may be touched by the ARM core.
maxwell_3d->dirty_flags.OnMemoryWrite();
auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) { auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) {
LOG_TRACE(HW_GPU, LOG_TRACE(HW_GPU,
"Processing method {:08X} on subchannel {} value " "Processing method {:08X} on subchannel {} value "

View file

@ -2,8 +2,10 @@
// Licensed under GPLv2 or any later version // Licensed under GPLv2 or any later version
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include "core/core.h"
#include "core/memory.h" #include "core/memory.h"
#include "video_core/engines/fermi_2d.h" #include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h" #include "video_core/rasterizer_interface.h"
#include "video_core/textures/decoders.h" #include "video_core/textures/decoders.h"
@ -47,6 +49,9 @@ void Fermi2D::HandleSurfaceCopy() {
u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);
if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) { if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) {
// All copies here update the main memory, so mark all rasterizer states as invalid.
Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height); rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height);
// We have to invalidate the destination region to evict any outdated surfaces from the // We have to invalidate the destination region to evict any outdated surfaces from the
// cache. We do this before actually writing the new data because the destination address // cache. We do this before actually writing the new data because the destination address

View file

@ -3,8 +3,10 @@
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include "common/logging/log.h" #include "common/logging/log.h"
#include "core/core.h"
#include "core/memory.h" #include "core/memory.h"
#include "video_core/engines/kepler_memory.h" #include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h" #include "video_core/rasterizer_interface.h"
namespace Tegra::Engines { namespace Tegra::Engines {
@ -47,6 +49,7 @@ void KeplerMemory::ProcessData(u32 data) {
rasterizer.InvalidateRegion(dest_address, sizeof(u32)); rasterizer.InvalidateRegion(dest_address, sizeof(u32));
Memory::Write32(dest_address, data); Memory::Write32(dest_address, data);
Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
state.write_offset++; state.write_offset++;
} }

View file

@ -123,10 +123,24 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
if (regs.reg_array[method] != value) { if (regs.reg_array[method] != value) {
regs.reg_array[method] = value; regs.reg_array[method] = value;
// Vertex format
if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
dirty_flags.vertex_attrib_format = true; dirty_flags.vertex_attrib_format = true;
} }
// Vertex buffer
if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
dirty_flags.vertex_array |= 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
} else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
dirty_flags.vertex_array |=
1u << ((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
} else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
dirty_flags.vertex_array |= 1u << (method - MAXWELL3D_REG_INDEX(instanced_arrays));
}
} }
switch (method) { switch (method) {
@ -258,6 +272,7 @@ void Maxwell3D::ProcessQueryGet() {
query_result.timestamp = CoreTiming::GetTicks(); query_result.timestamp = CoreTiming::GetTicks();
Memory::WriteBlock(*address, &query_result, sizeof(query_result)); Memory::WriteBlock(*address, &query_result, sizeof(query_result));
} }
dirty_flags.OnMemoryWrite();
break; break;
} }
default: default:
@ -334,6 +349,7 @@ void Maxwell3D::ProcessCBData(u32 value) {
memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
Memory::Write32(*address, value); Memory::Write32(*address, value);
dirty_flags.OnMemoryWrite();
// Increment the current buffer position. // Increment the current buffer position.
regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;

View file

@ -1014,6 +1014,11 @@ public:
// Tracks which pieces of Maxwell3D state have changed since they were last consumed.
struct DirtyFlags { struct DirtyFlags {
// Set when a vertex_attrib_format register is written with a new value.
bool vertex_attrib_format = true; bool vertex_attrib_format = true;
// Bitmask with one bit per vertex array index; a set bit marks that array as dirty.
// All bits start set.
u32 vertex_array = 0xFFFFFFFF;
// Marks every vertex array as dirty.
void OnMemoryWrite() {
vertex_array = 0xFFFFFFFF;
}
}; };
DirtyFlags dirty_flags; DirtyFlags dirty_flags;

View file

@ -2,7 +2,9 @@
// Licensed under GPLv2 or any later version // Licensed under GPLv2 or any later version
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include "core/core.h"
#include "core/memory.h" #include "core/memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h" #include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_interface.h" #include "video_core/rasterizer_interface.h"
#include "video_core/textures/decoders.h" #include "video_core/textures/decoders.h"
@ -54,6 +56,9 @@ void MaxwellDMA::HandleCopy() {
return; return;
} }
// All copies here update the main memory, so mark all rasterizer states as invalid.
Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
// When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
// buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,

View file

@ -76,7 +76,7 @@ std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::s
return std::make_tuple(uploaded_ptr, uploaded_offset); return std::make_tuple(uploaded_ptr, uploaded_offset);
} }
void OGLBufferCache::Map(std::size_t max_size) { bool OGLBufferCache::Map(std::size_t max_size) {
bool invalidate; bool invalidate;
std::tie(buffer_ptr, buffer_offset_base, invalidate) = std::tie(buffer_ptr, buffer_offset_base, invalidate) =
stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
@ -85,6 +85,7 @@ void OGLBufferCache::Map(std::size_t max_size) {
if (invalidate) { if (invalidate) {
InvalidateAll(); InvalidateAll();
} }
return invalidate;
} }
void OGLBufferCache::Unmap() { void OGLBufferCache::Unmap() {

View file

@ -50,7 +50,7 @@ public:
/// Reserves memory to be used by host's CPU. Returns mapped address and offset. /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4); std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4);
void Map(std::size_t max_size); bool Map(std::size_t max_size);
void Unmap(); void Unmap();
GLuint GetHandle() const; GLuint GetHandle() const;

View file

@ -183,15 +183,25 @@ void RasterizerOpenGL::SetupVertexFormat() {
} }
state.draw.vertex_array = VAO.handle; state.draw.vertex_array = VAO.handle;
state.ApplyVertexBufferState(); state.ApplyVertexBufferState();
// Rebinding the VAO invalidates the vertex buffer bindings.
gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
} }
void RasterizerOpenGL::SetupVertexBuffer() { void RasterizerOpenGL::SetupVertexBuffer() {
MICROPROFILE_SCOPE(OpenGL_VB); auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs; const auto& regs = gpu.regs;
if (!gpu.dirty_flags.vertex_array)
return;
MICROPROFILE_SCOPE(OpenGL_VB);
// Upload all guest vertex arrays sequentially to our buffer // Upload all guest vertex arrays sequentially to our buffer
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
if (~gpu.dirty_flags.vertex_array & (1u << index))
continue;
const auto& vertex_array = regs.vertex_array[index]; const auto& vertex_array = regs.vertex_array[index];
if (!vertex_array.IsEnabled()) if (!vertex_array.IsEnabled())
continue; continue;
@ -218,6 +228,8 @@ void RasterizerOpenGL::SetupVertexBuffer() {
// Implicit set by glBindVertexBuffer. Stupid glstate handling... // Implicit set by glBindVertexBuffer. Stupid glstate handling...
state.draw.vertex_buffer = buffer_cache.GetHandle(); state.draw.vertex_buffer = buffer_cache.GetHandle();
gpu.dirty_flags.vertex_array = 0;
} }
DrawParameters RasterizerOpenGL::SetupDraw() { DrawParameters RasterizerOpenGL::SetupDraw() {
@ -575,7 +587,7 @@ void RasterizerOpenGL::DrawArrays() {
return; return;
MICROPROFILE_SCOPE(OpenGL_Drawing); MICROPROFILE_SCOPE(OpenGL_Drawing);
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs; const auto& regs = gpu.regs;
ScopeAcquireGLContext acquire_context{emu_window}; ScopeAcquireGLContext acquire_context{emu_window};
@ -626,7 +638,11 @@ void RasterizerOpenGL::DrawArrays() {
// Add space for at least 18 constant buffers // Add space for at least 18 constant buffers
buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
buffer_cache.Map(buffer_size); bool invalidate = buffer_cache.Map(buffer_size);
if (invalidate) {
// Map() reported that all cached buffers were invalidated, so every vertex array
// has to be re-uploaded. vertex_array is the per-array dirty bitmask, while
// vertex_attrib_format is only a bool and does not track buffer contents.
gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
}
SetupVertexFormat(); SetupVertexFormat();
SetupVertexBuffer(); SetupVertexBuffer();