BufferBase: Don't ignore GPU pages.

This commit is contained in:
Fernando Sahmkow 2023-01-05 06:43:54 -05:00
parent 2d0c4f2b1d
commit b56ad93bbc
8 changed files with 22 additions and 23 deletions

View file

@ -538,7 +538,7 @@ TEST_CASE("BufferBase: Cached write downloads") {
int num = 0;
buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
REQUIRE(num == 0);
REQUIRE(num == 1);
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
buffer.FlushCachedWrites();

View file

@ -430,7 +430,7 @@ private:
if (query_begin >= SizeBytes() || size < 0) {
return;
}
u64* const untracked_words = Array<Type::Untracked>();
[[maybe_unused]] u64* const untracked_words = Array<Type::Untracked>();
u64* const state_words = Array<type>();
const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
@ -483,7 +483,7 @@ private:
NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
}
// Exclude CPU modified pages when visiting GPU pages
const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
const u64 word = current_word;
u64 page = page_begin;
page_begin = 0;
@ -531,7 +531,7 @@ private:
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
static_assert(type != Type::Untracked);
const u64* const untracked_words = Array<Type::Untracked>();
[[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
const u64* const state_words = Array<type>();
const u64 num_query_words = size / BYTES_PER_WORD + 1;
const u64 word_begin = offset / BYTES_PER_WORD;
@ -539,8 +539,7 @@ private:
const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
const u64 word = state_words[word_index] & ~off_word;
const u64 word = state_words[word_index];
if (word == 0) {
continue;
}
@ -564,7 +563,7 @@ private:
[[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
static_assert(type != Type::Untracked);
const u64* const untracked_words = Array<Type::Untracked>();
[[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
const u64* const state_words = Array<type>();
const u64 num_query_words = size / BYTES_PER_WORD + 1;
const u64 word_begin = offset / BYTES_PER_WORD;
@ -574,8 +573,7 @@ private:
u64 begin = std::numeric_limits<u64>::max();
u64 end = 0;
for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
const u64 word = state_words[word_index] & ~off_word;
const u64 word = state_words[word_index];
if (word == 0) {
continue;
}

View file

@ -123,7 +123,7 @@ void MaxwellDMA::Launch() {
convert_linear_2_blocklinear_addr(regs.offset_in + offset),
tmp_buffer.data(), tmp_buffer.size());
memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
tmp_buffer.size());
tmp_buffer.size());
}
} else if (is_src_pitch && !is_dst_pitch) {
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
@ -143,7 +143,7 @@ void MaxwellDMA::Launch() {
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
regs.line_length_in);
memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
regs.line_length_in);
regs.line_length_in);
}
}
}

View file

@ -3,6 +3,7 @@
#pragma once
#include <utility>
#include <vector>
#include "common/common_types.h"
@ -26,8 +27,8 @@ public:
if (address >= start_address && address + size <= last_collection) [[likely]] {
return;
}
size = (address + size + atomicy_side_mask) & atomicy_mask - address;
address = address & atomicy_mask;
size = ((address + size + atomicity_size_mask) & atomicity_mask) - address;
address = address & atomicity_mask;
if (!has_collected) [[unlikely]] {
reset_values();
has_collected = true;
@ -64,10 +65,10 @@ public:
}
private:
static constexpr size_t atomicy_bits = 5;
static constexpr size_t atomicy_size = 1ULL << atomicy_bits;
static constexpr size_t atomicy_side_mask = atomicy_size - 1;
static constexpr size_t atomicy_mask = ~atomicy_side_mask;
static constexpr size_t atomicity_bits = 5;
static constexpr size_t atomicity_size = 1ULL << atomicity_bits;
static constexpr size_t atomicity_size_mask = atomicity_size - 1;
static constexpr size_t atomicity_mask = ~atomicity_size_mask;
GPUVAddr start_address{};
GPUVAddr last_collection{};
size_t accumulated_size{};

View file

@ -46,7 +46,7 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
big_page_table_cpu.resize(big_page_table_size);
big_page_continous.resize(big_page_table_size / continous_bits, 0);
entries.resize(page_table_size / 32, 0);
if (!Settings::IsGPULevelExtreme()) {
if (!Settings::IsGPULevelExtreme() && Settings::IsFastmemEnabled()) {
fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer();
} else {
fastmem_arena = nullptr;

View file

@ -107,7 +107,7 @@ public:
* will be returned;
*/
std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
std::size_t size) const;
std::size_t size) const;
GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
PTEKind kind = PTEKind::INVALID, bool is_big_pages = true);

View file

@ -97,7 +97,7 @@ public:
VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
for (const auto [cpu_addr, size] : sequences) {
for (const auto& [cpu_addr, size] : sequences) {
InvalidateRegion(cpu_addr, size);
}
}

View file

@ -486,18 +486,18 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
{
std::scoped_lock lock{texture_cache.mutex};
for (const auto [addr, size] : sequences) {
for (const auto& [addr, size] : sequences) {
texture_cache.WriteMemory(addr, size);
}
}
{
std::scoped_lock lock{buffer_cache.mutex};
for (const auto [addr, size] : sequences) {
for (const auto& [addr, size] : sequences) {
buffer_cache.WriteMemory(addr, size);
}
}
{
for (const auto [addr, size] : sequences) {
for (const auto& [addr, size] : sequences) {
query_cache.InvalidateRegion(addr, size);
pipeline_cache.InvalidateRegion(addr, size);
}