Skip to content

Commit

Permalink
wined3d: Implement a copy-on-write scheme for whole buffer uploads.
Browse files Browse the repository at this point in the history
For discard maps on deferred contexts, we currently blit from the upload buffer
to the resource. This is necessary because command lists can be reused and
submitted multiple times—we cannot simply have the buffer take ownership, as for
discard maps on immediate contexts.

However, it is very common for applications to use command lists only once
before throwing them away—in essence taking advantage of the feature only for
the multithreading advantages it allows.

Therefore we take advantage of this pattern by trying to rename the buffer
anyway. In order to do this we introduce a refcount for BO pointers. When
writing to a buffer BO, we first check whether the buffer "owns" the BO—i.e.
whether it has a refcount of 1—and if not, we create a new BO for the buffer and
copy the contents of the old BO to the new BO. That is, we perform
mostly-transparent copy-on-write.

This improves performance, and reduces CPU usage, in Assassin's Creed: Unity.
  • Loading branch information
Zebediah Figura authored and julliard committed Aug 10, 2023
1 parent de71649 commit c6b9aa7
Show file tree
Hide file tree
Showing 7 changed files with 109 additions and 11 deletions.
87 changes: 78 additions & 9 deletions dlls/wined3d/buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,11 @@ static void wined3d_buffer_gl_destroy_buffer_object(struct wined3d_buffer_gl *bu
buffer_gl->b.bo_user.valid = false;
list_remove(&buffer_gl->b.bo_user.entry);
}
wined3d_context_gl_destroy_bo(context_gl, bo_gl);
heap_free(bo_gl);
if (!--bo_gl->b.refcount)
{
wined3d_context_gl_destroy_bo(context_gl, bo_gl);
heap_free(bo_gl);
}
buffer_gl->b.buffer_object = NULL;
}

Expand Down Expand Up @@ -1015,6 +1018,7 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc

if (flags & WINED3D_MAP_WRITE)
{
wined3d_buffer_acquire_bo_for_write(buffer, context);
wined3d_buffer_invalidate_location(buffer, ~WINED3D_LOCATION_BUFFER);
buffer_invalidate_bo_range(buffer, dirty_offset, dirty_size);
}
Expand Down Expand Up @@ -1127,19 +1131,60 @@ static void wined3d_buffer_set_bo(struct wined3d_buffer *buffer, struct wined3d_
{
struct wined3d_bo_user *bo_user;

/* The previous BO might have users in other buffers which were valid,
* and should in theory remain valid. The problem is that it's not easy
* to tell which users belong to this buffer and which don't. We could
* add a field, but for now it's easier and probably fine to just
* invalidate every user. */
LIST_FOR_EACH_ENTRY(bo_user, &prev_bo->users, struct wined3d_bo_user, entry)
bo_user->valid = false;
list_init(&prev_bo->users);

assert(list_empty(&bo->users));

wined3d_context_destroy_bo(context, prev_bo);
heap_free(prev_bo);
if (!--prev_bo->refcount)
{
wined3d_context_destroy_bo(context, prev_bo);
heap_free(prev_bo);
}
}

buffer->buffer_object = bo;
}

/* Make sure "buffer" does not share its BO with anyone else before the BO is
 * written to.
 *
 * Nothing is done if the buffer has no BO, or if the BO's refcount is 1 (i.e.
 * the buffer is the only owner). Otherwise the buffer's
 * WINED3D_LOCATION_BUFFER location is unloaded and prepared again, which is
 * expected to give the buffer a different BO, and the full contents
 * (buffer->resource.size bytes) of the previous BO are copied into the new
 * one.
 *
 * buffer:  the buffer about to be written.
 * context: the context used to unload, prepare and copy. */
void wined3d_buffer_acquire_bo_for_write(struct wined3d_buffer *buffer, struct wined3d_context *context)
{
    const struct wined3d_range range = {.size = buffer->resource.size};
    struct wined3d_bo_address dst, src;
    struct wined3d_bo *bo;

    if (!(bo = buffer->buffer_object))
        return;

    /* If we are the only owner of this BO, there is nothing to do. */
    if (bo->refcount == 1)
        return;

    TRACE("Performing copy-on-write for BO %p.\n", bo);

    /* Grab a temporary reference to the current BO; it is the source of the
     * copy below. It's okay if this overflows, because the following unload
     * decrements the counter again. */
    ++bo->refcount;

    /* Unload and re-prepare to get a new buffer. This is a bit cheap and not
     * perfectly idiomatic—we should really just factor out an adapter-agnostic
     * function to create a BO and then use wined3d_buffer_set_bo()—but it'll
     * do nonetheless. */
    wined3d_buffer_unload_location(buffer, context, WINED3D_LOCATION_BUFFER);
    wined3d_buffer_prepare_location(buffer, context, WINED3D_LOCATION_BUFFER);

    /* And finally, perform the actual copy. */
    assert(buffer->buffer_object != bo);
    dst.buffer_object = buffer->buffer_object;
    dst.addr = NULL;
    src.buffer_object = bo;
    src.addr = NULL;
    wined3d_context_copy_bo_address(context, &dst, &src, 1, &range, WINED3D_MAP_WRITE | WINED3D_MAP_DISCARD);

    /* Drop the temporary reference taken above. The unload only released the
     * reference that belonged to the buffer itself; without this release the
     * old BO's refcount would stay one too high and the BO would never be
     * destroyed once its remaining owners let go of it. */
    if (!--bo->refcount)
    {
        wined3d_context_destroy_bo(context, bo);
        heap_free(bo);
    }
}

void wined3d_buffer_copy_bo_address(struct wined3d_buffer *dst_buffer, struct wined3d_context *context,
unsigned int dst_offset, const struct wined3d_const_bo_address *src_addr, unsigned int size)
{
Expand All @@ -1151,6 +1196,9 @@ void wined3d_buffer_copy_bo_address(struct wined3d_buffer *dst_buffer, struct wi
if (!dst_offset && size == dst_buffer->resource.size)
map_flags |= WINED3D_MAP_DISCARD;

if (map_flags & WINED3D_MAP_DISCARD)
wined3d_buffer_acquire_bo_for_write(dst_buffer, context);

dst_location = wined3d_buffer_get_memory(dst_buffer, context, &dst_addr);
dst_addr.addr += dst_offset;

Expand Down Expand Up @@ -1182,8 +1230,26 @@ void wined3d_buffer_copy(struct wined3d_buffer *dst_buffer, unsigned int dst_off
void wined3d_buffer_update_sub_resource(struct wined3d_buffer *buffer, struct wined3d_context *context,
const struct upload_bo *upload_bo, unsigned int offset, unsigned int size)
{
if (upload_bo->flags & UPLOAD_BO_RENAME_ON_UNMAP)
struct wined3d_bo *bo = upload_bo->addr.buffer_object;
uint32_t flags = upload_bo->flags;

/* Try to take this buffer for COW. Don't take it if we've saturated the
* refcount. */
if (!offset && size == buffer->resource.size
&& bo && bo->refcount < UINT8_MAX && !(upload_bo->flags & UPLOAD_BO_RENAME_ON_UNMAP))
{
flags |= UPLOAD_BO_RENAME_ON_UNMAP;
++bo->refcount;
}

if (flags & UPLOAD_BO_RENAME_ON_UNMAP)
{
/* Don't increment the refcount. UPLOAD_BO_RENAME_ON_UNMAP transfers an
* existing reference.
*
* FIXME: We could degenerate RENAME to a copy + free and rely on the
* COW logic to detect this case.
*/
wined3d_buffer_set_bo(buffer, context, upload_bo->addr.buffer_object);
wined3d_buffer_validate_location(buffer, WINED3D_LOCATION_BUFFER);
wined3d_buffer_invalidate_location(buffer, ~WINED3D_LOCATION_BUFFER);
Expand Down Expand Up @@ -1572,8 +1638,11 @@ static void wined3d_buffer_vk_unload_location(struct wined3d_buffer *buffer,
buffer->bo_user.valid = false;
list_remove(&buffer->bo_user.entry);
}
wined3d_context_vk_destroy_bo(context_vk, bo_vk);
heap_free(bo_vk);
if (!--bo_vk->b.refcount)
{
wined3d_context_vk_destroy_bo(context_vk, bo_vk);
heap_free(bo_vk);
}
buffer->buffer_object = NULL;
break;

Expand Down
3 changes: 3 additions & 0 deletions dlls/wined3d/context_gl.c
Original file line number Diff line number Diff line change
Expand Up @@ -4163,6 +4163,7 @@ static void context_gl_load_unordered_access_resources(struct wined3d_context_gl
if (view->resource->type == WINED3D_RTYPE_BUFFER)
{
buffer = buffer_from_resource(view->resource);
wined3d_buffer_acquire_bo_for_write(buffer, &context_gl->c);
wined3d_buffer_load_location(buffer, &context_gl->c, WINED3D_LOCATION_BUFFER);
wined3d_unordered_access_view_invalidate_location(view, ~WINED3D_LOCATION_BUFFER);
wined3d_context_gl_reference_buffer(context_gl, buffer);
Expand Down Expand Up @@ -4193,6 +4194,8 @@ static void context_gl_load_stream_output_buffers(struct wined3d_context_gl *con
if (!(buffer = state->stream_output[i].buffer))
continue;

wined3d_buffer_acquire_bo_for_write(buffer, &context_gl->c);

wined3d_buffer_load(buffer, &context_gl->c, state);
wined3d_buffer_invalidate_location(buffer, ~WINED3D_LOCATION_BUFFER);
wined3d_context_gl_reference_buffer(context_gl, buffer);
Expand Down
5 changes: 5 additions & 0 deletions dlls/wined3d/context_vk.c
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,7 @@ static bool wined3d_context_vk_create_slab_bo(struct wined3d_context_vk *context
*bo = slab->bo;
bo->memory = NULL;
bo->slab = slab;
bo->b.refcount = 1;
bo->b.client_map_count = 0;
bo->b.map_ptr = NULL;
bo->b.buffer_offset = idx * object_size;
Expand Down Expand Up @@ -557,6 +558,7 @@ BOOL wined3d_context_vk_create_bo(struct wined3d_context_vk *context_vk, VkDevic
return FALSE;
}

bo->b.refcount = 1;
bo->b.client_map_count = 0;
bo->b.map_ptr = NULL;
bo->b.buffer_offset = 0;
Expand Down Expand Up @@ -3415,6 +3417,8 @@ static void wined3d_context_vk_load_buffers(struct wined3d_context_vk *context_v
if (!(buffer = state->stream_output[i].buffer))
continue;

wined3d_buffer_acquire_bo_for_write(buffer, &context_vk->c);

buffer_vk = wined3d_buffer_vk(buffer);
wined3d_buffer_load(&buffer_vk->b, &context_vk->c, state);
wined3d_buffer_vk_barrier(buffer_vk, context_vk, WINED3D_BIND_STREAM_OUTPUT);
Expand Down Expand Up @@ -3525,6 +3529,7 @@ static void wined3d_context_vk_load_shader_resources(struct wined3d_context_vk *
uav_vk = wined3d_unordered_access_view_vk(uav);
if (uav->resource->type == WINED3D_RTYPE_BUFFER)
{
wined3d_buffer_acquire_bo_for_write(buffer_from_resource(uav->resource), &context_vk->c);
if (!uav_vk->view_vk.bo_user.valid)
{
wined3d_unordered_access_view_vk_update(uav_vk, context_vk);
Expand Down
7 changes: 5 additions & 2 deletions dlls/wined3d/cs.c
Original file line number Diff line number Diff line change
Expand Up @@ -4494,8 +4494,11 @@ static void wined3d_command_list_destroy_object(void *object)

if ((bo = list->uploads[i].bo))
{
wined3d_context_destroy_bo(context, bo);
heap_free(bo);
if (!--bo->refcount)
{
wined3d_context_destroy_bo(context, bo);
heap_free(bo);
}
}
else
{
Expand Down
1 change: 1 addition & 0 deletions dlls/wined3d/device.c
Original file line number Diff line number Diff line change
Expand Up @@ -1181,6 +1181,7 @@ bool wined3d_device_gl_create_bo(struct wined3d_device_gl *device_gl, struct win
bo->b.memory_offset = bo->b.buffer_offset;
bo->b.map_ptr = NULL;
bo->b.client_map_count = 0;
bo->b.refcount = 1;

return true;
}
Expand Down
10 changes: 10 additions & 0 deletions dlls/wined3d/view.c
Original file line number Diff line number Diff line change
Expand Up @@ -1667,9 +1667,14 @@ void wined3d_unordered_access_view_gl_clear(struct wined3d_unordered_access_view
get_buffer_view_range(buffer, &view_gl->v.desc, &format_gl->f, &offset, &size);

if (!offset && size == buffer->resource.size)
{
wined3d_buffer_prepare_location(buffer, &context_gl->c, WINED3D_LOCATION_BUFFER);
}
else
{
wined3d_buffer_acquire_bo_for_write(buffer, &context_gl->c);
wined3d_buffer_load_location(buffer, &context_gl->c, WINED3D_LOCATION_BUFFER);
}
wined3d_unordered_access_view_invalidate_location(&view_gl->v, ~WINED3D_LOCATION_BUFFER);

bo_gl = wined3d_bo_gl(buffer->buffer_object);
Expand Down Expand Up @@ -2073,9 +2078,14 @@ void wined3d_unordered_access_view_vk_clear(struct wined3d_unordered_access_view

get_buffer_view_range(buffer, view_desc, view_format, &offset, &size);
if (!offset && size == buffer->resource.size)
{
wined3d_buffer_prepare_location(buffer, &context_vk->c, WINED3D_LOCATION_BUFFER);
}
else
{
wined3d_buffer_acquire_bo_for_write(buffer, &context_vk->c);
wined3d_buffer_load_location(buffer, &context_vk->c, WINED3D_LOCATION_BUFFER);
}
wined3d_buffer_validate_location(buffer, WINED3D_LOCATION_BUFFER);
wined3d_buffer_invalidate_location(buffer, ~WINED3D_LOCATION_BUFFER);
}
Expand Down
7 changes: 7 additions & 0 deletions dlls/wined3d/wined3d_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -1609,6 +1609,11 @@ struct wined3d_bo
size_t memory_offset;
unsigned int client_map_count;
bool coherent;
/* Number of resources referencing this BO, used for COW tracking.
* If a resource has this BO as a location and wants to write to it, it
* needs to make a copy unless it's the only owner (refcount == 1).
* Deferred contexts may also hold a reference. */
uint8_t refcount;
};

struct wined3d_bo_user
Expand Down Expand Up @@ -4402,6 +4407,8 @@ static inline void wined3d_buffer_validate_user(struct wined3d_buffer *buffer)
list_add_head(&buffer->buffer_object->users, &buffer->bo_user.entry);
}

void wined3d_buffer_acquire_bo_for_write(struct wined3d_buffer *buffer,
struct wined3d_context *context) DECLSPEC_HIDDEN;
void wined3d_buffer_cleanup(struct wined3d_buffer *buffer) DECLSPEC_HIDDEN;
void wined3d_buffer_copy(struct wined3d_buffer *dst_buffer, unsigned int dst_offset,
struct wined3d_buffer *src_buffer, unsigned int src_offset, unsigned int size) DECLSPEC_HIDDEN;
Expand Down

0 comments on commit c6b9aa7

Please sign in to comment.