mesa/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index)
{
   struct amdgpu_cs *acs = (struct amdgpu_cs*)job;
   struct amdgpu_winsys *ws = acs->ws;
   struct amdgpu_cs_context *cs = acs->cst;
   int i, r;
   uint32_t bo_list = 0;
   uint64_t seq_no = 0;
   bool has_user_fence = amdgpu_cs_has_user_fence(cs);
   bool use_bo_list_create = ws->info.drm_minor < 27;
   struct drm_amdgpu_bo_list_in bo_list_in;
   unsigned initial_num_real_buffers = cs->num_real_buffers;

#if DEBUG
   /* Prepare the buffer list. */
   if (ws->debug_all_bos) {
      /* The buffer list contains all buffers. This is a slow path that
       * ensures that no buffer is missing in the BO list.
       */
      unsigned num_handles = 0;
      struct drm_amdgpu_bo_list_entry *list =
         alloca(ws->num_buffers * sizeof(struct drm_amdgpu_bo_list_entry));
      struct amdgpu_winsys_bo *bo;

      simple_mtx_lock(&ws->global_bo_list_lock);
      LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, u.real.global_list_item) {
         list[num_handles].bo_handle = bo->u.real.kms_handle;
         list[num_handles].bo_priority = 0;
         ++num_handles;
      }

      r = amdgpu_bo_list_create_raw(ws->dev, ws->num_buffers, list, &bo_list);
      simple_mtx_unlock(&ws->global_bo_list_lock);
      if (r) {
         fprintf(stderr, "amdgpu: buffer list creation failed (%d)\n", r);
         goto cleanup;
      }
   } else
#endif
   {
      if (!amdgpu_add_sparse_backing_buffers(ws, cs)) {
         fprintf(stderr, "amdgpu: amdgpu_add_sparse_backing_buffers failed\n");
         r = -ENOMEM;
         goto cleanup;
      }

      struct drm_amdgpu_bo_list_entry *list =
         alloca((cs->num_real_buffers + 2) * sizeof(struct drm_amdgpu_bo_list_entry));

      unsigned num_handles = 0;
      for (i = 0; i < cs->num_real_buffers; ++i) {
         struct amdgpu_cs_buffer *buffer = &cs->real_buffers[i];
         assert(buffer->u.real.priority_usage != 0);

         list[num_handles].bo_handle = buffer->bo->u.real.kms_handle;
         list[num_handles].bo_priority =
            (util_last_bit(buffer->u.real.priority_usage) - 1) / 2;
         ++num_handles;
      }

      if (use_bo_list_create) {
         /* Legacy path creating the buffer list handle and passing it
          * to the CS ioctl. */
         r = amdgpu_bo_list_create_raw(ws->dev, num_handles, list, &bo_list);
         if (r) {
            fprintf(stderr, "amdgpu: buffer list creation failed (%d)\n", r);
            goto cleanup;
         }
      } else {
         /* Standard path passing the buffer list via the CS ioctl. */
         bo_list_in.operation = ~0;
         bo_list_in.list_handle = ~0;
         bo_list_in.bo_number = num_handles;
         bo_list_in.bo_info_size = sizeof(struct drm_amdgpu_bo_list_entry);
         bo_list_in.bo_info_ptr = (uint64_t)(uintptr_t)list;
      }
   }
   if (acs->ring_type == RING_GFX)
      ws->gfx_bo_list_counter += cs->num_real_buffers;

   if (acs->stop_exec_on_failure && acs->ctx->num_rejected_cs) {
      r = -ECANCELED;
   } else {
      struct drm_amdgpu_cs_chunk chunks[7];
      unsigned num_chunks = 0;

      /* BO list */
      if (!use_bo_list_create) {
         chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_BO_HANDLES;
         chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_bo_list_in) / 4;
         chunks[num_chunks].chunk_data = (uintptr_t)&bo_list_in;
         num_chunks++;
      }

      /* Fence dependencies. */
      unsigned num_dependencies = cs->fence_dependencies.num;
      if (num_dependencies) {
         struct drm_amdgpu_cs_chunk_dep *dep_chunk =
            alloca(num_dependencies * sizeof(*dep_chunk));

         for (unsigned i = 0; i < num_dependencies; i++) {
            struct amdgpu_fence *fence =
               (struct amdgpu_fence*)cs->fence_dependencies.list[i];

            assert(util_queue_fence_is_signalled(&fence->submitted));
            amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[i]);
         }

         chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
         chunks[num_chunks].length_dw = sizeof(dep_chunk[0]) / 4 * num_dependencies;
         chunks[num_chunks].chunk_data = (uintptr_t)dep_chunk;
         num_chunks++;
      }

      /* Syncobj dependencies. */
      unsigned num_syncobj_dependencies = cs->syncobj_dependencies.num;
      if (num_syncobj_dependencies) {
         struct drm_amdgpu_cs_chunk_sem *sem_chunk =
            alloca(num_syncobj_dependencies * sizeof(sem_chunk[0]));

         for (unsigned i = 0; i < num_syncobj_dependencies; i++) {
            struct amdgpu_fence *fence =
               (struct amdgpu_fence*)cs->syncobj_dependencies.list[i];

            if (!amdgpu_fence_is_syncobj(fence))
               continue;

            assert(util_queue_fence_is_signalled(&fence->submitted));
            sem_chunk[i].handle = fence->syncobj;
         }

         chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_IN;
         chunks[num_chunks].length_dw = sizeof(sem_chunk[0]) / 4 * num_syncobj_dependencies;
         chunks[num_chunks].chunk_data = (uintptr_t)sem_chunk;
         num_chunks++;
      }
      /* Submit the parallel compute IB first. */
      if (cs->ib[IB_PARALLEL_COMPUTE].ib_bytes > 0) {
         unsigned old_num_chunks = num_chunks;

         /* Add compute fence dependencies. */
         unsigned num_dependencies = cs->compute_fence_dependencies.num;
         if (num_dependencies) {
            struct drm_amdgpu_cs_chunk_dep *dep_chunk =
               alloca(num_dependencies * sizeof(*dep_chunk));

            for (unsigned i = 0; i < num_dependencies; i++) {
               struct amdgpu_fence *fence =
                  (struct amdgpu_fence*)cs->compute_fence_dependencies.list[i];

               assert(util_queue_fence_is_signalled(&fence->submitted));
               amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[i]);
            }

            chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
            chunks[num_chunks].length_dw = sizeof(dep_chunk[0]) / 4 * num_dependencies;
            chunks[num_chunks].chunk_data = (uintptr_t)dep_chunk;
            num_chunks++;
         }

         /* Add compute start fence dependencies. */
         unsigned num_start_dependencies = cs->compute_start_fence_dependencies.num;
         if (num_start_dependencies) {
            struct drm_amdgpu_cs_chunk_dep *dep_chunk =
               alloca(num_start_dependencies * sizeof(*dep_chunk));

            for (unsigned i = 0; i < num_start_dependencies; i++) {
               struct amdgpu_fence *fence =
                  (struct amdgpu_fence*)cs->compute_start_fence_dependencies.list[i];

               assert(util_queue_fence_is_signalled(&fence->submitted));
               amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[i]);
            }

            chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES;
            chunks[num_chunks].length_dw = sizeof(dep_chunk[0]) / 4 * num_start_dependencies;
            chunks[num_chunks].chunk_data = (uintptr_t)dep_chunk;
            num_chunks++;
         }

         /* Convert from dwords to bytes. */
         cs->ib[IB_PARALLEL_COMPUTE].ib_bytes *= 4;
         chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB;
         chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
         chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_PARALLEL_COMPUTE];
         num_chunks++;

         r = acs->noop ? 0 : amdgpu_cs_submit_raw2(ws->dev, acs->ctx->ctx, bo_list,
                                                   num_chunks, chunks, NULL);
         if (r)
            goto finalize;

         /* Back off the compute chunks. */
         num_chunks = old_num_chunks;
      }
      /* Syncobj signals. */
      unsigned num_syncobj_to_signal = cs->syncobj_to_signal.num;
      if (num_syncobj_to_signal) {
         struct drm_amdgpu_cs_chunk_sem *sem_chunk =
            alloca(num_syncobj_to_signal * sizeof(sem_chunk[0]));

         for (unsigned i = 0; i < num_syncobj_to_signal; i++) {
            struct amdgpu_fence *fence =
               (struct amdgpu_fence*)cs->syncobj_to_signal.list[i];

            assert(amdgpu_fence_is_syncobj(fence));
            sem_chunk[i].handle = fence->syncobj;
         }

         chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_OUT;
         chunks[num_chunks].length_dw = sizeof(sem_chunk[0]) / 4
                                        * num_syncobj_to_signal;
         chunks[num_chunks].chunk_data = (uintptr_t)sem_chunk;
         num_chunks++;
      }

      /* Fence */
      if (has_user_fence) {
         chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_FENCE;
         chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
         chunks[num_chunks].chunk_data = (uintptr_t)&acs->fence_chunk;
         num_chunks++;
      }

      /* IB */
      if (cs->ib[IB_PREAMBLE].ib_bytes) {
         chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB;
         chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
         chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_PREAMBLE];
         num_chunks++;
      }

      /* IB */
      cs->ib[IB_MAIN].ib_bytes *= 4; /* Convert from dwords to bytes. */
      chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB;
      chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
      chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_MAIN];
      num_chunks++;

      if (cs->secure) {
         cs->ib[IB_PREAMBLE].flags |= AMDGPU_IB_FLAGS_SECURE;
         cs->ib[IB_MAIN].flags |= AMDGPU_IB_FLAGS_SECURE;
      } else {
         cs->ib[IB_PREAMBLE].flags &= ~AMDGPU_IB_FLAGS_SECURE;
         cs->ib[IB_MAIN].flags &= ~AMDGPU_IB_FLAGS_SECURE;
      }

      assert(num_chunks <= ARRAY_SIZE(chunks));

      r = acs->noop ? 0 : amdgpu_cs_submit_raw2(ws->dev, acs->ctx->ctx, bo_list,
                                                num_chunks, chunks, &seq_no);
   }
finalize:
   if (r) {
      if (r == -ENOMEM)
         fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
      else if (r == -ECANCELED)
         fprintf(stderr, "amdgpu: The CS has been cancelled because the context is lost.\n");
      else
         fprintf(stderr, "amdgpu: The CS has been rejected, "
                 "see dmesg for more information (%i).\n", r);

      acs->ctx->num_rejected_cs++;
      ws->num_total_rejected_cs++;
   } else if (!acs->noop) {
      /* Success. */
      uint64_t *user_fence = NULL;

      /* Need to reserve 4 QWORDs for the user fence:
       *   QWORD[0]: completed fence
       *   QWORD[1]: preempted fence
       *   QWORD[2]: reset fence
       *   QWORD[3]: preempted then reset
       */
      if (has_user_fence)
         user_fence = acs->ctx->user_fence_cpu_address_base + acs->ring_type * 4;
      amdgpu_fence_submitted(cs->fence, seq_no, user_fence);
   }

   /* Cleanup. */
   if (bo_list)
      amdgpu_bo_list_destroy_raw(ws->dev, bo_list);

cleanup:
   /* If there was an error, signal the fence, because it won't be signalled
    * by the hardware. */
   if (r || acs->noop)
      amdgpu_fence_signalled(cs->fence);

   cs->error_code = r;

   /* Only decrement num_active_ioctls for those buffers where we incremented it. */
   for (i = 0; i < initial_num_real_buffers; i++)
      p_atomic_dec(&cs->real_buffers[i].bo->num_active_ioctls);
   for (i = 0; i < cs->num_slab_buffers; i++)
      p_atomic_dec(&cs->slab_buffers[i].bo->num_active_ioctls);
   for (i = 0; i < cs->num_sparse_buffers; i++)
      p_atomic_dec(&cs->sparse_buffers[i].bo->num_active_ioctls);

   amdgpu_cs_context_cleanup(ws, cs);
}
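On kernels new enough to accept AMDGPU_CHUNK_ID_BO_HANDLES (drm_minor >= 27), the whole submission collapses into a single ioctl: the buffer list, the user fence, and the IB all travel as chunks. The following is a minimal sketch, not Mesa code; `list`, `num_handles`, `fence_chunk`, `ib_info`, `ctx`, and `seq_no` are placeholders for the state the function above builds up.

/* Hypothetical sketch: one GFX submission as three chunks on a modern
 * kernel. All pointers are handed to the kernel as 64-bit integers,
 * hence the (uintptr_t) casts. */
struct drm_amdgpu_bo_list_in bo_list_in = {
   .operation = ~0,      /* no create/update of a list handle */
   .list_handle = ~0,    /* the list is inline in this ioctl */
   .bo_number = num_handles,
   .bo_info_size = sizeof(struct drm_amdgpu_bo_list_entry),
   .bo_info_ptr = (uint64_t)(uintptr_t)list,
};

struct drm_amdgpu_cs_chunk chunks[] = {
   { AMDGPU_CHUNK_ID_BO_HANDLES, sizeof(bo_list_in) / 4,
     (uintptr_t)&bo_list_in },
   { AMDGPU_CHUNK_ID_FENCE, sizeof(struct drm_amdgpu_cs_chunk_fence) / 4,
     (uintptr_t)&fence_chunk },
   { AMDGPU_CHUNK_ID_IB, sizeof(struct drm_amdgpu_cs_chunk_ib) / 4,
     (uintptr_t)&ib_info },
};

r = amdgpu_cs_submit_raw2(ws->dev, ctx, 0 /* no BO list handle */,
                          ARRAY_SIZE(chunks), chunks, &seq_no);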
libdrm/amdgpu/amdgpu_cs.c
drm_public int amdgpu_cs_submit(amdgpu_context_handle context,
                                uint64_t flags,
                                struct amdgpu_cs_request *ibs_request,
                                uint32_t number_of_requests)
{
        uint32_t i;
        int r;

        if (!context || !ibs_request)
                return -EINVAL;

        r = 0;
        for (i = 0; i < number_of_requests; i++) {
                r = amdgpu_cs_submit_one(context, ibs_request);
                if (r)
                        break;
                ibs_request++;
        }

        return r;
}
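amdgpu_cs_submit() is the request-based public entry point. A minimal caller might look roughly like the sketch below; it assumes a context `ctx`, a BO list handle `bo_list`, and an IB already written at GPU address `ib_va` with `ib_size_dw` dwords, all of which are hypothetical names, not part of the excerpt.

/* Hypothetical caller: submit a single IB on the GFX ring. */
struct amdgpu_cs_ib_info ib_info = {
        .ib_mc_address = ib_va,      /* GPU VA of the command buffer */
        .size = ib_size_dw,          /* IB size in dwords */
};
struct amdgpu_cs_request request = {
        .ip_type = AMDGPU_HW_IP_GFX,
        .ring = 0,
        .resources = bo_list,        /* amdgpu_bo_list_handle */
        .number_of_ibs = 1,
        .ibs = &ib_info,
};

int r = amdgpu_cs_submit(ctx, 0 /* flags */, &request, 1);
/* On success, request.seq_no identifies this submission for later waits. */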
/**
 * Submit one command submission request to the kernel DRM.
 *
 * \param context     - \c [in] GPU context
 * \param ibs_request - \c [in] Pointer to the submission request
 *
 * \return 0 on success, otherwise a POSIX error code
 * \sa amdgpu_cs_submit()
 */
static int amdgpu_cs_submit_one(amdgpu_context_handle context,
                                struct amdgpu_cs_request *ibs_request)
{
        struct drm_amdgpu_cs_chunk *chunks;
        struct drm_amdgpu_cs_chunk_data *chunk_data;
        struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
        struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL;
        amdgpu_device_handle dev = context->dev;
        struct list_head *sem_list;
        amdgpu_semaphore_handle sem, tmp;
        uint32_t i, size, num_chunks, bo_list_handle = 0, sem_count = 0;
        uint64_t seq_no;
        bool user_fence;
        int r = 0;

        if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM)
                return -EINVAL;
        if (ibs_request->ring >= AMDGPU_CS_MAX_RINGS)
                return -EINVAL;
        if (ibs_request->number_of_ibs == 0) {
                ibs_request->seq_no = AMDGPU_NULL_SUBMIT_SEQ;
                return 0;
        }
        user_fence = (ibs_request->fence_info.handle != NULL);

        size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1;

        chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);

        size = ibs_request->number_of_ibs + (user_fence ? 1 : 0);

        chunk_data = alloca(sizeof(struct drm_amdgpu_cs_chunk_data) * size);

        if (ibs_request->resources)
                bo_list_handle = ibs_request->resources->handle;

        num_chunks = ibs_request->number_of_ibs;
        /* IB chunks */
        for (i = 0; i < ibs_request->number_of_ibs; i++) {
                struct amdgpu_cs_ib_info *ib;
                chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB;
                chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
                chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];

                ib = &ibs_request->ibs[i];

                chunk_data[i].ib_data._pad = 0;
                chunk_data[i].ib_data.va_start = ib->ib_mc_address;
                chunk_data[i].ib_data.ib_bytes = ib->size * 4;
                chunk_data[i].ib_data.ip_type = ibs_request->ip_type;
                chunk_data[i].ib_data.ip_instance = ibs_request->ip_instance;
                chunk_data[i].ib_data.ring = ibs_request->ring;
                chunk_data[i].ib_data.flags = ib->flags;
        }

        pthread_mutex_lock(&context->sequence_mutex);

        if (user_fence) {
                i = num_chunks++;

                /* fence chunk */
                chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE;
                chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
                chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];

                /* fence bo handle */
                chunk_data[i].fence_data.handle = ibs_request->fence_info.handle->handle;
                /* offset */
                chunk_data[i].fence_data.offset =
                        ibs_request->fence_info.offset * sizeof(uint64_t);
        }
        if (ibs_request->number_of_dependencies) {
                dependencies = alloca(sizeof(struct drm_amdgpu_cs_chunk_dep) *
                                      ibs_request->number_of_dependencies);
                if (!dependencies) {
                        r = -ENOMEM;
                        goto error_unlock;
                }

                for (i = 0; i < ibs_request->number_of_dependencies; ++i) {
                        struct amdgpu_cs_fence *info = &ibs_request->dependencies[i];
                        struct drm_amdgpu_cs_chunk_dep *dep = &dependencies[i];
                        dep->ip_type = info->ip_type;
                        dep->ip_instance = info->ip_instance;
                        dep->ring = info->ring;
                        dep->ctx_id = info->context->id;
                        dep->handle = info->fence;
                }

                i = num_chunks++;

                /* dependencies chunk */
                chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
                chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4
                                      * ibs_request->number_of_dependencies;
                chunks[i].chunk_data = (uint64_t)(uintptr_t)dependencies;
        }

        /* ... semaphore chunk setup and the final submission call elided ... */
}
drm_public int amdgpu_cs_submit_raw2(amdgpu_device_handle dev,
                                     amdgpu_context_handle context,
                                     uint32_t bo_list_handle,
                                     int num_chunks,
                                     struct drm_amdgpu_cs_chunk *chunks,
                                     uint64_t *seq_no)
{
        union drm_amdgpu_cs cs;
        uint64_t *chunk_array;
        int i, r;

        memset(&cs, 0, sizeof(cs));
        chunk_array = alloca(sizeof(uint64_t) * num_chunks);
        for (i = 0; i < num_chunks; i++)
                chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
        cs.in.chunks = (uint64_t)(uintptr_t)chunk_array;
        cs.in.ctx_id = context->id;
        cs.in.bo_list_handle = bo_list_handle;
        cs.in.num_chunks = num_chunks;
        r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CS,
                                &cs, sizeof(cs));
        if (!r && seq_no)
                *seq_no = cs.out.handle;
        return r;
}
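The sequence number the kernel returns in cs.out.handle is what all later fence queries key on. A hedged sketch of waiting for the submission via libdrm's query API; `context` and `seq_no` are assumed to come from the call above.

/* Hypothetical follow-up: block until the submission identified by
 * `seq_no` has retired on the GFX ring. */
struct amdgpu_cs_fence fence = {
        .context = context,
        .ip_type = AMDGPU_HW_IP_GFX,
        .ip_instance = 0,
        .ring = 0,
        .fence = seq_no,
};
uint32_t expired = 0;

int r = amdgpu_cs_query_fence_status(&fence, AMDGPU_TIMEOUT_INFINITE,
                                     0 /* flags */, &expired);
if (!r && expired) {
        /* The GPU has completed this submission. */
}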
upstream-kernel/linux/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
        DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(AMDGPU_SCHED, amdgpu_sched_ioctl, DRM_MASTER),
        DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(AMDGPU_FENCE_TO_HANDLE, amdgpu_cs_fence_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
        /* KMS */
        DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(AMDGPU_GEM_WAIT_IDLE, amdgpu_gem_wait_idle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(AMDGPU_CS, amdgpu_cs_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(AMDGPU_INFO, amdgpu_info_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(AMDGPU_WAIT_CS, amdgpu_cs_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(AMDGPU_WAIT_FENCES, amdgpu_cs_wait_fences_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(AMDGPU_GEM_METADATA, amdgpu_gem_metadata_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
};
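This table is indexed by the driver-private ioctl number, which is how drmCommandWriteRead(dev->fd, DRM_AMDGPU_CS, ...) on the libdrm side lands in amdgpu_cs_ioctl(). The glue is the shared UAPI definition; the lines below are quoted from memory of include/uapi/drm/amdgpu_drm.h and should be checked against the kernel headers you actually build against.

#define DRM_AMDGPU_CS           0x04
#define DRM_IOCTL_AMDGPU_CS     DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CS, union drm_amdgpu_cs)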
upstream-kernel/linux/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
        struct amdgpu_device *adev = drm_to_adev(dev);
        union drm_amdgpu_cs *cs = data;
        struct amdgpu_cs_parser parser = {};
        bool reserved_buffers = false;
        int r;

        if (amdgpu_ras_intr_triggered())
                return -EHWPOISON;

        if (!adev->accel_working)
                return -EBUSY;

        parser.adev = adev;
        parser.filp = filp;

        r = amdgpu_cs_parser_init(&parser, data);
        if (r) {
                if (printk_ratelimit())
                        DRM_ERROR("Failed to initialize parser %d!\n", r);
                goto out;
        }

        r = amdgpu_cs_ib_fill(adev, &parser);
        if (r)
                goto out;

        r = amdgpu_cs_dependencies(adev, &parser);
        if (r) {
                DRM_ERROR("Failed in the dependencies handling %d!\n", r);
                goto out;
        }

        r = amdgpu_cs_parser_bos(&parser, data);
        if (r) {
                if (r == -ENOMEM)
                        DRM_ERROR("Not enough memory for command submission!\n");
                else if (r != -ERESTARTSYS && r != -EAGAIN)
                        DRM_ERROR("Failed to process the buffer list %d!\n", r);
                goto out;
        }

        reserved_buffers = true;

        trace_amdgpu_cs_ibs(&parser);

        r = amdgpu_cs_vm_handling(&parser);
        if (r)
                goto out;

        r = amdgpu_cs_submit(&parser, cs);

out:
        amdgpu_cs_parser_fini(&parser, r, reserved_buffers);

        return r;
}
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
                            union drm_amdgpu_cs *cs)
{
        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
        struct drm_sched_entity *entity = p->entity;
        struct amdgpu_bo_list_entry *e;
        struct amdgpu_job *job;
        uint64_t seq;
        int r;

        job = p->job;
        p->job = NULL;

        r = drm_sched_job_init(&job->base, entity, &fpriv->vm);
        if (r)
                goto error_unlock;

        /* No memory allocation is allowed while holding the notifier lock.
         * The lock is held until amdgpu_cs_submit is finished and the fence
         * is added to the BOs.
         */
        mutex_lock(&p->adev->notifier_lock);

        /* If userptrs were invalidated after amdgpu_cs_parser_bos(), return
         * -EAGAIN; drmIoctl() in libdrm will restart the amdgpu_cs ioctl.
         */
        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

                r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
        }
        if (r) {
                r = -EAGAIN;
                goto error_abort;
        }

        p->fence = dma_fence_get(&job->base.s_fence->finished);

        amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
        amdgpu_cs_post_dependencies(p);

        if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
            !p->ctx->preamble_presented) {
                job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
                p->ctx->preamble_presented = true;
        }

        cs->out.handle = seq;
        job->uf_sequence = seq;

        amdgpu_job_free_resources(job);

        trace_amdgpu_cs_ioctl(job);
        amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
        drm_sched_entity_push_job(&job->base, entity);

        amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);

        amdgpu_bo_list_for_each_entry(e, p->bo_list) {
                struct dma_resv *resv = e->tv.bo->base.resv;
                struct dma_fence_chain *chain = e->chain;

                if (!chain)
                        continue;
                /*
                 * Work around dma_resv shortcomings by wrapping up the
                 * submission in a dma_fence_chain and adding it as the
                 * exclusive fence, but first add the submission as a shared
                 * fence to make sure that shared fences never signal before
                 * the exclusive one.
                 */
                dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
                                     dma_fence_get(p->fence), 1);

                dma_resv_add_shared_fence(resv, p->fence);
                rcu_assign_pointer(resv->fence_excl, &chain->base);
                e->chain = NULL;
        }

        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
        mutex_unlock(&p->adev->notifier_lock);

        return 0;

error_abort:
        drm_sched_job_cleanup(&job->base);
        mutex_unlock(&p->adev->notifier_lock);

error_unlock:
        amdgpu_job_free(job);
        return r;
}
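The -EAGAIN path above works because libdrm's ioctl wrapper retries transparently. Its core loop, paraphrased here from xf86drm.c, is simply:

/* Paraphrased from libdrm's xf86drm.c: the CS ioctl is restarted on
 * EINTR and EAGAIN, which is exactly what the userptr invalidation
 * path in amdgpu_cs_submit() relies on. */
drm_public int drmIoctl(int fd, unsigned long request, void *arg)
{
        int ret;

        do {
                ret = ioctl(fd, request, arg);
        } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
        return ret;
}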
upstream-kernel/linux/drivers/gpu/drm/scheduler/sched_main.c
/**
 * drm_sched_job_init - init a scheduler job
 *
 * @job: scheduler job to init
 * @entity: scheduler entity to use
 * @owner: job owner for debugging
 *
 * Refer to drm_sched_entity_push_job() documentation
 * for locking considerations.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int drm_sched_job_init(struct drm_sched_job *job,
                       struct drm_sched_entity *entity,
                       void *owner)
{
        struct drm_gpu_scheduler *sched;

        drm_sched_entity_select_rq(entity);
        if (!entity->rq)
                return -ENOENT;

        sched = entity->rq->sched;

        job->sched = sched;
        job->entity = entity;
        job->s_priority = entity->rq - sched->sched_rq;
        job->s_fence = drm_sched_fence_create(entity, owner);
        if (!job->s_fence)
                return -ENOMEM;

        job->id = atomic64_inc_return(&sched->job_id_count);

        INIT_LIST_HEAD(&job->list);

        return 0;
}
EXPORT_SYMBOL(drm_sched_job_init);