mirror of
https://github.com/torvalds/linux.git
synced 2024-11-25 13:41:51 +00:00
drm/amdgpu: Add a new flag to AMDGPU_CTX_OP_QUERY_STATE2
Add AMDGPU_CTX_QUERY2_FLAGS_RAS_CE/UE, which indicate whether any error occurred between the previous query and this query. Signed-off-by: xinhui pan <xinhui.pan@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
791c47694f
commit
ae363a212b
@ -26,6 +26,7 @@
|
||||
#include <drm/drm_auth.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_sched.h"
|
||||
#include "amdgpu_ras.h"
|
||||
|
||||
#define to_amdgpu_ctx_entity(e) \
|
||||
container_of((e), struct amdgpu_ctx_entity, entity)
|
||||
@ -344,6 +345,7 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
|
||||
{
|
||||
struct amdgpu_ctx *ctx;
|
||||
struct amdgpu_ctx_mgr *mgr;
|
||||
uint32_t ras_counter;
|
||||
|
||||
if (!fpriv)
|
||||
return -EINVAL;
|
||||
@ -368,6 +370,21 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
|
||||
if (atomic_read(&ctx->guilty))
|
||||
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
|
||||
|
||||
/*query ue count*/
|
||||
ras_counter = amdgpu_ras_query_error_count(adev, false);
|
||||
/* the RAS counter is monotonically increasing */
|
||||
if (ras_counter != ctx->ras_counter_ue) {
|
||||
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
|
||||
ctx->ras_counter_ue = ras_counter;
|
||||
}
|
||||
|
||||
/*query ce count*/
|
||||
ras_counter = amdgpu_ras_query_error_count(adev, true);
|
||||
if (ras_counter != ctx->ras_counter_ce) {
|
||||
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
|
||||
ctx->ras_counter_ce = ras_counter;
|
||||
}
|
||||
|
||||
mutex_unlock(&mgr->lock);
|
||||
return 0;
|
||||
}
|
||||
|
@ -49,6 +49,8 @@ struct amdgpu_ctx {
|
||||
enum drm_sched_priority override_priority;
|
||||
struct mutex lock;
|
||||
atomic_t guilty;
|
||||
uint32_t ras_counter_ce;
|
||||
uint32_t ras_counter_ue;
|
||||
};
|
||||
|
||||
struct amdgpu_ctx_mgr {
|
||||
|
@ -210,6 +210,9 @@ union drm_amdgpu_bo_list {
|
||||
#define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST (1<<1)
|
||||
/* indicates that some job from this context once caused a GPU hang */
|
||||
#define AMDGPU_CTX_QUERY2_FLAGS_GUILTY (1<<2)
|
||||
/* indicates that some errors were detected by RAS */
|
||||
#define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE (1<<3)
|
||||
#define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE (1<<4)
|
||||
|
||||
/* Context priority level */
|
||||
#define AMDGPU_CTX_PRIORITY_UNSET -2048
|
||||
|
Loading…
Reference in New Issue
Block a user