diff options
| author | Dave Airlie <airlied@redhat.com> | 2025-09-15 13:16:53 +1000 |
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2025-09-15 13:16:54 +1000 |
| commit | 2cda9a063dd6f21e5294092a679afdcd0fc58549 (patch) | |
| tree | 26e9fbee5a8887216285f634653d12c50fb50eaf | |
| parent | cf99b26d3081b1f9961cc213415867706d19a414 (diff) | |
| parent | ba391a102ec11ab7f5b249e46dcf61ef836a57e7 (diff) | |
Merge tag 'drm-intel-gt-next-2025-09-12' of https://gitlab.freedesktop.org/drm/i915/kernel into drm-next
Driver Changes:
- Include the GuC registers in the error state (Daniele)
- Use memdup_user() (Thorsten)
- Selftest improvements (Jonathan)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://lore.kernel.org/r/aMPCfRObHMg6DZAs@jlahtine-mobl
| -rw-r--r-- | drivers/gpu/drm/i915/gem/i915_gem_context.c | 12 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 8 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h | 1 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/i915_gpu_error.c | 102 | ||||
| -rw-r--r-- | drivers/gpu/drm/i915/i915_gpu_error.h | 1 |
6 files changed, 116 insertions, 12 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 15835952352e..ed6599694835 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -2158,18 +2158,12 @@ static int set_context_image(struct i915_gem_context *ctx, goto out_ce; } - state = kmalloc(ce->engine->context_size, GFP_KERNEL); - if (!state) { - ret = -ENOMEM; + state = memdup_user(u64_to_user_ptr(user.image), ce->engine->context_size); + if (IS_ERR(state)) { + ret = PTR_ERR(state); goto out_ce; } - if (copy_from_user(state, u64_to_user_ptr(user.image), - ce->engine->context_size)) { - ret = -EFAULT; - goto out_state; - } - shmem_state = shmem_create_from_data(ce->engine->name, state, ce->engine->context_size); if (IS_ERR(shmem_state)) { diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 619c70c54ef9..4f252f704975 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -904,9 +904,7 @@ static void active_engine(struct kthread_work *work) arg->result = PTR_ERR(ce[count]); pr_err("[%s] Create context #%ld failed: %d!\n", engine->name, count, arg->result); - if (!count) - return; - while (--count) + while (count--) intel_context_put(ce[count]); return; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index e7ccfa520df3..384d1400134d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -46,6 +46,14 @@ static void guc_prepare_xfer(struct intel_gt *gt) /* allows for 5us (in 10ns units) before GT can go to RC6 */ intel_uncore_write(uncore, GUC_ARAT_C6DIS, 0x1FF); } + + /* + * Starting from IP 12.50 we need to enable the mirroring of GuC + * internal state to debug registers. This is always enabled on previous + * IPs. + */ + if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 50)) + intel_uncore_rmw(uncore, GUC_SHIM_CONTROL2, 0, GUC_ENABLE_DEBUG_REG); } static int guc_xfer_rsa_mmio(struct intel_uc_fw *guc_fw, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h index 3fd798837502..f73dab527547 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h @@ -96,6 +96,7 @@ #define GUC_GEN10_SHIM_WC_ENABLE (1<<21) #define GUC_SHIM_CONTROL2 _MMIO(0xc068) +#define GUC_ENABLE_DEBUG_REG (1<<11) #define GUC_IS_PRIVILEGED (1<<29) #define GSC_LOADS_HUC (1<<30) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 89f7c04a3330..7582ef34bf3f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -685,6 +685,74 @@ static void err_print_guc_ctb(struct drm_i915_error_state_buf *m, ctb->head, ctb->tail, ctb->desc_offset, ctb->cmds_offset, ctb->size); } +/* This list includes registers that are useful in debugging GuC hangs. */ +const struct { + u32 start; + u32 count; +} guc_hw_reg_state[] = { + { 0xc0b0, 2 }, + { 0xc000, 65 }, + { 0xc140, 1 }, + { 0xc180, 16 }, + { 0xc1dc, 10 }, + { 0xc300, 79 }, + { 0xc4b4, 47 }, + { 0xc574, 1 }, + { 0xc57c, 1 }, + { 0xc584, 11 }, + { 0xc5c0, 8 }, + { 0xc5e4, 1 }, + { 0xc5ec, 103 }, + { 0xc7c0, 1 }, + { 0xc0b0, 2 } +}; + +static u32 print_range_line(struct drm_i915_error_state_buf *m, u32 start, u32 *dump, u32 count) +{ + if (count >= 8) { + err_printf(m, "[0x%04x] 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", + start, dump[0], dump[1], dump[2], dump[3], + dump[4], dump[5], dump[6], dump[7]); + return 8; + } else if (count >= 4) { + err_printf(m, "[0x%04x] 0x%08x 0x%08x 0x%08x 0x%08x\n", + start, dump[0], dump[1], dump[2], dump[3]); + return 4; + } else if (count >= 2) { + err_printf(m, "[0x%04x] 0x%08x 0x%08x\n", start, dump[0], dump[1]); + return 2; + } + + err_printf(m, "[0x%04x] 0x%08x\n", start, dump[0]); + return 1; +} + +static void err_print_guc_hw_state(struct drm_i915_error_state_buf *m, u32 *hw_state) +{ + u32 total = 0; + int i; + + if (!hw_state) + return; + + err_printf(m, "GuC Register State:\n"); + + for (i = 0; i < ARRAY_SIZE(guc_hw_reg_state); i++) { + u32 entry = 0; + + while (entry < guc_hw_reg_state[i].count) { + u32 start = guc_hw_reg_state[i].start + entry * sizeof(u32); + u32 count = guc_hw_reg_state[i].count - entry; + u32 *values = hw_state + total + entry; + + entry += print_range_line(m, start, values, count); + } + + GEM_BUG_ON(entry != guc_hw_reg_state[i].count); + total += entry; + } +} + static void err_print_uc(struct drm_i915_error_state_buf *m, const struct intel_uc_coredump *error_uc) { @@ -693,6 +761,7 @@ static void err_print_uc(struct drm_i915_error_state_buf *m, intel_uc_fw_dump(&error_uc->guc_fw, &p); intel_uc_fw_dump(&error_uc->huc_fw, &p); err_printf(m, "GuC timestamp: 0x%08x\n", error_uc->guc.timestamp); + err_print_guc_hw_state(m, error_uc->guc.hw_state); intel_gpu_error_print_vma(m, NULL, error_uc->guc.vma_log); err_printf(m, "GuC CTB fence: %d\n", error_uc->guc.last_fence); err_print_guc_ctb(m, "Send", error_uc->guc.ctb + 0); @@ -1025,6 +1094,7 @@ static void cleanup_uc(struct intel_uc_coredump *uc) kfree(uc->huc_fw.file_wanted.path); i915_vma_coredump_free(uc->guc.vma_log); i915_vma_coredump_free(uc->guc.vma_ctb); + kfree(uc->guc.hw_state); kfree(uc); } @@ -1721,6 +1791,37 @@ static void gt_record_guc_ctb(struct intel_ctb_coredump *saved, saved->cmds_offset = ((void *)ctb->cmds) - blob_ptr; } +static u32 read_guc_state_reg(struct intel_uncore *uncore, int range, int count) +{ + GEM_BUG_ON(range >= ARRAY_SIZE(guc_hw_reg_state)); + GEM_BUG_ON(count >= guc_hw_reg_state[range].count); + + return intel_uncore_read(uncore, + _MMIO(guc_hw_reg_state[range].start + count * sizeof(u32))); +} + +static void gt_record_guc_hw_state(struct intel_uncore *uncore, + struct intel_uc_coredump *error_uc) +{ + u32 *hw_state; + u32 count = 0; + int i, j; + + for (i = 0; i < ARRAY_SIZE(guc_hw_reg_state); i++) + count += guc_hw_reg_state[i].count; + + hw_state = kcalloc(count, sizeof(u32), ALLOW_FAIL); + if (!hw_state) + return; + + count = 0; + for (i = 0; i < ARRAY_SIZE(guc_hw_reg_state); i++) + for (j = 0; j < guc_hw_reg_state[i].count; j++) + hw_state[count++] = read_guc_state_reg(uncore, i, j); + + error_uc->guc.hw_state = hw_state; +} + static struct intel_uc_coredump * gt_record_uc(struct intel_gt_coredump *gt, struct i915_vma_compress *compress) @@ -1755,6 +1856,7 @@ gt_record_uc(struct intel_gt_coredump *gt, uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc); gt_record_guc_ctb(error_uc->guc.ctb + 1, &uc->guc.ct.ctbs.recv, uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc); + gt_record_guc_hw_state(gt->_gt->uncore, error_uc); return error_uc; } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 182324979278..91b3df621a49 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -177,6 +177,7 @@ struct intel_gt_coredump { struct intel_ctb_coredump ctb[2]; struct i915_vma_coredump *vma_ctb; struct i915_vma_coredump *vma_log; + u32 *hw_state; u32 timestamp; u16 last_fence; bool is_guc_capture; |