diff options
Diffstat (limited to 'drivers/gpu/drm/msm/adreno/a6xx_gpu.c')
| -rw-r--r-- | drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 193 |
1 files changed, 182 insertions, 11 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 14904c4a6025..52ef481548b6 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -68,6 +68,8 @@ static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); uint32_t wptr; unsigned long flags; @@ -81,12 +83,17 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) /* Make sure to wrap wptr if we need to */ wptr = get_wptr(ring); - spin_unlock_irqrestore(&ring->preempt_lock, flags); - - /* Make sure everything is posted before making a decision */ - mb(); + /* Update HW if this is the current ring and we are not in preempt*/ + if (!a6xx_in_preempt(a6xx_gpu)) { + if (a6xx_gpu->cur_ring == ring) + gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); + else + ring->restore_wptr = true; + } else { + ring->restore_wptr = true; + } - gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); + spin_unlock_irqrestore(&ring->preempt_lock, flags); } static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter, @@ -138,12 +145,14 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, /* * Write the new TTBR0 to the memstore. This is good for debugging. + * Needed for preemption */ - OUT_PKT7(ring, CP_MEM_WRITE, 4); + OUT_PKT7(ring, CP_MEM_WRITE, 5); OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr))); OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr))); OUT_RING(ring, lower_32_bits(ttbr)); - OUT_RING(ring, (asid << 16) | upper_32_bits(ttbr)); + OUT_RING(ring, upper_32_bits(ttbr)); + OUT_RING(ring, ctx->seqno); /* * Sync both threads after switching pagetables and enable BR only @@ -268,6 +277,34 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) a6xx_flush(gpu, ring); } +static void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring, + struct a6xx_gpu *a6xx_gpu, struct msm_gpu_submitqueue *queue) +{ + OUT_PKT7(ring, CP_SET_PSEUDO_REG, 12); + + OUT_RING(ring, SMMU_INFO); + /* don't save SMMU, we write the record from the kernel instead */ + OUT_RING(ring, 0); + OUT_RING(ring, 0); + + /* privileged and non secure buffer save */ + OUT_RING(ring, NON_SECURE_SAVE_ADDR); + OUT_RING(ring, lower_32_bits( + a6xx_gpu->preempt_iova[ring->id])); + OUT_RING(ring, upper_32_bits( + a6xx_gpu->preempt_iova[ring->id])); + + /* user context buffer save, seems to be unnused by fw */ + OUT_RING(ring, NON_PRIV_SAVE_ADDR); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + + OUT_RING(ring, COUNTER); + /* seems OK to set to 0 to disable it */ + OUT_RING(ring, 0); + OUT_RING(ring, 0); +} + static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) { unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT; @@ -285,6 +322,13 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) a6xx_set_pagetable(a6xx_gpu, ring, submit->queue->ctx); + /* + * If preemption is enabled, then set the pseudo register for the save + * sequence + */ + if (gpu->nr_rings > 1) + a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue); + get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0), rbmemptr_stats(ring, index, cpcycles_start)); get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER, @@ -376,6 +420,8 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence))); OUT_RING(ring, submit->seqno); + a6xx_gpu->last_seqno[ring->id] = submit->seqno; + /* write the ringbuffer timestamp */ OUT_PKT7(ring, CP_EVENT_WRITE, 4); OUT_RING(ring, CACHE_CLEAN | CP_EVENT_WRITE_0_IRQ | BIT(27)); @@ -389,10 +435,32 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_PKT7(ring, CP_SET_MARKER, 1); OUT_RING(ring, 0x100); /* IFPC enable */ + /* If preemption is enabled */ + if (gpu->nr_rings > 1) { + /* Yield the floor on command completion */ + OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); + + /* + * If dword[2:1] are non zero, they specify an address for + * the CP to write the value of dword[3] to on preemption + * complete. Write 0 to skip the write + */ + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x00); + /* Data value - not used if the address above is 0 */ + OUT_RING(ring, 0x01); + /* generate interrupt on preemption completion */ + OUT_RING(ring, 0x00); + } + + trace_msm_gpu_submit_flush(submit, gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER)); a6xx_flush(gpu, ring); + + /* Check to see if we need to start preemption */ + a6xx_preempt_trigger(gpu); } static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state) @@ -599,6 +667,77 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu) adreno_gpu->ubwc_config.macrotile_mode); } +static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + const struct adreno_reglist_list *reglist; + void *ptr = a6xx_gpu->pwrup_reglist_ptr; + struct cpu_gpu_lock *lock = ptr; + u32 *dest = (u32 *)&lock->regs[0]; + int i; + + reglist = adreno_gpu->info->a6xx->pwrup_reglist; + + lock->gpu_req = lock->cpu_req = lock->turn = 0; + lock->ifpc_list_len = 0; + lock->preemption_list_len = reglist->count; + + /* + * For each entry in each of the lists, write the offset and the current + * register value into the GPU buffer + */ + for (i = 0; i < reglist->count; i++) { + *dest++ = reglist->regs[i]; + *dest++ = gpu_read(gpu, reglist->regs[i]); + } + + /* + * The overall register list is composed of + * 1. Static IFPC-only registers + * 2. Static IFPC + preemption registers + * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects) + * + * The first two lists are static. Size of these lists are stored as + * number of pairs in ifpc_list_len and preemption_list_len + * respectively. With concurrent binning, Some of the perfcounter + * registers being virtualized, CP needs to know the pipe id to program + * the aperture inorder to restore the same. Thus, third list is a + * dynamic list with triplets as + * (<aperture, shifted 12 bits> <address> <data>), and the length is + * stored as number for triplets in dynamic_list_len. + */ + lock->dynamic_list_len = 0; +} + +static int a7xx_preempt_start(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct msm_ringbuffer *ring = gpu->rb[0]; + + if (gpu->nr_rings <= 1) + return 0; + + /* Turn CP protection off */ + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); + OUT_RING(ring, 0); + + a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, NULL); + + /* Yield the floor on command completion */ + OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x00); + /* Generate interrupt on preemption completion */ + OUT_RING(ring, 0x00); + + a6xx_flush(gpu, ring); + + return a6xx_idle(gpu, ring) ? 0 : -EINVAL; +} + static int a6xx_cp_init(struct msm_gpu *gpu) { struct msm_ringbuffer *ring = gpu->rb[0]; @@ -630,6 +769,8 @@ static int a6xx_cp_init(struct msm_gpu *gpu) static int a7xx_cp_init(struct msm_gpu *gpu) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct msm_ringbuffer *ring = gpu->rb[0]; u32 mask; @@ -667,11 +808,11 @@ static int a7xx_cp_init(struct msm_gpu *gpu) /* *Don't* send a power up reg list for concurrent binning (TODO) */ /* Lo address */ - OUT_RING(ring, 0x00000000); + OUT_RING(ring, lower_32_bits(a6xx_gpu->pwrup_reglist_iova)); /* Hi address */ - OUT_RING(ring, 0x00000000); + OUT_RING(ring, upper_32_bits(a6xx_gpu->pwrup_reglist_iova)); /* BIT(31) set => read the regs from the list */ - OUT_RING(ring, 0x00000000); + OUT_RING(ring, BIT(31)); a6xx_flush(gpu, ring); return a6xx_idle(gpu, ring) ? 0 : -EINVAL; @@ -795,6 +936,16 @@ static int a6xx_ucode_load(struct msm_gpu *gpu) msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow"); } + a6xx_gpu->pwrup_reglist_ptr = msm_gem_kernel_new(gpu->dev, PAGE_SIZE, + MSM_BO_WC | MSM_BO_MAP_PRIV, + gpu->aspace, &a6xx_gpu->pwrup_reglist_bo, + &a6xx_gpu->pwrup_reglist_iova); + + if (IS_ERR(a6xx_gpu->pwrup_reglist_ptr)) + return PTR_ERR(a6xx_gpu->pwrup_reglist_ptr); + + msm_gem_object_set_name(a6xx_gpu->pwrup_reglist_bo, "pwrup_reglist"); + return 0; } @@ -1125,6 +1276,8 @@ static int hw_init(struct msm_gpu *gpu) if (a6xx_gpu->shadow_bo) { gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR, shadowptr(a6xx_gpu, gpu->rb[0])); + for (unsigned int i = 0; i < gpu->nr_rings; i++) + a6xx_gpu->shadow[i] = 0; } /* ..which means "always" on A7xx, also for BV shadow */ @@ -1133,6 +1286,8 @@ static int hw_init(struct msm_gpu *gpu) rbmemptr(gpu->rb[0], bv_rptr)); } + a6xx_preempt_hw_init(gpu); + /* Always come up on rb 0 */ a6xx_gpu->cur_ring = gpu->rb[0]; @@ -1142,6 +1297,11 @@ static int hw_init(struct msm_gpu *gpu) /* Enable the SQE_to start the CP engine */ gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1); + if (adreno_is_a7xx(adreno_gpu) && !a6xx_gpu->pwrup_reglist_emitted) { + a7xx_patch_pwrup_reglist(gpu); + a6xx_gpu->pwrup_reglist_emitted = true; + } + ret = adreno_is_a7xx(adreno_gpu) ? a7xx_cp_init(gpu) : a6xx_cp_init(gpu); if (ret) goto out; @@ -1179,6 +1339,10 @@ static int hw_init(struct msm_gpu *gpu) out: if (adreno_has_gmu_wrapper(adreno_gpu)) return ret; + + /* Last step - yield the ringbuffer */ + a7xx_preempt_start(gpu); + /* * Tell the GMU that we are done touching the GPU and it can start power * management @@ -1556,8 +1720,13 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) a7xx_sw_fuse_violation_irq(gpu); - if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) + if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) { msm_gpu_retire(gpu); + a6xx_preempt_trigger(gpu); + } + + if (status & A6XX_RBBM_INT_0_MASK_CP_SW) + a6xx_preempt_irq(gpu); return IRQ_HANDLED; } @@ -2330,6 +2499,8 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) a6xx_fault_handler); a6xx_calc_ubwc_config(adreno_gpu); + /* Set up the preemption specific bits and pieces for each ringbuffer */ + a6xx_preempt_init(gpu); return gpu; } |