summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c110
1 files changed, 87 insertions, 23 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index f5b9f443cfdd..20ea6cb01edf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -41,7 +41,9 @@
#include "amdgpu_aca.h"
MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_4_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin");
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
@@ -53,6 +55,14 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin");
#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */
#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */
+#define XCC_REG_RANGE_0_LOW 0x2000 /* XCC gfxdec0 lower Bound */
+#define XCC_REG_RANGE_0_HIGH 0x3400 /* XCC gfxdec0 upper Bound */
+#define XCC_REG_RANGE_1_LOW 0xA000 /* XCC gfxdec1 lower Bound */
+#define XCC_REG_RANGE_1_HIGH 0x10000 /* XCC gfxdec1 upper Bound */
+
+#define NORMALIZE_XCC_REG_OFFSET(offset) \
+ (offset & 0xFFFF)
+
struct amdgpu_gfx_ras gfx_v9_4_3_ras;
static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev);
@@ -215,9 +225,24 @@ static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev)
}
}
+static uint32_t gfx_v9_4_3_normalize_xcc_reg_offset(uint32_t reg)
+{
+ uint32_t normalized_reg = NORMALIZE_XCC_REG_OFFSET(reg);
+
+ /* If it is an XCC reg, normalize the reg to keep
+ lower 16 bits in local xcc */
+
+ if (((normalized_reg >= XCC_REG_RANGE_0_LOW) && (normalized_reg < XCC_REG_RANGE_0_HIGH)) ||
+ ((normalized_reg >= XCC_REG_RANGE_1_LOW) && (normalized_reg < XCC_REG_RANGE_1_HIGH)))
+ return normalized_reg;
+ else
+ return reg;
+}
+
static void gfx_v9_4_3_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
bool wc, uint32_t reg, uint32_t val)
{
+ reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg);
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
WRITE_DATA_DST_SEL(0) |
@@ -232,6 +257,12 @@ static void gfx_v9_4_3_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
uint32_t addr1, uint32_t ref, uint32_t mask,
uint32_t inv)
{
+ /* Only do the normalization on regspace */
+ if (mem_space == 0) {
+ addr0 = gfx_v9_4_3_normalize_xcc_reg_offset(addr0);
+ addr1 = gfx_v9_4_3_normalize_xcc_reg_offset(addr1);
+ }
+
amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
amdgpu_ring_write(ring,
/* memory (1) or register (0) */
@@ -370,15 +401,14 @@ static void gfx_v9_4_3_free_microcode(struct amdgpu_device *adev)
static int gfx_v9_4_3_init_rlc_microcode(struct amdgpu_device *adev,
const char *chip_name)
{
- char fw_name[30];
int err;
const struct rlc_firmware_header_v2_0 *rlc_hdr;
uint16_t version_major;
uint16_t version_minor;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
- err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ "amdgpu/%s_rlc.bin", chip_name);
if (err)
goto out;
rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
@@ -407,12 +437,10 @@ static void gfx_v9_4_3_check_if_need_gfxoff(struct amdgpu_device *adev)
static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev,
const char *chip_name)
{
- char fw_name[30];
int err;
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
-
- err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ "amdgpu/%s_mec.bin", chip_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
@@ -624,6 +652,15 @@ static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev,
soc15_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, xcc_id));
}
+static int gfx_v9_4_3_get_xccs_per_xcp(struct amdgpu_device *adev)
+{
+ u32 xcp_ctl;
+
+ /* Value is expected to be the same on all, fetch from first instance */
+ xcp_ctl = RREG32_SOC15(GC, GET_INST(GC, 0), regCP_HYP_XCP_CTL);
+
+ return REG_GET_FIELD(xcp_ctl, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP);
+}
static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev,
int num_xccs_per_xcp)
@@ -678,6 +715,7 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = {
.select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q,
.switch_partition_mode = &gfx_v9_4_3_switch_compute_partition,
.ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst,
+ .get_xccs_per_xcp = &gfx_v9_4_3_get_xccs_per_xcp,
};
static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle,
@@ -755,6 +793,7 @@ static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -1392,21 +1431,23 @@ static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev)
static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring,
unsigned vmid)
{
- u32 reg, data;
+ u32 reg, pre_data, data;
reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL);
- if (amdgpu_sriov_is_pp_one_vf(adev))
- data = RREG32_NO_KIQ(reg);
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
+ pre_data = RREG32_NO_KIQ(reg);
else
- data = RREG32(reg);
+ pre_data = RREG32(reg);
- data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
+ data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
- if (amdgpu_sriov_is_pp_one_vf(adev))
- WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data);
- else
- WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data);
+ if (pre_data != data) {
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) {
+ WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data);
+ } else
+ WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data);
+ }
}
static const struct soc15_reg_rlcg rlcg_access_gc_9_4_3[] = {
@@ -1582,6 +1623,9 @@ static int gfx_v9_4_3_xcc_mqd_init(struct amdgpu_ring *ring, int xcc_id)
DOORBELL_SOURCE, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_HIT, 0);
+ if (amdgpu_sriov_vf(adev))
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_MODE, 1);
} else {
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_EN, 0);
@@ -2016,18 +2060,31 @@ static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id)
static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev)
{
- int r = 0, i, num_xcc;
+ int r = 0, i, num_xcc, num_xcp, num_xcc_per_xcp;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ if (amdgpu_sriov_vf(adev)) {
+ enum amdgpu_gfx_partition mode;
- if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
- AMDGPU_XCP_FL_NONE) ==
- AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
- r = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr,
- amdgpu_user_partt_mode);
+ mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+ AMDGPU_XCP_FL_NONE);
+ if (mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
+ return -EINVAL;
+ num_xcc_per_xcp = gfx_v9_4_3_get_xccs_per_xcp(adev);
+ adev->gfx.num_xcc_per_xcp = num_xcc_per_xcp;
+ num_xcp = num_xcc / num_xcc_per_xcp;
+ r = amdgpu_xcp_init(adev->xcp_mgr, num_xcp, mode);
+ } else {
+ if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+ AMDGPU_XCP_FL_NONE) ==
+ AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
+ r = amdgpu_xcp_switch_partition_mode(
+ adev->xcp_mgr, amdgpu_user_partt_mode);
+ }
if (r)
return r;
- num_xcc = NUM_XCC(adev->gfx.xcc_mask);
for (i = 0; i < num_xcc; i++) {
r = gfx_v9_4_3_xcc_cp_resume(adev, i);
if (r)
@@ -2502,6 +2559,7 @@ static int gfx_v9_4_3_set_clockgating_state(void *handle,
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
for (i = 0; i < num_xcc; i++)
gfx_v9_4_3_xcc_update_gfx_clock_gating(
adev, state == AMD_CG_STATE_GATE, i);
@@ -2722,6 +2780,8 @@ static void gfx_v9_4_3_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
{
struct amdgpu_device *adev = ring->adev;
+ reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg);
+
amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
amdgpu_ring_write(ring, 0 | /* src: register*/
(5 << 8) | /* dst: memory */
@@ -2739,6 +2799,8 @@ static void gfx_v9_4_3_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
{
uint32_t cmd = 0;
+ reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg);
+
switch (ring->funcs->type) {
case AMDGPU_RING_TYPE_GFX:
cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
@@ -4138,6 +4200,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev)
/* init asci gds info */
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
/* 9.4.3 removed all the GDS internal memory,
* only support GWS opcode in kernel, like barrier
* semaphore.etc */
@@ -4150,6 +4213,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
/* deprecated for 9.4.3, no usage at all */
adev->gds.gds_compute_max_wave_id = 0;
break;