summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aldebaran.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c267
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c262
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c62
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.h7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c51
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c197
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c72
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c81
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c43
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c293
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c102
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c425
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c346
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h86
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_userqueue.c59
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v12_0.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c24
72 files changed, 2066 insertions, 962 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 64e7acff8f18..ebe08947c5a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -37,7 +37,8 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_DISPLAY_PATH)/modules/inc \
-I$(FULL_AMD_DISPLAY_PATH)/dc \
-I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \
- -I$(FULL_AMD_PATH)/amdkfd
+ -I$(FULL_AMD_PATH)/amdkfd \
+ -I$(FULL_AMD_PATH)/ras/ras_mgr
# Locally disable W=1 warnings enabled in drm subsystem Makefile
subdir-ccflags-y += -Wno-override-init
@@ -324,4 +325,9 @@ amdgpu-y += \
isp_v4_1_1.o
endif
+AMD_GPU_RAS_PATH := ../ras
+AMD_GPU_RAS_FULL_PATH := $(FULL_AMD_PATH)/ras
+include $(AMD_GPU_RAS_FULL_PATH)/Makefile
+amdgpu-y += $(AMD_GPU_RAS_FILES)
+
obj-$(CONFIG_DRM_AMDGPU)+= amdgpu.o
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
index 9569dc16dd3d..daa7b23bc775 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -88,6 +88,10 @@ static int aldebaran_mode2_suspend_ip(struct amdgpu_device *adev)
uint32_t ip_block;
int r, i;
+ /* Skip suspend of SDMA IP versions >= 4.4.2. They are multi-aid */
+ if (adev->aid_mask)
+ ip_block_mask &= ~BIT(AMD_IP_BLOCK_TYPE_SDMA);
+
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2a0df4cabb99..50079209c472 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -372,13 +372,15 @@ void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
u64 *flags);
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
enum amd_ip_block_type block_type);
+bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type);
bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
enum amd_ip_block_type block_type);
int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block);
int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block);
-#define AMDGPU_MAX_IP_NUM 16
+#define AMDGPU_MAX_IP_NUM AMD_IP_BLOCK_TYPE_NUM
struct amdgpu_ip_block_status {
bool valid;
@@ -839,8 +841,6 @@ struct amd_powerplay {
const struct amd_pm_funcs *pp_funcs;
};
-struct ip_discovery_top;
-
/* polaris10 kickers */
#define ASICID_IS_P20(did, rid) (((did == 0x67DF) && \
((rid == 0xE3) || \
@@ -972,8 +972,7 @@ struct amdgpu_device {
struct notifier_block acpi_nb;
struct notifier_block pm_nb;
struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS];
- struct debugfs_blob_wrapper debugfs_vbios_blob;
- struct debugfs_blob_wrapper debugfs_discovery_blob;
+ struct debugfs_blob_wrapper debugfs_vbios_blob;
struct mutex srbm_mutex;
/* GRBM index mutex. Protects concurrent access to GRBM index */
struct mutex grbm_idx_mutex;
@@ -1063,6 +1062,9 @@ struct amdgpu_device {
u32 log2_max_MBps;
} mm_stats;
+ /* discovery*/
+ struct amdgpu_discovery_info discovery;
+
/* display */
bool enable_virtual_display;
struct amdgpu_vkms_output *amdgpu_vkms_output;
@@ -1174,6 +1176,12 @@ struct amdgpu_device {
* queue fence.
*/
struct xarray userq_xa;
+ /**
+ * @userq_doorbell_xa: Global user queue map (doorbell index → queue)
+ * Key: doorbell_index (unique global identifier for the queue)
+ * Value: struct amdgpu_usermode_queue
+ */
+ struct xarray userq_doorbell_xa;
/* df */
struct amdgpu_df df;
@@ -1265,8 +1273,6 @@ struct amdgpu_device {
struct list_head ras_list;
- struct ip_discovery_top *ip_top;
-
struct amdgpu_reset_domain *reset_domain;
struct mutex benchmark_mutex;
@@ -1290,6 +1296,7 @@ struct amdgpu_device {
bool debug_disable_gpu_ring_reset;
bool debug_vm_userptr;
bool debug_disable_ce_logs;
+ bool debug_enable_ce_cs;
/* Protection for the following isolation structure */
struct mutex enforce_isolation_mutex;
@@ -1308,8 +1315,6 @@ struct amdgpu_device {
*/
bool apu_prefer_gtt;
- struct list_head userq_mgr_list;
- struct mutex userq_mutex;
bool userq_halt_for_enforce_isolation;
struct amdgpu_uid *uid_info;
@@ -1637,7 +1642,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
struct drm_file *file_priv);
void amdgpu_driver_release_kms(struct drm_device *dev);
-int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
int amdgpu_device_prepare(struct drm_device *dev);
void amdgpu_device_complete(struct drm_device *dev);
int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 6c62e27b9800..d31460a9e958 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -507,7 +507,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
pm_runtime_get_sync(adev_to_drm(adev)->dev);
/* Just fire off a uevent and let userspace tell us what to do */
drm_helper_hpd_irq_event(adev_to_drm(adev));
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index a2879d2b7c8e..644f79f3c9af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -36,6 +36,7 @@
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"
#include "amdgpu_reset.h"
+#include "amdgpu_ras_mgr.h"
/* Total memory size in system memory and all GPU VRAM. Used to
* estimate worst case amount of memory to reserve for page tables
@@ -746,6 +747,20 @@ void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *ad
enum amdgpu_ras_block block, uint16_t pasid,
pasid_notify pasid_fn, void *data, uint32_t reset)
{
+
+ if (amdgpu_uniras_enabled(adev)) {
+ struct ras_ih_info ih_info;
+
+ memset(&ih_info, 0, sizeof(ih_info));
+ ih_info.block = block;
+ ih_info.pasid = pasid;
+ ih_info.reset = reset;
+ ih_info.pasid_fn = pasid_fn;
+ ih_info.data = data;
+ amdgpu_ras_mgr_handle_consumer_interrupt(adev, &ih_info);
+ return;
+ }
+
amdgpu_umc_pasid_poison_handler(adev, block, pasid, pasid_fn, data, reset);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 9e120c934cc1..8bdfcde2029b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -71,7 +71,7 @@ struct kgd_mem {
struct mutex lock;
struct amdgpu_bo *bo;
struct dma_buf *dmabuf;
- struct hmm_range *range;
+ struct amdgpu_hmm_range *range;
struct list_head attachments;
/* protected by amdkfd_process_info.lock */
struct list_head validate_list;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 83020963dfde..96ccd5ade031 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1057,7 +1057,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
struct amdkfd_process_info *process_info = mem->process_info;
struct amdgpu_bo *bo = mem->bo;
struct ttm_operation_ctx ctx = { true, false };
- struct hmm_range *range;
+ struct amdgpu_hmm_range *range;
int ret = 0;
mutex_lock(&process_info->lock);
@@ -1089,8 +1089,15 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
return 0;
}
- ret = amdgpu_ttm_tt_get_user_pages(bo, &range);
+ range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!range)) {
+ ret = -ENOMEM;
+ goto unregister_out;
+ }
+
+ ret = amdgpu_ttm_tt_get_user_pages(bo, range);
if (ret) {
+ amdgpu_hmm_range_free(range);
if (ret == -EAGAIN)
pr_debug("Failed to get user pages, try again\n");
else
@@ -1113,7 +1120,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
amdgpu_bo_unreserve(bo);
release_out:
- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range);
+ amdgpu_hmm_range_free(range);
unregister_out:
if (ret)
amdgpu_hmm_unregister(bo);
@@ -1916,7 +1923,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
amdgpu_hmm_unregister(mem->bo);
mutex_lock(&process_info->notifier_lock);
- amdgpu_ttm_tt_discard_user_pages(mem->bo->tbo.ttm, mem->range);
+ amdgpu_hmm_range_free(mem->range);
mutex_unlock(&process_info->notifier_lock);
}
@@ -1954,9 +1961,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
*/
if (size) {
if (!is_imported &&
- (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM ||
- (adev->apu_prefer_gtt &&
- mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT)))
+ mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
*size = bo_size;
else
*size = 0;
@@ -2329,10 +2334,9 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
struct kfd_vm_fault_info *mem)
{
- if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
+ if (atomic_read_acquire(&adev->gmc.vm_fault_info_updated) == 1) {
*mem = *adev->gmc.vm_fault_info;
- mb(); /* make sure read happened */
- atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 0);
}
return 0;
}
@@ -2543,7 +2547,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
bo = mem->bo;
- amdgpu_ttm_tt_discard_user_pages(bo->tbo.ttm, mem->range);
+ amdgpu_hmm_range_free(mem->range);
mem->range = NULL;
/* BO reservations and getting user pages (hmm_range_fault)
@@ -2567,9 +2571,14 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
}
}
+ mem->range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!mem->range))
+ return -ENOMEM;
/* Get updated user pages */
- ret = amdgpu_ttm_tt_get_user_pages(bo, &mem->range);
+ ret = amdgpu_ttm_tt_get_user_pages(bo, mem->range);
if (ret) {
+ amdgpu_hmm_range_free(mem->range);
+ mem->range = NULL;
pr_debug("Failed %d to get user pages\n", ret);
/* Return -EFAULT bad address error as success. It will
@@ -2742,8 +2751,8 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i
continue;
/* Only check mem with hmm range associated */
- valid = amdgpu_ttm_tt_get_user_pages_done(
- mem->bo->tbo.ttm, mem->range);
+ valid = amdgpu_hmm_range_valid(mem->range);
+ amdgpu_hmm_range_free(mem->range);
mem->range = NULL;
if (!valid) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index c7d32fb216e4..636385c80f64 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -181,19 +181,22 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
u8 frev, crev;
int usage_bytes = 0;
- if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {
- if (frev == 2 && crev == 1) {
- fw_usage_v2_1 =
- (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
- amdgpu_atomfirmware_allocate_fb_v2_1(adev,
- fw_usage_v2_1,
- &usage_bytes);
- } else if (frev >= 2 && crev >= 2) {
- fw_usage_v2_2 =
- (struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset);
- amdgpu_atomfirmware_allocate_fb_v2_2(adev,
- fw_usage_v2_2,
- &usage_bytes);
+ /* Skip atomfirmware allocation for SRIOV VFs when dynamic crit regn is enabled */
+ if (!(amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled)) {
+ if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {
+ if (frev == 2 && crev == 1) {
+ fw_usage_v2_1 =
+ (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
+ amdgpu_atomfirmware_allocate_fb_v2_1(adev,
+ fw_usage_v2_1,
+ &usage_bytes);
+ } else if (frev >= 2 && crev >= 2) {
+ fw_usage_v2_2 =
+ (struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset);
+ amdgpu_atomfirmware_allocate_fb_v2_2(adev,
+ fw_usage_v2_2,
+ &usage_bytes);
+ }
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 00e96419fcda..35d04e69aec0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -96,13 +96,14 @@ void amdgpu_bios_release(struct amdgpu_device *adev)
* part of the system bios. On boot, the system bios puts a
* copy of the igp rom at the start of vram if a discrete card is
* present.
- * For SR-IOV, the vbios image is also put in VRAM in the VF.
+ * For SR-IOV, if dynamic critical region is not enabled,
+ * the vbios image is also put at the start of VRAM in the VF.
*/
static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
{
- uint8_t __iomem *bios;
+ uint8_t __iomem *bios = NULL;
resource_size_t vram_base;
- resource_size_t size = 256 * 1024; /* ??? */
+ u32 size = 256U * 1024U; /* ??? */
if (!(adev->flags & AMD_IS_APU))
if (amdgpu_device_need_post(adev))
@@ -114,18 +115,33 @@ static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
adev->bios = NULL;
vram_base = pci_resource_start(adev->pdev, 0);
- bios = ioremap_wc(vram_base, size);
- if (!bios)
- return false;
adev->bios = kmalloc(size, GFP_KERNEL);
- if (!adev->bios) {
- iounmap(bios);
+ if (!adev->bios)
return false;
+
+ /* For SRIOV with dynamic critical region is enabled,
+ * the vbios image is put at a dynamic offset of VRAM in the VF.
+ * If dynamic critical region is disabled, follow the existing logic as on baremetal.
+ */
+ if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) {
+ if (amdgpu_virt_get_dynamic_data_info(adev,
+ AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID, adev->bios, &size)) {
+ amdgpu_bios_release(adev);
+ return false;
+ }
+ } else {
+ bios = ioremap_wc(vram_base, size);
+ if (!bios) {
+ amdgpu_bios_release(adev);
+ return false;
+ }
+
+ memcpy_fromio(adev->bios, bios, size);
+ iounmap(bios);
}
+
adev->bios_size = size;
- memcpy_fromio(adev->bios, bios, size);
- iounmap(bios);
if (!check_atom_bios(adev, size)) {
amdgpu_bios_release(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index a716c9886c74..2b5e7c46a39d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -38,7 +38,7 @@ struct amdgpu_bo_list_entry {
struct amdgpu_bo *bo;
struct amdgpu_bo_va *bo_va;
uint32_t priority;
- struct hmm_range *range;
+ struct amdgpu_hmm_range *range;
bool user_invalidated;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 47e9bfba0642..9f96d568acf2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -734,10 +734,8 @@ amdgpu_connector_lvds_detect(struct drm_connector *connector, bool force)
amdgpu_connector_update_scratch_regs(connector, ret);
- if (!drm_kms_helper_is_poll_worker()) {
- pm_runtime_mark_last_busy(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker())
pm_runtime_put_autosuspend(connector->dev->dev);
- }
return ret;
}
@@ -919,10 +917,8 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force)
amdgpu_connector_update_scratch_regs(connector, ret);
out:
- if (!drm_kms_helper_is_poll_worker()) {
- pm_runtime_mark_last_busy(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker())
pm_runtime_put_autosuspend(connector->dev->dev);
- }
return ret;
}
@@ -1146,10 +1142,8 @@ out:
amdgpu_connector_update_scratch_regs(connector, ret);
exit:
- if (!drm_kms_helper_is_poll_worker()) {
- pm_runtime_mark_last_busy(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker())
pm_runtime_put_autosuspend(connector->dev->dev);
- }
return ret;
}
@@ -1486,10 +1480,8 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
amdgpu_connector_update_scratch_regs(connector, ret);
out:
- if (!drm_kms_helper_is_poll_worker()) {
- pm_runtime_mark_last_busy(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker())
pm_runtime_put_autosuspend(connector->dev->dev);
- }
if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
connector->connector_type == DRM_MODE_CONNECTOR_eDP)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
index ef996493115f..425a3e564360 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: MIT
/*
* Copyright 2025 Advanced Micro Devices, Inc.
*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h
index bcb97d245673..353421807387 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: MIT */
/*
* Copyright 2025 Advanced Micro Devices, Inc.
*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9cd7741d2254..ecdfe6cb36cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -29,7 +29,6 @@
#include <linux/pagemap.h>
#include <linux/sync_file.h>
#include <linux/dma-buf.h>
-#include <linux/hmm.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
@@ -41,6 +40,7 @@
#include "amdgpu_gmc.h"
#include "amdgpu_gem.h"
#include "amdgpu_ras.h"
+#include "amdgpu_hmm.h"
static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
struct amdgpu_device *adev,
@@ -364,6 +364,12 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
if (p->uf_bo && ring->funcs->no_user_fence)
return -EINVAL;
+ if (!p->adev->debug_enable_ce_cs &&
+ chunk_ib->flags & AMDGPU_IB_FLAG_CE) {
+ dev_err_ratelimited(p->adev->dev, "CE CS is blocked, use debug=0x400 to override\n");
+ return -EINVAL;
+ }
+
if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
@@ -702,7 +708,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
*/
const s64 us_upper_bound = 200000;
- if (!adev->mm_stats.log2_max_MBps) {
+ if ((!adev->mm_stats.log2_max_MBps) || !ttm_resource_manager_used(&adev->mman.vram_mgr.manager)) {
*max_bytes = 0;
*max_vis_bytes = 0;
return;
@@ -885,12 +891,17 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
bool userpage_invalidated = false;
struct amdgpu_bo *bo = e->bo;
- r = amdgpu_ttm_tt_get_user_pages(bo, &e->range);
+ e->range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!e->range))
+ return -ENOMEM;
+
+ r = amdgpu_ttm_tt_get_user_pages(bo, e->range);
if (r)
goto out_free_user_pages;
for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
- if (bo->tbo.ttm->pages[i] != hmm_pfn_to_page(e->range->hmm_pfns[i])) {
+ if (bo->tbo.ttm->pages[i] !=
+ hmm_pfn_to_page(e->range->hmm_range.hmm_pfns[i])) {
userpage_invalidated = true;
break;
}
@@ -984,9 +995,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
out_free_user_pages:
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
- struct amdgpu_bo *bo = e->bo;
-
- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
+ amdgpu_hmm_range_free(e->range);
e->range = NULL;
}
mutex_unlock(&p->bo_list->bo_list_mutex);
@@ -1317,8 +1326,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
*/
r = 0;
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
- r |= !amdgpu_ttm_tt_get_user_pages_done(e->bo->tbo.ttm,
- e->range);
+ r |= !amdgpu_hmm_range_valid(e->range);
+ amdgpu_hmm_range_free(e->range);
e->range = NULL;
}
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index a70651050acf..62d43b8cbe58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -129,7 +129,6 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
if (use_bank) {
if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
(se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return -EINVAL;
@@ -179,7 +178,6 @@ end:
if (pm_pg_lock)
mutex_unlock(&adev->pm.mutex);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
@@ -255,7 +253,6 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off
if (rd->id.use_grbm) {
if ((rd->id.grbm.sh != 0xFFFFFFFF && rd->id.grbm.sh >= adev->gfx.config.max_sh_per_se) ||
(rd->id.grbm.se != 0xFFFFFFFF && rd->id.grbm.se >= adev->gfx.config.max_shader_engines)) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
mutex_unlock(&rd->lock);
@@ -310,7 +307,6 @@ end:
mutex_unlock(&rd->lock);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
@@ -446,7 +442,6 @@ static ssize_t amdgpu_debugfs_gprwave_read(struct file *f, char __user *buf, siz
amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, rd->id.xcc_id);
mutex_unlock(&adev->grbm_idx_mutex);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
if (!x) {
@@ -557,7 +552,6 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return r;
@@ -617,7 +611,6 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return r;
@@ -676,7 +669,6 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return r;
@@ -736,7 +728,6 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return r;
@@ -795,7 +786,6 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return r;
@@ -855,7 +845,6 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return r;
@@ -1003,7 +992,6 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
if (r) {
@@ -1094,7 +1082,6 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0);
mutex_unlock(&adev->grbm_idx_mutex);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
if (!x) {
@@ -1192,7 +1179,6 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0);
mutex_unlock(&adev->grbm_idx_mutex);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
while (size) {
@@ -1266,7 +1252,6 @@ static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f, char __user
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
@@ -1315,7 +1300,6 @@ static ssize_t amdgpu_debugfs_gfxoff_residency_write(struct file *f, const char
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
@@ -1365,7 +1349,6 @@ static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f, char __user *buf
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
@@ -1414,7 +1397,6 @@ static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *bu
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
@@ -1460,7 +1442,6 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
@@ -1501,7 +1482,6 @@ static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *bu
r = result;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
@@ -1701,7 +1681,6 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
up_write(&adev->reset_domain->sem);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return 0;
@@ -1721,7 +1700,6 @@ static int amdgpu_debugfs_evict_vram(void *data, u64 *val)
*val = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return 0;
@@ -1742,7 +1720,6 @@ static int amdgpu_debugfs_evict_gtt(void *data, u64 *val)
*val = amdgpu_ttm_evict_resources(adev, TTM_PL_TT);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return 0;
@@ -1762,7 +1739,6 @@ static int amdgpu_debugfs_benchmark(void *data, u64 val)
r = amdgpu_benchmark(adev, val);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return r;
@@ -1902,7 +1878,7 @@ no_preempt:
continue;
}
job = to_amdgpu_job(s_job);
- if (preempted && (&job->hw_fence.base) == fence)
+ if (preempted && (&job->hw_fence->base) == fence)
/* mark the job as preempted */
job->preemption_status |= AMDGPU_IB_PREEMPTED;
}
@@ -2014,7 +1990,6 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 val)
ret = -EINVAL;
out:
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return ret;
@@ -2123,10 +2098,9 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
debugfs_create_blob("amdgpu_vbios", 0444, root,
&adev->debugfs_vbios_blob);
- adev->debugfs_discovery_blob.data = adev->mman.discovery_bin;
- adev->debugfs_discovery_blob.size = adev->mman.discovery_tmr_size;
- debugfs_create_blob("amdgpu_discovery", 0444, root,
- &adev->debugfs_discovery_blob);
+ if (adev->discovery.debugfs_blob.size)
+ debugfs_create_blob("amdgpu_discovery", 0444, root,
+ &adev->discovery.debugfs_blob);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7a899fb4de29..654f4844b7ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1882,6 +1882,13 @@ static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device
static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev)
{
+ /* Enabling ASPM causes randoms hangs on Tahiti and Oland on Zen4.
+ * It's unclear if this is a platform-specific or GPU-specific issue.
+ * Disable ASPM on SI for the time being.
+ */
+ if (adev->family == AMDGPU_FAMILY_SI)
+ return true;
+
#if IS_ENABLED(CONFIG_X86)
struct cpuinfo_x86 *c = &cpu_data(0);
@@ -2380,7 +2387,7 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
}
/**
- * amdgpu_device_ip_is_valid - is the hardware IP enabled
+ * amdgpu_device_ip_is_hw - is the hardware IP enabled
*
* @adev: amdgpu_device pointer
* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
@@ -2388,6 +2395,27 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
* Check if the hardware IP is enable or not.
* Returns true if it the IP is enable, false if not.
*/
+bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type)
+{
+ int i;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (adev->ip_blocks[i].version->type == block_type)
+ return adev->ip_blocks[i].status.hw;
+ }
+ return false;
+}
+
+/**
+ * amdgpu_device_ip_is_valid - is the hardware IP valid
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Check if the hardware IP is valid or not.
+ * Returns true if it the IP is valid, false if not.
+ */
bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
enum amd_ip_block_type block_type)
{
@@ -2626,7 +2654,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
chip_name = "arcturus";
break;
case CHIP_NAVI12:
- if (adev->mman.discovery_bin)
+ if (adev->discovery.bin)
return 0;
chip_name = "navi12";
break;
@@ -2754,6 +2782,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
r = amdgpu_virt_request_full_gpu(adev, true);
if (r)
return r;
+
+ r = amdgpu_virt_init_critical_region(adev);
+ if (r)
+ return r;
}
switch (adev->asic_type) {
@@ -3773,7 +3805,6 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
continue;
- /* XXX handle errors */
r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
if (r)
return r;
@@ -3856,9 +3887,9 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
continue;
- /* XXX handle errors */
r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
- adev->ip_blocks[i].status.hw = false;
+ if (r)
+ return r;
/* handle putting the SMC in the appropriate state */
if (!amdgpu_sriov_vf(adev)) {
@@ -3888,7 +3919,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
* in each IP into a state suitable for suspend.
* Returns 0 on success, negative error code on failure.
*/
-int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
+static int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
{
int r;
@@ -4184,7 +4215,6 @@ bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
#else
return false;
#endif
- case CHIP_BONAIRE:
case CHIP_KAVERI:
case CHIP_KABINI:
case CHIP_MULLINS:
@@ -4278,58 +4308,53 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
long timeout;
int ret = 0;
- /*
- * By default timeout for jobs is 10 sec
- */
- adev->compute_timeout = adev->gfx_timeout = msecs_to_jiffies(10000);
- adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
+ /* By default timeout for all queues is 2 sec */
+ adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
+ adev->video_timeout = msecs_to_jiffies(2000);
- if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
- while ((timeout_setting = strsep(&input, ",")) &&
- strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
- ret = kstrtol(timeout_setting, 0, &timeout);
- if (ret)
- return ret;
+ if (!strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
+ return 0;
- if (timeout == 0) {
- index++;
- continue;
- } else if (timeout < 0) {
- timeout = MAX_SCHEDULE_TIMEOUT;
- dev_warn(adev->dev, "lockup timeout disabled");
- add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
- } else {
- timeout = msecs_to_jiffies(timeout);
- }
+ while ((timeout_setting = strsep(&input, ",")) &&
+ strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
+ ret = kstrtol(timeout_setting, 0, &timeout);
+ if (ret)
+ return ret;
- switch (index++) {
- case 0:
- adev->gfx_timeout = timeout;
- break;
- case 1:
- adev->compute_timeout = timeout;
- break;
- case 2:
- adev->sdma_timeout = timeout;
- break;
- case 3:
- adev->video_timeout = timeout;
- break;
- default:
- break;
- }
+ if (timeout == 0) {
+ index++;
+ continue;
+ } else if (timeout < 0) {
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ dev_warn(adev->dev, "lockup timeout disabled");
+ add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
+ } else {
+ timeout = msecs_to_jiffies(timeout);
}
- /*
- * There is only one value specified and
- * it should apply to all non-compute jobs.
- */
- if (index == 1) {
- adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
- if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
- adev->compute_timeout = adev->gfx_timeout;
+
+ switch (index++) {
+ case 0:
+ adev->gfx_timeout = timeout;
+ break;
+ case 1:
+ adev->compute_timeout = timeout;
+ break;
+ case 2:
+ adev->sdma_timeout = timeout;
+ break;
+ case 3:
+ adev->video_timeout = timeout;
+ break;
+ default:
+ break;
}
}
+ /* When only one value specified apply it to all queues. */
+ if (index == 1)
+ adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
+ adev->video_timeout = timeout;
+
return ret;
}
@@ -4384,6 +4409,55 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
dev_info(adev->dev, "MCBP is enabled\n");
}
+static int amdgpu_device_sys_interface_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_atombios_sysfs_init(adev);
+ if (r)
+ drm_err(&adev->ddev,
+ "registering atombios sysfs failed (%d).\n", r);
+
+ r = amdgpu_pm_sysfs_init(adev);
+ if (r)
+ dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r);
+
+ r = amdgpu_ucode_sysfs_init(adev);
+ if (r) {
+ adev->ucode_sysfs_en = false;
+ dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r);
+ } else
+ adev->ucode_sysfs_en = true;
+
+ r = amdgpu_device_attr_sysfs_init(adev);
+ if (r)
+ dev_err(adev->dev, "Could not create amdgpu device attr\n");
+
+ r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
+ if (r)
+ dev_err(adev->dev,
+ "Could not create amdgpu board attributes\n");
+
+ amdgpu_fru_sysfs_init(adev);
+ amdgpu_reg_state_sysfs_init(adev);
+ amdgpu_xcp_sysfs_init(adev);
+
+ return r;
+}
+
+static void amdgpu_device_sys_interface_fini(struct amdgpu_device *adev)
+{
+ if (adev->pm.sysfs_initialized)
+ amdgpu_pm_sysfs_fini(adev);
+ if (adev->ucode_sysfs_en)
+ amdgpu_ucode_sysfs_fini(adev);
+ amdgpu_device_attr_sysfs_fini(adev);
+ amdgpu_fru_sysfs_fini(adev);
+
+ amdgpu_reg_state_sysfs_fini(adev);
+ amdgpu_xcp_sysfs_fini(adev);
+}
+
/**
* amdgpu_device_init - initialize the driver
*
@@ -4483,7 +4557,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->gfx.userq_sch_mutex);
mutex_init(&adev->gfx.workload_profile_mutex);
mutex_init(&adev->vcn.workload_profile_mutex);
- mutex_init(&adev->userq_mutex);
amdgpu_device_init_apu_flags(adev);
@@ -4511,7 +4584,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_LIST_HEAD(&adev->pm.od_kobj_list);
- INIT_LIST_HEAD(&adev->userq_mgr_list);
+ xa_init(&adev->userq_doorbell_xa);
INIT_DELAYED_WORK(&adev->delayed_init_work,
amdgpu_device_delayed_init_work_handler);
@@ -4807,39 +4880,14 @@ fence_driver_init:
flush_delayed_work(&adev->delayed_init_work);
}
+ if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
+ amdgpu_xgmi_reset_on_init(adev);
/*
* Place those sysfs registering after `late_init`. As some of those
* operations performed in `late_init` might affect the sysfs
* interfaces creating.
*/
- r = amdgpu_atombios_sysfs_init(adev);
- if (r)
- drm_err(&adev->ddev,
- "registering atombios sysfs failed (%d).\n", r);
-
- r = amdgpu_pm_sysfs_init(adev);
- if (r)
- dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r);
-
- r = amdgpu_ucode_sysfs_init(adev);
- if (r) {
- adev->ucode_sysfs_en = false;
- dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r);
- } else
- adev->ucode_sysfs_en = true;
-
- r = amdgpu_device_attr_sysfs_init(adev);
- if (r)
- dev_err(adev->dev, "Could not create amdgpu device attr\n");
-
- r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
- if (r)
- dev_err(adev->dev,
- "Could not create amdgpu board attributes\n");
-
- amdgpu_fru_sysfs_init(adev);
- amdgpu_reg_state_sysfs_init(adev);
- amdgpu_xcp_sysfs_init(adev);
+ r = amdgpu_device_sys_interface_init(adev);
if (IS_ENABLED(CONFIG_PERF_EVENTS))
r = amdgpu_pmu_init(adev);
@@ -4867,9 +4915,6 @@ fence_driver_init:
if (px)
vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
- if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
- amdgpu_xgmi_reset_on_init(adev);
-
amdgpu_device_check_iommu_direct_map(adev);
adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
@@ -4961,15 +5006,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
}
amdgpu_fence_driver_hw_fini(adev);
- if (adev->pm.sysfs_initialized)
- amdgpu_pm_sysfs_fini(adev);
- if (adev->ucode_sysfs_en)
- amdgpu_ucode_sysfs_fini(adev);
- amdgpu_device_attr_sysfs_fini(adev);
- amdgpu_fru_sysfs_fini(adev);
-
- amdgpu_reg_state_sysfs_fini(adev);
- amdgpu_xcp_sysfs_fini(adev);
+ amdgpu_device_sys_interface_fini(adev);
/* disable ras feature must before hw fini */
amdgpu_ras_pre_fini(adev);
@@ -5044,7 +5081,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
if (IS_ENABLED(CONFIG_PERF_EVENTS))
amdgpu_pmu_fini(adev);
- if (adev->mman.discovery_bin)
+ if (adev->discovery.bin)
amdgpu_discovery_fini(adev);
amdgpu_reset_put_reset_domain(adev->reset_domain);
@@ -5212,16 +5249,20 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
dev_warn(adev->dev, "smart shift update failed\n");
if (notify_clients)
- drm_client_dev_suspend(adev_to_drm(adev), false);
+ drm_client_dev_suspend(adev_to_drm(adev));
cancel_delayed_work_sync(&adev->delayed_init_work);
amdgpu_ras_suspend(adev);
- amdgpu_device_ip_suspend_phase1(adev);
+ r = amdgpu_device_ip_suspend_phase1(adev);
+ if (r)
+ return r;
amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
- amdgpu_userq_suspend(adev);
+ r = amdgpu_userq_suspend(adev);
+ if (r)
+ return r;
r = amdgpu_device_evict_resources(adev);
if (r)
@@ -5231,7 +5272,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
amdgpu_fence_driver_hw_fini(adev);
- amdgpu_device_ip_suspend_phase2(adev);
+ r = amdgpu_device_ip_suspend_phase2(adev);
+ if (r)
+ return r;
if (amdgpu_sriov_vf(adev))
amdgpu_virt_release_full_gpu(adev, false);
@@ -5346,7 +5389,7 @@ exit:
flush_delayed_work(&adev->delayed_init_work);
if (notify_clients)
- drm_client_dev_resume(adev_to_drm(adev), false);
+ drm_client_dev_resume(adev_to_drm(adev));
amdgpu_ras_resume(adev);
@@ -5802,11 +5845,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
if (!amdgpu_ring_sched_ready(ring))
continue;
- /* Clear job fence from fence drv to avoid force_completion
- * leave NULL and vm flush fence in fence drv
- */
- amdgpu_fence_driver_clear_job_fences(ring);
-
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
amdgpu_fence_driver_force_completion(ring);
}
@@ -5951,7 +5989,7 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
if (r)
goto out;
- drm_client_dev_resume(adev_to_drm(tmp_adev), false);
+ drm_client_dev_resume(adev_to_drm(tmp_adev));
/*
* The GPU enters bad state once faulty pages
@@ -6286,7 +6324,7 @@ static void amdgpu_device_halt_activities(struct amdgpu_device *adev,
*/
amdgpu_unregister_gpu_instance(tmp_adev);
- drm_client_dev_suspend(adev_to_drm(tmp_adev), false);
+ drm_client_dev_suspend(adev_to_drm(tmp_adev));
/* disable ras on ALL IPs */
if (!need_emergency_restart && !amdgpu_reset_in_dpc(adev) &&
@@ -6535,7 +6573,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
*
* job->base holds a reference to parent fence
*/
- if (job && dma_fence_is_signaled(&job->hw_fence.base)) {
+ if (job && dma_fence_is_signaled(&job->hw_fence->base)) {
job_signaled = true;
dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
goto skip_hw_reset;
@@ -7279,10 +7317,17 @@ void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
if (adev->gmc.xgmi.connected_to_cpu)
return;
- if (ring && ring->funcs->emit_hdp_flush)
+ if (ring && ring->funcs->emit_hdp_flush) {
amdgpu_ring_emit_hdp_flush(ring);
- else
- amdgpu_asic_flush_hdp(adev, ring);
+ return;
+ }
+
+ if (!ring && amdgpu_sriov_runtime(adev)) {
+ if (!amdgpu_kiq_hdp_flush(adev))
+ return;
+ }
+
+ amdgpu_asic_flush_hdp(adev, ring);
}
void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 73401f0aeb34..fa2a22dfa048 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -107,6 +107,7 @@
#include "vcn_v5_0_1.h"
#include "jpeg_v5_0_0.h"
#include "jpeg_v5_0_1.h"
+#include "amdgpu_ras_mgr.h"
#include "amdgpu_vpe.h"
#if defined(CONFIG_DRM_AMD_ISP)
@@ -254,9 +255,9 @@ static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev,
pos = tmr_offset + tmr_size - DISCOVERY_TMR_OFFSET;
/* This region is read-only and reserved from system use */
- discv_regn = memremap(pos, adev->mman.discovery_tmr_size, MEMREMAP_WC);
+ discv_regn = memremap(pos, adev->discovery.size, MEMREMAP_WC);
if (discv_regn) {
- memcpy(binary, discv_regn, adev->mman.discovery_tmr_size);
+ memcpy(binary, discv_regn, adev->discovery.size);
memunmap(discv_regn);
return 0;
}
@@ -298,10 +299,31 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
else
vram_size <<= 20;
+ /*
+ * If in VRAM, discovery TMR is marked for reservation. If it is in system mem,
+ * then it is not required to be reserved.
+ */
if (sz_valid) {
- uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
- amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
- adev->mman.discovery_tmr_size, false);
+ if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) {
+ /* For SRIOV VFs with dynamic critical region enabled,
+ * we will get the IPD binary via below call.
+ * If dynamic critical is disabled, fall through to normal seq.
+ */
+ if (amdgpu_virt_get_dynamic_data_info(adev,
+ AMD_SRIOV_MSG_IPD_TABLE_ID, binary,
+ &adev->discovery.size)) {
+ dev_err(adev->dev,
+ "failed to read discovery info from dynamic critical region.");
+ ret = -EINVAL;
+ goto exit;
+ }
+ } else {
+ uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
+
+ amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
+ adev->discovery.size, false);
+ adev->discovery.reserve_tmr = true;
+ }
} else {
ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary);
}
@@ -310,7 +332,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
dev_err(adev->dev,
"failed to read discovery info from memory, vram size read: %llx",
vram_size);
-
+exit:
return ret;
}
@@ -389,6 +411,7 @@ static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev)
static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev,
struct binary_header *bhdr)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
struct table_info *info;
uint16_t checksum;
uint16_t offset;
@@ -398,14 +421,14 @@ static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev,
checksum = le16_to_cpu(info->checksum);
struct nps_info_header *nhdr =
- (struct nps_info_header *)(adev->mman.discovery_bin + offset);
+ (struct nps_info_header *)(discovery_bin + offset);
if (le32_to_cpu(nhdr->table_id) != NPS_INFO_TABLE_ID) {
dev_dbg(adev->dev, "invalid ip discovery nps info table id\n");
return -EINVAL;
}
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
+ if (!amdgpu_discovery_verify_checksum(discovery_bin + offset,
le32_to_cpu(nhdr->size_bytes),
checksum)) {
dev_dbg(adev->dev, "invalid nps info data table checksum\n");
@@ -417,8 +440,11 @@ static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev,
static const char *amdgpu_discovery_get_fw_name(struct amdgpu_device *adev)
{
- if (amdgpu_discovery == 2)
+ if (amdgpu_discovery == 2) {
+ /* Assume there is valid discovery TMR in VRAM even if binary is sideloaded */
+ adev->discovery.reserve_tmr = true;
return "amdgpu/ip_discovery.bin";
+ }
switch (adev->asic_type) {
case CHIP_VEGA10:
@@ -447,49 +473,53 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
{
struct table_info *info;
struct binary_header *bhdr;
+ uint8_t *discovery_bin;
const char *fw_name;
uint16_t offset;
uint16_t size;
uint16_t checksum;
int r;
- adev->mman.discovery_tmr_size = DISCOVERY_TMR_SIZE;
- adev->mman.discovery_bin = kzalloc(adev->mman.discovery_tmr_size, GFP_KERNEL);
- if (!adev->mman.discovery_bin)
+ adev->discovery.bin = kzalloc(DISCOVERY_TMR_SIZE, GFP_KERNEL);
+ if (!adev->discovery.bin)
return -ENOMEM;
+ adev->discovery.size = DISCOVERY_TMR_SIZE;
+ adev->discovery.debugfs_blob.data = adev->discovery.bin;
+ adev->discovery.debugfs_blob.size = adev->discovery.size;
+ discovery_bin = adev->discovery.bin;
/* Read from file if it is the preferred option */
fw_name = amdgpu_discovery_get_fw_name(adev);
if (fw_name != NULL) {
drm_dbg(&adev->ddev, "use ip discovery information from file");
- r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin, fw_name);
+ r = amdgpu_discovery_read_binary_from_file(adev, discovery_bin,
+ fw_name);
if (r)
goto out;
} else {
drm_dbg(&adev->ddev, "use ip discovery information from memory");
- r = amdgpu_discovery_read_binary_from_mem(
- adev, adev->mman.discovery_bin);
+ r = amdgpu_discovery_read_binary_from_mem(adev, discovery_bin);
if (r)
goto out;
}
/* check the ip discovery binary signature */
- if (!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) {
+ if (!amdgpu_discovery_verify_binary_signature(discovery_bin)) {
dev_err(adev->dev,
"get invalid ip discovery binary signature\n");
r = -EINVAL;
goto out;
}
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ bhdr = (struct binary_header *)discovery_bin;
offset = offsetof(struct binary_header, binary_checksum) +
sizeof(bhdr->binary_checksum);
size = le16_to_cpu(bhdr->binary_size) - offset;
checksum = le16_to_cpu(bhdr->binary_checksum);
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
- size, checksum)) {
+ if (!amdgpu_discovery_verify_checksum(discovery_bin + offset, size,
+ checksum)) {
dev_err(adev->dev, "invalid ip discovery binary checksum\n");
r = -EINVAL;
goto out;
@@ -501,15 +531,16 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
if (offset) {
struct ip_discovery_header *ihdr =
- (struct ip_discovery_header *)(adev->mman.discovery_bin + offset);
+ (struct ip_discovery_header *)(discovery_bin + offset);
if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) {
dev_err(adev->dev, "invalid ip discovery data table signature\n");
r = -EINVAL;
goto out;
}
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
- le16_to_cpu(ihdr->size), checksum)) {
+ if (!amdgpu_discovery_verify_checksum(discovery_bin + offset,
+ le16_to_cpu(ihdr->size),
+ checksum)) {
dev_err(adev->dev, "invalid ip discovery data table checksum\n");
r = -EINVAL;
goto out;
@@ -522,7 +553,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
if (offset) {
struct gpu_info_header *ghdr =
- (struct gpu_info_header *)(adev->mman.discovery_bin + offset);
+ (struct gpu_info_header *)(discovery_bin + offset);
if (le32_to_cpu(ghdr->table_id) != GC_TABLE_ID) {
dev_err(adev->dev, "invalid ip discovery gc table id\n");
@@ -530,8 +561,9 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
goto out;
}
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
- le32_to_cpu(ghdr->size), checksum)) {
+ if (!amdgpu_discovery_verify_checksum(discovery_bin + offset,
+ le32_to_cpu(ghdr->size),
+ checksum)) {
dev_err(adev->dev, "invalid gc data table checksum\n");
r = -EINVAL;
goto out;
@@ -544,7 +576,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
if (offset) {
struct harvest_info_header *hhdr =
- (struct harvest_info_header *)(adev->mman.discovery_bin + offset);
+ (struct harvest_info_header *)(discovery_bin + offset);
if (le32_to_cpu(hhdr->signature) != HARVEST_TABLE_SIGNATURE) {
dev_err(adev->dev, "invalid ip discovery harvest table signature\n");
@@ -552,8 +584,9 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
goto out;
}
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
- sizeof(struct harvest_table), checksum)) {
+ if (!amdgpu_discovery_verify_checksum(
+ discovery_bin + offset,
+ sizeof(struct harvest_table), checksum)) {
dev_err(adev->dev, "invalid harvest data table checksum\n");
r = -EINVAL;
goto out;
@@ -566,7 +599,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
if (offset) {
struct vcn_info_header *vhdr =
- (struct vcn_info_header *)(adev->mman.discovery_bin + offset);
+ (struct vcn_info_header *)(discovery_bin + offset);
if (le32_to_cpu(vhdr->table_id) != VCN_INFO_TABLE_ID) {
dev_err(adev->dev, "invalid ip discovery vcn table id\n");
@@ -574,8 +607,9 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
goto out;
}
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
- le32_to_cpu(vhdr->size_bytes), checksum)) {
+ if (!amdgpu_discovery_verify_checksum(
+ discovery_bin + offset,
+ le32_to_cpu(vhdr->size_bytes), checksum)) {
dev_err(adev->dev, "invalid vcn data table checksum\n");
r = -EINVAL;
goto out;
@@ -588,7 +622,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
if (0 && offset) {
struct mall_info_header *mhdr =
- (struct mall_info_header *)(adev->mman.discovery_bin + offset);
+ (struct mall_info_header *)(discovery_bin + offset);
if (le32_to_cpu(mhdr->table_id) != MALL_INFO_TABLE_ID) {
dev_err(adev->dev, "invalid ip discovery mall table id\n");
@@ -596,8 +630,9 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
goto out;
}
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
- le32_to_cpu(mhdr->size_bytes), checksum)) {
+ if (!amdgpu_discovery_verify_checksum(
+ discovery_bin + offset,
+ le32_to_cpu(mhdr->size_bytes), checksum)) {
dev_err(adev->dev, "invalid mall data table checksum\n");
r = -EINVAL;
goto out;
@@ -607,8 +642,8 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
return 0;
out:
- kfree(adev->mman.discovery_bin);
- adev->mman.discovery_bin = NULL;
+ kfree(adev->discovery.bin);
+ adev->discovery.bin = NULL;
if ((amdgpu_discovery != 2) &&
(RREG32(mmIP_DISCOVERY_VERSION) == 4))
amdgpu_ras_query_boot_status(adev, 4);
@@ -620,8 +655,8 @@ static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev);
void amdgpu_discovery_fini(struct amdgpu_device *adev)
{
amdgpu_discovery_sysfs_fini(adev);
- kfree(adev->mman.discovery_bin);
- adev->mman.discovery_bin = NULL;
+ kfree(adev->discovery.bin);
+ adev->discovery.bin = NULL;
}
static int amdgpu_discovery_validate_ip(struct amdgpu_device *adev,
@@ -646,6 +681,7 @@ static int amdgpu_discovery_validate_ip(struct amdgpu_device *adev,
static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
uint32_t *vcn_harvest_count)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
struct binary_header *bhdr;
struct ip_discovery_header *ihdr;
struct die_header *dhdr;
@@ -655,21 +691,21 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
uint8_t inst;
int i, j;
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
- ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
- le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ bhdr = (struct binary_header *)discovery_bin;
+ ihdr = (struct ip_discovery_header
+ *)(discovery_bin +
+ le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
num_dies = le16_to_cpu(ihdr->num_dies);
/* scan harvest bit of all IP data structures */
for (i = 0; i < num_dies; i++) {
die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
- dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
+ dhdr = (struct die_header *)(discovery_bin + die_offset);
num_ips = le16_to_cpu(dhdr->num_ips);
ip_offset = die_offset + sizeof(*dhdr);
for (j = 0; j < num_ips; j++) {
- ip = (struct ip *)(adev->mman.discovery_bin +
- ip_offset);
+ ip = (struct ip *)(discovery_bin + ip_offset);
inst = ip->number_instance;
hw_id = le16_to_cpu(ip->hw_id);
if (amdgpu_discovery_validate_ip(adev, inst, hw_id))
@@ -711,13 +747,14 @@ static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
uint32_t *vcn_harvest_count,
uint32_t *umc_harvest_count)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
struct binary_header *bhdr;
struct harvest_table *harvest_info;
u16 offset;
int i;
uint32_t umc_harvest_config = 0;
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ bhdr = (struct binary_header *)discovery_bin;
offset = le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset);
if (!offset) {
@@ -725,7 +762,7 @@ static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
return;
}
- harvest_info = (struct harvest_table *)(adev->mman.discovery_bin + offset);
+ harvest_info = (struct harvest_table *)(discovery_bin + offset);
for (i = 0; i < 32; i++) {
if (le16_to_cpu(harvest_info->list[i].hw_id) == 0)
@@ -1021,8 +1058,8 @@ static void ip_disc_release(struct kobject *kobj)
kobj);
struct amdgpu_device *adev = ip_top->adev;
- adev->ip_top = NULL;
kfree(ip_top);
+ adev->discovery.ip_top = NULL;
}
static uint8_t amdgpu_discovery_get_harvest_info(struct amdgpu_device *adev,
@@ -1033,7 +1070,9 @@ static uint8_t amdgpu_discovery_get_harvest_info(struct amdgpu_device *adev,
/* Until a uniform way is figured, get mask based on hwid */
switch (hw_id) {
case VCN_HWID:
- harvest = ((1 << inst) & adev->vcn.inst_mask) == 0;
+ /* VCN vs UVD+VCE */
+ if (!amdgpu_ip_version(adev, VCE_HWIP, 0))
+ harvest = ((1 << inst) & adev->vcn.inst_mask) == 0;
break;
case DMU_HWID:
if (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)
@@ -1060,6 +1099,7 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
const size_t _ip_offset, const int num_ips,
bool reg_base_64)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
int ii, jj, kk, res;
uint16_t hw_id;
uint8_t inst;
@@ -1077,7 +1117,7 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
struct ip_v4 *ip;
struct ip_hw_instance *ip_hw_instance;
- ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);
+ ip = (struct ip_v4 *)(discovery_bin + ip_offset);
inst = ip->instance_number;
hw_id = le16_to_cpu(ip->hw_id);
if (amdgpu_discovery_validate_ip(adev, inst, hw_id) ||
@@ -1164,17 +1204,20 @@ next_ip:
static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev)
{
+ struct ip_discovery_top *ip_top = adev->discovery.ip_top;
+ uint8_t *discovery_bin = adev->discovery.bin;
struct binary_header *bhdr;
struct ip_discovery_header *ihdr;
struct die_header *dhdr;
- struct kset *die_kset = &adev->ip_top->die_kset;
+ struct kset *die_kset = &ip_top->die_kset;
u16 num_dies, die_offset, num_ips;
size_t ip_offset;
int ii, res;
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
- ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
- le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ bhdr = (struct binary_header *)discovery_bin;
+ ihdr = (struct ip_discovery_header
+ *)(discovery_bin +
+ le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
num_dies = le16_to_cpu(ihdr->num_dies);
DRM_DEBUG("number of dies: %d\n", num_dies);
@@ -1183,7 +1226,7 @@ static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev)
struct ip_die_entry *ip_die_entry;
die_offset = le16_to_cpu(ihdr->die_info[ii].die_offset);
- dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
+ dhdr = (struct die_header *)(discovery_bin + die_offset);
num_ips = le16_to_cpu(dhdr->num_ips);
ip_offset = die_offset + sizeof(*dhdr);
@@ -1217,30 +1260,32 @@ static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev)
static int amdgpu_discovery_sysfs_init(struct amdgpu_device *adev)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct ip_discovery_top *ip_top;
struct kset *die_kset;
int res, ii;
- if (!adev->mman.discovery_bin)
+ if (!discovery_bin)
return -EINVAL;
- adev->ip_top = kzalloc(sizeof(*adev->ip_top), GFP_KERNEL);
- if (!adev->ip_top)
+ ip_top = kzalloc(sizeof(*ip_top), GFP_KERNEL);
+ if (!ip_top)
return -ENOMEM;
- adev->ip_top->adev = adev;
-
- res = kobject_init_and_add(&adev->ip_top->kobj, &ip_discovery_ktype,
+ ip_top->adev = adev;
+ adev->discovery.ip_top = ip_top;
+ res = kobject_init_and_add(&ip_top->kobj, &ip_discovery_ktype,
&adev->dev->kobj, "ip_discovery");
if (res) {
DRM_ERROR("Couldn't init and add ip_discovery/");
goto Err;
}
- die_kset = &adev->ip_top->die_kset;
+ die_kset = &ip_top->die_kset;
kobject_set_name(&die_kset->kobj, "%s", "die");
- die_kset->kobj.parent = &adev->ip_top->kobj;
+ die_kset->kobj.parent = &ip_top->kobj;
die_kset->kobj.ktype = &die_kobj_ktype;
- res = kset_register(&adev->ip_top->die_kset);
+ res = kset_register(&ip_top->die_kset);
if (res) {
DRM_ERROR("Couldn't register die_kset");
goto Err;
@@ -1254,7 +1299,7 @@ static int amdgpu_discovery_sysfs_init(struct amdgpu_device *adev)
return res;
Err:
- kobject_put(&adev->ip_top->kobj);
+ kobject_put(&ip_top->kobj);
return res;
}
@@ -1299,10 +1344,11 @@ static void amdgpu_discovery_sysfs_die_free(struct ip_die_entry *ip_die_entry)
static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev)
{
+ struct ip_discovery_top *ip_top = adev->discovery.ip_top;
struct list_head *el, *tmp;
struct kset *die_kset;
- die_kset = &adev->ip_top->die_kset;
+ die_kset = &ip_top->die_kset;
spin_lock(&die_kset->list_lock);
list_for_each_prev_safe(el, tmp, &die_kset->list) {
list_del_init(el);
@@ -1311,8 +1357,8 @@ static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev)
spin_lock(&die_kset->list_lock);
}
spin_unlock(&die_kset->list_lock);
- kobject_put(&adev->ip_top->die_kset.kobj);
- kobject_put(&adev->ip_top->kobj);
+ kobject_put(&ip_top->die_kset.kobj);
+ kobject_put(&ip_top->kobj);
}
/* ================================================== */
@@ -1323,6 +1369,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
struct binary_header *bhdr;
struct ip_discovery_header *ihdr;
struct die_header *dhdr;
+ uint8_t *discovery_bin;
struct ip_v4 *ip;
uint16_t die_offset;
uint16_t ip_offset;
@@ -1338,22 +1385,23 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
r = amdgpu_discovery_init(adev);
if (r)
return r;
-
+ discovery_bin = adev->discovery.bin;
wafl_ver = 0;
adev->gfx.xcc_mask = 0;
adev->sdma.sdma_mask = 0;
adev->vcn.inst_mask = 0;
adev->jpeg.inst_mask = 0;
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
- ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
- le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ bhdr = (struct binary_header *)discovery_bin;
+ ihdr = (struct ip_discovery_header
+ *)(discovery_bin +
+ le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
num_dies = le16_to_cpu(ihdr->num_dies);
DRM_DEBUG("number of dies: %d\n", num_dies);
for (i = 0; i < num_dies; i++) {
die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
- dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
+ dhdr = (struct die_header *)(discovery_bin + die_offset);
num_ips = le16_to_cpu(dhdr->num_ips);
ip_offset = die_offset + sizeof(*dhdr);
@@ -1367,7 +1415,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
le16_to_cpu(dhdr->die_id), num_ips);
for (j = 0; j < num_ips; j++) {
- ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);
+ ip = (struct ip_v4 *)(discovery_bin + ip_offset);
inst = ip->instance_number;
hw_id = le16_to_cpu(ip->hw_id);
@@ -1517,16 +1565,16 @@ next_ip:
static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
struct ip_discovery_header *ihdr;
struct binary_header *bhdr;
int vcn_harvest_count = 0;
int umc_harvest_count = 0;
uint16_t offset, ihdr_ver;
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ bhdr = (struct binary_header *)discovery_bin;
offset = le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset);
- ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
- offset);
+ ihdr = (struct ip_discovery_header *)(discovery_bin + offset);
ihdr_ver = le16_to_cpu(ihdr->version);
/*
* Harvest table does not fit Navi1x and legacy GPUs,
@@ -1573,22 +1621,23 @@ union gc_info {
static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
struct binary_header *bhdr;
union gc_info *gc_info;
u16 offset;
- if (!adev->mman.discovery_bin) {
+ if (!discovery_bin) {
DRM_ERROR("ip discovery uninitialized\n");
return -EINVAL;
}
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ bhdr = (struct binary_header *)discovery_bin;
offset = le16_to_cpu(bhdr->table_list[GC].offset);
if (!offset)
return 0;
- gc_info = (union gc_info *)(adev->mman.discovery_bin + offset);
+ gc_info = (union gc_info *)(discovery_bin + offset);
switch (le16_to_cpu(gc_info->v1.header.version_major)) {
case 1:
@@ -1681,24 +1730,25 @@ union mall_info {
static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
struct binary_header *bhdr;
union mall_info *mall_info;
u32 u, mall_size_per_umc, m_s_present, half_use;
u64 mall_size;
u16 offset;
- if (!adev->mman.discovery_bin) {
+ if (!discovery_bin) {
DRM_ERROR("ip discovery uninitialized\n");
return -EINVAL;
}
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ bhdr = (struct binary_header *)discovery_bin;
offset = le16_to_cpu(bhdr->table_list[MALL_INFO].offset);
if (!offset)
return 0;
- mall_info = (union mall_info *)(adev->mman.discovery_bin + offset);
+ mall_info = (union mall_info *)(discovery_bin + offset);
switch (le16_to_cpu(mall_info->v1.header.version_major)) {
case 1:
@@ -1737,12 +1787,13 @@ union vcn_info {
static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
struct binary_header *bhdr;
union vcn_info *vcn_info;
u16 offset;
int v;
- if (!adev->mman.discovery_bin) {
+ if (!discovery_bin) {
DRM_ERROR("ip discovery uninitialized\n");
return -EINVAL;
}
@@ -1757,13 +1808,13 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
return -EINVAL;
}
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ bhdr = (struct binary_header *)discovery_bin;
offset = le16_to_cpu(bhdr->table_list[VCN_INFO].offset);
if (!offset)
return 0;
- vcn_info = (union vcn_info *)(adev->mman.discovery_bin + offset);
+ vcn_info = (union vcn_info *)(discovery_bin + offset);
switch (le16_to_cpu(vcn_info->v1.header.version_major)) {
case 1:
@@ -1823,6 +1874,7 @@ int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
struct amdgpu_gmc_memrange **ranges,
int *range_cnt, bool refresh)
{
+ uint8_t *discovery_bin = adev->discovery.bin;
struct amdgpu_gmc_memrange *mem_ranges;
struct binary_header *bhdr;
union nps_info *nps_info;
@@ -1839,13 +1891,13 @@ int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
return r;
nps_info = &nps_data;
} else {
- if (!adev->mman.discovery_bin) {
+ if (!discovery_bin) {
dev_err(adev->dev,
"fetch mem range failed, ip discovery uninitialized\n");
return -EINVAL;
}
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ bhdr = (struct binary_header *)discovery_bin;
offset = le16_to_cpu(bhdr->table_list[NPS_INFO].offset);
if (!offset)
@@ -1855,8 +1907,7 @@ int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
if (amdgpu_discovery_verify_npsinfo(adev, bhdr))
return -ENOENT;
- nps_info =
- (union nps_info *)(adev->mman.discovery_bin + offset);
+ nps_info = (union nps_info *)(discovery_bin + offset);
}
switch (le16_to_cpu(nps_info->v1.header.version_major)) {
@@ -2359,6 +2410,21 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
amdgpu_ip_version(adev, SDMA0_HWIP, 0));
return -EINVAL;
}
+
+ return 0;
+}
+
+static int amdgpu_discovery_set_ras_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ amdgpu_device_ip_block_add(adev, &ras_v1_0_ip_block);
+ break;
+ default:
+ break;
+ }
return 0;
}
@@ -2565,7 +2631,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_discovery_init(adev);
vega10_reg_base_init(adev);
adev->sdma.num_instances = 2;
+ adev->sdma.sdma_mask = 3;
adev->gmc.num_umc = 4;
+ adev->gfx.xcc_mask = 1;
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 0, 0);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 0, 0);
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 0);
@@ -2592,7 +2660,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_discovery_init(adev);
vega10_reg_base_init(adev);
adev->sdma.num_instances = 2;
+ adev->sdma.sdma_mask = 3;
adev->gmc.num_umc = 4;
+ adev->gfx.xcc_mask = 1;
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 3, 0);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 3, 0);
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 1);
@@ -2619,8 +2689,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_discovery_init(adev);
vega10_reg_base_init(adev);
adev->sdma.num_instances = 1;
+ adev->sdma.sdma_mask = 1;
adev->vcn.num_vcn_inst = 1;
adev->gmc.num_umc = 2;
+ adev->gfx.xcc_mask = 1;
if (adev->apu_flags & AMD_APU_IS_RAVEN2) {
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 2, 0);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 2, 0);
@@ -2665,7 +2737,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_discovery_init(adev);
vega20_reg_base_init(adev);
adev->sdma.num_instances = 2;
+ adev->sdma.sdma_mask = 3;
adev->gmc.num_umc = 8;
+ adev->gfx.xcc_mask = 1;
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 0);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 0);
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 0);
@@ -2693,8 +2767,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_discovery_init(adev);
arct_reg_base_init(adev);
adev->sdma.num_instances = 8;
+ adev->sdma.sdma_mask = 0xff;
adev->vcn.num_vcn_inst = 2;
adev->gmc.num_umc = 8;
+ adev->gfx.xcc_mask = 1;
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 1);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 1);
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 1);
@@ -2726,8 +2802,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_discovery_init(adev);
aldebaran_reg_base_init(adev);
adev->sdma.num_instances = 5;
+ adev->sdma.sdma_mask = 0x1f;
adev->vcn.num_vcn_inst = 2;
adev->gmc.num_umc = 4;
+ adev->gfx.xcc_mask = 1;
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 2);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 2);
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 4, 0);
@@ -2762,6 +2840,8 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
} else {
cyan_skillfish_reg_base_init(adev);
adev->sdma.num_instances = 2;
+ adev->sdma.sdma_mask = 3;
+ adev->gfx.xcc_mask = 1;
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(2, 0, 3);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(2, 0, 3);
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(5, 0, 1);
@@ -3125,6 +3205,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
if (r)
return r;
+ r = amdgpu_discovery_set_ras_ip_blocks(adev);
+ if (r)
+ return r;
+
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
!amdgpu_sriov_vf(adev)) ||
(adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
index b44d56465c5b..4ce04486cc31 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -24,9 +24,21 @@
#ifndef __AMDGPU_DISCOVERY__
#define __AMDGPU_DISCOVERY__
+#include <linux/debugfs.h>
+
#define DISCOVERY_TMR_SIZE (10 << 10)
#define DISCOVERY_TMR_OFFSET (64 << 10)
+struct ip_discovery_top;
+
+struct amdgpu_discovery_info {
+ struct debugfs_blob_wrapper debugfs_blob;
+ struct ip_discovery_top *ip_top;
+ uint32_t size;
+ uint8_t *bin;
+ bool reserve_tmr;
+};
+
void amdgpu_discovery_fini(struct amdgpu_device *adev);
int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 51bab32fd8c6..b5d34797d606 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -332,8 +332,6 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
if (crtc->enabled)
active = true;
- pm_runtime_mark_last_busy(dev->dev);
-
adev = drm_to_adev(dev);
/* if we have active crtcs and we don't have a power ref,
* take the current one
@@ -1365,6 +1363,64 @@ static const struct drm_prop_enum_list amdgpu_dither_enum_list[] = {
{ AMDGPU_FMT_DITHER_ENABLE, "on" },
};
+/**
+ * DOC: property for adaptive backlight modulation
+ *
+ * The 'adaptive backlight modulation' property is used for the compositor to
+ * directly control the adaptive backlight modulation power savings feature
+ * that is part of DCN hardware.
+ *
+ * The property will be attached specifically to eDP panels that support it.
+ *
+ * The property is by default set to 'sysfs' to allow the sysfs file 'panel_power_savings'
+ * to be able to control it.
+ * If set to 'off' the compositor will ensure it stays off.
+ * The other values 'min', 'bias min', 'bias max', and 'max' will control the
+ * intensity of the power savings.
+ *
+ * Modifying this value can have implications on color accuracy, so tread
+ * carefully.
+ */
+static int amdgpu_display_setup_abm_prop(struct amdgpu_device *adev)
+{
+ const struct drm_prop_enum_list props[] = {
+ { ABM_SYSFS_CONTROL, "sysfs" },
+ { ABM_LEVEL_OFF, "off" },
+ { ABM_LEVEL_MIN, "min" },
+ { ABM_LEVEL_BIAS_MIN, "bias min" },
+ { ABM_LEVEL_BIAS_MAX, "bias max" },
+ { ABM_LEVEL_MAX, "max" },
+ };
+ struct drm_property *prop;
+ int i;
+
+ if (!adev->dc_enabled)
+ return 0;
+
+ prop = drm_property_create(adev_to_drm(adev), DRM_MODE_PROP_ENUM,
+ "adaptive backlight modulation",
+ 6);
+ if (!prop)
+ return -ENOMEM;
+
+ for (i = 0; i < ARRAY_SIZE(props); i++) {
+ int ret;
+
+ ret = drm_property_add_enum(prop, props[i].type,
+ props[i].name);
+
+ if (ret) {
+ drm_property_destroy(adev_to_drm(adev), prop);
+
+ return ret;
+ }
+ }
+
+ adev->mode_info.abm_level_property = prop;
+
+ return 0;
+}
+
int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
{
int sz;
@@ -1411,7 +1467,7 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
"dither",
amdgpu_dither_enum_list, sz);
- return 0;
+ return amdgpu_display_setup_abm_prop(adev);
}
void amdgpu_display_update_priority(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
index 930c171473b4..49a29bf47a37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
@@ -55,4 +55,11 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev);
int amdgpu_display_get_scanout_buffer(struct drm_plane *plane,
struct drm_scanout_buffer *sb);
+#define ABM_SYSFS_CONTROL -1
+#define ABM_LEVEL_OFF 0
+#define ABM_LEVEL_MIN 1
+#define ABM_LEVEL_BIAS_MIN 2
+#define ABM_LEVEL_BIAS_MAX 3
+#define ABM_LEVEL_MAX 4
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index bff25ef3e2d0..3776901bbb1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -144,7 +144,8 @@ enum AMDGPU_DEBUG_MASK {
AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
AMDGPU_DEBUG_SMU_POOL = BIT(7),
AMDGPU_DEBUG_VM_USERPTR = BIT(8),
- AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9)
+ AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9),
+ AMDGPU_DEBUG_ENABLE_CE_CS = BIT(10)
};
unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -353,22 +354,16 @@ module_param_named(svm_default_granularity, amdgpu_svm_default_granularity, uint
* DOC: lockup_timeout (string)
* Set GPU scheduler timeout value in ms.
*
- * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or
- * multiple values specified. 0 and negative values are invalidated. They will be adjusted
- * to the default timeout.
+ * The format can be [single value] for setting all timeouts at once or
+ * [GFX,Compute,SDMA,Video] to set individual timeouts.
+ * Negative values mean infinity.
*
- * - With one value specified, the setting will apply to all non-compute jobs.
- * - With multiple values specified, the first one will be for GFX.
- * The second one is for Compute. The third and fourth ones are
- * for SDMA and Video.
- *
- * By default(with no lockup_timeout settings), the timeout for all jobs is 10000.
+ * By default(with no lockup_timeout settings), the timeout for all queues is 2000.
*/
MODULE_PARM_DESC(lockup_timeout,
- "GPU lockup timeout in ms (default: 10000 for all jobs. "
- "0: keep default value. negative: infinity timeout), format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
- "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video].");
-module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
+ "GPU lockup timeout in ms (default: 2000. 0: keep default value. negative: infinity timeout), format: [single value for all] or [GFX,Compute,SDMA,Video].");
+module_param_string(lockup_timeout, amdgpu_lockup_timeout,
+ sizeof(amdgpu_lockup_timeout), 0444);
/**
* DOC: dpm (int)
@@ -2233,7 +2228,6 @@ static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev)
adev->pdev->bus->number, i);
if (p) {
pm_runtime_get_sync(&p->dev);
- pm_runtime_mark_last_busy(&p->dev);
pm_runtime_put_autosuspend(&p->dev);
pci_dev_put(p);
}
@@ -2289,6 +2283,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
pr_info("debug: disable kernel logs of correctable errors\n");
adev->debug_disable_ce_logs = true;
}
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_CE_CS) {
+ pr_info("debug: allowing command submission to CE engine\n");
+ adev->debug_enable_ce_cs = true;
+ }
}
static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
@@ -2474,7 +2473,6 @@ retry_init:
pm_runtime_allow(ddev->dev);
- pm_runtime_mark_last_busy(ddev->dev);
pm_runtime_put_autosuspend(ddev->dev);
pci_wake_from_d3(pdev, TRUE);
@@ -2558,7 +2556,8 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
*/
if (!amdgpu_passthrough(adev))
adev->mp1_state = PP_MP1_STATE_UNLOAD;
- amdgpu_device_ip_suspend(adev);
+ amdgpu_device_prepare(dev);
+ amdgpu_device_suspend(dev, true);
adev->mp1_state = PP_MP1_STATE_NONE;
}
@@ -2771,22 +2770,8 @@ static int amdgpu_runtime_idle_check_userq(struct device *dev)
struct pci_dev *pdev = to_pci_dev(dev);
struct drm_device *drm_dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
- struct amdgpu_usermode_queue *queue;
- struct amdgpu_userq_mgr *uqm, *tmp;
- int queue_id;
- int ret = 0;
-
- mutex_lock(&adev->userq_mutex);
- list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
- idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
- ret = -EBUSY;
- goto done;
- }
- }
-done:
- mutex_unlock(&adev->userq_mutex);
- return ret;
+ return xa_empty(&adev->userq_doorbell_xa) ? 0 : -EBUSY;
}
static int amdgpu_pmops_runtime_suspend(struct device *dev)
@@ -2933,7 +2918,6 @@ static int amdgpu_pmops_runtime_idle(struct device *dev)
ret = amdgpu_runtime_idle_check_userq(dev);
done:
- pm_runtime_mark_last_busy(dev);
pm_runtime_autosuspend(dev);
return ret;
}
@@ -2969,7 +2953,6 @@ long amdgpu_drm_ioctl(struct file *filp,
ret = drm_ioctl(filp, cmd, arg);
- pm_runtime_mark_last_busy(dev->dev);
out:
pm_runtime_put_autosuspend(dev->dev);
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index fd8cca241da6..c7843e336310 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -45,16 +45,11 @@
* Cast helper
*/
static const struct dma_fence_ops amdgpu_fence_ops;
-static const struct dma_fence_ops amdgpu_job_fence_ops;
static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
{
struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
- if (__f->base.ops == &amdgpu_fence_ops ||
- __f->base.ops == &amdgpu_job_fence_ops)
- return __f;
-
- return NULL;
+ return __f;
}
/**
@@ -98,51 +93,32 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
* amdgpu_fence_emit - emit a fence on the requested ring
*
* @ring: ring the fence is associated with
- * @f: resulting fence object
* @af: amdgpu fence input
* @flags: flags to pass into the subordinate .emit_fence() call
*
* Emits a fence command on the requested ring (all asics).
* Returns 0 on success, -ENOMEM on failure.
*/
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
- struct amdgpu_fence *af, unsigned int flags)
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
+ unsigned int flags)
{
struct amdgpu_device *adev = ring->adev;
struct dma_fence *fence;
- struct amdgpu_fence *am_fence;
struct dma_fence __rcu **ptr;
uint32_t seq;
int r;
- if (!af) {
- /* create a separate hw fence */
- am_fence = kzalloc(sizeof(*am_fence), GFP_KERNEL);
- if (!am_fence)
- return -ENOMEM;
- } else {
- am_fence = af;
- }
- fence = &am_fence->base;
- am_fence->ring = ring;
+ fence = &af->base;
+ af->ring = ring;
seq = ++ring->fence_drv.sync_seq;
- am_fence->seq = seq;
- if (af) {
- dma_fence_init(fence, &amdgpu_job_fence_ops,
- &ring->fence_drv.lock,
- adev->fence_context + ring->idx, seq);
- /* Against remove in amdgpu_job_{free, free_cb} */
- dma_fence_get(fence);
- } else {
- dma_fence_init(fence, &amdgpu_fence_ops,
- &ring->fence_drv.lock,
- adev->fence_context + ring->idx, seq);
- }
+ dma_fence_init(fence, &amdgpu_fence_ops,
+ &ring->fence_drv.lock,
+ adev->fence_context + ring->idx, seq);
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
seq, flags | AMDGPU_FENCE_FLAG_INT);
- amdgpu_fence_save_wptr(fence);
+ amdgpu_fence_save_wptr(af);
pm_runtime_get_noresume(adev_to_drm(adev)->dev);
ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
if (unlikely(rcu_dereference_protected(*ptr, 1))) {
@@ -167,8 +143,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
*/
rcu_assign_pointer(*ptr, dma_fence_get(fence));
- *f = fence;
-
return 0;
}
@@ -276,7 +250,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
drv->signalled_wptr = am_fence->wptr;
dma_fence_signal(fence);
dma_fence_put(fence);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
} while (last_seq != seq);
@@ -670,36 +643,6 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
}
/**
- * amdgpu_fence_driver_clear_job_fences - clear job embedded fences of ring
- *
- * @ring: fence of the ring to be cleared
- *
- */
-void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
-{
- int i;
- struct dma_fence *old, **ptr;
-
- for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) {
- ptr = &ring->fence_drv.fences[i];
- old = rcu_dereference_protected(*ptr, 1);
- if (old && old->ops == &amdgpu_job_fence_ops) {
- struct amdgpu_job *job;
-
- /* For non-scheduler bad job, i.e. failed ib test, we need to signal
- * it right here or we won't be able to track them in fence_drv
- * and they will remain unsignaled during sa_bo free.
- */
- job = container_of(old, struct amdgpu_job, hw_fence.base);
- if (!job->base.s_fence && !dma_fence_is_signaled(old))
- dma_fence_signal(old);
- RCU_INIT_POINTER(*ptr, NULL);
- dma_fence_put(old);
- }
- }
-}
-
-/**
* amdgpu_fence_driver_set_error - set error code on fences
* @ring: the ring which contains the fences
* @error: the error code to set
@@ -755,21 +698,50 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
/**
* amdgpu_fence_driver_guilty_force_completion - force signal of specified sequence
*
- * @fence: fence of the ring to signal
+ * @af: fence of the ring to signal
*
*/
-void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence)
+void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
{
- dma_fence_set_error(&fence->base, -ETIME);
- amdgpu_fence_write(fence->ring, fence->seq);
- amdgpu_fence_process(fence->ring);
+ struct dma_fence *unprocessed;
+ struct dma_fence __rcu **ptr;
+ struct amdgpu_fence *fence;
+ struct amdgpu_ring *ring = af->ring;
+ unsigned long flags;
+ u32 seq, last_seq;
+
+ last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
+ seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
+
+ /* mark all fences from the guilty context with an error */
+ spin_lock_irqsave(&ring->fence_drv.lock, flags);
+ do {
+ last_seq++;
+ last_seq &= ring->fence_drv.num_fences_mask;
+
+ ptr = &ring->fence_drv.fences[last_seq];
+ rcu_read_lock();
+ unprocessed = rcu_dereference(*ptr);
+
+ if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) {
+ fence = container_of(unprocessed, struct amdgpu_fence, base);
+
+ if (fence == af)
+ dma_fence_set_error(&fence->base, -ETIME);
+ else if (fence->context == af->context)
+ dma_fence_set_error(&fence->base, -ECANCELED);
+ }
+ rcu_read_unlock();
+ } while (last_seq != seq);
+ spin_unlock_irqrestore(&ring->fence_drv.lock, flags);
+ /* signal the guilty fence */
+ amdgpu_fence_write(ring, (u32)af->base.seqno);
+ amdgpu_fence_process(ring);
}
-void amdgpu_fence_save_wptr(struct dma_fence *fence)
+void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
{
- struct amdgpu_fence *am_fence = container_of(fence, struct amdgpu_fence, base);
-
- am_fence->wptr = am_fence->ring->wptr;
+ af->wptr = af->ring->wptr;
}
static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
@@ -790,14 +762,19 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
struct dma_fence *unprocessed;
struct dma_fence __rcu **ptr;
struct amdgpu_fence *fence;
- u64 wptr, i, seqno;
+ u64 wptr;
+ u32 seq, last_seq;
- seqno = amdgpu_fence_read(ring);
+ last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
+ seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
wptr = ring->fence_drv.signalled_wptr;
ring->ring_backup_entries_to_copy = 0;
- for (i = seqno + 1; i <= ring->fence_drv.sync_seq; ++i) {
- ptr = &ring->fence_drv.fences[i & ring->fence_drv.num_fences_mask];
+ do {
+ last_seq++;
+ last_seq &= ring->fence_drv.num_fences_mask;
+
+ ptr = &ring->fence_drv.fences[last_seq];
rcu_read_lock();
unprocessed = rcu_dereference(*ptr);
@@ -813,7 +790,7 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
wptr = fence->wptr;
}
rcu_read_unlock();
- }
+ } while (last_seq != seq);
}
/*
@@ -830,13 +807,6 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
return (const char *)to_amdgpu_fence(f)->ring->name;
}
-static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f)
-{
- struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base);
-
- return (const char *)to_amdgpu_ring(job->base.sched)->name;
-}
-
/**
* amdgpu_fence_enable_signaling - enable signalling on fence
* @f: fence
@@ -854,23 +824,6 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
}
/**
- * amdgpu_job_fence_enable_signaling - enable signalling on job fence
- * @f: fence
- *
- * This is the simliar function with amdgpu_fence_enable_signaling above, it
- * only handles the job embedded fence.
- */
-static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f)
-{
- struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base);
-
- if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer))
- amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched));
-
- return true;
-}
-
-/**
* amdgpu_fence_free - free up the fence memory
*
* @rcu: RCU callback head
@@ -886,21 +839,6 @@ static void amdgpu_fence_free(struct rcu_head *rcu)
}
/**
- * amdgpu_job_fence_free - free up the job with embedded fence
- *
- * @rcu: RCU callback head
- *
- * Free up the job with embedded fence after the RCU grace period.
- */
-static void amdgpu_job_fence_free(struct rcu_head *rcu)
-{
- struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
-
- /* free job if fence has a parent job */
- kfree(container_of(f, struct amdgpu_job, hw_fence.base));
-}
-
-/**
* amdgpu_fence_release - callback that fence can be freed
*
* @f: fence
@@ -913,19 +851,6 @@ static void amdgpu_fence_release(struct dma_fence *f)
call_rcu(&f->rcu, amdgpu_fence_free);
}
-/**
- * amdgpu_job_fence_release - callback that job embedded fence can be freed
- *
- * @f: fence
- *
- * This is the simliar function with amdgpu_fence_release above, it
- * only handles the job embedded fence.
- */
-static void amdgpu_job_fence_release(struct dma_fence *f)
-{
- call_rcu(&f->rcu, amdgpu_job_fence_free);
-}
-
static const struct dma_fence_ops amdgpu_fence_ops = {
.get_driver_name = amdgpu_fence_get_driver_name,
.get_timeline_name = amdgpu_fence_get_timeline_name,
@@ -933,13 +858,6 @@ static const struct dma_fence_ops amdgpu_fence_ops = {
.release = amdgpu_fence_release,
};
-static const struct dma_fence_ops amdgpu_job_fence_ops = {
- .get_driver_name = amdgpu_fence_get_driver_name,
- .get_timeline_name = amdgpu_job_fence_get_timeline_name,
- .enable_signaling = amdgpu_job_fence_enable_signaling,
- .release = amdgpu_job_fence_release,
-};
-
/*
* Fence debugfs
*/
@@ -1009,7 +927,6 @@ static int gpu_recover_get(void *data, u64 *val)
*val = atomic_read(&adev->reset_domain->reset_res);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index b2033f8352f5..83f3b94ed975 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -302,7 +302,6 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
int pages)
{
unsigned t;
- unsigned p;
int i, j;
u64 page_base;
/* Starting from VEGA10, system bit must be 0 to mean invalid. */
@@ -316,8 +315,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
return;
t = offset / AMDGPU_GPU_PAGE_SIZE;
- p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
- for (i = 0; i < pages; i++, p++) {
+ for (i = 0; i < pages; i++) {
page_base = adev->dummy_page_addr;
if (!adev->gart.ptr)
continue;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index b7ebae289bea..3e38c5db2987 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -198,7 +198,7 @@ static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj);
amdgpu_hmm_unregister(aobj);
- ttm_bo_put(&aobj->tbo);
+ ttm_bo_fini(&aobj->tbo);
}
int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
@@ -531,7 +531,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
struct drm_amdgpu_gem_userptr *args = data;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct drm_gem_object *gobj;
- struct hmm_range *range;
+ struct amdgpu_hmm_range *range;
struct amdgpu_bo *bo;
uint32_t handle;
int r;
@@ -572,10 +572,14 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
goto release_object;
if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
- r = amdgpu_ttm_tt_get_user_pages(bo, &range);
- if (r)
+ range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!range))
+ return -ENOMEM;
+ r = amdgpu_ttm_tt_get_user_pages(bo, range);
+ if (r) {
+ amdgpu_hmm_range_free(range);
goto release_object;
-
+ }
r = amdgpu_bo_reserve(bo, true);
if (r)
goto user_pages_done;
@@ -597,8 +601,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
user_pages_done:
if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range);
-
+ amdgpu_hmm_range_free(range);
release_object:
drm_gem_object_put(gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index ebe2b4c68b0f..8b118c53f351 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -33,6 +33,7 @@
#include "amdgpu_reset.h"
#include "amdgpu_xcp.h"
#include "amdgpu_xgmi.h"
+#include "amdgpu_mes.h"
#include "nvd.h"
/* delay 0.1 second to enable gfx off feature */
@@ -1194,6 +1195,75 @@ failed_kiq_write:
dev_err(adev->dev, "failed to write reg:%x\n", reg);
}
+int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev)
+{
+ signed long r, cnt = 0;
+ unsigned long flags;
+ uint32_t seq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *ring = &kiq->ring;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if (adev->enable_mes_kiq && adev->mes.ring[0].sched.ready)
+ return amdgpu_mes_hdp_flush(adev);
+
+ if (!ring->funcs->emit_hdp_flush) {
+ return -EOPNOTSUPP;
+ }
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ r = amdgpu_ring_alloc(ring, 32);
+ if (r)
+ goto failed_unlock;
+
+ amdgpu_ring_emit_hdp_flush(ring);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* don't wait anymore for gpu reset case because this way may
+ * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+ * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+ * never return if we keep waiting in virt_kiq_rreg, which cause
+ * gpu_recover() hang there.
+ *
+ * also don't wait anymore for IRQ context
+ * */
+ if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+ goto failed_kiq_hdp_flush;
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ if (amdgpu_in_reset(adev))
+ goto failed_kiq_hdp_flush;
+
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY) {
+ dev_err(adev->dev, "failed to flush HDP via KIQ timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+
+failed_undo:
+ amdgpu_ring_undo(ring);
+failed_unlock:
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+failed_kiq_hdp_flush:
+ dev_err(adev->dev, "failed to flush HDP via KIQ\n");
+ return r < 0 ? r : -EIO;
+}
+
int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
{
if (amdgpu_num_kcq == -1) {
@@ -1600,7 +1670,6 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
ret = amdgpu_gfx_run_cleaner_shader(adev, value);
- pm_runtime_mark_last_busy(ddev->dev);
pm_runtime_put_autosuspend(ddev->dev);
if (ret)
@@ -2485,3 +2554,4 @@ void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
&amdgpu_debugfs_compute_sched_mask_fops);
#endif
}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index fb5f7a0ee029..efd61a1ccc66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -615,6 +615,7 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id);
void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id);
+int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev);
int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev);
void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
index 2c6a6b858112..518ca3f4db2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -168,17 +168,13 @@ void amdgpu_hmm_unregister(struct amdgpu_bo *bo)
int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
uint64_t start, uint64_t npages, bool readonly,
void *owner,
- struct hmm_range **phmm_range)
+ struct amdgpu_hmm_range *range)
{
- struct hmm_range *hmm_range;
unsigned long end;
unsigned long timeout;
unsigned long *pfns;
int r = 0;
-
- hmm_range = kzalloc(sizeof(*hmm_range), GFP_KERNEL);
- if (unlikely(!hmm_range))
- return -ENOMEM;
+ struct hmm_range *hmm_range = &range->hmm_range;
pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
if (unlikely(!pfns)) {
@@ -221,28 +217,79 @@ retry:
hmm_range->start = start;
hmm_range->hmm_pfns = pfns;
- *phmm_range = hmm_range;
-
return 0;
out_free_pfns:
kvfree(pfns);
+ hmm_range->hmm_pfns = NULL;
out_free_range:
- kfree(hmm_range);
-
if (r == -EBUSY)
r = -EAGAIN;
return r;
}
-bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range)
+/**
+ * amdgpu_hmm_range_valid - check if an HMM range is still valid
+ * @range: pointer to the &struct amdgpu_hmm_range to validate
+ *
+ * Determines whether the given HMM range @range is still valid by
+ * checking for invalidations via the MMU notifier sequence. This is
+ * typically used to verify that the range has not been invalidated
+ * by concurrent address space updates before it is accessed.
+ *
+ * Return:
+ * * true if @range is valid and can be used safely
+ * * false if @range is NULL or has been invalidated
+ */
+bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range)
{
- bool r;
+ if (!range)
+ return false;
- r = mmu_interval_read_retry(hmm_range->notifier,
- hmm_range->notifier_seq);
- kvfree(hmm_range->hmm_pfns);
- kfree(hmm_range);
+ return !mmu_interval_read_retry(range->hmm_range.notifier,
+ range->hmm_range.notifier_seq);
+}
- return r;
+/**
+ * amdgpu_hmm_range_alloc - allocate and initialize an AMDGPU HMM range
+ * @bo: optional buffer object to associate with this HMM range
+ *
+ * Allocates memory for amdgpu_hmm_range and associates it with the @bo passed.
+ * The reference count of the @bo is incremented.
+ *
+ * Return:
+ * Pointer to a newly allocated struct amdgpu_hmm_range on success,
+ * or NULL if memory allocation fails.
+ */
+struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo)
+{
+ struct amdgpu_hmm_range *range;
+
+ range = kzalloc(sizeof(*range), GFP_KERNEL);
+ if (!range)
+ return NULL;
+
+ range->bo = amdgpu_bo_ref(bo);
+ return range;
+}
+
+/**
+ * amdgpu_hmm_range_free - release an AMDGPU HMM range
+ * @range: pointer to the range object to free
+ *
+ * Releases all resources held by @range, including the associated
+ * hmm_pfns and the dropping reference of associated bo if any.
+ *
+ * Return: void
+ */
+void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range)
+{
+ if (!range)
+ return;
+
+ if (range->hmm_range.hmm_pfns)
+ kvfree(range->hmm_range.hmm_pfns);
+
+ amdgpu_bo_unref(&range->bo);
+ kfree(range);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
index 953e1d06de20..140bc9cd57b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
@@ -31,13 +31,20 @@
#include <linux/interval_tree.h>
#include <linux/mmu_notifier.h>
+struct amdgpu_hmm_range {
+ struct hmm_range hmm_range;
+ struct amdgpu_bo *bo;
+};
+
int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
uint64_t start, uint64_t npages, bool readonly,
void *owner,
- struct hmm_range **phmm_range);
-bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
+ struct amdgpu_hmm_range *range);
#if defined(CONFIG_HMM_MIRROR)
+bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range);
+struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo);
+void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range);
int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr);
void amdgpu_hmm_unregister(struct amdgpu_bo *bo);
#else
@@ -47,7 +54,20 @@ static inline int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr)
"add CONFIG_ZONE_DEVICE=y in config file to fix this\n");
return -ENODEV;
}
+
static inline void amdgpu_hmm_unregister(struct amdgpu_bo *bo) {}
+
+static inline bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range)
+{
+ return false;
+}
+
+static inline struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo)
+{
+ return NULL;
+}
+
+static inline void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range) {}
#endif
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 7d9bcb72e8dd..39229ece83f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -149,17 +149,19 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
if (job) {
vm = job->vm;
fence_ctx = job->base.s_fence ?
- job->base.s_fence->scheduled.context : 0;
+ job->base.s_fence->finished.context : 0;
shadow_va = job->shadow_va;
csa_va = job->csa_va;
gds_va = job->gds_va;
init_shadow = job->init_shadow;
- af = &job->hw_fence;
+ af = job->hw_fence;
/* Save the context of the job for reset handling.
* The driver needs this so it can skip the ring
* contents for guilty contexts.
*/
- af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0;
+ af->context = fence_ctx;
+ /* the vm fence is also part of the job's context */
+ job->hw_vm_fence->context = fence_ctx;
} else {
vm = NULL;
fence_ctx = 0;
@@ -167,7 +169,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
csa_va = 0;
gds_va = 0;
init_shadow = false;
- af = NULL;
+ af = kzalloc(sizeof(*af), GFP_ATOMIC);
+ if (!af)
+ return -ENOMEM;
}
if (!ring->sched.ready) {
@@ -289,7 +293,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr);
}
- r = amdgpu_fence_emit(ring, f, af, fence_flags);
+ r = amdgpu_fence_emit(ring, af, fence_flags);
if (r) {
dev_err(adev->dev, "failed to emit fence (%d)\n", r);
if (job && job->vmid)
@@ -297,6 +301,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
amdgpu_ring_undo(ring);
return r;
}
+ *f = &af->base;
if (ring->funcs->insert_end)
ring->funcs->insert_end(ring);
@@ -317,7 +322,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
* fence so we know what rings contents to backup
* after we reset the queue.
*/
- amdgpu_fence_save_wptr(*f);
+ amdgpu_fence_save_wptr(af);
amdgpu_ring_ib_end(ring);
amdgpu_ring_commit(ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index d020a890a0ea..e08d837668f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -137,7 +137,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
ring->funcs->reset) {
dev_err(adev->dev, "Starting %s ring reset\n",
s_job->sched->name);
- r = amdgpu_ring_reset(ring, job->vmid, &job->hw_fence);
+ r = amdgpu_ring_reset(ring, job->vmid, job->hw_fence);
if (!r) {
atomic_inc(&ring->adev->gpu_reset_counter);
dev_err(adev->dev, "Ring %s reset succeeded\n",
@@ -186,6 +186,9 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned int num_ibs, struct amdgpu_job **job,
u64 drm_client_id)
{
+ struct amdgpu_fence *af;
+ int r;
+
if (num_ibs == 0)
return -EINVAL;
@@ -193,6 +196,20 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (!*job)
return -ENOMEM;
+ af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+ if (!af) {
+ r = -ENOMEM;
+ goto err_job;
+ }
+ (*job)->hw_fence = af;
+
+ af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+ if (!af) {
+ r = -ENOMEM;
+ goto err_fence;
+ }
+ (*job)->hw_vm_fence = af;
+
(*job)->vm = vm;
amdgpu_sync_create(&(*job)->explicit_sync);
@@ -204,6 +221,13 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
return drm_sched_job_init(&(*job)->base, entity, 1, owner,
drm_client_id);
+
+err_fence:
+ kfree((*job)->hw_fence);
+err_job:
+ kfree(*job);
+
+ return r;
}
int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
@@ -251,11 +275,11 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
struct dma_fence *f;
unsigned i;
- /* Check if any fences where initialized */
+ /* Check if any fences were initialized */
if (job->base.s_fence && job->base.s_fence->finished.ops)
f = &job->base.s_fence->finished;
- else if (job->hw_fence.base.ops)
- f = &job->hw_fence.base;
+ else if (job->hw_fence && job->hw_fence->base.ops)
+ f = &job->hw_fence->base;
else
f = NULL;
@@ -271,11 +295,7 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
amdgpu_sync_free(&job->explicit_sync);
- /* only put the hw fence if has embedded fence */
- if (!job->hw_fence.base.ops)
- kfree(job);
- else
- dma_fence_put(&job->hw_fence.base);
+ kfree(job);
}
void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
@@ -304,10 +324,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
if (job->gang_submit != &job->base.s_fence->scheduled)
dma_fence_put(job->gang_submit);
- if (!job->hw_fence.base.ops)
- kfree(job);
- else
- dma_fence_put(&job->hw_fence.base);
+ kfree(job);
}
struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 4a6487eb6cb5..7abf069d17d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -64,7 +64,8 @@ struct amdgpu_job {
struct drm_sched_job base;
struct amdgpu_vm *vm;
struct amdgpu_sync explicit_sync;
- struct amdgpu_fence hw_fence;
+ struct amdgpu_fence *hw_fence;
+ struct amdgpu_fence *hw_vm_fence;
struct dma_fence *gang_submit;
uint32_t preamble_status;
uint32_t preemption_status;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
index 6b7d66b6d4cc..63ee6ba6a931 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
@@ -371,7 +371,7 @@ static int amdgpu_debugfs_jpeg_sched_mask_set(void *data, u64 val)
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
ring = &adev->jpeg.inst[i].ring_dec[j];
- if (val & (BIT_ULL(1) << ((i * adev->jpeg.num_jpeg_rings) + j)))
+ if (val & (BIT_ULL((i * adev->jpeg.num_jpeg_rings) + j)))
ring->sched.ready = true;
else
ring->sched.ready = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index a9327472c651..6ee77f431d56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -758,7 +758,8 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
ui64 = atomic64_read(&adev->num_vram_cpu_page_faults);
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_VRAM_USAGE:
- ui64 = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
+ ui64 = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ?
+ ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) : 0;
return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
case AMDGPU_INFO_VIS_VRAM_USAGE:
ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
@@ -804,8 +805,8 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
mem.vram.usable_heap_size = adev->gmc.real_vram_size -
atomic64_read(&adev->vram_pin_size) -
AMDGPU_VM_RESERVED_VRAM;
- mem.vram.heap_usage =
- ttm_resource_manager_usage(vram_man);
+ mem.vram.heap_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ?
+ ttm_resource_manager_usage(vram_man) : 0;
mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
mem.cpu_accessible_vram.total_heap_size =
@@ -1470,7 +1471,6 @@ error_pasid:
kfree(fpriv);
out_suspend:
- pm_runtime_mark_last_busy(dev->dev);
pm_put:
pm_runtime_put_autosuspend(dev->dev);
@@ -1538,7 +1538,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
kfree(fpriv);
file_priv->driver_priv = NULL;
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 5bf9be073cdd..9c182ce501af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -105,8 +105,8 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
spin_lock_init(&adev->mes.ring_lock[i]);
adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
- adev->mes.vmid_mask_mmhub = 0xffffff00;
- adev->mes.vmid_mask_gfxhub = adev->gfx.disable_kq ? 0xfffffffe : 0xffffff00;
+ adev->mes.vmid_mask_mmhub = 0xFF00;
+ adev->mes.vmid_mask_gfxhub = adev->gfx.disable_kq ? 0xFFFE : 0xFF00;
num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me;
if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES)
@@ -409,7 +409,7 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
return -EINVAL;
/* Clear the doorbell array before detection */
- memset(adev->mes.hung_queue_db_array_cpu_addr, 0,
+ memset(adev->mes.hung_queue_db_array_cpu_addr, AMDGPU_MES_INVALID_DB_OFFSET,
adev->mes.hung_queue_db_array_size * sizeof(u32));
input.queue_type = queue_type;
input.detect_only = detect_only;
@@ -420,12 +420,17 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
dev_err(adev->dev, "failed to detect and reset\n");
} else {
*hung_db_num = 0;
- for (i = 0; i < adev->mes.hung_queue_db_array_size; i++) {
+ for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) {
if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) {
hung_db_array[i] = db_array[i];
*hung_db_num += 1;
}
}
+
+ /*
+ * TODO: return HQD info for MES scheduled user compute queue reset cases
+ * stored in hung_db_array hqd info offset to full array size
+ */
}
return r;
@@ -523,6 +528,18 @@ error:
return r;
}
+int amdgpu_mes_hdp_flush(struct amdgpu_device *adev)
+{
+ uint32_t hdp_flush_req_offset, hdp_flush_done_offset, ref_and_mask;
+
+ hdp_flush_req_offset = adev->nbio.funcs->get_hdp_flush_req_offset(adev);
+ hdp_flush_done_offset = adev->nbio.funcs->get_hdp_flush_done_offset(adev);
+ ref_and_mask = adev->nbio.hdp_flush_reg->ref_and_mask_cp0;
+
+ return amdgpu_mes_reg_write_reg_wait(adev, hdp_flush_req_offset, hdp_flush_done_offset,
+ ref_and_mask, ref_and_mask);
+}
+
int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
uint64_t process_context_addr,
uint32_t spi_gdbg_per_vmid_cntl,
@@ -686,14 +703,11 @@ out:
bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev)
{
uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
- bool is_supported = false;
-
- if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
- amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) &&
- mes_rev >= 0x63)
- is_supported = true;
- return is_supported;
+ return ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) &&
+ mes_rev >= 0x63) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0));
}
/* Fix me -- node_id is used to identify the correct MES instances in the future */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 6b506fc72f58..e989225b354b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -149,6 +149,7 @@ struct amdgpu_mes {
void *resource_1_addr[AMDGPU_MAX_MES_PIPES];
int hung_queue_db_array_size;
+ int hung_queue_hqd_info_offset;
struct amdgpu_bo *hung_queue_db_array_gpu_obj;
uint64_t hung_queue_db_array_gpu_addr;
void *hung_queue_db_array_cpu_addr;
@@ -238,6 +239,7 @@ struct mes_add_queue_input {
struct mes_remove_queue_input {
uint32_t doorbell_offset;
uint64_t gang_context_addr;
+ bool remove_queue_after_reset;
};
struct mes_map_legacy_queue_input {
@@ -427,6 +429,7 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev,
int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask);
+int amdgpu_mes_hdp_flush(struct amdgpu_device *adev);
int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
uint64_t process_context_addr,
uint32_t spi_gdbg_per_vmid_cntl,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 20460cfd09bc..dc8d2f52c7d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -326,6 +326,8 @@ struct amdgpu_mode_info {
struct drm_property *audio_property;
/* FMT dithering */
struct drm_property *dither_property;
+ /* Adaptive Backlight Modulation (power feature) */
+ struct drm_property *abm_level_property;
/* hardcoded DFP edid from BIOS */
const struct drm_edid *bios_hardcoded_edid;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 656b8a931dae..52c2d1731aab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -96,6 +96,7 @@ struct amdgpu_bo_va {
* if non-zero, cannot unmap from GPU because user queues may still access it
*/
unsigned int queue_refcount;
+ atomic_t userq_va_mapped;
};
struct amdgpu_bo {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c
index 123bcf5c2bb1..bacf888735db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c
@@ -101,7 +101,6 @@ static ssize_t amdgpu_rap_debugfs_write(struct file *f, const char __user *buf,
}
amdgpu_gfx_off_ctrl(adev, true);
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index e0ee21150860..c8b4dd3ea5c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -41,6 +41,7 @@
#include "atom.h"
#include "amdgpu_reset.h"
#include "amdgpu_psp.h"
+#include "amdgpu_ras_mgr.h"
#ifdef CONFIG_X86_MCE_AMD
#include <asm/mce.h>
@@ -611,6 +612,8 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
return size;
}
+static int amdgpu_uniras_clear_badpages_info(struct amdgpu_device *adev);
+
/**
* DOC: AMDGPU RAS debugfs EEPROM table reset interface
*
@@ -635,6 +638,11 @@ static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f,
(struct amdgpu_device *)file_inode(f)->i_private;
int ret;
+ if (amdgpu_uniras_enabled(adev)) {
+ ret = amdgpu_uniras_clear_badpages_info(adev);
+ return ret ? ret : size;
+ }
+
ret = amdgpu_ras_eeprom_reset_table(
&(amdgpu_ras_get_context(adev)->eeprom_control));
@@ -1542,9 +1550,51 @@ out_fini_err_data:
return ret;
}
+static int amdgpu_uniras_clear_badpages_info(struct amdgpu_device *adev)
+{
+ struct ras_cmd_dev_handle req = {0};
+ int ret;
+
+ ret = amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__CLEAR_BAD_PAGE_INFO,
+ &req, sizeof(req), NULL, 0);
+ if (ret) {
+ dev_err(adev->dev, "Failed to clear bad pages info, ret: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int amdgpu_uniras_query_block_ecc(struct amdgpu_device *adev,
+ struct ras_query_if *info)
+{
+ struct ras_cmd_block_ecc_info_req req = {0};
+ struct ras_cmd_block_ecc_info_rsp rsp = {0};
+ int ret;
+
+ if (!info)
+ return -EINVAL;
+
+ req.block_id = info->head.block;
+ req.subblock_id = info->head.sub_block_index;
+
+ ret = amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__GET_BLOCK_ECC_STATUS,
+ &req, sizeof(req), &rsp, sizeof(rsp));
+ if (!ret) {
+ info->ce_count = rsp.ce_count;
+ info->ue_count = rsp.ue_count;
+ info->de_count = rsp.de_count;
+ }
+
+ return ret;
+}
+
int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info)
{
- return amdgpu_ras_query_error_status_with_event(adev, info, RAS_EVENT_TYPE_INVALID);
+ if (amdgpu_uniras_enabled(adev))
+ return amdgpu_uniras_query_block_ecc(adev, info);
+ else
+ return amdgpu_ras_query_error_status_with_event(adev, info, RAS_EVENT_TYPE_INVALID);
}
int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
@@ -1596,6 +1646,27 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
return 0;
}
+static int amdgpu_uniras_error_inject(struct amdgpu_device *adev,
+ struct ras_inject_if *info)
+{
+ struct ras_cmd_inject_error_req inject_req;
+ struct ras_cmd_inject_error_rsp rsp;
+
+ if (!info)
+ return -EINVAL;
+
+ memset(&inject_req, 0, sizeof(inject_req));
+ inject_req.block_id = info->head.block;
+ inject_req.subblock_id = info->head.sub_block_index;
+ inject_req.address = info->address;
+ inject_req.error_type = info->head.type;
+ inject_req.instance_mask = info->instance_mask;
+ inject_req.value = info->value;
+
+ return amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__INJECT_ERROR,
+ &inject_req, sizeof(inject_req), &rsp, sizeof(rsp));
+}
+
/* wrapper of psp_ras_trigger_error */
int amdgpu_ras_error_inject(struct amdgpu_device *adev,
struct ras_inject_if *info)
@@ -1613,6 +1684,9 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
info->head.block,
info->head.sub_block_index);
+ if (amdgpu_uniras_enabled(adev))
+ return amdgpu_uniras_error_inject(adev, info);
+
/* inject on guest isn't allowed, return success directly */
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1757,7 +1831,9 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
/* sysfs begin */
static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
- struct ras_badpage **bps, unsigned int *count);
+ struct ras_badpage *bps, uint32_t count, uint32_t start);
+static int amdgpu_uniras_badpages_read(struct amdgpu_device *adev,
+ struct ras_badpage *bps, uint32_t count, uint32_t start);
static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
{
@@ -1815,19 +1891,50 @@ static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
unsigned int end = div64_ul(ppos + count - 1, element_size);
ssize_t s = 0;
struct ras_badpage *bps = NULL;
- unsigned int bps_count = 0;
+ int bps_count = 0, i, status;
+ uint64_t address;
memset(buf, 0, count);
- if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
+ bps_count = end - start;
+ bps = kmalloc_array(bps_count, sizeof(*bps), GFP_KERNEL);
+ if (!bps)
return 0;
- for (; start < end && start < bps_count; start++)
+ memset(bps, 0, sizeof(*bps) * bps_count);
+
+ if (amdgpu_uniras_enabled(adev))
+ bps_count = amdgpu_uniras_badpages_read(adev, bps, bps_count, start);
+ else
+ bps_count = amdgpu_ras_badpages_read(adev, bps, bps_count, start);
+
+ if (bps_count <= 0) {
+ kfree(bps);
+ return 0;
+ }
+
+ for (i = 0; i < bps_count; i++) {
+ address = ((uint64_t)bps[i].bp) << AMDGPU_GPU_PAGE_SHIFT;
+ if (amdgpu_ras_check_critical_address(adev, address))
+ continue;
+
+ bps[i].size = AMDGPU_GPU_PAGE_SIZE;
+
+ status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr,
+ address);
+ if (status == -EBUSY)
+ bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
+ else if (status == -ENOENT)
+ bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
+ else
+ bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED;
+
s += scnprintf(&buf[s], element_size + 1,
"0x%08x : 0x%08x : %1s\n",
- bps[start].bp,
- bps[start].size,
- amdgpu_ras_badpage_flags_str(bps[start].flags));
+ bps[i].bp,
+ bps[i].size,
+ amdgpu_ras_badpage_flags_str(bps[i].flags));
+ }
kfree(bps);
@@ -1843,12 +1950,42 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
return sysfs_emit(buf, "feature mask: 0x%x\n", con->features);
}
+static bool amdgpu_ras_get_version_info(struct amdgpu_device *adev, u32 *major,
+ u32 *minor, u32 *rev)
+{
+ int i;
+
+ if (!adev || !major || !minor || !rev || !amdgpu_uniras_enabled(adev))
+ return false;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_RAS) {
+ *major = adev->ip_blocks[i].version->major;
+ *minor = adev->ip_blocks[i].version->minor;
+ *rev = adev->ip_blocks[i].version->rev;
+ return true;
+ }
+ }
+
+ return false;
+}
+
static ssize_t amdgpu_ras_sysfs_version_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct amdgpu_ras *con =
container_of(attr, struct amdgpu_ras, version_attr);
- return sysfs_emit(buf, "table version: 0x%x\n", con->eeprom_control.tbl_hdr.version);
+ u32 major, minor, rev;
+ ssize_t size = 0;
+
+ size += sysfs_emit_at(buf, size, "table version: 0x%x\n",
+ con->eeprom_control.tbl_hdr.version);
+
+ if (amdgpu_ras_get_version_info(con->adev, &major, &minor, &rev))
+ size += sysfs_emit_at(buf, size, "ras version: %u.%u.%u\n",
+ major, minor, rev);
+
+ return size;
}
static ssize_t amdgpu_ras_sysfs_schema_show(struct device *dev,
@@ -2241,6 +2378,11 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY))
return;
+ if (amdgpu_uniras_enabled(adev)) {
+ amdgpu_ras_mgr_handle_fatal_interrupt(adev, NULL);
+ return;
+ }
+
if (adev->nbio.ras &&
adev->nbio.ras->handle_ras_controller_intr_no_bifring)
adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);
@@ -2411,6 +2553,16 @@ int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
struct ras_manager *obj;
struct ras_ih_data *data;
+ if (amdgpu_uniras_enabled(adev)) {
+ struct ras_ih_info ih_info;
+
+ memset(&ih_info, 0, sizeof(ih_info));
+ ih_info.block = info->head.block;
+ memcpy(&ih_info.iv_entry, info->entry, sizeof(struct amdgpu_iv_entry));
+
+ return amdgpu_ras_mgr_handle_controller_interrupt(adev, &ih_info);
+ }
+
obj = amdgpu_ras_find_obj(adev, &info->head);
if (!obj)
return -EINVAL;
@@ -2605,62 +2757,83 @@ static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
}
}
-/* recovery begin */
-
-/* return 0 on success.
- * caller need free bps.
- */
static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
- struct ras_badpage **bps, unsigned int *count)
+ struct ras_badpage *bps, uint32_t count, uint32_t start)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data;
- int i = 0;
- int ret = 0, status;
+ int r = 0;
+ uint32_t i;
if (!con || !con->eh_data || !bps || !count)
return -EINVAL;
mutex_lock(&con->recovery_lock);
data = con->eh_data;
- if (!data || data->count == 0) {
- *bps = NULL;
- ret = -EINVAL;
- goto out;
+ if (start < data->count) {
+ for (i = start; i < data->count; i++) {
+ if (!data->bps[i].ts)
+ continue;
+
+ bps[r].bp = data->bps[i].retired_page;
+ r++;
+ if (r >= count)
+ break;
+ }
}
+ mutex_unlock(&con->recovery_lock);
- *bps = kmalloc_array(data->count, sizeof(struct ras_badpage), GFP_KERNEL);
- if (!*bps) {
- ret = -ENOMEM;
- goto out;
- }
+ return r;
+}
- for (; i < data->count; i++) {
- if (!data->bps[i].ts)
- continue;
+static int amdgpu_uniras_badpages_read(struct amdgpu_device *adev,
+ struct ras_badpage *bps, uint32_t count, uint32_t start)
+{
+ struct ras_cmd_bad_pages_info_req cmd_input;
+ struct ras_cmd_bad_pages_info_rsp *output;
+ uint32_t group, start_group, end_group;
+ uint32_t pos, pos_in_group;
+ int r = 0, i;
- (*bps)[i] = (struct ras_badpage){
- .bp = data->bps[i].retired_page,
- .size = AMDGPU_GPU_PAGE_SIZE,
- .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
- };
+ if (!bps || !count)
+ return -EINVAL;
- if (amdgpu_ras_check_critical_address(adev,
- data->bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
- continue;
+ output = kmalloc(sizeof(*output), GFP_KERNEL);
+ if (!output)
+ return -ENOMEM;
- status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr,
- data->bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT);
- if (status == -EBUSY)
- (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
- else if (status == -ENOENT)
- (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
+ memset(&cmd_input, 0, sizeof(cmd_input));
+
+ start_group = start / RAS_CMD_MAX_BAD_PAGES_PER_GROUP;
+ end_group = (start + count + RAS_CMD_MAX_BAD_PAGES_PER_GROUP - 1) /
+ RAS_CMD_MAX_BAD_PAGES_PER_GROUP;
+
+ pos = start;
+ for (group = start_group; group < end_group; group++) {
+ memset(output, 0, sizeof(*output));
+ cmd_input.group_index = group;
+ if (amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__GET_BAD_PAGES,
+ &cmd_input, sizeof(cmd_input), output, sizeof(*output)))
+ goto out;
+
+ if (pos >= output->bp_total_cnt)
+ goto out;
+
+ pos_in_group = pos - group * RAS_CMD_MAX_BAD_PAGES_PER_GROUP;
+ for (i = pos_in_group; i < output->bp_in_group; i++, pos++) {
+ if (!output->records[i].ts)
+ continue;
+
+ bps[r].bp = output->records[i].retired_page;
+ r++;
+ if (r >= count)
+ goto out;
+ }
}
- *count = con->bad_page_num;
out:
- mutex_unlock(&con->recovery_lock);
- return ret;
+ kfree(output);
+ return r;
}
static void amdgpu_ras_set_fed_all(struct amdgpu_device *adev,
@@ -3126,7 +3299,7 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
*new_cnt = unit_num;
/* only new entries are saved */
- if (unit_num > 0) {
+ if (unit_num && save_count) {
/*old asics only save pa to eeprom like before*/
if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) {
if (amdgpu_ras_eeprom_append(control,
@@ -3590,6 +3763,9 @@ int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev)
if (!con || amdgpu_sriov_vf(adev))
return 0;
+ if (amdgpu_uniras_enabled(adev))
+ return 0;
+
control = &con->eeprom_control;
ret = amdgpu_ras_eeprom_init(control);
control->is_eeprom_valid = !ret;
@@ -3975,7 +4151,6 @@ static void amdgpu_ras_counte_dw(struct work_struct *work)
atomic_set(&con->ras_ue_count, ue_count);
}
- pm_runtime_mark_last_busy(dev->dev);
Out:
pm_runtime_put_autosuspend(dev->dev);
}
@@ -4584,6 +4759,9 @@ int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_
struct ras_event_state *event_state;
int ret = 0;
+ if (amdgpu_uniras_enabled(adev))
+ return 0;
+
if (type >= RAS_EVENT_TYPE_COUNT) {
ret = -EINVAL;
goto out;
@@ -4634,20 +4812,18 @@ u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type
return id;
}
-void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
+int amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
{
if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
enum ras_event_type type = RAS_EVENT_TYPE_FATAL;
- u64 event_id;
+ u64 event_id = RAS_EVENT_INVALID_ID;
- if (amdgpu_ras_mark_ras_event(adev, type)) {
- dev_err(adev->dev,
- "uncorrectable hardware error (ERREVENT_ATHUB_INTERRUPT) detected!\n");
- return;
- }
+ if (amdgpu_uniras_enabled(adev))
+ return 0;
- event_id = amdgpu_ras_acquire_event_id(adev, type);
+ if (!amdgpu_ras_mark_ras_event(adev, type))
+ event_id = amdgpu_ras_acquire_event_id(adev, type);
RAS_EVENT_LOG(adev, event_id, "uncorrectable hardware error"
"(ERREVENT_ATHUB_INTERRUPT) detected!\n");
@@ -4656,6 +4832,8 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
amdgpu_ras_reset_gpu(adev);
}
+
+ return -EBUSY;
}
bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev)
@@ -5408,6 +5586,9 @@ bool amdgpu_ras_is_rma(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ if (amdgpu_uniras_enabled(adev))
+ return amdgpu_ras_mgr_is_rma(adev);
+
if (!con)
return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 6cf0dfd38be8..556cf4d7b5ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -504,6 +504,7 @@ struct ras_critical_region {
};
struct amdgpu_ras {
+ void *ras_mgr;
/* ras infrastructure */
/* for ras itself. */
uint32_t features;
@@ -909,7 +910,7 @@ static inline void amdgpu_ras_intr_cleared(void)
atomic_set(&amdgpu_ras_in_intr, 0);
}
-void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev);
+int amdgpu_ras_global_ras_isr(struct amdgpu_device *adev);
void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 3eb3fb55ccb0..5a7bf0661dbf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -32,6 +32,7 @@
#include <linux/uaccess.h>
#include "amdgpu_reset.h"
+#include "amdgpu_ras_mgr.h"
/* These are memory addresses as would be seen by one or more EEPROM
* chips strung on the I2C bus, usually by manipulating pins 1-3 of a
@@ -556,6 +557,9 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ if (amdgpu_uniras_enabled(adev))
+ return amdgpu_ras_mgr_check_eeprom_safety_watermark(adev);
+
if (!__is_ras_eeprom_supported(adev) ||
!amdgpu_bad_page_threshold)
return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 8f6ce948c684..43f769fed810 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -159,8 +159,16 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
*/
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
- while (ib->length_dw & ring->funcs->align_mask)
- ib->ptr[ib->length_dw++] = ring->funcs->nop;
+ u32 align_mask = ring->funcs->align_mask;
+ u32 count = ib->length_dw & align_mask;
+
+ if (count) {
+ count = align_mask + 1 - count;
+
+ memset32(&ib->ptr[ib->length_dw], ring->funcs->nop, count);
+
+ ib->length_dw += count;
+ }
}
/**
@@ -811,7 +819,7 @@ int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
if (r)
return r;
- /* signal the fence of the bad job */
+ /* signal the guilty fence and set an error on all fences from the context */
if (guilty_fence)
amdgpu_fence_driver_guilty_force_completion(guilty_fence);
/* Re-emit the non-guilty commands */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index b6b649179776..87b962df5460 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -147,16 +147,14 @@ struct amdgpu_fence {
u64 wptr;
/* fence context for resets */
u64 context;
- uint32_t seq;
};
extern const struct drm_sched_backend_ops amdgpu_sched_ops;
-void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
-void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence);
-void amdgpu_fence_save_wptr(struct dma_fence *fence);
+void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af);
+void amdgpu_fence_save_wptr(struct amdgpu_fence *af);
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
@@ -166,8 +164,8 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev);
void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev);
int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev);
void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev);
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
- struct amdgpu_fence *af, unsigned int flags);
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
+ unsigned int flags);
int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
uint32_t timeout);
bool amdgpu_fence_process(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
index 41ebe690eeff..3739be1b71e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
@@ -159,7 +159,6 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u
dev_err(adev->dev, "Invalid input: %s\n", str);
}
- pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
return size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index aa9ee5dffa45..9777c5c9cb26 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -286,12 +286,13 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
* move and different for a BO to BO copy.
*
*/
-int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
- const struct amdgpu_copy_mem *src,
- const struct amdgpu_copy_mem *dst,
- uint64_t size, bool tmz,
- struct dma_resv *resv,
- struct dma_fence **f)
+__attribute__((nonnull))
+static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+ const struct amdgpu_copy_mem *src,
+ const struct amdgpu_copy_mem *dst,
+ uint64_t size, bool tmz,
+ struct dma_resv *resv,
+ struct dma_fence **f)
{
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct amdgpu_res_cursor src_mm, dst_mm;
@@ -365,9 +366,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
}
error:
mutex_unlock(&adev->mman.gtt_window_lock);
- if (f)
- *f = dma_fence_get(fence);
- dma_fence_put(fence);
+ *f = fence;
return r;
}
@@ -706,10 +705,11 @@ struct amdgpu_ttm_tt {
* memory and start HMM tracking CPU page table update
*
* Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
- * once afterwards to stop HMM tracking
+ * once afterwards to stop HMM tracking. Its the caller responsibility to ensure
+ * that range is a valid memory and it is freed too.
*/
int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
- struct hmm_range **range)
+ struct amdgpu_hmm_range *range)
{
struct ttm_tt *ttm = bo->tbo.ttm;
struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
@@ -719,9 +719,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
bool readonly;
int r = 0;
- /* Make sure get_user_pages_done() can cleanup gracefully */
- *range = NULL;
-
mm = bo->notifier.mm;
if (unlikely(!mm)) {
DRM_DEBUG_DRIVER("BO is not registered?\n");
@@ -756,38 +753,6 @@ out_unlock:
return r;
}
-/* amdgpu_ttm_tt_discard_user_pages - Discard range and pfn array allocations
- */
-void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
- struct hmm_range *range)
-{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
-
- if (gtt && gtt->userptr && range)
- amdgpu_hmm_range_get_pages_done(range);
-}
-
-/*
- * amdgpu_ttm_tt_get_user_pages_done - stop HMM track the CPU page table change
- * Check if the pages backing this ttm range have been invalidated
- *
- * Returns: true if pages are still valid
- */
-bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
- struct hmm_range *range)
-{
- struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
-
- if (!gtt || !gtt->userptr || !range)
- return false;
-
- DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n",
- gtt->userptr, ttm->num_pages);
-
- WARN_ONCE(!range->hmm_pfns, "No user pages to check\n");
-
- return !amdgpu_hmm_range_get_pages_done(range);
-}
#endif
/*
@@ -797,12 +762,12 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
* that backs user memory and will ultimately be mapped into the device
* address space.
*/
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range)
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct amdgpu_hmm_range *range)
{
unsigned long i;
for (i = 0; i < ttm->num_pages; ++i)
- ttm->pages[i] = range ? hmm_pfn_to_page(range->hmm_pfns[i]) : NULL;
+ ttm->pages[i] = range ? hmm_pfn_to_page(range->hmm_range.hmm_pfns[i]) : NULL;
}
/*
@@ -1804,18 +1769,14 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
}
- if (!adev->gmc.is_app_apu) {
- ret = amdgpu_bo_create_kernel_at(
- adev, adev->gmc.real_vram_size - reserve_size,
- reserve_size, &adev->mman.fw_reserved_memory, NULL);
- if (ret) {
- dev_err(adev->dev, "alloc tmr failed(%d)!\n", ret);
- amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory,
- NULL, NULL);
- return ret;
- }
- } else {
- DRM_DEBUG_DRIVER("backdoor fw loading path for PSP TMR, no reservation needed\n");
+ ret = amdgpu_bo_create_kernel_at(
+ adev, adev->gmc.real_vram_size - reserve_size, reserve_size,
+ &adev->mman.fw_reserved_memory, NULL);
+ if (ret) {
+ dev_err(adev->dev, "alloc tmr failed(%d)!\n", ret);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
+ NULL);
+ return ret;
}
return 0;
@@ -1837,7 +1798,7 @@ static int amdgpu_ttm_pools_init(struct amdgpu_device *adev)
for (i = 0; i < adev->gmc.num_mem_partitions; i++) {
ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev,
adev->gmc.mem_partitions[i].numa.node,
- false, false);
+ TTM_ALLOCATION_POOL_BENEFICIAL_ORDER(get_order(SZ_2M)));
}
return 0;
}
@@ -1930,8 +1891,11 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
adev_to_drm(adev)->anon_inode->i_mapping,
adev_to_drm(adev)->vma_offset_manager,
- adev->need_swiotlb,
- dma_addressing_limited(adev->dev));
+ (adev->need_swiotlb ?
+ TTM_ALLOCATION_POOL_USE_DMA_ALLOC : 0) |
+ (dma_addressing_limited(adev->dev) ?
+ TTM_ALLOCATION_POOL_USE_DMA32 : 0) |
+ TTM_ALLOCATION_POOL_BENEFICIAL_ORDER(get_order(SZ_2M)));
if (r) {
dev_err(adev->dev,
"failed initializing buffer object driver(%d).\n", r);
@@ -1980,19 +1944,19 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
return r;
/*
- *The reserved vram for driver must be pinned to the specified
- *place on the VRAM, so reserve it early.
+ * The reserved VRAM for the driver must be pinned to a specific
+ * location in VRAM, so reserve it early.
*/
r = amdgpu_ttm_drv_reserve_vram_init(adev);
if (r)
return r;
/*
- * only NAVI10 and onwards ASIC support for IP discovery.
- * If IP discovery enabled, a block of memory should be
- * reserved for IP discovey.
+ * only NAVI10 and later ASICs support IP discovery.
+ * If IP discovery is enabled, a block of memory should be
+ * reserved for it.
*/
- if (adev->mman.discovery_bin) {
+ if (adev->discovery.reserve_tmr) {
r = amdgpu_ttm_reserve_tmr(adev);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 0be2728aa872..577ee04ce0bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -28,6 +28,7 @@
#include <drm/gpu_scheduler.h>
#include <drm/ttm/ttm_placement.h>
#include "amdgpu_vram_mgr.h"
+#include "amdgpu_hmm.h"
#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
#define AMDGPU_PL_GWS (TTM_PL_PRIV + 1)
@@ -82,9 +83,6 @@ struct amdgpu_mman {
uint64_t stolen_reserved_offset;
uint64_t stolen_reserved_size;
- /* discovery */
- uint8_t *discovery_bin;
- uint32_t discovery_tmr_size;
/* fw reserved memory */
struct amdgpu_bo *fw_reserved_memory;
struct amdgpu_bo *fw_reserved_memory_extend;
@@ -170,12 +168,6 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
struct dma_resv *resv,
struct dma_fence **fence, bool direct_submit,
bool vm_needs_flush, uint32_t copy_flags);
-int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
- const struct amdgpu_copy_mem *src,
- const struct amdgpu_copy_mem *dst,
- uint64_t size, bool tmz,
- struct dma_resv *resv,
- struct dma_fence **f);
int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
struct dma_resv *resv,
struct dma_fence **fence);
@@ -192,29 +184,16 @@ uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
- struct hmm_range **range);
-void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
- struct hmm_range *range);
-bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
- struct hmm_range *range);
+ struct amdgpu_hmm_range *range);
#else
static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
- struct hmm_range **range)
+ struct amdgpu_hmm_range *range)
{
return -EPERM;
}
-static inline void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
- struct hmm_range *range)
-{
-}
-static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
- struct hmm_range *range)
-{
- return false;
-}
#endif
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct hmm_range *range);
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct amdgpu_hmm_range *range);
int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
uint64_t *user_addr);
int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 1add21160d21..13cc5a686dfd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -29,6 +29,8 @@
#include "amdgpu.h"
#include "amdgpu_vm.h"
#include "amdgpu_userq.h"
+#include "amdgpu_hmm.h"
+#include "amdgpu_reset.h"
#include "amdgpu_userq_fence.h"
u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
@@ -44,10 +46,29 @@ u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
return userq_ip_mask;
}
-int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
- u64 expected_size)
+static int amdgpu_userq_buffer_va_list_add(struct amdgpu_usermode_queue *queue,
+ struct amdgpu_bo_va_mapping *va_map, u64 addr)
+{
+ struct amdgpu_userq_va_cursor *va_cursor;
+ struct userq_va_list;
+
+ va_cursor = kzalloc(sizeof(*va_cursor), GFP_KERNEL);
+ if (!va_cursor)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&va_cursor->list);
+ va_cursor->gpu_addr = addr;
+ atomic_set(&va_map->bo_va->userq_va_mapped, 1);
+ list_add(&va_cursor->list, &queue->userq_va_list);
+
+ return 0;
+}
+
+int amdgpu_userq_input_va_validate(struct amdgpu_usermode_queue *queue,
+ u64 addr, u64 expected_size)
{
struct amdgpu_bo_va_mapping *va_map;
+ struct amdgpu_vm *vm = queue->vm;
u64 user_addr;
u64 size;
int r = 0;
@@ -67,6 +88,7 @@ int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
/* Only validate the userq whether resident in the VM mapping range */
if (user_addr >= va_map->start &&
va_map->last - user_addr + 1 >= size) {
+ amdgpu_userq_buffer_va_list_add(queue, va_map, user_addr);
amdgpu_bo_unreserve(vm->root.bo);
return 0;
}
@@ -77,6 +99,76 @@ out_err:
return r;
}
+static bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ bool r;
+
+ if (amdgpu_bo_reserve(vm->root.bo, false))
+ return false;
+
+ mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
+ if (!IS_ERR_OR_NULL(mapping) && atomic_read(&mapping->bo_va->userq_va_mapped))
+ r = true;
+ else
+ r = false;
+ amdgpu_bo_unreserve(vm->root.bo);
+
+ return r;
+}
+
+static bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_userq_va_cursor *va_cursor, *tmp;
+ int r = 0;
+
+ list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) {
+ r += amdgpu_userq_buffer_va_mapped(queue->vm, va_cursor->gpu_addr);
+ dev_dbg(queue->userq_mgr->adev->dev,
+ "validate the userq mapping:%p va:%llx r:%d\n",
+ queue, va_cursor->gpu_addr, r);
+ }
+
+ if (r != 0)
+ return true;
+
+ return false;
+}
+
+static void amdgpu_userq_buffer_va_list_del(struct amdgpu_bo_va_mapping *mapping,
+ struct amdgpu_userq_va_cursor *va_cursor)
+{
+ atomic_set(&mapping->bo_va->userq_va_mapped, 0);
+ list_del(&va_cursor->list);
+ kfree(va_cursor);
+}
+
+static int amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_userq_va_cursor *va_cursor, *tmp;
+ struct amdgpu_bo_va_mapping *mapping;
+ int r;
+
+ r = amdgpu_bo_reserve(queue->vm->root.bo, false);
+ if (r)
+ return r;
+
+ list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) {
+ mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, va_cursor->gpu_addr);
+ if (!mapping) {
+ r = -EINVAL;
+ goto err;
+ }
+ dev_dbg(adev->dev, "delete the userq:%p va:%llx\n",
+ queue, va_cursor->gpu_addr);
+ amdgpu_userq_buffer_va_list_del(mapping, va_cursor);
+ }
+err:
+ amdgpu_bo_unreserve(queue->vm->root.bo);
+ return r;
+}
+
static int
amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_usermode_queue *queue)
@@ -159,19 +251,24 @@ amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr,
return r;
}
-static void
+static int
amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_usermode_queue *queue)
{
struct dma_fence *f = queue->last_fence;
- int ret;
+ int ret = 0;
if (f && !dma_fence_is_signaled(f)) {
- ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
- if (ret <= 0)
+ ret = dma_fence_wait_timeout(f, true, MAX_SCHEDULE_TIMEOUT);
+ if (ret <= 0) {
drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
f->context, f->seqno);
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ return -ETIME;
+ }
}
+
+ return ret;
}
static void
@@ -182,16 +279,27 @@ amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_device *adev = uq_mgr->adev;
const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type];
+ /* Wait for mode-1 reset to complete */
+ down_read(&adev->reset_domain->sem);
+
+ /* Drop the userq reference. */
+ amdgpu_userq_buffer_vas_list_cleanup(adev, queue);
uq_funcs->mqd_destroy(uq_mgr, queue);
amdgpu_userq_fence_driver_free(queue);
- idr_remove(&uq_mgr->userq_idr, queue_id);
+ /* Use interrupt-safe locking since IRQ handlers may access these XArrays */
+ xa_erase_irq(&uq_mgr->userq_mgr_xa, (unsigned long)queue_id);
+ xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index);
+ queue->userq_mgr = NULL;
+ list_del(&queue->userq_va_list);
kfree(queue);
+
+ up_read(&adev->reset_domain->sem);
}
static struct amdgpu_usermode_queue *
amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
{
- return idr_find(&uq_mgr->userq_idr, qid);
+ return xa_load(&uq_mgr->userq_mgr_xa, qid);
}
void
@@ -319,17 +427,6 @@ amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
case AMDGPU_HW_IP_DMA:
db_size = sizeof(u64);
break;
-
- case AMDGPU_HW_IP_VCN_ENC:
- db_size = sizeof(u32);
- db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1;
- break;
-
- case AMDGPU_HW_IP_VPE:
- db_size = sizeof(u32);
- db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VPE << 1;
- break;
-
default:
drm_file_err(uq_mgr->file, "[Usermode queues] IP %d not support\n",
db_info->queue_type);
@@ -391,7 +488,6 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
amdgpu_userq_cleanup(uq_mgr, queue, queue_id);
mutex_unlock(&uq_mgr->userq_mutex);
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
@@ -463,8 +559,9 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
struct amdgpu_db_info db_info;
char *queue_name;
bool skip_map_queue;
+ u32 qid;
uint64_t index;
- int qid, r = 0;
+ int r = 0;
int priority =
(args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >>
AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT;
@@ -487,7 +584,6 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
*
* This will also make sure we have a valid eviction fence ready to be used.
*/
- mutex_lock(&adev->userq_mutex);
amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
uq_funcs = adev->userq_funcs[args->in.ip_type];
@@ -505,14 +601,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
goto unlock;
}
- /* Validate the userq virtual address.*/
- if (amdgpu_userq_input_va_validate(&fpriv->vm, args->in.queue_va, args->in.queue_size) ||
- amdgpu_userq_input_va_validate(&fpriv->vm, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) ||
- amdgpu_userq_input_va_validate(&fpriv->vm, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) {
- r = -EINVAL;
- kfree(queue);
- goto unlock;
- }
+ INIT_LIST_HEAD(&queue->userq_va_list);
queue->doorbell_handle = args->in.doorbell_handle;
queue->queue_type = args->in.ip_type;
queue->vm = &fpriv->vm;
@@ -523,6 +612,15 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
db_info.db_obj = &queue->db_obj;
db_info.doorbell_offset = args->in.doorbell_offset;
+ /* Validate the userq virtual address.*/
+ if (amdgpu_userq_input_va_validate(queue, args->in.queue_va, args->in.queue_size) ||
+ amdgpu_userq_input_va_validate(queue, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) ||
+ amdgpu_userq_input_va_validate(queue, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) {
+ r = -EINVAL;
+ kfree(queue);
+ goto unlock;
+ }
+
/* Convert relative doorbell offset into absolute doorbell index */
index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp);
if (index == (uint64_t)-EINVAL) {
@@ -548,16 +646,27 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
goto unlock;
}
+ /* Wait for mode-1 reset to complete */
+ down_read(&adev->reset_domain->sem);
+ r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL));
+ if (r) {
+ kfree(queue);
+ up_read(&adev->reset_domain->sem);
+ goto unlock;
+ }
- qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL);
- if (qid < 0) {
+ r = xa_alloc(&uq_mgr->userq_mgr_xa, &qid, queue, XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL);
+ if (r) {
drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n");
amdgpu_userq_fence_driver_free(queue);
uq_funcs->mqd_destroy(uq_mgr, queue);
kfree(queue);
r = -ENOMEM;
+ up_read(&adev->reset_domain->sem);
goto unlock;
}
+ up_read(&adev->reset_domain->sem);
+ queue->userq_mgr = uq_mgr;
/* don't map the queue if scheduling is halted */
if (adev->userq_halt_for_enforce_isolation &&
@@ -570,7 +679,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
r = amdgpu_userq_map_helper(uq_mgr, queue);
if (r) {
drm_file_err(uq_mgr->file, "Failed to map Queue\n");
- idr_remove(&uq_mgr->userq_idr, qid);
+ xa_erase(&uq_mgr->userq_mgr_xa, qid);
amdgpu_userq_fence_driver_free(queue);
uq_funcs->mqd_destroy(uq_mgr, queue);
kfree(queue);
@@ -595,7 +704,6 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
unlock:
mutex_unlock(&uq_mgr->userq_mutex);
- mutex_unlock(&adev->userq_mutex);
return r;
}
@@ -693,11 +801,19 @@ static int
amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
{
struct amdgpu_usermode_queue *queue;
- int queue_id;
+ unsigned long queue_id;
int ret = 0, r;
/* Resume all the queues for this process */
- idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
+ xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) {
+
+ if (!amdgpu_userq_buffer_vas_mapped(queue)) {
+ drm_file_err(uq_mgr->file,
+ "trying restore queue without va mapping\n");
+ queue->state = AMDGPU_USERQ_STATE_INVALID_VA;
+ continue;
+ }
+
r = amdgpu_userq_restore_helper(uq_mgr, queue);
if (r)
ret = r;
@@ -760,12 +876,21 @@ static int
amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
{
struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
+ bool invalidated = false, new_addition = false;
+ struct ttm_operation_ctx ctx = { true, false };
struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_hmm_range *range;
struct amdgpu_vm *vm = &fpriv->vm;
+ unsigned long key, tmp_key;
struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo *bo;
struct drm_exec exec;
+ struct xarray xa;
int ret;
+ xa_init(&xa);
+
+retry_lock:
drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&exec) {
ret = amdgpu_vm_lock_pd(vm, &exec, 1);
@@ -792,10 +917,72 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
goto unlock_all;
}
+ if (invalidated) {
+ xa_for_each(&xa, tmp_key, range) {
+ bo = range->bo;
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ goto unlock_all;
+
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ goto unlock_all;
+ }
+ invalidated = false;
+ }
+
ret = amdgpu_vm_handle_moved(adev, vm, NULL);
if (ret)
goto unlock_all;
+ key = 0;
+ /* Validate User Ptr BOs */
+ list_for_each_entry(bo_va, &vm->done, base.vm_status) {
+ bo = bo_va->base.bo;
+
+ if (!amdgpu_ttm_tt_is_userptr(bo->tbo.ttm))
+ continue;
+
+ range = xa_load(&xa, key);
+ if (range && range->bo != bo) {
+ xa_erase(&xa, key);
+ amdgpu_hmm_range_free(range);
+ range = NULL;
+ }
+
+ if (!range) {
+ range = amdgpu_hmm_range_alloc(bo);
+ if (!range) {
+ ret = -ENOMEM;
+ goto unlock_all;
+ }
+
+ xa_store(&xa, key, range, GFP_KERNEL);
+ new_addition = true;
+ }
+ key++;
+ }
+
+ if (new_addition) {
+ drm_exec_fini(&exec);
+ xa_for_each(&xa, tmp_key, range) {
+ if (!range)
+ continue;
+ bo = range->bo;
+ ret = amdgpu_ttm_tt_get_user_pages(bo, range);
+ if (ret)
+ goto unlock_all;
+ }
+
+ invalidated = true;
+ new_addition = false;
+ goto retry_lock;
+ }
+
ret = amdgpu_vm_update_pdes(adev, vm, false);
if (ret)
goto unlock_all;
@@ -815,6 +1002,13 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
unlock_all:
drm_exec_fini(&exec);
+ xa_for_each(&xa, tmp_key, range) {
+ if (!range)
+ continue;
+ bo = range->bo;
+ amdgpu_hmm_range_free(range);
+ }
+ xa_destroy(&xa);
return ret;
}
@@ -848,11 +1042,11 @@ static int
amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
{
struct amdgpu_usermode_queue *queue;
- int queue_id;
+ unsigned long queue_id;
int ret = 0, r;
/* Try to unmap all the queues in this process ctx */
- idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
+ xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) {
r = amdgpu_userq_preempt_helper(uq_mgr, queue);
if (r)
ret = r;
@@ -867,9 +1061,10 @@ static int
amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
{
struct amdgpu_usermode_queue *queue;
- int queue_id, ret;
+ unsigned long queue_id;
+ int ret;
- idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
+ xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) {
struct dma_fence *f = queue->last_fence;
if (!f || dma_fence_is_signaled(f))
@@ -922,44 +1117,30 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f
struct amdgpu_device *adev)
{
mutex_init(&userq_mgr->userq_mutex);
- idr_init_base(&userq_mgr->userq_idr, 1);
+ xa_init_flags(&userq_mgr->userq_mgr_xa, XA_FLAGS_ALLOC);
userq_mgr->adev = adev;
userq_mgr->file = file_priv;
- mutex_lock(&adev->userq_mutex);
- list_add(&userq_mgr->list, &adev->userq_mgr_list);
- mutex_unlock(&adev->userq_mutex);
-
INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker);
return 0;
}
void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
{
- struct amdgpu_device *adev = userq_mgr->adev;
struct amdgpu_usermode_queue *queue;
- struct amdgpu_userq_mgr *uqm, *tmp;
- uint32_t queue_id;
+ unsigned long queue_id;
cancel_delayed_work_sync(&userq_mgr->resume_work);
- mutex_lock(&adev->userq_mutex);
mutex_lock(&userq_mgr->userq_mutex);
- idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) {
+ xa_for_each(&userq_mgr->userq_mgr_xa, queue_id, queue) {
amdgpu_userq_wait_for_last_fence(userq_mgr, queue);
amdgpu_userq_unmap_helper(userq_mgr, queue);
amdgpu_userq_cleanup(userq_mgr, queue, queue_id);
}
- list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
- if (uqm == userq_mgr) {
- list_del(&uqm->list);
- break;
- }
- }
- idr_destroy(&userq_mgr->userq_idr);
+ xa_destroy(&userq_mgr->userq_mgr_xa);
mutex_unlock(&userq_mgr->userq_mutex);
- mutex_unlock(&adev->userq_mutex);
mutex_destroy(&userq_mgr->userq_mutex);
}
@@ -967,57 +1148,50 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev)
{
u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
struct amdgpu_usermode_queue *queue;
- struct amdgpu_userq_mgr *uqm, *tmp;
- int queue_id;
- int ret = 0, r;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
+ int r;
if (!ip_mask)
return 0;
- mutex_lock(&adev->userq_mutex);
- list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
cancel_delayed_work_sync(&uqm->resume_work);
- mutex_lock(&uqm->userq_mutex);
- idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
- if (adev->in_s0ix)
- r = amdgpu_userq_preempt_helper(uqm, queue);
- else
- r = amdgpu_userq_unmap_helper(uqm, queue);
- if (r)
- ret = r;
- }
- mutex_unlock(&uqm->userq_mutex);
+ guard(mutex)(&uqm->userq_mutex);
+ if (adev->in_s0ix)
+ r = amdgpu_userq_preempt_helper(uqm, queue);
+ else
+ r = amdgpu_userq_unmap_helper(uqm, queue);
+ if (r)
+ return r;
}
- mutex_unlock(&adev->userq_mutex);
- return ret;
+ return 0;
}
int amdgpu_userq_resume(struct amdgpu_device *adev)
{
u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
struct amdgpu_usermode_queue *queue;
- struct amdgpu_userq_mgr *uqm, *tmp;
- int queue_id;
- int ret = 0, r;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
+ int r;
if (!ip_mask)
return 0;
- mutex_lock(&adev->userq_mutex);
- list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
- mutex_lock(&uqm->userq_mutex);
- idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
- if (adev->in_s0ix)
- r = amdgpu_userq_restore_helper(uqm, queue);
- else
- r = amdgpu_userq_map_helper(uqm, queue);
- if (r)
- ret = r;
- }
- mutex_unlock(&uqm->userq_mutex);
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ guard(mutex)(&uqm->userq_mutex);
+ if (adev->in_s0ix)
+ r = amdgpu_userq_restore_helper(uqm, queue);
+ else
+ r = amdgpu_userq_map_helper(uqm, queue);
+ if (r)
+ return r;
}
- mutex_unlock(&adev->userq_mutex);
- return ret;
+
+ return 0;
}
int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
@@ -1025,33 +1199,31 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
{
u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
struct amdgpu_usermode_queue *queue;
- struct amdgpu_userq_mgr *uqm, *tmp;
- int queue_id;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
int ret = 0, r;
/* only need to stop gfx/compute */
if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
return 0;
- mutex_lock(&adev->userq_mutex);
if (adev->userq_halt_for_enforce_isolation)
dev_warn(adev->dev, "userq scheduling already stopped!\n");
adev->userq_halt_for_enforce_isolation = true;
- list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
cancel_delayed_work_sync(&uqm->resume_work);
mutex_lock(&uqm->userq_mutex);
- idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
- if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
- (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
- (queue->xcp_id == idx)) {
- r = amdgpu_userq_preempt_helper(uqm, queue);
- if (r)
- ret = r;
- }
+ if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
+ (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
+ (queue->xcp_id == idx)) {
+ r = amdgpu_userq_preempt_helper(uqm, queue);
+ if (r)
+ ret = r;
}
mutex_unlock(&uqm->userq_mutex);
}
- mutex_unlock(&adev->userq_mutex);
+
return ret;
}
@@ -1060,21 +1232,20 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
{
u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
struct amdgpu_usermode_queue *queue;
- struct amdgpu_userq_mgr *uqm, *tmp;
- int queue_id;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
int ret = 0, r;
/* only need to stop gfx/compute */
if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
return 0;
- mutex_lock(&adev->userq_mutex);
if (!adev->userq_halt_for_enforce_isolation)
dev_warn(adev->dev, "userq scheduling already started!\n");
adev->userq_halt_for_enforce_isolation = false;
- list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
mutex_lock(&uqm->userq_mutex);
- idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
(queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
(queue->xcp_id == idx)) {
@@ -1082,9 +1253,39 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
if (r)
ret = r;
}
- }
mutex_unlock(&uqm->userq_mutex);
}
- mutex_unlock(&adev->userq_mutex);
+
return ret;
}
+
+int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t saddr)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_bo_va *bo_va = mapping->bo_va;
+ struct dma_resv *resv = bo_va->base.bo->tbo.base.resv;
+ int ret = 0;
+
+ if (!ip_mask)
+ return 0;
+
+ dev_warn_once(adev->dev, "now unmapping a vital queue va:%llx\n", saddr);
+ /**
+ * The userq VA mapping reservation should include the eviction fence,
+ * if the eviction fence can't signal successfully during unmapping,
+ * then driver will warn to flag this improper unmap of the userq VA.
+ * Note: The eviction fence may be attached to different BOs, and this
+ * unmap is only for one kind of userq VAs, so at this point suppose
+ * the eviction fence is always unsignaled.
+ */
+ if (!dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP)) {
+ ret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP, true,
+ MAX_SCHEDULE_TIMEOUT);
+ if (ret <= 0)
+ return -EBUSY;
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index c027dd916672..09da0617bfa2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -37,6 +37,7 @@ enum amdgpu_userq_state {
AMDGPU_USERQ_STATE_MAPPED,
AMDGPU_USERQ_STATE_PREEMPTED,
AMDGPU_USERQ_STATE_HUNG,
+ AMDGPU_USERQ_STATE_INVALID_VA,
};
struct amdgpu_mqd_prop;
@@ -47,6 +48,11 @@ struct amdgpu_userq_obj {
struct amdgpu_bo *obj;
};
+struct amdgpu_userq_va_cursor {
+ u64 gpu_addr;
+ struct list_head list;
+};
+
struct amdgpu_usermode_queue {
int queue_type;
enum amdgpu_userq_state state;
@@ -66,6 +72,8 @@ struct amdgpu_usermode_queue {
u32 xcp_id;
int priority;
struct dentry *debugfs_queue;
+
+ struct list_head userq_va_list;
};
struct amdgpu_userq_funcs {
@@ -88,11 +96,15 @@ struct amdgpu_userq_funcs {
/* Usermode queues for gfx */
struct amdgpu_userq_mgr {
- struct idr userq_idr;
+ /**
+ * @userq_mgr_xa: Per-process user queue map (queue ID → queue)
+ * Key: queue_id (unique ID within the process's userq manager)
+ * Value: struct amdgpu_usermode_queue
+ */
+ struct xarray userq_mgr_xa;
struct mutex userq_mutex;
struct amdgpu_device *adev;
struct delayed_work resume_work;
- struct list_head list;
struct drm_file *file;
};
@@ -136,7 +148,9 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
u32 idx);
int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
u32 idx);
-
-int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
- u64 expected_size);
+int amdgpu_userq_input_va_validate(struct amdgpu_usermode_queue *queue,
+ u64 addr, u64 expected_size);
+int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t saddr);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 761bad98da3e..2aeeaa954882 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -537,7 +537,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
}
/* Retrieve the user queue */
- queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
+ queue = xa_load(&userq_mgr->userq_mgr_xa, args->queue_id);
if (!queue) {
r = -ENOENT;
goto put_gobj_write;
@@ -899,7 +899,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
*/
num_fences = dma_fence_dedup_array(fences, num_fences);
- waitq = idr_find(&userq_mgr->userq_idr, wait_info->waitq_id);
+ waitq = xa_load(&userq_mgr->userq_mgr_xa, wait_info->waitq_id);
if (!waitq) {
r = -EINVAL;
goto free_fences;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index dc8a17bcc3c8..82624b44e661 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -100,7 +100,8 @@
#define SOC15_DPG_MODE_OFFSET(ip, inst_idx, reg) \
({ \
- uint32_t internal_reg_offset, addr; \
+ /* To avoid a -Wunused-but-set-variable warning. */ \
+ uint32_t internal_reg_offset __maybe_unused, addr; \
bool video_range, video1_range, aon_range, aon1_range; \
\
addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \
@@ -161,7 +162,8 @@
#define SOC24_DPG_MODE_OFFSET(ip, inst_idx, reg) \
({ \
- uint32_t internal_reg_offset, addr; \
+ /* To avoid a -Wunused-but-set-variable warning. */ \
+ uint32_t internal_reg_offset __maybe_unused, addr; \
bool video_range, video1_range, aon_range, aon1_range; \
\
addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 3328ab63376b..f2ce8f506aa8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -44,6 +44,18 @@
vf2pf_info->ucode_info[ucode].version = ver; \
} while (0)
+#define mmRCC_CONFIG_MEMSIZE 0xde3
+
+const char *amdgpu_virt_dynamic_crit_table_name[] = {
+ "IP DISCOVERY",
+ "VBIOS IMG",
+ "RAS TELEMETRY",
+ "DATA EXCHANGE",
+ "BAD PAGE INFO",
+ "INIT HEADER",
+ "LAST",
+};
+
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
{
/* By now all MMIO pages except mailbox are blocked */
@@ -150,9 +162,10 @@ void amdgpu_virt_request_init_data(struct amdgpu_device *adev)
virt->ops->req_init_data(adev);
if (adev->virt.req_init_data_ver > 0)
- DRM_INFO("host supports REQ_INIT_DATA handshake\n");
+ dev_info(adev->dev, "host supports REQ_INIT_DATA handshake of critical_region_version %d\n",
+ adev->virt.req_init_data_ver);
else
- DRM_WARN("host doesn't support REQ_INIT_DATA handshake\n");
+ dev_warn(adev->dev, "host doesn't support REQ_INIT_DATA handshake\n");
}
/**
@@ -205,12 +218,12 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
&adev->virt.mm_table.gpu_addr,
(void *)&adev->virt.mm_table.cpu_addr);
if (r) {
- DRM_ERROR("failed to alloc mm table and error = %d.\n", r);
+ dev_err(adev->dev, "failed to alloc mm table and error = %d.\n", r);
return r;
}
memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
- DRM_INFO("MM table gpu addr = 0x%llx, cpu addr = %p.\n",
+ dev_info(adev->dev, "MM table gpu addr = 0x%llx, cpu addr = %p.\n",
adev->virt.mm_table.gpu_addr,
adev->virt.mm_table.cpu_addr);
return 0;
@@ -390,7 +403,9 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
AMDGPU_GPU_PAGE_SIZE,
&bo, NULL))
- DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
+ dev_dbg(adev->dev,
+ "RAS WARN: reserve vram for retired page %llx fail\n",
+ bp);
data->bps_bo[i] = bo;
}
data->last_reserved = i + 1;
@@ -598,8 +613,8 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
vf2pf_info->driver_cert = 0;
vf2pf_info->os_info.all = 0;
- vf2pf_info->fb_usage =
- ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20;
+ vf2pf_info->fb_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ?
+ ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20 : 0;
vf2pf_info->fb_vis_usage =
amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr) >> 20;
vf2pf_info->fb_size = adev->gmc.real_vram_size >> 20;
@@ -658,10 +673,34 @@ out:
schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms);
}
+static int amdgpu_virt_read_exchange_data_from_mem(struct amdgpu_device *adev, uint32_t *pfvf_data)
+{
+ uint32_t dataexchange_offset =
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset;
+ uint32_t dataexchange_size =
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10;
+ uint64_t pos = 0;
+
+ dev_info(adev->dev,
+ "Got data exchange info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
+ dataexchange_offset, dataexchange_size);
+
+ if (!IS_ALIGNED(dataexchange_offset, 4) || !IS_ALIGNED(dataexchange_size, 4)) {
+ dev_err(adev->dev, "Data exchange data not aligned to 4 bytes\n");
+ return -EINVAL;
+ }
+
+ pos = (uint64_t)dataexchange_offset;
+ amdgpu_device_vram_access(adev, pos, pfvf_data,
+ dataexchange_size, false);
+
+ return 0;
+}
+
void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
{
if (adev->virt.vf2pf_update_interval_ms != 0) {
- DRM_INFO("clean up the vf2pf work item\n");
+ dev_info(adev->dev, "clean up the vf2pf work item\n");
cancel_delayed_work_sync(&adev->virt.vf2pf_work);
adev->virt.vf2pf_update_interval_ms = 0;
}
@@ -669,13 +708,15 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
{
+ uint32_t *pfvf_data = NULL;
+
adev->virt.fw_reserve.p_pf2vf = NULL;
adev->virt.fw_reserve.p_vf2pf = NULL;
adev->virt.vf2pf_update_interval_ms = 0;
adev->virt.vf2pf_update_retry_cnt = 0;
if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
- DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!");
+ dev_warn(adev->dev, "Currently fw_vram and drv_vram should not have values at the same time!");
} else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
/* go through this logic in ip_init and reset to init workqueue*/
amdgpu_virt_exchange_data(adev);
@@ -684,11 +725,34 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
} else if (adev->bios != NULL) {
/* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
- adev->virt.fw_reserve.p_pf2vf =
- (struct amd_sriov_msg_pf2vf_info_header *)
- (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+ if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
+ pfvf_data =
+ kzalloc(adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10,
+ GFP_KERNEL);
+ if (!pfvf_data) {
+ dev_err(adev->dev, "Failed to allocate memory for pfvf_data\n");
+ return;
+ }
- amdgpu_virt_read_pf2vf_data(adev);
+ if (amdgpu_virt_read_exchange_data_from_mem(adev, pfvf_data))
+ goto free_pfvf_data;
+
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)pfvf_data;
+
+ amdgpu_virt_read_pf2vf_data(adev);
+
+free_pfvf_data:
+ kfree(pfvf_data);
+ pfvf_data = NULL;
+ adev->virt.fw_reserve.p_pf2vf = NULL;
+ } else {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+
+ amdgpu_virt_read_pf2vf_data(adev);
+ }
}
}
@@ -701,23 +765,38 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
if (adev->mman.fw_vram_usage_va) {
- adev->virt.fw_reserve.p_pf2vf =
- (struct amd_sriov_msg_pf2vf_info_header *)
- (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
- adev->virt.fw_reserve.p_vf2pf =
- (struct amd_sriov_msg_vf2pf_info_header *)
- (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
- adev->virt.fw_reserve.ras_telemetry =
- (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10));
+ if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->mman.fw_vram_usage_va +
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset);
+ adev->virt.fw_reserve.p_vf2pf =
+ (struct amd_sriov_msg_vf2pf_info_header *)
+ (adev->mman.fw_vram_usage_va +
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset +
+ (AMD_SRIOV_MSG_SIZE_KB << 10));
+ adev->virt.fw_reserve.ras_telemetry =
+ (adev->mman.fw_vram_usage_va +
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset);
+ } else {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+ adev->virt.fw_reserve.p_vf2pf =
+ (struct amd_sriov_msg_vf2pf_info_header *)
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
+ adev->virt.fw_reserve.ras_telemetry =
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
+ }
} else if (adev->mman.drv_vram_usage_va) {
adev->virt.fw_reserve.p_pf2vf =
(struct amd_sriov_msg_pf2vf_info_header *)
- (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
adev->virt.fw_reserve.p_vf2pf =
(struct amd_sriov_msg_vf2pf_info_header *)
- (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
adev->virt.fw_reserve.ras_telemetry =
- (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10));
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
}
amdgpu_virt_read_pf2vf_data(adev);
@@ -816,7 +895,7 @@ static bool amdgpu_virt_init_req_data(struct amdgpu_device *adev, u32 reg)
break;
default: /* other chip doesn't support SRIOV */
is_sriov = false;
- DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type);
+ dev_err(adev->dev, "Unknown asic type: %d!\n", adev->asic_type);
break;
}
}
@@ -842,6 +921,215 @@ static void amdgpu_virt_init_ras(struct amdgpu_device *adev)
adev->virt.ras.cper_rptr = 0;
}
+static uint8_t amdgpu_virt_crit_region_calc_checksum(uint8_t *buf_start, uint8_t *buf_end)
+{
+ uint32_t sum = 0;
+
+ if (buf_start >= buf_end)
+ return 0;
+
+ for (; buf_start < buf_end; buf_start++)
+ sum += buf_start[0];
+
+ return 0xffffffff - sum;
+}
+
+int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
+{
+ struct amd_sriov_msg_init_data_header *init_data_hdr = NULL;
+ u64 init_hdr_offset = adev->virt.init_data_header.offset;
+ u64 init_hdr_size = (u64)adev->virt.init_data_header.size_kb << 10; /* KB → bytes */
+ u64 vram_size;
+ u64 end;
+ int r = 0;
+ uint8_t checksum = 0;
+
+ /* Skip below init if critical region version != v2 */
+ if (adev->virt.req_init_data_ver != GPU_CRIT_REGION_V2)
+ return 0;
+
+ if (init_hdr_offset < 0) {
+ dev_err(adev->dev, "Invalid init header offset\n");
+ return -EINVAL;
+ }
+
+ vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
+ if (!vram_size || vram_size == U32_MAX)
+ return -EINVAL;
+ vram_size <<= 20;
+
+ if (check_add_overflow(init_hdr_offset, init_hdr_size, &end) || end > vram_size) {
+ dev_err(adev->dev, "init_data_header exceeds VRAM size, exiting\n");
+ return -EINVAL;
+ }
+
+ /* Allocate for init_data_hdr */
+ init_data_hdr = kzalloc(sizeof(struct amd_sriov_msg_init_data_header), GFP_KERNEL);
+ if (!init_data_hdr)
+ return -ENOMEM;
+
+ amdgpu_device_vram_access(adev, (uint64_t)init_hdr_offset, (uint32_t *)init_data_hdr,
+ sizeof(struct amd_sriov_msg_init_data_header), false);
+
+ /* Table validation */
+ if (strncmp(init_data_hdr->signature,
+ AMDGPU_SRIOV_CRIT_DATA_SIGNATURE,
+ AMDGPU_SRIOV_CRIT_DATA_SIG_LEN) != 0) {
+ dev_err(adev->dev, "Invalid init data signature: %.4s\n",
+ init_data_hdr->signature);
+ r = -EINVAL;
+ goto out;
+ }
+
+ checksum = amdgpu_virt_crit_region_calc_checksum(
+ (uint8_t *)&init_data_hdr->initdata_offset,
+ (uint8_t *)init_data_hdr +
+ sizeof(struct amd_sriov_msg_init_data_header));
+ if (checksum != init_data_hdr->checksum) {
+ dev_err(adev->dev, "Found unmatching checksum from calculation 0x%x and init_data 0x%x\n",
+ checksum, init_data_hdr->checksum);
+ r = -EINVAL;
+ goto out;
+ }
+
+ memset(&adev->virt.crit_regn, 0, sizeof(adev->virt.crit_regn));
+ memset(adev->virt.crit_regn_tbl, 0, sizeof(adev->virt.crit_regn_tbl));
+
+ adev->virt.crit_regn.offset = init_data_hdr->initdata_offset;
+ adev->virt.crit_regn.size_kb = init_data_hdr->initdata_size_in_kb;
+
+ /* Validation and initialization for each table entry */
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_IPD_TABLE_ID)) {
+ if (!init_data_hdr->ip_discovery_size_in_kb ||
+ init_data_hdr->ip_discovery_size_in_kb > DISCOVERY_TMR_SIZE) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_IPD_TABLE_ID],
+ init_data_hdr->ip_discovery_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset =
+ init_data_hdr->ip_discovery_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb =
+ init_data_hdr->ip_discovery_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID)) {
+ if (!init_data_hdr->vbios_img_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID],
+ init_data_hdr->vbios_img_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].offset =
+ init_data_hdr->vbios_img_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].size_kb =
+ init_data_hdr->vbios_img_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID)) {
+ if (!init_data_hdr->ras_tele_info_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID],
+ init_data_hdr->ras_tele_info_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset =
+ init_data_hdr->ras_tele_info_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].size_kb =
+ init_data_hdr->ras_tele_info_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID)) {
+ if (!init_data_hdr->dataexchange_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID],
+ init_data_hdr->dataexchange_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset =
+ init_data_hdr->dataexchange_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb =
+ init_data_hdr->dataexchange_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID)) {
+ if (!init_data_hdr->bad_page_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID],
+ init_data_hdr->bad_page_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].offset =
+ init_data_hdr->bad_page_info_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb =
+ init_data_hdr->bad_page_size_in_kb;
+ }
+
+ /* Validation for critical region info */
+ if (adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb > DISCOVERY_TMR_SIZE) {
+ dev_err(adev->dev, "Invalid IP discovery size: 0x%x\n",
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ /* reserved memory starts from crit region base offset with the size of 5MB */
+ adev->mman.fw_vram_usage_start_offset = adev->virt.crit_regn.offset;
+ adev->mman.fw_vram_usage_size = adev->virt.crit_regn.size_kb << 10;
+ dev_info(adev->dev,
+ "critical region v%d requested to reserve memory start at %08llx with %llu KB.\n",
+ init_data_hdr->version,
+ adev->mman.fw_vram_usage_start_offset,
+ adev->mman.fw_vram_usage_size >> 10);
+
+ adev->virt.is_dynamic_crit_regn_enabled = true;
+
+out:
+ kfree(init_data_hdr);
+ init_data_hdr = NULL;
+
+ return r;
+}
+
+int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
+ int data_id, uint8_t *binary, u32 *size)
+{
+ uint32_t data_offset = 0;
+ uint32_t data_size = 0;
+ enum amd_sriov_msg_table_id_enum data_table_id = data_id;
+
+ if (data_table_id >= AMD_SRIOV_MSG_MAX_TABLE_ID)
+ return -EINVAL;
+
+ data_offset = adev->virt.crit_regn_tbl[data_table_id].offset;
+ data_size = adev->virt.crit_regn_tbl[data_table_id].size_kb << 10;
+
+ /* Validate on input params */
+ if (!binary || !size || *size < (uint64_t)data_size)
+ return -EINVAL;
+
+ /* Proceed to copy the dynamic content */
+ amdgpu_device_vram_access(adev,
+ (uint64_t)data_offset, (uint32_t *)binary, data_size, false);
+ *size = (uint64_t)data_size;
+
+ dev_dbg(adev->dev,
+ "Got %s info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
+ amdgpu_virt_dynamic_crit_table_name[data_id], data_offset, data_size);
+
+ return 0;
+}
+
void amdgpu_virt_init(struct amdgpu_device *adev)
{
bool is_sriov = false;
@@ -1289,7 +1577,7 @@ amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block bloc
case AMDGPU_RAS_BLOCK__MPIO:
return RAS_TELEMETRY_GPU_BLOCK_MPIO;
default:
- DRM_WARN_ONCE("Unsupported SRIOV RAS telemetry block 0x%x\n",
+ dev_warn(adev->dev, "Unsupported SRIOV RAS telemetry block 0x%x\n",
block);
return RAS_TELEMETRY_GPU_BLOCK_COUNT;
}
@@ -1304,7 +1592,7 @@ static int amdgpu_virt_cache_host_error_counts(struct amdgpu_device *adev,
checksum = host_telemetry->header.checksum;
used_size = host_telemetry->header.used_size;
- if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+ if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
return 0;
tmp = kmemdup(&host_telemetry->body.error_count, used_size, GFP_KERNEL);
@@ -1383,7 +1671,7 @@ amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev,
checksum = host_telemetry->header.checksum;
used_size = host_telemetry->header.used_size;
- if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+ if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
return -EINVAL;
cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL);
@@ -1515,7 +1803,7 @@ static int amdgpu_virt_cache_chk_criti_hit(struct amdgpu_device *adev,
checksum = host_telemetry->header.checksum;
used_size = host_telemetry->header.used_size;
- if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+ if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
return 0;
tmp = kmemdup(&host_telemetry->body.chk_criti, used_size, GFP_KERNEL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index d1172c8e58c4..14d864be5800 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -54,6 +54,12 @@
#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 2
+/* Signature used to validate the SR-IOV dynamic critical region init data header ("INDA") */
+#define AMDGPU_SRIOV_CRIT_DATA_SIGNATURE "INDA"
+#define AMDGPU_SRIOV_CRIT_DATA_SIG_LEN 4
+
+#define IS_SRIOV_CRIT_REGN_ENTRY_VALID(hdr, id) ((hdr)->valid_tables & (1 << (id)))
+
enum amdgpu_sriov_vf_mode {
SRIOV_VF_MODE_BARE_METAL = 0,
SRIOV_VF_MODE_ONE_VF,
@@ -262,6 +268,11 @@ struct amdgpu_virt_ras {
DECLARE_ATTR_CAP_CLASS(amdgpu_virt, AMDGPU_VIRT_CAPS_LIST);
+struct amdgpu_virt_region {
+ uint32_t offset;
+ uint32_t size_kb;
+};
+
/* GPU virtualization */
struct amdgpu_virt {
uint32_t caps;
@@ -289,6 +300,12 @@ struct amdgpu_virt {
bool ras_init_done;
uint32_t reg_access;
+ /* dynamic(v2) critical regions */
+ struct amdgpu_virt_region init_data_header;
+ struct amdgpu_virt_region crit_regn;
+ struct amdgpu_virt_region crit_regn_tbl[AMD_SRIOV_MSG_MAX_TABLE_ID];
+ bool is_dynamic_crit_regn_enabled;
+
/* vf2pf message */
struct delayed_work vf2pf_work;
uint32_t vf2pf_update_interval_ms;
@@ -424,6 +441,10 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev);
void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
void amdgpu_virt_init(struct amdgpu_device *adev);
+int amdgpu_virt_init_critical_region(struct amdgpu_device *adev);
+int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
+ int data_id, uint8_t *binary, u32 *size);
+
bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c1a801203949..9309830821b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -779,7 +779,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
bool cleaner_shader_needed = false;
bool pasid_mapping_needed = false;
struct dma_fence *fence = NULL;
- struct amdgpu_fence *af;
unsigned int patch;
int r;
@@ -842,12 +841,10 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
}
if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
- r = amdgpu_fence_emit(ring, &fence, NULL, 0);
+ r = amdgpu_fence_emit(ring, job->hw_vm_fence, 0);
if (r)
return r;
- /* this is part of the job's context */
- af = container_of(fence, struct amdgpu_fence, base);
- af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0;
+ fence = &job->hw_vm_fence->base;
}
if (vm_flush_needed) {
@@ -1952,6 +1949,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_vm *vm = bo_va->base.vm;
bool valid = true;
+ int r;
saddr /= AMDGPU_GPU_PAGE_SIZE;
@@ -1972,6 +1970,17 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
return -ENOENT;
}
+ /* It's unlikely to happen that the mapping userq hasn't been idled
+ * during user requests GEM unmap IOCTL except for forcing the unmap
+ * from user space.
+ */
+ if (unlikely(atomic_read(&bo_va->userq_va_mapped) > 0)) {
+ r = amdgpu_userq_gem_va_unmap_validate(adev, mapping, saddr);
+ if (unlikely(r == -EBUSY))
+ dev_warn_once(adev->dev,
+ "Attempt to unmap an active userq buffer\n");
+ }
+
list_del(&mapping->list);
amdgpu_vm_it_remove(mapping, &vm->va);
mapping->bo_va = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
index 474bfe36c0c2..aa78c2ee9e21 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
@@ -322,6 +322,26 @@ static int vpe_early_init(struct amdgpu_ip_block *ip_block)
return 0;
}
+static bool vpe_need_dpm0_at_power_down(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) {
+ case IP_VERSION(6, 1, 1):
+ return adev->pm.fw_version < 0x0a640500;
+ default:
+ return false;
+ }
+}
+
+static int vpe_get_dpm_level(struct amdgpu_device *adev)
+{
+ struct amdgpu_vpe *vpe = &adev->vpe;
+
+ if (!adev->pm.dpm_enabled)
+ return 0;
+
+ return RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_request_lv));
+}
+
static void vpe_idle_work_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
@@ -329,11 +349,17 @@ static void vpe_idle_work_handler(struct work_struct *work)
unsigned int fences = 0;
fences += amdgpu_fence_count_emitted(&adev->vpe.ring);
+ if (fences)
+ goto reschedule;
- if (fences == 0)
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE);
- else
- schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT);
+ if (vpe_need_dpm0_at_power_down(adev) && vpe_get_dpm_level(adev) != 0)
+ goto reschedule;
+
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE);
+ return;
+
+reschedule:
+ schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT);
}
static int vpe_common_init(struct amdgpu_vpe *vpe)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index a5adb2ed9b3c..9d934c07fa6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -234,6 +234,9 @@ static umode_t amdgpu_vram_attrs_is_visible(struct kobject *kobj,
!adev->gmc.vram_vendor)
return 0;
+ if (!ttm_resource_manager_used(&adev->mman.vram_mgr.manager))
+ return 0;
+
return attr->mode;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 3a79ed7d8031..1cee083fb6bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -23,26 +23,84 @@
#ifndef AMDGV_SRIOV_MSG__H_
#define AMDGV_SRIOV_MSG__H_
-/* unit in kilobytes */
-#define AMD_SRIOV_MSG_VBIOS_OFFSET 0
-#define AMD_SRIOV_MSG_VBIOS_SIZE_KB 64
-#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB
-#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB 4
-#define AMD_SRIOV_MSG_TMR_OFFSET_KB 2048
-#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB 2
-#define AMD_SRIOV_RAS_TELEMETRY_SIZE_KB 64
+#define AMD_SRIOV_MSG_SIZE_KB 1
+
/*
- * layout
+ * layout v1
* 0 64KB 65KB 66KB 68KB 132KB
* | VBIOS | PF2VF | VF2PF | Bad Page | RAS Telemetry Region | ...
* | 64KB | 1KB | 1KB | 2KB | 64KB | ...
*/
-#define AMD_SRIOV_MSG_SIZE_KB 1
-#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB
-#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB (AMD_SRIOV_MSG_PF2VF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
-#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB (AMD_SRIOV_MSG_VF2PF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
-#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB (AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB)
+/*
+ * layout v2 (offsets are dynamically allocated and the offsets below are examples)
+ * 0 1KB 64KB 65KB 66KB 68KB 132KB
+ * | INITD_H | VBIOS | PF2VF | VF2PF | Bad Page | RAS Telemetry Region | ...
+ * | 1KB | 64KB | 1KB | 1KB | 2KB | 64KB | ...
+ *
+ * Note: PF2VF + VF2PF + Bad Page = DataExchange region (allocated contiguously)
+ */
+
+/* v1 layout sizes */
+#define AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 64
+#define AMD_SRIOV_MSG_PF2VF_SIZE_KB_V1 1
+#define AMD_SRIOV_MSG_VF2PF_SIZE_KB_V1 1
+#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1 2
+#define AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 64
+#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1 \
+ (AMD_SRIOV_MSG_PF2VF_SIZE_KB_V1 + AMD_SRIOV_MSG_VF2PF_SIZE_KB_V1 + \
+ AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1)
+
+/* v1 offsets */
+#define AMD_SRIOV_MSG_VBIOS_OFFSET_V1 0
+#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB_V1 AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1
+#define AMD_SRIOV_MSG_TMR_OFFSET_KB 2048
+#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB_V1
+#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 \
+ (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 + AMD_SRIOV_MSG_SIZE_KB)
+#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB_V1 \
+ (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 + AMD_SRIOV_MSG_SIZE_KB)
+#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 \
+ (AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB_V1 + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1)
+#define AMD_SRIOV_MSG_INIT_DATA_TOT_SIZE_KB_V1 \
+ (AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 + AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1 + \
+ AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1)
+
+enum amd_sriov_crit_region_version {
+ GPU_CRIT_REGION_V1 = 1,
+ GPU_CRIT_REGION_V2 = 2,
+};
+
+/* v2 layout offset enum (in order of allocation) */
+enum amd_sriov_msg_table_id_enum {
+ AMD_SRIOV_MSG_IPD_TABLE_ID = 0,
+ AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID,
+ AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID,
+ AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID,
+ AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID,
+ AMD_SRIOV_MSG_INITD_H_TABLE_ID,
+ AMD_SRIOV_MSG_MAX_TABLE_ID,
+};
+
+struct amd_sriov_msg_init_data_header {
+ char signature[4]; /* "INDA" */
+ uint32_t version;
+ uint32_t checksum;
+ uint32_t initdata_offset; /* 0 */
+ uint32_t initdata_size_in_kb; /* 5MB */
+ uint32_t valid_tables;
+ uint32_t vbios_img_offset;
+ uint32_t vbios_img_size_in_kb;
+ uint32_t dataexchange_offset;
+ uint32_t dataexchange_size_in_kb;
+ uint32_t ras_tele_info_offset;
+ uint32_t ras_tele_info_size_in_kb;
+ uint32_t ip_discovery_offset;
+ uint32_t ip_discovery_size_in_kb;
+ uint32_t bad_page_info_offset;
+ uint32_t bad_page_size_in_kb;
+ uint32_t reserved[8];
+};
/*
* PF2VF history log:
diff --git a/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c
index 96616a865aac..ed1e25661706 100644
--- a/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: MIT
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 8841d7213de4..751732f3e883 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -9951,6 +9951,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
};
static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 66c47c466532..252517ce5d5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -2438,7 +2438,7 @@ static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
if (version_minor == 3)
gfx_v11_0_load_rlcp_rlcv_microcode(adev);
}
-
+
return 0;
}
@@ -3886,7 +3886,7 @@ static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
}
memcpy(fw, fw_data, fw_size);
-
+
amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
@@ -5862,8 +5862,6 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 header, control = 0;
- BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
-
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
control |= ib->length_dw | (vmid << 24);
@@ -7320,6 +7318,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
.emit_wreg = gfx_v11_0_ring_emit_wreg,
.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
};
static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 710ec9c34e43..35d5a7e99a7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -4419,8 +4419,6 @@ static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 header, control = 0;
- BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
-
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
control |= ib->length_dw | (vmid << 24);
@@ -5597,6 +5595,7 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = {
.emit_wreg = gfx_v12_0_ring_emit_wreg,
.emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
};
static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 0856ff65288c..d3d0a4b0380c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6939,6 +6939,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_rreg = gfx_v8_0_ring_emit_rreg,
.emit_wreg = gfx_v8_0_ring_emit_wreg,
+ .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
};
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index dd19a97436db..f1a2efc2a8d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -7586,6 +7586,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
};
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 77f9d5b9a556..e0b50c690f8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -2152,7 +2152,8 @@ static int gfx_v9_4_3_xcc_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id)
return 0;
}
-static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id, bool restore)
+static void gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id,
+ bool restore)
{
struct amdgpu_device *adev = ring->adev;
struct v9_mqd *mqd = ring->mqd_ptr;
@@ -2186,8 +2187,6 @@ static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id, b
atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
amdgpu_ring_clear_ring(ring);
}
-
- return 0;
}
static int gfx_v9_4_3_xcc_kcq_fini_register(struct amdgpu_device *adev, int xcc_id)
@@ -2220,7 +2219,7 @@ static int gfx_v9_4_3_xcc_kiq_resume(struct amdgpu_device *adev, int xcc_id)
static int gfx_v9_4_3_xcc_kcq_resume(struct amdgpu_device *adev, int xcc_id)
{
struct amdgpu_ring *ring;
- int i, r;
+ int i;
gfx_v9_4_3_xcc_cp_compute_enable(adev, true, xcc_id);
@@ -2228,9 +2227,7 @@ static int gfx_v9_4_3_xcc_kcq_resume(struct amdgpu_device *adev, int xcc_id)
ring = &adev->gfx.compute_ring[i + xcc_id *
adev->gfx.num_compute_rings];
- r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id, false);
- if (r)
- return r;
+ gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id, false);
}
return amdgpu_gfx_enable_kcq(adev, xcc_id);
@@ -3605,11 +3602,8 @@ pipe_reset:
return r;
}
- r = gfx_v9_4_3_xcc_kcq_init_queue(ring, ring->xcc_id, true);
- if (r) {
- dev_err(adev->dev, "fail to init kcq\n");
- return r;
- }
+ gfx_v9_4_3_xcc_kcq_init_queue(ring, ring->xcc_id, true);
+
spin_lock_irqsave(&kiq->ring_lock, flags);
r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
if (r) {
@@ -4798,6 +4792,7 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = {
.emit_wreg = gfx_v9_4_3_ring_emit_wreg,
.emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v9_4_3_ring_emit_hdp_flush,
};
static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index f4a19357ccbc..cad2d19105c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -312,9 +312,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
return;
}
- mutex_lock(&adev->mman.gtt_window_lock);
gmc_v12_0_flush_vm_hub(adev, vmid, vmhub, 0);
- mutex_unlock(&adev->mman.gtt_window_lock);
return;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 93d7ccb7d013..0e5e54d0a9a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -1068,7 +1068,7 @@ static int gmc_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
GFP_KERNEL);
if (!adev->gmc.vm_fault_info)
return -ENOMEM;
- atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 0);
return 0;
}
@@ -1290,7 +1290,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
VMID);
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
- && !atomic_read(&adev->gmc.vm_fault_info_updated)) {
+ && !atomic_read_acquire(&adev->gmc.vm_fault_info_updated)) {
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
u32 protections = REG_GET_FIELD(status,
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
@@ -1306,8 +1306,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
info->prot_read = protections & 0x8 ? true : false;
info->prot_write = protections & 0x10 ? true : false;
info->prot_exec = protections & 0x20 ? true : false;
- mb();
- atomic_set(&adev->gmc.vm_fault_info_updated, 1);
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 1);
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index c5e2a2c41e06..e1509480dfc2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1183,7 +1183,7 @@ static int gmc_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
GFP_KERNEL);
if (!adev->gmc.vm_fault_info)
return -ENOMEM;
- atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 0);
return 0;
}
@@ -1478,7 +1478,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
VMID);
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
- && !atomic_read(&adev->gmc.vm_fault_info_updated)) {
+ && !atomic_read_acquire(&adev->gmc.vm_fault_info_updated)) {
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
u32 protections = REG_GET_FIELD(status,
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
@@ -1494,8 +1494,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
info->prot_read = protections & 0x8 ? true : false;
info->prot_write = protections & 0x10 ? true : false;
info->prot_exec = protections & 0x20 ? true : false;
- mb();
- atomic_set(&adev->gmc.vm_fault_info_updated, 1);
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 1);
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 0d1dd587db5f..e716097dfde4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1843,6 +1843,10 @@ static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev)
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) &&
+ adev->rev_id == 0x3)
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+
if (!(adev->flags & AMD_IS_APU) && !amdgpu_sriov_vf(adev)) {
vram_info = RREG32(regBIF_BIOS_SCRATCH_4);
adev->gmc.vram_vendor = vram_info & 0xF;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index 2db9b2c63693..b1ee9473d628 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -205,13 +205,13 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev,
int db_array_size = amdgpu_mes_get_hung_queue_db_array_size(adev);
struct mes_detect_and_reset_queue_input input;
struct amdgpu_usermode_queue *queue;
- struct amdgpu_userq_mgr *uqm, *tmp;
unsigned int hung_db_num = 0;
- int queue_id, r, i;
- u32 db_array[4];
+ unsigned long queue_id;
+ u32 db_array[8];
+ int r, i;
- if (db_array_size > 4) {
- dev_err(adev->dev, "DB array size (%d vs 4) too small\n",
+ if (db_array_size > 8) {
+ dev_err(adev->dev, "DB array size (%d vs 8) too small\n",
db_array_size);
return -EINVAL;
}
@@ -227,16 +227,14 @@ static int mes_userq_detect_and_reset(struct amdgpu_device *adev,
if (r) {
dev_err(adev->dev, "Failed to detect and reset queues, err (%d)\n", r);
} else if (hung_db_num) {
- list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
- idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
- if (queue->queue_type == queue_type) {
- for (i = 0; i < hung_db_num; i++) {
- if (queue->doorbell_index == db_array[i]) {
- queue->state = AMDGPU_USERQ_STATE_HUNG;
- atomic_inc(&adev->gpu_reset_counter);
- amdgpu_userq_fence_driver_force_completion(queue);
- drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL);
- }
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ if (queue->queue_type == queue_type) {
+ for (i = 0; i < hung_db_num; i++) {
+ if (queue->doorbell_index == db_array[i]) {
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ atomic_inc(&adev->gpu_reset_counter);
+ amdgpu_userq_fence_driver_force_completion(queue);
+ drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE, NULL);
}
}
}
@@ -254,7 +252,6 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type];
struct drm_amdgpu_userq_in *mqd_user = args_in;
struct amdgpu_mqd_prop *userq_props;
- struct amdgpu_gfx_shadow_info shadow_info;
int r;
/* Structure to initialize MQD for userqueue using generic MQD init function */
@@ -280,8 +277,6 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
userq_props->doorbell_index = queue->doorbell_index;
userq_props->fence_address = queue->fence_drv->gpu_addr;
- if (adev->gfx.funcs->get_gfx_shadow_info)
- adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow_info, true);
if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) {
struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd;
@@ -298,8 +293,9 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
goto free_mqd;
}
- if (amdgpu_userq_input_va_validate(queue->vm, compute_mqd->eop_va,
- max_t(u32, PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE)))
+ r = amdgpu_userq_input_va_validate(queue, compute_mqd->eop_va,
+ 2048);
+ if (r)
goto free_mqd;
userq_props->eop_gpu_addr = compute_mqd->eop_va;
@@ -311,6 +307,14 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
kfree(compute_mqd);
} else if (queue->queue_type == AMDGPU_HW_IP_GFX) {
struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11;
+ struct amdgpu_gfx_shadow_info shadow_info;
+
+ if (adev->gfx.funcs->get_gfx_shadow_info) {
+ adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow_info, true);
+ } else {
+ r = -EINVAL;
+ goto free_mqd;
+ }
if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) {
DRM_ERROR("Invalid GFX MQD\n");
@@ -330,8 +334,13 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
userq_props->tmz_queue =
mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
- if (amdgpu_userq_input_va_validate(queue->vm, mqd_gfx_v11->shadow_va,
- shadow_info.shadow_size))
+ r = amdgpu_userq_input_va_validate(queue, mqd_gfx_v11->shadow_va,
+ shadow_info.shadow_size);
+ if (r)
+ goto free_mqd;
+ r = amdgpu_userq_input_va_validate(queue, mqd_gfx_v11->csa_va,
+ shadow_info.csa_size);
+ if (r)
goto free_mqd;
kfree(mqd_gfx_v11);
@@ -350,9 +359,9 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
r = -ENOMEM;
goto free_mqd;
}
-
- if (amdgpu_userq_input_va_validate(queue->vm, mqd_sdma_v11->csa_va,
- shadow_info.csa_size))
+ r = amdgpu_userq_input_va_validate(queue, mqd_sdma_v11->csa_va,
+ 32);
+ if (r)
goto free_mqd;
userq_props->csa_addr = mqd_sdma_v11->csa_va;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index e82188431f79..3a52754b5cad 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -66,7 +66,8 @@ static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev);
#define GFX_MES_DRAM_SIZE 0x80000
#define MES11_HW_RESOURCE_1_SIZE (128 * AMDGPU_GPU_PAGE_SIZE)
-#define MES11_HUNG_DB_OFFSET_ARRAY_SIZE 4
+#define MES11_HUNG_DB_OFFSET_ARRAY_SIZE 8 /* [0:3] = db offset, [4:7] = hqd info */
+#define MES11_HUNG_HQD_INFO_OFFSET 4
static void mes_v11_0_ring_set_wptr(struct amdgpu_ring *ring)
{
@@ -368,6 +369,7 @@ static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes,
struct mes_remove_queue_input *input)
{
union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
+ uint32_t mes_rev = mes->sched_version & AMDGPU_MES_VERSION_MASK;
memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
@@ -378,6 +380,9 @@ static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes,
mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
+ if (mes_rev >= 0x60)
+ mes_remove_queue_pkt.remove_queue_after_reset = input->remove_queue_after_reset;
+
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
offsetof(union MESAPI__REMOVE_QUEUE, api_status));
@@ -1720,8 +1725,9 @@ static int mes_v11_0_early_init(struct amdgpu_ip_block *ip_block)
struct amdgpu_device *adev = ip_block->adev;
int pipe, r;
- adev->mes.hung_queue_db_array_size =
- MES11_HUNG_DB_OFFSET_ARRAY_SIZE;
+ adev->mes.hung_queue_db_array_size = MES11_HUNG_DB_OFFSET_ARRAY_SIZE;
+ adev->mes.hung_queue_hqd_info_offset = MES11_HUNG_HQD_INFO_OFFSET;
+
for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE)
continue;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index aff06f06aeee..744e95d3984a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -47,7 +47,8 @@ static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev);
#define MES_EOP_SIZE 2048
-#define MES12_HUNG_DB_OFFSET_ARRAY_SIZE 4
+#define MES12_HUNG_DB_OFFSET_ARRAY_SIZE 8 /* [0:3] = db offset [4:7] hqd info */
+#define MES12_HUNG_HQD_INFO_OFFSET 4
static void mes_v12_0_ring_set_wptr(struct amdgpu_ring *ring)
{
@@ -228,7 +229,12 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
pipe, x_pkt->header.opcode);
r = amdgpu_fence_wait_polling(ring, seq, timeout);
- if (r < 1 || !*status_ptr) {
+
+ /*
+ * status_ptr[31:0] == 0 (fail) or status_ptr[63:0] == 1 (success).
+ * If status_ptr[31:0] == 0 then status_ptr[63:32] will have debug error information.
+ */
+ if (r < 1 || !(lower_32_bits(*status_ptr))) {
if (misc_op_str)
dev_err(adev->dev, "MES(%d) failed to respond to msg=%s (%s)\n",
@@ -355,6 +361,7 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes,
struct mes_remove_queue_input *input)
{
union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
+ uint32_t mes_rev = mes->sched_version & AMDGPU_MES_VERSION_MASK;
memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
@@ -365,6 +372,9 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes,
mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
+ if (mes_rev >= 0x5a)
+ mes_remove_queue_pkt.remove_queue_after_reset = input->remove_queue_after_reset;
+
return mes_v12_0_submit_pkt_and_poll_completion(mes,
AMDGPU_MES_SCHED_PIPE,
&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
@@ -1899,8 +1909,9 @@ static int mes_v12_0_early_init(struct amdgpu_ip_block *ip_block)
struct amdgpu_device *adev = ip_block->adev;
int pipe, r;
- adev->mes.hung_queue_db_array_size =
- MES12_HUNG_DB_OFFSET_ARRAY_SIZE;
+ adev->mes.hung_queue_db_array_size = MES12_HUNG_DB_OFFSET_ARRAY_SIZE;
+ adev->mes.hung_queue_hqd_info_offset = MES12_HUNG_HQD_INFO_OFFSET;
+
for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
r = amdgpu_mes_init_microcode(adev, pipe);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index e5282a5d05d9..cd5b2f07edb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -222,12 +222,20 @@ send_request:
adev->virt.req_init_data_ver = 0;
} else {
if (req == IDH_REQ_GPU_INIT_DATA) {
- adev->virt.req_init_data_ver =
- RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1);
-
- /* assume V1 in case host doesn't set version number */
- if (adev->virt.req_init_data_ver < 1)
- adev->virt.req_init_data_ver = 1;
+ switch (RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1)) {
+ case GPU_CRIT_REGION_V2:
+ adev->virt.req_init_data_ver = GPU_CRIT_REGION_V2;
+ adev->virt.init_data_header.offset =
+ RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW2);
+ adev->virt.init_data_header.size_kb =
+ RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW3);
+ break;
+ default:
+ adev->virt.req_init_data_ver = GPU_CRIT_REGION_V1;
+ adev->virt.init_data_header.offset = -1;
+ adev->virt.init_data_header.size_kb = 0;
+ break;
+ }
}
}
@@ -285,7 +293,8 @@ static int xgpu_nv_release_full_gpu_access(struct amdgpu_device *adev,
static int xgpu_nv_request_init_data(struct amdgpu_device *adev)
{
- return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA);
+ return xgpu_nv_send_access_requests_with_param(adev, IDH_REQ_GPU_INIT_DATA,
+ 0, GPU_CRIT_REGION_V2, 0);
}
static int xgpu_nv_mailbox_ack_irq(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index 1c22bc11c1f8..bdfd2917e3ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -41,19 +41,21 @@ static void nbio_v7_9_remap_hdp_registers(struct amdgpu_device *adev)
static u32 nbio_v7_9_get_rev_id(struct amdgpu_device *adev)
{
- u32 tmp;
-
- tmp = IP_VERSION_SUBREV(amdgpu_ip_version_full(adev, NBIO_HWIP, 0));
- /* If it is VF or subrevision holds a non-zero value, that should be used */
- if (tmp || amdgpu_sriov_vf(adev))
- return tmp;
+ u32 rev_id;
- /* If discovery subrev is not updated, use register version */
- tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
- tmp = REG_GET_FIELD(tmp, RCC_STRAP0_RCC_DEV0_EPF0_STRAP0,
- STRAP_ATI_REV_ID_DEV0_F0);
+ /*
+ * fetch the sub-revision field from the IP-discovery table
+ * (returns zero if the table entry is not populated).
+ */
+ if (amdgpu_sriov_vf(adev)) {
+ rev_id = IP_VERSION_SUBREV(amdgpu_ip_version_full(adev, NBIO_HWIP, 0));
+ } else {
+ rev_id = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0);
+ rev_id = REG_GET_FIELD(rev_id, RCC_STRAP0_RCC_DEV0_EPF0_STRAP0,
+ STRAP_ATI_REV_ID_DEV0_F0);
+ }
- return tmp;
+ return rev_id;
}
static void nbio_v7_9_mc_access_enable(struct amdgpu_device *adev, bool enable)